Blame - magick/token.c - platform/external/ImageMagick

blob: d318470e9e7336dba568e088d590f14c842cb0ce [file] [log] [blame]

cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	1	/*
				2	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				3	% %
				4	% %
				5	% %
				6	% TTTTT OOO K K EEEEE N N %
				7	% T O O K K E NN N %
				8	% T O O KKK EEE N N N %
				9	% T O O K K E N NN %
				10	% T OOO K K EEEEE N N %
				11	% %
				12	% %
				13	% MagickCore Token Methods %
				14	% %
				15	% Software Design %
				16	% John Cristy %
				17	% January 1993 %
				18	% %
				19	% %
cristy	7e41fe8	2010-12-04 23:12:08 +0000	[diff] [blame]	20	% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	21	% dedicated to making software imaging solutions freely available. %
				22	% %
				23	% You may not use this file except in compliance with the License. You may %
				24	% obtain a copy of the License at %
				25	% %
				26	% http://www.imagemagick.org/script/license.php %
				27	% %
				28	% Unless required by applicable law or agreed to in writing, software %
				29	% distributed under the License is distributed on an "AS IS" BASIS, %
				30	% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
				31	% See the License for the specific language governing permissions and %
				32	% limitations under the License. %
				33	% %
				34	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				35	%
				36	%
				37	%
				38	*/
				39
				40	/*
				41	Include declarations.
				42	*/
				43	#include "magick/studio.h"
				44	#include "magick/exception.h"
				45	#include "magick/exception-private.h"
				46	#include "magick/image.h"
				47	#include "magick/memory_.h"
				48	#include "magick/string_.h"
				49	#include "magick/token.h"
				50	#include "magick/token-private.h"
				51	#include "magick/utility.h"
				52
				53	/*
				54	Typedef declarations.
				55	*/
				56	struct _TokenInfo
				57	{
				58	int
				59	state;
				60
				61	MagickStatusType
				62	flag;
				63
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	64	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	65	offset;
				66
				67	char
				68	quote;
				69
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	70	size_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	71	signature;
				72	};
				73
				74	/*
				75	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				76	% %
				77	% %
				78	% %
				79	% A c q u i r e T o k e n I n f o %
				80	% %
				81	% %
				82	% %
				83	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				84	%
				85	% AcquireTokenInfo() allocates the TokenInfo structure.
				86	%
				87	% The format of the AcquireTokenInfo method is:
				88	%
				89	% TokenInfo *AcquireTokenInfo()
				90	%
				91	*/
				92	MagickExport TokenInfo *AcquireTokenInfo(void)
				93	{
				94	TokenInfo
				95	*token_info;
				96
cristy	73bd4a5	2010-10-05 11:24:23 +0000	[diff] [blame]	97	token_info=(TokenInfo ) AcquireMagickMemory(sizeof(token_info));
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	98	if (token_info == (TokenInfo *) NULL)
				99	ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
				100	token_info->signature=MagickSignature;
				101	return(token_info);
				102	}
				103
				104	/*
				105	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				106	% %
				107	% %
				108	% %
				109	% D e s t r o y T o k e n I n f o %
				110	% %
				111	% %
				112	% %
				113	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				114	%
				115	% DestroyTokenInfo() deallocates memory associated with a TokenInfo
				116	% structure.
				117	%
				118	% The format of the DestroyTokenInfo method is:
				119	%
				120	% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
				121	%
				122	% A description of each parameter follows:
				123	%
				124	% o token_info: Specifies a pointer to a TokenInfo structure.
				125	%
				126	*/
				127	MagickExport TokenInfo DestroyTokenInfo(TokenInfo token_info)
				128	{
				129	(void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
				130	assert(token_info != (TokenInfo *) NULL);
				131	assert(token_info->signature == MagickSignature);
				132	token_info->signature=(~MagickSignature);
				133	token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
				134	return(token_info);
				135	}
				136
				137	/*
				138	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				139	% %
				140	% %
				141	% %
				142	+ G e t M a g i c k T o k e n %
				143	% %
				144	% %
				145	% %
				146	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				147	%
cristy	df11e55	2011-04-23 17:18:30 +0000	[diff] [blame]	148	% GetMagickToken() gets a token from the token stream. A token is defined as
				149	% a sequence of characters delimited by whitespace (e.g. clip-path), a
				150	% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	151	% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
				152	% separator characters: ':', '=', ',', and ';'.
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	153	%
				154	% The format of the GetMagickToken method is:
				155	%
				156	% void GetMagickToken(const char *start,const char **end,char *token)
				157	%
				158	% A description of each parameter follows:
				159	%
				160	% o start: the start of the token sequence.
				161	%
				162	% o end: point to the end of the token sequence.
				163	%
				164	% o token: copy the token to this buffer.
				165	%
				166	*/
				167	MagickExport void GetMagickToken(const char start,const char end,char token)
				168	{
				169	double
				170	value;
				171
				172	register const char
				173	*p;
				174
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	175	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	176	i;
				177
cristy	32f6912	2011-04-22 02:26:00 +0000	[diff] [blame]	178	assert(start != (const char *) NULL);
				179	assert(token != (char *) NULL);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	180	i=0;
				181	for (p=start; *p != '\0'; )
				182	{
				183	while ((isspace((int) ((unsigned char) p)) != 0) && (p != '\0'))
				184	p++;
				185	if (*p == '\0')
				186	break;
				187	switch (*p)
				188	{
				189	case '"':
				190	case '\'':
				191	case '`':
				192	case '{':
				193	{
				194	register char
				195	escape;
				196
				197	switch (*p)
				198	{
				199	case '"': escape='"'; break;
				200	case '\'': escape='\''; break;
				201	case '`': escape='\''; break;
				202	case '{': escape='}'; break;
				203	default: escape=(*p); break;
				204	}
				205	for (p++; *p != '\0'; p++)
				206	{
				207	if ((p == '\\') && (((p+1) == escape) \|\| (*(p+1) == '\\')))
				208	p++;
				209	else
				210	if (*p == escape)
				211	{
				212	p++;
				213	break;
				214	}
				215	token[i++]=(*p);
				216	}
				217	break;
				218	}
				219	case '/':
				220	{
				221	token[i++]=(*p++);
				222	if ((p == '>') \|\| (p == '/'))
				223	token[i++]=(*p++);
				224	break;
				225	}
				226	default:
				227	{
				228	char
				229	*q;
				230
				231	value=strtod(p,&q);
cristy	da16f16	2011-02-19 23:52:17 +0000	[diff] [blame]	232	(void) value;
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	233	if ((p != q) && (*p != ','))
				234	{
				235	for ( ; (p < q) && (*p != ','); p++)
				236	token[i++]=(*p);
				237	if (*p == '%')
				238	token[i++]=(*p++);
				239	break;
				240	}
cristy	c507168	2011-04-22 02:06:27 +0000	[diff] [blame]	241	if ((p != '\0') && (isalpha((int) ((unsigned char) p)) == 0) &&
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	242	(p != DirectorySeparator) && (p != '#') && (p != '<'))
				243	{
				244	token[i++]=(*p++);
				245	break;
				246	}
				247	for ( ; *p != '\0'; p++)
				248	{
				249	if (((isspace((int) ((unsigned char) p)) != 0) \|\| (p == '=') \|\|
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	250	(p == ',') \|\| (p == ':') \|\| (p == ';')) && ((p-1) != '\\'))
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	251	break;
				252	if ((i > 0) && (*p == '<'))
				253	break;
				254	token[i++]=(*p);
				255	if (*p == '>')
				256	break;
				257	if (*p == '(')
				258	for (p++; *p != '\0'; p++)
				259	{
				260	token[i++]=(*p);
				261	if ((p == ')') && ((p-1) != '\\'))
				262	break;
				263	}
				264	}
				265	break;
				266	}
				267	}
				268	break;
				269	}
				270	token[i]='\0';
				271	if (LocaleNCompare(token,"url(",4) == 0)
				272	{
				273	ssize_t
				274	offset;
				275
				276	offset=4;
				277	if (token[offset] == '#')
				278	offset++;
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	279	i=(ssize_t) strlen(token);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	280	(void) CopyMagickString(token,token+offset,MaxTextExtent);
				281	token[i-offset-1]='\0';
				282	}
				283	while (isspace((int) ((unsigned char) *p)) != 0)
				284	p++;
				285	if (end != (const char **) NULL)
				286	end=(const char ) p;
				287	}
				288
				289	/*
				290	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				291	% %
				292	% %
				293	% %
				294	% G l o b E x p r e s s i o n %
				295	% %
				296	% %
				297	% %
				298	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				299	%
				300	% GlobExpression() returns MagickTrue if the expression matches the pattern.
				301	%
				302	% The format of the GlobExpression function is:
				303	%
				304	% MagickBooleanType GlobExpression(const char *expression,
				305	% const char *pattern,const MagickBooleanType case_insensitive)
				306	%
				307	% A description of each parameter follows:
				308	%
				309	% o expression: Specifies a pointer to a text string containing a file name.
				310	%
				311	% o pattern: Specifies a pointer to a text string containing a pattern.
				312	%
				313	% o case_insensitive: set to MagickTrue to ignore the case when matching
				314	% an expression.
				315	%
				316	*/
				317	MagickExport MagickBooleanType GlobExpression(const char *expression,
				318	const char *pattern,const MagickBooleanType case_insensitive)
				319	{
				320	MagickBooleanType
				321	done,
				322	match;
				323
				324	register const char
				325	*p;
				326
				327	/*
				328	Return on empty pattern or '*'.
				329	*/
				330	if (pattern == (char *) NULL)
				331	return(MagickTrue);
				332	if (GetUTFCode(pattern) == 0)
				333	return(MagickTrue);
				334	if (LocaleCompare(pattern,"*") == 0)
				335	return(MagickTrue);
				336	p=pattern+strlen(pattern)-1;
				337	if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
				338	{
				339	ExceptionInfo
				340	*exception;
				341
				342	ImageInfo
				343	*image_info;
				344
				345	/*
				346	Determine if pattern is a scene, i.e. img0001.pcd[2].
				347	*/
				348	image_info=AcquireImageInfo();
				349	(void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
				350	exception=AcquireExceptionInfo();
cristy	d965a42	2010-03-03 17:47:35 +0000	[diff] [blame]	351	(void) SetImageInfo(image_info,0,exception);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	352	exception=DestroyExceptionInfo(exception);
				353	if (LocaleCompare(image_info->filename,pattern) != 0)
				354	{
				355	image_info=DestroyImageInfo(image_info);
				356	return(MagickFalse);
				357	}
				358	image_info=DestroyImageInfo(image_info);
				359	}
				360	/*
				361	Evaluate glob expression.
				362	*/
				363	done=MagickFalse;
				364	while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
				365	{
				366	if (GetUTFCode(expression) == 0)
				367	if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
				368	break;
				369	switch (GetUTFCode(pattern))
				370	{
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	371	case '*':
				372	{
				373	MagickBooleanType
				374	status;
				375
				376	status=MagickFalse;
				377	pattern+=GetUTFOctets(pattern);
				378	while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
				379	{
				380	status=GlobExpression(expression,pattern,case_insensitive);
				381	expression+=GetUTFOctets(expression);
				382	}
				383	if (status != MagickFalse)
				384	{
				385	while (GetUTFCode(expression) != 0)
				386	expression+=GetUTFOctets(expression);
				387	while (GetUTFCode(pattern) != 0)
				388	pattern+=GetUTFOctets(pattern);
				389	}
				390	break;
				391	}
				392	case '[':
				393	{
cristy	55a91cd	2010-12-01 00:57:40 +0000	[diff] [blame]	394	int
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	395	c;
				396
				397	pattern+=GetUTFOctets(pattern);
				398	for ( ; ; )
				399	{
				400	if ((GetUTFCode(pattern) == 0) \|\| (GetUTFCode(pattern) == ']'))
				401	{
				402	done=MagickTrue;
				403	break;
				404	}
				405	if (GetUTFCode(pattern) == '\\')
				406	{
				407	pattern+=GetUTFOctets(pattern);
				408	if (GetUTFCode(pattern) == 0)
				409	{
				410	done=MagickTrue;
				411	break;
				412	}
				413	}
				414	if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
				415	{
				416	c=GetUTFCode(pattern);
				417	pattern+=GetUTFOctets(pattern);
				418	pattern+=GetUTFOctets(pattern);
				419	if (GetUTFCode(pattern) == ']')
				420	{
				421	done=MagickTrue;
				422	break;
				423	}
				424	if (GetUTFCode(pattern) == '\\')
				425	{
				426	pattern+=GetUTFOctets(pattern);
				427	if (GetUTFCode(pattern) == 0)
				428	{
				429	done=MagickTrue;
				430	break;
				431	}
				432	}
				433	if ((GetUTFCode(expression) < c) \|\|
				434	(GetUTFCode(expression) > GetUTFCode(pattern)))
				435	{
				436	pattern+=GetUTFOctets(pattern);
				437	continue;
				438	}
				439	}
				440	else
				441	if (GetUTFCode(pattern) != GetUTFCode(expression))
				442	{
				443	pattern+=GetUTFOctets(pattern);
				444	continue;
				445	}
				446	pattern+=GetUTFOctets(pattern);
				447	while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
				448	{
				449	if ((GetUTFCode(pattern) == '\\') &&
				450	(GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
				451	pattern+=GetUTFOctets(pattern);
				452	pattern+=GetUTFOctets(pattern);
				453	}
				454	if (GetUTFCode(pattern) != 0)
				455	{
				456	pattern+=GetUTFOctets(pattern);
				457	expression+=GetUTFOctets(expression);
				458	}
				459	break;
				460	}
				461	break;
				462	}
				463	case '?':
				464	{
				465	pattern+=GetUTFOctets(pattern);
				466	expression+=GetUTFOctets(expression);
				467	break;
				468	}
				469	case '{':
				470	{
				471	register const char
				472	*p;
				473
				474	pattern+=GetUTFOctets(pattern);
				475	while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
				476	{
				477	p=expression;
				478	match=MagickTrue;
				479	while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
				480	(GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
				481	(match != MagickFalse))
				482	{
				483	if (GetUTFCode(pattern) == '\\')
				484	pattern+=GetUTFOctets(pattern);
				485	match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
				486	MagickFalse;
				487	p+=GetUTFOctets(p);
				488	pattern+=GetUTFOctets(pattern);
				489	}
				490	if (GetUTFCode(pattern) == 0)
				491	{
				492	match=MagickFalse;
				493	done=MagickTrue;
				494	break;
				495	}
				496	else
				497	if (match != MagickFalse)
				498	{
				499	expression=p;
				500	while ((GetUTFCode(pattern) != '}') &&
				501	(GetUTFCode(pattern) != 0))
				502	{
				503	pattern+=GetUTFOctets(pattern);
				504	if (GetUTFCode(pattern) == '\\')
				505	{
				506	pattern+=GetUTFOctets(pattern);
				507	if (GetUTFCode(pattern) == '}')
				508	pattern+=GetUTFOctets(pattern);
				509	}
				510	}
				511	}
				512	else
				513	{
				514	while ((GetUTFCode(pattern) != '}') &&
				515	(GetUTFCode(pattern) != ',') &&
				516	(GetUTFCode(pattern) != 0))
				517	{
				518	pattern+=GetUTFOctets(pattern);
				519	if (GetUTFCode(pattern) == '\\')
				520	{
				521	pattern+=GetUTFOctets(pattern);
				522	if ((GetUTFCode(pattern) == '}') \|\|
				523	(GetUTFCode(pattern) == ','))
				524	pattern+=GetUTFOctets(pattern);
				525	}
				526	}
				527	}
				528	if (GetUTFCode(pattern) != 0)
				529	pattern+=GetUTFOctets(pattern);
				530	}
				531	break;
				532	}
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	533	case '\\':
				534	{
				535	pattern+=GetUTFOctets(pattern);
cristy	4705fe8	2010-04-23 16:20:03 +0000	[diff] [blame]	536	if (GetUTFCode(pattern) == 0)
				537	break;
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	538	}
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	539	default:
				540	{
				541	if (case_insensitive != MagickFalse)
				542	{
				543	if (tolower((int) GetUTFCode(expression)) !=
				544	tolower((int) GetUTFCode(pattern)))
				545	{
				546	done=MagickTrue;
				547	break;
				548	}
				549	}
				550	else
				551	if (GetUTFCode(expression) != GetUTFCode(pattern))
				552	{
				553	done=MagickTrue;
				554	break;
				555	}
				556	expression+=GetUTFOctets(expression);
				557	pattern+=GetUTFOctets(pattern);
				558	}
				559	}
				560	}
				561	while (GetUTFCode(pattern) == '*')
				562	pattern+=GetUTFOctets(pattern);
				563	match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
				564	MagickTrue : MagickFalse;
				565	return(match);
				566	}
				567
				568	/*
				569	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				570	% %
				571	% %
				572	% %
				573	+ I s G l o b %
				574	% %
				575	% %
				576	% %
				577	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				578	%
				579	% IsGlob() returns MagickTrue if the path specification contains a globbing
				580	% pattern.
				581	%
				582	% The format of the IsGlob method is:
				583	%
				584	% MagickBooleanType IsGlob(const char *path)
				585	%
				586	% A description of each parameter follows:
				587	%
				588	% o path: the path.
				589	%
				590	*/
				591	MagickExport MagickBooleanType IsGlob(const char *path)
				592	{
				593	MagickBooleanType
				594	status;
				595
				596	if (IsPathAccessible(path) != MagickFalse)
				597	return(MagickFalse);
				598	status=(strchr(path,'') != (char ) NULL) \|\|
				599	(strchr(path,'?') != (char *) NULL) \|\|
				600	(strchr(path,'{') != (char *) NULL) \|\|
				601	(strchr(path,'}') != (char *) NULL) \|\|
				602	(strchr(path,'[') != (char *) NULL) \|\|
				603	(strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
				604	return(status);
				605	}
				606
				607	/*
				608	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				609	% %
				610	% %
				611	% %
				612	% T o k e n i z e r %
				613	% %
				614	% %
				615	% %
				616	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				617	%
				618	% Tokenizer() is a generalized, finite state token parser. It extracts tokens
				619	% one at a time from a string of characters. The characters used for white
				620	% space, for break characters, and for quotes can be specified. Also,
				621	% characters in the string can be preceded by a specifiable escape character
				622	% which removes any special meaning the character may have.
				623	%
				624	% Here is some terminology:
				625	%
				626	% o token: A single unit of information in the form of a group of
				627	% characters.
				628	%
				629	% o white space: Space that gets ignored (except within quotes or when
				630	% escaped), like blanks and tabs. In addition, white space terminates a
				631	% non-quoted token.
				632	%
				633	% o break set: One or more characters that separates non-quoted tokens.
				634	% Commas are a common break character. The usage of break characters to
				635	% signal the end of a token is the same as that of white space, except
				636	% multiple break characters with nothing or only white space between
				637	% generate a null token for each two break characters together.
				638	%
				639	% For example, if blank is set to be the white space and comma is set to
				640	% be the break character, the line
				641	%
				642	% A, B, C , , DEF
				643	%
				644	% ... consists of 5 tokens:
				645	%
				646	% 1) "A"
				647	% 2) "B"
				648	% 3) "C"
				649	% 4) "" (the null string)
				650	% 5) "DEF"
				651	%
				652	% o Quote character: A character that, when surrounding a group of other
				653	% characters, causes the group of characters to be treated as a single
				654	% token, no matter how many white spaces or break characters exist in
				655	% the group. Also, a token always terminates after the closing quote.
				656	% For example, if ' is the quote character, blank is white space, and
				657	% comma is the break character, the following string
				658	%
				659	% A, ' B, CD'EF GHI
				660	%
				661	% ... consists of 4 tokens:
				662	%
				663	% 1) "A"
				664	% 2) " B, CD" (note the blanks & comma)
				665	% 3) "EF"
				666	% 4) "GHI"
				667	%
				668	% The quote characters themselves do not appear in the resultant
				669	% tokens. The double quotes are delimiters i use here for
				670	% documentation purposes only.
				671	%
				672	% o Escape character: A character which itself is ignored but which
				673	% causes the next character to be used as is. ^ and \ are often used
				674	% as escape characters. An escape in the last position of the string
				675	% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
				676	% and non-escape) character. For example, assume white space, break
				677	% character, and quote are the same as in the above examples, and
				678	% further, assume that ^ is the escape character. Then, in the string
				679	%
				680	% ABC, ' DEF ^' GH' I ^ J K^ L ^
				681	%
				682	% ... there are 7 tokens:
				683	%
				684	% 1) "ABC"
				685	% 2) " DEF ' GH"
				686	% 3) "I"
				687	% 4) " " (a lone blank)
				688	% 5) "J"
				689	% 6) "K L"
				690	% 7) "^" (passed as is at end of line)
				691	%
				692	% The format of the Tokenizer method is:
				693	%
				694	% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
				695	% const size_t max_token_length,const char *line,const char *white,
				696	% const char *break_set,const char *quote,const char escape,
				697	% char *breaker,int *next,char *quoted)
				698	%
				699	% A description of each parameter follows:
				700	%
				701	% o flag: right now, only the low order 3 bits are used.
				702	%
				703	% 1 => convert non-quoted tokens to upper case
				704	% 2 => convert non-quoted tokens to lower case
				705	% 0 => do not convert non-quoted tokens
				706	%
				707	% o token: a character string containing the returned next token
				708	%
				709	% o max_token_length: the maximum size of "token". Characters beyond
				710	% "max_token_length" are truncated.
				711	%
				712	% o string: the string to be parsed.
				713	%
				714	% o white: a string of the valid white spaces. example:
				715	%
				716	% char whitesp[]={" \t"};
				717	%
				718	% blank and tab will be valid white space.
				719	%
				720	% o break: a string of the valid break characters. example:
				721	%
				722	% char breakch[]={";,"};
				723	%
				724	% semicolon and comma will be valid break characters.
				725	%
				726	% o quote: a string of the valid quote characters. An example would be
				727	%
				728	% char quote[]={"'\""};
				729	%
				730	% (this causes single and double quotes to be valid) Note that a
				731	% token starting with one of these characters needs the same quote
				732	% character to terminate it.
				733	%
				734	% for example:
				735	%
				736	% "ABC '
				737	%
				738	% is unterminated, but
				739	%
				740	% "DEF" and 'GHI'
				741	%
				742	% are properly terminated. Note that different quote characters
				743	% can appear on the same line; only for a given token do the quote
				744	% characters have to be the same.
				745	%
				746	% o escape: the escape character (NOT a string ... only one
				747	% allowed). Use zero if none is desired.
				748	%
				749	% o breaker: the break character used to terminate the current
				750	% token. If the token was quoted, this will be the quote used. If
				751	% the token is the last one on the line, this will be zero.
				752	%
				753	% o next: this variable points to the first character of the
				754	% next token. it gets reset by "tokenizer" as it steps through the
				755	% string. Set it to 0 upon initialization, and leave it alone
				756	% after that. You can change it if you want to jump around in the
				757	% string or re-parse from the beginning, but be careful.
				758	%
				759	% o quoted: set to MagickTrue if the token was quoted and MagickFalse
				760	% if not. You may need this information (for example: in C, a
				761	% string with quotes around it is a character string, while one
				762	% without is an identifier).
				763	%
				764	% o result: 0 if we haven't reached EOS (end of string), and 1
				765	% if we have.
				766	%
				767	*/
				768
				/*
				  Parser states stored in TokenInfo.state by Tokenizer().
				*/
				#define IN_WHITE 0  /* before a token: nothing has been stored yet */
				#define IN_TOKEN 1  /* inside an unquoted token */
				#define IN_QUOTE 2  /* inside a quoted token */
				#define IN_OZONE 3  /* token finished: after a closing quote or trailing white space */
				773
/*
  sindex() returns the zero-based position of the first character of string
  that equals c, or -1 when c does not occur before the terminating NUL.  The
  terminator itself is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  register const char
    *p;

  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}
				784
				785	static void StoreToken(TokenInfo token_info,char string,
				786	size_t max_token_length,int c)
				787	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	788	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	789	i;
				790
				791	if ((token_info->offset < 0) \|\|
				792	((size_t) token_info->offset >= (max_token_length-1)))
				793	return;
				794	i=token_info->offset++;
				795	string[i]=(char) c;
				796	if (token_info->state == IN_QUOTE)
				797	return;
				798	switch (token_info->flag & 0x03)
				799	{
				800	case 1:
				801	{
				802	string[i]=(char) toupper(c);
				803	break;
				804	}
				805	case 2:
				806	{
				807	string[i]=(char) tolower(c);
				808	break;
				809	}
				810	default:
				811	break;
				812	}
				813	}
				814
				815	MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
				816	char token,const size_t max_token_length,const char line,const char *white,
				817	const char break_set,const char quote,const char escape,char *breaker,
				818	int next,char quoted)
				819	{
				820	int
				821	c;
				822
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	823	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	824	i;
				825
				826	*breaker='\0';
				827	*quoted='\0';
				828	if (line[*next] == '\0')
				829	return(1);
				830	token_info->state=IN_WHITE;
				831	token_info->quote=(char) MagickFalse;
				832	token_info->flag=flag;
				833	for (token_info->offset=0; (int) line[next] != 0; (next)++)
				834	{
				835	c=(int) line[*next];
				836	i=sindex(c,break_set);
				837	if (i >= 0)
				838	{
				839	switch (token_info->state)
				840	{
				841	case IN_WHITE:
				842	case IN_TOKEN:
				843	case IN_OZONE:
				844	{
				845	(*next)++;
				846	*breaker=break_set[i];
				847	token[token_info->offset]='\0';
				848	return(0);
				849	}
				850	case IN_QUOTE:
				851	{
				852	StoreToken(token_info,token,max_token_length,c);
				853	break;
				854	}
				855	}
				856	continue;
				857	}
				858	i=sindex(c,quote);
				859	if (i >= 0)
				860	{
				861	switch (token_info->state)
				862	{
				863	case IN_WHITE:
				864	{
				865	token_info->state=IN_QUOTE;
				866	token_info->quote=quote[i];
				867	*quoted=(char) MagickTrue;
				868	break;
				869	}
				870	case IN_QUOTE:
				871	{
				872	if (quote[i] != token_info->quote)
				873	StoreToken(token_info,token,max_token_length,c);
				874	else
				875	{
				876	token_info->state=IN_OZONE;
				877	token_info->quote='\0';
				878	}
				879	break;
				880	}
				881	case IN_TOKEN:
				882	case IN_OZONE:
				883	{
				884	*breaker=(char) c;
				885	token[token_info->offset]='\0';
				886	return(0);
				887	}
				888	}
				889	continue;
				890	}
				891	i=sindex(c,white);
				892	if (i >= 0)
				893	{
				894	switch (token_info->state)
				895	{
				896	case IN_WHITE:
				897	case IN_OZONE:
				898	break;
				899	case IN_TOKEN:
				900	{
				901	token_info->state=IN_OZONE;
				902	break;
				903	}
				904	case IN_QUOTE:
				905	{
				906	StoreToken(token_info,token,max_token_length,c);
				907	break;
				908	}
				909	}
				910	continue;
				911	}
				912	if (c == (int) escape)
				913	{
				914	if (line[(*next)+1] == '\0')
				915	{
				916	*breaker='\0';
				917	StoreToken(token_info,token,max_token_length,c);
				918	(*next)++;
				919	token[token_info->offset]='\0';
				920	return(0);
				921	}
				922	switch (token_info->state)
				923	{
				924	case IN_WHITE:
				925	{
				926	(*next)--;
				927	token_info->state=IN_TOKEN;
				928	break;
				929	}
				930	case IN_TOKEN:
				931	case IN_QUOTE:
				932	{
				933	(*next)++;
				934	c=(int) line[*next];
				935	StoreToken(token_info,token,max_token_length,c);
				936	break;
				937	}
				938	case IN_OZONE:
				939	{
				940	token[token_info->offset]='\0';
				941	return(0);
				942	}
				943	}
				944	continue;
				945	}
				946	switch (token_info->state)
				947	{
				948	case IN_WHITE:
				949	token_info->state=IN_TOKEN;
				950	case IN_TOKEN:
				951	case IN_QUOTE:
				952	{
				953	StoreToken(token_info,token,max_token_length,c);
				954	break;
				955	}
				956	case IN_OZONE:
				957	{
				958	token[token_info->offset]='\0';
				959	return(0);
				960	}
				961	}
				962	}
				963	token[token_info->offset]='\0';
				964	return(0);
				965	}