Blame - magick/token.c - platform/external/ImageMagick

blob: d5fac16b6dea5dffb6fefcb11d81efafcd7d2e3b [file] [log] [blame]

cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	1	/*
				2	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				3	% %
				4	% %
				5	% %
				6	% TTTTT OOO K K EEEEE N N %
				7	% T O O K K E NN N %
				8	% T O O KKK EEE N N N %
				9	% T O O K K E N NN %
				10	% T OOO K K EEEEE N N %
				11	% %
				12	% %
				13	% MagickCore Token Methods %
				14	% %
				15	% Software Design %
				16	% John Cristy %
				17	% January 1993 %
				18	% %
				19	% %
cristy	7e41fe8	2010-12-04 23:12:08 +0000	[diff] [blame]	20	% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	21	% dedicated to making software imaging solutions freely available. %
				22	% %
				23	% You may not use this file except in compliance with the License. You may %
				24	% obtain a copy of the License at %
				25	% %
				26	% http://www.imagemagick.org/script/license.php %
				27	% %
				28	% Unless required by applicable law or agreed to in writing, software %
				29	% distributed under the License is distributed on an "AS IS" BASIS, %
				30	% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
				31	% See the License for the specific language governing permissions and %
				32	% limitations under the License. %
				33	% %
				34	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				35	%
				36	%
				37	%
				38	*/
				39
				40	/*
				41	Include declarations.
				42	*/
				43	#include "magick/studio.h"
				44	#include "magick/exception.h"
				45	#include "magick/exception-private.h"
				46	#include "magick/image.h"
				47	#include "magick/memory_.h"
				48	#include "magick/string_.h"
cristy	0df696d	2011-05-18 19:55:22 +0000	[diff] [blame]	49	#include "magick/string-private.h"
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	50	#include "magick/token.h"
				51	#include "magick/token-private.h"
				52	#include "magick/utility.h"
				53
				54	/*
				55	Typedef declarations.
				56	*/
				57	struct _TokenInfo
				58	{
				59	int
				60	state;
				61
				62	MagickStatusType
				63	flag;
				64
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	65	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	66	offset;
				67
				68	char
				69	quote;
				70
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	71	size_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	72	signature;
				73	};
				74
				75	/*
				76	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				77	% %
				78	% %
				79	% %
				80	% A c q u i r e T o k e n I n f o %
				81	% %
				82	% %
				83	% %
				84	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				85	%
				86	% AcquireTokenInfo() allocates the TokenInfo structure.
				87	%
				88	% The format of the AcquireTokenInfo method is:
				89	%
				90	% TokenInfo *AcquireTokenInfo()
				91	%
				92	*/
				93	MagickExport TokenInfo *AcquireTokenInfo(void)
				94	{
				95	TokenInfo
				96	*token_info;
				97
cristy	73bd4a5	2010-10-05 11:24:23 +0000	[diff] [blame]	98	token_info=(TokenInfo ) AcquireMagickMemory(sizeof(token_info));
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	99	if (token_info == (TokenInfo *) NULL)
				100	ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
				101	token_info->signature=MagickSignature;
				102	return(token_info);
				103	}
				104
				105	/*
				106	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				107	% %
				108	% %
				109	% %
				110	% D e s t r o y T o k e n I n f o %
				111	% %
				112	% %
				113	% %
				114	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				115	%
				116	% DestroyTokenInfo() deallocates memory associated with a TokenInfo
				117	% structure.
				118	%
				119	% The format of the DestroyTokenInfo method is:
				120	%
				121	% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
				122	%
				123	% A description of each parameter follows:
				124	%
				125	% o token_info: Specifies a pointer to a TokenInfo structure.
				126	%
				127	*/
				128	MagickExport TokenInfo DestroyTokenInfo(TokenInfo token_info)
				129	{
				130	(void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
				131	assert(token_info != (TokenInfo *) NULL);
				132	assert(token_info->signature == MagickSignature);
				133	token_info->signature=(~MagickSignature);
				134	token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
				135	return(token_info);
				136	}
				137
				138	/*
				139	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				140	% %
				141	% %
				142	% %
				143	+ G e t M a g i c k T o k e n %
				144	% %
				145	% %
				146	% %
				147	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				148	%
cristy	df11e55	2011-04-23 17:18:30 +0000	[diff] [blame]	149	% GetMagickToken() gets a token from the token stream. A token is defined as
				150	% a sequence of characters delimited by whitespace (e.g. clip-path), a
				151	% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	152	% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
				153	% separator characters: ':', '=', ',', and ';'.
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	154	%
				155	% The format of the GetMagickToken method is:
				156	%
				157	% void GetMagickToken(const char *start,const char **end,char *token)
				158	%
				159	% A description of each parameter follows:
				160	%
				161	% o start: the start of the token sequence.
				162	%
				163	% o end: point to the end of the token sequence.
				164	%
				165	% o token: copy the token to this buffer.
				166	%
				167	*/
				168	MagickExport void GetMagickToken(const char start,const char end,char token)
				169	{
				170	double
				171	value;
				172
				173	register const char
				174	*p;
				175
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	176	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	177	i;
				178
cristy	32f6912	2011-04-22 02:26:00 +0000	[diff] [blame]	179	assert(start != (const char *) NULL);
				180	assert(token != (char *) NULL);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	181	i=0;
				182	for (p=start; *p != '\0'; )
				183	{
				184	while ((isspace((int) ((unsigned char) p)) != 0) && (p != '\0'))
				185	p++;
				186	if (*p == '\0')
				187	break;
				188	switch (*p)
				189	{
				190	case '"':
				191	case '\'':
				192	case '`':
				193	case '{':
				194	{
				195	register char
				196	escape;
				197
				198	switch (*p)
				199	{
				200	case '"': escape='"'; break;
				201	case '\'': escape='\''; break;
				202	case '`': escape='\''; break;
				203	case '{': escape='}'; break;
				204	default: escape=(*p); break;
				205	}
				206	for (p++; *p != '\0'; p++)
				207	{
				208	if ((p == '\\') && (((p+1) == escape) \|\| (*(p+1) == '\\')))
				209	p++;
				210	else
				211	if (*p == escape)
				212	{
				213	p++;
				214	break;
				215	}
				216	token[i++]=(*p);
				217	}
				218	break;
				219	}
				220	case '/':
				221	{
				222	token[i++]=(*p++);
				223	if ((p == '>') \|\| (p == '/'))
				224	token[i++]=(*p++);
				225	break;
				226	}
				227	default:
				228	{
				229	char
				230	*q;
				231
cristy	c1acd84	2011-05-19 23:05:47 +0000	[diff] [blame^]	232	value=InterpretLocaleValue(p,&q);
cristy	da16f16	2011-02-19 23:52:17 +0000	[diff] [blame]	233	(void) value;
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	234	if ((p != q) && (*p != ','))
				235	{
				236	for ( ; (p < q) && (*p != ','); p++)
				237	token[i++]=(*p);
				238	if (*p == '%')
				239	token[i++]=(*p++);
				240	break;
				241	}
cristy	c507168	2011-04-22 02:06:27 +0000	[diff] [blame]	242	if ((p != '\0') && (isalpha((int) ((unsigned char) p)) == 0) &&
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	243	(p != DirectorySeparator) && (p != '#') && (p != '<'))
				244	{
				245	token[i++]=(*p++);
				246	break;
				247	}
				248	for ( ; *p != '\0'; p++)
				249	{
				250	if (((isspace((int) ((unsigned char) p)) != 0) \|\| (p == '=') \|\|
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	251	(p == ',') \|\| (p == ':') \|\| (p == ';')) && ((p-1) != '\\'))
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	252	break;
				253	if ((i > 0) && (*p == '<'))
				254	break;
				255	token[i++]=(*p);
				256	if (*p == '>')
				257	break;
				258	if (*p == '(')
				259	for (p++; *p != '\0'; p++)
				260	{
				261	token[i++]=(*p);
				262	if ((p == ')') && ((p-1) != '\\'))
				263	break;
				264	}
				265	}
				266	break;
				267	}
				268	}
				269	break;
				270	}
				271	token[i]='\0';
				272	if (LocaleNCompare(token,"url(",4) == 0)
				273	{
				274	ssize_t
				275	offset;
				276
				277	offset=4;
				278	if (token[offset] == '#')
				279	offset++;
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	280	i=(ssize_t) strlen(token);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	281	(void) CopyMagickString(token,token+offset,MaxTextExtent);
				282	token[i-offset-1]='\0';
				283	}
				284	while (isspace((int) ((unsigned char) *p)) != 0)
				285	p++;
				286	if (end != (const char **) NULL)
				287	end=(const char ) p;
				288	}
				289
				290	/*
				291	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				292	% %
				293	% %
				294	% %
				295	% G l o b E x p r e s s i o n %
				296	% %
				297	% %
				298	% %
				299	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				300	%
				301	% GlobExpression() returns MagickTrue if the expression matches the pattern.
				302	%
				303	% The format of the GlobExpression function is:
				304	%
				305	% MagickBooleanType GlobExpression(const char *expression,
				306	% const char *pattern,const MagickBooleanType case_insensitive)
				307	%
				308	% A description of each parameter follows:
				309	%
				310	% o expression: Specifies a pointer to a text string containing a file name.
				311	%
				312	% o pattern: Specifies a pointer to a text string containing a pattern.
				313	%
				314	% o case_insensitive: set to MagickTrue to ignore the case when matching
				315	% an expression.
				316	%
				317	*/
				318	MagickExport MagickBooleanType GlobExpression(const char *expression,
				319	const char *pattern,const MagickBooleanType case_insensitive)
				320	{
				321	MagickBooleanType
				322	done,
				323	match;
				324
				325	register const char
				326	*p;
				327
				328	/*
				329	Return on empty pattern or '*'.
				330	*/
				331	if (pattern == (char *) NULL)
				332	return(MagickTrue);
				333	if (GetUTFCode(pattern) == 0)
				334	return(MagickTrue);
				335	if (LocaleCompare(pattern,"*") == 0)
				336	return(MagickTrue);
				337	p=pattern+strlen(pattern)-1;
				338	if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
				339	{
				340	ExceptionInfo
				341	*exception;
				342
				343	ImageInfo
				344	*image_info;
				345
				346	/*
				347	Determine if pattern is a scene, i.e. img0001.pcd[2].
				348	*/
				349	image_info=AcquireImageInfo();
				350	(void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
				351	exception=AcquireExceptionInfo();
cristy	d965a42	2010-03-03 17:47:35 +0000	[diff] [blame]	352	(void) SetImageInfo(image_info,0,exception);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	353	exception=DestroyExceptionInfo(exception);
				354	if (LocaleCompare(image_info->filename,pattern) != 0)
				355	{
				356	image_info=DestroyImageInfo(image_info);
				357	return(MagickFalse);
				358	}
				359	image_info=DestroyImageInfo(image_info);
				360	}
				361	/*
				362	Evaluate glob expression.
				363	*/
				364	done=MagickFalse;
				365	while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
				366	{
				367	if (GetUTFCode(expression) == 0)
				368	if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
				369	break;
				370	switch (GetUTFCode(pattern))
				371	{
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	372	case '*':
				373	{
				374	MagickBooleanType
				375	status;
				376
				377	status=MagickFalse;
				378	pattern+=GetUTFOctets(pattern);
				379	while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
				380	{
				381	status=GlobExpression(expression,pattern,case_insensitive);
				382	expression+=GetUTFOctets(expression);
				383	}
				384	if (status != MagickFalse)
				385	{
				386	while (GetUTFCode(expression) != 0)
				387	expression+=GetUTFOctets(expression);
				388	while (GetUTFCode(pattern) != 0)
				389	pattern+=GetUTFOctets(pattern);
				390	}
				391	break;
				392	}
				393	case '[':
				394	{
cristy	55a91cd	2010-12-01 00:57:40 +0000	[diff] [blame]	395	int
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	396	c;
				397
				398	pattern+=GetUTFOctets(pattern);
				399	for ( ; ; )
				400	{
				401	if ((GetUTFCode(pattern) == 0) \|\| (GetUTFCode(pattern) == ']'))
				402	{
				403	done=MagickTrue;
				404	break;
				405	}
				406	if (GetUTFCode(pattern) == '\\')
				407	{
				408	pattern+=GetUTFOctets(pattern);
				409	if (GetUTFCode(pattern) == 0)
				410	{
				411	done=MagickTrue;
				412	break;
				413	}
				414	}
				415	if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
				416	{
				417	c=GetUTFCode(pattern);
				418	pattern+=GetUTFOctets(pattern);
				419	pattern+=GetUTFOctets(pattern);
				420	if (GetUTFCode(pattern) == ']')
				421	{
				422	done=MagickTrue;
				423	break;
				424	}
				425	if (GetUTFCode(pattern) == '\\')
				426	{
				427	pattern+=GetUTFOctets(pattern);
				428	if (GetUTFCode(pattern) == 0)
				429	{
				430	done=MagickTrue;
				431	break;
				432	}
				433	}
				434	if ((GetUTFCode(expression) < c) \|\|
				435	(GetUTFCode(expression) > GetUTFCode(pattern)))
				436	{
				437	pattern+=GetUTFOctets(pattern);
				438	continue;
				439	}
				440	}
				441	else
				442	if (GetUTFCode(pattern) != GetUTFCode(expression))
				443	{
				444	pattern+=GetUTFOctets(pattern);
				445	continue;
				446	}
				447	pattern+=GetUTFOctets(pattern);
				448	while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
				449	{
				450	if ((GetUTFCode(pattern) == '\\') &&
				451	(GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
				452	pattern+=GetUTFOctets(pattern);
				453	pattern+=GetUTFOctets(pattern);
				454	}
				455	if (GetUTFCode(pattern) != 0)
				456	{
				457	pattern+=GetUTFOctets(pattern);
				458	expression+=GetUTFOctets(expression);
				459	}
				460	break;
				461	}
				462	break;
				463	}
				464	case '?':
				465	{
				466	pattern+=GetUTFOctets(pattern);
				467	expression+=GetUTFOctets(expression);
				468	break;
				469	}
				470	case '{':
				471	{
				472	register const char
				473	*p;
				474
				475	pattern+=GetUTFOctets(pattern);
				476	while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
				477	{
				478	p=expression;
				479	match=MagickTrue;
				480	while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
				481	(GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
				482	(match != MagickFalse))
				483	{
				484	if (GetUTFCode(pattern) == '\\')
				485	pattern+=GetUTFOctets(pattern);
				486	match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
				487	MagickFalse;
				488	p+=GetUTFOctets(p);
				489	pattern+=GetUTFOctets(pattern);
				490	}
				491	if (GetUTFCode(pattern) == 0)
				492	{
				493	match=MagickFalse;
				494	done=MagickTrue;
				495	break;
				496	}
				497	else
				498	if (match != MagickFalse)
				499	{
				500	expression=p;
				501	while ((GetUTFCode(pattern) != '}') &&
				502	(GetUTFCode(pattern) != 0))
				503	{
				504	pattern+=GetUTFOctets(pattern);
				505	if (GetUTFCode(pattern) == '\\')
				506	{
				507	pattern+=GetUTFOctets(pattern);
				508	if (GetUTFCode(pattern) == '}')
				509	pattern+=GetUTFOctets(pattern);
				510	}
				511	}
				512	}
				513	else
				514	{
				515	while ((GetUTFCode(pattern) != '}') &&
				516	(GetUTFCode(pattern) != ',') &&
				517	(GetUTFCode(pattern) != 0))
				518	{
				519	pattern+=GetUTFOctets(pattern);
				520	if (GetUTFCode(pattern) == '\\')
				521	{
				522	pattern+=GetUTFOctets(pattern);
				523	if ((GetUTFCode(pattern) == '}') \|\|
				524	(GetUTFCode(pattern) == ','))
				525	pattern+=GetUTFOctets(pattern);
				526	}
				527	}
				528	}
				529	if (GetUTFCode(pattern) != 0)
				530	pattern+=GetUTFOctets(pattern);
				531	}
				532	break;
				533	}
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	534	case '\\':
				535	{
				536	pattern+=GetUTFOctets(pattern);
cristy	4705fe8	2010-04-23 16:20:03 +0000	[diff] [blame]	537	if (GetUTFCode(pattern) == 0)
				538	break;
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	539	}
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	540	default:
				541	{
				542	if (case_insensitive != MagickFalse)
				543	{
				544	if (tolower((int) GetUTFCode(expression)) !=
				545	tolower((int) GetUTFCode(pattern)))
				546	{
				547	done=MagickTrue;
				548	break;
				549	}
				550	}
				551	else
				552	if (GetUTFCode(expression) != GetUTFCode(pattern))
				553	{
				554	done=MagickTrue;
				555	break;
				556	}
				557	expression+=GetUTFOctets(expression);
				558	pattern+=GetUTFOctets(pattern);
				559	}
				560	}
				561	}
				562	while (GetUTFCode(pattern) == '*')
				563	pattern+=GetUTFOctets(pattern);
				564	match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
				565	MagickTrue : MagickFalse;
				566	return(match);
				567	}
				568
				569	/*
				570	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				571	% %
				572	% %
				573	% %
				574	+ I s G l o b %
				575	% %
				576	% %
				577	% %
				578	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				579	%
				580	% IsGlob() returns MagickTrue if the path specification contains a globbing
				581	% pattern.
				582	%
				583	% The format of the IsGlob method is:
				584	%
				585	% MagickBooleanType IsGlob(const char *path)
				586	%
				587	% A description of each parameter follows:
				588	%
				589	% o path: the path.
				590	%
				591	*/
				592	MagickExport MagickBooleanType IsGlob(const char *path)
				593	{
				594	MagickBooleanType
				595	status;
				596
				597	if (IsPathAccessible(path) != MagickFalse)
				598	return(MagickFalse);
				599	status=(strchr(path,'') != (char ) NULL) \|\|
				600	(strchr(path,'?') != (char *) NULL) \|\|
				601	(strchr(path,'{') != (char *) NULL) \|\|
				602	(strchr(path,'}') != (char *) NULL) \|\|
				603	(strchr(path,'[') != (char *) NULL) \|\|
				604	(strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
				605	return(status);
				606	}
				607
				608	/*
				609	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				610	% %
				611	% %
				612	% %
				613	% T o k e n i z e r %
				614	% %
				615	% %
				616	% %
				617	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				618	%
				619	% Tokenizer() is a generalized, finite state token parser. It extracts tokens
				620	% one at a time from a string of characters. The characters used for white
				621	% space, for break characters, and for quotes can be specified. Also,
				622	% characters in the string can be preceded by a specifiable escape character
				623	% which removes any special meaning the character may have.
				624	%
				625	% Here is some terminology:
				626	%
				627	% o token: A single unit of information in the form of a group of
				628	% characters.
				629	%
				630	% o white space: Space that gets ignored (except within quotes or when
				631	% escaped), like blanks and tabs. in addition, white space terminates a
				632	% non-quoted token.
				633	%
				634	% o break set: One or more characters that separates non-quoted tokens.
				635	% Commas are a common break character. The usage of break characters to
				636	% signal the end of a token is the same as that of white space, except
				637	% multiple break characters with nothing or only white space between
				638	% generate a null token for each two break characters together.
				639	%
				640	% For example, if blank is set to be the white space and comma is set to
				641	% be the break character, the line
				642	%
				643	% A, B, C , , DEF
				644	%
				645	% ... consists of 5 tokens:
				646	%
				647	% 1) "A"
				648	% 2) "B"
				649	% 3) "C"
				650	% 4) "" (the null string)
				651	% 5) "DEF"
				652	%
				653	% o Quote character: A character that, when surrounding a group of other
				654	% characters, causes the group of characters to be treated as a single
				655	% token, no matter how many white spaces or break characters exist in
				656	% the group. Also, a token always terminates after the closing quote.
				657	% For example, if ' is the quote character, blank is white space, and
				658	% comma is the break character, the following string
				659	%
				660	% A, ' B, CD'EF GHI
				661	%
				662	% ... consists of 4 tokens:
				663	%
				664	% 1) "A"
				665	% 2) " B, CD" (note the blanks & comma)
				666	% 3) "EF"
				667	% 4) "GHI"
				668	%
				669	% The quote characters themselves do not appear in the resultant
				670	% tokens. The double quotes are delimiters i use here for
				671	% documentation purposes only.
				672	%
				673	% o Escape character: A character which itself is ignored but which
				674	% causes the next character to be used as is. ^ and \ are often used
				675	% as escape characters. An escape in the last position of the string
				676	% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
				677	% and non-escape) character. For example, assume white space, break
				678	% character, and quote are the same as in the above examples, and
				679	% further, assume that ^ is the escape character. Then, in the string
				680	%
				681	% ABC, ' DEF ^' GH' I ^ J K^ L ^
				682	%
				683	% ... there are 7 tokens:
				684	%
				685	% 1) "ABC"
				686	% 2) " DEF ' GH"
				687	% 3) "I"
				688	% 4) " " (a lone blank)
				689	% 5) "J"
				690	% 6) "K L"
				691	% 7) "^" (passed as is at end of line)
				692	%
				693	% The format of the Tokenizer method is:
				694	%
				695	% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
				696	% const size_t max_token_length,const char *line,const char *white,
				697	% const char *break_set,const char *quote,const char escape,
				698	% char *breaker,int *next,char *quoted)
				699	%
				700	% A description of each parameter follows:
				701	%
				702	% o flag: right now, only the low order 3 bits are used.
				703	%
				704	% 1 => convert non-quoted tokens to upper case
				705	% 2 => convert non-quoted tokens to lower case
				706	% 0 => do not convert non-quoted tokens
				707	%
				708	% o token: a character string containing the returned next token
				709	%
				710	% o max_token_length: the maximum size of "token". Characters beyond
				711	% "max_token_length" are truncated.
				712	%
				713	% o line: the string to be parsed.
				714	%
				715	% o white: a string of the valid white spaces. example:
				716	%
				717	% char whitesp[]={" \t"};
				718	%
				719	% blank and tab will be valid white space.
				720	%
				721	% o break: a string of the valid break characters. example:
				722	%
				723	% char breakch[]={";,"};
				724	%
				725	% semicolon and comma will be valid break characters.
				726	%
				727	% o quote: a string of the valid quote characters. An example would be
				728	%
				729	% char quote[]={"'\""};
				730	%
				731	% (this causes single and double quotes to be valid) Note that a
				732	% token starting with one of these characters needs the same quote
				733	% character to terminate it.
				734	%
				735	% for example:
				736	%
				737	% "ABC '
				738	%
				739	% is unterminated, but
				740	%
				741	% "DEF" and 'GHI'
				742	%
				743	% are properly terminated. Note that different quote characters
				744	% can appear on the same line; only for a given token do the quote
				745	% characters have to be the same.
				746	%
				747	% o escape: the escape character (NOT a string ... only one
				748	% allowed). Use zero if none is desired.
				749	%
				750	% o breaker: the break character used to terminate the current
				751	% token. If the token was quoted, this will be the quote used. If
				752	% the token is the last one on the line, this will be zero.
				753	%
				754	% o next: this variable points to the first character of the
				755	% next token. it gets reset by "tokenizer" as it steps through the
				756	% string. Set it to 0 upon initialization, and leave it alone
				757	% after that. You can change it if you want to jump around in the
				758	% string or re-parse from the beginning, but be careful.
				759	%
				760	% o quoted: set to True if the token was quoted and MagickFalse
				761	% if not. You may need this information (for example: in C, a
				762	% string with quotes around it is a character string, while one
				763	% without is an identifier).
				764	%
				765	% o result: 0 if we haven't reached EOS (end of string), and 1
				766	% if we have.
				767	%
				768	*/
				769
				770	#define IN_WHITE 0
				771	#define IN_TOKEN 1
				772	#define IN_QUOTE 2
				773	#define IN_OZONE 3
				774
/*
  sindex() returns the zero-based position of the first occurrence of
  character `c` in `string`, or -1 if it does not occur.  The terminating NUL
  is never matched, and a NULL `string` is treated as an empty set.
*/
static ssize_t sindex(int c,const char *string)
{
  register const char
    *p;

  if (string == (const char *) NULL)
    return(-1);
  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}
				785
				786	static void StoreToken(TokenInfo token_info,char string,
				787	size_t max_token_length,int c)
				788	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	789	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	790	i;
				791
				792	if ((token_info->offset < 0) \|\|
				793	((size_t) token_info->offset >= (max_token_length-1)))
				794	return;
				795	i=token_info->offset++;
				796	string[i]=(char) c;
				797	if (token_info->state == IN_QUOTE)
				798	return;
				799	switch (token_info->flag & 0x03)
				800	{
				801	case 1:
				802	{
				803	string[i]=(char) toupper(c);
				804	break;
				805	}
				806	case 2:
				807	{
				808	string[i]=(char) tolower(c);
				809	break;
				810	}
				811	default:
				812	break;
				813	}
				814	}
				815
				816	MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
				817	char token,const size_t max_token_length,const char line,const char *white,
				818	const char break_set,const char quote,const char escape,char *breaker,
				819	int next,char quoted)
				820	{
				821	int
				822	c;
				823
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	824	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	825	i;
				826
				827	*breaker='\0';
				828	*quoted='\0';
				829	if (line[*next] == '\0')
				830	return(1);
				831	token_info->state=IN_WHITE;
				832	token_info->quote=(char) MagickFalse;
				833	token_info->flag=flag;
				834	for (token_info->offset=0; (int) line[next] != 0; (next)++)
				835	{
				836	c=(int) line[*next];
				837	i=sindex(c,break_set);
				838	if (i >= 0)
				839	{
				840	switch (token_info->state)
				841	{
				842	case IN_WHITE:
				843	case IN_TOKEN:
				844	case IN_OZONE:
				845	{
				846	(*next)++;
				847	*breaker=break_set[i];
				848	token[token_info->offset]='\0';
				849	return(0);
				850	}
				851	case IN_QUOTE:
				852	{
				853	StoreToken(token_info,token,max_token_length,c);
				854	break;
				855	}
				856	}
				857	continue;
				858	}
				859	i=sindex(c,quote);
				860	if (i >= 0)
				861	{
				862	switch (token_info->state)
				863	{
				864	case IN_WHITE:
				865	{
				866	token_info->state=IN_QUOTE;
				867	token_info->quote=quote[i];
				868	*quoted=(char) MagickTrue;
				869	break;
				870	}
				871	case IN_QUOTE:
				872	{
				873	if (quote[i] != token_info->quote)
				874	StoreToken(token_info,token,max_token_length,c);
				875	else
				876	{
				877	token_info->state=IN_OZONE;
				878	token_info->quote='\0';
				879	}
				880	break;
				881	}
				882	case IN_TOKEN:
				883	case IN_OZONE:
				884	{
				885	*breaker=(char) c;
				886	token[token_info->offset]='\0';
				887	return(0);
				888	}
				889	}
				890	continue;
				891	}
				892	i=sindex(c,white);
				893	if (i >= 0)
				894	{
				895	switch (token_info->state)
				896	{
				897	case IN_WHITE:
				898	case IN_OZONE:
				899	break;
				900	case IN_TOKEN:
				901	{
				902	token_info->state=IN_OZONE;
				903	break;
				904	}
				905	case IN_QUOTE:
				906	{
				907	StoreToken(token_info,token,max_token_length,c);
				908	break;
				909	}
				910	}
				911	continue;
				912	}
				913	if (c == (int) escape)
				914	{
				915	if (line[(*next)+1] == '\0')
				916	{
				917	*breaker='\0';
				918	StoreToken(token_info,token,max_token_length,c);
				919	(*next)++;
				920	token[token_info->offset]='\0';
				921	return(0);
				922	}
				923	switch (token_info->state)
				924	{
				925	case IN_WHITE:
				926	{
				927	(*next)--;
				928	token_info->state=IN_TOKEN;
				929	break;
				930	}
				931	case IN_TOKEN:
				932	case IN_QUOTE:
				933	{
				934	(*next)++;
				935	c=(int) line[*next];
				936	StoreToken(token_info,token,max_token_length,c);
				937	break;
				938	}
				939	case IN_OZONE:
				940	{
				941	token[token_info->offset]='\0';
				942	return(0);
				943	}
				944	}
				945	continue;
				946	}
				947	switch (token_info->state)
				948	{
				949	case IN_WHITE:
				950	token_info->state=IN_TOKEN;
				951	case IN_TOKEN:
				952	case IN_QUOTE:
				953	{
				954	StoreToken(token_info,token,max_token_length,c);
				955	break;
				956	}
				957	case IN_OZONE:
				958	{
				959	token[token_info->offset]='\0';
				960	return(0);
				961	}
				962	}
				963	}
				964	token[token_info->offset]='\0';
				965	return(0);
				966	}