Blame - magick/token.c - platform/external/ImageMagick

blob: 79774a3a3c310826210dc2763b48fdb025f38c0f [file] [log] [blame]

cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	1	/*
				2	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				3	% %
				4	% %
				5	% %
				6	% TTTTT OOO K K EEEEE N N %
				7	% T O O K K E NN N %
				8	% T O O KKK EEE N N N %
				9	% T O O K K E N NN %
				10	% T OOO K K EEEEE N N %
				11	% %
				12	% %
				13	% MagickCore Token Methods %
				14	% %
				15	% Software Design %
				16	% John Cristy %
				17	% January 1993 %
				18	% %
				19	% %
cristy	16af1cb	2009-12-11 21:38:29 +0000	[diff] [blame]	20	% Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization %
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	21	% dedicated to making software imaging solutions freely available. %
				22	% %
				23	% You may not use this file except in compliance with the License. You may %
				24	% obtain a copy of the License at %
				25	% %
				26	% http://www.imagemagick.org/script/license.php %
				27	% %
				28	% Unless required by applicable law or agreed to in writing, software %
				29	% distributed under the License is distributed on an "AS IS" BASIS, %
				30	% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
				31	% See the License for the specific language governing permissions and %
				32	% limitations under the License. %
				33	% %
				34	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				35	%
				36	%
				37	%
				38	*/
				39
				40	/*
				41	Include declarations.
				42	*/
				43	#include "magick/studio.h"
				44	#include "magick/exception.h"
				45	#include "magick/exception-private.h"
				46	#include "magick/image.h"
				47	#include "magick/memory_.h"
				48	#include "magick/string_.h"
				49	#include "magick/token.h"
				50	#include "magick/token-private.h"
				51	#include "magick/utility.h"
				52
				53	/*
				54	Typedef declarations.
				55	*/
				56	struct _TokenInfo
				57	{
				58	int
				59	state;
				60
				61	MagickStatusType
				62	flag;
				63
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	64	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	65	offset;
				66
				67	char
				68	quote;
				69
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	70	size_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	71	signature;
				72	};
				73
				74	/*
				75	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				76	% %
				77	% %
				78	% %
				79	% A c q u i r e T o k e n I n f o %
				80	% %
				81	% %
				82	% %
				83	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				84	%
				85	% AcquireTokenInfo() allocates the TokenInfo structure.
				86	%
				87	% The format of the AcquireTokenInfo method is:
				88	%
				89	% TokenInfo *AcquireTokenInfo()
				90	%
				91	*/
				92	MagickExport TokenInfo *AcquireTokenInfo(void)
				93	{
				94	TokenInfo
				95	*token_info;
				96
cristy	9082321	2009-12-12 20:48:33 +0000	[diff] [blame]	97	token_info=(TokenInfo ) AcquireAlignedMemory(1,sizeof(token_info));
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	98	if (token_info == (TokenInfo *) NULL)
				99	ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
				100	token_info->signature=MagickSignature;
				101	return(token_info);
				102	}
				103
				104	/*
				105	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				106	% %
				107	% %
				108	% %
				109	% D e s t r o y T o k e n I n f o %
				110	% %
				111	% %
				112	% %
				113	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				114	%
				115	% DestroyTokenInfo() deallocates memory associated with a TokenInfo
				116	% structure.
				117	%
				118	% The format of the DestroyTokenInfo method is:
				119	%
				120	% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
				121	%
				122	% A description of each parameter follows:
				123	%
				124	% o token_info: Specifies a pointer to a TokenInfo structure.
				125	%
				126	*/
				127	MagickExport TokenInfo DestroyTokenInfo(TokenInfo token_info)
				128	{
				129	(void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
				130	assert(token_info != (TokenInfo *) NULL);
				131	assert(token_info->signature == MagickSignature);
				132	token_info->signature=(~MagickSignature);
				133	token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
				134	return(token_info);
				135	}
				136
				137	/*
				138	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				139	% %
				140	% %
				141	% %
				142	+ G e t M a g i c k T o k e n %
				143	% %
				144	% %
				145	% %
				146	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				147	%
				148	% GetMagickToken() gets a token from the token stream. A token is defined as a
				149	% sequence of characters delimited by whitespace (e.g. clip-path), a sequence
				150	% delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	151	% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
				152	% separator characters: ':', '=', ',', and ';'.
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	153	%
				154	% The format of the GetMagickToken method is:
				155	%
				156	% void GetMagickToken(const char *start,const char **end,char *token)
				157	%
				158	% A description of each parameter follows:
				159	%
				160	% o start: the start of the token sequence.
				161	%
				162	% o end: point to the end of the token sequence.
				163	%
				164	% o token: copy the token to this buffer.
				165	%
				166	*/
				167	MagickExport void GetMagickToken(const char start,const char end,char token)
				168	{
				169	double
				170	value;
				171
				172	register const char
				173	*p;
				174
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	175	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	176	i;
				177
				178	i=0;
				179	for (p=start; *p != '\0'; )
				180	{
				181	while ((isspace((int) ((unsigned char) p)) != 0) && (p != '\0'))
				182	p++;
				183	if (*p == '\0')
				184	break;
				185	switch (*p)
				186	{
				187	case '"':
				188	case '\'':
				189	case '`':
				190	case '{':
				191	{
				192	register char
				193	escape;
				194
				195	switch (*p)
				196	{
				197	case '"': escape='"'; break;
				198	case '\'': escape='\''; break;
				199	case '`': escape='\''; break;
				200	case '{': escape='}'; break;
				201	default: escape=(*p); break;
				202	}
				203	for (p++; *p != '\0'; p++)
				204	{
				205	if ((p == '\\') && (((p+1) == escape) \|\| (*(p+1) == '\\')))
				206	p++;
				207	else
				208	if (*p == escape)
				209	{
				210	p++;
				211	break;
				212	}
				213	token[i++]=(*p);
				214	}
				215	break;
				216	}
				217	case '/':
				218	{
				219	token[i++]=(*p++);
				220	if ((p == '>') \|\| (p == '/'))
				221	token[i++]=(*p++);
				222	break;
				223	}
				224	default:
				225	{
				226	char
				227	*q;
				228
				229	value=strtod(p,&q);
				230	if ((p != q) && (*p != ','))
				231	{
				232	for ( ; (p < q) && (*p != ','); p++)
				233	token[i++]=(*p);
				234	if (*p == '%')
				235	token[i++]=(*p++);
				236	break;
				237	}
				238	if ((isalpha((int) ((unsigned char) *p)) == 0) &&
				239	(p != DirectorySeparator) && (p != '#') && (p != '<'))
				240	{
				241	token[i++]=(*p++);
				242	break;
				243	}
				244	for ( ; *p != '\0'; p++)
				245	{
				246	if (((isspace((int) ((unsigned char) p)) != 0) \|\| (p == '=') \|\|
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	247	(p == ',') \|\| (p == ':') \|\| (p == ';')) && ((p-1) != '\\'))
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	248	break;
				249	if ((i > 0) && (*p == '<'))
				250	break;
				251	token[i++]=(*p);
				252	if (*p == '>')
				253	break;
				254	if (*p == '(')
				255	for (p++; *p != '\0'; p++)
				256	{
				257	token[i++]=(*p);
				258	if ((p == ')') && ((p-1) != '\\'))
				259	break;
				260	}
				261	}
				262	break;
				263	}
				264	}
				265	break;
				266	}
				267	token[i]='\0';
				268	if (LocaleNCompare(token,"url(",4) == 0)
				269	{
				270	ssize_t
				271	offset;
				272
				273	offset=4;
				274	if (token[offset] == '#')
				275	offset++;
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	276	i=(ssize_t) strlen(token);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	277	(void) CopyMagickString(token,token+offset,MaxTextExtent);
				278	token[i-offset-1]='\0';
				279	}
				280	while (isspace((int) ((unsigned char) *p)) != 0)
				281	p++;
				282	if (end != (const char **) NULL)
				283	end=(const char ) p;
				284	}
				285
				286	/*
				287	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				288	% %
				289	% %
				290	% %
				291	% G l o b E x p r e s s i o n %
				292	% %
				293	% %
				294	% %
				295	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				296	%
				297	% GlobExpression() returns MagickTrue if the expression matches the pattern.
				298	%
				299	% The format of the GlobExpression function is:
				300	%
				301	% MagickBooleanType GlobExpression(const char *expression,
				302	% const char *pattern,const MagickBooleanType case_insensitive)
				303	%
				304	% A description of each parameter follows:
				305	%
				306	% o expression: Specifies a pointer to a text string containing a file name.
				307	%
				308	% o pattern: Specifies a pointer to a text string containing a pattern.
				309	%
				310	% o case_insensitive: set to MagickTrue to ignore the case when matching
				311	% an expression.
				312	%
				313	*/
				314	MagickExport MagickBooleanType GlobExpression(const char *expression,
				315	const char *pattern,const MagickBooleanType case_insensitive)
				316	{
				317	MagickBooleanType
				318	done,
				319	match;
				320
				321	register const char
				322	*p;
				323
				324	/*
				325	Return on empty pattern or '*'.
				326	*/
				327	if (pattern == (char *) NULL)
				328	return(MagickTrue);
				329	if (GetUTFCode(pattern) == 0)
				330	return(MagickTrue);
				331	if (LocaleCompare(pattern,"*") == 0)
				332	return(MagickTrue);
				333	p=pattern+strlen(pattern)-1;
				334	if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
				335	{
				336	ExceptionInfo
				337	*exception;
				338
				339	ImageInfo
				340	*image_info;
				341
				342	/*
				343	Determine if pattern is a scene, i.e. img0001.pcd[2].
				344	*/
				345	image_info=AcquireImageInfo();
				346	(void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
				347	exception=AcquireExceptionInfo();
cristy	d965a42	2010-03-03 17:47:35 +0000	[diff] [blame]	348	(void) SetImageInfo(image_info,0,exception);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	349	exception=DestroyExceptionInfo(exception);
				350	if (LocaleCompare(image_info->filename,pattern) != 0)
				351	{
				352	image_info=DestroyImageInfo(image_info);
				353	return(MagickFalse);
				354	}
				355	image_info=DestroyImageInfo(image_info);
				356	}
				357	/*
				358	Evaluate glob expression.
				359	*/
				360	done=MagickFalse;
				361	while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
				362	{
				363	if (GetUTFCode(expression) == 0)
				364	if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
				365	break;
				366	switch (GetUTFCode(pattern))
				367	{
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	368	case '*':
				369	{
				370	MagickBooleanType
				371	status;
				372
				373	status=MagickFalse;
				374	pattern+=GetUTFOctets(pattern);
				375	while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
				376	{
				377	status=GlobExpression(expression,pattern,case_insensitive);
				378	expression+=GetUTFOctets(expression);
				379	}
				380	if (status != MagickFalse)
				381	{
				382	while (GetUTFCode(expression) != 0)
				383	expression+=GetUTFOctets(expression);
				384	while (GetUTFCode(pattern) != 0)
				385	pattern+=GetUTFOctets(pattern);
				386	}
				387	break;
				388	}
				389	case '[':
				390	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	391	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	392	c;
				393
				394	pattern+=GetUTFOctets(pattern);
				395	for ( ; ; )
				396	{
				397	if ((GetUTFCode(pattern) == 0) \|\| (GetUTFCode(pattern) == ']'))
				398	{
				399	done=MagickTrue;
				400	break;
				401	}
				402	if (GetUTFCode(pattern) == '\\')
				403	{
				404	pattern+=GetUTFOctets(pattern);
				405	if (GetUTFCode(pattern) == 0)
				406	{
				407	done=MagickTrue;
				408	break;
				409	}
				410	}
				411	if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
				412	{
				413	c=GetUTFCode(pattern);
				414	pattern+=GetUTFOctets(pattern);
				415	pattern+=GetUTFOctets(pattern);
				416	if (GetUTFCode(pattern) == ']')
				417	{
				418	done=MagickTrue;
				419	break;
				420	}
				421	if (GetUTFCode(pattern) == '\\')
				422	{
				423	pattern+=GetUTFOctets(pattern);
				424	if (GetUTFCode(pattern) == 0)
				425	{
				426	done=MagickTrue;
				427	break;
				428	}
				429	}
				430	if ((GetUTFCode(expression) < c) \|\|
				431	(GetUTFCode(expression) > GetUTFCode(pattern)))
				432	{
				433	pattern+=GetUTFOctets(pattern);
				434	continue;
				435	}
				436	}
				437	else
				438	if (GetUTFCode(pattern) != GetUTFCode(expression))
				439	{
				440	pattern+=GetUTFOctets(pattern);
				441	continue;
				442	}
				443	pattern+=GetUTFOctets(pattern);
				444	while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
				445	{
				446	if ((GetUTFCode(pattern) == '\\') &&
				447	(GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
				448	pattern+=GetUTFOctets(pattern);
				449	pattern+=GetUTFOctets(pattern);
				450	}
				451	if (GetUTFCode(pattern) != 0)
				452	{
				453	pattern+=GetUTFOctets(pattern);
				454	expression+=GetUTFOctets(expression);
				455	}
				456	break;
				457	}
				458	break;
				459	}
				460	case '?':
				461	{
				462	pattern+=GetUTFOctets(pattern);
				463	expression+=GetUTFOctets(expression);
				464	break;
				465	}
				466	case '{':
				467	{
				468	register const char
				469	*p;
				470
				471	pattern+=GetUTFOctets(pattern);
				472	while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
				473	{
				474	p=expression;
				475	match=MagickTrue;
				476	while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
				477	(GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
				478	(match != MagickFalse))
				479	{
				480	if (GetUTFCode(pattern) == '\\')
				481	pattern+=GetUTFOctets(pattern);
				482	match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
				483	MagickFalse;
				484	p+=GetUTFOctets(p);
				485	pattern+=GetUTFOctets(pattern);
				486	}
				487	if (GetUTFCode(pattern) == 0)
				488	{
				489	match=MagickFalse;
				490	done=MagickTrue;
				491	break;
				492	}
				493	else
				494	if (match != MagickFalse)
				495	{
				496	expression=p;
				497	while ((GetUTFCode(pattern) != '}') &&
				498	(GetUTFCode(pattern) != 0))
				499	{
				500	pattern+=GetUTFOctets(pattern);
				501	if (GetUTFCode(pattern) == '\\')
				502	{
				503	pattern+=GetUTFOctets(pattern);
				504	if (GetUTFCode(pattern) == '}')
				505	pattern+=GetUTFOctets(pattern);
				506	}
				507	}
				508	}
				509	else
				510	{
				511	while ((GetUTFCode(pattern) != '}') &&
				512	(GetUTFCode(pattern) != ',') &&
				513	(GetUTFCode(pattern) != 0))
				514	{
				515	pattern+=GetUTFOctets(pattern);
				516	if (GetUTFCode(pattern) == '\\')
				517	{
				518	pattern+=GetUTFOctets(pattern);
				519	if ((GetUTFCode(pattern) == '}') \|\|
				520	(GetUTFCode(pattern) == ','))
				521	pattern+=GetUTFOctets(pattern);
				522	}
				523	}
				524	}
				525	if (GetUTFCode(pattern) != 0)
				526	pattern+=GetUTFOctets(pattern);
				527	}
				528	break;
				529	}
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	530	case '\\':
				531	{
				532	pattern+=GetUTFOctets(pattern);
cristy	4705fe8	2010-04-23 16:20:03 +0000	[diff] [blame]	533	if (GetUTFCode(pattern) == 0)
				534	break;
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	535	}
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	536	default:
				537	{
				538	if (case_insensitive != MagickFalse)
				539	{
				540	if (tolower((int) GetUTFCode(expression)) !=
				541	tolower((int) GetUTFCode(pattern)))
				542	{
				543	done=MagickTrue;
				544	break;
				545	}
				546	}
				547	else
				548	if (GetUTFCode(expression) != GetUTFCode(pattern))
				549	{
				550	done=MagickTrue;
				551	break;
				552	}
				553	expression+=GetUTFOctets(expression);
				554	pattern+=GetUTFOctets(pattern);
				555	}
				556	}
				557	}
				558	while (GetUTFCode(pattern) == '*')
				559	pattern+=GetUTFOctets(pattern);
				560	match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
				561	MagickTrue : MagickFalse;
				562	return(match);
				563	}
				564
				565	/*
				566	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				567	% %
				568	% %
				569	% %
				570	+ I s G l o b %
				571	% %
				572	% %
				573	% %
				574	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				575	%
				576	% IsGlob() returns MagickTrue if the path specification contains a globbing
				577	% pattern.
				578	%
				579	% The format of the IsGlob method is:
				580	%
				581	% MagickBooleanType IsGlob(const char *path)
				582	%
				583	% A description of each parameter follows:
				584	%
				585	% o path: the path.
				586	%
				587	*/
				588	MagickExport MagickBooleanType IsGlob(const char *path)
				589	{
				590	MagickBooleanType
				591	status;
				592
				593	if (IsPathAccessible(path) != MagickFalse)
				594	return(MagickFalse);
				595	status=(strchr(path,'') != (char ) NULL) \|\|
				596	(strchr(path,'?') != (char *) NULL) \|\|
				597	(strchr(path,'{') != (char *) NULL) \|\|
				598	(strchr(path,'}') != (char *) NULL) \|\|
				599	(strchr(path,'[') != (char *) NULL) \|\|
				600	(strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
				601	return(status);
				602	}
				603
				604	/*
				605	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				606	% %
				607	% %
				608	% %
				609	% T o k e n i z e r %
				610	% %
				611	% %
				612	% %
				613	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				614	%
				615	% Tokenizer() is a generalized, finite state token parser. It extracts tokens
				616	% one at a time from a string of characters. The characters used for white
				617	% space, for break characters, and for quotes can be specified. Also,
				618	% characters in the string can be preceded by a specifiable escape character
				619	% which removes any special meaning the character may have.
				620	%
				621	% Here is some terminology:
				622	%
				623	% o token: A single unit of information in the form of a group of
				624	% characters.
				625	%
				626	% o white space: Space that gets ignored (except within quotes or when
				627	% escaped), like blanks and tabs. in addition, white space terminates a
				628	% non-quoted token.
				629	%
				630	% o break set: One or more characters that separates non-quoted tokens.
				631	% Commas are a common break character. The usage of break characters to
				632	% signal the end of a token is the same as that of white space, except
				633	% multiple break characters with nothing or only white space between
				634	% generate a null token for each two break characters together.
				635	%
				636	% For example, if blank is set to be the white space and comma is set to
				637	% be the break character, the line
				638	%
				639	% A, B, C , , DEF
				640	%
				641	% ... consists of 5 tokens:
				642	%
				643	% 1) "A"
				644	% 2) "B"
				645	% 3) "C"
				646	% 4) "" (the null string)
				647	% 5) "DEF"
				648	%
				649	% o Quote character: A character that, when surrounding a group of other
				650	% characters, causes the group of characters to be treated as a single
				651	% token, no matter how many white spaces or break characters exist in
				652	% the group. Also, a token always terminates after the closing quote.
				653	% For example, if ' is the quote character, blank is white space, and
				654	% comma is the break character, the following string
				655	%
				656	% A, ' B, CD'EF GHI
				657	%
				658	% ... consists of 4 tokens:
				659	%
				660	% 1) "A"
				661	% 2) " B, CD" (note the blanks & comma)
				662	% 3) "EF"
				663	% 4) "GHI"
				664	%
				665	% The quote characters themselves do not appear in the resultant
				666	% tokens. The double quotes are delimiters i use here for
				667	% documentation purposes only.
				668	%
				669	% o Escape character: A character which itself is ignored but which
				670	% causes the next character to be used as is. ^ and \ are often used
				671	% as escape characters. An escape in the last position of the string
				672	% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
				673	% and non-escape) character. For example, assume white space, break
				674	% character, and quote are the same as in the above examples, and
				675	% further, assume that ^ is the escape character. Then, in the string
				676	%
				677	% ABC, ' DEF ^' GH' I ^ J K^ L ^
				678	%
				679	% ... there are 7 tokens:
				680	%
				681	% 1) "ABC"
				682	% 2) " DEF ' GH"
				683	% 3) "I"
				684	% 4) " " (a lone blank)
				685	% 5) "J"
				686	% 6) "K L"
				687	% 7) "^" (passed as is at end of line)
				688	%
				689	% The format of the Tokenizer method is:
				690	%
				691	% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
				692	% const size_t max_token_length,const char *line,const char *white,
				693	% const char *break_set,const char *quote,const char escape,
				694	% char *breaker,int *next,char *quoted)
				695	%
				696	% A description of each parameter follows:
				697	%
				698	% o flag: right now, only the low order 3 bits are used.
				699	%
				700	% 1 => convert non-quoted tokens to upper case
				701	% 2 => convert non-quoted tokens to lower case
				702	% 0 => do not convert non-quoted tokens
				703	%
				704	% o token: a character string containing the returned next token
				705	%
				706	% o max_token_length: the maximum size of "token". Characters beyond
				707	% "max_token_length" are truncated.
				708	%
				709	% o string: the string to be parsed.
				710	%
				711	% o white: a string of the valid white spaces. example:
				712	%
				713	% char whitesp[]={" \t"};
				714	%
				715	% blank and tab will be valid white space.
				716	%
				717	% o break: a string of the valid break characters. example:
				718	%
				719	% char breakch[]={";,"};
				720	%
				721	% semicolon and comma will be valid break characters.
				722	%
				723	% o quote: a string of the valid quote characters. An example would be
				724	%
				725	% char quote[]={"'\""};
				726	%
				727	% (this causes single and double quotes to be valid) Note that a
				728	% token starting with one of these characters needs the same quote
				729	% character to terminate it.
				730	%
				731	% for example:
				732	%
				733	% "ABC '
				734	%
				735	% is unterminated, but
				736	%
				737	% "DEF" and 'GHI'
				738	%
				739	% are properly terminated. Note that different quote characters
				740	% can appear on the same line; only for a given token do the quote
				741	% characters have to be the same.
				742	%
				743	% o escape: the escape character (NOT a string ... only one
				744	% allowed). Use zero if none is desired.
				745	%
				746	% o breaker: the break character used to terminate the current
				747	% token. If the token was quoted, this will be the quote used. If
				748	% the token is the last one on the line, this will be zero.
				749	%
				750	% o next: this variable points to the first character of the
				751	% next token. it gets reset by "tokenizer" as it steps through the
				752	% string. Set it to 0 upon initialization, and leave it alone
				753	% after that. You can change it if you want to jump around in the
				754	% string or re-parse from the beginning, but be careful.
				755	%
				756	% o quoted: set to True if the token was quoted and MagickFalse
				757	% if not. You may need this information (for example: in C, a
				758	% string with quotes around it is a character string, while one
				759	% without is an identifier).
				760	%
				761	% o result: 0 if we haven't reached EOS (end of string), and 1
				762	% if we have.
				763	%
				764	*/
				765
				766	#define IN_WHITE 0
				767	#define IN_TOKEN 1
				768	#define IN_QUOTE 2
				769	#define IN_OZONE 3
				770
/*
  Return the zero-based position of the first occurrence of character c in
  the NUL-terminated string, or -1 when c does not occur.  The terminator
  itself is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
				781
				782	static void StoreToken(TokenInfo token_info,char string,
				783	size_t max_token_length,int c)
				784	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	785	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	786	i;
				787
				788	if ((token_info->offset < 0) \|\|
				789	((size_t) token_info->offset >= (max_token_length-1)))
				790	return;
				791	i=token_info->offset++;
				792	string[i]=(char) c;
				793	if (token_info->state == IN_QUOTE)
				794	return;
				795	switch (token_info->flag & 0x03)
				796	{
				797	case 1:
				798	{
				799	string[i]=(char) toupper(c);
				800	break;
				801	}
				802	case 2:
				803	{
				804	string[i]=(char) tolower(c);
				805	break;
				806	}
				807	default:
				808	break;
				809	}
				810	}
				811
				812	MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
				813	char token,const size_t max_token_length,const char line,const char *white,
				814	const char break_set,const char quote,const char escape,char *breaker,
				815	int next,char quoted)
				816	{
				817	int
				818	c;
				819
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	820	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	821	i;
				822
				823	*breaker='\0';
				824	*quoted='\0';
				825	if (line[*next] == '\0')
				826	return(1);
				827	token_info->state=IN_WHITE;
				828	token_info->quote=(char) MagickFalse;
				829	token_info->flag=flag;
				830	for (token_info->offset=0; (int) line[next] != 0; (next)++)
				831	{
				832	c=(int) line[*next];
				833	i=sindex(c,break_set);
				834	if (i >= 0)
				835	{
				836	switch (token_info->state)
				837	{
				838	case IN_WHITE:
				839	case IN_TOKEN:
				840	case IN_OZONE:
				841	{
				842	(*next)++;
				843	*breaker=break_set[i];
				844	token[token_info->offset]='\0';
				845	return(0);
				846	}
				847	case IN_QUOTE:
				848	{
				849	StoreToken(token_info,token,max_token_length,c);
				850	break;
				851	}
				852	}
				853	continue;
				854	}
				855	i=sindex(c,quote);
				856	if (i >= 0)
				857	{
				858	switch (token_info->state)
				859	{
				860	case IN_WHITE:
				861	{
				862	token_info->state=IN_QUOTE;
				863	token_info->quote=quote[i];
				864	*quoted=(char) MagickTrue;
				865	break;
				866	}
				867	case IN_QUOTE:
				868	{
				869	if (quote[i] != token_info->quote)
				870	StoreToken(token_info,token,max_token_length,c);
				871	else
				872	{
				873	token_info->state=IN_OZONE;
				874	token_info->quote='\0';
				875	}
				876	break;
				877	}
				878	case IN_TOKEN:
				879	case IN_OZONE:
				880	{
				881	*breaker=(char) c;
				882	token[token_info->offset]='\0';
				883	return(0);
				884	}
				885	}
				886	continue;
				887	}
				888	i=sindex(c,white);
				889	if (i >= 0)
				890	{
				891	switch (token_info->state)
				892	{
				893	case IN_WHITE:
				894	case IN_OZONE:
				895	break;
				896	case IN_TOKEN:
				897	{
				898	token_info->state=IN_OZONE;
				899	break;
				900	}
				901	case IN_QUOTE:
				902	{
				903	StoreToken(token_info,token,max_token_length,c);
				904	break;
				905	}
				906	}
				907	continue;
				908	}
				909	if (c == (int) escape)
				910	{
				911	if (line[(*next)+1] == '\0')
				912	{
				913	*breaker='\0';
				914	StoreToken(token_info,token,max_token_length,c);
				915	(*next)++;
				916	token[token_info->offset]='\0';
				917	return(0);
				918	}
				919	switch (token_info->state)
				920	{
				921	case IN_WHITE:
				922	{
				923	(*next)--;
				924	token_info->state=IN_TOKEN;
				925	break;
				926	}
				927	case IN_TOKEN:
				928	case IN_QUOTE:
				929	{
				930	(*next)++;
				931	c=(int) line[*next];
				932	StoreToken(token_info,token,max_token_length,c);
				933	break;
				934	}
				935	case IN_OZONE:
				936	{
				937	token[token_info->offset]='\0';
				938	return(0);
				939	}
				940	}
				941	continue;
				942	}
				943	switch (token_info->state)
				944	{
				945	case IN_WHITE:
				946	token_info->state=IN_TOKEN;
				947	case IN_TOKEN:
				948	case IN_QUOTE:
				949	{
				950	StoreToken(token_info,token,max_token_length,c);
				951	break;
				952	}
				953	case IN_OZONE:
				954	{
				955	token[token_info->offset]='\0';
				956	return(0);
				957	}
				958	}
				959	}
				960	token[token_info->offset]='\0';
				961	return(0);
				962	}