Blame - magick/token.c - platform/external/ImageMagick

blob: 517b848da45312a9628e0869253448bec81a507c [file] [log] [blame]

cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	1	/*
				2	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				3	% %
				4	% %
				5	% %
				6	% TTTTT OOO K K EEEEE N N %
				7	% T O O K K E NN N %
				8	% T O O KKK EEE N N N %
				9	% T O O K K E N NN %
				10	% T OOO K K EEEEE N N %
				11	% %
				12	% %
				13	% MagickCore Token Methods %
				14	% %
				15	% Software Design %
				16	% John Cristy %
				17	% January 1993 %
				18	% %
				19	% %
cristy	7e41fe8	2010-12-04 23:12:08 +0000	[diff] [blame]	20	% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	21	% dedicated to making software imaging solutions freely available. %
				22	% %
				23	% You may not use this file except in compliance with the License. You may %
				24	% obtain a copy of the License at %
				25	% %
				26	% http://www.imagemagick.org/script/license.php %
				27	% %
				28	% Unless required by applicable law or agreed to in writing, software %
				29	% distributed under the License is distributed on an "AS IS" BASIS, %
				30	% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
				31	% See the License for the specific language governing permissions and %
				32	% limitations under the License. %
				33	% %
				34	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				35	%
				36	%
				37	%
				38	*/
				39
				40	/*
				41	Include declarations.
				42	*/
				43	#include "magick/studio.h"
				44	#include "magick/exception.h"
				45	#include "magick/exception-private.h"
				46	#include "magick/image.h"
				47	#include "magick/memory_.h"
				48	#include "magick/string_.h"
				49	#include "magick/token.h"
				50	#include "magick/token-private.h"
				51	#include "magick/utility.h"
				52
				53	/*
				54	Typedef declarations.
				55	*/
				56	struct _TokenInfo
				57	{
				58	int
				59	state;
				60
				61	MagickStatusType
				62	flag;
				63
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	64	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	65	offset;
				66
				67	char
				68	quote;
				69
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	70	size_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	71	signature;
				72	};
				73
				74	/*
				75	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				76	% %
				77	% %
				78	% %
				79	% A c q u i r e T o k e n I n f o %
				80	% %
				81	% %
				82	% %
				83	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				84	%
				85	% AcquireTokenInfo() allocates the TokenInfo structure.
				86	%
				87	% The format of the AcquireTokenInfo method is:
				88	%
				89	% TokenInfo *AcquireTokenInfo()
				90	%
				91	*/
				92	MagickExport TokenInfo *AcquireTokenInfo(void)
				93	{
				94	TokenInfo
				95	*token_info;
				96
cristy	73bd4a5	2010-10-05 11:24:23 +0000	[diff] [blame]	97	token_info=(TokenInfo ) AcquireMagickMemory(sizeof(token_info));
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	98	if (token_info == (TokenInfo *) NULL)
				99	ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
				100	token_info->signature=MagickSignature;
				101	return(token_info);
				102	}
				103
				104	/*
				105	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				106	% %
				107	% %
				108	% %
				109	% D e s t r o y T o k e n I n f o %
				110	% %
				111	% %
				112	% %
				113	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				114	%
				115	% DestroyTokenInfo() deallocates memory associated with a TokenInfo
				116	% structure.
				117	%
				118	% The format of the DestroyTokenInfo method is:
				119	%
				120	% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
				121	%
				122	% A description of each parameter follows:
				123	%
				124	% o token_info: Specifies a pointer to a TokenInfo structure.
				125	%
				126	*/
				127	MagickExport TokenInfo DestroyTokenInfo(TokenInfo token_info)
				128	{
				129	(void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
				130	assert(token_info != (TokenInfo *) NULL);
				131	assert(token_info->signature == MagickSignature);
				132	token_info->signature=(~MagickSignature);
				133	token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
				134	return(token_info);
				135	}
				136
				137	/*
				138	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				139	% %
				140	% %
				141	% %
				142	+ G e t M a g i c k T o k e n %
				143	% %
				144	% %
				145	% %
				146	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				147	%
				148	% GetMagickToken() gets a token from the token stream. A token is defined as a
				149	% sequence of characters delimited by whitespace (e.g. clip-path), a sequence
				150	% delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	151	% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
				152	% separator characters: ':', '=', ',', and ';'.
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	153	%
				154	% The format of the GetMagickToken method is:
				155	%
				156	% void GetMagickToken(const char *start,const char **end,char *token)
				157	%
				158	% A description of each parameter follows:
				159	%
				160	% o start: the start of the token sequence.
				161	%
				162	% o end: point to the end of the token sequence.
				163	%
				164	% o token: copy the token to this buffer.
				165	%
				166	*/
				167	MagickExport void GetMagickToken(const char start,const char end,char token)
				168	{
				169	double
				170	value;
				171
				172	register const char
				173	*p;
				174
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	175	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	176	i;
				177
				178	i=0;
				179	for (p=start; *p != '\0'; )
				180	{
				181	while ((isspace((int) ((unsigned char) p)) != 0) && (p != '\0'))
				182	p++;
				183	if (*p == '\0')
				184	break;
				185	switch (*p)
				186	{
				187	case '"':
				188	case '\'':
				189	case '`':
				190	case '{':
				191	{
				192	register char
				193	escape;
				194
				195	switch (*p)
				196	{
				197	case '"': escape='"'; break;
				198	case '\'': escape='\''; break;
				199	case '`': escape='\''; break;
				200	case '{': escape='}'; break;
				201	default: escape=(*p); break;
				202	}
				203	for (p++; *p != '\0'; p++)
				204	{
				205	if ((p == '\\') && (((p+1) == escape) \|\| (*(p+1) == '\\')))
				206	p++;
				207	else
				208	if (*p == escape)
				209	{
				210	p++;
				211	break;
				212	}
				213	token[i++]=(*p);
				214	}
				215	break;
				216	}
				217	case '/':
				218	{
				219	token[i++]=(*p++);
				220	if ((p == '>') \|\| (p == '/'))
				221	token[i++]=(*p++);
				222	break;
				223	}
				224	default:
				225	{
				226	char
				227	*q;
				228
				229	value=strtod(p,&q);
cristy	da16f16	2011-02-19 23:52:17 +0000	[diff] [blame]	230	(void) value;
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	231	if ((p != q) && (*p != ','))
				232	{
				233	for ( ; (p < q) && (*p != ','); p++)
				234	token[i++]=(*p);
				235	if (*p == '%')
				236	token[i++]=(*p++);
				237	break;
				238	}
cristy	c507168	2011-04-22 02:06:27 +0000	[diff] [blame^]	239	if ((p != '\0') && (isalpha((int) ((unsigned char) p)) == 0) &&
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	240	(p != DirectorySeparator) && (p != '#') && (p != '<'))
				241	{
				242	token[i++]=(*p++);
				243	break;
				244	}
				245	for ( ; *p != '\0'; p++)
				246	{
				247	if (((isspace((int) ((unsigned char) p)) != 0) \|\| (p == '=') \|\|
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	248	(p == ',') \|\| (p == ':') \|\| (p == ';')) && ((p-1) != '\\'))
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	249	break;
				250	if ((i > 0) && (*p == '<'))
				251	break;
				252	token[i++]=(*p);
				253	if (*p == '>')
				254	break;
				255	if (*p == '(')
				256	for (p++; *p != '\0'; p++)
				257	{
				258	token[i++]=(*p);
				259	if ((p == ')') && ((p-1) != '\\'))
				260	break;
				261	}
				262	}
				263	break;
				264	}
				265	}
				266	break;
				267	}
				268	token[i]='\0';
				269	if (LocaleNCompare(token,"url(",4) == 0)
				270	{
				271	ssize_t
				272	offset;
				273
				274	offset=4;
				275	if (token[offset] == '#')
				276	offset++;
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	277	i=(ssize_t) strlen(token);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	278	(void) CopyMagickString(token,token+offset,MaxTextExtent);
				279	token[i-offset-1]='\0';
				280	}
				281	while (isspace((int) ((unsigned char) *p)) != 0)
				282	p++;
				283	if (end != (const char **) NULL)
				284	end=(const char ) p;
				285	}
				286
				287	/*
				288	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				289	% %
				290	% %
				291	% %
				292	% G l o b E x p r e s s i o n %
				293	% %
				294	% %
				295	% %
				296	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				297	%
				298	% GlobExpression() returns MagickTrue if the expression matches the pattern.
				299	%
				300	% The format of the GlobExpression function is:
				301	%
				302	% MagickBooleanType GlobExpression(const char *expression,
				303	% const char *pattern,const MagickBooleanType case_insensitive)
				304	%
				305	% A description of each parameter follows:
				306	%
				307	% o expression: Specifies a pointer to a text string containing a file name.
				308	%
				309	% o pattern: Specifies a pointer to a text string containing a pattern.
				310	%
				311	% o case_insensitive: set to MagickTrue to ignore the case when matching
				312	% an expression.
				313	%
				314	*/
				315	MagickExport MagickBooleanType GlobExpression(const char *expression,
				316	const char *pattern,const MagickBooleanType case_insensitive)
				317	{
				318	MagickBooleanType
				319	done,
				320	match;
				321
				322	register const char
				323	*p;
				324
				325	/*
				326	Return on empty pattern or '*'.
				327	*/
				328	if (pattern == (char *) NULL)
				329	return(MagickTrue);
				330	if (GetUTFCode(pattern) == 0)
				331	return(MagickTrue);
				332	if (LocaleCompare(pattern,"*") == 0)
				333	return(MagickTrue);
				334	p=pattern+strlen(pattern)-1;
				335	if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
				336	{
				337	ExceptionInfo
				338	*exception;
				339
				340	ImageInfo
				341	*image_info;
				342
				343	/*
				344	Determine if pattern is a scene, i.e. img0001.pcd[2].
				345	*/
				346	image_info=AcquireImageInfo();
				347	(void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
				348	exception=AcquireExceptionInfo();
cristy	d965a42	2010-03-03 17:47:35 +0000	[diff] [blame]	349	(void) SetImageInfo(image_info,0,exception);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	350	exception=DestroyExceptionInfo(exception);
				351	if (LocaleCompare(image_info->filename,pattern) != 0)
				352	{
				353	image_info=DestroyImageInfo(image_info);
				354	return(MagickFalse);
				355	}
				356	image_info=DestroyImageInfo(image_info);
				357	}
				358	/*
				359	Evaluate glob expression.
				360	*/
				361	done=MagickFalse;
				362	while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
				363	{
				364	if (GetUTFCode(expression) == 0)
				365	if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
				366	break;
				367	switch (GetUTFCode(pattern))
				368	{
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	369	case '*':
				370	{
				371	MagickBooleanType
				372	status;
				373
				374	status=MagickFalse;
				375	pattern+=GetUTFOctets(pattern);
				376	while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
				377	{
				378	status=GlobExpression(expression,pattern,case_insensitive);
				379	expression+=GetUTFOctets(expression);
				380	}
				381	if (status != MagickFalse)
				382	{
				383	while (GetUTFCode(expression) != 0)
				384	expression+=GetUTFOctets(expression);
				385	while (GetUTFCode(pattern) != 0)
				386	pattern+=GetUTFOctets(pattern);
				387	}
				388	break;
				389	}
				390	case '[':
				391	{
cristy	55a91cd	2010-12-01 00:57:40 +0000	[diff] [blame]	392	int
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	393	c;
				394
				395	pattern+=GetUTFOctets(pattern);
				396	for ( ; ; )
				397	{
				398	if ((GetUTFCode(pattern) == 0) \|\| (GetUTFCode(pattern) == ']'))
				399	{
				400	done=MagickTrue;
				401	break;
				402	}
				403	if (GetUTFCode(pattern) == '\\')
				404	{
				405	pattern+=GetUTFOctets(pattern);
				406	if (GetUTFCode(pattern) == 0)
				407	{
				408	done=MagickTrue;
				409	break;
				410	}
				411	}
				412	if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
				413	{
				414	c=GetUTFCode(pattern);
				415	pattern+=GetUTFOctets(pattern);
				416	pattern+=GetUTFOctets(pattern);
				417	if (GetUTFCode(pattern) == ']')
				418	{
				419	done=MagickTrue;
				420	break;
				421	}
				422	if (GetUTFCode(pattern) == '\\')
				423	{
				424	pattern+=GetUTFOctets(pattern);
				425	if (GetUTFCode(pattern) == 0)
				426	{
				427	done=MagickTrue;
				428	break;
				429	}
				430	}
				431	if ((GetUTFCode(expression) < c) \|\|
				432	(GetUTFCode(expression) > GetUTFCode(pattern)))
				433	{
				434	pattern+=GetUTFOctets(pattern);
				435	continue;
				436	}
				437	}
				438	else
				439	if (GetUTFCode(pattern) != GetUTFCode(expression))
				440	{
				441	pattern+=GetUTFOctets(pattern);
				442	continue;
				443	}
				444	pattern+=GetUTFOctets(pattern);
				445	while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
				446	{
				447	if ((GetUTFCode(pattern) == '\\') &&
				448	(GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
				449	pattern+=GetUTFOctets(pattern);
				450	pattern+=GetUTFOctets(pattern);
				451	}
				452	if (GetUTFCode(pattern) != 0)
				453	{
				454	pattern+=GetUTFOctets(pattern);
				455	expression+=GetUTFOctets(expression);
				456	}
				457	break;
				458	}
				459	break;
				460	}
				461	case '?':
				462	{
				463	pattern+=GetUTFOctets(pattern);
				464	expression+=GetUTFOctets(expression);
				465	break;
				466	}
				467	case '{':
				468	{
				469	register const char
				470	*p;
				471
				472	pattern+=GetUTFOctets(pattern);
				473	while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
				474	{
				475	p=expression;
				476	match=MagickTrue;
				477	while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
				478	(GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
				479	(match != MagickFalse))
				480	{
				481	if (GetUTFCode(pattern) == '\\')
				482	pattern+=GetUTFOctets(pattern);
				483	match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
				484	MagickFalse;
				485	p+=GetUTFOctets(p);
				486	pattern+=GetUTFOctets(pattern);
				487	}
				488	if (GetUTFCode(pattern) == 0)
				489	{
				490	match=MagickFalse;
				491	done=MagickTrue;
				492	break;
				493	}
				494	else
				495	if (match != MagickFalse)
				496	{
				497	expression=p;
				498	while ((GetUTFCode(pattern) != '}') &&
				499	(GetUTFCode(pattern) != 0))
				500	{
				501	pattern+=GetUTFOctets(pattern);
				502	if (GetUTFCode(pattern) == '\\')
				503	{
				504	pattern+=GetUTFOctets(pattern);
				505	if (GetUTFCode(pattern) == '}')
				506	pattern+=GetUTFOctets(pattern);
				507	}
				508	}
				509	}
				510	else
				511	{
				512	while ((GetUTFCode(pattern) != '}') &&
				513	(GetUTFCode(pattern) != ',') &&
				514	(GetUTFCode(pattern) != 0))
				515	{
				516	pattern+=GetUTFOctets(pattern);
				517	if (GetUTFCode(pattern) == '\\')
				518	{
				519	pattern+=GetUTFOctets(pattern);
				520	if ((GetUTFCode(pattern) == '}') \|\|
				521	(GetUTFCode(pattern) == ','))
				522	pattern+=GetUTFOctets(pattern);
				523	}
				524	}
				525	}
				526	if (GetUTFCode(pattern) != 0)
				527	pattern+=GetUTFOctets(pattern);
				528	}
				529	break;
				530	}
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	531	case '\\':
				532	{
				533	pattern+=GetUTFOctets(pattern);
cristy	4705fe8	2010-04-23 16:20:03 +0000	[diff] [blame]	534	if (GetUTFCode(pattern) == 0)
				535	break;
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	536	}
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	537	default:
				538	{
				539	if (case_insensitive != MagickFalse)
				540	{
				541	if (tolower((int) GetUTFCode(expression)) !=
				542	tolower((int) GetUTFCode(pattern)))
				543	{
				544	done=MagickTrue;
				545	break;
				546	}
				547	}
				548	else
				549	if (GetUTFCode(expression) != GetUTFCode(pattern))
				550	{
				551	done=MagickTrue;
				552	break;
				553	}
				554	expression+=GetUTFOctets(expression);
				555	pattern+=GetUTFOctets(pattern);
				556	}
				557	}
				558	}
				559	while (GetUTFCode(pattern) == '*')
				560	pattern+=GetUTFOctets(pattern);
				561	match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
				562	MagickTrue : MagickFalse;
				563	return(match);
				564	}
				565
				566	/*
				567	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				568	% %
				569	% %
				570	% %
				571	+ I s G l o b %
				572	% %
				573	% %
				574	% %
				575	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				576	%
				577	% IsGlob() returns MagickTrue if the path specification contains a globbing
				578	% pattern.
				579	%
				580	% The format of the IsGlob method is:
				581	%
				582	% MagickBooleanType IsGlob(const char *path)
				583	%
				584	% A description of each parameter follows:
				585	%
				586	% o path: the path.
				587	%
				588	*/
				589	MagickExport MagickBooleanType IsGlob(const char *path)
				590	{
				591	MagickBooleanType
				592	status;
				593
				594	if (IsPathAccessible(path) != MagickFalse)
				595	return(MagickFalse);
				596	status=(strchr(path,'') != (char ) NULL) \|\|
				597	(strchr(path,'?') != (char *) NULL) \|\|
				598	(strchr(path,'{') != (char *) NULL) \|\|
				599	(strchr(path,'}') != (char *) NULL) \|\|
				600	(strchr(path,'[') != (char *) NULL) \|\|
				601	(strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
				602	return(status);
				603	}
				604
				605	/*
				606	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				607	% %
				608	% %
				609	% %
				610	% T o k e n i z e r %
				611	% %
				612	% %
				613	% %
				614	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				615	%
				616	% Tokenizer() is a generalized, finite state token parser. It extracts tokens
				617	% one at a time from a string of characters. The characters used for white
				618	% space, for break characters, and for quotes can be specified. Also,
				619	% characters in the string can be preceded by a specifiable escape character
				620	% which removes any special meaning the character may have.
				621	%
				622	% Here is some terminology:
				623	%
				624	% o token: A single unit of information in the form of a group of
				625	% characters.
				626	%
				627	% o white space: Space that gets ignored (except within quotes or when
				628	% escaped), like blanks and tabs. In addition, white space terminates a
				629	% non-quoted token.
				630	%
				631	% o break set: One or more characters that separates non-quoted tokens.
				632	% Commas are a common break character. The usage of break characters to
				633	% signal the end of a token is the same as that of white space, except
				634	% multiple break characters with nothing or only white space between
				635	% generate a null token for each two break characters together.
				636	%
				637	% For example, if blank is set to be the white space and comma is set to
				638	% be the break character, the line
				639	%
				640	% A, B, C , , DEF
				641	%
				642	% ... consists of 5 tokens:
				643	%
				644	% 1) "A"
				645	% 2) "B"
				646	% 3) "C"
				647	% 4) "" (the null string)
				648	% 5) "DEF"
				649	%
				650	% o Quote character: A character that, when surrounding a group of other
				651	% characters, causes the group of characters to be treated as a single
				652	% token, no matter how many white spaces or break characters exist in
				653	% the group. Also, a token always terminates after the closing quote.
				654	% For example, if ' is the quote character, blank is white space, and
				655	% comma is the break character, the following string
				656	%
				657	% A, ' B, CD'EF GHI
				658	%
				659	% ... consists of 4 tokens:
				660	%
				661	% 1) "A"
				662	% 2) " B, CD" (note the blanks & comma)
				663	% 3) "EF"
				664	% 4) "GHI"
				665	%
				666	% The quote characters themselves do not appear in the resultant
				667	% tokens. The double quotes are delimiters i use here for
				668	% documentation purposes only.
				669	%
				670	% o Escape character: A character which itself is ignored but which
				671	% causes the next character to be used as is. ^ and \ are often used
				672	% as escape characters. An escape in the last position of the string
				673	% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
				674	% and non-escape) character. For example, assume white space, break
				675	% character, and quote are the same as in the above examples, and
				676	% further, assume that ^ is the escape character. Then, in the string
				677	%
				678	% ABC, ' DEF ^' GH' I ^ J K^ L ^
				679	%
				680	% ... there are 7 tokens:
				681	%
				682	% 1) "ABC"
				683	% 2) " DEF ' GH"
				684	% 3) "I"
				685	% 4) " " (a lone blank)
				686	% 5) "J"
				687	% 6) "K L"
				688	% 7) "^" (passed as is at end of line)
				689	%
				690	% The format of the Tokenizer method is:
				691	%
				692	% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
				693	% const size_t max_token_length,const char *line,const char *white,
				694	% const char *break_set,const char *quote,const char escape,
				695	% char *breaker,int *next,char *quoted)
				696	%
				697	% A description of each parameter follows:
				698	%
				699	% o flag: right now, only the low order 3 bits are used.
				700	%
				701	% 1 => convert non-quoted tokens to upper case
				702	% 2 => convert non-quoted tokens to lower case
				703	% 0 => do not convert non-quoted tokens
				704	%
				705	% o token: a character string containing the returned next token
				706	%
				707	% o max_token_length: the maximum size of "token". Characters beyond
				708	% "max_token_length" are truncated.
				709	%
				710	% o string: the string to be parsed.
				711	%
				712	% o white: a string of the valid white spaces. example:
				713	%
				714	% char whitesp[]={" \t"};
				715	%
				716	% blank and tab will be valid white space.
				717	%
				718	% o break: a string of the valid break characters. example:
				719	%
				720	% char breakch[]={";,"};
				721	%
				722	% semicolon and comma will be valid break characters.
				723	%
				724	% o quote: a string of the valid quote characters. An example would be
				725	%
				726	% char quote[]={"'\""};
				727	%
				728	% (this causes single and double quotes to be valid) Note that a
				729	% token starting with one of these characters needs the same quote
				730	% character to terminate it.
				731	%
				732	% for example:
				733	%
				734	% "ABC '
				735	%
				736	% is unterminated, but
				737	%
				738	% "DEF" and 'GHI'
				739	%
				740	% are properly terminated. Note that different quote characters
				741	% can appear on the same line; only for a given token do the quote
				742	% characters have to be the same.
				743	%
				744	% o escape: the escape character (NOT a string ... only one
				745	% allowed). Use zero if none is desired.
				746	%
				747	% o breaker: the break character used to terminate the current
				748	% token. If the token was quoted, this will be the quote used. If
				749	% the token is the last one on the line, this will be zero.
				750	%
				751	% o next: this variable points to the first character of the
				752	% next token. it gets reset by "tokenizer" as it steps through the
				753	% string. Set it to 0 upon initialization, and leave it alone
				754	% after that. You can change it if you want to jump around in the
				755	% string or re-parse from the beginning, but be careful.
				756	%
				757	% o quoted: set to True if the token was quoted and MagickFalse
				758	% if not. You may need this information (for example: in C, a
				759	% string with quotes around it is a character string, while one
				760	% without is an identifier).
				761	%
				762	% o result: 0 if we haven't reached EOS (end of string), and 1
				763	% if we have.
				764	%
				765	*/
				766
				767	#define IN_WHITE 0
				768	#define IN_TOKEN 1
				769	#define IN_QUOTE 2
				770	#define IN_OZONE 3
				771
/*
  sindex() returns the zero-based position of the first occurrence of
  character c in string, or -1 if c does not occur.  The terminating NUL is
  never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  register const char
    *p;

  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}
				782
				783	static void StoreToken(TokenInfo token_info,char string,
				784	size_t max_token_length,int c)
				785	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	786	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	787	i;
				788
				789	if ((token_info->offset < 0) \|\|
				790	((size_t) token_info->offset >= (max_token_length-1)))
				791	return;
				792	i=token_info->offset++;
				793	string[i]=(char) c;
				794	if (token_info->state == IN_QUOTE)
				795	return;
				796	switch (token_info->flag & 0x03)
				797	{
				798	case 1:
				799	{
				800	string[i]=(char) toupper(c);
				801	break;
				802	}
				803	case 2:
				804	{
				805	string[i]=(char) tolower(c);
				806	break;
				807	}
				808	default:
				809	break;
				810	}
				811	}
				812
				813	MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
				814	char token,const size_t max_token_length,const char line,const char *white,
				815	const char break_set,const char quote,const char escape,char *breaker,
				816	int next,char quoted)
				817	{
				818	int
				819	c;
				820
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	821	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	822	i;
				823
				824	*breaker='\0';
				825	*quoted='\0';
				826	if (line[*next] == '\0')
				827	return(1);
				828	token_info->state=IN_WHITE;
				829	token_info->quote=(char) MagickFalse;
				830	token_info->flag=flag;
				831	for (token_info->offset=0; (int) line[next] != 0; (next)++)
				832	{
				833	c=(int) line[*next];
				834	i=sindex(c,break_set);
				835	if (i >= 0)
				836	{
				837	switch (token_info->state)
				838	{
				839	case IN_WHITE:
				840	case IN_TOKEN:
				841	case IN_OZONE:
				842	{
				843	(*next)++;
				844	*breaker=break_set[i];
				845	token[token_info->offset]='\0';
				846	return(0);
				847	}
				848	case IN_QUOTE:
				849	{
				850	StoreToken(token_info,token,max_token_length,c);
				851	break;
				852	}
				853	}
				854	continue;
				855	}
				856	i=sindex(c,quote);
				857	if (i >= 0)
				858	{
				859	switch (token_info->state)
				860	{
				861	case IN_WHITE:
				862	{
				863	token_info->state=IN_QUOTE;
				864	token_info->quote=quote[i];
				865	*quoted=(char) MagickTrue;
				866	break;
				867	}
				868	case IN_QUOTE:
				869	{
				870	if (quote[i] != token_info->quote)
				871	StoreToken(token_info,token,max_token_length,c);
				872	else
				873	{
				874	token_info->state=IN_OZONE;
				875	token_info->quote='\0';
				876	}
				877	break;
				878	}
				879	case IN_TOKEN:
				880	case IN_OZONE:
				881	{
				882	*breaker=(char) c;
				883	token[token_info->offset]='\0';
				884	return(0);
				885	}
				886	}
				887	continue;
				888	}
				889	i=sindex(c,white);
				890	if (i >= 0)
				891	{
				892	switch (token_info->state)
				893	{
				894	case IN_WHITE:
				895	case IN_OZONE:
				896	break;
				897	case IN_TOKEN:
				898	{
				899	token_info->state=IN_OZONE;
				900	break;
				901	}
				902	case IN_QUOTE:
				903	{
				904	StoreToken(token_info,token,max_token_length,c);
				905	break;
				906	}
				907	}
				908	continue;
				909	}
				910	if (c == (int) escape)
				911	{
				912	if (line[(*next)+1] == '\0')
				913	{
				914	*breaker='\0';
				915	StoreToken(token_info,token,max_token_length,c);
				916	(*next)++;
				917	token[token_info->offset]='\0';
				918	return(0);
				919	}
				920	switch (token_info->state)
				921	{
				922	case IN_WHITE:
				923	{
				924	(*next)--;
				925	token_info->state=IN_TOKEN;
				926	break;
				927	}
				928	case IN_TOKEN:
				929	case IN_QUOTE:
				930	{
				931	(*next)++;
				932	c=(int) line[*next];
				933	StoreToken(token_info,token,max_token_length,c);
				934	break;
				935	}
				936	case IN_OZONE:
				937	{
				938	token[token_info->offset]='\0';
				939	return(0);
				940	}
				941	}
				942	continue;
				943	}
				944	switch (token_info->state)
				945	{
				946	case IN_WHITE:
				947	token_info->state=IN_TOKEN;
				948	case IN_TOKEN:
				949	case IN_QUOTE:
				950	{
				951	StoreToken(token_info,token,max_token_length,c);
				952	break;
				953	}
				954	case IN_OZONE:
				955	{
				956	token[token_info->offset]='\0';
				957	return(0);
				958	}
				959	}
				960	}
				961	token[token_info->offset]='\0';
				962	return(0);
				963	}