Blame - MagickCore/token.c - platform/external/ImageMagick

blob: c32ec63685b2c0b22d9b722ab2209bef70a2b1a3 [file] [log] [blame]

cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	1	/*
				2	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				3	% %
				4	% %
				5	% %
				6	% TTTTT OOO K K EEEEE N N %
				7	% T O O K K E NN N %
				8	% T O O KKK EEE N N N %
				9	% T O O K K E N NN %
				10	% T OOO K K EEEEE N N %
				11	% %
				12	% %
				13	% MagickCore Token Methods %
				14	% %
				15	% Software Design %
				16	% John Cristy %
				17	% January 1993 %
				18	% %
				19	% %
cristy	1454be7	2011-12-19 01:52:48 +0000	[diff] [blame]	20	% Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization %
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	21	% dedicated to making software imaging solutions freely available. %
				22	% %
				23	% You may not use this file except in compliance with the License. You may %
				24	% obtain a copy of the License at %
				25	% %
				26	% http://www.imagemagick.org/script/license.php %
				27	% %
				28	% Unless required by applicable law or agreed to in writing, software %
				29	% distributed under the License is distributed on an "AS IS" BASIS, %
				30	% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
				31	% See the License for the specific language governing permissions and %
				32	% limitations under the License. %
				33	% %
				34	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				35	%
				36	%
				37	%
				38	*/
				39
				40	/*
				41	Include declarations.
				42	*/
cristy	4c08aed	2011-07-01 19:47:50 +0000	[diff] [blame]	43	#include "MagickCore/studio.h"
				44	#include "MagickCore/exception.h"
				45	#include "MagickCore/exception-private.h"
				46	#include "MagickCore/image.h"
				47	#include "MagickCore/memory_.h"
				48	#include "MagickCore/string_.h"
				49	#include "MagickCore/string-private.h"
				50	#include "MagickCore/token.h"
				51	#include "MagickCore/token-private.h"
				52	#include "MagickCore/utility.h"
cristy	d1dd6e4	2011-09-04 01:46:08 +0000	[diff] [blame]	53	#include "MagickCore/utility-private.h"
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	54
				55	/*
				56	Typedef declarations.
				57	*/
				58	struct _TokenInfo
				59	{
				60	int
				61	state;
				62
				63	MagickStatusType
				64	flag;
				65
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	66	ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	67	offset;
				68
				69	char
				70	quote;
				71
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	72	size_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	73	signature;
				74	};
				75
				76	/*
				77	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				78	% %
				79	% %
				80	% %
				81	% A c q u i r e T o k e n I n f o %
				82	% %
				83	% %
				84	% %
				85	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				86	%
				87	% AcquireTokenInfo() allocates the TokenInfo structure.
				88	%
				89	% The format of the AcquireTokenInfo method is:
				90	%
				91	% TokenInfo *AcquireTokenInfo()
				92	%
				93	*/
				94	MagickExport TokenInfo *AcquireTokenInfo(void)
				95	{
				96	TokenInfo
				97	*token_info;
				98
cristy	73bd4a5	2010-10-05 11:24:23 +0000	[diff] [blame]	99	token_info=(TokenInfo ) AcquireMagickMemory(sizeof(token_info));
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	100	if (token_info == (TokenInfo *) NULL)
				101	ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
				102	token_info->signature=MagickSignature;
				103	return(token_info);
				104	}
				105
				106	/*
				107	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				108	% %
				109	% %
				110	% %
				111	% D e s t r o y T o k e n I n f o %
				112	% %
				113	% %
				114	% %
				115	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				116	%
				117	% DestroyTokenInfo() deallocates memory associated with a TokenInfo
				118	% structure.
				119	%
				120	% The format of the DestroyTokenInfo method is:
				121	%
				122	% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
				123	%
				124	% A description of each parameter follows:
				125	%
				126	% o token_info: Specifies a pointer to a TokenInfo structure.
				127	%
				128	*/
				129	MagickExport TokenInfo DestroyTokenInfo(TokenInfo token_info)
				130	{
				131	(void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
				132	assert(token_info != (TokenInfo *) NULL);
				133	assert(token_info->signature == MagickSignature);
				134	token_info->signature=(~MagickSignature);
				135	token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
				136	return(token_info);
				137	}
				138
				139	/*
				140	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				141	% %
				142	% %
				143	% %
				144	+ G e t M a g i c k T o k e n %
				145	% %
				146	% %
				147	% %
				148	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				149	%
cristy	df11e55	2011-04-23 17:18:30 +0000	[diff] [blame]	150	% GetMagickToken() gets a token from the token stream. A token is defined as
				151	% a sequence of characters delimited by whitespace (e.g. clip-path), a
				152	% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristy	dd8327f	2010-05-12 12:39:46 +0000	[diff] [blame]	153	% parentheses (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
				154	% separator characters: ':', '=', ',', '|', and ';'.
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	155	%
				156	% The format of the GetMagickToken method is:
				157	%
				158	% void GetMagickToken(const char *start,const char **end,char *token)
				159	%
				160	% A description of each parameter follows:
				161	%
				162	% o start: the start of the token sequence.
				163	%
				164	% o end: point to the end of the token sequence.
				165	%
				166	% o token: copy the token to this buffer.
				167	%
				168	*/
				169	MagickExport void GetMagickToken(const char start,const char end,char token)
				170	{
				171	double
				172	value;
				173
				174	register const char
				175	*p;
				176
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	177	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	178	i;
				179
cristy	32f6912	2011-04-22 02:26:00 +0000	[diff] [blame]	180	assert(start != (const char *) NULL);
				181	assert(token != (char *) NULL);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	182	i=0;
				183	for (p=start; *p != '\0'; )
				184	{
				185	while ((isspace((int) ((unsigned char) p)) != 0) && (p != '\0'))
				186	p++;
				187	if (*p == '\0')
				188	break;
				189	switch (*p)
				190	{
				191	case '"':
				192	case '\'':
				193	case '`':
				194	case '{':
				195	{
				196	register char
				197	escape;
				198
				199	switch (*p)
				200	{
				201	case '"': escape='"'; break;
				202	case '\'': escape='\''; break;
				203	case '`': escape='\''; break;
				204	case '{': escape='}'; break;
				205	default: escape=(*p); break;
				206	}
				207	for (p++; *p != '\0'; p++)
				208	{
				209	if ((p == '\\') && (((p+1) == escape) \|\| (*(p+1) == '\\')))
				210	p++;
				211	else
				212	if (*p == escape)
				213	{
				214	p++;
				215	break;
				216	}
				217	token[i++]=(*p);
				218	}
				219	break;
				220	}
				221	case '/':
				222	{
				223	token[i++]=(*p++);
				224	if ((p == '>') \|\| (p == '/'))
				225	token[i++]=(*p++);
				226	break;
				227	}
				228	default:
				229	{
				230	char
				231	*q;
				232
cristy	dbdd0e3	2011-11-04 23:29:40 +0000	[diff] [blame]	233	value=StringToDouble(p,&q);
cristy	da16f16	2011-02-19 23:52:17 +0000	[diff] [blame]	234	(void) value;
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	235	if ((p != q) && (*p != ','))
				236	{
				237	for ( ; (p < q) && (*p != ','); p++)
				238	token[i++]=(*p);
				239	if (*p == '%')
				240	token[i++]=(*p++);
				241	break;
				242	}
cristy	c507168	2011-04-22 02:06:27 +0000	[diff] [blame]	243	if ((p != '\0') && (isalpha((int) ((unsigned char) p)) == 0) &&
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	244	(p != DirectorySeparator) && (p != '#') && (p != '<'))
				245	{
				246	token[i++]=(*p++);
				247	break;
				248	}
				249	for ( ; *p != '\0'; p++)
				250	{
				251	if (((isspace((int) ((unsigned char) p)) != 0) \|\| (p == '=') \|\|
cristy	fc68ef5	2012-03-11 23:33:15 +0000	[diff] [blame^]	252	(p == ':') \|\| (p == ',') \|\| (p == '\|') \|\| (p == ';')) &&
				253	(*(p-1) != '\\'))
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	254	break;
				255	if ((i > 0) && (*p == '<'))
				256	break;
				257	token[i++]=(*p);
				258	if (*p == '>')
				259	break;
				260	if (*p == '(')
				261	for (p++; *p != '\0'; p++)
				262	{
				263	token[i++]=(*p);
				264	if ((p == ')') && ((p-1) != '\\'))
				265	break;
				266	}
				267	}
				268	break;
				269	}
				270	}
				271	break;
				272	}
				273	token[i]='\0';
				274	if (LocaleNCompare(token,"url(",4) == 0)
				275	{
				276	ssize_t
				277	offset;
				278
				279	offset=4;
				280	if (token[offset] == '#')
				281	offset++;
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	282	i=(ssize_t) strlen(token);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	283	(void) CopyMagickString(token,token+offset,MaxTextExtent);
				284	token[i-offset-1]='\0';
				285	}
				286	while (isspace((int) ((unsigned char) *p)) != 0)
				287	p++;
				288	if (end != (const char **) NULL)
				289	end=(const char ) p;
				290	}
				291
				292	/*
				293	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				294	% %
				295	% %
				296	% %
				297	% G l o b E x p r e s s i o n %
				298	% %
				299	% %
				300	% %
				301	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				302	%
				303	% GlobExpression() returns MagickTrue if the expression matches the pattern.
				304	%
				305	% The format of the GlobExpression function is:
				306	%
				307	% MagickBooleanType GlobExpression(const char *expression,
				308	% const char *pattern,const MagickBooleanType case_insensitive)
				309	%
				310	% A description of each parameter follows:
				311	%
				312	% o expression: Specifies a pointer to a text string containing a file name.
				313	%
				314	% o pattern: Specifies a pointer to a text string containing a pattern.
				315	%
				316	% o case_insensitive: set to MagickTrue to ignore the case when matching
				317	% an expression.
				318	%
				319	*/
				320	MagickExport MagickBooleanType GlobExpression(const char *expression,
				321	const char *pattern,const MagickBooleanType case_insensitive)
				322	{
				323	MagickBooleanType
				324	done,
				325	match;
				326
				327	register const char
				328	*p;
				329
				330	/*
				331	Return on empty pattern or '*'.
				332	*/
				333	if (pattern == (char *) NULL)
				334	return(MagickTrue);
				335	if (GetUTFCode(pattern) == 0)
				336	return(MagickTrue);
				337	if (LocaleCompare(pattern,"*") == 0)
				338	return(MagickTrue);
				339	p=pattern+strlen(pattern)-1;
				340	if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
				341	{
				342	ExceptionInfo
				343	*exception;
				344
				345	ImageInfo
				346	*image_info;
				347
				348	/*
				349	Determine if pattern is a scene, i.e. img0001.pcd[2].
				350	*/
				351	image_info=AcquireImageInfo();
				352	(void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
				353	exception=AcquireExceptionInfo();
cristy	d965a42	2010-03-03 17:47:35 +0000	[diff] [blame]	354	(void) SetImageInfo(image_info,0,exception);
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	355	exception=DestroyExceptionInfo(exception);
				356	if (LocaleCompare(image_info->filename,pattern) != 0)
				357	{
				358	image_info=DestroyImageInfo(image_info);
				359	return(MagickFalse);
				360	}
				361	image_info=DestroyImageInfo(image_info);
				362	}
				363	/*
				364	Evaluate glob expression.
				365	*/
				366	done=MagickFalse;
				367	while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
				368	{
				369	if (GetUTFCode(expression) == 0)
				370	if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
				371	break;
				372	switch (GetUTFCode(pattern))
				373	{
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	374	case '*':
				375	{
				376	MagickBooleanType
				377	status;
				378
				379	status=MagickFalse;
				380	pattern+=GetUTFOctets(pattern);
				381	while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
				382	{
				383	status=GlobExpression(expression,pattern,case_insensitive);
				384	expression+=GetUTFOctets(expression);
				385	}
				386	if (status != MagickFalse)
				387	{
				388	while (GetUTFCode(expression) != 0)
				389	expression+=GetUTFOctets(expression);
				390	while (GetUTFCode(pattern) != 0)
				391	pattern+=GetUTFOctets(pattern);
				392	}
				393	break;
				394	}
				395	case '[':
				396	{
cristy	55a91cd	2010-12-01 00:57:40 +0000	[diff] [blame]	397	int
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	398	c;
				399
				400	pattern+=GetUTFOctets(pattern);
				401	for ( ; ; )
				402	{
				403	if ((GetUTFCode(pattern) == 0) \|\| (GetUTFCode(pattern) == ']'))
				404	{
				405	done=MagickTrue;
				406	break;
				407	}
				408	if (GetUTFCode(pattern) == '\\')
				409	{
				410	pattern+=GetUTFOctets(pattern);
				411	if (GetUTFCode(pattern) == 0)
				412	{
				413	done=MagickTrue;
				414	break;
				415	}
				416	}
				417	if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
				418	{
				419	c=GetUTFCode(pattern);
				420	pattern+=GetUTFOctets(pattern);
				421	pattern+=GetUTFOctets(pattern);
				422	if (GetUTFCode(pattern) == ']')
				423	{
				424	done=MagickTrue;
				425	break;
				426	}
				427	if (GetUTFCode(pattern) == '\\')
				428	{
				429	pattern+=GetUTFOctets(pattern);
				430	if (GetUTFCode(pattern) == 0)
				431	{
				432	done=MagickTrue;
				433	break;
				434	}
				435	}
				436	if ((GetUTFCode(expression) < c) \|\|
				437	(GetUTFCode(expression) > GetUTFCode(pattern)))
				438	{
				439	pattern+=GetUTFOctets(pattern);
				440	continue;
				441	}
				442	}
				443	else
				444	if (GetUTFCode(pattern) != GetUTFCode(expression))
				445	{
				446	pattern+=GetUTFOctets(pattern);
				447	continue;
				448	}
				449	pattern+=GetUTFOctets(pattern);
				450	while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
				451	{
				452	if ((GetUTFCode(pattern) == '\\') &&
				453	(GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
				454	pattern+=GetUTFOctets(pattern);
				455	pattern+=GetUTFOctets(pattern);
				456	}
				457	if (GetUTFCode(pattern) != 0)
				458	{
				459	pattern+=GetUTFOctets(pattern);
				460	expression+=GetUTFOctets(expression);
				461	}
				462	break;
				463	}
				464	break;
				465	}
				466	case '?':
				467	{
				468	pattern+=GetUTFOctets(pattern);
				469	expression+=GetUTFOctets(expression);
				470	break;
				471	}
				472	case '{':
				473	{
				474	register const char
				475	*p;
				476
				477	pattern+=GetUTFOctets(pattern);
				478	while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
				479	{
				480	p=expression;
				481	match=MagickTrue;
				482	while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
				483	(GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
				484	(match != MagickFalse))
				485	{
				486	if (GetUTFCode(pattern) == '\\')
				487	pattern+=GetUTFOctets(pattern);
				488	match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
				489	MagickFalse;
				490	p+=GetUTFOctets(p);
				491	pattern+=GetUTFOctets(pattern);
				492	}
				493	if (GetUTFCode(pattern) == 0)
				494	{
				495	match=MagickFalse;
				496	done=MagickTrue;
				497	break;
				498	}
				499	else
				500	if (match != MagickFalse)
				501	{
				502	expression=p;
				503	while ((GetUTFCode(pattern) != '}') &&
				504	(GetUTFCode(pattern) != 0))
				505	{
				506	pattern+=GetUTFOctets(pattern);
				507	if (GetUTFCode(pattern) == '\\')
				508	{
				509	pattern+=GetUTFOctets(pattern);
				510	if (GetUTFCode(pattern) == '}')
				511	pattern+=GetUTFOctets(pattern);
				512	}
				513	}
				514	}
				515	else
				516	{
				517	while ((GetUTFCode(pattern) != '}') &&
				518	(GetUTFCode(pattern) != ',') &&
				519	(GetUTFCode(pattern) != 0))
				520	{
				521	pattern+=GetUTFOctets(pattern);
				522	if (GetUTFCode(pattern) == '\\')
				523	{
				524	pattern+=GetUTFOctets(pattern);
				525	if ((GetUTFCode(pattern) == '}') \|\|
				526	(GetUTFCode(pattern) == ','))
				527	pattern+=GetUTFOctets(pattern);
				528	}
				529	}
				530	}
				531	if (GetUTFCode(pattern) != 0)
				532	pattern+=GetUTFOctets(pattern);
				533	}
				534	break;
				535	}
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	536	case '\\':
				537	{
				538	pattern+=GetUTFOctets(pattern);
cristy	4705fe8	2010-04-23 16:20:03 +0000	[diff] [blame]	539	if (GetUTFCode(pattern) == 0)
				540	break;
cristy	ecbe37f	2010-04-22 13:50:04 +0000	[diff] [blame]	541	}
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	542	default:
				543	{
				544	if (case_insensitive != MagickFalse)
				545	{
				546	if (tolower((int) GetUTFCode(expression)) !=
				547	tolower((int) GetUTFCode(pattern)))
				548	{
				549	done=MagickTrue;
				550	break;
				551	}
				552	}
				553	else
				554	if (GetUTFCode(expression) != GetUTFCode(pattern))
				555	{
				556	done=MagickTrue;
				557	break;
				558	}
				559	expression+=GetUTFOctets(expression);
				560	pattern+=GetUTFOctets(pattern);
				561	}
				562	}
				563	}
				564	while (GetUTFCode(pattern) == '*')
				565	pattern+=GetUTFOctets(pattern);
				566	match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
				567	MagickTrue : MagickFalse;
				568	return(match);
				569	}
				570
				571	/*
				572	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				573	% %
				574	% %
				575	% %
				576	+ I s G l o b %
				577	% %
				578	% %
				579	% %
				580	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				581	%
				582	% IsGlob() returns MagickTrue if the path specification contains a globbing
				583	% pattern.
				584	%
				585	% The format of the IsGlob method is:
				586	%
				587	% MagickBooleanType IsGlob(const char *path)
				588	%
				589	% A description of each parameter follows:
				590	%
				591	% o path: the path.
				592	%
				593	*/
cristy	7832dc2	2011-09-05 01:21:53 +0000	[diff] [blame]	594	MagickPrivate MagickBooleanType IsGlob(const char *path)
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	595	{
				596	MagickBooleanType
				597	status;
				598
				599	if (IsPathAccessible(path) != MagickFalse)
				600	return(MagickFalse);
				601	status=(strchr(path,'') != (char ) NULL) \|\|
				602	(strchr(path,'?') != (char *) NULL) \|\|
				603	(strchr(path,'{') != (char *) NULL) \|\|
				604	(strchr(path,'}') != (char *) NULL) \|\|
				605	(strchr(path,'[') != (char *) NULL) \|\|
				606	(strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
				607	return(status);
				608	}
				609
				610	/*
				611	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				612	% %
				613	% %
				614	% %
				615	% T o k e n i z e r %
				616	% %
				617	% %
				618	% %
				619	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
				620	%
				621	% Tokenizer() is a generalized, finite state token parser. It extracts tokens
				622	% one at a time from a string of characters. The characters used for white
				623	% space, for break characters, and for quotes can be specified. Also,
				624	% characters in the string can be preceded by a specifiable escape character
				625	% which removes any special meaning the character may have.
				626	%
				627	% Here is some terminology:
				628	%
				629	% o token: A single unit of information in the form of a group of
				630	% characters.
				631	%
				632	% o white space: Space that gets ignored (except within quotes or when
				633	% escaped), like blanks and tabs. In addition, white space terminates a
				634	% non-quoted token.
				635	%
				636	% o break set: One or more characters that separates non-quoted tokens.
				637	% Commas are a common break character. The usage of break characters to
				638	% signal the end of a token is the same as that of white space, except
				639	% multiple break characters with nothing or only white space between
				640	% generate a null token for each two break characters together.
				641	%
				642	% For example, if blank is set to be the white space and comma is set to
				643	% be the break character, the line
				644	%
				645	% A, B, C , , DEF
				646	%
				647	% ... consists of 5 tokens:
				648	%
				649	% 1) "A"
				650	% 2) "B"
				651	% 3) "C"
				652	% 4) "" (the null string)
				653	% 5) "DEF"
				654	%
				655	% o Quote character: A character that, when surrounding a group of other
				656	% characters, causes the group of characters to be treated as a single
				657	% token, no matter how many white spaces or break characters exist in
				658	% the group. Also, a token always terminates after the closing quote.
				659	% For example, if ' is the quote character, blank is white space, and
				660	% comma is the break character, the following string
				661	%
				662	% A, ' B, CD'EF GHI
				663	%
				664	% ... consists of 4 tokens:
				665	%
				666	% 1) "A"
				667	% 2) " B, CD" (note the blanks & comma)
				668	% 3) "EF"
				669	% 4) "GHI"
				670	%
				671	% The quote characters themselves do not appear in the resultant
				672	% tokens. The double quotes are delimiters i use here for
				673	% documentation purposes only.
				674	%
				675	% o Escape character: A character which itself is ignored but which
				676	% causes the next character to be used as is. ^ and \ are often used
				677	% as escape characters. An escape in the last position of the string
				678	% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
				679	% and non-escape) character. For example, assume white space, break
				680	% character, and quote are the same as in the above examples, and
				681	% further, assume that ^ is the escape character. Then, in the string
				682	%
				683	% ABC, ' DEF ^' GH' I ^ J K^ L ^
				684	%
				685	% ... there are 7 tokens:
				686	%
				687	% 1) "ABC"
				688	% 2) " DEF ' GH"
				689	% 3) "I"
				690	% 4) " " (a lone blank)
				691	% 5) "J"
				692	% 6) "K L"
				693	% 7) "^" (passed as is at end of line)
				694	%
				695	% The format of the Tokenizer method is:
				696	%
				697	% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
				698	% const size_t max_token_length,const char *line,const char *white,
				699	% const char *break_set,const char *quote,const char escape,
				700	% char *breaker,int *next,char *quoted)
				701	%
				702	% A description of each parameter follows:
				703	%
				704	% o flag: right now, only the low order 3 bits are used.
				705	%
				706	% 1 => convert non-quoted tokens to upper case
				707	% 2 => convert non-quoted tokens to lower case
				708	% 0 => do not convert non-quoted tokens
				709	%
				710	% o token: a character string containing the returned next token
				711	%
				712	% o max_token_length: the maximum size of "token". Characters beyond
				713	% "max_token_length" are truncated.
				714	%
				715	% o string: the string to be parsed.
				716	%
				717	% o white: a string of the valid white spaces. example:
				718	%
				719	% char whitesp[]={" \t"};
				720	%
				721	% blank and tab will be valid white space.
				722	%
				723	% o break: a string of the valid break characters. example:
				724	%
				725	% char breakch[]={";,"};
				726	%
				727	% semicolon and comma will be valid break characters.
				728	%
				729	% o quote: a string of the valid quote characters. An example would be
				730	%
				731	% char quote[]={"'\""};
				732	%
				733	% (this causes single and double quotes to be valid) Note that a
				734	% token starting with one of these characters needs the same quote
				735	% character to terminate it.
				736	%
				737	% for example:
				738	%
				739	% "ABC '
				740	%
				741	% is unterminated, but
				742	%
				743	% "DEF" and 'GHI'
				744	%
				745	% are properly terminated. Note that different quote characters
				746	% can appear on the same line; only for a given token do the quote
				747	% characters have to be the same.
				748	%
				749	% o escape: the escape character (NOT a string ... only one
				750	% allowed). Use zero if none is desired.
				751	%
				752	% o breaker: the break character used to terminate the current
				753	% token. If the token was quoted, this will be the quote used. If
				754	% the token is the last one on the line, this will be zero.
				755	%
				756	% o next: this variable points to the first character of the
				757	% next token. it gets reset by "tokenizer" as it steps through the
				758	% string. Set it to 0 upon initialization, and leave it alone
				759	% after that. You can change it if you want to jump around in the
				760	% string or re-parse from the beginning, but be careful.
				761	%
				762	% o quoted: set to MagickTrue if the token was quoted and MagickFalse
				763	% if not. You may need this information (for example: in C, a
				764	% string with quotes around it is a character string, while one
				765	% without is an identifier).
				766	%
				767	% o result: 0 if we haven't reached EOS (end of string), and 1
				768	% if we have.
				769	%
				770	*/
				771
				772	#define IN_WHITE 0
				773	#define IN_TOKEN 1
				774	#define IN_QUOTE 2
				775	#define IN_OZONE 3
				776
/*
  Return the zero-based index of the first occurrence of character `c` in
  `string`, or -1 when `c` does not occur.  The terminating NUL is never
  matched, and a NULL `string` is treated as an empty set.
*/
static ssize_t sindex(int c,const char *string)
{
  register const char
    *p;

  if (string == (const char *) NULL)
    return(-1);
  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}
				787
				788	static void StoreToken(TokenInfo token_info,char string,
				789	size_t max_token_length,int c)
				790	{
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	791	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	792	i;
				793
				794	if ((token_info->offset < 0) \|\|
				795	((size_t) token_info->offset >= (max_token_length-1)))
				796	return;
				797	i=token_info->offset++;
				798	string[i]=(char) c;
				799	if (token_info->state == IN_QUOTE)
				800	return;
				801	switch (token_info->flag & 0x03)
				802	{
				803	case 1:
				804	{
				805	string[i]=(char) toupper(c);
				806	break;
				807	}
				808	case 2:
				809	{
				810	string[i]=(char) tolower(c);
				811	break;
				812	}
				813	default:
				814	break;
				815	}
				816	}
				817
				818	MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
				819	char token,const size_t max_token_length,const char line,const char *white,
				820	const char break_set,const char quote,const char escape,char *breaker,
				821	int next,char quoted)
				822	{
				823	int
				824	c;
				825
cristy	bb50337	2010-05-27 20:51:26 +0000	[diff] [blame]	826	register ssize_t
cristy	3ed852e	2009-09-05 21:47:34 +0000	[diff] [blame]	827	i;
				828
				829	*breaker='\0';
				830	*quoted='\0';
				831	if (line[*next] == '\0')
				832	return(1);
				833	token_info->state=IN_WHITE;
				834	token_info->quote=(char) MagickFalse;
				835	token_info->flag=flag;
				836	for (token_info->offset=0; (int) line[next] != 0; (next)++)
				837	{
				838	c=(int) line[*next];
				839	i=sindex(c,break_set);
				840	if (i >= 0)
				841	{
				842	switch (token_info->state)
				843	{
				844	case IN_WHITE:
				845	case IN_TOKEN:
				846	case IN_OZONE:
				847	{
				848	(*next)++;
				849	*breaker=break_set[i];
				850	token[token_info->offset]='\0';
				851	return(0);
				852	}
				853	case IN_QUOTE:
				854	{
				855	StoreToken(token_info,token,max_token_length,c);
				856	break;
				857	}
				858	}
				859	continue;
				860	}
				861	i=sindex(c,quote);
				862	if (i >= 0)
				863	{
				864	switch (token_info->state)
				865	{
				866	case IN_WHITE:
				867	{
				868	token_info->state=IN_QUOTE;
				869	token_info->quote=quote[i];
				870	*quoted=(char) MagickTrue;
				871	break;
				872	}
				873	case IN_QUOTE:
				874	{
				875	if (quote[i] != token_info->quote)
				876	StoreToken(token_info,token,max_token_length,c);
				877	else
				878	{
				879	token_info->state=IN_OZONE;
				880	token_info->quote='\0';
				881	}
				882	break;
				883	}
				884	case IN_TOKEN:
				885	case IN_OZONE:
				886	{
				887	*breaker=(char) c;
				888	token[token_info->offset]='\0';
				889	return(0);
				890	}
				891	}
				892	continue;
				893	}
				894	i=sindex(c,white);
				895	if (i >= 0)
				896	{
				897	switch (token_info->state)
				898	{
				899	case IN_WHITE:
				900	case IN_OZONE:
				901	break;
				902	case IN_TOKEN:
				903	{
				904	token_info->state=IN_OZONE;
				905	break;
				906	}
				907	case IN_QUOTE:
				908	{
				909	StoreToken(token_info,token,max_token_length,c);
				910	break;
				911	}
				912	}
				913	continue;
				914	}
				915	if (c == (int) escape)
				916	{
				917	if (line[(*next)+1] == '\0')
				918	{
				919	*breaker='\0';
				920	StoreToken(token_info,token,max_token_length,c);
				921	(*next)++;
				922	token[token_info->offset]='\0';
				923	return(0);
				924	}
				925	switch (token_info->state)
				926	{
				927	case IN_WHITE:
				928	{
				929	(*next)--;
				930	token_info->state=IN_TOKEN;
				931	break;
				932	}
				933	case IN_TOKEN:
				934	case IN_QUOTE:
				935	{
				936	(*next)++;
				937	c=(int) line[*next];
				938	StoreToken(token_info,token,max_token_length,c);
				939	break;
				940	}
				941	case IN_OZONE:
				942	{
				943	token[token_info->offset]='\0';
				944	return(0);
				945	}
				946	}
				947	continue;
				948	}
				949	switch (token_info->state)
				950	{
				951	case IN_WHITE:
				952	token_info->state=IN_TOKEN;
				953	case IN_TOKEN:
				954	case IN_QUOTE:
				955	{
				956	StoreToken(token_info,token,max_token_length,c);
				957	break;
				958	}
				959	case IN_OZONE:
				960	{
				961	token[token_info->offset]='\0';
				962	return(0);
				963	}
				964	}
				965	}
				966	token[token_info->offset]='\0';
				967	return(0);
				968	}