blob: c32ec63685b2c0b22d9b722ab2209bef70a2b1a3 [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% John Cristy %
17% January 1993 %
18% %
19% %
cristy1454be72011-12-19 01:52:48 +000020% Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
cristy4c08aed2011-07-01 19:47:50 +000043#include "MagickCore/studio.h"
44#include "MagickCore/exception.h"
45#include "MagickCore/exception-private.h"
46#include "MagickCore/image.h"
47#include "MagickCore/memory_.h"
48#include "MagickCore/string_.h"
49#include "MagickCore/string-private.h"
50#include "MagickCore/token.h"
51#include "MagickCore/token-private.h"
52#include "MagickCore/utility.h"
cristyd1dd6e42011-09-04 01:46:08 +000053#include "MagickCore/utility-private.h"
cristy3ed852e2009-09-05 21:47:34 +000054
55/*
  Typedef declarations.
57*/
58struct _TokenInfo
59{
60 int
61 state;
62
63 MagickStatusType
64 flag;
65
cristybb503372010-05-27 20:51:26 +000066 ssize_t
cristy3ed852e2009-09-05 21:47:34 +000067 offset;
68
69 char
70 quote;
71
cristybb503372010-05-27 20:51:26 +000072 size_t
cristy3ed852e2009-09-05 21:47:34 +000073 signature;
74};
75
76/*
77%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78% %
79% %
80% %
81% A c q u i r e T o k e n I n f o %
82% %
83% %
84% %
85%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86%
87% AcquireTokenInfo() allocates the TokenInfo structure.
88%
89% The format of the AcquireTokenInfo method is:
90%
91% TokenInfo *AcquireTokenInfo()
92%
93*/
94MagickExport TokenInfo *AcquireTokenInfo(void)
95{
96 TokenInfo
97 *token_info;
98
cristy73bd4a52010-10-05 11:24:23 +000099 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +0000100 if (token_info == (TokenInfo *) NULL)
101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
102 token_info->signature=MagickSignature;
103 return(token_info);
104}
105
106/*
107%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108% %
109% %
110% %
111% D e s t r o y T o k e n I n f o %
112% %
113% %
114% %
115%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
119%
120% The format of the DestroyTokenInfo method is:
121%
122% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123%
124% A description of each parameter follows:
125%
%    o token_info: Specifies a pointer to a TokenInfo structure.
127%
128*/
129MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130{
131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickSignature);
134 token_info->signature=(~MagickSignature);
135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136 return(token_info);
137}
138
139/*
140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141% %
142% %
143% %
144+ G e t M a g i c k T o k e n %
145% %
146% %
147% %
148%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149%
%  GetMagickToken() gets a token from the token stream.  A token is defined as
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
%  parentheses (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
%  separator characters: ':', '=', ',', '|', and ';'.
cristy3ed852e2009-09-05 21:47:34 +0000155%
156% The format of the GetMagickToken method is:
157%
158% void GetMagickToken(const char *start,const char **end,char *token)
159%
160% A description of each parameter follows:
161%
162% o start: the start of the token sequence.
163%
164% o end: point to the end of the token sequence.
165%
166% o token: copy the token to this buffer.
167%
168*/
169MagickExport void GetMagickToken(const char *start,const char **end,char *token)
170{
171 double
172 value;
173
174 register const char
175 *p;
176
cristybb503372010-05-27 20:51:26 +0000177 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000178 i;
179
cristy32f69122011-04-22 02:26:00 +0000180 assert(start != (const char *) NULL);
181 assert(token != (char *) NULL);
cristy3ed852e2009-09-05 21:47:34 +0000182 i=0;
183 for (p=start; *p != '\0'; )
184 {
185 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
186 p++;
187 if (*p == '\0')
188 break;
189 switch (*p)
190 {
191 case '"':
192 case '\'':
193 case '`':
194 case '{':
195 {
196 register char
197 escape;
198
199 switch (*p)
200 {
201 case '"': escape='"'; break;
202 case '\'': escape='\''; break;
203 case '`': escape='\''; break;
204 case '{': escape='}'; break;
205 default: escape=(*p); break;
206 }
207 for (p++; *p != '\0'; p++)
208 {
209 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
210 p++;
211 else
212 if (*p == escape)
213 {
214 p++;
215 break;
216 }
217 token[i++]=(*p);
218 }
219 break;
220 }
221 case '/':
222 {
223 token[i++]=(*p++);
224 if ((*p == '>') || (*p == '/'))
225 token[i++]=(*p++);
226 break;
227 }
228 default:
229 {
230 char
231 *q;
232
cristydbdd0e32011-11-04 23:29:40 +0000233 value=StringToDouble(p,&q);
cristyda16f162011-02-19 23:52:17 +0000234 (void) value;
cristy3ed852e2009-09-05 21:47:34 +0000235 if ((p != q) && (*p != ','))
236 {
237 for ( ; (p < q) && (*p != ','); p++)
238 token[i++]=(*p);
239 if (*p == '%')
240 token[i++]=(*p++);
241 break;
242 }
cristyc5071682011-04-22 02:06:27 +0000243 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
cristy3ed852e2009-09-05 21:47:34 +0000244 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
245 {
246 token[i++]=(*p++);
247 break;
248 }
249 for ( ; *p != '\0'; p++)
250 {
251 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
cristyfc68ef52012-03-11 23:33:15 +0000252 (*p == ':') || (*p == ',') || (*p == '|') || (*p == ';')) &&
253 (*(p-1) != '\\'))
cristy3ed852e2009-09-05 21:47:34 +0000254 break;
255 if ((i > 0) && (*p == '<'))
256 break;
257 token[i++]=(*p);
258 if (*p == '>')
259 break;
260 if (*p == '(')
261 for (p++; *p != '\0'; p++)
262 {
263 token[i++]=(*p);
264 if ((*p == ')') && (*(p-1) != '\\'))
265 break;
266 }
267 }
268 break;
269 }
270 }
271 break;
272 }
273 token[i]='\0';
274 if (LocaleNCompare(token,"url(",4) == 0)
275 {
276 ssize_t
277 offset;
278
279 offset=4;
280 if (token[offset] == '#')
281 offset++;
cristybb503372010-05-27 20:51:26 +0000282 i=(ssize_t) strlen(token);
cristy3ed852e2009-09-05 21:47:34 +0000283 (void) CopyMagickString(token,token+offset,MaxTextExtent);
284 token[i-offset-1]='\0';
285 }
286 while (isspace((int) ((unsigned char) *p)) != 0)
287 p++;
288 if (end != (const char **) NULL)
289 *end=(const char *) p;
290}
291
292/*
293%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
294% %
295% %
296% %
297% G l o b E x p r e s s i o n %
298% %
299% %
300% %
301%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
302%
303% GlobExpression() returns MagickTrue if the expression matches the pattern.
304%
305% The format of the GlobExpression function is:
306%
307% MagickBooleanType GlobExpression(const char *expression,
308% const char *pattern,const MagickBooleanType case_insensitive)
309%
310% A description of each parameter follows:
311%
312% o expression: Specifies a pointer to a text string containing a file name.
313%
314% o pattern: Specifies a pointer to a text string containing a pattern.
315%
316% o case_insensitive: set to MagickTrue to ignore the case when matching
317% an expression.
318%
319*/
320MagickExport MagickBooleanType GlobExpression(const char *expression,
321 const char *pattern,const MagickBooleanType case_insensitive)
322{
323 MagickBooleanType
324 done,
325 match;
326
327 register const char
328 *p;
329
330 /*
331 Return on empty pattern or '*'.
332 */
333 if (pattern == (char *) NULL)
334 return(MagickTrue);
335 if (GetUTFCode(pattern) == 0)
336 return(MagickTrue);
337 if (LocaleCompare(pattern,"*") == 0)
338 return(MagickTrue);
339 p=pattern+strlen(pattern)-1;
340 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
341 {
342 ExceptionInfo
343 *exception;
344
345 ImageInfo
346 *image_info;
347
348 /*
349 Determine if pattern is a scene, i.e. img0001.pcd[2].
350 */
351 image_info=AcquireImageInfo();
352 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
353 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000354 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000355 exception=DestroyExceptionInfo(exception);
356 if (LocaleCompare(image_info->filename,pattern) != 0)
357 {
358 image_info=DestroyImageInfo(image_info);
359 return(MagickFalse);
360 }
361 image_info=DestroyImageInfo(image_info);
362 }
363 /*
364 Evaluate glob expression.
365 */
366 done=MagickFalse;
367 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
368 {
369 if (GetUTFCode(expression) == 0)
370 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
371 break;
372 switch (GetUTFCode(pattern))
373 {
cristy3ed852e2009-09-05 21:47:34 +0000374 case '*':
375 {
376 MagickBooleanType
377 status;
378
379 status=MagickFalse;
380 pattern+=GetUTFOctets(pattern);
381 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
382 {
383 status=GlobExpression(expression,pattern,case_insensitive);
384 expression+=GetUTFOctets(expression);
385 }
386 if (status != MagickFalse)
387 {
388 while (GetUTFCode(expression) != 0)
389 expression+=GetUTFOctets(expression);
390 while (GetUTFCode(pattern) != 0)
391 pattern+=GetUTFOctets(pattern);
392 }
393 break;
394 }
395 case '[':
396 {
cristy55a91cd2010-12-01 00:57:40 +0000397 int
cristy3ed852e2009-09-05 21:47:34 +0000398 c;
399
400 pattern+=GetUTFOctets(pattern);
401 for ( ; ; )
402 {
403 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
404 {
405 done=MagickTrue;
406 break;
407 }
408 if (GetUTFCode(pattern) == '\\')
409 {
410 pattern+=GetUTFOctets(pattern);
411 if (GetUTFCode(pattern) == 0)
412 {
413 done=MagickTrue;
414 break;
415 }
416 }
417 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
418 {
419 c=GetUTFCode(pattern);
420 pattern+=GetUTFOctets(pattern);
421 pattern+=GetUTFOctets(pattern);
422 if (GetUTFCode(pattern) == ']')
423 {
424 done=MagickTrue;
425 break;
426 }
427 if (GetUTFCode(pattern) == '\\')
428 {
429 pattern+=GetUTFOctets(pattern);
430 if (GetUTFCode(pattern) == 0)
431 {
432 done=MagickTrue;
433 break;
434 }
435 }
436 if ((GetUTFCode(expression) < c) ||
437 (GetUTFCode(expression) > GetUTFCode(pattern)))
438 {
439 pattern+=GetUTFOctets(pattern);
440 continue;
441 }
442 }
443 else
444 if (GetUTFCode(pattern) != GetUTFCode(expression))
445 {
446 pattern+=GetUTFOctets(pattern);
447 continue;
448 }
449 pattern+=GetUTFOctets(pattern);
450 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
451 {
452 if ((GetUTFCode(pattern) == '\\') &&
453 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
454 pattern+=GetUTFOctets(pattern);
455 pattern+=GetUTFOctets(pattern);
456 }
457 if (GetUTFCode(pattern) != 0)
458 {
459 pattern+=GetUTFOctets(pattern);
460 expression+=GetUTFOctets(expression);
461 }
462 break;
463 }
464 break;
465 }
466 case '?':
467 {
468 pattern+=GetUTFOctets(pattern);
469 expression+=GetUTFOctets(expression);
470 break;
471 }
472 case '{':
473 {
474 register const char
475 *p;
476
477 pattern+=GetUTFOctets(pattern);
478 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
479 {
480 p=expression;
481 match=MagickTrue;
482 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
483 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
484 (match != MagickFalse))
485 {
486 if (GetUTFCode(pattern) == '\\')
487 pattern+=GetUTFOctets(pattern);
488 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
489 MagickFalse;
490 p+=GetUTFOctets(p);
491 pattern+=GetUTFOctets(pattern);
492 }
493 if (GetUTFCode(pattern) == 0)
494 {
495 match=MagickFalse;
496 done=MagickTrue;
497 break;
498 }
499 else
500 if (match != MagickFalse)
501 {
502 expression=p;
503 while ((GetUTFCode(pattern) != '}') &&
504 (GetUTFCode(pattern) != 0))
505 {
506 pattern+=GetUTFOctets(pattern);
507 if (GetUTFCode(pattern) == '\\')
508 {
509 pattern+=GetUTFOctets(pattern);
510 if (GetUTFCode(pattern) == '}')
511 pattern+=GetUTFOctets(pattern);
512 }
513 }
514 }
515 else
516 {
517 while ((GetUTFCode(pattern) != '}') &&
518 (GetUTFCode(pattern) != ',') &&
519 (GetUTFCode(pattern) != 0))
520 {
521 pattern+=GetUTFOctets(pattern);
522 if (GetUTFCode(pattern) == '\\')
523 {
524 pattern+=GetUTFOctets(pattern);
525 if ((GetUTFCode(pattern) == '}') ||
526 (GetUTFCode(pattern) == ','))
527 pattern+=GetUTFOctets(pattern);
528 }
529 }
530 }
531 if (GetUTFCode(pattern) != 0)
532 pattern+=GetUTFOctets(pattern);
533 }
534 break;
535 }
cristyecbe37f2010-04-22 13:50:04 +0000536 case '\\':
537 {
538 pattern+=GetUTFOctets(pattern);
cristy4705fe82010-04-23 16:20:03 +0000539 if (GetUTFCode(pattern) == 0)
540 break;
cristyecbe37f2010-04-22 13:50:04 +0000541 }
cristy3ed852e2009-09-05 21:47:34 +0000542 default:
543 {
544 if (case_insensitive != MagickFalse)
545 {
546 if (tolower((int) GetUTFCode(expression)) !=
547 tolower((int) GetUTFCode(pattern)))
548 {
549 done=MagickTrue;
550 break;
551 }
552 }
553 else
554 if (GetUTFCode(expression) != GetUTFCode(pattern))
555 {
556 done=MagickTrue;
557 break;
558 }
559 expression+=GetUTFOctets(expression);
560 pattern+=GetUTFOctets(pattern);
561 }
562 }
563 }
564 while (GetUTFCode(pattern) == '*')
565 pattern+=GetUTFOctets(pattern);
566 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
567 MagickTrue : MagickFalse;
568 return(match);
569}
570
571/*
572%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
573% %
574% %
575% %
576+ I s G l o b %
577% %
578% %
579% %
580%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581%
582% IsGlob() returns MagickTrue if the path specification contains a globbing
583% pattern.
584%
585% The format of the IsGlob method is:
586%
%      MagickBooleanType IsGlob(const char *path)
588%
589% A description of each parameter follows:
590%
591% o path: the path.
592%
593*/
cristy7832dc22011-09-05 01:21:53 +0000594MagickPrivate MagickBooleanType IsGlob(const char *path)
cristy3ed852e2009-09-05 21:47:34 +0000595{
596 MagickBooleanType
597 status;
598
599 if (IsPathAccessible(path) != MagickFalse)
600 return(MagickFalse);
601 status=(strchr(path,'*') != (char *) NULL) ||
602 (strchr(path,'?') != (char *) NULL) ||
603 (strchr(path,'{') != (char *) NULL) ||
604 (strchr(path,'}') != (char *) NULL) ||
605 (strchr(path,'[') != (char *) NULL) ||
606 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
607 return(status);
608}
609
610/*
611%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
612% %
613% %
614% %
615% T o k e n i z e r %
616% %
617% %
618% %
619%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
620%
621% Tokenizer() is a generalized, finite state token parser. It extracts tokens
622% one at a time from a string of characters. The characters used for white
623% space, for break characters, and for quotes can be specified. Also,
624% characters in the string can be preceded by a specifiable escape character
625% which removes any special meaning the character may have.
626%
627% Here is some terminology:
628%
629% o token: A single unit of information in the form of a group of
630% characters.
631%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs.  In addition, white space terminates a
%      non-quoted token.
635%
636% o break set: One or more characters that separates non-quoted tokens.
637% Commas are a common break character. The usage of break characters to
638% signal the end of a token is the same as that of white space, except
639% multiple break characters with nothing or only white space between
640% generate a null token for each two break characters together.
641%
642% For example, if blank is set to be the white space and comma is set to
643% be the break character, the line
644%
645% A, B, C , , DEF
646%
647% ... consists of 5 tokens:
648%
649% 1) "A"
650% 2) "B"
651% 3) "C"
652% 4) "" (the null string)
653% 5) "DEF"
654%
655% o Quote character: A character that, when surrounding a group of other
656% characters, causes the group of characters to be treated as a single
657% token, no matter how many white spaces or break characters exist in
658% the group. Also, a token always terminates after the closing quote.
659% For example, if ' is the quote character, blank is white space, and
660% comma is the break character, the following string
661%
662% A, ' B, CD'EF GHI
663%
664% ... consists of 4 tokens:
665%
666% 1) "A"
667% 2) " B, CD" (note the blanks & comma)
668% 3) "EF"
669% 4) "GHI"
670%
%      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters I use here for
%      documentation purposes only.
674%
675% o Escape character: A character which itself is ignored but which
676% causes the next character to be used as is. ^ and \ are often used
677% as escape characters. An escape in the last position of the string
678% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
679% and non-escape) character. For example, assume white space, break
680% character, and quote are the same as in the above examples, and
681% further, assume that ^ is the escape character. Then, in the string
682%
683% ABC, ' DEF ^' GH' I ^ J K^ L ^
684%
685% ... there are 7 tokens:
686%
687% 1) "ABC"
688% 2) " DEF ' GH"
689% 3) "I"
690% 4) " " (a lone blank)
691% 5) "J"
692% 6) "K L"
693% 7) "^" (passed as is at end of line)
694%
695% The format of the Tokenizer method is:
696%
697% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
698% const size_t max_token_length,const char *line,const char *white,
699% const char *break_set,const char *quote,const char escape,
700% char *breaker,int *next,char *quoted)
701%
702% A description of each parameter follows:
703%
%    o flag: right now, only the low order 2 bits are used.
705%
706% 1 => convert non-quoted tokens to upper case
707% 2 => convert non-quoted tokens to lower case
708% 0 => do not convert non-quoted tokens
709%
710% o token: a character string containing the returned next token
711%
712% o max_token_length: the maximum size of "token". Characters beyond
713% "max_token_length" are truncated.
714%
%    o line: the string to be parsed.
716%
717% o white: a string of the valid white spaces. example:
718%
719% char whitesp[]={" \t"};
720%
721% blank and tab will be valid white space.
722%
%    o break_set: a string of the valid break characters. example:
724%
725% char breakch[]={";,"};
726%
727% semicolon and comma will be valid break characters.
728%
729% o quote: a string of the valid quote characters. An example would be
730%
%        char quotech[]={"'\""};
732%
733% (this causes single and double quotes to be valid) Note that a
734% token starting with one of these characters needs the same quote
735% character to terminate it.
736%
737% for example:
738%
739% "ABC '
740%
741% is unterminated, but
742%
743% "DEF" and 'GHI'
744%
745% are properly terminated. Note that different quote characters
746% can appear on the same line; only for a given token do the quote
747% characters have to be the same.
748%
749% o escape: the escape character (NOT a string ... only one
750% allowed). Use zero if none is desired.
751%
752% o breaker: the break character used to terminate the current
753% token. If the token was quoted, this will be the quote used. If
754% the token is the last one on the line, this will be zero.
755%
756% o next: this variable points to the first character of the
757% next token. it gets reset by "tokenizer" as it steps through the
758% string. Set it to 0 upon initialization, and leave it alone
759% after that. You can change it if you want to jump around in the
760% string or re-parse from the beginning, but be careful.
761%
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
%      if not.  You may need this information (for example: in C, a
%      string with quotes around it is a character string, while one
%      without is an identifier).
766%
767% o result: 0 if we haven't reached EOS (end of string), and 1
768% if we have.
769%
770*/
771
/*
  Parser states used by Tokenizer() and StoreToken().
*/
enum
{
  IN_WHITE = 0,  /* nothing collected yet for the current token */
  IN_TOKEN = 1,  /* collecting an unquoted token */
  IN_QUOTE = 2,  /* collecting a quoted token */
  IN_OZONE = 3   /* token text complete; white space after it is being skipped */
};
776
/*
  Return the zero-based position of the first character in `string' equal to
  `c', or -1 when there is none.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
787
788static void StoreToken(TokenInfo *token_info,char *string,
789 size_t max_token_length,int c)
790{
cristybb503372010-05-27 20:51:26 +0000791 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000792 i;
793
794 if ((token_info->offset < 0) ||
795 ((size_t) token_info->offset >= (max_token_length-1)))
796 return;
797 i=token_info->offset++;
798 string[i]=(char) c;
799 if (token_info->state == IN_QUOTE)
800 return;
801 switch (token_info->flag & 0x03)
802 {
803 case 1:
804 {
805 string[i]=(char) toupper(c);
806 break;
807 }
808 case 2:
809 {
810 string[i]=(char) tolower(c);
811 break;
812 }
813 default:
814 break;
815 }
816}
817
818MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
819 char *token,const size_t max_token_length,const char *line,const char *white,
820 const char *break_set,const char *quote,const char escape,char *breaker,
821 int *next,char *quoted)
822{
823 int
824 c;
825
cristybb503372010-05-27 20:51:26 +0000826 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000827 i;
828
829 *breaker='\0';
830 *quoted='\0';
831 if (line[*next] == '\0')
832 return(1);
833 token_info->state=IN_WHITE;
834 token_info->quote=(char) MagickFalse;
835 token_info->flag=flag;
836 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
837 {
838 c=(int) line[*next];
839 i=sindex(c,break_set);
840 if (i >= 0)
841 {
842 switch (token_info->state)
843 {
844 case IN_WHITE:
845 case IN_TOKEN:
846 case IN_OZONE:
847 {
848 (*next)++;
849 *breaker=break_set[i];
850 token[token_info->offset]='\0';
851 return(0);
852 }
853 case IN_QUOTE:
854 {
855 StoreToken(token_info,token,max_token_length,c);
856 break;
857 }
858 }
859 continue;
860 }
861 i=sindex(c,quote);
862 if (i >= 0)
863 {
864 switch (token_info->state)
865 {
866 case IN_WHITE:
867 {
868 token_info->state=IN_QUOTE;
869 token_info->quote=quote[i];
870 *quoted=(char) MagickTrue;
871 break;
872 }
873 case IN_QUOTE:
874 {
875 if (quote[i] != token_info->quote)
876 StoreToken(token_info,token,max_token_length,c);
877 else
878 {
879 token_info->state=IN_OZONE;
880 token_info->quote='\0';
881 }
882 break;
883 }
884 case IN_TOKEN:
885 case IN_OZONE:
886 {
887 *breaker=(char) c;
888 token[token_info->offset]='\0';
889 return(0);
890 }
891 }
892 continue;
893 }
894 i=sindex(c,white);
895 if (i >= 0)
896 {
897 switch (token_info->state)
898 {
899 case IN_WHITE:
900 case IN_OZONE:
901 break;
902 case IN_TOKEN:
903 {
904 token_info->state=IN_OZONE;
905 break;
906 }
907 case IN_QUOTE:
908 {
909 StoreToken(token_info,token,max_token_length,c);
910 break;
911 }
912 }
913 continue;
914 }
915 if (c == (int) escape)
916 {
917 if (line[(*next)+1] == '\0')
918 {
919 *breaker='\0';
920 StoreToken(token_info,token,max_token_length,c);
921 (*next)++;
922 token[token_info->offset]='\0';
923 return(0);
924 }
925 switch (token_info->state)
926 {
927 case IN_WHITE:
928 {
929 (*next)--;
930 token_info->state=IN_TOKEN;
931 break;
932 }
933 case IN_TOKEN:
934 case IN_QUOTE:
935 {
936 (*next)++;
937 c=(int) line[*next];
938 StoreToken(token_info,token,max_token_length,c);
939 break;
940 }
941 case IN_OZONE:
942 {
943 token[token_info->offset]='\0';
944 return(0);
945 }
946 }
947 continue;
948 }
949 switch (token_info->state)
950 {
951 case IN_WHITE:
952 token_info->state=IN_TOKEN;
953 case IN_TOKEN:
954 case IN_QUOTE:
955 {
956 StoreToken(token_info,token,max_token_length,c);
957 break;
958 }
959 case IN_OZONE:
960 {
961 token[token_info->offset]='\0';
962 return(0);
963 }
964 }
965 }
966 token[token_info->offset]='\0';
967 return(0);
968}