blob: 79774a3a3c310826210dc2763b48fdb025f38c0f [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% John Cristy %
17% January 1993 %
18% %
19% %
cristy16af1cb2009-12-11 21:38:29 +000020% Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/memory_.h"
48#include "magick/string_.h"
49#include "magick/token.h"
50#include "magick/token-private.h"
51#include "magick/utility.h"
52
53/*
  Typedef declarations.
55*/
56struct _TokenInfo
57{
58 int
59 state;
60
61 MagickStatusType
62 flag;
63
cristybb503372010-05-27 20:51:26 +000064 ssize_t
cristy3ed852e2009-09-05 21:47:34 +000065 offset;
66
67 char
68 quote;
69
cristybb503372010-05-27 20:51:26 +000070 size_t
cristy3ed852e2009-09-05 21:47:34 +000071 signature;
72};
73
74/*
75%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76% %
77% %
78% %
79% A c q u i r e T o k e n I n f o %
80% %
81% %
82% %
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84%
85% AcquireTokenInfo() allocates the TokenInfo structure.
86%
87% The format of the AcquireTokenInfo method is:
88%
89% TokenInfo *AcquireTokenInfo()
90%
91*/
92MagickExport TokenInfo *AcquireTokenInfo(void)
93{
94 TokenInfo
95 *token_info;
96
cristy90823212009-12-12 20:48:33 +000097 token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +000098 if (token_info == (TokenInfo *) NULL)
99 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100 token_info->signature=MagickSignature;
101 return(token_info);
102}
103
104/*
105%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106% %
107% %
108% %
109% D e s t r o y T o k e n I n f o %
110% %
111% %
112% %
113%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
117%
118% The format of the DestroyTokenInfo method is:
119%
120% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121%
122% A description of each parameter follows:
123%
%    o token_info: Specifies a pointer to a TokenInfo structure.
125%
126*/
127MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128{
129 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130 assert(token_info != (TokenInfo *) NULL);
131 assert(token_info->signature == MagickSignature);
132 token_info->signature=(~MagickSignature);
133 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134 return(token_info);
135}
136
137/*
138%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139% %
140% %
141% %
142+ G e t M a g i c k T o k e n %
143% %
144% %
145% %
146%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147%
148% GetMagickToken() gets a token from the token stream. A token is defined as a
149% sequence of characters delimited by whitespace (e.g. clip-path), a sequence
%  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristydd8327f2010-05-12 12:39:46 +0000151% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
152% separator characters: ':', '=', ',', and ';'.
cristy3ed852e2009-09-05 21:47:34 +0000153%
154% The format of the GetMagickToken method is:
155%
156% void GetMagickToken(const char *start,const char **end,char *token)
157%
158% A description of each parameter follows:
159%
160% o start: the start of the token sequence.
161%
162% o end: point to the end of the token sequence.
163%
164% o token: copy the token to this buffer.
165%
166*/
167MagickExport void GetMagickToken(const char *start,const char **end,char *token)
168{
169 double
170 value;
171
172 register const char
173 *p;
174
cristybb503372010-05-27 20:51:26 +0000175 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000176 i;
177
178 i=0;
179 for (p=start; *p != '\0'; )
180 {
181 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
182 p++;
183 if (*p == '\0')
184 break;
185 switch (*p)
186 {
187 case '"':
188 case '\'':
189 case '`':
190 case '{':
191 {
192 register char
193 escape;
194
195 switch (*p)
196 {
197 case '"': escape='"'; break;
198 case '\'': escape='\''; break;
199 case '`': escape='\''; break;
200 case '{': escape='}'; break;
201 default: escape=(*p); break;
202 }
203 for (p++; *p != '\0'; p++)
204 {
205 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
206 p++;
207 else
208 if (*p == escape)
209 {
210 p++;
211 break;
212 }
213 token[i++]=(*p);
214 }
215 break;
216 }
217 case '/':
218 {
219 token[i++]=(*p++);
220 if ((*p == '>') || (*p == '/'))
221 token[i++]=(*p++);
222 break;
223 }
224 default:
225 {
226 char
227 *q;
228
229 value=strtod(p,&q);
230 if ((p != q) && (*p != ','))
231 {
232 for ( ; (p < q) && (*p != ','); p++)
233 token[i++]=(*p);
234 if (*p == '%')
235 token[i++]=(*p++);
236 break;
237 }
238 if ((isalpha((int) ((unsigned char) *p)) == 0) &&
239 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
240 {
241 token[i++]=(*p++);
242 break;
243 }
244 for ( ; *p != '\0'; p++)
245 {
246 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
cristydd8327f2010-05-12 12:39:46 +0000247 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
cristy3ed852e2009-09-05 21:47:34 +0000248 break;
249 if ((i > 0) && (*p == '<'))
250 break;
251 token[i++]=(*p);
252 if (*p == '>')
253 break;
254 if (*p == '(')
255 for (p++; *p != '\0'; p++)
256 {
257 token[i++]=(*p);
258 if ((*p == ')') && (*(p-1) != '\\'))
259 break;
260 }
261 }
262 break;
263 }
264 }
265 break;
266 }
267 token[i]='\0';
268 if (LocaleNCompare(token,"url(",4) == 0)
269 {
270 ssize_t
271 offset;
272
273 offset=4;
274 if (token[offset] == '#')
275 offset++;
cristybb503372010-05-27 20:51:26 +0000276 i=(ssize_t) strlen(token);
cristy3ed852e2009-09-05 21:47:34 +0000277 (void) CopyMagickString(token,token+offset,MaxTextExtent);
278 token[i-offset-1]='\0';
279 }
280 while (isspace((int) ((unsigned char) *p)) != 0)
281 p++;
282 if (end != (const char **) NULL)
283 *end=(const char *) p;
284}
285
286/*
287%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
288% %
289% %
290% %
291% G l o b E x p r e s s i o n %
292% %
293% %
294% %
295%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
296%
297% GlobExpression() returns MagickTrue if the expression matches the pattern.
298%
299% The format of the GlobExpression function is:
300%
301% MagickBooleanType GlobExpression(const char *expression,
302% const char *pattern,const MagickBooleanType case_insensitive)
303%
304% A description of each parameter follows:
305%
306% o expression: Specifies a pointer to a text string containing a file name.
307%
308% o pattern: Specifies a pointer to a text string containing a pattern.
309%
310% o case_insensitive: set to MagickTrue to ignore the case when matching
311% an expression.
312%
313*/
314MagickExport MagickBooleanType GlobExpression(const char *expression,
315 const char *pattern,const MagickBooleanType case_insensitive)
316{
317 MagickBooleanType
318 done,
319 match;
320
321 register const char
322 *p;
323
324 /*
325 Return on empty pattern or '*'.
326 */
327 if (pattern == (char *) NULL)
328 return(MagickTrue);
329 if (GetUTFCode(pattern) == 0)
330 return(MagickTrue);
331 if (LocaleCompare(pattern,"*") == 0)
332 return(MagickTrue);
333 p=pattern+strlen(pattern)-1;
334 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
335 {
336 ExceptionInfo
337 *exception;
338
339 ImageInfo
340 *image_info;
341
342 /*
343 Determine if pattern is a scene, i.e. img0001.pcd[2].
344 */
345 image_info=AcquireImageInfo();
346 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
347 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000348 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000349 exception=DestroyExceptionInfo(exception);
350 if (LocaleCompare(image_info->filename,pattern) != 0)
351 {
352 image_info=DestroyImageInfo(image_info);
353 return(MagickFalse);
354 }
355 image_info=DestroyImageInfo(image_info);
356 }
357 /*
358 Evaluate glob expression.
359 */
360 done=MagickFalse;
361 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
362 {
363 if (GetUTFCode(expression) == 0)
364 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
365 break;
366 switch (GetUTFCode(pattern))
367 {
cristy3ed852e2009-09-05 21:47:34 +0000368 case '*':
369 {
370 MagickBooleanType
371 status;
372
373 status=MagickFalse;
374 pattern+=GetUTFOctets(pattern);
375 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
376 {
377 status=GlobExpression(expression,pattern,case_insensitive);
378 expression+=GetUTFOctets(expression);
379 }
380 if (status != MagickFalse)
381 {
382 while (GetUTFCode(expression) != 0)
383 expression+=GetUTFOctets(expression);
384 while (GetUTFCode(pattern) != 0)
385 pattern+=GetUTFOctets(pattern);
386 }
387 break;
388 }
389 case '[':
390 {
cristybb503372010-05-27 20:51:26 +0000391 ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000392 c;
393
394 pattern+=GetUTFOctets(pattern);
395 for ( ; ; )
396 {
397 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
398 {
399 done=MagickTrue;
400 break;
401 }
402 if (GetUTFCode(pattern) == '\\')
403 {
404 pattern+=GetUTFOctets(pattern);
405 if (GetUTFCode(pattern) == 0)
406 {
407 done=MagickTrue;
408 break;
409 }
410 }
411 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
412 {
413 c=GetUTFCode(pattern);
414 pattern+=GetUTFOctets(pattern);
415 pattern+=GetUTFOctets(pattern);
416 if (GetUTFCode(pattern) == ']')
417 {
418 done=MagickTrue;
419 break;
420 }
421 if (GetUTFCode(pattern) == '\\')
422 {
423 pattern+=GetUTFOctets(pattern);
424 if (GetUTFCode(pattern) == 0)
425 {
426 done=MagickTrue;
427 break;
428 }
429 }
430 if ((GetUTFCode(expression) < c) ||
431 (GetUTFCode(expression) > GetUTFCode(pattern)))
432 {
433 pattern+=GetUTFOctets(pattern);
434 continue;
435 }
436 }
437 else
438 if (GetUTFCode(pattern) != GetUTFCode(expression))
439 {
440 pattern+=GetUTFOctets(pattern);
441 continue;
442 }
443 pattern+=GetUTFOctets(pattern);
444 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
445 {
446 if ((GetUTFCode(pattern) == '\\') &&
447 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
448 pattern+=GetUTFOctets(pattern);
449 pattern+=GetUTFOctets(pattern);
450 }
451 if (GetUTFCode(pattern) != 0)
452 {
453 pattern+=GetUTFOctets(pattern);
454 expression+=GetUTFOctets(expression);
455 }
456 break;
457 }
458 break;
459 }
460 case '?':
461 {
462 pattern+=GetUTFOctets(pattern);
463 expression+=GetUTFOctets(expression);
464 break;
465 }
466 case '{':
467 {
468 register const char
469 *p;
470
471 pattern+=GetUTFOctets(pattern);
472 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
473 {
474 p=expression;
475 match=MagickTrue;
476 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
477 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
478 (match != MagickFalse))
479 {
480 if (GetUTFCode(pattern) == '\\')
481 pattern+=GetUTFOctets(pattern);
482 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
483 MagickFalse;
484 p+=GetUTFOctets(p);
485 pattern+=GetUTFOctets(pattern);
486 }
487 if (GetUTFCode(pattern) == 0)
488 {
489 match=MagickFalse;
490 done=MagickTrue;
491 break;
492 }
493 else
494 if (match != MagickFalse)
495 {
496 expression=p;
497 while ((GetUTFCode(pattern) != '}') &&
498 (GetUTFCode(pattern) != 0))
499 {
500 pattern+=GetUTFOctets(pattern);
501 if (GetUTFCode(pattern) == '\\')
502 {
503 pattern+=GetUTFOctets(pattern);
504 if (GetUTFCode(pattern) == '}')
505 pattern+=GetUTFOctets(pattern);
506 }
507 }
508 }
509 else
510 {
511 while ((GetUTFCode(pattern) != '}') &&
512 (GetUTFCode(pattern) != ',') &&
513 (GetUTFCode(pattern) != 0))
514 {
515 pattern+=GetUTFOctets(pattern);
516 if (GetUTFCode(pattern) == '\\')
517 {
518 pattern+=GetUTFOctets(pattern);
519 if ((GetUTFCode(pattern) == '}') ||
520 (GetUTFCode(pattern) == ','))
521 pattern+=GetUTFOctets(pattern);
522 }
523 }
524 }
525 if (GetUTFCode(pattern) != 0)
526 pattern+=GetUTFOctets(pattern);
527 }
528 break;
529 }
cristyecbe37f2010-04-22 13:50:04 +0000530 case '\\':
531 {
532 pattern+=GetUTFOctets(pattern);
cristy4705fe82010-04-23 16:20:03 +0000533 if (GetUTFCode(pattern) == 0)
534 break;
cristyecbe37f2010-04-22 13:50:04 +0000535 }
cristy3ed852e2009-09-05 21:47:34 +0000536 default:
537 {
538 if (case_insensitive != MagickFalse)
539 {
540 if (tolower((int) GetUTFCode(expression)) !=
541 tolower((int) GetUTFCode(pattern)))
542 {
543 done=MagickTrue;
544 break;
545 }
546 }
547 else
548 if (GetUTFCode(expression) != GetUTFCode(pattern))
549 {
550 done=MagickTrue;
551 break;
552 }
553 expression+=GetUTFOctets(expression);
554 pattern+=GetUTFOctets(pattern);
555 }
556 }
557 }
558 while (GetUTFCode(pattern) == '*')
559 pattern+=GetUTFOctets(pattern);
560 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
561 MagickTrue : MagickFalse;
562 return(match);
563}
564
565/*
566%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
567% %
568% %
569% %
570+ I s G l o b %
571% %
572% %
573% %
574%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
575%
576% IsGlob() returns MagickTrue if the path specification contains a globbing
577% pattern.
578%
579% The format of the IsGlob method is:
580%
%      MagickBooleanType IsGlob(const char *path)
582%
583% A description of each parameter follows:
584%
585% o path: the path.
586%
587*/
588MagickExport MagickBooleanType IsGlob(const char *path)
589{
590 MagickBooleanType
591 status;
592
593 if (IsPathAccessible(path) != MagickFalse)
594 return(MagickFalse);
595 status=(strchr(path,'*') != (char *) NULL) ||
596 (strchr(path,'?') != (char *) NULL) ||
597 (strchr(path,'{') != (char *) NULL) ||
598 (strchr(path,'}') != (char *) NULL) ||
599 (strchr(path,'[') != (char *) NULL) ||
600 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
601 return(status);
602}
603
604/*
605%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
606% %
607% %
608% %
609% T o k e n i z e r %
610% %
611% %
612% %
613%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
614%
615% Tokenizer() is a generalized, finite state token parser. It extracts tokens
616% one at a time from a string of characters. The characters used for white
617% space, for break characters, and for quotes can be specified. Also,
618% characters in the string can be preceded by a specifiable escape character
619% which removes any special meaning the character may have.
620%
621% Here is some terminology:
622%
623% o token: A single unit of information in the form of a group of
624% characters.
625%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs. In addition, white space terminates a
%      non-quoted token.
629%
630% o break set: One or more characters that separates non-quoted tokens.
631% Commas are a common break character. The usage of break characters to
632% signal the end of a token is the same as that of white space, except
633% multiple break characters with nothing or only white space between
634% generate a null token for each two break characters together.
635%
636% For example, if blank is set to be the white space and comma is set to
637% be the break character, the line
638%
639% A, B, C , , DEF
640%
641% ... consists of 5 tokens:
642%
643% 1) "A"
644% 2) "B"
645% 3) "C"
646% 4) "" (the null string)
647% 5) "DEF"
648%
649% o Quote character: A character that, when surrounding a group of other
650% characters, causes the group of characters to be treated as a single
651% token, no matter how many white spaces or break characters exist in
652% the group. Also, a token always terminates after the closing quote.
653% For example, if ' is the quote character, blank is white space, and
654% comma is the break character, the following string
655%
656% A, ' B, CD'EF GHI
657%
658% ... consists of 4 tokens:
659%
660% 1) "A"
661% 2) " B, CD" (note the blanks & comma)
662% 3) "EF"
663% 4) "GHI"
664%
665% The quote characters themselves do not appear in the resultant
666% tokens. The double quotes are delimiters i use here for
667% documentation purposes only.
668%
669% o Escape character: A character which itself is ignored but which
670% causes the next character to be used as is. ^ and \ are often used
671% as escape characters. An escape in the last position of the string
672% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
673% and non-escape) character. For example, assume white space, break
674% character, and quote are the same as in the above examples, and
675% further, assume that ^ is the escape character. Then, in the string
676%
677% ABC, ' DEF ^' GH' I ^ J K^ L ^
678%
679% ... there are 7 tokens:
680%
681% 1) "ABC"
682% 2) " DEF ' GH"
683% 3) "I"
684% 4) " " (a lone blank)
685% 5) "J"
686% 6) "K L"
687% 7) "^" (passed as is at end of line)
688%
689% The format of the Tokenizer method is:
690%
691% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
692% const size_t max_token_length,const char *line,const char *white,
693% const char *break_set,const char *quote,const char escape,
694% char *breaker,int *next,char *quoted)
695%
696% A description of each parameter follows:
697%
%    o flag: right now, only the low order 2 bits are used.
699%
700% 1 => convert non-quoted tokens to upper case
701% 2 => convert non-quoted tokens to lower case
702% 0 => do not convert non-quoted tokens
703%
704% o token: a character string containing the returned next token
705%
706% o max_token_length: the maximum size of "token". Characters beyond
707% "max_token_length" are truncated.
708%
%    o line: the string to be parsed.
710%
711% o white: a string of the valid white spaces. example:
712%
713% char whitesp[]={" \t"};
714%
715% blank and tab will be valid white space.
716%
%    o break_set: a string of the valid break characters. example:
718%
719% char breakch[]={";,"};
720%
721% semicolon and comma will be valid break characters.
722%
723% o quote: a string of the valid quote characters. An example would be
724%
%        char quotech[]={"'\""};
726%
727% (this causes single and double quotes to be valid) Note that a
728% token starting with one of these characters needs the same quote
729% character to terminate it.
730%
731% for example:
732%
733% "ABC '
734%
735% is unterminated, but
736%
737% "DEF" and 'GHI'
738%
739% are properly terminated. Note that different quote characters
740% can appear on the same line; only for a given token do the quote
741% characters have to be the same.
742%
743% o escape: the escape character (NOT a string ... only one
744% allowed). Use zero if none is desired.
745%
746% o breaker: the break character used to terminate the current
747% token. If the token was quoted, this will be the quote used. If
748% the token is the last one on the line, this will be zero.
749%
750% o next: this variable points to the first character of the
751% next token. it gets reset by "tokenizer" as it steps through the
752% string. Set it to 0 upon initialization, and leave it alone
753% after that. You can change it if you want to jump around in the
754% string or re-parse from the beginning, but be careful.
755%
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
757% if not. You may need this information (for example: in C, a
758% string with quotes around it is a character string, while one
759% without is an identifier).
760%
761% o result: 0 if we haven't reached EOS (end of string), and 1
762% if we have.
763%
764*/
765
/*
  Tokenizer() parser states, stored in TokenInfo.state.
*/
#define IN_WHITE  0  /* initial state: no token character stored yet */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token body ended; the next token character ends the call */
770
/*
  Return the index of the first occurrence of character `c` in `string`, or
  -1 when it does not occur.  The terminating NUL is never matched.  A NULL
  `string` is treated as an empty set rather than dereferenced.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    i;

  if (string == (const char *) NULL)
    return(-1);
  for (i=0; string[i] != '\0'; i++)
    if ((int) string[i] == c)
      return(i);
  return(-1);
}
781
782static void StoreToken(TokenInfo *token_info,char *string,
783 size_t max_token_length,int c)
784{
cristybb503372010-05-27 20:51:26 +0000785 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000786 i;
787
788 if ((token_info->offset < 0) ||
789 ((size_t) token_info->offset >= (max_token_length-1)))
790 return;
791 i=token_info->offset++;
792 string[i]=(char) c;
793 if (token_info->state == IN_QUOTE)
794 return;
795 switch (token_info->flag & 0x03)
796 {
797 case 1:
798 {
799 string[i]=(char) toupper(c);
800 break;
801 }
802 case 2:
803 {
804 string[i]=(char) tolower(c);
805 break;
806 }
807 default:
808 break;
809 }
810}
811
812MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
813 char *token,const size_t max_token_length,const char *line,const char *white,
814 const char *break_set,const char *quote,const char escape,char *breaker,
815 int *next,char *quoted)
816{
817 int
818 c;
819
cristybb503372010-05-27 20:51:26 +0000820 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000821 i;
822
823 *breaker='\0';
824 *quoted='\0';
825 if (line[*next] == '\0')
826 return(1);
827 token_info->state=IN_WHITE;
828 token_info->quote=(char) MagickFalse;
829 token_info->flag=flag;
830 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
831 {
832 c=(int) line[*next];
833 i=sindex(c,break_set);
834 if (i >= 0)
835 {
836 switch (token_info->state)
837 {
838 case IN_WHITE:
839 case IN_TOKEN:
840 case IN_OZONE:
841 {
842 (*next)++;
843 *breaker=break_set[i];
844 token[token_info->offset]='\0';
845 return(0);
846 }
847 case IN_QUOTE:
848 {
849 StoreToken(token_info,token,max_token_length,c);
850 break;
851 }
852 }
853 continue;
854 }
855 i=sindex(c,quote);
856 if (i >= 0)
857 {
858 switch (token_info->state)
859 {
860 case IN_WHITE:
861 {
862 token_info->state=IN_QUOTE;
863 token_info->quote=quote[i];
864 *quoted=(char) MagickTrue;
865 break;
866 }
867 case IN_QUOTE:
868 {
869 if (quote[i] != token_info->quote)
870 StoreToken(token_info,token,max_token_length,c);
871 else
872 {
873 token_info->state=IN_OZONE;
874 token_info->quote='\0';
875 }
876 break;
877 }
878 case IN_TOKEN:
879 case IN_OZONE:
880 {
881 *breaker=(char) c;
882 token[token_info->offset]='\0';
883 return(0);
884 }
885 }
886 continue;
887 }
888 i=sindex(c,white);
889 if (i >= 0)
890 {
891 switch (token_info->state)
892 {
893 case IN_WHITE:
894 case IN_OZONE:
895 break;
896 case IN_TOKEN:
897 {
898 token_info->state=IN_OZONE;
899 break;
900 }
901 case IN_QUOTE:
902 {
903 StoreToken(token_info,token,max_token_length,c);
904 break;
905 }
906 }
907 continue;
908 }
909 if (c == (int) escape)
910 {
911 if (line[(*next)+1] == '\0')
912 {
913 *breaker='\0';
914 StoreToken(token_info,token,max_token_length,c);
915 (*next)++;
916 token[token_info->offset]='\0';
917 return(0);
918 }
919 switch (token_info->state)
920 {
921 case IN_WHITE:
922 {
923 (*next)--;
924 token_info->state=IN_TOKEN;
925 break;
926 }
927 case IN_TOKEN:
928 case IN_QUOTE:
929 {
930 (*next)++;
931 c=(int) line[*next];
932 StoreToken(token_info,token,max_token_length,c);
933 break;
934 }
935 case IN_OZONE:
936 {
937 token[token_info->offset]='\0';
938 return(0);
939 }
940 }
941 continue;
942 }
943 switch (token_info->state)
944 {
945 case IN_WHITE:
946 token_info->state=IN_TOKEN;
947 case IN_TOKEN:
948 case IN_QUOTE:
949 {
950 StoreToken(token_info,token,max_token_length,c);
951 break;
952 }
953 case IN_OZONE:
954 {
955 token[token_info->offset]='\0';
956 return(0);
957 }
958 }
959 }
960 token[token_info->offset]='\0';
961 return(0);
962}