blob: d5fac16b6dea5dffb6fefcb11d81efafcd7d2e3b [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% John Cristy %
17% January 1993 %
18% %
19% %
cristy7e41fe82010-12-04 23:12:08 +000020% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/memory_.h"
48#include "magick/string_.h"
cristy0df696d2011-05-18 19:55:22 +000049#include "magick/string-private.h"
cristy3ed852e2009-09-05 21:47:34 +000050#include "magick/token.h"
51#include "magick/token-private.h"
52#include "magick/utility.h"
53
54/*
  Typedef declarations.
56*/
57struct _TokenInfo
58{
59 int
60 state;
61
62 MagickStatusType
63 flag;
64
cristybb503372010-05-27 20:51:26 +000065 ssize_t
cristy3ed852e2009-09-05 21:47:34 +000066 offset;
67
68 char
69 quote;
70
cristybb503372010-05-27 20:51:26 +000071 size_t
cristy3ed852e2009-09-05 21:47:34 +000072 signature;
73};
74
75/*
76%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
77% %
78% %
79% %
80% A c q u i r e T o k e n I n f o %
81% %
82% %
83% %
84%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
85%
86% AcquireTokenInfo() allocates the TokenInfo structure.
87%
88% The format of the AcquireTokenInfo method is:
89%
90% TokenInfo *AcquireTokenInfo()
91%
92*/
93MagickExport TokenInfo *AcquireTokenInfo(void)
94{
95 TokenInfo
96 *token_info;
97
cristy73bd4a52010-10-05 11:24:23 +000098 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +000099 if (token_info == (TokenInfo *) NULL)
100 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
101 token_info->signature=MagickSignature;
102 return(token_info);
103}
104
105/*
106%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
107% %
108% %
109% %
110% D e s t r o y T o k e n I n f o %
111% %
112% %
113% %
114%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
118%
119% The format of the DestroyTokenInfo method is:
120%
121% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
122%
123% A description of each parameter follows:
124%
%    o token_info: Specifies a pointer to a TokenInfo structure.
126%
127*/
128MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
129{
130 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
131 assert(token_info != (TokenInfo *) NULL);
132 assert(token_info->signature == MagickSignature);
133 token_info->signature=(~MagickSignature);
134 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
135 return(token_info);
136}
137
138/*
139%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
140% %
141% %
142% %
143+ G e t M a g i c k T o k e n %
144% %
145% %
146% %
147%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148%
cristydf11e552011-04-23 17:18:30 +0000149% GetMagickToken() gets a token from the token stream. A token is defined as
150% a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristydd8327f2010-05-12 12:39:46 +0000152% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
153% separator characters: ':', '=', ',', and ';'.
cristy3ed852e2009-09-05 21:47:34 +0000154%
155% The format of the GetMagickToken method is:
156%
157% void GetMagickToken(const char *start,const char **end,char *token)
158%
159% A description of each parameter follows:
160%
161% o start: the start of the token sequence.
162%
163% o end: point to the end of the token sequence.
164%
165% o token: copy the token to this buffer.
166%
167*/
168MagickExport void GetMagickToken(const char *start,const char **end,char *token)
169{
170 double
171 value;
172
173 register const char
174 *p;
175
cristybb503372010-05-27 20:51:26 +0000176 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000177 i;
178
cristy32f69122011-04-22 02:26:00 +0000179 assert(start != (const char *) NULL);
180 assert(token != (char *) NULL);
cristy3ed852e2009-09-05 21:47:34 +0000181 i=0;
182 for (p=start; *p != '\0'; )
183 {
184 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
185 p++;
186 if (*p == '\0')
187 break;
188 switch (*p)
189 {
190 case '"':
191 case '\'':
192 case '`':
193 case '{':
194 {
195 register char
196 escape;
197
198 switch (*p)
199 {
200 case '"': escape='"'; break;
201 case '\'': escape='\''; break;
202 case '`': escape='\''; break;
203 case '{': escape='}'; break;
204 default: escape=(*p); break;
205 }
206 for (p++; *p != '\0'; p++)
207 {
208 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
209 p++;
210 else
211 if (*p == escape)
212 {
213 p++;
214 break;
215 }
216 token[i++]=(*p);
217 }
218 break;
219 }
220 case '/':
221 {
222 token[i++]=(*p++);
223 if ((*p == '>') || (*p == '/'))
224 token[i++]=(*p++);
225 break;
226 }
227 default:
228 {
229 char
230 *q;
231
cristyc1acd842011-05-19 23:05:47 +0000232 value=InterpretLocaleValue(p,&q);
cristyda16f162011-02-19 23:52:17 +0000233 (void) value;
cristy3ed852e2009-09-05 21:47:34 +0000234 if ((p != q) && (*p != ','))
235 {
236 for ( ; (p < q) && (*p != ','); p++)
237 token[i++]=(*p);
238 if (*p == '%')
239 token[i++]=(*p++);
240 break;
241 }
cristyc5071682011-04-22 02:06:27 +0000242 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
cristy3ed852e2009-09-05 21:47:34 +0000243 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
244 {
245 token[i++]=(*p++);
246 break;
247 }
248 for ( ; *p != '\0'; p++)
249 {
250 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
cristydd8327f2010-05-12 12:39:46 +0000251 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
cristy3ed852e2009-09-05 21:47:34 +0000252 break;
253 if ((i > 0) && (*p == '<'))
254 break;
255 token[i++]=(*p);
256 if (*p == '>')
257 break;
258 if (*p == '(')
259 for (p++; *p != '\0'; p++)
260 {
261 token[i++]=(*p);
262 if ((*p == ')') && (*(p-1) != '\\'))
263 break;
264 }
265 }
266 break;
267 }
268 }
269 break;
270 }
271 token[i]='\0';
272 if (LocaleNCompare(token,"url(",4) == 0)
273 {
274 ssize_t
275 offset;
276
277 offset=4;
278 if (token[offset] == '#')
279 offset++;
cristybb503372010-05-27 20:51:26 +0000280 i=(ssize_t) strlen(token);
cristy3ed852e2009-09-05 21:47:34 +0000281 (void) CopyMagickString(token,token+offset,MaxTextExtent);
282 token[i-offset-1]='\0';
283 }
284 while (isspace((int) ((unsigned char) *p)) != 0)
285 p++;
286 if (end != (const char **) NULL)
287 *end=(const char *) p;
288}
289
290/*
291%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
292% %
293% %
294% %
295% G l o b E x p r e s s i o n %
296% %
297% %
298% %
299%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
300%
301% GlobExpression() returns MagickTrue if the expression matches the pattern.
302%
303% The format of the GlobExpression function is:
304%
305% MagickBooleanType GlobExpression(const char *expression,
306% const char *pattern,const MagickBooleanType case_insensitive)
307%
308% A description of each parameter follows:
309%
310% o expression: Specifies a pointer to a text string containing a file name.
311%
312% o pattern: Specifies a pointer to a text string containing a pattern.
313%
314% o case_insensitive: set to MagickTrue to ignore the case when matching
315% an expression.
316%
317*/
318MagickExport MagickBooleanType GlobExpression(const char *expression,
319 const char *pattern,const MagickBooleanType case_insensitive)
320{
321 MagickBooleanType
322 done,
323 match;
324
325 register const char
326 *p;
327
328 /*
329 Return on empty pattern or '*'.
330 */
331 if (pattern == (char *) NULL)
332 return(MagickTrue);
333 if (GetUTFCode(pattern) == 0)
334 return(MagickTrue);
335 if (LocaleCompare(pattern,"*") == 0)
336 return(MagickTrue);
337 p=pattern+strlen(pattern)-1;
338 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
339 {
340 ExceptionInfo
341 *exception;
342
343 ImageInfo
344 *image_info;
345
346 /*
347 Determine if pattern is a scene, i.e. img0001.pcd[2].
348 */
349 image_info=AcquireImageInfo();
350 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
351 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000352 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000353 exception=DestroyExceptionInfo(exception);
354 if (LocaleCompare(image_info->filename,pattern) != 0)
355 {
356 image_info=DestroyImageInfo(image_info);
357 return(MagickFalse);
358 }
359 image_info=DestroyImageInfo(image_info);
360 }
361 /*
362 Evaluate glob expression.
363 */
364 done=MagickFalse;
365 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
366 {
367 if (GetUTFCode(expression) == 0)
368 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
369 break;
370 switch (GetUTFCode(pattern))
371 {
cristy3ed852e2009-09-05 21:47:34 +0000372 case '*':
373 {
374 MagickBooleanType
375 status;
376
377 status=MagickFalse;
378 pattern+=GetUTFOctets(pattern);
379 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
380 {
381 status=GlobExpression(expression,pattern,case_insensitive);
382 expression+=GetUTFOctets(expression);
383 }
384 if (status != MagickFalse)
385 {
386 while (GetUTFCode(expression) != 0)
387 expression+=GetUTFOctets(expression);
388 while (GetUTFCode(pattern) != 0)
389 pattern+=GetUTFOctets(pattern);
390 }
391 break;
392 }
393 case '[':
394 {
cristy55a91cd2010-12-01 00:57:40 +0000395 int
cristy3ed852e2009-09-05 21:47:34 +0000396 c;
397
398 pattern+=GetUTFOctets(pattern);
399 for ( ; ; )
400 {
401 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
402 {
403 done=MagickTrue;
404 break;
405 }
406 if (GetUTFCode(pattern) == '\\')
407 {
408 pattern+=GetUTFOctets(pattern);
409 if (GetUTFCode(pattern) == 0)
410 {
411 done=MagickTrue;
412 break;
413 }
414 }
415 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
416 {
417 c=GetUTFCode(pattern);
418 pattern+=GetUTFOctets(pattern);
419 pattern+=GetUTFOctets(pattern);
420 if (GetUTFCode(pattern) == ']')
421 {
422 done=MagickTrue;
423 break;
424 }
425 if (GetUTFCode(pattern) == '\\')
426 {
427 pattern+=GetUTFOctets(pattern);
428 if (GetUTFCode(pattern) == 0)
429 {
430 done=MagickTrue;
431 break;
432 }
433 }
434 if ((GetUTFCode(expression) < c) ||
435 (GetUTFCode(expression) > GetUTFCode(pattern)))
436 {
437 pattern+=GetUTFOctets(pattern);
438 continue;
439 }
440 }
441 else
442 if (GetUTFCode(pattern) != GetUTFCode(expression))
443 {
444 pattern+=GetUTFOctets(pattern);
445 continue;
446 }
447 pattern+=GetUTFOctets(pattern);
448 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
449 {
450 if ((GetUTFCode(pattern) == '\\') &&
451 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
452 pattern+=GetUTFOctets(pattern);
453 pattern+=GetUTFOctets(pattern);
454 }
455 if (GetUTFCode(pattern) != 0)
456 {
457 pattern+=GetUTFOctets(pattern);
458 expression+=GetUTFOctets(expression);
459 }
460 break;
461 }
462 break;
463 }
464 case '?':
465 {
466 pattern+=GetUTFOctets(pattern);
467 expression+=GetUTFOctets(expression);
468 break;
469 }
470 case '{':
471 {
472 register const char
473 *p;
474
475 pattern+=GetUTFOctets(pattern);
476 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
477 {
478 p=expression;
479 match=MagickTrue;
480 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
481 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
482 (match != MagickFalse))
483 {
484 if (GetUTFCode(pattern) == '\\')
485 pattern+=GetUTFOctets(pattern);
486 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
487 MagickFalse;
488 p+=GetUTFOctets(p);
489 pattern+=GetUTFOctets(pattern);
490 }
491 if (GetUTFCode(pattern) == 0)
492 {
493 match=MagickFalse;
494 done=MagickTrue;
495 break;
496 }
497 else
498 if (match != MagickFalse)
499 {
500 expression=p;
501 while ((GetUTFCode(pattern) != '}') &&
502 (GetUTFCode(pattern) != 0))
503 {
504 pattern+=GetUTFOctets(pattern);
505 if (GetUTFCode(pattern) == '\\')
506 {
507 pattern+=GetUTFOctets(pattern);
508 if (GetUTFCode(pattern) == '}')
509 pattern+=GetUTFOctets(pattern);
510 }
511 }
512 }
513 else
514 {
515 while ((GetUTFCode(pattern) != '}') &&
516 (GetUTFCode(pattern) != ',') &&
517 (GetUTFCode(pattern) != 0))
518 {
519 pattern+=GetUTFOctets(pattern);
520 if (GetUTFCode(pattern) == '\\')
521 {
522 pattern+=GetUTFOctets(pattern);
523 if ((GetUTFCode(pattern) == '}') ||
524 (GetUTFCode(pattern) == ','))
525 pattern+=GetUTFOctets(pattern);
526 }
527 }
528 }
529 if (GetUTFCode(pattern) != 0)
530 pattern+=GetUTFOctets(pattern);
531 }
532 break;
533 }
cristyecbe37f2010-04-22 13:50:04 +0000534 case '\\':
535 {
536 pattern+=GetUTFOctets(pattern);
cristy4705fe82010-04-23 16:20:03 +0000537 if (GetUTFCode(pattern) == 0)
538 break;
cristyecbe37f2010-04-22 13:50:04 +0000539 }
cristy3ed852e2009-09-05 21:47:34 +0000540 default:
541 {
542 if (case_insensitive != MagickFalse)
543 {
544 if (tolower((int) GetUTFCode(expression)) !=
545 tolower((int) GetUTFCode(pattern)))
546 {
547 done=MagickTrue;
548 break;
549 }
550 }
551 else
552 if (GetUTFCode(expression) != GetUTFCode(pattern))
553 {
554 done=MagickTrue;
555 break;
556 }
557 expression+=GetUTFOctets(expression);
558 pattern+=GetUTFOctets(pattern);
559 }
560 }
561 }
562 while (GetUTFCode(pattern) == '*')
563 pattern+=GetUTFOctets(pattern);
564 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
565 MagickTrue : MagickFalse;
566 return(match);
567}
568
569/*
570%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
571% %
572% %
573% %
574+ I s G l o b %
575% %
576% %
577% %
578%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
579%
580% IsGlob() returns MagickTrue if the path specification contains a globbing
581% pattern.
582%
583% The format of the IsGlob method is:
584%
%      MagickBooleanType IsGlob(const char *path)
586%
587% A description of each parameter follows:
588%
589% o path: the path.
590%
591*/
592MagickExport MagickBooleanType IsGlob(const char *path)
593{
594 MagickBooleanType
595 status;
596
597 if (IsPathAccessible(path) != MagickFalse)
598 return(MagickFalse);
599 status=(strchr(path,'*') != (char *) NULL) ||
600 (strchr(path,'?') != (char *) NULL) ||
601 (strchr(path,'{') != (char *) NULL) ||
602 (strchr(path,'}') != (char *) NULL) ||
603 (strchr(path,'[') != (char *) NULL) ||
604 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
605 return(status);
606}
607
608/*
609%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
610% %
611% %
612% %
613% T o k e n i z e r %
614% %
615% %
616% %
617%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
618%
619% Tokenizer() is a generalized, finite state token parser. It extracts tokens
620% one at a time from a string of characters. The characters used for white
621% space, for break characters, and for quotes can be specified. Also,
622% characters in the string can be preceded by a specifiable escape character
623% which removes any special meaning the character may have.
624%
625% Here is some terminology:
626%
627% o token: A single unit of information in the form of a group of
628% characters.
629%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs.  In addition, white space terminates a
%      non-quoted token.
633%
634% o break set: One or more characters that separates non-quoted tokens.
635% Commas are a common break character. The usage of break characters to
636% signal the end of a token is the same as that of white space, except
637% multiple break characters with nothing or only white space between
638% generate a null token for each two break characters together.
639%
640% For example, if blank is set to be the white space and comma is set to
641% be the break character, the line
642%
643% A, B, C , , DEF
644%
645% ... consists of 5 tokens:
646%
647% 1) "A"
648% 2) "B"
649% 3) "C"
650% 4) "" (the null string)
651% 5) "DEF"
652%
653% o Quote character: A character that, when surrounding a group of other
654% characters, causes the group of characters to be treated as a single
655% token, no matter how many white spaces or break characters exist in
656% the group. Also, a token always terminates after the closing quote.
657% For example, if ' is the quote character, blank is white space, and
658% comma is the break character, the following string
659%
660% A, ' B, CD'EF GHI
661%
662% ... consists of 4 tokens:
663%
664% 1) "A"
665% 2) " B, CD" (note the blanks & comma)
666% 3) "EF"
667% 4) "GHI"
668%
669% The quote characters themselves do not appear in the resultant
670% tokens. The double quotes are delimiters i use here for
671% documentation purposes only.
672%
673% o Escape character: A character which itself is ignored but which
674% causes the next character to be used as is. ^ and \ are often used
675% as escape characters. An escape in the last position of the string
676% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
677% and non-escape) character. For example, assume white space, break
678% character, and quote are the same as in the above examples, and
679% further, assume that ^ is the escape character. Then, in the string
680%
681% ABC, ' DEF ^' GH' I ^ J K^ L ^
682%
683% ... there are 7 tokens:
684%
685% 1) "ABC"
686% 2) " DEF ' GH"
687% 3) "I"
688% 4) " " (a lone blank)
689% 5) "J"
690% 6) "K L"
691% 7) "^" (passed as is at end of line)
692%
693% The format of the Tokenizer method is:
694%
695% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
696% const size_t max_token_length,const char *line,const char *white,
697% const char *break_set,const char *quote,const char escape,
698% char *breaker,int *next,char *quoted)
699%
700% A description of each parameter follows:
701%
%    o flag: right now, only the low order 2 bits are used.
703%
704% 1 => convert non-quoted tokens to upper case
705% 2 => convert non-quoted tokens to lower case
706% 0 => do not convert non-quoted tokens
707%
708% o token: a character string containing the returned next token
709%
710% o max_token_length: the maximum size of "token". Characters beyond
711% "max_token_length" are truncated.
712%
%    o line: the string to be parsed.
714%
715% o white: a string of the valid white spaces. example:
716%
717% char whitesp[]={" \t"};
718%
719% blank and tab will be valid white space.
720%
%    o break_set: a string of the valid break characters. example:
722%
723% char breakch[]={";,"};
724%
725% semicolon and comma will be valid break characters.
726%
727% o quote: a string of the valid quote characters. An example would be
728%
%        char quotech[]={"'\""};
730%
731% (this causes single and double quotes to be valid) Note that a
732% token starting with one of these characters needs the same quote
733% character to terminate it.
734%
735% for example:
736%
737% "ABC '
738%
739% is unterminated, but
740%
741% "DEF" and 'GHI'
742%
743% are properly terminated. Note that different quote characters
744% can appear on the same line; only for a given token do the quote
745% characters have to be the same.
746%
747% o escape: the escape character (NOT a string ... only one
748% allowed). Use zero if none is desired.
749%
750% o breaker: the break character used to terminate the current
751% token. If the token was quoted, this will be the quote used. If
752% the token is the last one on the line, this will be zero.
753%
754% o next: this variable points to the first character of the
755% next token. it gets reset by "tokenizer" as it steps through the
756% string. Set it to 0 upon initialization, and leave it alone
757% after that. You can change it if you want to jump around in the
758% string or re-parse from the beginning, but be careful.
759%
760% o quoted: set to True if the token was quoted and MagickFalse
761% if not. You may need this information (for example: in C, a
762% string with quotes around it is a character string, while one
763% without is an identifier).
764%
765% o result: 0 if we haven't reached EOS (end of string), and 1
766% if we have.
767%
768*/
769
/*
  Tokenizer() parser states.
*/
#define IN_WHITE 0  /* before the token: nothing stored yet */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token; white/break characters are kept */
#define IN_OZONE 3  /* token complete (closing quote or trailing white space);
                       only a break character is still consumed */
774
/*
  Return the zero-based position of the first occurrence of character `c`
  in `string`, or -1 when it does not occur.  The terminating NUL is never
  considered a match.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
785
786static void StoreToken(TokenInfo *token_info,char *string,
787 size_t max_token_length,int c)
788{
cristybb503372010-05-27 20:51:26 +0000789 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000790 i;
791
792 if ((token_info->offset < 0) ||
793 ((size_t) token_info->offset >= (max_token_length-1)))
794 return;
795 i=token_info->offset++;
796 string[i]=(char) c;
797 if (token_info->state == IN_QUOTE)
798 return;
799 switch (token_info->flag & 0x03)
800 {
801 case 1:
802 {
803 string[i]=(char) toupper(c);
804 break;
805 }
806 case 2:
807 {
808 string[i]=(char) tolower(c);
809 break;
810 }
811 default:
812 break;
813 }
814}
815
/*
  Extract the next token of `line`, starting at index *next, into `token`.
  Returns 1 (leaving `token` untouched) when *next is already at the end of
  the string, otherwise 0 with `token` NUL-terminated, *next advanced,
  *breaker set to the character that ended the token ('\0' at end of line)
  and *quoted set to MagickTrue when the token was opened by a quote.
*/
MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
  char *token,const size_t max_token_length,const char *line,const char *white,
  const char *break_set,const char *quote,const char escape,char *breaker,
  int *next,char *quoted)
{
  int
    c;

  register ssize_t
    i;

  *breaker='\0';
  *quoted='\0';
  if (line[*next] == '\0')
    return(1);
  /*
    Every call starts a fresh token: reset the parser state.
  */
  token_info->state=IN_WHITE;
  token_info->quote=(char) MagickFalse;
  token_info->flag=flag;
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
  {
    c=(int) line[*next];
    /*
      Break character: ends the token (and is consumed) unless quoted.
    */
    i=sindex(c,break_set);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_TOKEN:
          case IN_OZONE:
          {
            (*next)++;
            *breaker=break_set[i];
            token[token_info->offset]='\0';
            return(0);
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    /*
      Quote character: opens a quoted token, closes it when it is the same
      character that opened it, or ends an unquoted token.
    */
    i=sindex(c,quote);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            token_info->state=IN_QUOTE;
            token_info->quote=quote[i];
            *quoted=(char) MagickTrue;
            break;
          }
          case IN_QUOTE:
          {
            if (quote[i] != token_info->quote)
              StoreToken(token_info,token,max_token_length,c);
            else
              {
                token_info->state=IN_OZONE;
                token_info->quote='\0';
              }
            break;
          }
          case IN_TOKEN:
          case IN_OZONE:
          {
            /*
              *next is not advanced here, so the quote is seen again by the
              next call and opens the following token.
            */
            *breaker=(char) c;
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      White space: skipped around tokens, kept inside quotes, and moves an
      unquoted token into the IN_OZONE state.
    */
    i=sindex(c,white);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_OZONE:
            break;
          case IN_TOKEN:
          {
            token_info->state=IN_OZONE;
            break;
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    if (c == (int) escape)
      {
        /*
          An escape in the last position is stored as an ordinary character.
        */
        if (line[(*next)+1] == '\0')
          {
            *breaker='\0';
            StoreToken(token_info,token,max_token_length,c);
            (*next)++;
            token[token_info->offset]='\0';
            return(0);
          }
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Step back so the loop increment revisits this same escape
              character, this time in the IN_TOKEN state.
            */
            (*next)--;
            token_info->state=IN_TOKEN;
            break;
          }
          case IN_TOKEN:
          case IN_QUOTE:
          {
            /*
              Store the character after the escape verbatim.
            */
            (*next)++;
            c=(int) line[*next];
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
          case IN_OZONE:
          {
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      Ordinary character.
    */
    switch (token_info->state)
    {
      case IN_WHITE:
        token_info->state=IN_TOKEN;
        /* fall through: the first character of the token is stored below */
      case IN_TOKEN:
      case IN_QUOTE:
      {
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_OZONE:
      {
        /*
          A new token begins here; return the finished one without
          consuming this character.
        */
        token[token_info->offset]='\0';
        return(0);
      }
    }
  }
  /*
    End of line reached: *breaker remains '\0'.
  */
  token[token_info->offset]='\0';
  return(0);
}
966}