blob: 517b848da45312a9628e0869253448bec81a507c [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% John Cristy %
17% January 1993 %
18% %
19% %
cristy7e41fe82010-12-04 23:12:08 +000020% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/memory_.h"
48#include "magick/string_.h"
49#include "magick/token.h"
50#include "magick/token-private.h"
51#include "magick/utility.h"
52
53/*
54 Typedef declarations.
55*/
56struct _TokenInfo
57{
58 int
59 state;
60
61 MagickStatusType
62 flag;
63
cristybb503372010-05-27 20:51:26 +000064 ssize_t
cristy3ed852e2009-09-05 21:47:34 +000065 offset;
66
67 char
68 quote;
69
cristybb503372010-05-27 20:51:26 +000070 size_t
cristy3ed852e2009-09-05 21:47:34 +000071 signature;
72};
73
74/*
75%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76% %
77% %
78% %
79% A c q u i r e T o k e n I n f o %
80% %
81% %
82% %
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84%
85% AcquireTokenInfo() allocates the TokenInfo structure.
86%
87% The format of the AcquireTokenInfo method is:
88%
89% TokenInfo *AcquireTokenInfo()
90%
91*/
92MagickExport TokenInfo *AcquireTokenInfo(void)
93{
94 TokenInfo
95 *token_info;
96
cristy73bd4a52010-10-05 11:24:23 +000097 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +000098 if (token_info == (TokenInfo *) NULL)
99 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100 token_info->signature=MagickSignature;
101 return(token_info);
102}
103
104/*
105%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106% %
107% %
108% %
109% D e s t r o y T o k e n I n f o %
110% %
111% %
112% %
113%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114%
115% DestroyTokenInfo() deallocates memory associated with an TokenInfo
116% structure.
117%
118% The format of the DestroyTokenInfo method is:
119%
120% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121%
122% A description of each parameter follows:
123%
124% o token_info: Specifies a pointer to an TokenInfo structure.
125%
126*/
127MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128{
129 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130 assert(token_info != (TokenInfo *) NULL);
131 assert(token_info->signature == MagickSignature);
132 token_info->signature=(~MagickSignature);
133 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134 return(token_info);
135}
136
137/*
138%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139% %
140% %
141% %
142+ G e t M a g i c k T o k e n %
143% %
144% %
145% %
146%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147%
148% GetMagickToken() gets a token from the token stream. A token is defined as a
149% sequence of characters delimited by whitespace (e.g. clip-path), a sequence
150% delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristydd8327f2010-05-12 12:39:46 +0000151% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
152% separator characters: ':', '=', ',', and ';'.
cristy3ed852e2009-09-05 21:47:34 +0000153%
154% The format of the GetMagickToken method is:
155%
156% void GetMagickToken(const char *start,const char **end,char *token)
157%
158% A description of each parameter follows:
159%
160% o start: the start of the token sequence.
161%
162% o end: point to the end of the token sequence.
163%
164% o token: copy the token to this buffer.
165%
166*/
167MagickExport void GetMagickToken(const char *start,const char **end,char *token)
168{
169 double
170 value;
171
172 register const char
173 *p;
174
cristybb503372010-05-27 20:51:26 +0000175 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000176 i;
177
178 i=0;
179 for (p=start; *p != '\0'; )
180 {
181 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
182 p++;
183 if (*p == '\0')
184 break;
185 switch (*p)
186 {
187 case '"':
188 case '\'':
189 case '`':
190 case '{':
191 {
192 register char
193 escape;
194
195 switch (*p)
196 {
197 case '"': escape='"'; break;
198 case '\'': escape='\''; break;
199 case '`': escape='\''; break;
200 case '{': escape='}'; break;
201 default: escape=(*p); break;
202 }
203 for (p++; *p != '\0'; p++)
204 {
205 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
206 p++;
207 else
208 if (*p == escape)
209 {
210 p++;
211 break;
212 }
213 token[i++]=(*p);
214 }
215 break;
216 }
217 case '/':
218 {
219 token[i++]=(*p++);
220 if ((*p == '>') || (*p == '/'))
221 token[i++]=(*p++);
222 break;
223 }
224 default:
225 {
226 char
227 *q;
228
229 value=strtod(p,&q);
cristyda16f162011-02-19 23:52:17 +0000230 (void) value;
cristy3ed852e2009-09-05 21:47:34 +0000231 if ((p != q) && (*p != ','))
232 {
233 for ( ; (p < q) && (*p != ','); p++)
234 token[i++]=(*p);
235 if (*p == '%')
236 token[i++]=(*p++);
237 break;
238 }
cristyc5071682011-04-22 02:06:27 +0000239 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
cristy3ed852e2009-09-05 21:47:34 +0000240 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
241 {
242 token[i++]=(*p++);
243 break;
244 }
245 for ( ; *p != '\0'; p++)
246 {
247 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
cristydd8327f2010-05-12 12:39:46 +0000248 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
cristy3ed852e2009-09-05 21:47:34 +0000249 break;
250 if ((i > 0) && (*p == '<'))
251 break;
252 token[i++]=(*p);
253 if (*p == '>')
254 break;
255 if (*p == '(')
256 for (p++; *p != '\0'; p++)
257 {
258 token[i++]=(*p);
259 if ((*p == ')') && (*(p-1) != '\\'))
260 break;
261 }
262 }
263 break;
264 }
265 }
266 break;
267 }
268 token[i]='\0';
269 if (LocaleNCompare(token,"url(",4) == 0)
270 {
271 ssize_t
272 offset;
273
274 offset=4;
275 if (token[offset] == '#')
276 offset++;
cristybb503372010-05-27 20:51:26 +0000277 i=(ssize_t) strlen(token);
cristy3ed852e2009-09-05 21:47:34 +0000278 (void) CopyMagickString(token,token+offset,MaxTextExtent);
279 token[i-offset-1]='\0';
280 }
281 while (isspace((int) ((unsigned char) *p)) != 0)
282 p++;
283 if (end != (const char **) NULL)
284 *end=(const char *) p;
285}
286
287/*
288%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
289% %
290% %
291% %
292% G l o b E x p r e s s i o n %
293% %
294% %
295% %
296%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
297%
298% GlobExpression() returns MagickTrue if the expression matches the pattern.
299%
300% The format of the GlobExpression function is:
301%
302% MagickBooleanType GlobExpression(const char *expression,
303% const char *pattern,const MagickBooleanType case_insensitive)
304%
305% A description of each parameter follows:
306%
307% o expression: Specifies a pointer to a text string containing a file name.
308%
309% o pattern: Specifies a pointer to a text string containing a pattern.
310%
311% o case_insensitive: set to MagickTrue to ignore the case when matching
312% an expression.
313%
314*/
315MagickExport MagickBooleanType GlobExpression(const char *expression,
316 const char *pattern,const MagickBooleanType case_insensitive)
317{
318 MagickBooleanType
319 done,
320 match;
321
322 register const char
323 *p;
324
325 /*
326 Return on empty pattern or '*'.
327 */
328 if (pattern == (char *) NULL)
329 return(MagickTrue);
330 if (GetUTFCode(pattern) == 0)
331 return(MagickTrue);
332 if (LocaleCompare(pattern,"*") == 0)
333 return(MagickTrue);
334 p=pattern+strlen(pattern)-1;
335 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
336 {
337 ExceptionInfo
338 *exception;
339
340 ImageInfo
341 *image_info;
342
343 /*
344 Determine if pattern is a scene, i.e. img0001.pcd[2].
345 */
346 image_info=AcquireImageInfo();
347 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
348 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000349 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000350 exception=DestroyExceptionInfo(exception);
351 if (LocaleCompare(image_info->filename,pattern) != 0)
352 {
353 image_info=DestroyImageInfo(image_info);
354 return(MagickFalse);
355 }
356 image_info=DestroyImageInfo(image_info);
357 }
358 /*
359 Evaluate glob expression.
360 */
361 done=MagickFalse;
362 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
363 {
364 if (GetUTFCode(expression) == 0)
365 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
366 break;
367 switch (GetUTFCode(pattern))
368 {
cristy3ed852e2009-09-05 21:47:34 +0000369 case '*':
370 {
371 MagickBooleanType
372 status;
373
374 status=MagickFalse;
375 pattern+=GetUTFOctets(pattern);
376 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
377 {
378 status=GlobExpression(expression,pattern,case_insensitive);
379 expression+=GetUTFOctets(expression);
380 }
381 if (status != MagickFalse)
382 {
383 while (GetUTFCode(expression) != 0)
384 expression+=GetUTFOctets(expression);
385 while (GetUTFCode(pattern) != 0)
386 pattern+=GetUTFOctets(pattern);
387 }
388 break;
389 }
390 case '[':
391 {
cristy55a91cd2010-12-01 00:57:40 +0000392 int
cristy3ed852e2009-09-05 21:47:34 +0000393 c;
394
395 pattern+=GetUTFOctets(pattern);
396 for ( ; ; )
397 {
398 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
399 {
400 done=MagickTrue;
401 break;
402 }
403 if (GetUTFCode(pattern) == '\\')
404 {
405 pattern+=GetUTFOctets(pattern);
406 if (GetUTFCode(pattern) == 0)
407 {
408 done=MagickTrue;
409 break;
410 }
411 }
412 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
413 {
414 c=GetUTFCode(pattern);
415 pattern+=GetUTFOctets(pattern);
416 pattern+=GetUTFOctets(pattern);
417 if (GetUTFCode(pattern) == ']')
418 {
419 done=MagickTrue;
420 break;
421 }
422 if (GetUTFCode(pattern) == '\\')
423 {
424 pattern+=GetUTFOctets(pattern);
425 if (GetUTFCode(pattern) == 0)
426 {
427 done=MagickTrue;
428 break;
429 }
430 }
431 if ((GetUTFCode(expression) < c) ||
432 (GetUTFCode(expression) > GetUTFCode(pattern)))
433 {
434 pattern+=GetUTFOctets(pattern);
435 continue;
436 }
437 }
438 else
439 if (GetUTFCode(pattern) != GetUTFCode(expression))
440 {
441 pattern+=GetUTFOctets(pattern);
442 continue;
443 }
444 pattern+=GetUTFOctets(pattern);
445 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
446 {
447 if ((GetUTFCode(pattern) == '\\') &&
448 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
449 pattern+=GetUTFOctets(pattern);
450 pattern+=GetUTFOctets(pattern);
451 }
452 if (GetUTFCode(pattern) != 0)
453 {
454 pattern+=GetUTFOctets(pattern);
455 expression+=GetUTFOctets(expression);
456 }
457 break;
458 }
459 break;
460 }
461 case '?':
462 {
463 pattern+=GetUTFOctets(pattern);
464 expression+=GetUTFOctets(expression);
465 break;
466 }
467 case '{':
468 {
469 register const char
470 *p;
471
472 pattern+=GetUTFOctets(pattern);
473 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
474 {
475 p=expression;
476 match=MagickTrue;
477 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
478 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
479 (match != MagickFalse))
480 {
481 if (GetUTFCode(pattern) == '\\')
482 pattern+=GetUTFOctets(pattern);
483 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
484 MagickFalse;
485 p+=GetUTFOctets(p);
486 pattern+=GetUTFOctets(pattern);
487 }
488 if (GetUTFCode(pattern) == 0)
489 {
490 match=MagickFalse;
491 done=MagickTrue;
492 break;
493 }
494 else
495 if (match != MagickFalse)
496 {
497 expression=p;
498 while ((GetUTFCode(pattern) != '}') &&
499 (GetUTFCode(pattern) != 0))
500 {
501 pattern+=GetUTFOctets(pattern);
502 if (GetUTFCode(pattern) == '\\')
503 {
504 pattern+=GetUTFOctets(pattern);
505 if (GetUTFCode(pattern) == '}')
506 pattern+=GetUTFOctets(pattern);
507 }
508 }
509 }
510 else
511 {
512 while ((GetUTFCode(pattern) != '}') &&
513 (GetUTFCode(pattern) != ',') &&
514 (GetUTFCode(pattern) != 0))
515 {
516 pattern+=GetUTFOctets(pattern);
517 if (GetUTFCode(pattern) == '\\')
518 {
519 pattern+=GetUTFOctets(pattern);
520 if ((GetUTFCode(pattern) == '}') ||
521 (GetUTFCode(pattern) == ','))
522 pattern+=GetUTFOctets(pattern);
523 }
524 }
525 }
526 if (GetUTFCode(pattern) != 0)
527 pattern+=GetUTFOctets(pattern);
528 }
529 break;
530 }
cristyecbe37f2010-04-22 13:50:04 +0000531 case '\\':
532 {
533 pattern+=GetUTFOctets(pattern);
cristy4705fe82010-04-23 16:20:03 +0000534 if (GetUTFCode(pattern) == 0)
535 break;
cristyecbe37f2010-04-22 13:50:04 +0000536 }
cristy3ed852e2009-09-05 21:47:34 +0000537 default:
538 {
539 if (case_insensitive != MagickFalse)
540 {
541 if (tolower((int) GetUTFCode(expression)) !=
542 tolower((int) GetUTFCode(pattern)))
543 {
544 done=MagickTrue;
545 break;
546 }
547 }
548 else
549 if (GetUTFCode(expression) != GetUTFCode(pattern))
550 {
551 done=MagickTrue;
552 break;
553 }
554 expression+=GetUTFOctets(expression);
555 pattern+=GetUTFOctets(pattern);
556 }
557 }
558 }
559 while (GetUTFCode(pattern) == '*')
560 pattern+=GetUTFOctets(pattern);
561 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
562 MagickTrue : MagickFalse;
563 return(match);
564}
565
566/*
567%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
568% %
569% %
570% %
571+ I s G l o b %
572% %
573% %
574% %
575%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
576%
577% IsGlob() returns MagickTrue if the path specification contains a globbing
578% pattern.
579%
580% The format of the IsGlob method is:
581%
582% MagickBooleanType IsGlob(const char *path)
583%
584% A description of each parameter follows:
585%
586% o path: the path.
587%
588*/
589MagickExport MagickBooleanType IsGlob(const char *path)
590{
591 MagickBooleanType
592 status;
593
594 if (IsPathAccessible(path) != MagickFalse)
595 return(MagickFalse);
596 status=(strchr(path,'*') != (char *) NULL) ||
597 (strchr(path,'?') != (char *) NULL) ||
598 (strchr(path,'{') != (char *) NULL) ||
599 (strchr(path,'}') != (char *) NULL) ||
600 (strchr(path,'[') != (char *) NULL) ||
601 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
602 return(status);
603}
604
605/*
606%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
607% %
608% %
609% %
610% T o k e n i z e r %
611% %
612% %
613% %
614%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
615%
616% Tokenizer() is a generalized, finite state token parser. It extracts tokens
617% one at a time from a string of characters. The characters used for white
618% space, for break characters, and for quotes can be specified. Also,
619% characters in the string can be preceded by a specifiable escape character
620% which removes any special meaning the character may have.
621%
622% Here is some terminology:
623%
624% o token: A single unit of information in the form of a group of
625% characters.
626%
627% o white space: Space that gets ignored (except within quotes or when
628% escaped), like blanks and tabs. In addition, white space terminates a
629% non-quoted token.
630%
631% o break set: One or more characters that separates non-quoted tokens.
632% Commas are a common break character. The usage of break characters to
633% signal the end of a token is the same as that of white space, except
634% multiple break characters with nothing or only white space between
635% generate a null token for each two break characters together.
636%
637% For example, if blank is set to be the white space and comma is set to
638% be the break character, the line
639%
640% A, B, C , , DEF
641%
642% ... consists of 5 tokens:
643%
644% 1) "A"
645% 2) "B"
646% 3) "C"
647% 4) "" (the null string)
648% 5) "DEF"
649%
650% o Quote character: A character that, when surrounding a group of other
651% characters, causes the group of characters to be treated as a single
652% token, no matter how many white spaces or break characters exist in
653% the group. Also, a token always terminates after the closing quote.
654% For example, if ' is the quote character, blank is white space, and
655% comma is the break character, the following string
656%
657% A, ' B, CD'EF GHI
658%
659% ... consists of 4 tokens:
660%
661% 1) "A"
662% 2) " B, CD" (note the blanks & comma)
663% 3) "EF"
664% 4) "GHI"
665%
666% The quote characters themselves do not appear in the resultant
667% tokens. The double quotes are delimiters i use here for
668% documentation purposes only.
669%
670% o Escape character: A character which itself is ignored but which
671% causes the next character to be used as is. ^ and \ are often used
672% as escape characters. An escape in the last position of the string
673% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
674% and non-escape) character. For example, assume white space, break
675% character, and quote are the same as in the above examples, and
676% further, assume that ^ is the escape character. Then, in the string
677%
678% ABC, ' DEF ^' GH' I ^ J K^ L ^
679%
680% ... there are 7 tokens:
681%
682% 1) "ABC"
683% 2) " DEF ' GH"
684% 3) "I"
685% 4) " " (a lone blank)
686% 5) "J"
687% 6) "K L"
688% 7) "^" (passed as is at end of line)
689%
690% The format of the Tokenizer method is:
691%
692% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
693% const size_t max_token_length,const char *line,const char *white,
694% const char *break_set,const char *quote,const char escape,
695% char *breaker,int *next,char *quoted)
696%
697% A description of each parameter follows:
698%
699% o flag: right now, only the low order 2 bits are used.
700%
701% 1 => convert non-quoted tokens to upper case
702% 2 => convert non-quoted tokens to lower case
703% 0 => do not convert non-quoted tokens
704%
705% o token: a character string containing the returned next token
706%
707% o max_token_length: the maximum size of "token". Characters beyond
708% "max_token_length" are truncated.
709%
710% o line: the string to be parsed.
711%
712% o white: a string of the valid white spaces. example:
713%
714% char whitesp[]={" \t"};
715%
716% blank and tab will be valid white space.
717%
718% o break_set: a string of the valid break characters. example:
719%
720% char breakch[]={";,"};
721%
722% semicolon and comma will be valid break characters.
723%
724% o quote: a string of the valid quote characters. An example would be
725%
725% char quotech[]={"'\""};
727%
728% (this causes single and double quotes to be valid) Note that a
729% token starting with one of these characters needs the same quote
730% character to terminate it.
731%
732% for example:
733%
734% "ABC '
735%
736% is unterminated, but
737%
738% "DEF" and 'GHI'
739%
740% are properly terminated. Note that different quote characters
741% can appear on the same line; only for a given token do the quote
742% characters have to be the same.
743%
744% o escape: the escape character (NOT a string ... only one
745% allowed). Use zero if none is desired.
746%
747% o breaker: the break character used to terminate the current
748% token. If the token was quoted, this will be the quote used. If
749% the token is the last one on the line, this will be zero.
750%
751% o next: this variable points to the first character of the
752% next token. it gets reset by "tokenizer" as it steps through the
753% string. Set it to 0 upon initialization, and leave it alone
754% after that. You can change it if you want to jump around in the
755% string or re-parse from the beginning, but be careful.
756%
757% o quoted: set to MagickTrue if the token was quoted and MagickFalse
758% if not. You may need this information (for example: in C, a
759% string with quotes around it is a character string, while one
760% without is an identifier).
761%
762% o result: 0 if we haven't reached EOS (end of string), and 1
763% if we have.
764%
765*/
766
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  (0)  /* between tokens; nothing stored yet */
#define IN_TOKEN  (1)  /* inside a non-quoted token */
#define IN_QUOTE  (2)  /* inside a quoted token */
#define IN_OZONE  (3)  /* token complete; skipping to the next break or token */
771
/*
  Return the zero-based position of the first occurrence of c in string, or
  -1 if c does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
782
783static void StoreToken(TokenInfo *token_info,char *string,
784 size_t max_token_length,int c)
785{
cristybb503372010-05-27 20:51:26 +0000786 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000787 i;
788
789 if ((token_info->offset < 0) ||
790 ((size_t) token_info->offset >= (max_token_length-1)))
791 return;
792 i=token_info->offset++;
793 string[i]=(char) c;
794 if (token_info->state == IN_QUOTE)
795 return;
796 switch (token_info->flag & 0x03)
797 {
798 case 1:
799 {
800 string[i]=(char) toupper(c);
801 break;
802 }
803 case 2:
804 {
805 string[i]=(char) tolower(c);
806 break;
807 }
808 default:
809 break;
810 }
811}
812
813MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
814 char *token,const size_t max_token_length,const char *line,const char *white,
815 const char *break_set,const char *quote,const char escape,char *breaker,
816 int *next,char *quoted)
817{
818 int
819 c;
820
cristybb503372010-05-27 20:51:26 +0000821 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000822 i;
823
824 *breaker='\0';
825 *quoted='\0';
826 if (line[*next] == '\0')
827 return(1);
828 token_info->state=IN_WHITE;
829 token_info->quote=(char) MagickFalse;
830 token_info->flag=flag;
831 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
832 {
833 c=(int) line[*next];
834 i=sindex(c,break_set);
835 if (i >= 0)
836 {
837 switch (token_info->state)
838 {
839 case IN_WHITE:
840 case IN_TOKEN:
841 case IN_OZONE:
842 {
843 (*next)++;
844 *breaker=break_set[i];
845 token[token_info->offset]='\0';
846 return(0);
847 }
848 case IN_QUOTE:
849 {
850 StoreToken(token_info,token,max_token_length,c);
851 break;
852 }
853 }
854 continue;
855 }
856 i=sindex(c,quote);
857 if (i >= 0)
858 {
859 switch (token_info->state)
860 {
861 case IN_WHITE:
862 {
863 token_info->state=IN_QUOTE;
864 token_info->quote=quote[i];
865 *quoted=(char) MagickTrue;
866 break;
867 }
868 case IN_QUOTE:
869 {
870 if (quote[i] != token_info->quote)
871 StoreToken(token_info,token,max_token_length,c);
872 else
873 {
874 token_info->state=IN_OZONE;
875 token_info->quote='\0';
876 }
877 break;
878 }
879 case IN_TOKEN:
880 case IN_OZONE:
881 {
882 *breaker=(char) c;
883 token[token_info->offset]='\0';
884 return(0);
885 }
886 }
887 continue;
888 }
889 i=sindex(c,white);
890 if (i >= 0)
891 {
892 switch (token_info->state)
893 {
894 case IN_WHITE:
895 case IN_OZONE:
896 break;
897 case IN_TOKEN:
898 {
899 token_info->state=IN_OZONE;
900 break;
901 }
902 case IN_QUOTE:
903 {
904 StoreToken(token_info,token,max_token_length,c);
905 break;
906 }
907 }
908 continue;
909 }
910 if (c == (int) escape)
911 {
912 if (line[(*next)+1] == '\0')
913 {
914 *breaker='\0';
915 StoreToken(token_info,token,max_token_length,c);
916 (*next)++;
917 token[token_info->offset]='\0';
918 return(0);
919 }
920 switch (token_info->state)
921 {
922 case IN_WHITE:
923 {
924 (*next)--;
925 token_info->state=IN_TOKEN;
926 break;
927 }
928 case IN_TOKEN:
929 case IN_QUOTE:
930 {
931 (*next)++;
932 c=(int) line[*next];
933 StoreToken(token_info,token,max_token_length,c);
934 break;
935 }
936 case IN_OZONE:
937 {
938 token[token_info->offset]='\0';
939 return(0);
940 }
941 }
942 continue;
943 }
944 switch (token_info->state)
945 {
946 case IN_WHITE:
947 token_info->state=IN_TOKEN;
948 case IN_TOKEN:
949 case IN_QUOTE:
950 {
951 StoreToken(token_info,token,max_token_length,c);
952 break;
953 }
954 case IN_OZONE:
955 {
956 token[token_info->offset]='\0';
957 return(0);
958 }
959 }
960 }
961 token[token_info->offset]='\0';
962 return(0);
963}