blob: 9c8dc1bd890333105893dd9fa4aed1ef7a4060d5 [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
cristyde984cd2013-12-01 14:49:27 +000016% Cristy %
cristy3ed852e2009-09-05 21:47:34 +000017% January 1993 %
18% %
19% %
cristyb56bb242014-11-25 17:12:48 +000020% Copyright 1999-2015 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
cristy4c08aed2011-07-01 19:47:50 +000043#include "MagickCore/studio.h"
44#include "MagickCore/exception.h"
45#include "MagickCore/exception-private.h"
46#include "MagickCore/image.h"
47#include "MagickCore/memory_.h"
48#include "MagickCore/string_.h"
49#include "MagickCore/string-private.h"
50#include "MagickCore/token.h"
51#include "MagickCore/token-private.h"
52#include "MagickCore/utility.h"
cristyd1dd6e42011-09-04 01:46:08 +000053#include "MagickCore/utility-private.h"
cristy3ed852e2009-09-05 21:47:34 +000054
/*
  Typedef declarations.
*/
58struct _TokenInfo
59{
60 int
61 state;
62
63 MagickStatusType
64 flag;
65
cristybb503372010-05-27 20:51:26 +000066 ssize_t
cristy3ed852e2009-09-05 21:47:34 +000067 offset;
68
69 char
70 quote;
71
cristybb503372010-05-27 20:51:26 +000072 size_t
cristy3ed852e2009-09-05 21:47:34 +000073 signature;
74};
75
76/*
77%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78% %
79% %
80% %
81% A c q u i r e T o k e n I n f o %
82% %
83% %
84% %
85%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86%
87% AcquireTokenInfo() allocates the TokenInfo structure.
88%
89% The format of the AcquireTokenInfo method is:
90%
91% TokenInfo *AcquireTokenInfo()
92%
93*/
94MagickExport TokenInfo *AcquireTokenInfo(void)
95{
96 TokenInfo
97 *token_info;
98
cristy73bd4a52010-10-05 11:24:23 +000099 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +0000100 if (token_info == (TokenInfo *) NULL)
101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
cristye1c94d92015-06-28 12:16:33 +0000102 token_info->signature=MagickCoreSignature;
cristy3ed852e2009-09-05 21:47:34 +0000103 return(token_info);
104}
105
106/*
107%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108% %
109% %
110% %
111% D e s t r o y T o k e n I n f o %
112% %
113% %
114% %
115%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
119%
120% The format of the DestroyTokenInfo method is:
121%
122% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123%
124% A description of each parameter follows:
125%
%    o token_info: Specifies a pointer to a TokenInfo structure.
127%
128*/
129MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130{
131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132 assert(token_info != (TokenInfo *) NULL);
cristye1c94d92015-06-28 12:16:33 +0000133 assert(token_info->signature == MagickCoreSignature);
134 token_info->signature=(~MagickCoreSignature);
cristy3ed852e2009-09-05 21:47:34 +0000135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136 return(token_info);
137}
138
139/*
140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141% %
142% %
143% %
144+ G e t M a g i c k T o k e n %
145% %
146% %
147% %
148%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149%
cristydf11e552011-04-23 17:18:30 +0000150% GetMagickToken() gets a token from the token stream. A token is defined as
151% a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
cristydd8327f2010-05-12 12:39:46 +0000153% parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
154% separator characters: ':', '=', ',', and ';'.
cristy3ed852e2009-09-05 21:47:34 +0000155%
156% The format of the GetMagickToken method is:
157%
158% void GetMagickToken(const char *start,const char **end,char *token)
159%
160% A description of each parameter follows:
161%
162% o start: the start of the token sequence.
163%
164% o end: point to the end of the token sequence.
165%
166% o token: copy the token to this buffer.
167%
168*/
169MagickExport void GetMagickToken(const char *start,const char **end,char *token)
170{
171 double
172 value;
173
174 register const char
175 *p;
176
cristybb503372010-05-27 20:51:26 +0000177 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000178 i;
179
cristy32f69122011-04-22 02:26:00 +0000180 assert(start != (const char *) NULL);
181 assert(token != (char *) NULL);
cristy3ed852e2009-09-05 21:47:34 +0000182 i=0;
cristy708e3c62014-05-01 00:48:26 +0000183 p=start;
184 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
185 p++;
cristy708e3c62014-05-01 00:48:26 +0000186 switch (*p)
187 {
cristy04861e22014-05-01 11:14:12 +0000188 case '\0':
189 break;
cristy708e3c62014-05-01 00:48:26 +0000190 case '"':
191 case '\'':
192 case '`':
193 case '{':
194 {
195 register char
196 escape;
197
198 switch (*p)
199 {
200 case '"': escape='"'; break;
201 case '\'': escape='\''; break;
202 case '`': escape='\''; break;
203 case '{': escape='}'; break;
204 default: escape=(*p); break;
205 }
206 for (p++; *p != '\0'; p++)
207 {
208 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
209 p++;
210 else
211 if (*p == escape)
212 {
213 p++;
214 break;
215 }
216 token[i++]=(*p);
217 }
218 break;
219 }
220 case '/':
221 {
222 token[i++]=(*p++);
223 if ((*p == '>') || (*p == '/'))
224 token[i++]=(*p++);
225 break;
226 }
227 default:
228 {
229 char
230 *q;
231
232 value=StringToDouble(p,&q);
233 (void) value;
234 if ((p != q) && (*p != ','))
235 {
236 for ( ; (p < q) && (*p != ','); p++)
237 token[i++]=(*p);
238 if (*p == '%')
239 token[i++]=(*p++);
240 break;
241 }
242 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
243 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
244 {
245 token[i++]=(*p++);
246 break;
247 }
248 for ( ; *p != '\0'; p++)
249 {
250 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
251 (*p == ':') || (*p == ',') || (*p == '|') || (*p == ';')) &&
252 (*(p-1) != '\\'))
253 break;
254 if ((i > 0) && (*p == '<'))
255 break;
256 token[i++]=(*p);
257 if (*p == '>')
258 break;
259 if (*p == '(')
260 for (p++; *p != '\0'; p++)
261 {
262 token[i++]=(*p);
263 if ((*p == ')') && (*(p-1) != '\\'))
264 break;
265 }
266 }
267 break;
268 }
cristy3ed852e2009-09-05 21:47:34 +0000269 }
270 token[i]='\0';
271 if (LocaleNCompare(token,"url(",4) == 0)
272 {
273 ssize_t
274 offset;
275
276 offset=4;
277 if (token[offset] == '#')
278 offset++;
cristybb503372010-05-27 20:51:26 +0000279 i=(ssize_t) strlen(token);
cristy151b66d2015-04-15 10:50:31 +0000280 (void) CopyMagickString(token,token+offset,MagickPathExtent);
cristy3ed852e2009-09-05 21:47:34 +0000281 token[i-offset-1]='\0';
282 }
283 while (isspace((int) ((unsigned char) *p)) != 0)
284 p++;
285 if (end != (const char **) NULL)
286 *end=(const char *) p;
287}
288
289/*
290%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
291% %
292% %
293% %
294% G l o b E x p r e s s i o n %
295% %
296% %
297% %
298%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
299%
300% GlobExpression() returns MagickTrue if the expression matches the pattern.
301%
302% The format of the GlobExpression function is:
303%
304% MagickBooleanType GlobExpression(const char *expression,
305% const char *pattern,const MagickBooleanType case_insensitive)
306%
307% A description of each parameter follows:
308%
309% o expression: Specifies a pointer to a text string containing a file name.
310%
311% o pattern: Specifies a pointer to a text string containing a pattern.
312%
313% o case_insensitive: set to MagickTrue to ignore the case when matching
314% an expression.
315%
316*/
317MagickExport MagickBooleanType GlobExpression(const char *expression,
318 const char *pattern,const MagickBooleanType case_insensitive)
319{
320 MagickBooleanType
321 done,
322 match;
323
324 register const char
325 *p;
326
327 /*
328 Return on empty pattern or '*'.
329 */
330 if (pattern == (char *) NULL)
331 return(MagickTrue);
332 if (GetUTFCode(pattern) == 0)
333 return(MagickTrue);
334 if (LocaleCompare(pattern,"*") == 0)
335 return(MagickTrue);
336 p=pattern+strlen(pattern)-1;
337 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
338 {
339 ExceptionInfo
340 *exception;
341
342 ImageInfo
343 *image_info;
344
345 /*
346 Determine if pattern is a scene, i.e. img0001.pcd[2].
347 */
348 image_info=AcquireImageInfo();
cristy151b66d2015-04-15 10:50:31 +0000349 (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
cristy3ed852e2009-09-05 21:47:34 +0000350 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000351 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000352 exception=DestroyExceptionInfo(exception);
353 if (LocaleCompare(image_info->filename,pattern) != 0)
354 {
355 image_info=DestroyImageInfo(image_info);
356 return(MagickFalse);
357 }
358 image_info=DestroyImageInfo(image_info);
359 }
360 /*
361 Evaluate glob expression.
362 */
363 done=MagickFalse;
364 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
365 {
366 if (GetUTFCode(expression) == 0)
367 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
368 break;
369 switch (GetUTFCode(pattern))
370 {
cristy3ed852e2009-09-05 21:47:34 +0000371 case '*':
372 {
373 MagickBooleanType
374 status;
375
376 status=MagickFalse;
377 pattern+=GetUTFOctets(pattern);
378 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
379 {
380 status=GlobExpression(expression,pattern,case_insensitive);
381 expression+=GetUTFOctets(expression);
382 }
383 if (status != MagickFalse)
384 {
385 while (GetUTFCode(expression) != 0)
386 expression+=GetUTFOctets(expression);
387 while (GetUTFCode(pattern) != 0)
388 pattern+=GetUTFOctets(pattern);
389 }
390 break;
391 }
392 case '[':
393 {
cristy55a91cd2010-12-01 00:57:40 +0000394 int
cristy3ed852e2009-09-05 21:47:34 +0000395 c;
396
397 pattern+=GetUTFOctets(pattern);
398 for ( ; ; )
399 {
400 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
401 {
402 done=MagickTrue;
403 break;
404 }
405 if (GetUTFCode(pattern) == '\\')
406 {
407 pattern+=GetUTFOctets(pattern);
408 if (GetUTFCode(pattern) == 0)
409 {
410 done=MagickTrue;
411 break;
412 }
413 }
414 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
415 {
416 c=GetUTFCode(pattern);
417 pattern+=GetUTFOctets(pattern);
418 pattern+=GetUTFOctets(pattern);
419 if (GetUTFCode(pattern) == ']')
420 {
421 done=MagickTrue;
422 break;
423 }
424 if (GetUTFCode(pattern) == '\\')
425 {
426 pattern+=GetUTFOctets(pattern);
427 if (GetUTFCode(pattern) == 0)
428 {
429 done=MagickTrue;
430 break;
431 }
432 }
433 if ((GetUTFCode(expression) < c) ||
434 (GetUTFCode(expression) > GetUTFCode(pattern)))
435 {
436 pattern+=GetUTFOctets(pattern);
437 continue;
438 }
439 }
440 else
441 if (GetUTFCode(pattern) != GetUTFCode(expression))
442 {
443 pattern+=GetUTFOctets(pattern);
444 continue;
445 }
446 pattern+=GetUTFOctets(pattern);
447 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
448 {
449 if ((GetUTFCode(pattern) == '\\') &&
450 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
451 pattern+=GetUTFOctets(pattern);
452 pattern+=GetUTFOctets(pattern);
453 }
454 if (GetUTFCode(pattern) != 0)
455 {
456 pattern+=GetUTFOctets(pattern);
457 expression+=GetUTFOctets(expression);
458 }
459 break;
460 }
461 break;
462 }
463 case '?':
464 {
465 pattern+=GetUTFOctets(pattern);
466 expression+=GetUTFOctets(expression);
467 break;
468 }
469 case '{':
470 {
471 register const char
472 *p;
473
474 pattern+=GetUTFOctets(pattern);
475 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
476 {
477 p=expression;
478 match=MagickTrue;
479 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
480 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
481 (match != MagickFalse))
482 {
483 if (GetUTFCode(pattern) == '\\')
484 pattern+=GetUTFOctets(pattern);
485 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
486 MagickFalse;
487 p+=GetUTFOctets(p);
488 pattern+=GetUTFOctets(pattern);
489 }
490 if (GetUTFCode(pattern) == 0)
491 {
492 match=MagickFalse;
493 done=MagickTrue;
494 break;
495 }
496 else
497 if (match != MagickFalse)
498 {
499 expression=p;
500 while ((GetUTFCode(pattern) != '}') &&
501 (GetUTFCode(pattern) != 0))
502 {
503 pattern+=GetUTFOctets(pattern);
504 if (GetUTFCode(pattern) == '\\')
505 {
506 pattern+=GetUTFOctets(pattern);
507 if (GetUTFCode(pattern) == '}')
508 pattern+=GetUTFOctets(pattern);
509 }
510 }
511 }
512 else
513 {
514 while ((GetUTFCode(pattern) != '}') &&
515 (GetUTFCode(pattern) != ',') &&
516 (GetUTFCode(pattern) != 0))
517 {
518 pattern+=GetUTFOctets(pattern);
519 if (GetUTFCode(pattern) == '\\')
520 {
521 pattern+=GetUTFOctets(pattern);
522 if ((GetUTFCode(pattern) == '}') ||
523 (GetUTFCode(pattern) == ','))
524 pattern+=GetUTFOctets(pattern);
525 }
526 }
527 }
528 if (GetUTFCode(pattern) != 0)
529 pattern+=GetUTFOctets(pattern);
530 }
531 break;
532 }
cristyecbe37f2010-04-22 13:50:04 +0000533 case '\\':
534 {
535 pattern+=GetUTFOctets(pattern);
cristy4705fe82010-04-23 16:20:03 +0000536 if (GetUTFCode(pattern) == 0)
537 break;
cristyecbe37f2010-04-22 13:50:04 +0000538 }
cristy3ed852e2009-09-05 21:47:34 +0000539 default:
540 {
541 if (case_insensitive != MagickFalse)
542 {
543 if (tolower((int) GetUTFCode(expression)) !=
544 tolower((int) GetUTFCode(pattern)))
545 {
546 done=MagickTrue;
547 break;
548 }
549 }
550 else
551 if (GetUTFCode(expression) != GetUTFCode(pattern))
552 {
553 done=MagickTrue;
554 break;
555 }
556 expression+=GetUTFOctets(expression);
557 pattern+=GetUTFOctets(pattern);
558 }
559 }
560 }
561 while (GetUTFCode(pattern) == '*')
562 pattern+=GetUTFOctets(pattern);
563 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
564 MagickTrue : MagickFalse;
565 return(match);
566}
567
568/*
569%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
570% %
571% %
572% %
573+ I s G l o b %
574% %
575% %
576% %
577%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
578%
579% IsGlob() returns MagickTrue if the path specification contains a globbing
580% pattern.
581%
582% The format of the IsGlob method is:
583%
%      MagickBooleanType IsGlob(const char *path)
585%
586% A description of each parameter follows:
587%
588% o path: the path.
589%
590*/
cristy7832dc22011-09-05 01:21:53 +0000591MagickPrivate MagickBooleanType IsGlob(const char *path)
cristy3ed852e2009-09-05 21:47:34 +0000592{
593 MagickBooleanType
cristya21ae822012-11-11 17:12:12 +0000594 status = MagickFalse;
595
596 register const char
597 *p;
cristy3ed852e2009-09-05 21:47:34 +0000598
599 if (IsPathAccessible(path) != MagickFalse)
600 return(MagickFalse);
cristya21ae822012-11-11 17:12:12 +0000601 for (p=path; *p != '\0'; p++)
602 {
603 switch (*p)
604 {
605 case '*':
606 case '?':
607 case '{':
608 case '}':
609 case '[':
610 case ']':
611 {
612 status=MagickTrue;
613 break;
614 }
615 default:
616 break;
617 }
618 }
cristy3ed852e2009-09-05 21:47:34 +0000619 return(status);
620}
621
622/*
623%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
624% %
625% %
626% %
627% T o k e n i z e r %
628% %
629% %
630% %
631%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
632%
633% Tokenizer() is a generalized, finite state token parser. It extracts tokens
634% one at a time from a string of characters. The characters used for white
635% space, for break characters, and for quotes can be specified. Also,
636% characters in the string can be preceded by a specifiable escape character
637% which removes any special meaning the character may have.
638%
639% Here is some terminology:
640%
641% o token: A single unit of information in the form of a group of
642% characters.
643%
%    o white space: Space that gets ignored (except within quotes or when
645% escaped), like blanks and tabs. in addition, white space terminates a
646% non-quoted token.
647%
648% o break set: One or more characters that separates non-quoted tokens.
649% Commas are a common break character. The usage of break characters to
650% signal the end of a token is the same as that of white space, except
651% multiple break characters with nothing or only white space between
652% generate a null token for each two break characters together.
653%
654% For example, if blank is set to be the white space and comma is set to
655% be the break character, the line
656%
657% A, B, C , , DEF
658%
659% ... consists of 5 tokens:
660%
661% 1) "A"
662% 2) "B"
663% 3) "C"
664% 4) "" (the null string)
665% 5) "DEF"
666%
667% o Quote character: A character that, when surrounding a group of other
668% characters, causes the group of characters to be treated as a single
669% token, no matter how many white spaces or break characters exist in
670% the group. Also, a token always terminates after the closing quote.
671% For example, if ' is the quote character, blank is white space, and
672% comma is the break character, the following string
673%
674% A, ' B, CD'EF GHI
675%
676% ... consists of 4 tokens:
677%
678% 1) "A"
679% 2) " B, CD" (note the blanks & comma)
680% 3) "EF"
681% 4) "GHI"
682%
683% The quote characters themselves do not appear in the resultant
684% tokens. The double quotes are delimiters i use here for
685% documentation purposes only.
686%
687% o Escape character: A character which itself is ignored but which
688% causes the next character to be used as is. ^ and \ are often used
689% as escape characters. An escape in the last position of the string
690% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
691% and non-escape) character. For example, assume white space, break
692% character, and quote are the same as in the above examples, and
693% further, assume that ^ is the escape character. Then, in the string
694%
695% ABC, ' DEF ^' GH' I ^ J K^ L ^
696%
697% ... there are 7 tokens:
698%
699% 1) "ABC"
700% 2) " DEF ' GH"
701% 3) "I"
702% 4) " " (a lone blank)
703% 5) "J"
704% 6) "K L"
705% 7) "^" (passed as is at end of line)
706%
707% The format of the Tokenizer method is:
708%
709% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
710% const size_t max_token_length,const char *line,const char *white,
711% const char *break_set,const char *quote,const char escape,
712% char *breaker,int *next,char *quoted)
713%
714% A description of each parameter follows:
715%
%    o flag: right now, only the low order 2 bits are used.
717%
718% 1 => convert non-quoted tokens to upper case
719% 2 => convert non-quoted tokens to lower case
720% 0 => do not convert non-quoted tokens
721%
722% o token: a character string containing the returned next token
723%
724% o max_token_length: the maximum size of "token". Characters beyond
725% "max_token_length" are truncated.
726%
%    o line: the string to be parsed.
728%
729% o white: a string of the valid white spaces. example:
730%
731% char whitesp[]={" \t"};
732%
733% blank and tab will be valid white space.
734%
%    o break_set: a string of the valid break characters. example:
736%
737% char breakch[]={";,"};
738%
739% semicolon and comma will be valid break characters.
740%
741% o quote: a string of the valid quote characters. An example would be
742%
%        char quotech[]={"'\""};
744%
745% (this causes single and double quotes to be valid) Note that a
746% token starting with one of these characters needs the same quote
747% character to terminate it.
748%
749% for example:
750%
751% "ABC '
752%
753% is unterminated, but
754%
755% "DEF" and 'GHI'
756%
757% are properly terminated. Note that different quote characters
758% can appear on the same line; only for a given token do the quote
759% characters have to be the same.
760%
761% o escape: the escape character (NOT a string ... only one
762% allowed). Use zero if none is desired.
763%
764% o breaker: the break character used to terminate the current
765% token. If the token was quoted, this will be the quote used. If
766% the token is the last one on the line, this will be zero.
767%
768% o next: this variable points to the first character of the
769% next token. it gets reset by "tokenizer" as it steps through the
770% string. Set it to 0 upon initialization, and leave it alone
771% after that. You can change it if you want to jump around in the
772% string or re-parse from the beginning, but be careful.
773%
774% o quoted: set to True if the token was quoted and MagickFalse
775% if not. You may need this information (for example: in C, a
776% string with quotes around it is a character string, while one
777% without is an identifier).
778%
779% o result: 0 if we haven't reached EOS (end of string), and 1
780% if we have.
781%
782*/
783
/*
  Tokenizer() parser states.
*/
enum
{
  IN_WHITE = 0,  /* no token character seen yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token finished; waiting for a break or the next token */
};
788
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    offset;

  /*
    Return the index of the first character of string equal to c, or -1 when
    there is none.  The terminating NUL is never considered a match.
  */
  for (offset=0; string[offset] != '\0'; offset++)
    if ((int) string[offset] == c)
      return(offset);
  return(-1);
}
799
800static void StoreToken(TokenInfo *token_info,char *string,
801 size_t max_token_length,int c)
802{
cristybb503372010-05-27 20:51:26 +0000803 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000804 i;
805
806 if ((token_info->offset < 0) ||
807 ((size_t) token_info->offset >= (max_token_length-1)))
808 return;
809 i=token_info->offset++;
810 string[i]=(char) c;
811 if (token_info->state == IN_QUOTE)
812 return;
813 switch (token_info->flag & 0x03)
814 {
815 case 1:
816 {
817 string[i]=(char) toupper(c);
818 break;
819 }
820 case 2:
821 {
822 string[i]=(char) tolower(c);
823 break;
824 }
825 default:
826 break;
827 }
828}
829
830MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
831 char *token,const size_t max_token_length,const char *line,const char *white,
832 const char *break_set,const char *quote,const char escape,char *breaker,
833 int *next,char *quoted)
834{
835 int
836 c;
837
cristybb503372010-05-27 20:51:26 +0000838 register ssize_t
cristy3ed852e2009-09-05 21:47:34 +0000839 i;
840
841 *breaker='\0';
842 *quoted='\0';
843 if (line[*next] == '\0')
844 return(1);
845 token_info->state=IN_WHITE;
846 token_info->quote=(char) MagickFalse;
847 token_info->flag=flag;
848 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
849 {
850 c=(int) line[*next];
851 i=sindex(c,break_set);
852 if (i >= 0)
853 {
854 switch (token_info->state)
855 {
856 case IN_WHITE:
857 case IN_TOKEN:
858 case IN_OZONE:
859 {
860 (*next)++;
861 *breaker=break_set[i];
862 token[token_info->offset]='\0';
863 return(0);
864 }
865 case IN_QUOTE:
866 {
867 StoreToken(token_info,token,max_token_length,c);
868 break;
869 }
870 }
871 continue;
872 }
873 i=sindex(c,quote);
874 if (i >= 0)
875 {
876 switch (token_info->state)
877 {
878 case IN_WHITE:
879 {
880 token_info->state=IN_QUOTE;
881 token_info->quote=quote[i];
882 *quoted=(char) MagickTrue;
883 break;
884 }
885 case IN_QUOTE:
886 {
887 if (quote[i] != token_info->quote)
888 StoreToken(token_info,token,max_token_length,c);
889 else
890 {
891 token_info->state=IN_OZONE;
892 token_info->quote='\0';
893 }
894 break;
895 }
896 case IN_TOKEN:
897 case IN_OZONE:
898 {
899 *breaker=(char) c;
900 token[token_info->offset]='\0';
901 return(0);
902 }
903 }
904 continue;
905 }
906 i=sindex(c,white);
907 if (i >= 0)
908 {
909 switch (token_info->state)
910 {
911 case IN_WHITE:
912 case IN_OZONE:
913 break;
914 case IN_TOKEN:
915 {
916 token_info->state=IN_OZONE;
917 break;
918 }
919 case IN_QUOTE:
920 {
921 StoreToken(token_info,token,max_token_length,c);
922 break;
923 }
924 }
925 continue;
926 }
927 if (c == (int) escape)
928 {
929 if (line[(*next)+1] == '\0')
930 {
931 *breaker='\0';
932 StoreToken(token_info,token,max_token_length,c);
933 (*next)++;
934 token[token_info->offset]='\0';
935 return(0);
936 }
937 switch (token_info->state)
938 {
939 case IN_WHITE:
940 {
941 (*next)--;
942 token_info->state=IN_TOKEN;
943 break;
944 }
945 case IN_TOKEN:
946 case IN_QUOTE:
947 {
948 (*next)++;
949 c=(int) line[*next];
950 StoreToken(token_info,token,max_token_length,c);
951 break;
952 }
953 case IN_OZONE:
954 {
955 token[token_info->offset]='\0';
956 return(0);
957 }
958 }
959 continue;
960 }
961 switch (token_info->state)
962 {
963 case IN_WHITE:
cristy326182d2014-05-18 21:48:30 +0000964 {
cristy3ed852e2009-09-05 21:47:34 +0000965 token_info->state=IN_TOKEN;
cristy326182d2014-05-18 21:48:30 +0000966 StoreToken(token_info,token,max_token_length,c);
967 break;
968 }
cristy3ed852e2009-09-05 21:47:34 +0000969 case IN_TOKEN:
970 case IN_QUOTE:
971 {
972 StoreToken(token_info,token,max_token_length,c);
973 break;
974 }
975 case IN_OZONE:
976 {
977 token[token_info->offset]='\0';
978 return(0);
979 }
980 }
981 }
982 token[token_info->offset]='\0';
983 return(0);
984}