blob: dbffcef71efb594c41169cdb1a66bf720f61243f [file] [log] [blame]
cristy3ed852e2009-09-05 21:47:34 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% John Cristy %
17% January 1993 %
18% %
19% %
cristy16af1cb2009-12-11 21:38:29 +000020% Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization %
cristy3ed852e2009-09-05 21:47:34 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/memory_.h"
48#include "magick/string_.h"
49#include "magick/token.h"
50#include "magick/token-private.h"
51#include "magick/utility.h"
52
53/*
  Typedef declarations.
55*/
56struct _TokenInfo
57{
58 int
59 state;
60
61 MagickStatusType
62 flag;
63
64 long
65 offset;
66
67 char
68 quote;
69
70 unsigned long
71 signature;
72};
73
74/*
75%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76% %
77% %
78% %
79% A c q u i r e T o k e n I n f o %
80% %
81% %
82% %
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84%
85% AcquireTokenInfo() allocates the TokenInfo structure.
86%
87% The format of the AcquireTokenInfo method is:
88%
89% TokenInfo *AcquireTokenInfo()
90%
91*/
92MagickExport TokenInfo *AcquireTokenInfo(void)
93{
94 TokenInfo
95 *token_info;
96
cristy90823212009-12-12 20:48:33 +000097 token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
cristy3ed852e2009-09-05 21:47:34 +000098 if (token_info == (TokenInfo *) NULL)
99 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100 token_info->signature=MagickSignature;
101 return(token_info);
102}
103
104/*
105%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106% %
107% %
108% %
109% D e s t r o y T o k e n I n f o %
110% %
111% %
112% %
113%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
117%
118% The format of the DestroyTokenInfo method is:
119%
120% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121%
122% A description of each parameter follows:
123%
%    o token_info: Specifies a pointer to a TokenInfo structure.
125%
126*/
127MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128{
129 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130 assert(token_info != (TokenInfo *) NULL);
131 assert(token_info->signature == MagickSignature);
132 token_info->signature=(~MagickSignature);
133 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134 return(token_info);
135}
136
137/*
138%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139% %
140% %
141% %
142+ G e t M a g i c k T o k e n %
143% %
144% %
145% %
146%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147%
148% GetMagickToken() gets a token from the token stream. A token is defined as a
149% sequence of characters delimited by whitespace (e.g. clip-path), a sequence
%  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
151% parenthesis (e.g. rgb(0,0,0)).
152%
153% The format of the GetMagickToken method is:
154%
155% void GetMagickToken(const char *start,const char **end,char *token)
156%
157% A description of each parameter follows:
158%
159% o start: the start of the token sequence.
160%
161% o end: point to the end of the token sequence.
162%
163% o token: copy the token to this buffer.
164%
165*/
166MagickExport void GetMagickToken(const char *start,const char **end,char *token)
167{
168 double
169 value;
170
171 register const char
172 *p;
173
174 register long
175 i;
176
177 i=0;
178 for (p=start; *p != '\0'; )
179 {
180 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
181 p++;
182 if (*p == '\0')
183 break;
184 switch (*p)
185 {
186 case '"':
187 case '\'':
188 case '`':
189 case '{':
190 {
191 register char
192 escape;
193
194 switch (*p)
195 {
196 case '"': escape='"'; break;
197 case '\'': escape='\''; break;
198 case '`': escape='\''; break;
199 case '{': escape='}'; break;
200 default: escape=(*p); break;
201 }
202 for (p++; *p != '\0'; p++)
203 {
204 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
205 p++;
206 else
207 if (*p == escape)
208 {
209 p++;
210 break;
211 }
212 token[i++]=(*p);
213 }
214 break;
215 }
216 case '/':
217 {
218 token[i++]=(*p++);
219 if ((*p == '>') || (*p == '/'))
220 token[i++]=(*p++);
221 break;
222 }
223 default:
224 {
225 char
226 *q;
227
228 value=strtod(p,&q);
229 if ((p != q) && (*p != ','))
230 {
231 for ( ; (p < q) && (*p != ','); p++)
232 token[i++]=(*p);
233 if (*p == '%')
234 token[i++]=(*p++);
235 break;
236 }
237 if ((isalpha((int) ((unsigned char) *p)) == 0) &&
238 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
239 {
240 token[i++]=(*p++);
241 break;
242 }
243 for ( ; *p != '\0'; p++)
244 {
245 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
246 (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
247 break;
248 if ((i > 0) && (*p == '<'))
249 break;
250 token[i++]=(*p);
251 if (*p == '>')
252 break;
253 if (*p == '(')
254 for (p++; *p != '\0'; p++)
255 {
256 token[i++]=(*p);
257 if ((*p == ')') && (*(p-1) != '\\'))
258 break;
259 }
260 }
261 break;
262 }
263 }
264 break;
265 }
266 token[i]='\0';
267 if (LocaleNCompare(token,"url(",4) == 0)
268 {
269 ssize_t
270 offset;
271
272 offset=4;
273 if (token[offset] == '#')
274 offset++;
275 i=(long) strlen(token);
276 (void) CopyMagickString(token,token+offset,MaxTextExtent);
277 token[i-offset-1]='\0';
278 }
279 while (isspace((int) ((unsigned char) *p)) != 0)
280 p++;
281 if (end != (const char **) NULL)
282 *end=(const char *) p;
283}
284
285/*
286%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
287% %
288% %
289% %
290% G l o b E x p r e s s i o n %
291% %
292% %
293% %
294%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
295%
296% GlobExpression() returns MagickTrue if the expression matches the pattern.
297%
298% The format of the GlobExpression function is:
299%
300% MagickBooleanType GlobExpression(const char *expression,
301% const char *pattern,const MagickBooleanType case_insensitive)
302%
303% A description of each parameter follows:
304%
305% o expression: Specifies a pointer to a text string containing a file name.
306%
307% o pattern: Specifies a pointer to a text string containing a pattern.
308%
309% o case_insensitive: set to MagickTrue to ignore the case when matching
310% an expression.
311%
312*/
313MagickExport MagickBooleanType GlobExpression(const char *expression,
314 const char *pattern,const MagickBooleanType case_insensitive)
315{
316 MagickBooleanType
317 done,
318 match;
319
320 register const char
321 *p;
322
323 /*
324 Return on empty pattern or '*'.
325 */
326 if (pattern == (char *) NULL)
327 return(MagickTrue);
328 if (GetUTFCode(pattern) == 0)
329 return(MagickTrue);
330 if (LocaleCompare(pattern,"*") == 0)
331 return(MagickTrue);
332 p=pattern+strlen(pattern)-1;
333 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
334 {
335 ExceptionInfo
336 *exception;
337
338 ImageInfo
339 *image_info;
340
341 /*
342 Determine if pattern is a scene, i.e. img0001.pcd[2].
343 */
344 image_info=AcquireImageInfo();
345 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
346 exception=AcquireExceptionInfo();
cristyd965a422010-03-03 17:47:35 +0000347 (void) SetImageInfo(image_info,0,exception);
cristy3ed852e2009-09-05 21:47:34 +0000348 exception=DestroyExceptionInfo(exception);
349 if (LocaleCompare(image_info->filename,pattern) != 0)
350 {
351 image_info=DestroyImageInfo(image_info);
352 return(MagickFalse);
353 }
354 image_info=DestroyImageInfo(image_info);
355 }
356 /*
357 Evaluate glob expression.
358 */
359 done=MagickFalse;
360 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
361 {
362 if (GetUTFCode(expression) == 0)
363 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
364 break;
365 switch (GetUTFCode(pattern))
366 {
cristy3ed852e2009-09-05 21:47:34 +0000367 case '*':
368 {
369 MagickBooleanType
370 status;
371
372 status=MagickFalse;
373 pattern+=GetUTFOctets(pattern);
374 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
375 {
376 status=GlobExpression(expression,pattern,case_insensitive);
377 expression+=GetUTFOctets(expression);
378 }
379 if (status != MagickFalse)
380 {
381 while (GetUTFCode(expression) != 0)
382 expression+=GetUTFOctets(expression);
383 while (GetUTFCode(pattern) != 0)
384 pattern+=GetUTFOctets(pattern);
385 }
386 break;
387 }
388 case '[':
389 {
cristybd512462010-02-13 02:13:14 +0000390 long
cristy3ed852e2009-09-05 21:47:34 +0000391 c;
392
393 pattern+=GetUTFOctets(pattern);
394 for ( ; ; )
395 {
396 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
397 {
398 done=MagickTrue;
399 break;
400 }
401 if (GetUTFCode(pattern) == '\\')
402 {
403 pattern+=GetUTFOctets(pattern);
404 if (GetUTFCode(pattern) == 0)
405 {
406 done=MagickTrue;
407 break;
408 }
409 }
410 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
411 {
412 c=GetUTFCode(pattern);
413 pattern+=GetUTFOctets(pattern);
414 pattern+=GetUTFOctets(pattern);
415 if (GetUTFCode(pattern) == ']')
416 {
417 done=MagickTrue;
418 break;
419 }
420 if (GetUTFCode(pattern) == '\\')
421 {
422 pattern+=GetUTFOctets(pattern);
423 if (GetUTFCode(pattern) == 0)
424 {
425 done=MagickTrue;
426 break;
427 }
428 }
429 if ((GetUTFCode(expression) < c) ||
430 (GetUTFCode(expression) > GetUTFCode(pattern)))
431 {
432 pattern+=GetUTFOctets(pattern);
433 continue;
434 }
435 }
436 else
437 if (GetUTFCode(pattern) != GetUTFCode(expression))
438 {
439 pattern+=GetUTFOctets(pattern);
440 continue;
441 }
442 pattern+=GetUTFOctets(pattern);
443 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
444 {
445 if ((GetUTFCode(pattern) == '\\') &&
446 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
447 pattern+=GetUTFOctets(pattern);
448 pattern+=GetUTFOctets(pattern);
449 }
450 if (GetUTFCode(pattern) != 0)
451 {
452 pattern+=GetUTFOctets(pattern);
453 expression+=GetUTFOctets(expression);
454 }
455 break;
456 }
457 break;
458 }
459 case '?':
460 {
461 pattern+=GetUTFOctets(pattern);
462 expression+=GetUTFOctets(expression);
463 break;
464 }
465 case '{':
466 {
467 register const char
468 *p;
469
470 pattern+=GetUTFOctets(pattern);
471 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
472 {
473 p=expression;
474 match=MagickTrue;
475 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
476 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
477 (match != MagickFalse))
478 {
479 if (GetUTFCode(pattern) == '\\')
480 pattern+=GetUTFOctets(pattern);
481 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
482 MagickFalse;
483 p+=GetUTFOctets(p);
484 pattern+=GetUTFOctets(pattern);
485 }
486 if (GetUTFCode(pattern) == 0)
487 {
488 match=MagickFalse;
489 done=MagickTrue;
490 break;
491 }
492 else
493 if (match != MagickFalse)
494 {
495 expression=p;
496 while ((GetUTFCode(pattern) != '}') &&
497 (GetUTFCode(pattern) != 0))
498 {
499 pattern+=GetUTFOctets(pattern);
500 if (GetUTFCode(pattern) == '\\')
501 {
502 pattern+=GetUTFOctets(pattern);
503 if (GetUTFCode(pattern) == '}')
504 pattern+=GetUTFOctets(pattern);
505 }
506 }
507 }
508 else
509 {
510 while ((GetUTFCode(pattern) != '}') &&
511 (GetUTFCode(pattern) != ',') &&
512 (GetUTFCode(pattern) != 0))
513 {
514 pattern+=GetUTFOctets(pattern);
515 if (GetUTFCode(pattern) == '\\')
516 {
517 pattern+=GetUTFOctets(pattern);
518 if ((GetUTFCode(pattern) == '}') ||
519 (GetUTFCode(pattern) == ','))
520 pattern+=GetUTFOctets(pattern);
521 }
522 }
523 }
524 if (GetUTFCode(pattern) != 0)
525 pattern+=GetUTFOctets(pattern);
526 }
527 break;
528 }
cristyecbe37f2010-04-22 13:50:04 +0000529 case '\\':
530 {
531 pattern+=GetUTFOctets(pattern);
532 if (GetUTFCode(pattern) != 0)
533 pattern+=GetUTFOctets(pattern);
534 }
cristy3ed852e2009-09-05 21:47:34 +0000535 default:
536 {
537 if (case_insensitive != MagickFalse)
538 {
539 if (tolower((int) GetUTFCode(expression)) !=
540 tolower((int) GetUTFCode(pattern)))
541 {
542 done=MagickTrue;
543 break;
544 }
545 }
546 else
547 if (GetUTFCode(expression) != GetUTFCode(pattern))
548 {
549 done=MagickTrue;
550 break;
551 }
552 expression+=GetUTFOctets(expression);
553 pattern+=GetUTFOctets(pattern);
554 }
555 }
556 }
557 while (GetUTFCode(pattern) == '*')
558 pattern+=GetUTFOctets(pattern);
559 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
560 MagickTrue : MagickFalse;
561 return(match);
562}
563
564/*
565%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
566% %
567% %
568% %
569+ I s G l o b %
570% %
571% %
572% %
573%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
574%
575% IsGlob() returns MagickTrue if the path specification contains a globbing
576% pattern.
577%
578% The format of the IsGlob method is:
579%
%      MagickBooleanType IsGlob(const char *path)
581%
582% A description of each parameter follows:
583%
584% o path: the path.
585%
586*/
587MagickExport MagickBooleanType IsGlob(const char *path)
588{
589 MagickBooleanType
590 status;
591
592 if (IsPathAccessible(path) != MagickFalse)
593 return(MagickFalse);
594 status=(strchr(path,'*') != (char *) NULL) ||
595 (strchr(path,'?') != (char *) NULL) ||
596 (strchr(path,'{') != (char *) NULL) ||
597 (strchr(path,'}') != (char *) NULL) ||
598 (strchr(path,'[') != (char *) NULL) ||
599 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
600 return(status);
601}
602
603/*
604%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
605% %
606% %
607% %
608% T o k e n i z e r %
609% %
610% %
611% %
612%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
613%
614% Tokenizer() is a generalized, finite state token parser. It extracts tokens
615% one at a time from a string of characters. The characters used for white
616% space, for break characters, and for quotes can be specified. Also,
617% characters in the string can be preceded by a specifiable escape character
618% which removes any special meaning the character may have.
619%
620% Here is some terminology:
621%
622% o token: A single unit of information in the form of a group of
623% characters.
624%
%    o white space: Space that gets ignored (except within quotes or when
626% escaped), like blanks and tabs. in addition, white space terminates a
627% non-quoted token.
628%
629% o break set: One or more characters that separates non-quoted tokens.
630% Commas are a common break character. The usage of break characters to
631% signal the end of a token is the same as that of white space, except
632% multiple break characters with nothing or only white space between
633% generate a null token for each two break characters together.
634%
635% For example, if blank is set to be the white space and comma is set to
636% be the break character, the line
637%
638% A, B, C , , DEF
639%
640% ... consists of 5 tokens:
641%
642% 1) "A"
643% 2) "B"
644% 3) "C"
645% 4) "" (the null string)
646% 5) "DEF"
647%
648% o Quote character: A character that, when surrounding a group of other
649% characters, causes the group of characters to be treated as a single
650% token, no matter how many white spaces or break characters exist in
651% the group. Also, a token always terminates after the closing quote.
652% For example, if ' is the quote character, blank is white space, and
653% comma is the break character, the following string
654%
655% A, ' B, CD'EF GHI
656%
657% ... consists of 4 tokens:
658%
659% 1) "A"
660% 2) " B, CD" (note the blanks & comma)
661% 3) "EF"
662% 4) "GHI"
663%
664% The quote characters themselves do not appear in the resultant
665% tokens. The double quotes are delimiters i use here for
666% documentation purposes only.
667%
668% o Escape character: A character which itself is ignored but which
669% causes the next character to be used as is. ^ and \ are often used
670% as escape characters. An escape in the last position of the string
671% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
672% and non-escape) character. For example, assume white space, break
673% character, and quote are the same as in the above examples, and
674% further, assume that ^ is the escape character. Then, in the string
675%
676% ABC, ' DEF ^' GH' I ^ J K^ L ^
677%
678% ... there are 7 tokens:
679%
680% 1) "ABC"
681% 2) " DEF ' GH"
682% 3) "I"
683% 4) " " (a lone blank)
684% 5) "J"
685% 6) "K L"
686% 7) "^" (passed as is at end of line)
687%
688% The format of the Tokenizer method is:
689%
690% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
691% const size_t max_token_length,const char *line,const char *white,
692% const char *break_set,const char *quote,const char escape,
693% char *breaker,int *next,char *quoted)
694%
695% A description of each parameter follows:
696%
%    o flag: right now, only the low order 2 bits are used.
698%
699% 1 => convert non-quoted tokens to upper case
700% 2 => convert non-quoted tokens to lower case
701% 0 => do not convert non-quoted tokens
702%
703% o token: a character string containing the returned next token
704%
705% o max_token_length: the maximum size of "token". Characters beyond
706% "max_token_length" are truncated.
707%
%    o line: the string to be parsed.
709%
710% o white: a string of the valid white spaces. example:
711%
712% char whitesp[]={" \t"};
713%
714% blank and tab will be valid white space.
715%
716% o break: a string of the valid break characters. example:
717%
718% char breakch[]={";,"};
719%
720% semicolon and comma will be valid break characters.
721%
722% o quote: a string of the valid quote characters. An example would be
723%
%           char quote[]={"'\""};
725%
726% (this causes single and double quotes to be valid) Note that a
727% token starting with one of these characters needs the same quote
728% character to terminate it.
729%
730% for example:
731%
732% "ABC '
733%
734% is unterminated, but
735%
736% "DEF" and 'GHI'
737%
738% are properly terminated. Note that different quote characters
739% can appear on the same line; only for a given token do the quote
740% characters have to be the same.
741%
742% o escape: the escape character (NOT a string ... only one
743% allowed). Use zero if none is desired.
744%
745% o breaker: the break character used to terminate the current
746% token. If the token was quoted, this will be the quote used. If
747% the token is the last one on the line, this will be zero.
748%
749% o next: this variable points to the first character of the
750% next token. it gets reset by "tokenizer" as it steps through the
751% string. Set it to 0 upon initialization, and leave it alone
752% after that. You can change it if you want to jump around in the
753% string or re-parse from the beginning, but be careful.
754%
755% o quoted: set to True if the token was quoted and MagickFalse
756% if not. You may need this information (for example: in C, a
757% string with quotes around it is a character string, while one
758% without is an identifier).
759%
760% o result: 0 if we haven't reached EOS (end of string), and 1
761% if we have.
762%
763*/
764
/*
  Tokenizer() states.
*/
#define IN_WHITE  0  /* initial state: no token character seen yet */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token finished (closing quote, or white space after a
                        token); the next token character ends the call */
769
/*
  Return the zero-based position of the first character in `string` equal to
  `c`, or -1 when there is none.  The terminating NUL is never matched.
*/
static long sindex(int c,const char *string)
{
  long
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
780
781static void StoreToken(TokenInfo *token_info,char *string,
782 size_t max_token_length,int c)
783{
784 register long
785 i;
786
787 if ((token_info->offset < 0) ||
788 ((size_t) token_info->offset >= (max_token_length-1)))
789 return;
790 i=token_info->offset++;
791 string[i]=(char) c;
792 if (token_info->state == IN_QUOTE)
793 return;
794 switch (token_info->flag & 0x03)
795 {
796 case 1:
797 {
798 string[i]=(char) toupper(c);
799 break;
800 }
801 case 2:
802 {
803 string[i]=(char) tolower(c);
804 break;
805 }
806 default:
807 break;
808 }
809}
810
811MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
812 char *token,const size_t max_token_length,const char *line,const char *white,
813 const char *break_set,const char *quote,const char escape,char *breaker,
814 int *next,char *quoted)
815{
816 int
817 c;
818
819 register long
820 i;
821
822 *breaker='\0';
823 *quoted='\0';
824 if (line[*next] == '\0')
825 return(1);
826 token_info->state=IN_WHITE;
827 token_info->quote=(char) MagickFalse;
828 token_info->flag=flag;
829 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
830 {
831 c=(int) line[*next];
832 i=sindex(c,break_set);
833 if (i >= 0)
834 {
835 switch (token_info->state)
836 {
837 case IN_WHITE:
838 case IN_TOKEN:
839 case IN_OZONE:
840 {
841 (*next)++;
842 *breaker=break_set[i];
843 token[token_info->offset]='\0';
844 return(0);
845 }
846 case IN_QUOTE:
847 {
848 StoreToken(token_info,token,max_token_length,c);
849 break;
850 }
851 }
852 continue;
853 }
854 i=sindex(c,quote);
855 if (i >= 0)
856 {
857 switch (token_info->state)
858 {
859 case IN_WHITE:
860 {
861 token_info->state=IN_QUOTE;
862 token_info->quote=quote[i];
863 *quoted=(char) MagickTrue;
864 break;
865 }
866 case IN_QUOTE:
867 {
868 if (quote[i] != token_info->quote)
869 StoreToken(token_info,token,max_token_length,c);
870 else
871 {
872 token_info->state=IN_OZONE;
873 token_info->quote='\0';
874 }
875 break;
876 }
877 case IN_TOKEN:
878 case IN_OZONE:
879 {
880 *breaker=(char) c;
881 token[token_info->offset]='\0';
882 return(0);
883 }
884 }
885 continue;
886 }
887 i=sindex(c,white);
888 if (i >= 0)
889 {
890 switch (token_info->state)
891 {
892 case IN_WHITE:
893 case IN_OZONE:
894 break;
895 case IN_TOKEN:
896 {
897 token_info->state=IN_OZONE;
898 break;
899 }
900 case IN_QUOTE:
901 {
902 StoreToken(token_info,token,max_token_length,c);
903 break;
904 }
905 }
906 continue;
907 }
908 if (c == (int) escape)
909 {
910 if (line[(*next)+1] == '\0')
911 {
912 *breaker='\0';
913 StoreToken(token_info,token,max_token_length,c);
914 (*next)++;
915 token[token_info->offset]='\0';
916 return(0);
917 }
918 switch (token_info->state)
919 {
920 case IN_WHITE:
921 {
922 (*next)--;
923 token_info->state=IN_TOKEN;
924 break;
925 }
926 case IN_TOKEN:
927 case IN_QUOTE:
928 {
929 (*next)++;
930 c=(int) line[*next];
931 StoreToken(token_info,token,max_token_length,c);
932 break;
933 }
934 case IN_OZONE:
935 {
936 token[token_info->offset]='\0';
937 return(0);
938 }
939 }
940 continue;
941 }
942 switch (token_info->state)
943 {
944 case IN_WHITE:
945 token_info->state=IN_TOKEN;
946 case IN_TOKEN:
947 case IN_QUOTE:
948 {
949 StoreToken(token_info,token,max_token_length,c);
950 break;
951 }
952 case IN_OZONE:
953 {
954 token[token_info->offset]='\0';
955 return(0);
956 }
957 }
958 }
959 token[token_info->offset]='\0';
960 return(0);
961}