blob: 34ddfbac2de3d4456589c82f2f4bc6068939b608 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Reid Spencer5f016e22007-07-11 17:01:13 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the NumericLiteralParser, CharLiteralParser, and
11// StringLiteralParser interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/LiteralSupport.h"
16#include "clang/Lex/Preprocessor.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000017#include "clang/Basic/Diagnostic.h"
Chris Lattner136f93a2007-07-16 06:55:01 +000018#include "clang/Basic/SourceManager.h"
19#include "clang/Basic/TargetInfo.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000020#include "llvm/ADT/StringExtras.h"
21using namespace clang;
22
/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
/// not valid.  Both upper and lower case letters 'a'-'f' are accepted.
static int HexDigitValue(char C) {
  if (C >= '0' && C <= '9') return C-'0';
  if (C >= 'a' && C <= 'f') return C-'a'+10;
  if (C >= 'A' && C <= 'F') return C-'A'+10;
  return -1;
}
31
32/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
33/// either a character or a string literal.
34static unsigned ProcessCharEscape(const char *&ThisTokBuf,
35 const char *ThisTokEnd, bool &HadError,
36 SourceLocation Loc, bool IsWide,
37 Preprocessor &PP) {
38 // Skip the '\' char.
39 ++ThisTokBuf;
40
41 // We know that this character can't be off the end of the buffer, because
42 // that would have been \", which would not have been the end of string.
43 unsigned ResultChar = *ThisTokBuf++;
44 switch (ResultChar) {
45 // These map to themselves.
46 case '\\': case '\'': case '"': case '?': break;
47
48 // These have fixed mappings.
49 case 'a':
50 // TODO: K&R: the meaning of '\\a' is different in traditional C
51 ResultChar = 7;
52 break;
53 case 'b':
54 ResultChar = 8;
55 break;
56 case 'e':
57 PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
58 ResultChar = 27;
59 break;
60 case 'f':
61 ResultChar = 12;
62 break;
63 case 'n':
64 ResultChar = 10;
65 break;
66 case 'r':
67 ResultChar = 13;
68 break;
69 case 't':
70 ResultChar = 9;
71 break;
72 case 'v':
73 ResultChar = 11;
74 break;
75
76 //case 'u': case 'U': // FIXME: UCNs.
77 case 'x': { // Hex escape.
78 ResultChar = 0;
79 if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
80 PP.Diag(Loc, diag::err_hex_escape_no_digits);
81 HadError = 1;
82 break;
83 }
84
85 // Hex escapes are a maximal series of hex digits.
86 bool Overflow = false;
87 for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
88 int CharVal = HexDigitValue(ThisTokBuf[0]);
89 if (CharVal == -1) break;
Chris Lattnerb8128142007-09-03 18:28:41 +000090 Overflow |= (ResultChar & 0xF0000000) ? true : false; // About to shift out a digit?
Reid Spencer5f016e22007-07-11 17:01:13 +000091 ResultChar <<= 4;
92 ResultChar |= CharVal;
93 }
94
95 // See if any bits will be truncated when evaluated as a character.
Ted Kremenek9c728dc2007-12-12 22:39:36 +000096 unsigned CharWidth = IsWide
97 ? PP.getTargetInfo().getWCharWidth(PP.getFullLoc(Loc))
98 : PP.getTargetInfo().getCharWidth(PP.getFullLoc(Loc));
99
Reid Spencer5f016e22007-07-11 17:01:13 +0000100 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
101 Overflow = true;
102 ResultChar &= ~0U >> (32-CharWidth);
103 }
104
105 // Check for overflow.
106 if (Overflow) // Too many digits to fit in
107 PP.Diag(Loc, diag::warn_hex_escape_too_large);
108 break;
109 }
110 case '0': case '1': case '2': case '3':
111 case '4': case '5': case '6': case '7': {
112 // Octal escapes.
113 --ThisTokBuf;
114 ResultChar = 0;
115
116 // Octal escapes are a series of octal digits with maximum length 3.
117 // "\0123" is a two digit sequence equal to "\012" "3".
118 unsigned NumDigits = 0;
119 do {
120 ResultChar <<= 3;
121 ResultChar |= *ThisTokBuf++ - '0';
122 ++NumDigits;
123 } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
124 ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
125
126 // Check for overflow. Reject '\777', but not L'\777'.
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000127 unsigned CharWidth = IsWide
128 ? PP.getTargetInfo().getWCharWidth(PP.getFullLoc(Loc))
129 : PP.getTargetInfo().getCharWidth(PP.getFullLoc(Loc));
130
Reid Spencer5f016e22007-07-11 17:01:13 +0000131 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
132 PP.Diag(Loc, diag::warn_octal_escape_too_large);
133 ResultChar &= ~0U >> (32-CharWidth);
134 }
135 break;
136 }
137
138 // Otherwise, these are not valid escapes.
139 case '(': case '{': case '[': case '%':
140 // GCC accepts these as extensions. We warn about them as such though.
141 if (!PP.getLangOptions().NoExtensions) {
142 PP.Diag(Loc, diag::ext_nonstandard_escape,
143 std::string()+(char)ResultChar);
144 break;
145 }
146 // FALL THROUGH.
147 default:
148 if (isgraph(ThisTokBuf[0])) {
149 PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
150 } else {
151 PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
152 }
153 break;
154 }
155
156 return ResultChar;
157}
158
159
160
161
///       integer-constant: [C99 6.4.4.1]
///         decimal-constant integer-suffix
///         octal-constant integer-suffix
///         hexadecimal-constant integer-suffix
///       decimal-constant:
///         nonzero-digit
///         decimal-constant digit
///       octal-constant:
///         0
///         octal-constant octal-digit
///       hexadecimal-constant:
///         hexadecimal-prefix hexadecimal-digit
///         hexadecimal-constant hexadecimal-digit
///       hexadecimal-prefix: one of
///         0x 0X
///       integer-suffix:
///         unsigned-suffix [long-suffix]
///         unsigned-suffix [long-long-suffix]
///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-suffix]
///       nonzero-digit:
///         1 2 3 4 5 6 7 8 9
///       octal-digit:
///         0 1 2 3 4 5 6 7
///       hexadecimal-digit:
///         0 1 2 3 4 5 6 7 8 9
///         a b c d e f
///         A B C D E F
///       unsigned-suffix: one of
///         u U
///       long-suffix: one of
///         l L
///       long-long-suffix: one of
///         ll LL
///
///       floating-constant: [C99 6.4.4.2]
///         TODO: add rules...
///
200
201NumericLiteralParser::
202NumericLiteralParser(const char *begin, const char *end,
203 SourceLocation TokLoc, Preprocessor &pp)
204 : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
205 s = DigitsBegin = begin;
206 saw_exponent = false;
207 saw_period = false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000208 isLong = false;
209 isUnsigned = false;
210 isLongLong = false;
Chris Lattner6e400c22007-08-26 03:29:23 +0000211 isFloat = false;
Chris Lattner506b8de2007-08-26 01:58:14 +0000212 isImaginary = false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000213 hadError = false;
214
215 if (*s == '0') { // parse radix
216 s++;
217 if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
218 s++;
219 radix = 16;
220 DigitsBegin = s;
221 s = SkipHexDigits(s);
222 if (s == ThisTokEnd) {
223 // Done.
224 } else if (*s == '.') {
225 s++;
226 saw_period = true;
227 s = SkipHexDigits(s);
228 }
229 // A binary exponent can appear with or with a '.'. If dotted, the
230 // binary exponent is required.
Chris Lattner921e9ff2007-11-14 16:14:50 +0000231 if ((*s == 'p' || *s == 'P') && PP.getLangOptions().HexFloats) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000232 s++;
233 saw_exponent = true;
234 if (*s == '+' || *s == '-') s++; // sign
235 const char *first_non_digit = SkipDigits(s);
236 if (first_non_digit == s) {
237 Diag(TokLoc, diag::err_exponent_has_no_digits);
238 return;
239 } else {
240 s = first_non_digit;
241 }
242 } else if (saw_period) {
243 Diag(TokLoc, diag::err_hexconstant_requires_exponent);
244 return;
245 }
246 } else if (*s == 'b' || *s == 'B') {
247 // 0b101010 is a GCC extension.
248 ++s;
249 radix = 2;
250 DigitsBegin = s;
251 s = SkipBinaryDigits(s);
252 if (s == ThisTokEnd) {
253 // Done.
254 } else if (isxdigit(*s)) {
255 Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1));
256 return;
257 }
258 PP.Diag(TokLoc, diag::ext_binary_literal);
259 } else {
260 // For now, the radix is set to 8. If we discover that we have a
261 // floating point constant, the radix will change to 10. Octal floating
262 // point constants are not permitted (only decimal and hexadecimal).
263 radix = 8;
264 DigitsBegin = s;
265 s = SkipOctalDigits(s);
266 if (s == ThisTokEnd) {
267 // Done.
Christopher Lamb016765e2007-11-29 06:06:27 +0000268 } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
Chris Lattner136f93a2007-07-16 06:55:01 +0000269 TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
Reid Spencer5f016e22007-07-11 17:01:13 +0000270 Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
271 return;
272 } else if (*s == '.') {
273 s++;
274 radix = 10;
275 saw_period = true;
276 s = SkipDigits(s);
277 }
278 if (*s == 'e' || *s == 'E') { // exponent
279 s++;
280 radix = 10;
281 saw_exponent = true;
282 if (*s == '+' || *s == '-') s++; // sign
283 const char *first_non_digit = SkipDigits(s);
284 if (first_non_digit == s) {
285 Diag(TokLoc, diag::err_exponent_has_no_digits);
286 return;
287 } else {
288 s = first_non_digit;
289 }
290 }
291 }
292 } else { // the first digit is non-zero
293 radix = 10;
294 s = SkipDigits(s);
295 if (s == ThisTokEnd) {
296 // Done.
Christopher Lamb016765e2007-11-29 06:06:27 +0000297 } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000298 Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1));
299 return;
300 } else if (*s == '.') {
301 s++;
302 saw_period = true;
303 s = SkipDigits(s);
304 }
305 if (*s == 'e' || *s == 'E') { // exponent
306 s++;
307 saw_exponent = true;
308 if (*s == '+' || *s == '-') s++; // sign
309 const char *first_non_digit = SkipDigits(s);
310 if (first_non_digit == s) {
311 Diag(TokLoc, diag::err_exponent_has_no_digits);
312 return;
313 } else {
314 s = first_non_digit;
315 }
316 }
317 }
318
319 SuffixBegin = s;
Chris Lattner506b8de2007-08-26 01:58:14 +0000320
321 // Parse the suffix. At this point we can classify whether we have an FP or
322 // integer constant.
323 bool isFPConstant = isFloatingLiteral();
324
325 // Loop over all of the characters of the suffix. If we see something bad,
326 // we break out of the loop.
327 for (; s != ThisTokEnd; ++s) {
328 switch (*s) {
329 case 'f': // FP Suffix for "float"
330 case 'F':
331 if (!isFPConstant) break; // Error for integer constant.
Chris Lattner6e400c22007-08-26 03:29:23 +0000332 if (isFloat || isLong) break; // FF, LF invalid.
333 isFloat = true;
Chris Lattner506b8de2007-08-26 01:58:14 +0000334 continue; // Success.
335 case 'u':
336 case 'U':
337 if (isFPConstant) break; // Error for floating constant.
338 if (isUnsigned) break; // Cannot be repeated.
339 isUnsigned = true;
340 continue; // Success.
341 case 'l':
342 case 'L':
343 if (isLong || isLongLong) break; // Cannot be repeated.
Chris Lattner6e400c22007-08-26 03:29:23 +0000344 if (isFloat) break; // LF invalid.
Chris Lattner506b8de2007-08-26 01:58:14 +0000345
346 // Check for long long. The L's need to be adjacent and the same case.
347 if (s+1 != ThisTokEnd && s[1] == s[0]) {
348 if (isFPConstant) break; // long long invalid for floats.
349 isLongLong = true;
350 ++s; // Eat both of them.
351 } else {
Reid Spencer5f016e22007-07-11 17:01:13 +0000352 isLong = true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000353 }
Chris Lattner506b8de2007-08-26 01:58:14 +0000354 continue; // Success.
355 case 'i':
356 case 'I':
357 case 'j':
358 case 'J':
359 if (isImaginary) break; // Cannot be repeated.
360 PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
361 diag::ext_imaginary_constant);
362 isImaginary = true;
363 continue; // Success.
Reid Spencer5f016e22007-07-11 17:01:13 +0000364 }
Chris Lattner506b8de2007-08-26 01:58:14 +0000365 // If we reached here, there was an error.
366 break;
367 }
368
369 // Report an error if there are any.
370 if (s != ThisTokEnd) {
371 TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
372 Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant :
373 diag::err_invalid_suffix_integer_constant,
374 std::string(SuffixBegin, ThisTokEnd));
375 return;
Reid Spencer5f016e22007-07-11 17:01:13 +0000376 }
377}
378
379/// GetIntegerValue - Convert this numeric literal value to an APInt that
380/// matches Val's input width. If there is an overflow, set Val to the low bits
381/// of the result and return true. Otherwise, return false.
382bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
383 Val = 0;
384 s = DigitsBegin;
385
386 llvm::APInt RadixVal(Val.getBitWidth(), radix);
387 llvm::APInt CharVal(Val.getBitWidth(), 0);
388 llvm::APInt OldVal = Val;
389
390 bool OverflowOccurred = false;
391 while (s < SuffixBegin) {
392 unsigned C = HexDigitValue(*s++);
393
394 // If this letter is out of bound for this radix, reject it.
395 assert(C < radix && "NumericLiteralParser ctor should have rejected this");
396
397 CharVal = C;
398
399 // Add the digit to the value in the appropriate radix. If adding in digits
400 // made the value smaller, then this overflowed.
401 OldVal = Val;
402
403 // Multiply by radix, did overflow occur on the multiply?
404 Val *= RadixVal;
405 OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
406
407 OldVal = Val;
408 // Add value, did overflow occur on the value?
409 Val += CharVal;
410 OverflowOccurred |= Val.ult(OldVal);
411 OverflowOccurred |= Val.ult(CharVal);
412 }
413 return OverflowOccurred;
414}
415
Chris Lattner525a0502007-09-22 18:29:59 +0000416llvm::APFloat NumericLiteralParser::
Ted Kremenek427d5af2007-11-26 23:12:30 +0000417GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) {
418 using llvm::APFloat;
419
Ted Kremenek32e61bf2007-11-29 00:54:29 +0000420 llvm::SmallVector<char,256> floatChars;
421 for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i)
422 floatChars.push_back(ThisTokBegin[i]);
423
424 floatChars.push_back('\0');
425
Ted Kremenek427d5af2007-11-26 23:12:30 +0000426 APFloat V (Format, APFloat::fcZero, false);
Ted Kremenek427d5af2007-11-26 23:12:30 +0000427 APFloat::opStatus status;
Ted Kremenek32e61bf2007-11-29 00:54:29 +0000428
429 status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven);
Ted Kremenek427d5af2007-11-26 23:12:30 +0000430
431 if (isExact)
432 *isExact = status == APFloat::opOK;
433
434 return V;
Reid Spencer5f016e22007-07-11 17:01:13 +0000435}
436
437void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
438 const std::string &M) {
439 PP.Diag(Loc, DiagID, M);
440 hadError = true;
441}
442
443
444CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
445 SourceLocation Loc, Preprocessor &PP) {
446 // At this point we know that the character matches the regex "L?'.*'".
447 HadError = false;
448 Value = 0;
449
450 // Determine if this is a wide character.
451 IsWide = begin[0] == 'L';
452 if (IsWide) ++begin;
453
454 // Skip over the entry quote.
455 assert(begin[0] == '\'' && "Invalid token lexed");
456 ++begin;
457
458 // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
459 // size of "value".
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000460 assert(PP.getTargetInfo().getIntWidth(PP.getFullLoc(Loc)) == 32 &&
Reid Spencer5f016e22007-07-11 17:01:13 +0000461 "Assumes sizeof(int) == 4 for now");
462 // FIXME: This assumes that wchar_t is 32-bits for now.
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000463 assert(PP.getTargetInfo().getWCharWidth(PP.getFullLoc(Loc)) == 32 &&
Reid Spencer5f016e22007-07-11 17:01:13 +0000464 "Assumes sizeof(wchar_t) == 4 for now");
465 // FIXME: This extensively assumes that 'char' is 8-bits.
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000466 assert(PP.getTargetInfo().getCharWidth(PP.getFullLoc(Loc)) == 8 &&
Reid Spencer5f016e22007-07-11 17:01:13 +0000467 "Assumes char is 8 bits");
468
469 bool isFirstChar = true;
470 bool isMultiChar = false;
471 while (begin[0] != '\'') {
472 unsigned ResultChar;
473 if (begin[0] != '\\') // If this is a normal character, consume it.
474 ResultChar = *begin++;
475 else // Otherwise, this is an escape character.
476 ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
477
478 // If this is a multi-character constant (e.g. 'abc'), handle it. These are
479 // implementation defined (C99 6.4.4.4p10).
480 if (!isFirstChar) {
481 // If this is the second character being processed, do special handling.
482 if (!isMultiChar) {
483 isMultiChar = true;
484
485 // Warn about discarding the top bits for multi-char wide-character
486 // constants (L'abcd').
487 if (IsWide)
488 PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
489 }
490
491 if (IsWide) {
492 // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
493 Value = 0;
494 } else {
495 // Narrow character literals act as though their value is concatenated
496 // in this implementation.
497 if (((Value << 8) >> 8) != Value)
498 PP.Diag(Loc, diag::warn_char_constant_too_large);
499 Value <<= 8;
500 }
501 }
502
503 Value += ResultChar;
504 isFirstChar = false;
505 }
506
507 // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
508 // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
509 // character constants are not sign extended in the this implementation:
510 // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
511 if (!IsWide && !isMultiChar && (Value & 128) &&
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000512 PP.getTargetInfo().isCharSigned(PP.getFullLoc(Loc)))
Reid Spencer5f016e22007-07-11 17:01:13 +0000513 Value = (signed char)Value;
514}
515
516
///       string-literal: [C99 6.4.5]
///          " [s-char-sequence] "
///         L" [s-char-sequence] "
///       s-char-sequence:
///         s-char
///         s-char-sequence s-char
///       s-char:
///         any source character except the double quote ",
///           backslash \, or newline character
///         escape-character
///         universal-character-name
///       escape-character: [C99 6.4.4.4]
///         \ escape-code
///         universal-character-name
///       escape-code:
///         character-escape-code
///         octal-escape-code
///         hex-escape-code
///       character-escape-code: one of
///         n t b r f v a
///         \ ' " ?
///       octal-escape-code:
///         octal-digit
///         octal-digit octal-digit
///         octal-digit octal-digit octal-digit
///       hex-escape-code:
///         x hex-digit
///         hex-escape-code hex-digit
///       universal-character-name:
///         \u hex-quad
///         \U hex-quad hex-quad
///       hex-quad:
///         hex-digit hex-digit hex-digit hex-digit
///
551StringLiteralParser::
Chris Lattnerd2177732007-07-20 16:59:19 +0000552StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
Reid Spencer5f016e22007-07-11 17:01:13 +0000553 Preprocessor &pp, TargetInfo &t)
554 : PP(pp), Target(t) {
555 // Scan all of the string portions, remember the max individual token length,
556 // computing a bound on the concatenated string length, and see whether any
557 // piece is a wide-string. If any of the string portions is a wide-string
558 // literal, the result is a wide-string literal [C99 6.4.5p4].
559 MaxTokenLength = StringToks[0].getLength();
560 SizeBound = StringToks[0].getLength()-2; // -2 for "".
Chris Lattner22f6bbc2007-10-09 18:02:16 +0000561 AnyWide = StringToks[0].is(tok::wide_string_literal);
Reid Spencer5f016e22007-07-11 17:01:13 +0000562
563 hadError = false;
564
565 // Implement Translation Phase #6: concatenation of string literals
566 /// (C99 5.1.1.2p1). The common case is only one string fragment.
567 for (unsigned i = 1; i != NumStringToks; ++i) {
568 // The string could be shorter than this if it needs cleaning, but this is a
569 // reasonable bound, which is all we need.
570 SizeBound += StringToks[i].getLength()-2; // -2 for "".
571
572 // Remember maximum string piece length.
573 if (StringToks[i].getLength() > MaxTokenLength)
574 MaxTokenLength = StringToks[i].getLength();
575
576 // Remember if we see any wide strings.
Chris Lattner22f6bbc2007-10-09 18:02:16 +0000577 AnyWide |= StringToks[i].is(tok::wide_string_literal);
Reid Spencer5f016e22007-07-11 17:01:13 +0000578 }
579
580
581 // Include space for the null terminator.
582 ++SizeBound;
583
584 // TODO: K&R warning: "traditional C rejects string constant concatenation"
585
586 // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
587 // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
588 wchar_tByteWidth = ~0U;
589 if (AnyWide) {
Ted Kremenek9c728dc2007-12-12 22:39:36 +0000590 wchar_tByteWidth =
591 Target.getWCharWidth(PP.getFullLoc(StringToks[0].getLocation()));
592
Reid Spencer5f016e22007-07-11 17:01:13 +0000593 assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
594 wchar_tByteWidth /= 8;
595 }
596
597 // The output buffer size needs to be large enough to hold wide characters.
598 // This is a worst-case assumption which basically corresponds to L"" "long".
599 if (AnyWide)
600 SizeBound *= wchar_tByteWidth;
601
602 // Size the temporary buffer to hold the result string data.
603 ResultBuf.resize(SizeBound);
604
605 // Likewise, but for each string piece.
606 llvm::SmallString<512> TokenBuf;
607 TokenBuf.resize(MaxTokenLength);
608
609 // Loop over all the strings, getting their spelling, and expanding them to
610 // wide strings as appropriate.
611 ResultPtr = &ResultBuf[0]; // Next byte to fill in.
612
Anders Carlssonee98ac52007-10-15 02:50:23 +0000613 Pascal = false;
614
Reid Spencer5f016e22007-07-11 17:01:13 +0000615 for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
616 const char *ThisTokBuf = &TokenBuf[0];
617 // Get the spelling of the token, which eliminates trigraphs, etc. We know
618 // that ThisTokBuf points to a buffer that is big enough for the whole token
619 // and 'spelled' tokens can only shrink.
620 unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
621 const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
622
623 // TODO: Input character set mapping support.
624
625 // Skip L marker for wide strings.
626 bool ThisIsWide = false;
627 if (ThisTokBuf[0] == 'L') {
628 ++ThisTokBuf;
629 ThisIsWide = true;
630 }
631
632 assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
633 ++ThisTokBuf;
634
Anders Carlssonee98ac52007-10-15 02:50:23 +0000635 // Check if this is a pascal string
636 if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
637 ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
638
639 // If the \p sequence is found in the first token, we have a pascal string
640 // Otherwise, if we already have a pascal string, ignore the first \p
641 if (i == 0) {
642 ++ThisTokBuf;
643 Pascal = true;
644 } else if (Pascal)
645 ThisTokBuf += 2;
646 }
647
Reid Spencer5f016e22007-07-11 17:01:13 +0000648 while (ThisTokBuf != ThisTokEnd) {
649 // Is this a span of non-escape characters?
650 if (ThisTokBuf[0] != '\\') {
651 const char *InStart = ThisTokBuf;
652 do {
653 ++ThisTokBuf;
654 } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
655
656 // Copy the character span over.
657 unsigned Len = ThisTokBuf-InStart;
658 if (!AnyWide) {
659 memcpy(ResultPtr, InStart, Len);
660 ResultPtr += Len;
661 } else {
662 // Note: our internal rep of wide char tokens is always little-endian.
663 for (; Len; --Len, ++InStart) {
664 *ResultPtr++ = InStart[0];
665 // Add zeros at the end.
666 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
667 *ResultPtr++ = 0;
668 }
669 }
670 continue;
671 }
672
673 // Otherwise, this is an escape character. Process it.
674 unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
675 StringToks[i].getLocation(),
676 ThisIsWide, PP);
677
678 // Note: our internal rep of wide char tokens is always little-endian.
679 *ResultPtr++ = ResultChar & 0xFF;
680
681 if (AnyWide) {
682 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
683 *ResultPtr++ = ResultChar >> i*8;
684 }
685 }
686 }
687
688 // Add zero terminator.
689 *ResultPtr = 0;
690 if (AnyWide) {
691 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
692 *ResultPtr++ = 0;
693 }
Anders Carlssonee98ac52007-10-15 02:50:23 +0000694
695 if (Pascal)
696 ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
Reid Spencer5f016e22007-07-11 17:01:13 +0000697}