blob: 2b59b160197721d9f64b388bca28680b5912f036 [file] [log] [blame]
Enrico Granataca6c8ee2014-10-30 01:45:39 +00001//===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "lldb/DataFormatters/StringPrinter.h"
11
12#include "lldb/Core/DataExtractor.h"
13#include "lldb/Core/Error.h"
14#include "lldb/Target/Process.h"
15#include "lldb/Target/Target.h"
16
17#include "llvm/Support/ConvertUTF.h"
18
Enrico Granataca6c8ee2014-10-30 01:45:39 +000019#include <ctype.h>
20#include <functional>
21#include <locale>
22
23using namespace lldb;
24using namespace lldb_private;
25using namespace lldb_private::formatters;
26
// A small handle to a run of bytes that may or may not need to be freed.
// std::unique_ptr cannot be used here because its Deleter is a template argument,
// and the same type has to represent both buffers that must be freed and buffers
// (string literals, slices of the input) that must not be freed.
// Ownership is handed over on copy as well as on move: the source object is left
// with a null data pointer, so the deleter runs at most once per buffer.
// It's very specialized to the needs of this file, and not suggested for general use
template <typename T = uint8_t, typename U = char, typename S = size_t>
struct StringPrinterBufferPointer
{
public:

    // Called with the data pointer when an owning handle is destroyed or overwritten.
    typedef std::function<void(const T*)> Deleter;

    // Creates an empty handle: null data, zero size, no deleter.
    // Deliberately not explicit: callers rely on "= {nullptr}" and "return {nullptr}".
    StringPrinterBufferPointer (std::nullptr_t ptr) :
    m_data(nullptr),
    m_size(0),
    m_deleter()
    {}

    // Wraps size elements starting at bytes; deleter (if any) is invoked on
    // bytes when the last handle that received ownership goes away.
    StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) :
    m_data(bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // Same as above for buffers expressed in the alternate element type U
    // (char by default, which is what string literals provide).
    StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) :
    m_data((T*)bytes),
    m_size(size),
    m_deleter(deleter)
    {}

    // Takes over the buffer held by rhs; rhs no longer refers to it afterwards.
    StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr;
    }

    // Copying also transfers the buffer: rhs is left with a null data pointer.
    StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) :
    m_data(rhs.m_data),
    m_size(rhs.m_size),
    m_deleter(rhs.m_deleter)
    {
        rhs.m_data = nullptr; // this is why m_data has to be mutable
    }

    // Returns the wrapped bytes, or nullptr for an empty / handed-over handle.
    const T*
    GetBytes () const
    {
        return m_data;
    }

    // Returns the element count given at construction time.
    S
    GetSize () const
    {
        return m_size;
    }

    // Runs the deleter on the buffer if this handle still refers to one.
    ~StringPrinterBufferPointer ()
    {
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = nullptr;
    }

    // Releases the current buffer (if any) and takes over the one held by rhs.
    StringPrinterBufferPointer&
    operator = (const StringPrinterBufferPointer& rhs)
    {
        // self-assignment must not run the deleter on the buffer we are keeping
        if (this == &rhs)
            return *this;
        if (m_data && m_deleter)
            m_deleter(m_data);
        m_data = rhs.m_data;
        m_size = rhs.m_size;
        m_deleter = rhs.m_deleter;
        rhs.m_data = nullptr;
        return *this;
    }

private:
    mutable const T* m_data; // mutable so that copy operations can null out their source
    S m_size;
    Deleter m_deleter;
};
108
// we declare this for every value of StringElementType but only specialize it for those we care about
// that's deliberate: using an unsupported type then fails at link time instead of silently misbehaving
111template <StringElementType type>
112static StringPrinterBufferPointer<>
113GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next);
114
// Unicode-aware counterpart of isprint(): returns false for code points that
// fall in one of the control / formatting ranges listed below, true otherwise.
static bool
isprint(char32_t codepoint)
{
    struct CodepointRange
    {
        char32_t first;
        char32_t last;
    };

    // inclusive ranges of code points that must not be printed verbatim
    static const CodepointRange g_unprintable[] =
    {
        { 0x0000, 0x001F }, // C0
        { 0x007F, 0x007F }, // C0 (DEL)
        { 0x0080, 0x009F }, // C1
        { 0x2028, 0x2029 }, // line/paragraph separators
        { 0x200E, 0x200F }, // bidirectional text control
        { 0x202A, 0x202E }, // bidirectional text control
        { 0xFFF9, 0xFFFF }  // interlinears and generally specials
    };

    for (const CodepointRange& range : g_unprintable)
    {
        if (codepoint >= range.first && codepoint <= range.last)
            return false;
    }
    return true;
}
141
142template <>
143StringPrinterBufferPointer<>
144GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
145{
146 StringPrinterBufferPointer<> retval = {nullptr};
147
148 switch (*buffer)
149 {
150 case '\a':
151 retval = {"\\a",2};
152 break;
153 case '\b':
154 retval = {"\\b",2};
155 break;
156 case '\f':
157 retval = {"\\f",2};
158 break;
159 case '\n':
160 retval = {"\\n",2};
161 break;
162 case '\r':
163 retval = {"\\r",2};
164 break;
165 case '\t':
166 retval = {"\\t",2};
167 break;
168 case '\v':
169 retval = {"\\v",2};
170 break;
171 case '\"':
172 retval = {"\\\"",2};
173 break;
174 case '\\':
175 retval = {"\\\\",2};
176 break;
177 default:
178 if (isprint(*buffer))
179 retval = {buffer,1};
180 else
181 {
182 retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} };
183 sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer);
184 break;
185 }
186 }
187
188 next = buffer + 1;
189 return retval;
190}
191
// Combines the two bytes of a 2-byte UTF-8 sequence into a code point by
// subtracting the lead marker (0xC0) and continuation marker (0x80) and
// weighting the lead byte by 64. No validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1)
{
    const int lead_bits = c0 - 0xC0;
    const int trail_bits = c1 - 0x80;
    return lead_bits * 64 + trail_bits;
}
// Combines the three bytes of a 3-byte UTF-8 sequence into a code point by
// subtracting the lead marker (0xE0) and the continuation markers (0x80) and
// weighting the bytes by 4096, 64 and 1. No validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2)
{
    const int lead_bits = c0 - 0xE0;
    const int middle_bits = c1 - 0x80;
    const int trail_bits = c2 - 0x80;
    return lead_bits * 4096 + middle_bits * 64 + trail_bits;
}
// Combines the four bytes of a 4-byte UTF-8 sequence into a code point by
// subtracting the lead marker (0xF0) and the continuation markers (0x80) and
// weighting the bytes by 262144, 4096, 64 and 1.
// No validation of the bytes is performed.
static char32_t
ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3)
{
    // each continuation byte contributes its own six bits: c1 (not c2) carries
    // the bits weighted by 4096
    return (c0-240)*262144+(c1-128)*4096+(c2-128)*64+(c3-128);
}
207
208template <>
209StringPrinterBufferPointer<>
210GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
211{
212 StringPrinterBufferPointer<> retval {nullptr};
213
214 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
215
216 if (1+buffer_end-buffer < utf8_encoded_len)
217 {
218 // I don't have enough bytes - print whatever I have left
219 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)};
220 next = buffer_end+1;
221 return retval;
222 }
223
224 char32_t codepoint = 0;
225 switch (utf8_encoded_len)
226 {
227 case 1:
228 // this is just an ASCII byte - ask ASCII
229 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
230 case 2:
231 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1));
232 break;
233 case 3:
234 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2));
235 break;
236 case 4:
237 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3));
238 break;
239 default:
240 // this is probably some bogus non-character thing
241 // just print it as-is and hope to sync up again soon
242 retval = {buffer,1};
243 next = buffer+1;
244 return retval;
245 }
246
247 if (codepoint)
248 {
249 switch (codepoint)
250 {
251 case '\a':
252 retval = {"\\a",2};
253 break;
254 case '\b':
255 retval = {"\\b",2};
256 break;
257 case '\f':
258 retval = {"\\f",2};
259 break;
260 case '\n':
261 retval = {"\\n",2};
262 break;
263 case '\r':
264 retval = {"\\r",2};
265 break;
266 case '\t':
267 retval = {"\\t",2};
268 break;
269 case '\v':
270 retval = {"\\v",2};
271 break;
272 case '\"':
273 retval = {"\\\"",2};
274 break;
275 case '\\':
276 retval = {"\\\\",2};
277 break;
278 default:
279 if (isprint(codepoint))
280 retval = {buffer,utf8_encoded_len};
281 else
282 {
283 retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} };
284 sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint);
285 break;
286 }
287 }
288
289 next = buffer + utf8_encoded_len;
290 return retval;
291 }
292
293 // this should not happen - but just in case.. try to resync at some point
294 retval = {buffer,1};
295 next = buffer+1;
296 return retval;
297}
298
299// Given a sequence of bytes, this function returns:
300// a sequence of bytes to actually print out + a length
301// the following unscanned position of the buffer is in next
302static StringPrinterBufferPointer<>
303GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next)
304{
305 if (!buffer)
306 return {nullptr};
307
308 switch (type)
309 {
310 case StringElementType::ASCII:
311 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next);
312 case StringElementType::UTF8:
313 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next);
314 default:
315 return {nullptr};
316 }
317}
318
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000319// use this call if you already have an LLDB-side buffer for the data
320template<typename SourceDataType>
321static bool
322DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**,
323 const SourceDataType*,
324 UTF8**,
325 UTF8*,
326 ConversionFlags),
327 const DataExtractor& data,
328 Stream& stream,
329 char prefix_token,
330 char quote,
331 uint32_t sourceSize,
332 bool escapeNonPrintables)
333{
334 if (prefix_token != 0)
335 stream.Printf("%c",prefix_token);
336 if (quote != 0)
337 stream.Printf("%c",quote);
338 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd())
339 {
340 const int bufferSPSize = data.GetByteSize();
341 if (sourceSize == 0)
342 {
343 const int origin_encoding = 8*sizeof(SourceDataType);
344 sourceSize = bufferSPSize/(origin_encoding / 4);
345 }
346
347 SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart();
348 SourceDataType *data_end_ptr = data_ptr + sourceSize;
349
350 while (data_ptr < data_end_ptr)
351 {
352 if (!*data_ptr)
353 {
354 data_end_ptr = data_ptr;
355 break;
356 }
357 data_ptr++;
358 }
359
360 data_ptr = (SourceDataType*)data.GetDataStart();
361
362 lldb::DataBufferSP utf8_data_buffer_sp;
363 UTF8* utf8_data_ptr = nullptr;
364 UTF8* utf8_data_end_ptr = nullptr;
365
366 if (ConvertFunction)
367 {
368 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0));
369 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes();
370 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
371 ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion );
372 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr
373 }
374 else
375 {
376 // just copy the pointers - the cast is necessary to make the compiler happy
377 // but this should only happen if we are reading UTF8 data
378 utf8_data_ptr = (UTF8*)data_ptr;
379 utf8_data_end_ptr = (UTF8*)data_end_ptr;
380 }
381
382 // since we tend to accept partial data (and even partially malformed data)
383 // we might end up with no NULL terminator before the end_ptr
384 // hence we need to take a slower route and ensure we stay within boundaries
385 for (;utf8_data_ptr < utf8_data_end_ptr;)
386 {
387 if (!*utf8_data_ptr)
388 break;
389
390 if (escapeNonPrintables)
391 {
392 uint8_t* next_data = nullptr;
393 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data);
394 auto printable_bytes = printable.GetBytes();
395 auto printable_size = printable.GetSize();
396 if (!printable_bytes || !next_data)
397 {
398 // GetPrintable() failed on us - print one byte in a desperate resync attempt
399 printable_bytes = utf8_data_ptr;
400 printable_size = 1;
401 next_data = utf8_data_ptr+1;
402 }
403 for (int c = 0; c < printable_size; c++)
404 stream.Printf("%c", *(printable_bytes+c));
405 utf8_data_ptr = (uint8_t*)next_data;
406 }
407 else
408 {
409 stream.Printf("%c",*utf8_data_ptr);
410 utf8_data_ptr++;
411 }
412 }
413 }
414 if (quote != 0)
415 stream.Printf("%c",quote);
416 return true;
417}
418
Shawn Bestfd137432014-11-04 22:43:34 +0000419namespace lldb_private
420{
421
422namespace formatters
423{
424
425template <>
426bool
427ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options)
428{
429 assert(options.GetStream() && "need a Stream to print the string to");
430 Error my_error;
431 size_t my_data_read;
432
433 ProcessSP process_sp(options.GetProcessSP());
434
435 if (process_sp.get() == nullptr || options.GetLocation() == 0)
436 return false;
437
438 size_t size;
439
440 if (options.GetSourceSize() == 0)
441 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
442 else
443 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary());
444
445 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0));
446
447 my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error);
448
449 if (my_error.Fail())
450 return false;
451
452 char prefix_token = options.GetPrefixToken();
453 char quote = options.GetQuote();
454
455 if (prefix_token != 0)
456 options.GetStream()->Printf("%c%c",prefix_token,quote);
457 else if (quote != 0)
458 options.GetStream()->Printf("%c",quote);
459
460 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize();
461
462 // since we tend to accept partial data (and even partially malformed data)
463 // we might end up with no NULL terminator before the end_ptr
464 // hence we need to take a slower route and ensure we stay within boundaries
465 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);)
466 {
467 if (options.GetEscapeNonPrintables())
468 {
469 uint8_t* next_data = nullptr;
470 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data);
471 auto printable_bytes = printable.GetBytes();
472 auto printable_size = printable.GetSize();
473 if (!printable_bytes || !next_data)
474 {
475 // GetPrintable() failed on us - print one byte in a desperate resync attempt
476 printable_bytes = data;
477 printable_size = 1;
478 next_data = data+1;
479 }
480 for (int c = 0; c < printable_size; c++)
481 options.GetStream()->Printf("%c", *(printable_bytes+c));
482 data = (uint8_t*)next_data;
483 }
484 else
485 {
486 options.GetStream()->Printf("%c",*data);
487 data++;
488 }
489 }
490
491 if (quote != 0)
492 options.GetStream()->Printf("%c",quote);
493
494 return true;
495}
496
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000497template<typename SourceDataType>
498static bool
499ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options,
500 ConversionResult (*ConvertFunction) (const SourceDataType**,
501 const SourceDataType*,
502 UTF8**,
503 UTF8*,
504 ConversionFlags))
505{
506 assert(options.GetStream() && "need a Stream to print the string to");
507
508 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS)
509 return false;
Shawn Bestfd137432014-11-04 22:43:34 +0000510
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000511 lldb::ProcessSP process_sp(options.GetProcessSP());
Shawn Bestfd137432014-11-04 22:43:34 +0000512
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000513 if (!process_sp)
514 return false;
Shawn Bestfd137432014-11-04 22:43:34 +0000515
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000516 const int type_width = sizeof(SourceDataType);
517 const int origin_encoding = 8 * type_width ;
518 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
519 return false;
520 // if not UTF8, I need a conversion function to return proper UTF8
521 if (origin_encoding != 8 && !ConvertFunction)
522 return false;
Shawn Bestfd137432014-11-04 22:43:34 +0000523
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000524 if (!options.GetStream())
525 return false;
Shawn Bestfd137432014-11-04 22:43:34 +0000526
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000527 uint32_t sourceSize = options.GetSourceSize();
528 bool needs_zero_terminator = options.GetNeedsZeroTermination();
Shawn Bestfd137432014-11-04 22:43:34 +0000529
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000530 if (!sourceSize)
531 {
532 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
533 needs_zero_terminator = true;
534 }
535 else
536 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary());
Shawn Bestfd137432014-11-04 22:43:34 +0000537
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000538 const int bufferSPSize = sourceSize * type_width;
Shawn Bestfd137432014-11-04 22:43:34 +0000539
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000540 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0));
Shawn Bestfd137432014-11-04 22:43:34 +0000541
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000542 if (!buffer_sp->GetBytes())
543 return false;
Shawn Bestfd137432014-11-04 22:43:34 +0000544
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000545 Error error;
546 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
Shawn Bestfd137432014-11-04 22:43:34 +0000547
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000548 size_t data_read = 0;
549 if (needs_zero_terminator)
550 data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width);
551 else
552 data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error);
Shawn Bestfd137432014-11-04 22:43:34 +0000553
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000554 if (error.Fail() || data_read == 0)
555 {
556 options.GetStream()->Printf("unable to read data");
557 return true;
558 }
Shawn Bestfd137432014-11-04 22:43:34 +0000559
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000560 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize());
Shawn Bestfd137432014-11-04 22:43:34 +0000561
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000562 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables());
563}
564
565template <>
566bool
Shawn Bestfd137432014-11-04 22:43:34 +0000567ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000568{
569 return ReadUTFBufferAndDumpToStream<UTF8>(options,
570 nullptr);
571}
572
573template <>
574bool
Shawn Bestfd137432014-11-04 22:43:34 +0000575ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000576{
577 return ReadUTFBufferAndDumpToStream<UTF16>(options,
578 ConvertUTF16toUTF8);
579}
580
581template <>
582bool
Shawn Bestfd137432014-11-04 22:43:34 +0000583ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000584{
585 return ReadUTFBufferAndDumpToStream<UTF32>(options,
586 ConvertUTF32toUTF8);
587}
588
589template <>
590bool
Shawn Bestfd137432014-11-04 22:43:34 +0000591ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000592{
593 assert(options.GetStream() && "need a Stream to print the string to");
594
595 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
596}
597
598template <>
599bool
Shawn Bestfd137432014-11-04 22:43:34 +0000600ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000601{
602 // treat ASCII the same as UTF8
603 // FIXME: can we optimize ASCII some more?
604 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
605}
606
607template <>
608bool
Shawn Bestfd137432014-11-04 22:43:34 +0000609ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000610{
611 assert(options.GetStream() && "need a Stream to print the string to");
Shawn Bestfd137432014-11-04 22:43:34 +0000612
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000613 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
614}
615
616template <>
617bool
Shawn Bestfd137432014-11-04 22:43:34 +0000618ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options)
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000619{
620 assert(options.GetStream() && "need a Stream to print the string to");
Shawn Bestfd137432014-11-04 22:43:34 +0000621
Enrico Granataca6c8ee2014-10-30 01:45:39 +0000622 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables());
623}
Shawn Bestfd137432014-11-04 22:43:34 +0000624
625} // namespace formatters
626
627} // namespace lldb_private