| Patrick Benavoli | 68a9128 | 2011-08-31 11:23:23 +0200 | [diff] [blame] | 1 | ///////////////////////////////////////////////////////////////////////////////
|
| 2 | // Tokenizer.cpp
|
| 3 | // =============
|
| 4 | // General purpose string tokenizer (C++ string version)
|
| 5 | //
|
| 6 | // The default delimiters are space(" "), tab(\t), vertical tab(\v), newline(\n),
|
| 7 | // carriage return(\r), and form feed(\f).
|
| 8 | // If you want to use different delimiters, then use setDelimiter() to override
|
| 9 | // the delimiters. Note that the delimiter string can hold multiple characters.
|
| 10 | //
|
| 11 | // AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
|
| 12 | // CREATED: 2005-05-25
|
| 13 | // UPDATED: 2011-03-08
|
| 14 | ///////////////////////////////////////////////////////////////////////////////
|
| 15 |
|
| 16 | #include "Tokenizer.h"
|
| 17 |
|
| 18 |
|
| 19 | ///////////////////////////////////////////////////////////////////////////////
|
| 20 | // constructor
|
| 21 | ///////////////////////////////////////////////////////////////////////////////
|
| 22 | Tokenizer::Tokenizer() : buffer(""), token(""), delimiter(DEFAULT_DELIMITER)
|
| 23 | {
|
| 24 | currPos = buffer.begin();
|
| 25 | }
|
| 26 |
|
| 27 | Tokenizer::Tokenizer(const std::string& str, const std::string& delimiter) : buffer(str), token(""), delimiter(delimiter)
|
| 28 | {
|
| 29 | currPos = buffer.begin();
|
| 30 | }
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 | ///////////////////////////////////////////////////////////////////////////////
|
| 35 | // destructor
|
| 36 | ///////////////////////////////////////////////////////////////////////////////
|
| 37 | Tokenizer::~Tokenizer()
|
| 38 | {
|
| 39 | }
|
| 40 |
|
| 41 |
|
| 42 |
|
| 43 | ///////////////////////////////////////////////////////////////////////////////
|
| 44 | // reset string buffer, delimiter and the cursor position
|
| 45 | ///////////////////////////////////////////////////////////////////////////////
|
| 46 | void Tokenizer::set(const std::string& str, const std::string& delimiter)
|
| 47 | {
|
| 48 | this->buffer = str;
|
| 49 | this->delimiter = delimiter;
|
| 50 | this->currPos = buffer.begin();
|
| 51 | }
|
| 52 |
|
| 53 | void Tokenizer::setString(const std::string& str)
|
| 54 | {
|
| 55 | this->buffer = str;
|
| 56 | this->currPos = buffer.begin();
|
| 57 | }
|
| 58 |
|
| 59 | void Tokenizer::setDelimiter(const std::string& delimiter)
|
| 60 | {
|
| 61 | this->delimiter = delimiter;
|
| 62 | this->currPos = buffer.begin();
|
| 63 | }
|
| 64 |
|
| 65 |
|
| 66 |
|
| 67 | ///////////////////////////////////////////////////////////////////////////////
|
| 68 | // return the next token
|
| 69 | // If no more tokens can be found, return "".
|
| 70 | ///////////////////////////////////////////////////////////////////////////////
|
| 71 | std::string Tokenizer::next()
|
| 72 | {
|
| 73 | if(buffer.size() <= 0) return ""; // skip if buffer is empty
|
| 74 |
|
| 75 | token.clear(); // reset token string
|
| 76 |
|
| 77 | this->skipDelimiter(); // skip leading delimiters
|
| 78 |
|
| 79 | // append each char to token string until it meets delimiter
|
| 80 | while(currPos != buffer.end() && !isDelimiter(*currPos))
|
| 81 | {
|
| 82 | token += *currPos;
|
| 83 | ++currPos;
|
| 84 | }
|
| 85 | return token;
|
| 86 | }
|
| 87 |
|
| 88 |
|
| 89 |
|
| 90 | ///////////////////////////////////////////////////////////////////////////////
|
| 91 | // skip any leading delimiters
|
| 92 | ///////////////////////////////////////////////////////////////////////////////
|
| 93 | void Tokenizer::skipDelimiter()
|
| 94 | {
|
| 95 | while(currPos != buffer.end() && isDelimiter(*currPos))
|
| 96 | ++currPos;
|
| 97 | }
|
| 98 |
|
| 99 |
|
| 100 |
|
| 101 | ///////////////////////////////////////////////////////////////////////////////
|
| 102 | // return true if the current character is delimiter
|
| 103 | ///////////////////////////////////////////////////////////////////////////////
|
| 104 | bool Tokenizer::isDelimiter(char c)
|
| 105 | {
|
| 106 | return (delimiter.find(c) != std::string::npos);
|
| 107 | }
|
| 108 |
|
| 109 |
|
| 110 |
|
| 111 | ///////////////////////////////////////////////////////////////////////////////
|
| 112 | // split the input string into multiple tokens
|
| 113 | // This function scans tokens from the current cursor position.
|
| 114 | ///////////////////////////////////////////////////////////////////////////////
|
| 115 | std::vector<std::string> Tokenizer::split()
|
| 116 | {
|
| 117 | std::vector<std::string> tokens;
|
| 118 | std::string token;
|
| 119 | while((token = this->next()) != "")
|
| 120 | {
|
| 121 | tokens.push_back(token);
|
| 122 | }
|
| 123 |
|
| 124 | return tokens;
|
| 125 | }
|