blob: 9ea4ea4e18f63b22b694a2ff233a1622b693e247 [file] [log] [blame]
Patrick Benavoli68a91282011-08-31 11:23:23 +02001///////////////////////////////////////////////////////////////////////////////
2// Tokenizer.cpp
3// =============
4// General purpose string tokenizer (C++ string version)
5//
6// The default delimiters are space(" "), tab(\t, \v), newline(\n),
7// carriage return(\r), and form feed(\f).
8// If you want to use different delimiters, then use setDelimiter() to override
9// the delimiters. Note that the delimiter string can hold multiple characters.
10//
11// AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
12// CREATED: 2005-05-25
13// UPDATED: 2011-03-08
14///////////////////////////////////////////////////////////////////////////////
15
16#include "Tokenizer.h"
17
18
19///////////////////////////////////////////////////////////////////////////////
20// constructor
21///////////////////////////////////////////////////////////////////////////////
22Tokenizer::Tokenizer() : buffer(""), token(""), delimiter(DEFAULT_DELIMITER)
23{
24 currPos = buffer.begin();
25}
26
27Tokenizer::Tokenizer(const std::string& str, const std::string& delimiter) : buffer(str), token(""), delimiter(delimiter)
28{
29 currPos = buffer.begin();
30}
31
32
33
34///////////////////////////////////////////////////////////////////////////////
35// destructor
36///////////////////////////////////////////////////////////////////////////////
37Tokenizer::~Tokenizer()
38{
39}
40
41
42
43///////////////////////////////////////////////////////////////////////////////
44// reset string buffer, delimiter and the currsor position
45///////////////////////////////////////////////////////////////////////////////
46void Tokenizer::set(const std::string& str, const std::string& delimiter)
47{
48 this->buffer = str;
49 this->delimiter = delimiter;
50 this->currPos = buffer.begin();
51}
52
53void Tokenizer::setString(const std::string& str)
54{
55 this->buffer = str;
56 this->currPos = buffer.begin();
57}
58
59void Tokenizer::setDelimiter(const std::string& delimiter)
60{
61 this->delimiter = delimiter;
62 this->currPos = buffer.begin();
63}
64
65
66
67///////////////////////////////////////////////////////////////////////////////
68// return the next token
69// If cannot find a token anymore, return "".
70///////////////////////////////////////////////////////////////////////////////
71std::string Tokenizer::next()
72{
73 if(buffer.size() <= 0) return ""; // skip if buffer is empty
74
75 token.clear(); // reset token string
76
77 this->skipDelimiter(); // skip leading delimiters
78
79 // append each char to token string until it meets delimiter
80 while(currPos != buffer.end() && !isDelimiter(*currPos))
81 {
82 token += *currPos;
83 ++currPos;
84 }
85 return token;
86}
87
88
89
90///////////////////////////////////////////////////////////////////////////////
91// skip ang leading delimiters
92///////////////////////////////////////////////////////////////////////////////
93void Tokenizer::skipDelimiter()
94{
95 while(currPos != buffer.end() && isDelimiter(*currPos))
96 ++currPos;
97}
98
99
100
101///////////////////////////////////////////////////////////////////////////////
102// return true if the current character is delimiter
103///////////////////////////////////////////////////////////////////////////////
104bool Tokenizer::isDelimiter(char c)
105{
106 return (delimiter.find(c) != std::string::npos);
107}
108
109
110
111///////////////////////////////////////////////////////////////////////////////
112// split the input string into multiple tokens
113// This function scans tokens from the current cursor position.
114///////////////////////////////////////////////////////////////////////////////
115std::vector<std::string> Tokenizer::split()
116{
117 std::vector<std::string> tokens;
118 std::string token;
119 while((token = this->next()) != "")
120 {
121 tokens.push_back(token);
122 }
123
124 return tokens;
125}