Re-implement the Tokenizer class from scratch

The previous implementation didn't have a license header. Even though we
received explicit authorization to use it, it wasn't a comfortable situation.

This is an original implementation that keeps only the APIs we use in the
parameter-framework and keeps them identical, with one small exception: the
global DEFAULT_DELIMITER constant becomes the static member
Tokenizer::defaultDelimiters. The behaviour is also exactly the same.

Change-Id: I85a69c76027ee026a693d79cd19edd3b86796f9d
Signed-off-by: David Wagner <david.wagner@intel.com>
diff --git a/parameter/ArrayParameter.cpp b/parameter/ArrayParameter.cpp
index c946392..ec69191 100644
--- a/parameter/ArrayParameter.cpp
+++ b/parameter/ArrayParameter.cpp
@@ -257,7 +257,7 @@
 bool CArrayParameter::setValues(uint32_t uiStartIndex, uint32_t uiBaseOffset, const string& strValue, CParameterAccessContext& parameterAccessContext) const
 {
     // Deal with value(s)
-    Tokenizer tok(strValue, DEFAULT_DELIMITER + ",");
+    Tokenizer tok(strValue, Tokenizer::defaultDelimiters + ",");
 
     std::vector<string> astrValues = tok.split();
     uint32_t uiNbValues = astrValues.size();
diff --git a/utility/Tokenizer.cpp b/utility/Tokenizer.cpp
index 9ea4ea4..a4cfcf0 100644
--- a/utility/Tokenizer.cpp
+++ b/utility/Tokenizer.cpp
@@ -1,125 +1,75 @@
-///////////////////////////////////////////////////////////////////////////////

-// Tokenizer.cpp

-// =============

-// General purpose string tokenizer (C++ string version)

-//

-// The default delimiters are space(" "), tab(\t, \v), newline(\n),

-// carriage return(\r), and form feed(\f).

-// If you want to use different delimiters, then use setDelimiter() to override

-// the delimiters. Note that the delimiter string can hold multiple characters.

-//

-//  AUTHOR: Song Ho Ahn (song.ahn@gmail.com)

-// CREATED: 2005-05-25

-// UPDATED: 2011-03-08

-///////////////////////////////////////////////////////////////////////////////

-

-#include "Tokenizer.h"

-

-

-///////////////////////////////////////////////////////////////////////////////

-// constructor

-///////////////////////////////////////////////////////////////////////////////

-Tokenizer::Tokenizer() : buffer(""), token(""), delimiter(DEFAULT_DELIMITER)

-{

-    currPos = buffer.begin();

-}

-

-Tokenizer::Tokenizer(const std::string& str, const std::string& delimiter) : buffer(str), token(""), delimiter(delimiter)

-{

-    currPos = buffer.begin();

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// destructor

-///////////////////////////////////////////////////////////////////////////////

-Tokenizer::~Tokenizer()

-{

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// reset string buffer, delimiter and the currsor position

-///////////////////////////////////////////////////////////////////////////////

-void Tokenizer::set(const std::string& str, const std::string& delimiter)

-{

-    this->buffer = str;

-    this->delimiter = delimiter;

-    this->currPos = buffer.begin();

-}

-

-void Tokenizer::setString(const std::string& str)

-{

-    this->buffer = str;

-    this->currPos = buffer.begin();

-}

-

-void Tokenizer::setDelimiter(const std::string& delimiter)

-{

-    this->delimiter = delimiter;

-    this->currPos = buffer.begin();

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// return the next token

-// If cannot find a token anymore, return "".

-///////////////////////////////////////////////////////////////////////////////

-std::string Tokenizer::next()

-{

-    if(buffer.size() <= 0) return "";           // skip if buffer is empty

-

-    token.clear();                              // reset token string

-

-    this->skipDelimiter();                      // skip leading delimiters

-

-    // append each char to token string until it meets delimiter

-    while(currPos != buffer.end() && !isDelimiter(*currPos))

-    {

-        token += *currPos;

-        ++currPos;

-    }

-    return token;

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// skip ang leading delimiters

-///////////////////////////////////////////////////////////////////////////////

-void Tokenizer::skipDelimiter()

-{

-    while(currPos != buffer.end() && isDelimiter(*currPos))

-        ++currPos;

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// return true if the current character is delimiter

-///////////////////////////////////////////////////////////////////////////////

-bool Tokenizer::isDelimiter(char c)

-{

-    return (delimiter.find(c) != std::string::npos);

-}

-

-

-

-///////////////////////////////////////////////////////////////////////////////

-// split the input string into multiple tokens

-// This function scans tokens from the current cursor position.

-///////////////////////////////////////////////////////////////////////////////

-std::vector<std::string> Tokenizer::split()

-{

-    std::vector<std::string> tokens;

-    std::string token;

-    while((token = this->next()) != "")

-    {

-        tokens.push_back(token);

-    }

-

-    return tokens;

-}

+/*
+ * Copyright (c) 2015, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "Tokenizer.h"
+
+using std::string;
+using std::vector;
+
+const string Tokenizer::defaultDelimiters = " \n\r\t\v\f"; // space, LF, CR, HT, VT, FF
+
+Tokenizer::Tokenizer(const string &input, const string &delimiters)
+    : _input(input), _delimiters(delimiters), _position(0) // scanning starts at offset 0
+{
+}
+
+string Tokenizer::next()
+{
+    string token; // NOTE(review): never used in this function; candidate for removal
+
+    // Skip all leading delimiters, starting where the previous call stopped
+    string::size_type tokenStart = _input.find_first_not_of(_delimiters, _position);
+
+    // No token left: bail out now, since string::substr throws
+    // std::out_of_range when given npos as its start position
+    if (tokenStart == string::npos) {
+        return "";
+    }
+
+    // Starting from the token's start, find the first delimiter (npos if none)
+    string::size_type tokenEnd = _input.find_first_of(_delimiters, tokenStart);
+
+    _position = tokenEnd; // if npos, the next call finds nothing and returns ""
+
+    return _input.substr(tokenStart, tokenEnd - tokenStart); // substr clamps the count
+}
+
+vector<string> Tokenizer::split()
+{
+    vector<string> result;
+    string token;
+
+    while (true) { // only exits through the return below
+        token = next();
+        if (token.empty()) { // next() returns "" when no token is left
+            return result;
+        }
+        result.push_back(token);
+    }
+}
diff --git a/utility/Tokenizer.h b/utility/Tokenizer.h
index de3f86c..c48747a 100644
--- a/utility/Tokenizer.h
+++ b/utility/Tokenizer.h
@@ -1,56 +1,75 @@
-///////////////////////////////////////////////////////////////////////////////

-// Tokenizer.h

-// ===========

-// General purpose string tokenizer (C++ string version)

-//

-// The default delimiters are space(" "), tab(\t, \v), newline(\n),

-// carriage return(\r), and form feed(\f).

-// If you want to use different delimiters, then use setDelimiter() to override

-// the delimiters. Note that the delimiter string can hold multiple characters.

-//

-//  AUTHOR: Song Ho Ahn (song.ahn@gmail.com)

-// CREATED: 2005-05-25

-// UPDATED: 2011-03-08

-///////////////////////////////////////////////////////////////////////////////

-

-#ifndef TOKENIZER_H

-#define TOKENIZER_H

-

-#include <string>

+/*
+ * Copyright (c) 2015, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#include <string>
 #include <vector>
-

-// default delimiter string (space, tab, newline, carriage return, form feed)

-const std::string DEFAULT_DELIMITER = " \t\v\n\r\f";

-

-class Tokenizer

-{

-public:

-    // ctor/dtor

-    Tokenizer();

-    Tokenizer(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);

-    ~Tokenizer();

-

-    // set string and delimiter

-    void set(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);

-    void setString(const std::string& str);             // set source string only

-    void setDelimiter(const std::string& delimiter);    // set delimiter string only

-

-    std::string next();                                 // return the next token, return "" if it ends

-

-    std::vector<std::string> split();                   // return array of tokens from current cursor

-

-protected:

-

-

-private:

-    void skipDelimiter();                               // ignore leading delimiters

-    bool isDelimiter(char c);                           // check if the current char is delimiter

-

-    std::string buffer;                                 // input string

-    std::string token;                                  // output string

-    std::string delimiter;                              // delimiter string

-    std::string::const_iterator currPos;                // string iterator pointing the current position

-

-};

-

-#endif // TOKENIZER_H

+
+/** Tokenizer class
+ *
+ * Must be initialized with a string to be tokenized and, optionally, a string
+ * of delimiters (@see Tokenizer::defaultDelimiters).
+ *
+ * Multiple consecutive delimiters (even if different) are considered as a
+ * single one. As a result, there can't be empty tokens.
+ */
+class Tokenizer
+{
+public:
+    /** Constructs a Tokenizer
+     *
+     * @param[in] input The string to be tokenized
+     * @param[in] delimiters A string containing all the token delimiters
+     *            (hence, each delimiter can only be a single character)
+     */
+    Tokenizer(const std::string &input, const std::string &delimiters=defaultDelimiters);
+    ~Tokenizer() {};
+
+    /** Return the next token, or an empty string if there are no more tokens
+     *
+     * Multiple consecutive delimiters are considered as a single one - e.g.
+     * "a     bc d   " will be tokenized as ("a", "bc", "d") if the delimiter
+     * is ' '.
+     */
+    std::string next();
+
+    /** Return a vector of all the tokens not yet returned by next(), in order
+     */
+    std::vector<std::string> split();
+
+    /** Default list of delimiters (" \n\r\t\v\f") */
+    static const std::string defaultDelimiters;
+
+private:
+    const std::string _input; ///< string to be tokenized
+    const std::string _delimiters; ///< token delimiters
+
+    std::string::size_type _position; ///< where the next search starts: end of the last returned token
+};