linetokenizer.cc - LINETOKENIZER.CC - part of Discrete Syst…

/omnetpp-4.1/src/common/linetokenizer.cc

https://bitbucket.org/indigopony/omnetproject · C++ · 201 lines · 161 code · 18 blank · 22 comment · 26 complexity · cfc4d55bd13b6ee2f87775bf45a890d1 MD5 · raw file

//=========================================================================
//  LINETOKENIZER.CC - part of
//                  OMNeT++/OMNEST
//           Discrete System Simulation in C++
//
//  Author: Andras Varga
//
//=========================================================================

/*--------------------------------------------------------------*
  Copyright (C) 2006-2008 OpenSim Ltd.

  This file is distributed WITHOUT ANY WARRANTY. See the file
  `license' for details on this and other legal matters.
*--------------------------------------------------------------*/


#include <assert.h>
#include <sstream>
#include <string.h>
#include "exception.h"
#include "linetokenizer.h"

USING_NAMESPACE


// Creates a tokenizer with fixed-size storage.
//
//   bufferSize   - size of the internal line buffer in bytes; tokenize()
//                  rejects lines whose length is >= this value
//   maxTokenNum  - capacity of the token pointer array; a negative value
//                  selects bufferSize/4
//   sep1, sep2   - the two characters treated as token separators
//
LineTokenizer::LineTokenizer(int bufferSize, int maxTokenNum, char sep1, char sep2)
    : sep1(sep1), sep2(sep2)
{
    // nothing has been tokenized yet: give the token count a defined value
    // instead of leaving it indeterminate until the first tokenize() call
    numtokens = 0;

    if (maxTokenNum < 0)
        maxTokenNum = bufferSize/4;

    vecsize = maxTokenNum;
    vec = new char *[vecsize];

    lineBufferSize = bufferSize;
    lineBuffer = new char[lineBufferSize];
}

// Releases the two arrays allocated by the constructor.
LineTokenizer::~LineTokenizer()
{
    // vec[] only holds pointers into lineBuffer and owns nothing itself,
    // so the two arrays can simply be freed independently
    delete [] lineBuffer;
    delete [] vec;
}

// Converts one hexadecimal digit character to its value (0..15).
// Both upper and lower case letters are accepted; returns -1 if the
// character is not a hex digit.
inline int h2d(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}

// Reads at most two hexadecimal digits starting at s and returns their value.
// s is advanced past exactly the digits that were consumed; if *s is not a
// hex digit at all, 0 is returned and s is left where it was.
inline int h2d(char *&s)
{
    int value = 0;
    for (int consumed = 0; consumed < 2; consumed++)
    {
        int digit = h2d(*s);
        if (digit < 0)
            break;  // not a hex digit: stop without consuming it
        value = value*16 + digit;
        s++;
    }
    return value;
}

// Replaces backslash escape sequences in the NUL-terminated string `buffer`
// with the characters they stand for. The conversion is done in place, which
// is safe because every escape sequence is at least two characters long and
// yields at most one output character, so the write position never overtakes
// the read position.
static void interpretBackslashes(char *buffer)
{
    char *src = buffer;  // next character to read
    char *dst = buffer;  // next position to write

    while (*src)
    {
        if (*src != '\\')
        {
            // ordinary character: copy through
            *dst++ = *src++;
            continue;
        }

        // allow backslash as quote character, also interpret backslash sequences
        // note: this must be kept consistent with opp_quotestr()/opp_parsequotedstr()
        src++;  // look at the character following the backslash

        char decoded;
        switch (*src)
        {
            case '\0':
                // string ends in a stray backslash: drop it; src stays on the
                // terminator so the loop condition ends the scan
                continue;
            case '\n':
                // line continuation (backslash followed by newline): store nothing
                src++;
                continue;
            case 'x':
                // hex code: step over the 'x', then h2d() consumes up to two digits
                src++;
                *dst++ = h2d(src);
                continue;
            case 'b': decoded = '\b'; break;
            case 'f': decoded = '\f'; break;
            case 'n': decoded = '\n'; break;
            case 'r': decoded = '\r'; break;
            case 't': decoded = '\t'; break;
            case '"':   // quote needs to be backslashed
            case '\\':  // backslash needs to be backslashed
            default:    // be tolerant with unrecognized backslash sequences
                decoded = *src;
                break;
        }
        *dst++ = decoded;
        src++;
    }
    *dst = '\0';
}

// Splits a line into tokens.
//
// The first `length` characters of `line` are copied into the internal line
// buffer, trailing CR/LF characters are removed, and the copy is then cut
// into tokens in place by overwriting the character after each token with NUL.
// Tokens are separated by runs of sep1/sep2. A token that starts with a
// double quote extends to the matching close quote, may contain separators,
// and has its backslash escape sequences interpreted.
//
// Returns the number of tokens found. The token pointers are stored in vec[]
// and point into the line buffer, so they are overwritten by the next call.
//
// Throws opp_runtime_error if `length` is negative or does not fit into the
// line buffer, if a quoted string is not closed, or if the line contains
// more tokens than vec[] can hold.
int LineTokenizer::tokenize(const char *line, int length)
{
    // a negative length would turn into a huge size_t in strncpy() and into
    // an out-of-bounds write when the terminator is stored
    if (length < 0)
        throw opp_runtime_error("Cannot tokenize line: invalid length %d", length);
    if (length >= lineBufferSize)
        throw opp_runtime_error("Cannot tokenize lines longer than %d", lineBufferSize - 1);

    strncpy(lineBuffer, line, length);
    lineBuffer[length] = '\0'; // guard

    // chop trailing CR/LF. We walk a one-past-the-end pointer backwards so that
    // no pointer before the start of lineBuffer is ever formed (this matters
    // for empty lines and for lines that consist of line terminators only)
    char *end = lineBuffer + length;
    while (end > lineBuffer && (end[-1] == '\r' || end[-1] == '\n'))
        *--end = '\0';

    numtokens = 0;
    char *s = lineBuffer;

    // loop through the tokens on the line
    for (;;)
    {
        // skip separators before token; the explicit NUL test keeps the scan
        // inside the buffer even if one of the separators is '\0'
        while (*s && (*s==sep1 || *s==sep2)) s++;

        // end of line found -- exit loop
        if (!*s)
            break;

        char *token;
        if (*s=='"')
        {
            // parse quoted string; the token starts after the open quote
            token = ++s;

            // try to find end of quoted string
            bool containsBackslash = false;
            while (*s && *s!='"')
            {
                if (*s=='\\')
                {
                    containsBackslash = true;
                    // step over the escaped character as well (it may be a
                    // quote), but never over the terminating NUL: a line that
                    // ends in a backslash must not make us read past the guard
                    s++;
                    if (!*s)
                        break;
                }
                s++;
            }

            // check we found the close quote
            if (*s!='"')
                throw opp_runtime_error("Unmatched quote in file");

            // terminate quoted string with zero, overwriting close quote
            *s++ = 0;

            // if token contained a backslash (rare!), we need post-processing
            // to interpret the escape sequences
            if (containsBackslash)
                interpretBackslashes(token);
        }
        else
        {
            // parse unquoted string
            token = s;
            // try find end of string
            while (*s && *s!=sep1 && *s!=sep2) s++;
            // terminate string with zero (if we are not already at end of the line)
            if (*s) *s++ = 0;
        }

        // add token to the array if there's room; s points to the rest of the string.
        // vec[] holds vecsize entries, so vecsize is the number of tokens allowed
        if (numtokens==vecsize)
            throw opp_runtime_error("Too many tokens on a line, max %d allowed", vecsize);
        vec[numtokens++] = token;
    }
    return numtokens;
}

/*
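Example code (NOTE: outdated -- tokenize() in this file takes the line and its
length, returns the number of tokens, and reports errors by throwing instead
of returning a success flag):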

#include <string.h>
#include <iostream>
using namespace std;

void tok(const char *s)
{
    char *buf = new char[strlen(s)+1];
    strcpy(buf, s);
    cout << buf << " --> ";

    LineTokenizer t;
    bool ok = t.tokenize(buf);
    if (!ok)
        cout << t.errorMsg(1) << endl;

    int numtokens = t.numTokens();
    char **vec = t.tokens();
    for (int i=0; i<numtokens; i++)
        cout << (i==0?"":":") << vec[i];
    cout << "\n";
}

int main(int argc, char **argv)
{
    tok("E 121.1344 e434");
    tok("E \"121.1344 e434\" 222");

    return 0;
}
*/