PageRenderTime 57ms CodeModel.GetById 44ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/omnetpp-4.1/src/common/linetokenizer.cc

https://bitbucket.org/indigopony/omnetproject
C++ | 201 lines | 161 code | 18 blank | 22 comment | 25 complexity | cfc4d55bd13b6ee2f87775bf45a890d1 MD5 | raw file
  1//=========================================================================
  2//  LINETOKENIZER.CC - part of
  3//                  OMNeT++/OMNEST
  4//           Discrete System Simulation in C++
  5//
  6//  Author: Andras Varga
  7//
  8//=========================================================================
  9
 10/*--------------------------------------------------------------*
 11  Copyright (C) 2006-2008 OpenSim Ltd.
 12
 13  This file is distributed WITHOUT ANY WARRANTY. See the file
 14  `license' for details on this and other legal matters.
 15*--------------------------------------------------------------*/
 16
 17
 18#include <assert.h>
 19#include <sstream>
 20#include <string.h>
 21#include "exception.h"
 22#include "linetokenizer.h"
 23
 24USING_NAMESPACE
 25
 26
 27LineTokenizer::LineTokenizer(int bufferSize, int maxTokenNum, char sep1, char sep2)
 28    : sep1(sep1), sep2(sep2)
 29{
 30    if (maxTokenNum < 0)
 31        maxTokenNum = bufferSize/4;
 32
 33    vecsize = maxTokenNum;
 34    vec = new char *[vecsize];
 35
 36    lineBufferSize = bufferSize;
 37    lineBuffer = new char[lineBufferSize];
 38}
 39
 40LineTokenizer::~LineTokenizer()
 41{
 42    delete [] vec;
 43    delete [] lineBuffer;
 44}
 45
 46inline int h2d(char c)
 47{
 48    if (c>='0' && c<='9') return c-'0';
 49    if (c>='A' && c<='F') return c-'A'+10;
 50    if (c>='a' && c<='f') return c-'a'+10;
 51    return -1;
 52}
 53
 54inline int h2d(char *&s)
 55{
 56    int a = h2d(*s);
 57    if (a<0) return 0;
 58    s++;
 59    int b = h2d(*s);
 60    if (b<0) return a;
 61    s++;
 62    return a*16+b;
 63}
 64
 65static void interpretBackslashes(char *buffer)
 66{
 67    // interpret backslashes in-place. This works because the output
 68    // is always shorter (or equal) than the input.
 69    char *s = buffer;
 70    char *d = buffer;
 71    for (; *s; s++, d++)
 72    {
 73        if (*s=='\\')
 74        {
 75            // allow backslash as quote character, also interpret backslash sequences
 76            // note: this must be kept consistent with opp_quotestr()/opp_parsequotedstr()
 77            s++;
 78            switch(*s)
 79            {
 80                case 'b': *d = '\b'; break;
 81                case 'f': *d = '\f'; break;
 82                case 'n': *d = '\n'; break;
 83                case 'r': *d = '\r'; break;
 84                case 't': *d = '\t'; break;
 85                case 'x': s++; *d = h2d(s); s--; break; // hex code
 86                case '"': *d = '"'; break;  // quote needs to be backslashed
 87                case '\\': *d = '\\'; break;  // backslash needs to be backslashed
 88                case '\n': d--; break; // don't store line continuation (backslash followed by newline)
 89                case '\0': d--; s--; break; // string ends in stray backslash
 90                default: *d = *s; // be tolerant with unrecogized backslash sequences
 91            }
 92        }
 93        else
 94        {
 95            *d = *s;
 96        }
 97    }
 98    *d = '\0';
 99}
100
101int LineTokenizer::tokenize(const char *line, int length)
102{
103    if (length >= lineBufferSize)
104        throw opp_runtime_error("Cannot tokenize lines longer than %d", lineBufferSize - 1);
105
106    strncpy(lineBuffer, line, length);
107    lineBuffer[length] = '\0'; // guard
108
109    char *s = lineBuffer + length - 1;
110    while (s >= lineBuffer && (*s == '\r' || *s == '\n'))
111        *s-- = '\0';
112
113    numtokens = 0;
114    s = lineBuffer;
115
116    // loop through the tokens on the line
117    for (;;)
118    {
119        // skip separators before token
120        while (*s==sep1 || *s==sep2) s++;
121
122        char *token;
123        if (!*s)
124        {
125            // end of line found -- exit loop
126            break;
127        }
128        else if (*s=='"')
129        {
130            // parse quoted string
131            token = s+1;
132            s++;
133            // try to find end of quoted string
134            bool containsBackslash = false;
135            while (*s && *s!='"')
136                if (*s++=='\\')
137                    {s++; containsBackslash = true;}
138            // check we found the close quote
139            if (*s!='"')
140                throw opp_runtime_error("Unmatched quote in file");
141            // terminate quoted string with zero, overwriting close quote
142            *s++ = 0;
143            // if token contained a backslash (rare!), we need post-processing
144            // to interpret the escape sequences
145            if (containsBackslash)
146                interpretBackslashes(token);
147
148        }
149        else
150        {
151            // parse unquoted string
152            token = s;
153            // try find end of string
154            while (*s && *s!=sep1 && *s!=sep2) s++;
155            // terminate string with zero (if we are not already at end of the line)
156            if (*s) *s++ = 0;
157        }
158
159        // add token to the array (if there's room); s points to the rest of the string
160        if (numtokens==vecsize)
161            throw opp_runtime_error("Too many tokens on a line, max %d allowed", vecsize-1);
162        vec[numtokens++] = token;
163    }
164    return numtokens;
165}
166
167/*
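Example code (NOTE: appears to predate the current API -- the tokenize()
defined above takes (line, length), returns the number of tokens and
reports errors by throwing; adjust the call and the error check below
accordingly):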
169
170#include <string.h>
171#include <iostream>
172using namespace std;
173
174void tok(const char *s)
175{
176    char *buf = new char[strlen(s)+1];
177    strcpy(buf, s);
178    cout << buf << " --> ";
179
180    LineTokenizer t;
181    bool ok = t.tokenize(buf);
182    if (!ok)
183        cout << t.errorMsg(1) << endl;
184
185    int numtokens = t.numTokens();
186    char **vec = t.tokens();
187    for (int i=0; i<numtokens; i++)
188        cout << (i==0?"":":") << vec[i];
189    cout << "\n";
190}
191
192int main(int argc, char **argv)
193{
194    tok("E 121.1344 e434");
195    tok("E \"121.1344 e434\" 222");
196
197    return 0;
198}
199*/
200
201