/src/mongo/util/text.h

https://github.com/matulef/mongo · C Header · 162 lines · 96 code · 24 blank · 42 comment · 21 complexity · 767ac67f6cff16c51aad30c999cb4991 MD5 · raw file

  1. // text.h
  2. /*
  3. * Copyright 2010 10gen Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* Copyright 2009 10gen Inc.
  18. *
  19. * Licensed under the Apache License, Version 2.0 (the "License");
  20. * you may not use this file except in compliance with the License.
  21. * You may obtain a copy of the License at
  22. *
  23. * http://www.apache.org/licenses/LICENSE-2.0
  24. *
  25. * Unless required by applicable law or agreed to in writing, software
  26. * distributed under the License is distributed on an "AS IS" BASIS,
  27. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  28. * See the License for the specific language governing permissions and
  29. * limitations under the License.
  30. */
  31. #pragma once
  32. namespace mongo {
  /**
   * Splits a C string into fragments around a delimiter string.
   *
   * The splitter stores the two pointers it is given and does not copy the
   * underlying characters, so both buffers must stay alive and unmodified for
   * as long as the splitter is used.
   *
   * Runs of back-to-back delimiters are collapsed into one, so no empty
   * fragments are produced between them. A delimiter at the very start of the
   * input still yields one leading empty fragment.
   */
  class StringSplitter {
  public:
      /** @param big the string to be split
          @param splitter the delimiter; may be more than one character long
      */
      StringSplitter( const char * big , const char * splitter )
          : _big( big ) , _splitter( splitter ) {
      }

      /** @return true if more to be taken via next() */
      bool more() const {
          return _big[0] != 0;
      }

      /** get next split string fragment
          @return the text up to (not including) the next delimiter, or all of
                  the remaining input when no delimiter is left
      */
      std::string next() {
          const size_t delimLen = strlen( _splitter );

          // An empty delimiter matches everywhere; treat it as "no delimiter"
          // so the remaining input comes back as a single fragment.
          const char * hit = delimLen ? strstr( _big , _splitter ) : 0;
          if ( hit ) {
              std::string s( _big , hit - _big );

              // Step over the WHOLE delimiter, not just its first byte,
              // otherwise the tail of a multi-character delimiter leaks into
              // the next fragment.
              _big = hit + delimLen;

              // Collapse any delimiters that follow immediately. strncmp stops
              // at the terminating NUL, so this cannot run past the end.
              while ( strncmp( _big , _splitter , delimLen ) == 0 )
                  _big += delimLen;
              return s;
          }

          std::string s = _big;
          _big += strlen( _big );
          return s;
      }

      /** append every remaining fragment to l */
      void split( std::vector<std::string>& l ) {
          while ( more() ) {
              l.push_back( next() );
          }
      }

      /** @return every remaining fragment, in order */
      std::vector<std::string> split() {
          std::vector<std::string> l;
          split( l );
          return l;
      }

      /** one-shot helper: split big around splitter
          @return the fragments, in order
      */
      static std::vector<std::string> split( const std::string& big , const std::string& splitter ) {
          StringSplitter ss( big.c_str() , splitter.c_str() );
          return ss.split();
      }

      /** concatenate the elements of l, placing split between neighbours
          @param l the pieces to join; not modified
          @param split the separator inserted between consecutive pieces
      */
      static std::string join( const std::vector<std::string>& l , const std::string& split ) {
          std::string joined;
          for ( size_t i = 0; i < l.size(); i++ ) {
              if ( i > 0 )
                  joined += split;
              joined += l[i];
          }
          return joined;
      }

  private:
      const char * _big;      // current read position inside the caller's buffer
      const char * _splitter; // delimiter, owned by the caller
  };
  /**
   * Checks a NUL-terminated string for UTF-8 well-formedness.
   *
   * This doesn't defend against ALL bad UTF8, but it will guarantee that the
   * string can be converted to a sequence of codepoints. However, it doesn't
   * guarantee that the codepoints are valid.
   *
   * Declared here only; the definition lives outside this header.
   *
   * @param s NUL-terminated string to examine
   * @return true if s passes the check described above
   */
  bool isValidUTF8(const char *s);

  /**
   * std::string convenience overload; forwards s.c_str() to the C-string
   * version. No length is passed along, so the callee only sees a
   * NUL-terminated view of the data.
   *
   * Takes the argument by const reference so the string is not copied on
   * every call.
   */
  inline bool isValidUTF8(const std::string& s) { return isValidUTF8(s.c_str()); }
  #ifdef _WIN32

  // Windows-only string conversion helpers. Both are declared here and
  // defined elsewhere.
  std::wstring toWideString(const char *s);
  std::string toUtf8String(const std::wstring& wide);

  /* toNativeString: like toWideString, but sensitive to the _UNICODE macro.
   * With _UNICODE defined it forwards to toWideString; without it the build
   * stops at the #error below, so the std::string variant is never compiled.
   */
  # ifdef _UNICODE
  inline std::wstring toNativeString(const char *s) { return toWideString(s); }
  # else
  #error temp error
  inline std::string toNativeString(const char *s) { return s; }
  # endif

  #endif // _WIN32
  103. // expect that n contains a base ten number and nothing else after it
  104. // NOTE win version hasn't been tested directly
  105. inline long long parseLL( const char *n ) {
  106. long long ret;
  107. uassert( 13307, "cannot convert empty string to long long", *n != 0 );
  108. #if !defined(_WIN32)
  109. char *endPtr = 0;
  110. errno = 0;
  111. ret = strtoll( n, &endPtr, 10 );
  112. uassert( 13305, "could not convert string to long long", *endPtr == 0 && errno == 0 );
  113. #elif _MSC_VER>=1600 // 1600 is VS2k10 1500 is VS2k8
  114. size_t endLen = 0;
  115. try {
  116. ret = stoll( n, &endLen, 10 );
  117. }
  118. catch ( ... ) {
  119. endLen = 0;
  120. }
  121. uassert( 13306, "could not convert string to long long", endLen != 0 && n[ endLen ] == 0 );
  122. #else // stoll() wasn't introduced until VS 2010.
  123. char* endPtr = 0;
  124. ret = _strtoi64( n, &endPtr, 10 );
  125. uassert( 13310, "could not convert string to long long", (*endPtr == 0) && (ret != _I64_MAX) && (ret != _I64_MIN) );
  126. #endif // !defined(_WIN32)
  127. return ret;
  128. }
  #ifdef _WIN32

  /**
   * Built from the wide-character argument vector of a Windows program and
   * hands back a char** argument array through argv().
   *
   * NOTE(review): the constructor and destructor are defined out of line;
   * presumably they allocate and release _argv -- confirm in the
   * implementation before copying instances of this class.
   */
  class WindowsCommandLine {
  public:
      /** @param argc  number of entries in argvW
          @param argvW wide-character arguments
      */
      WindowsCommandLine( int argc, wchar_t* argvW[] );
      ~WindowsCommandLine();

      /** @return the stored char** argument array */
      char** argv() const { return _argv; }

  private:
      char** _argv;
  };

  #endif // _WIN32
  138. } // namespace mongo