/strings.cpp

https://github.com/boq/dirtsand · C++ · 592 lines · 516 code · 57 blank · 19 comment · 153 complexity · 1f9d7b0f9ce1921d10736780e2ee4e00 MD5 · raw file

  1. /******************************************************************************
  2. * This file is part of dirtsand. *
  3. * *
  4. * dirtsand is free software: you can redistribute it and/or modify *
  5. * it under the terms of the GNU General Public License as published by *
  6. * the Free Software Foundation, either version 3 of the License, or *
  7. * (at your option) any later version. *
  8. * *
  9. * dirtsand is distributed in the hope that it will be useful, *
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  12. * GNU General Public License for more details. *
  13. * *
  14. * You should have received a copy of the GNU General Public License *
  15. * along with dirtsand. If not, see <http://www.gnu.org/licenses/>. *
  16. ******************************************************************************/
  17. #include "strings.h"
  18. #include "errors.h"
  19. #include <list>
  20. #include <cstdio>
  21. #include <ctype.h>
  /* Returns the number of elements that precede the first zero-valued
   * element of `string`.  Works for any character width. */
  template <typename char_type>
  static size_t tstrlen(const char_type* string)
  {
      size_t count = 0;
      while (string[count] != 0)
          ++count;
      return count;
  }
  30. static void raw_to_utf8(chr8_t** dest, size_t* destlen,
  31. const chr8_t* src, ssize_t srclen)
  32. {
  33. if (srclen < 0)
  34. srclen = static_cast<size_t>(tstrlen(src));
  35. size_t convlen = 0;
  36. const chr8_t* sptr = src;
  37. while (sptr < src + srclen) {
  38. if (*sptr >= 0x80)
  39. convlen += 2;
  40. else
  41. convlen += 1;
  42. ++sptr;
  43. }
  44. if (destlen)
  45. *destlen = convlen;
  46. if (dest) {
  47. *dest = new chr8_t[convlen + 1];
  48. chr8_t* dptr = *dest;
  49. sptr = src;
  50. while (sptr < src + srclen) {
  51. if (*sptr >= 0x80) {
  52. *dptr++ = 0xC0 | ((*sptr >> 6) & 0x1F);
  53. *dptr++ = 0x80 | ((*sptr ) & 0x3F);
  54. } else {
  55. *dptr++ = *sptr;
  56. }
  57. ++sptr;
  58. }
  59. (*dest)[convlen] = 0;
  60. }
  61. }
  62. static void utf16_to_utf8(chr8_t** dest, size_t* destlen,
  63. const chr16_t* src, ssize_t srclen)
  64. {
  65. if (srclen < 0)
  66. srclen = static_cast<size_t>(tstrlen(src));
  67. size_t convlen = 0;
  68. const chr16_t* sptr = src;
  69. while (sptr < src + srclen) {
  70. if (*sptr >= 0xD800 && *sptr <= 0xDFFF) {
  71. convlen += 4;
  72. ++sptr;
  73. } else if (*sptr >= 0x800) {
  74. convlen += 3;
  75. } else if (*sptr >= 0x80) {
  76. convlen += 2;
  77. } else {
  78. convlen += 1;
  79. }
  80. ++sptr;
  81. }
  82. if (destlen)
  83. *destlen = convlen;
  84. if (dest) {
  85. *dest = new chr8_t[convlen + 1];
  86. chr8_t* dptr = *dest;
  87. sptr = src;
  88. while (sptr < src + srclen) {
  89. if (*sptr >= 0xD800 && *sptr <= 0xDFFF) {
  90. uint32_t ch = 0x10000;
  91. if (sptr + 1 >= src + srclen) {
  92. /* Incomplete surrogate pair */
  93. *dptr++ = 0;
  94. break;
  95. } else if (*sptr < 0xDC00) {
  96. ch += (*sptr & 0x3FF) << 10;
  97. ++sptr;
  98. ch += (*sptr & 0x3FF);
  99. } else {
  100. ch += (*sptr & 0x3FF);
  101. ++sptr;
  102. ch += (*sptr & 0x3FF) << 10;
  103. }
  104. *dptr++ = 0xF0 | ((ch >> 18) & 0x07);
  105. *dptr++ = 0x80 | ((ch >> 12) & 0x3F);
  106. *dptr++ = 0x80 | ((ch >> 6) & 0x3F);
  107. *dptr++ = 0x80 | ((ch ) & 0x3F);
  108. } else if (*sptr >= 0x800) {
  109. *dptr++ = 0xE0 | ((*sptr >> 12) & 0x0F);
  110. *dptr++ = 0x80 | ((*sptr >> 6) & 0x3F);
  111. *dptr++ = 0x80 | ((*sptr ) & 0x3F);
  112. } else if (*sptr >= 0x80) {
  113. *dptr++ = 0xC0 | ((*sptr >> 6) & 0x1F);
  114. *dptr++ = 0x80 | ((*sptr ) & 0x3F);
  115. } else {
  116. *dptr++ = *sptr;
  117. }
  118. ++sptr;
  119. }
  120. (*dest)[convlen] = 0;
  121. }
  122. }
  123. static void utf8_to_raw(chr8_t** dest, size_t* destlen,
  124. const chr8_t* src, ssize_t srclen)
  125. {
  126. if (srclen < 0)
  127. srclen = static_cast<size_t>(tstrlen(src));
  128. size_t convlen = 0;
  129. const chr8_t* sptr = src;
  130. while (sptr < src + srclen) {
  131. if ((*sptr & 0xF8) == 0xF0)
  132. sptr += 4;
  133. else if ((*sptr & 0xF0) == 0xE0)
  134. sptr += 3;
  135. else if ((*sptr & 0xE0) == 0xC0)
  136. sptr += 2;
  137. else
  138. sptr += 1;
  139. ++convlen;
  140. }
  141. if (destlen)
  142. *destlen = convlen;
  143. if (dest) {
  144. *dest = new chr8_t[convlen + 1];
  145. chr8_t* dptr = *dest;
  146. sptr = src;
  147. while (sptr < src + srclen) {
  148. if ((*sptr & 0xF8) == 0xF0) {
  149. *dptr++ = '?';
  150. sptr += 4;
  151. } else if ((*sptr & 0xF0) == 0xE0) {
  152. *dptr++ = '?';
  153. sptr += 3;
  154. } else if ((*sptr & 0xE0) == 0xC0) {
  155. int ch = (*sptr++ & 0x1F) << 6;
  156. if (sptr < src + srclen)
  157. ch += (*sptr++ & 0x3F);
  158. *dptr++ = (ch <= 0xFF) ? static_cast<chr8_t>(ch) : '?';
  159. } else {
  160. *dptr++ = *sptr++;
  161. }
  162. }
  163. (*dest)[convlen] = 0;
  164. }
  165. }
  166. static void utf8_to_utf16(chr16_t** dest, size_t* destlen,
  167. const chr8_t* src, ssize_t srclen)
  168. {
  169. if (srclen < 0)
  170. srclen = static_cast<size_t>(tstrlen(src));
  171. size_t convlen = 0;
  172. const chr8_t* sptr = src;
  173. while (sptr < src + srclen) {
  174. if ((*sptr & 0xF8) == 0xF0) {
  175. /* Surrogate pair needed */
  176. ++convlen;
  177. sptr += 4;
  178. } else if ((*sptr & 0xF0) == 0xE0) {
  179. sptr += 3;
  180. } else if ((*sptr & 0xE0) == 0xC0) {
  181. sptr += 2;
  182. } else {
  183. sptr += 1;
  184. }
  185. ++convlen;
  186. }
  187. if (destlen)
  188. *destlen = convlen;
  189. if (dest) {
  190. *dest = new chr16_t[convlen + 1];
  191. chr16_t* dptr = *dest;
  192. sptr = src;
  193. while (sptr < src + srclen) {
  194. if ((*sptr & 0xF8) == 0xF0) {
  195. int ch = (*sptr++ & 0x07) << 18;
  196. if (sptr < src + srclen)
  197. ch += (*sptr++ & 0x3F) << 12;
  198. if (sptr < src + srclen)
  199. ch += (*sptr++ & 0x3F) << 6;
  200. if (sptr < src + srclen)
  201. ch += (*sptr++ & 0x3F);
  202. *dptr++ = 0xD800 | ((ch >> 10) & 0x3FF);
  203. *dptr++ = 0xDC00 | ((ch ) & 0x3FF);
  204. } else if ((*sptr & 0xF0) == 0xE0) {
  205. int ch = (*sptr++ & 0x0F) << 12;
  206. if (sptr < src + srclen)
  207. ch += (*sptr++ & 0x3F) << 6;
  208. if (sptr < src + srclen)
  209. ch += (*sptr++ & 0x3F);
  210. *dptr++ = ch;
  211. } else if ((*sptr & 0xE0) == 0xC0) {
  212. int ch = (*sptr++ & 0x1F) << 6;
  213. if (sptr < src + srclen)
  214. ch += (*sptr++ & 0x3F);
  215. *dptr++ = ch;
  216. } else {
  217. *dptr++ = *sptr++;
  218. }
  219. }
  220. (*dest)[convlen] = 0;
  221. }
  222. }
  223. DS::String& DS::String::operator=(const char* strconst)
  224. {
  225. if (strconst) {
  226. size_t length = tstrlen(strconst);
  227. chr8_t* bufdata = new chr8_t[length + 1];
  228. memcpy(bufdata, strconst, length);
  229. bufdata[length] = 0;
  230. m_data = StringBuffer<chr8_t>(bufdata, length);
  231. }
  232. return *this;
  233. }
  234. DS::String& DS::String::operator+=(const char* strconst)
  235. {
  236. if (isEmpty())
  237. return operator=(strconst);
  238. if (strconst && *strconst != 0) {
  239. size_t addlen = tstrlen(strconst);
  240. chr8_t* buffer = new chr8_t[m_data.m_buffer->m_length + addlen + 1];
  241. memcpy(buffer, m_data.m_buffer->m_string, m_data.m_buffer->m_length);
  242. memcpy(buffer + m_data.m_buffer->m_length, strconst, addlen);
  243. buffer[m_data.m_buffer->m_length + addlen] = 0;
  244. m_data = StringBuffer<chr8_t>(buffer, m_data.m_buffer->m_length + addlen);
  245. }
  246. return *this;
  247. }
  248. DS::String& DS::String::operator+=(const String& other)
  249. {
  250. if (isEmpty())
  251. return operator=(other);
  252. if (!other.isEmpty()) {
  253. chr8_t* buffer = new chr8_t[m_data.m_buffer->m_length + other.m_data.m_buffer->m_length + 1];
  254. memcpy(buffer, m_data.m_buffer->m_string, m_data.m_buffer->m_length);
  255. memcpy(buffer + m_data.m_buffer->m_length, other.m_data.m_buffer->m_string, other.m_data.m_buffer->m_length);
  256. buffer[m_data.m_buffer->m_length + other.m_data.m_buffer->m_length] = 0;
  257. m_data = StringBuffer<chr8_t>(buffer, m_data.m_buffer->m_length + other.m_data.m_buffer->m_length);
  258. }
  259. return *this;
  260. }
  261. int DS::String::compare(const char* strconst, CaseSensitivity cs) const
  262. {
  263. if (isEmpty())
  264. return (!strconst || *strconst == 0) ? 0 : -1;
  265. if (!strconst || *strconst == 0)
  266. return 1;
  267. if (cs == e_CaseSensitive)
  268. return strcmp(reinterpret_cast<const char*>(m_data.m_buffer->m_string), strconst);
  269. else
  270. return strcasecmp(reinterpret_cast<const char*>(m_data.m_buffer->m_string), strconst);
  271. }
  272. int DS::String::compare(const String& other, CaseSensitivity cs) const
  273. {
  274. if (isEmpty())
  275. return other.isEmpty() ? 0 : -1;
  276. if (other.isEmpty())
  277. return 1;
  278. if (cs == e_CaseSensitive)
  279. return strcmp(reinterpret_cast<const char*>(m_data.m_buffer->m_string),
  280. reinterpret_cast<const char*>(other.m_data.m_buffer->m_string));
  281. else
  282. return strcasecmp(reinterpret_cast<const char*>(m_data.m_buffer->m_string),
  283. reinterpret_cast<const char*>(other.m_data.m_buffer->m_string));
  284. }
  285. DS::StringBuffer<chr8_t> DS::String::toRaw() const
  286. {
  287. if (isNull())
  288. return StringBuffer<chr8_t>();
  289. chr8_t* buffer;
  290. size_t length;
  291. utf8_to_raw(&buffer, &length, m_data.m_buffer->m_string, m_data.m_buffer->m_length);
  292. return StringBuffer<chr8_t>(buffer, length);
  293. }
  294. DS::StringBuffer<chr8_t> DS::String::toUtf8() const
  295. {
  296. /* Provide a deep copy so the original string doesn't affect this buffer. */
  297. if (isNull())
  298. return StringBuffer<chr8_t>();
  299. chr8_t* buffer = new chr8_t[m_data.m_buffer->m_length + 1];
  300. memcpy(buffer, m_data.m_buffer->m_string, m_data.m_buffer->m_length);
  301. buffer[m_data.m_buffer->m_length] = 0;
  302. return StringBuffer<chr8_t>(buffer, m_data.m_buffer->m_length);
  303. }
  304. DS::StringBuffer<chr16_t> DS::String::toUtf16() const
  305. {
  306. if (isNull())
  307. return StringBuffer<chr16_t>();
  308. chr16_t* buffer;
  309. size_t length;
  310. utf8_to_utf16(&buffer, &length, m_data.m_buffer->m_string, m_data.m_buffer->m_length);
  311. return StringBuffer<chr16_t>(buffer, length);
  312. }
  313. DS::String DS::String::FromRaw(const chr8_t* string, ssize_t length)
  314. {
  315. if (!string)
  316. return String();
  317. chr8_t* buffer;
  318. size_t buflen;
  319. raw_to_utf8(&buffer, &buflen, string, length);
  320. String result;
  321. result.m_data = StringBuffer<chr8_t>(buffer, buflen);
  322. return result;
  323. }
  324. DS::String DS::String::FromUtf8(const chr8_t* string, ssize_t length)
  325. {
  326. if (!string)
  327. return String();
  328. if (length < 0)
  329. length = tstrlen(string);
  330. chr8_t* buffer = new chr8_t[length + 1];
  331. memcpy(buffer, string, length);
  332. buffer[length] = 0;
  333. String result;
  334. result.m_data = StringBuffer<chr8_t>(buffer, length);
  335. return result;
  336. }
  337. DS::String DS::String::FromUtf16(const chr16_t* string, ssize_t length)
  338. {
  339. if (!string)
  340. return String();
  341. chr8_t* buffer;
  342. size_t buflen;
  343. utf16_to_utf8(&buffer, &buflen, string, length);
  344. String result;
  345. result.m_data = StringBuffer<chr8_t>(buffer, buflen);
  346. return result;
  347. }
  348. int32_t DS::String::toInt(int base) const
  349. {
  350. if (isEmpty())
  351. return 0;
  352. return static_cast<int32_t>(strtol(c_str(), 0, base));
  353. }
  354. uint32_t DS::String::toUint(int base) const
  355. {
  356. if (isEmpty())
  357. return 0;
  358. return static_cast<uint32_t>(strtoul(c_str(), 0, base));
  359. }
  360. float DS::String::toFloat() const
  361. {
  362. if (isEmpty())
  363. return 0;
  364. return strtof(c_str(), 0);
  365. }
  366. double DS::String::toDouble() const
  367. {
  368. if (isEmpty())
  369. return 0;
  370. return strtod(c_str(), 0);
  371. }
  372. bool DS::String::toBool() const
  373. {
  374. if (isEmpty())
  375. return false;
  376. if (compare("true", e_CaseInsensitive) == 0)
  377. return true;
  378. return toInt() != 0;
  379. }
  380. std::vector<DS::String> DS::String::split(char separator, ssize_t max)
  381. {
  382. if (isEmpty())
  383. return std::vector<DS::String>();
  384. std::list<DS::String> subs;
  385. const chr8_t* cptr = m_data.data();
  386. const chr8_t* scanp = cptr;
  387. while (*scanp && max) {
  388. if (!separator && isspace(*scanp)) {
  389. subs.push_back(DS::String::FromUtf8(cptr, scanp - cptr));
  390. --max;
  391. while (*scanp && isspace(*scanp))
  392. ++scanp;
  393. cptr = scanp;
  394. } else if (*scanp == static_cast<chr8_t>(separator)) {
  395. subs.push_back(DS::String::FromUtf8(cptr, scanp - cptr));
  396. --max;
  397. cptr = scanp + 1;
  398. }
  399. ++scanp;
  400. }
  401. subs.push_back(DS::String::FromUtf8(cptr));
  402. return std::vector<DS::String>(subs.begin(), subs.end());
  403. }
  404. DS::String DS::String::left(ssize_t count)
  405. {
  406. if (count < 0) {
  407. count += length();
  408. if (count < 0)
  409. return String();
  410. }
  411. if (static_cast<size_t>(count) >= length())
  412. return *this;
  413. chr8_t* trimbuf = new chr8_t[count+1];
  414. memcpy(trimbuf, m_data.data(), count);
  415. trimbuf[count] = 0;
  416. String result;
  417. result.m_data = StringBuffer<chr8_t>(trimbuf, count);
  418. return result;
  419. }
  420. DS::String DS::String::right(ssize_t count)
  421. {
  422. if (count < 0) {
  423. count += length();
  424. if (count < 0)
  425. return String();
  426. }
  427. if (static_cast<size_t>(count) >= length())
  428. return *this;
  429. chr8_t* trimbuf = new chr8_t[count+1];
  430. memcpy(trimbuf, m_data.data() + m_data.length() - count, count);
  431. trimbuf[count] = 0;
  432. String result;
  433. result.m_data = StringBuffer<chr8_t>(trimbuf, count);
  434. return result;
  435. }
  436. DS::String DS::String::mid(size_t start, ssize_t count)
  437. {
  438. if (count < 0)
  439. count = length() - start;
  440. if (static_cast<size_t>(count) >= length())
  441. return *this;
  442. if (start >= length())
  443. return String();
  444. chr8_t* trimbuf = new chr8_t[count+1];
  445. memcpy(trimbuf, m_data.data() + start, count);
  446. trimbuf[count] = 0;
  447. String result;
  448. result.m_data = StringBuffer<chr8_t>(trimbuf, count);
  449. return result;
  450. }
  451. DS::String DS::String::strip(char comment)
  452. {
  453. if (isNull())
  454. return DS::String();
  455. char* strbuf = new char[m_data.length()+1];
  456. memcpy(strbuf, m_data.data(), m_data.length());
  457. strbuf[m_data.length()] = 0;
  458. char* startp = strbuf;
  459. while (isspace(*startp))
  460. ++startp;
  461. char* scanp;
  462. if (comment) {
  463. scanp = startp;
  464. while (*scanp) {
  465. if (*scanp == comment)
  466. *scanp = 0;
  467. else
  468. ++scanp;
  469. }
  470. }
  471. scanp = startp + strlen(startp);
  472. while (scanp > startp && isspace(*(scanp-1)))
  473. --scanp;
  474. *scanp = 0;
  475. DS::String result(startp);
  476. delete[] strbuf;
  477. return result;
  478. }
  479. ssize_t DS::String::find(const char* substr, ssize_t start)
  480. {
  481. DS_DASSERT(substr);
  482. DS_DASSERT(start >= 0);
  483. size_t sublen = strlen(substr);
  484. while (start + sublen <= length()) {
  485. if (strncmp(reinterpret_cast<const char*>(m_data.data()) + start, substr, sublen) == 0)
  486. return start;
  487. ++start;
  488. }
  489. return -1;
  490. }
  491. void DS::String::replace(const char* from, const char* to)
  492. {
  493. ssize_t start = 0;
  494. size_t skiplen = strlen(from);
  495. String result;
  496. for ( ;; ) {
  497. ssize_t next = find(from, start);
  498. if (next == -1)
  499. break;
  500. result += mid(start, next - start) + to;
  501. start = next + skiplen;
  502. }
  503. operator=(result + mid(start));
  504. }
  505. DS::String DS::String::Format(const char* fmt, ... )
  506. {
  507. va_list aptr;
  508. va_start(aptr, fmt);
  509. String result = FormatV(fmt, aptr);
  510. va_end(aptr);
  511. return result;
  512. }
  513. DS::String DS::String::FormatV(const char* fmt, va_list aptr)
  514. {
  515. char buffer[256];
  516. va_list aptr_save;
  517. va_copy(aptr_save, aptr);
  518. int chars = vsnprintf(buffer, 256, fmt, aptr);
  519. DS_DASSERT(chars >= 0);
  520. if (chars >= 256) {
  521. va_copy(aptr, aptr_save);
  522. char* bigbuf = new char[chars+1];
  523. vsnprintf(bigbuf, chars+1, fmt, aptr);
  524. return Steal(bigbuf);
  525. }
  526. return DS::String(buffer);
  527. }
  528. DS::String DS::String::Steal(const char* buffer, ssize_t length)
  529. {
  530. DS::String stolen;
  531. if (length < 0)
  532. length = tstrlen(buffer);
  533. stolen.m_data = StringBuffer<chr8_t>(reinterpret_cast<const chr8_t*>(buffer), length);
  534. return stolen;
  535. }