PageRenderTime 38ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/xbmc/utils/StringUtils.cpp

https://github.com/Raven2005/xbmc
C++ | 693 lines | 533 code | 96 blank | 64 comment | 161 complexity | f5bf5eb30cbfa2438acaa9289eeec959 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*
  2. * Copyright (C) 2005-2013 Team XBMC
  3. * http://www.xbmc.org
  4. *
  5. * This Program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2, or (at your option)
  8. * any later version.
  9. *
  10. * This Program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with XBMC; see the file COPYING. If not, see
  17. * <http://www.gnu.org/licenses/>.
  18. *
  19. */
  20. //-----------------------------------------------------------------------
  21. //
  22. // File: StringUtils.cpp
  23. //
  24. // Purpose: ATL split string utility
  25. // Author: Paul J. Weiss
  26. //
  27. // Modified to use J O'Leary's CStdString class by kraqh3d
  28. //
  29. //------------------------------------------------------------------------
  30. #include "StringUtils.h"
  31. #include "utils/RegExp.h"
  32. #include "utils/fstrcmp.h"
  33. #include <locale>
  34. #include <math.h>
  35. #include <sstream>
  36. #include <time.h>
  37. #define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try
  38. using namespace std;
  // Pattern compiled by StringUtils::ValidateUUID(): five hexadecimal groups of
  // 8-4-4-4-12 digits separated by '-', optionally preceded by '{' and/or followed by '}'.
  39. const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  40. /* empty string for use in returns by ref */
  41. const CStdString StringUtils::EmptyString = "";
  // Overwritten with the newly generated value on every call to StringUtils::CreateUUID().
  42. CStdString StringUtils::m_lastUUID = "";
  43. string StringUtils::Format(const char *fmt, ...)
  44. {
  45. va_list args;
  46. va_start(args, fmt);
  47. string str = FormatV(fmt, args);
  48. va_end(args);
  49. return str;
  50. }
  51. string StringUtils::FormatV(const char *fmt, va_list args)
  52. {
  53. if (fmt == NULL)
  54. return "";
  55. int size = FORMAT_BLOCK_SIZE;
  56. va_list argCopy;
  57. char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  58. if (cstr == NULL)
  59. return "";
  60. while (1)
  61. {
  62. va_copy(argCopy, args);
  63. int nActual = vsnprintf(cstr, size, fmt, argCopy);
  64. va_end(argCopy);
  65. if (nActual > -1 && nActual < size) // We got a valid result
  66. {
  67. string str(cstr, nActual);
  68. free(cstr);
  69. return str;
  70. }
  71. if (nActual > -1) // Exactly what we will need (glibc 2.1)
  72. size = nActual + 1;
  73. else // Let's try to double the size (glibc 2.0)
  74. size *= 2;
  75. char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  76. if (new_cstr == NULL)
  77. {
  78. free(cstr);
  79. return "";
  80. }
  81. cstr = new_cstr;
  82. }
  83. return "";
  84. }
  85. void StringUtils::ToUpper(string &str)
  86. {
  87. transform(str.begin(), str.end(), str.begin(), ::toupper);
  88. }
  89. void StringUtils::ToLower(string &str)
  90. {
  91. transform(str.begin(), str.end(), str.begin(), ::tolower);
  92. }
  93. bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
  94. {
  95. string tmp1 = str1;
  96. string tmp2 = str2;
  97. ToLower(tmp1);
  98. ToLower(tmp2);
  99. return tmp1.compare(tmp2) == 0;
  100. }
  101. string StringUtils::Left(const string &str, size_t count)
  102. {
  103. count = max((size_t)0, min(count, str.size()));
  104. return str.substr(0, count);
  105. }
  106. string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
  107. {
  108. if (first + count > str.size())
  109. count = str.size() - first;
  110. if (first > str.size())
  111. return string();
  112. ASSERT(first + count <= str.size());
  113. return str.substr(first, count);
  114. }
  115. string StringUtils::Right(const string &str, size_t count)
  116. {
  117. count = max((size_t)0, min(count, str.size()));
  118. return str.substr(str.size() - count);
  119. }
  120. std::string& StringUtils::Trim(std::string &str)
  121. {
  122. TrimLeft(str);
  123. return TrimRight(str);
  124. }
  125. std::string& StringUtils::TrimLeft(std::string &str)
  126. {
  127. str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun<int, int>(::isspace))));
  128. return str;
  129. }
  130. std::string& StringUtils::TrimRight(std::string &str)
  131. {
  132. str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun<int, int>(::isspace))).base(), str.end());
  133. return str;
  134. }
  135. int StringUtils::Replace(string &str, char oldChar, char newChar)
  136. {
  137. int replacedChars = 0;
  138. for (string::iterator it = str.begin(); it != str.end(); it++)
  139. {
  140. if (*it == oldChar)
  141. {
  142. *it = newChar;
  143. replacedChars++;
  144. }
  145. }
  146. return replacedChars;
  147. }
  148. int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
  149. {
  150. int replacedChars = 0;
  151. size_t index = 0;
  152. while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
  153. {
  154. str.replace(index, oldStr.size(), newStr);
  155. index += newStr.size();
  156. replacedChars++;
  157. }
  158. return replacedChars;
  159. }
  160. bool StringUtils::StartsWith(const std::string &str, const std::string &str2, bool useCase /* = false */)
  161. {
  162. std::string left = StringUtils::Left(str, str2.size());
  163. if (useCase)
  164. return left.compare(str2) == 0;
  165. return StringUtils::EqualsNoCase(left, str2);
  166. }
  167. bool StringUtils::EndsWith(const std::string &str, const std::string &str2, bool useCase /* = false */)
  168. {
  169. std::string right = StringUtils::Right(str, str2.size());
  170. if (useCase)
  171. return right.compare(str2) == 0;
  172. return StringUtils::EqualsNoCase(right, str2);
  173. }
  174. void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
  175. {
  176. result = "";
  177. for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
  178. result += (*it) + delimiter;
  179. if(result != "")
  180. result.Delete(result.size()-delimiter.size(), delimiter.size());
  181. }
  182. CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
  183. {
  184. CStdString result;
  185. JoinString(strings, delimiter, result);
  186. return result;
  187. }
  188. CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
  189. {
  190. CStdStringArray strArray;
  191. for (unsigned int index = 0; index < strings.size(); index++)
  192. strArray.push_back(strings.at(index));
  193. return JoinString(strArray, delimiter);
  194. }
  195. // Splits the string input into pieces delimited by delimiter.
  196. // if 2 delimiters are in a row, it will include the empty string between them.
  197. // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
  198. int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
  199. {
  200. int iPos = -1;
  201. int newPos = -1;
  202. int sizeS2 = delimiter.GetLength();
  203. int isize = input.GetLength();
  204. results.clear();
  205. vector<unsigned int> positions;
  206. newPos = input.Find (delimiter, 0);
  207. if ( newPos < 0 )
  208. {
  209. results.push_back(input);
  210. return 1;
  211. }
  212. while ( newPos > iPos )
  213. {
  214. positions.push_back(newPos);
  215. iPos = newPos;
  216. newPos = input.Find (delimiter, iPos + sizeS2);
  217. }
  218. // numFound is the number of delimiters which is one less
  219. // than the number of substrings
  220. unsigned int numFound = positions.size();
  221. if (iMaxStrings > 0 && numFound >= iMaxStrings)
  222. numFound = iMaxStrings - 1;
  223. for ( unsigned int i = 0; i <= numFound; i++ )
  224. {
  225. CStdString s;
  226. if ( i == 0 )
  227. {
  228. if ( i == numFound )
  229. s = input;
  230. else
  231. s = input.Mid( i, positions[i] );
  232. }
  233. else
  234. {
  235. int offset = positions[i - 1] + sizeS2;
  236. if ( offset < isize )
  237. {
  238. if ( i == numFound )
  239. s = input.Mid(offset);
  240. else if ( i > 0 )
  241. s = input.Mid( positions[i - 1] + sizeS2,
  242. positions[i] - positions[i - 1] - sizeS2 );
  243. }
  244. }
  245. results.push_back(s);
  246. }
  247. // return the number of substrings
  248. return results.size();
  249. }
  250. CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
  251. {
  252. CStdStringArray result;
  253. SplitString(input, delimiter, result, iMaxStrings);
  254. return result;
  255. }
  256. vector<string> StringUtils::Split(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
  257. {
  258. CStdStringArray result;
  259. SplitString(input, delimiter, result, iMaxStrings);
  260. vector<string> strArray;
  261. for (unsigned int index = 0; index < result.size(); index++)
  262. strArray.push_back(result.at(index));
  263. return strArray;
  264. }
  265. // returns the number of occurrences of strFind in strInput.
  266. int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
  267. {
  268. int pos = strInput.Find(strFind, 0);
  269. int numfound = 0;
  270. while (pos >= 0)
  271. {
  272. numfound++;
  273. pos = strInput.Find(strFind, pos + 1);
  274. }
  275. return numfound;
  276. }
  277. // Compares separately the numeric and alphabetic parts of a string.
  278. // returns negative if left < right, positive if left > right
  279. // and 0 if they are identical (essentially calculates left - right)
  280. int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
  281. {
  282. wchar_t *l = (wchar_t *)left;
  283. wchar_t *r = (wchar_t *)right;
  284. wchar_t *ld, *rd;
  285. wchar_t lc, rc;
  286. int64_t lnum, rnum;
  287. const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
  288. int cmp_res = 0;
  289. while (*l != 0 && *r != 0)
  290. {
  291. // check if we have a numerical value
  292. if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
  293. {
  294. ld = l;
  295. lnum = 0;
  296. while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
  297. { // compare only up to 15 digits
  298. lnum *= 10;
  299. lnum += *ld++ - '0';
  300. }
  301. rd = r;
  302. rnum = 0;
  303. while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
  304. { // compare only up to 15 digits
  305. rnum *= 10;
  306. rnum += *rd++ - L'0';
  307. }
  308. // do we have numbers?
  309. if (lnum != rnum)
  310. { // yes - and they're different!
  311. return lnum - rnum;
  312. }
  313. l = ld;
  314. r = rd;
  315. continue;
  316. }
  317. // do case less comparison
  318. lc = *l;
  319. if (lc >= L'A' && lc <= L'Z')
  320. lc += L'a'-L'A';
  321. rc = *r;
  322. if (rc >= L'A' && rc <= L'Z')
  323. rc += L'a'- L'A';
  324. // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
  325. if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
  326. {
  327. return cmp_res;
  328. }
  329. l++; r++;
  330. }
  331. if (*r)
  332. { // r is longer
  333. return -1;
  334. }
  335. else if (*l)
  336. { // l is longer
  337. return 1;
  338. }
  339. return 0; // files are the same
  340. }
  341. int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
  342. {
  343. CStdStringArray days;
  344. int splitCount = StringUtils::SplitString(dateString, "-", days);
  345. if (splitCount == 1)
  346. return atoi(days[0].c_str());
  347. else if (splitCount == 2)
  348. return atoi(days[0].c_str())*100+atoi(days[1].c_str());
  349. else if (splitCount == 3)
  350. return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
  351. else
  352. return -1;
  353. }
  354. long StringUtils::TimeStringToSeconds(const CStdString &timeString)
  355. {
  356. CStdString strCopy(timeString);
  357. strCopy.TrimLeft(" \n\r\t");
  358. strCopy.TrimRight(" \n\r\t");
  359. if(strCopy.Right(4).Equals(" min"))
  360. {
  361. // this is imdb format of "XXX min"
  362. return 60 * atoi(strCopy.c_str());
  363. }
  364. else
  365. {
  366. CStdStringArray secs;
  367. StringUtils::SplitString(strCopy, ":", secs);
  368. int timeInSecs = 0;
  369. for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
  370. {
  371. timeInSecs *= 60;
  372. timeInSecs += atoi(secs[i]);
  373. }
  374. return timeInSecs;
  375. }
  376. }
  377. CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
  378. {
  379. int hh = lSeconds / 3600;
  380. lSeconds = lSeconds % 3600;
  381. int mm = lSeconds / 60;
  382. int ss = lSeconds % 60;
  383. if (format == TIME_FORMAT_GUESS)
  384. format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
  385. CStdString strHMS;
  386. if (format & TIME_FORMAT_HH)
  387. strHMS.AppendFormat("%02.2i", hh);
  388. else if (format & TIME_FORMAT_H)
  389. strHMS.AppendFormat("%i", hh);
  390. if (format & TIME_FORMAT_MM)
  391. strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", mm);
  392. if (format & TIME_FORMAT_SS)
  393. strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", ss);
  394. return strHMS;
  395. }
  396. bool StringUtils::IsNaturalNumber(const CStdString& str)
  397. {
  398. size_t i = 0, n = 0;
  399. // allow whitespace,digits,whitespace
  400. while (i < str.size() && isspace((unsigned char) str[i]))
  401. i++;
  402. while (i < str.size() && isdigit((unsigned char) str[i]))
  403. {
  404. i++; n++;
  405. }
  406. while (i < str.size() && isspace((unsigned char) str[i]))
  407. i++;
  408. return i == str.size() && n > 0;
  409. }
  410. bool StringUtils::IsInteger(const CStdString& str)
  411. {
  412. size_t i = 0, n = 0;
  413. // allow whitespace,-,digits,whitespace
  414. while (i < str.size() && isspace((unsigned char) str[i]))
  415. i++;
  416. if (i < str.size() && str[i] == '-')
  417. i++;
  418. while (i < str.size() && isdigit((unsigned char) str[i]))
  419. {
  420. i++; n++;
  421. }
  422. while (i < str.size() && isspace((unsigned char) str[i]))
  423. i++;
  424. return i == str.size() && n > 0;
  425. }
  426. void StringUtils::RemoveCRLF(CStdString& strLine)
  427. {
  428. while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
  429. {
  430. strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
  431. }
  432. }
  433. CStdString StringUtils::SizeToString(int64_t size)
  434. {
  435. CStdString strLabel;
  436. const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
  437. unsigned int i = 0;
  438. double s = (double)size;
  439. while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
  440. {
  441. s /= 1024.0;
  442. i++;
  443. }
  444. if (!i)
  445. strLabel.Format("%.0lf %cB ", s, prefixes[i]);
  446. else if (s >= 100.0)
  447. strLabel.Format("%.1lf %cB", s, prefixes[i]);
  448. else
  449. strLabel.Format("%.2lf %cB", s, prefixes[i]);
  450. return strLabel;
  451. }
  452. size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
  453. {
  454. // NOTE: This assumes word is lowercase!
  455. unsigned char *s = (unsigned char *)str;
  456. do
  457. {
  458. // start with a compare
  459. unsigned char *c = s;
  460. unsigned char *w = (unsigned char *)wordLowerCase;
  461. bool same = true;
  462. while (same && *c && *w)
  463. {
  464. unsigned char lc = *c++;
  465. if (lc >= 'A' && lc <= 'Z')
  466. lc += 'a'-'A';
  467. if (lc != *w++) // different
  468. same = false;
  469. }
  470. if (same && *w == 0) // only the same if word has been exhausted
  471. return (const char *)s - str;
  472. // otherwise, find a space and skip to the end of the whitespace
  473. while (*s && *s != ' ') s++;
  474. while (*s && *s == ' ') s++;
  475. // and repeat until we're done
  476. } while (*s);
  477. return CStdString::npos;
  478. }
  479. // assumes it is called from after the first open bracket is found
  480. int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
  481. {
  482. int blocks = 1;
  483. for (unsigned int i = startPos; i < str.size(); i++)
  484. {
  485. if (str[i] == opener)
  486. blocks++;
  487. else if (str[i] == closer)
  488. {
  489. blocks--;
  490. if (!blocks)
  491. return i;
  492. }
  493. }
  494. return (int)CStdString::npos;
  495. }
  496. void StringUtils::WordToDigits(CStdString &word)
  497. {
  498. static const char word_to_letter[] = "22233344455566677778889999";
  499. word.ToLower();
  500. for (unsigned int i = 0; i < word.size(); ++i)
  501. { // NB: This assumes ascii, which probably needs extending at some point.
  502. char letter = word[i];
  503. if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
  504. {
  505. word[i] = word_to_letter[letter-'a'];
  506. }
  507. else if (letter < '0' || letter > '9') // We want to keep 0-9!
  508. {
  509. word[i] = ' '; // replace everything else with a space
  510. }
  511. }
  512. }
  513. CStdString StringUtils::CreateUUID()
  514. {
  515. /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
  516. * Version 4 conform local unique UUID based upon random number generation.
  517. */
  518. char UuidStrTmp[40];
  519. char *pUuidStr = UuidStrTmp;
  520. int i;
  521. static bool m_uuidInitialized = false;
  522. if (!m_uuidInitialized)
  523. {
  524. /* use current time as the seed for rand()*/
  525. srand(time(NULL));
  526. m_uuidInitialized = true;
  527. }
  528. /*Data1 - 8 characters.*/
  529. for(i = 0; i < 8; i++, pUuidStr++)
  530. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  531. /*Data2 - 4 characters.*/
  532. *pUuidStr++ = '-';
  533. for(i = 0; i < 4; i++, pUuidStr++)
  534. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  535. /*Data3 - 4 characters.*/
  536. *pUuidStr++ = '-';
  537. for(i = 0; i < 4; i++, pUuidStr++)
  538. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  539. /*Data4 - 4 characters.*/
  540. *pUuidStr++ = '-';
  541. for(i = 0; i < 4; i++, pUuidStr++)
  542. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  543. /*Data5 - 12 characters.*/
  544. *pUuidStr++ = '-';
  545. for(i = 0; i < 12; i++, pUuidStr++)
  546. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  547. *pUuidStr = '\0';
  548. m_lastUUID = UuidStrTmp;
  549. return UuidStrTmp;
  550. }
  551. bool StringUtils::ValidateUUID(const CStdString &uuid)
  552. {
  553. CRegExp guidRE;
  554. guidRE.RegComp(ADDON_GUID_RE);
  555. return (guidRE.RegFind(uuid.c_str()) == 0);
  556. }
  557. double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
  558. {
  559. return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
  560. }
  561. int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
  562. {
  563. int best = -1;
  564. matchscore = 0;
  565. int i = 0;
  566. for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
  567. {
  568. int maxlength = max(str.length(), it->length());
  569. double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
  570. if (score > matchscore)
  571. {
  572. matchscore = score;
  573. best = i;
  574. }
  575. }
  576. return best;
  577. }
  578. size_t StringUtils::utf8_strlen(const char *s)
  579. {
  580. size_t length = 0;
  581. while (*s)
  582. {
  583. if ((*s++ & 0xC0) != 0x80)
  584. length++;
  585. }
  586. return length;
  587. }
  588. std::string StringUtils::Paramify(const std::string &param)
  589. {
  590. std::string result = param;
  591. // escape backspaces
  592. StringUtils::Replace(result, "\\", "\\\\");
  593. // escape double quotes
  594. StringUtils::Replace(result, "\"", "\\\"");
  595. // add double quotes around the whole string
  596. return "\"" + result + "\"";
  597. }