PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/xbmc/utils/StringUtils.cpp

https://github.com/weitao2012/android-1
C++ | 542 lines | 418 code | 62 blank | 62 comment | 135 complexity | 33f06c2d1fc2c20871e6903438cad33e MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*
  2. * Copyright (C) 2005-2008 Team XBMC
  3. * http://www.xbmc.org
  4. *
  5. * This Program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2, or (at your option)
  8. * any later version.
  9. *
  10. * This Program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with XBMC; see the file COPYING. If not, write to
  17. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. */
  21. //-----------------------------------------------------------------------
  22. //
  23. // File: StringUtils.cpp
  24. //
  25. // Purpose: ATL split string utility
  26. // Author: Paul J. Weiss
  27. //
  28. // Modified to use J O'Leary's CStdString class by kraqh3d
  29. //
  30. //------------------------------------------------------------------------
  31. #include "StringUtils.h"
  32. #include "utils/RegExp.h"
  33. #include "utils/fstrcmp.h"
  34. #include <locale>
  35. #include <math.h>
  36. #include <sstream>
  37. #include <time.h>
  38. using namespace std;
  // Regular expression for a GUID written as 8-4-4-4-12 hexadecimal digits,
  // optionally wrapped in curly braces. Compiled by ValidateUUID().
  39. const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  40. /* empty string for use in returns by ref */
  41. const CStdString StringUtils::EmptyString = "";
  // Holds the value most recently produced by CreateUUID(); starts out empty.
  42. CStdString StringUtils::m_lastUUID = "";
  43. void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
  44. {
  45. result = "";
  46. for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
  47. result += (*it) + delimiter;
  48. if(result != "")
  49. result.Delete(result.size()-delimiter.size(), delimiter.size());
  50. }
  51. CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
  52. {
  53. CStdString result;
  54. JoinString(strings, delimiter, result);
  55. return result;
  56. }
  57. CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
  58. {
  59. CStdStringArray strArray;
  60. for (unsigned int index = 0; index < strings.size(); index++)
  61. strArray.push_back(strings.at(index));
  62. return JoinString(strArray, delimiter);
  63. }
  64. // Splits the string input into pieces delimited by delimiter.
  65. // if 2 delimiters are in a row, it will include the empty string between them.
  66. // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
  67. int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
  68. {
  69. int iPos = -1;
  70. int newPos = -1;
  71. int sizeS2 = delimiter.GetLength();
  72. int isize = input.GetLength();
  73. results.clear();
  74. vector<unsigned int> positions;
  75. newPos = input.Find (delimiter, 0);
  76. if ( newPos < 0 )
  77. {
  78. results.push_back(input);
  79. return 1;
  80. }
  81. while ( newPos > iPos )
  82. {
  83. positions.push_back(newPos);
  84. iPos = newPos;
  85. newPos = input.Find (delimiter, iPos + sizeS2);
  86. }
  87. // numFound is the number of delimeters which is one less
  88. // than the number of substrings
  89. unsigned int numFound = positions.size();
  90. if (iMaxStrings > 0 && numFound >= iMaxStrings)
  91. numFound = iMaxStrings - 1;
  92. for ( unsigned int i = 0; i <= numFound; i++ )
  93. {
  94. CStdString s;
  95. if ( i == 0 )
  96. {
  97. if ( i == numFound )
  98. s = input;
  99. else
  100. s = input.Mid( i, positions[i] );
  101. }
  102. else
  103. {
  104. int offset = positions[i - 1] + sizeS2;
  105. if ( offset < isize )
  106. {
  107. if ( i == numFound )
  108. s = input.Mid(offset);
  109. else if ( i > 0 )
  110. s = input.Mid( positions[i - 1] + sizeS2,
  111. positions[i] - positions[i - 1] - sizeS2 );
  112. }
  113. }
  114. results.push_back(s);
  115. }
  116. // return the number of substrings
  117. return results.size();
  118. }
  119. CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
  120. {
  121. CStdStringArray result;
  122. SplitString(input, delimiter, result, iMaxStrings);
  123. return result;
  124. }
  125. vector<string> StringUtils::Split(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
  126. {
  127. CStdStringArray result;
  128. SplitString(input, delimiter, result, iMaxStrings);
  129. vector<string> strArray;
  130. for (unsigned int index = 0; index < result.size(); index++)
  131. strArray.push_back(result.at(index));
  132. return strArray;
  133. }
  134. // returns the number of occurences of strFind in strInput.
  135. int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
  136. {
  137. int pos = strInput.Find(strFind, 0);
  138. int numfound = 0;
  139. while (pos > 0)
  140. {
  141. numfound++;
  142. pos = strInput.Find(strFind, pos + 1);
  143. }
  144. return numfound;
  145. }
  146. // Compares separately the numeric and alphabetic parts of a string.
  147. // returns negative if left < right, positive if left > right
  148. // and 0 if they are identical (essentially calculates left - right)
  149. int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
  150. {
  151. wchar_t *l = (wchar_t *)left;
  152. wchar_t *r = (wchar_t *)right;
  153. wchar_t *ld, *rd;
  154. wchar_t lc, rc;
  155. int64_t lnum, rnum;
  156. const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
  157. int cmp_res = 0;
  158. while (*l != 0 && *r != 0)
  159. {
  160. // check if we have a numerical value
  161. if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
  162. {
  163. ld = l;
  164. lnum = 0;
  165. while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
  166. { // compare only up to 15 digits
  167. lnum *= 10;
  168. lnum += *ld++ - '0';
  169. }
  170. rd = r;
  171. rnum = 0;
  172. while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
  173. { // compare only up to 15 digits
  174. rnum *= 10;
  175. rnum += *rd++ - L'0';
  176. }
  177. // do we have numbers?
  178. if (lnum != rnum)
  179. { // yes - and they're different!
  180. return lnum - rnum;
  181. }
  182. l = ld;
  183. r = rd;
  184. continue;
  185. }
  186. // do case less comparison
  187. lc = *l;
  188. if (lc >= L'A' && lc <= L'Z')
  189. lc += L'a'-L'A';
  190. rc = *r;
  191. if (rc >= L'A' && rc <= L'Z')
  192. rc += L'a'- L'A';
  193. // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
  194. if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
  195. {
  196. return cmp_res;
  197. }
  198. l++; r++;
  199. }
  200. if (*r)
  201. { // r is longer
  202. return -1;
  203. }
  204. else if (*l)
  205. { // l is longer
  206. return 1;
  207. }
  208. return 0; // files are the same
  209. }
  210. int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
  211. {
  212. CStdStringArray days;
  213. int splitCount = StringUtils::SplitString(dateString, "-", days);
  214. if (splitCount == 1)
  215. return atoi(days[0].c_str());
  216. else if (splitCount == 2)
  217. return atoi(days[0].c_str())*100+atoi(days[1].c_str());
  218. else if (splitCount == 3)
  219. return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
  220. else
  221. return -1;
  222. }
  223. long StringUtils::TimeStringToSeconds(const CStdString &timeString)
  224. {
  225. if(timeString.Right(4).Equals(" min"))
  226. {
  227. // this is imdb format of "XXX min"
  228. return 60 * atoi(timeString.c_str());
  229. }
  230. else
  231. {
  232. CStdStringArray secs;
  233. StringUtils::SplitString(timeString, ":", secs);
  234. int timeInSecs = 0;
  235. for (unsigned int i = 0; i < secs.size(); i++)
  236. {
  237. timeInSecs *= 60;
  238. timeInSecs += atoi(secs[i]);
  239. }
  240. return timeInSecs;
  241. }
  242. }
  243. CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
  244. {
  245. int hh = lSeconds / 3600;
  246. lSeconds = lSeconds % 3600;
  247. int mm = lSeconds / 60;
  248. int ss = lSeconds % 60;
  249. if (format == TIME_FORMAT_GUESS)
  250. format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
  251. CStdString strHMS;
  252. if (format & TIME_FORMAT_HH)
  253. strHMS.AppendFormat("%02.2i", hh);
  254. else if (format & TIME_FORMAT_H)
  255. strHMS.AppendFormat("%i", hh);
  256. if (format & TIME_FORMAT_MM)
  257. strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", mm);
  258. if (format & TIME_FORMAT_SS)
  259. strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", ss);
  260. return strHMS;
  261. }
  262. bool StringUtils::IsNaturalNumber(const CStdString& str)
  263. {
  264. size_t i = 0, n = 0;
  265. // allow whitespace,digits,whitespace
  266. while (i < str.size() && isspace(str[i]))
  267. i++;
  268. while (i < str.size() && isdigit(str[i]))
  269. {
  270. i++; n++;
  271. }
  272. while (i < str.size() && isspace(str[i]))
  273. i++;
  274. return i == str.size() && n > 0;
  275. }
  276. bool StringUtils::IsInteger(const CStdString& str)
  277. {
  278. size_t i = 0, n = 0;
  279. // allow whitespace,-,digits,whitespace
  280. while (i < str.size() && isspace(str[i]))
  281. i++;
  282. if (i < str.size() && str[i] == '-')
  283. i++;
  284. while (i < str.size() && isdigit(str[i]))
  285. {
  286. i++; n++;
  287. }
  288. while (i < str.size() && isspace(str[i]))
  289. i++;
  290. return i == str.size() && n > 0;
  291. }
  292. bool StringUtils::Test()
  293. {
  294. bool ret = true;
  295. ret |= IsNaturalNumber("10");
  296. ret |= IsNaturalNumber(" 10");
  297. ret |= IsNaturalNumber("0");
  298. ret |= !IsNaturalNumber(" 1 0");
  299. ret |= !IsNaturalNumber("1.0");
  300. ret |= !IsNaturalNumber("1.1");
  301. ret |= !IsNaturalNumber("0x1");
  302. ret |= !IsNaturalNumber("blah");
  303. ret |= !IsNaturalNumber("120 h");
  304. ret |= !IsNaturalNumber(" ");
  305. ret |= !IsNaturalNumber("");
  306. ret |= IsInteger("10");
  307. ret |= IsInteger(" -10");
  308. ret |= IsInteger("0");
  309. ret |= !IsInteger(" 1 0");
  310. ret |= !IsInteger("1.0");
  311. ret |= !IsInteger("1.1");
  312. ret |= !IsInteger("0x1");
  313. ret |= !IsInteger("blah");
  314. ret |= !IsInteger("120 h");
  315. ret |= !IsInteger(" ");
  316. ret |= !IsInteger("");
  317. return ret;
  318. }
  319. void StringUtils::RemoveCRLF(CStdString& strLine)
  320. {
  321. while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
  322. {
  323. strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
  324. }
  325. }
  326. CStdString StringUtils::SizeToString(int64_t size)
  327. {
  328. CStdString strLabel;
  329. const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
  330. unsigned int i = 0;
  331. double s = (double)size;
  332. while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
  333. {
  334. s /= 1024.0;
  335. i++;
  336. }
  337. if (!i)
  338. strLabel.Format("%.0lf %cB ", s, prefixes[i]);
  339. else if (s >= 100.0)
  340. strLabel.Format("%.1lf %cB", s, prefixes[i]);
  341. else
  342. strLabel.Format("%.2lf %cB", s, prefixes[i]);
  343. return strLabel;
  344. }
  345. size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
  346. {
  347. // NOTE: This assumes word is lowercase!
  348. unsigned char *s = (unsigned char *)str;
  349. do
  350. {
  351. // start with a compare
  352. unsigned char *c = s;
  353. unsigned char *w = (unsigned char *)wordLowerCase;
  354. bool same = true;
  355. while (same && *c && *w)
  356. {
  357. unsigned char lc = *c++;
  358. if (lc >= 'A' && lc <= 'Z')
  359. lc += 'a'-'A';
  360. if (lc != *w++) // different
  361. same = false;
  362. }
  363. if (same && *w == 0) // only the same if word has been exhausted
  364. return (const char *)s - str;
  365. // otherwise, find a space and skip to the end of the whitespace
  366. while (*s && *s != ' ') s++;
  367. while (*s && *s == ' ') s++;
  368. // and repeat until we're done
  369. } while (*s);
  370. return CStdString::npos;
  371. }
  372. // assumes it is called from after the first open bracket is found
  373. int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
  374. {
  375. int blocks = 1;
  376. for (unsigned int i = startPos; i < str.size(); i++)
  377. {
  378. if (str[i] == opener)
  379. blocks++;
  380. else if (str[i] == closer)
  381. {
  382. blocks--;
  383. if (!blocks)
  384. return i;
  385. }
  386. }
  387. return (int)CStdString::npos;
  388. }
  389. void StringUtils::WordToDigits(CStdString &word)
  390. {
  391. static const char word_to_letter[] = "22233344455566677778889999";
  392. word.ToLower();
  393. for (unsigned int i = 0; i < word.size(); ++i)
  394. { // NB: This assumes ascii, which probably needs extending at some point.
  395. char letter = word[i];
  396. if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
  397. {
  398. word[i] = word_to_letter[letter-'a'];
  399. }
  400. else if (letter < '0' || letter > '9') // We want to keep 0-9!
  401. {
  402. word[i] = ' '; // replace everything else with a space
  403. }
  404. }
  405. }
  406. CStdString StringUtils::CreateUUID()
  407. {
  408. /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
  409. * Version 4 conform local unique UUID based upon random number generation.
  410. */
  411. char UuidStrTmp[40];
  412. char *pUuidStr = UuidStrTmp;
  413. int i;
  414. static bool m_uuidInitialized = false;
  415. if (!m_uuidInitialized)
  416. {
  417. /* use current time as the seed for rand()*/
  418. srand(time(NULL));
  419. m_uuidInitialized = true;
  420. }
  421. /*Data1 - 8 characters.*/
  422. for(i = 0; i < 8; i++, pUuidStr++)
  423. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  424. /*Data2 - 4 characters.*/
  425. *pUuidStr++ = '-';
  426. for(i = 0; i < 4; i++, pUuidStr++)
  427. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  428. /*Data3 - 4 characters.*/
  429. *pUuidStr++ = '-';
  430. for(i = 0; i < 4; i++, pUuidStr++)
  431. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  432. /*Data4 - 4 characters.*/
  433. *pUuidStr++ = '-';
  434. for(i = 0; i < 4; i++, pUuidStr++)
  435. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  436. /*Data5 - 12 characters.*/
  437. *pUuidStr++ = '-';
  438. for(i = 0; i < 12; i++, pUuidStr++)
  439. ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
  440. *pUuidStr = '\0';
  441. m_lastUUID = UuidStrTmp;
  442. return UuidStrTmp;
  443. }
  444. bool StringUtils::ValidateUUID(const CStdString &uuid)
  445. {
  446. CRegExp guidRE;
  447. guidRE.RegComp(ADDON_GUID_RE);
  448. return (guidRE.RegFind(uuid.c_str()) == 0);
  449. }
  450. double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
  451. {
  452. return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
  453. }
  454. int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
  455. {
  456. int best = -1;
  457. matchscore = 0;
  458. int i = 0;
  459. for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
  460. {
  461. int maxlength = max(str.length(), it->length());
  462. double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
  463. if (score > matchscore)
  464. {
  465. matchscore = score;
  466. best = i;
  467. }
  468. }
  469. return best;
  470. }
  471. size_t StringUtils::utf8_strlen(const char *s)
  472. {
  473. size_t length = 0;
  474. while (*s)
  475. {
  476. if ((*s++ & 0xC0) != 0x80)
  477. length++;
  478. }
  479. return length;
  480. }