PageRenderTime 63ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp

https://bitbucket.org/romaxa/mozilla-central
C++ | 1416 lines | 1253 code | 75 blank | 88 comment | 194 complexity | 4cc44c865dc79b4ccd7d43a5f6d22862 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, 0BSD, LGPL-3.0, AGPL-1.0, MIT, JSON, Apache-2.0, BSD-3-Clause, LGPL-2.1, GPL-2.0
  1. /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* ***** BEGIN LICENSE BLOCK *****
  3. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. * http://www.mozilla.org/MPL/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * The Original Code is the Mozilla Text to HTML converter code.
  16. *
  17. * The Initial Developer of the Original Code is
  18. * Ben Bucksch <http://www.bucksch.org>.
  19. * Portions created by the Initial Developer are Copyright (C) 1999, 2000
  20. * the Initial Developer. All Rights Reserved.
  21. *
  22. * Contributor(s):
  23. *
  24. * Alternatively, the contents of this file may be used under the terms of
  25. * either the GNU General Public License Version 2 or later (the "GPL"), or
  26. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  27. * in which case the provisions of the GPL or the LGPL are applicable instead
  28. * of those above. If you wish to allow use of your version of this file only
  29. * under the terms of either the GPL or the LGPL, and not to allow others to
  30. * use your version of this file under the terms of the MPL, indicate your
  31. * decision by deleting the provisions above and replace them with the notice
  32. * and other provisions required by the GPL or the LGPL. If you do not delete
  33. * the provisions above, a recipient may use your version of this file under
  34. * the terms of any one of the MPL, the GPL or the LGPL.
  35. *
  36. * ***** END LICENSE BLOCK ***** */
  37. #include "mozTXTToHTMLConv.h"
  38. #include "nsIServiceManager.h"
  39. #include "nsNetCID.h"
  40. #include "nsNetUtil.h"
  41. #include "nsReadableUtils.h"
  42. #include "nsUnicharUtils.h"
  43. #include "nsCRT.h"
  44. #include "nsIExternalProtocolHandler.h"
  45. static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
  46. #ifdef DEBUG_BenB_Perf
  47. #include "prtime.h"
  48. #include "prinrval.h"
  49. #endif
  50. const PRFloat64 growthRate = 1.2;
  51. // Bug 183111, editor now replaces multiple spaces with leading
  52. // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
  53. // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
  54. // Also recognize the Japanese ideographic space 0x3000 as a space.
  55. static inline PRBool IsSpace(const PRUnichar aChar)
  56. {
  57. return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
  58. }
  59. // Escape Char will take ch, escape it and append the result to
  60. // aStringToAppendTo
  61. void
  62. mozTXTToHTMLConv::EscapeChar(const PRUnichar ch, nsString& aStringToAppendTo,
  63. PRBool inAttribute)
  64. {
  65. switch (ch)
  66. {
  67. case '<':
  68. aStringToAppendTo.AppendLiteral("&lt;");
  69. break;
  70. case '>':
  71. aStringToAppendTo.AppendLiteral("&gt;");
  72. break;
  73. case '&':
  74. aStringToAppendTo.AppendLiteral("&amp;");
  75. break;
  76. case '"':
  77. if (inAttribute)
  78. {
  79. aStringToAppendTo.AppendLiteral("&quot;");
  80. break;
  81. }
  82. // else fall through
  83. default:
  84. aStringToAppendTo += ch;
  85. }
  86. return;
  87. }
  88. // EscapeStr takes the passed in string and
  89. // escapes it IN PLACE.
  90. void
  91. mozTXTToHTMLConv::EscapeStr(nsString& aInString, PRBool inAttribute)
  92. {
  93. // the replace substring routines
  94. // don't seem to work if you have a character
  95. // in the in string that is also in the replacement
  96. // string! =(
  97. //aInString.ReplaceSubstring("&", "&amp;");
  98. //aInString.ReplaceSubstring("<", "&lt;");
  99. //aInString.ReplaceSubstring(">", "&gt;");
  100. for (PRUint32 i = 0; i < aInString.Length();)
  101. {
  102. switch (aInString[i])
  103. {
  104. case '<':
  105. aInString.Cut(i, 1);
  106. aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
  107. i += 4; // skip past the integers we just added
  108. break;
  109. case '>':
  110. aInString.Cut(i, 1);
  111. aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
  112. i += 4; // skip past the integers we just added
  113. break;
  114. case '&':
  115. aInString.Cut(i, 1);
  116. aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
  117. i += 5; // skip past the integers we just added
  118. break;
  119. case '"':
  120. if (inAttribute)
  121. {
  122. aInString.Cut(i, 1);
  123. aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
  124. i += 6;
  125. break;
  126. }
  127. // else fall through
  128. default:
  129. i++;
  130. }
  131. }
  132. }
  133. void
  134. mozTXTToHTMLConv::UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString)
  135. {
  136. const PRUnichar * subString = nsnull;
  137. for (PRUint32 i = aStartPos; PRInt32(i) - aStartPos < aLength;)
  138. {
  139. PRInt32 remainingChars = i - aStartPos;
  140. if (aInString[i] == '&')
  141. {
  142. subString = &aInString[i];
  143. if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&lt;").get(), MinInt(4, aLength - remainingChars)))
  144. {
  145. aOutString.Append(PRUnichar('<'));
  146. i += 4;
  147. }
  148. else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&gt;").get(), MinInt(4, aLength - remainingChars)))
  149. {
  150. aOutString.Append(PRUnichar('>'));
  151. i += 4;
  152. }
  153. else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&amp;").get(), MinInt(5, aLength - remainingChars)))
  154. {
  155. aOutString.Append(PRUnichar('&'));
  156. i += 5;
  157. }
  158. else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&quot;").get(), MinInt(6, aLength - remainingChars)))
  159. {
  160. aOutString.Append(PRUnichar('"'));
  161. i += 6;
  162. }
  163. else
  164. {
  165. aOutString += aInString[i];
  166. i++;
  167. }
  168. }
  169. else
  170. {
  171. aOutString += aInString[i];
  172. i++;
  173. }
  174. }
  175. }
  176. void
  177. mozTXTToHTMLConv::CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength,
  178. const PRUint32 pos, nsString& aOutString)
  179. {
  180. NS_ASSERTION(PRInt32(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
  181. if (PRInt32(pos) >= aInLength)
  182. return;
  183. if (aInString[pos] == '@')
  184. {
  185. // only pre-pend a mailto url if the string contains a .domain in it..
  186. //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
  187. nsDependentString inString(aInString, aInLength);
  188. if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
  189. {
  190. aOutString.AssignLiteral("mailto:");
  191. aOutString += aInString;
  192. }
  193. }
  194. else if (aInString[pos] == '.')
  195. {
  196. if (ItMatchesDelimited(aInString, aInLength,
  197. NS_LITERAL_STRING("www.").get(), 4, LT_IGNORE, LT_IGNORE))
  198. {
  199. aOutString.AssignLiteral("http://");
  200. aOutString += aInString;
  201. }
  202. else if (ItMatchesDelimited(aInString,aInLength, NS_LITERAL_STRING("ftp.").get(), 4, LT_IGNORE, LT_IGNORE))
  203. {
  204. aOutString.AssignLiteral("ftp://");
  205. aOutString += aInString;
  206. }
  207. }
  208. }
  209. PRBool
  210. mozTXTToHTMLConv::FindURLStart(const PRUnichar * aInString, PRInt32 aInLength,
  211. const PRUint32 pos, const modetype check,
  212. PRUint32& start)
  213. {
  214. switch(check)
  215. { // no breaks, because end of blocks is never reached
  216. case RFC1738:
  217. {
  218. if (!nsCRT::strncmp(&aInString[MaxInt(pos - 4, 0)], NS_LITERAL_STRING("<URL:").get(), 5))
  219. {
  220. start = pos + 1;
  221. return PR_TRUE;
  222. }
  223. else
  224. return PR_FALSE;
  225. }
  226. case RFC2396E:
  227. {
  228. nsString temp(aInString, aInLength);
  229. PRInt32 i = pos <= 0 ? kNotFound : temp.RFindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos - 1);
  230. if (i != kNotFound && (temp[PRUint32(i)] == '<' ||
  231. temp[PRUint32(i)] == '"'))
  232. {
  233. start = PRUint32(++i);
  234. return start < pos;
  235. }
  236. else
  237. return PR_FALSE;
  238. }
  239. case freetext:
  240. {
  241. PRInt32 i = pos - 1;
  242. for (; i >= 0 && (
  243. nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
  244. nsCRT::IsAsciiDigit(aInString[PRUint32(i)]) ||
  245. aInString[PRUint32(i)] == '+' ||
  246. aInString[PRUint32(i)] == '-' ||
  247. aInString[PRUint32(i)] == '.'
  248. ); i--)
  249. ;
  250. if (++i >= 0 && PRUint32(i) < pos && nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]))
  251. {
  252. start = PRUint32(i);
  253. return PR_TRUE;
  254. }
  255. else
  256. return PR_FALSE;
  257. }
  258. case abbreviated:
  259. {
  260. PRInt32 i = pos - 1;
  261. // This disallows non-ascii-characters for email.
  262. // Currently correct, but revisit later after standards changed.
  263. PRBool isEmail = aInString[pos] == (PRUnichar)'@';
  264. // These chars mark the start of the URL
  265. for (; i >= 0
  266. && aInString[PRUint32(i)] != '>' && aInString[PRUint32(i)] != '<'
  267. && aInString[PRUint32(i)] != '"' && aInString[PRUint32(i)] != '\''
  268. && aInString[PRUint32(i)] != '`' && aInString[PRUint32(i)] != ','
  269. && aInString[PRUint32(i)] != '{' && aInString[PRUint32(i)] != '['
  270. && aInString[PRUint32(i)] != '(' && aInString[PRUint32(i)] != '|'
  271. && aInString[PRUint32(i)] != '\\'
  272. && !IsSpace(aInString[PRUint32(i)])
  273. && (!isEmail || nsCRT::IsAscii(aInString[PRUint32(i)]))
  274. ; i--)
  275. ;
  276. if
  277. (
  278. ++i >= 0 && PRUint32(i) < pos
  279. &&
  280. (
  281. nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
  282. nsCRT::IsAsciiDigit(aInString[PRUint32(i)])
  283. )
  284. )
  285. {
  286. start = PRUint32(i);
  287. return PR_TRUE;
  288. }
  289. else
  290. return PR_FALSE;
  291. }
  292. default:
  293. return PR_FALSE;
  294. } //switch
  295. }
  296. PRBool
  297. mozTXTToHTMLConv::FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
  298. const modetype check, const PRUint32 start, PRUint32& end)
  299. {
  300. switch(check)
  301. { // no breaks, because end of blocks is never reached
  302. case RFC1738:
  303. case RFC2396E:
  304. {
  305. nsString temp(aInString, aInStringLength);
  306. PRInt32 i = temp.FindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos + 1);
  307. if (i != kNotFound && temp[PRUint32(i--)] ==
  308. (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
  309. {
  310. end = PRUint32(i);
  311. return end > pos;
  312. }
  313. else
  314. return PR_FALSE;
  315. }
  316. case freetext:
  317. case abbreviated:
  318. {
  319. PRUint32 i = pos + 1;
  320. PRBool isEmail = aInString[pos] == (PRUnichar)'@';
  321. PRBool haveOpeningBracket = PR_FALSE;
  322. for (; PRInt32(i) < aInStringLength; i++)
  323. {
  324. // These chars mark the end of the URL
  325. if (aInString[i] == '>' || aInString[i] == '<' ||
  326. aInString[i] == '"' || aInString[i] == '`' ||
  327. aInString[i] == '}' || aInString[i] == ']' ||
  328. aInString[i] == '{' || aInString[i] == '[' ||
  329. aInString[i] == '|' ||
  330. (aInString[i] == ')' && !haveOpeningBracket) ||
  331. IsSpace(aInString[i]) )
  332. break;
  333. // Disallow non-ascii-characters for email.
  334. // Currently correct, but revisit later after standards changed.
  335. if (isEmail && (
  336. aInString[i] == '(' || aInString[i] == '\'' ||
  337. !nsCRT::IsAscii(aInString[i]) ))
  338. break;
  339. if (aInString[i] == '(')
  340. haveOpeningBracket = PR_TRUE;
  341. }
  342. // These chars are allowed in the middle of the URL, but not at end.
  343. // Technically they are, but are used in normal text after the URL.
  344. while (--i > pos && (
  345. aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
  346. aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
  347. aInString[i] == '\''
  348. ))
  349. ;
  350. if (i > pos)
  351. {
  352. end = i;
  353. return PR_TRUE;
  354. }
  355. else
  356. return PR_FALSE;
  357. }
  358. default:
  359. return PR_FALSE;
  360. } //switch
  361. }
  362. void
  363. mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength,
  364. const PRUint32 pos, const PRUint32 whathasbeendone,
  365. const modetype check, const PRUint32 start, const PRUint32 end,
  366. nsString& txtURL, nsString& desc,
  367. PRInt32& replaceBefore, PRInt32& replaceAfter)
  368. {
  369. PRUint32 descstart = start;
  370. switch(check)
  371. {
  372. case RFC1738:
  373. {
  374. descstart = start - 5;
  375. desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">"
  376. replaceAfter = end - pos + 1;
  377. } break;
  378. case RFC2396E:
  379. {
  380. descstart = start - 1;
  381. desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
  382. replaceAfter = end - pos + 1;
  383. } break;
  384. case freetext:
  385. case abbreviated:
  386. {
  387. descstart = start;
  388. desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
  389. replaceAfter = end - pos;
  390. } break;
  391. default: break;
  392. } //switch
  393. EscapeStr(desc, PR_FALSE);
  394. txtURL.Append(&aInString[start], end - start + 1);
  395. txtURL.StripWhitespace();
  396. // FIX ME
  397. nsAutoString temp2;
  398. ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
  399. replaceBefore = temp2.Length();
  400. return;
  401. }
  402. PRBool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
  403. {
  404. if (!mIOService)
  405. return PR_FALSE;
  406. nsCAutoString scheme;
  407. nsresult rv = mIOService->ExtractScheme(aURL, scheme);
  408. if(NS_FAILED(rv))
  409. return PR_FALSE;
  410. // Get the handler for this scheme.
  411. nsCOMPtr<nsIProtocolHandler> handler;
  412. rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
  413. if(NS_FAILED(rv))
  414. return PR_FALSE;
  415. // Is it an external protocol handler? If not, linkify it.
  416. nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
  417. if (!externalHandler)
  418. return PR_TRUE; // handler is built-in, linkify it!
  419. // If external app exists for the scheme then linkify it.
  420. PRBool exists;
  421. rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
  422. return(NS_SUCCEEDED(rv) && exists);
  423. }
  424. PRBool
  425. mozTXTToHTMLConv::CheckURLAndCreateHTML(
  426. const nsString& txtURL, const nsString& desc, const modetype mode,
  427. nsString& outputHTML)
  428. {
  429. // Create *uri from txtURL
  430. nsCOMPtr<nsIURI> uri;
  431. nsresult rv;
  432. // Lazily initialize mIOService
  433. if (!mIOService)
  434. {
  435. mIOService = do_GetIOService();
  436. if (!mIOService)
  437. return PR_FALSE;
  438. }
  439. // See if the url should be linkified.
  440. NS_ConvertUTF16toUTF8 utf8URL(txtURL);
  441. if (!ShouldLinkify(utf8URL))
  442. return PR_FALSE;
  443. // it would be faster if we could just check to see if there is a protocol
  444. // handler for the url and return instead of actually trying to create a url...
  445. rv = mIOService->NewURI(utf8URL, nsnull, nsnull, getter_AddRefs(uri));
  446. // Real work
  447. if (NS_SUCCEEDED(rv) && uri)
  448. {
  449. outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
  450. switch(mode)
  451. {
  452. case RFC1738:
  453. outputHTML.AppendLiteral("rfc1738");
  454. break;
  455. case RFC2396E:
  456. outputHTML.AppendLiteral("rfc2396E");
  457. break;
  458. case freetext:
  459. outputHTML.AppendLiteral("freetext");
  460. break;
  461. case abbreviated:
  462. outputHTML.AppendLiteral("abbreviated");
  463. break;
  464. default: break;
  465. }
  466. nsAutoString escapedURL(txtURL);
  467. EscapeStr(escapedURL, PR_TRUE);
  468. outputHTML.AppendLiteral("\" href=\"");
  469. outputHTML += escapedURL;
  470. outputHTML.AppendLiteral("\">");
  471. outputHTML += desc;
  472. outputHTML.AppendLiteral("</a>");
  473. return PR_TRUE;
  474. }
  475. else
  476. return PR_FALSE;
  477. }
  478. NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const PRUnichar * aInString, PRInt32 aInLength, PRInt32 aPos, PRInt32 * aStartPos, PRInt32 * aEndPos)
  479. {
  480. // call FindURL on the passed in string
  481. nsAutoString outputHTML; // we'll ignore the generated output HTML
  482. *aStartPos = -1;
  483. *aEndPos = -1;
  484. FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
  485. return NS_OK;
  486. }
  487. PRBool
  488. mozTXTToHTMLConv::FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
  489. const PRUint32 whathasbeendone,
  490. nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter)
  491. {
  492. enum statetype {unchecked, invalid, startok, endok, success};
  493. static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
  494. statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
  495. /* I don't like this abuse of enums as index for the array,
  496. but I don't know a better method */
  497. // Define, which modes to check
  498. /* all modes but abbreviated are checked for text[pos] == ':',
  499. only abbreviated for '.', RFC2396E and abbreviated for '@' */
  500. for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
  501. iState = modetype(iState + 1))
  502. state[iState] = aInString[pos] == ':' ? unchecked : invalid;
  503. switch (aInString[pos])
  504. {
  505. case '@':
  506. state[RFC2396E] = unchecked;
  507. // no break here
  508. case '.':
  509. state[abbreviated] = unchecked;
  510. break;
  511. case ':':
  512. state[abbreviated] = invalid;
  513. break;
  514. default:
  515. break;
  516. }
  517. // Test, first successful mode wins, sequence defined by |ranking|
  518. PRInt32 iCheck = 0; // the currently tested modetype
  519. modetype check = ranking[iCheck];
  520. for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
  521. iCheck++)
  522. /* check state from last run.
  523. If this is the first, check this one, which isn't = success yet */
  524. {
  525. check = ranking[iCheck];
  526. PRUint32 start, end;
  527. if (state[check] == unchecked)
  528. if (FindURLStart(aInString, aInLength, pos, check, start))
  529. state[check] = startok;
  530. if (state[check] == startok)
  531. if (FindURLEnd(aInString, aInLength, pos, check, start, end))
  532. state[check] = endok;
  533. if (state[check] == endok)
  534. {
  535. nsAutoString txtURL, desc;
  536. PRInt32 resultReplaceBefore, resultReplaceAfter;
  537. CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
  538. txtURL, desc,
  539. resultReplaceBefore, resultReplaceAfter);
  540. if (aInString[pos] != ':')
  541. {
  542. nsAutoString temp = txtURL;
  543. txtURL.SetLength(0);
  544. CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
  545. }
  546. if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
  547. outputHTML))
  548. {
  549. replaceBefore = resultReplaceBefore;
  550. replaceAfter = resultReplaceAfter;
  551. state[check] = success;
  552. }
  553. } // if
  554. } // for
  555. return state[check] == success;
  556. }
  557. PRBool
  558. mozTXTToHTMLConv::ItMatchesDelimited(const PRUnichar * aInString,
  559. PRInt32 aInLength, const PRUnichar* rep, PRInt32 aRepLen,
  560. LIMTYPE before, LIMTYPE after)
  561. {
  562. // this little method gets called a LOT. I found we were spending a
  563. // lot of time just calculating the length of the variable "rep"
  564. // over and over again every time we called it. So we're now passing
  565. // an integer in here.
  566. PRInt32 textLen = aInLength;
  567. if
  568. (
  569. (before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
  570. && textLen < aRepLen ||
  571. (before != LT_IGNORE || after != LT_IGNORE && after != LT_DELIMITER)
  572. && textLen < aRepLen + 1 ||
  573. before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
  574. && textLen < aRepLen + 2
  575. )
  576. return PR_FALSE;
  577. PRUnichar text0 = aInString[0];
  578. PRUnichar textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
  579. if
  580. (
  581. before == LT_ALPHA
  582. && !nsCRT::IsAsciiAlpha(text0) ||
  583. before == LT_DIGIT
  584. && !nsCRT::IsAsciiDigit(text0) ||
  585. before == LT_DELIMITER
  586. &&
  587. (
  588. nsCRT::IsAsciiAlpha(text0) ||
  589. nsCRT::IsAsciiDigit(text0) ||
  590. text0 == *rep
  591. ) ||
  592. after == LT_ALPHA
  593. && !nsCRT::IsAsciiAlpha(textAfterPos) ||
  594. after == LT_DIGIT
  595. && !nsCRT::IsAsciiDigit(textAfterPos) ||
  596. after == LT_DELIMITER
  597. &&
  598. (
  599. nsCRT::IsAsciiAlpha(textAfterPos) ||
  600. nsCRT::IsAsciiDigit(textAfterPos) ||
  601. textAfterPos == *rep
  602. ) ||
  603. !Substring(Substring(aInString, aInString+aInLength),
  604. (before == LT_IGNORE ? 0 : 1),
  605. aRepLen).Equals(Substring(rep, rep+aRepLen),
  606. nsCaseInsensitiveStringComparator())
  607. )
  608. return PR_FALSE;
  609. return PR_TRUE;
  610. }
  611. PRUint32
  612. mozTXTToHTMLConv::NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength,
  613. const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after)
  614. {
  615. PRUint32 result = 0;
  616. for (PRInt32 i = 0; i < aInStringLength; i++)
  617. {
  618. const PRUnichar * indexIntoString = &aInString[i];
  619. if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
  620. result++;
  621. }
  622. return result;
  623. }
  624. // NOTE: the converted html for the phrase is appended to aOutString
  625. // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
  626. PRBool
  627. mozTXTToHTMLConv::StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, PRBool col0,
  628. const PRUnichar* tagTXT, PRInt32 aTagTXTLen,
  629. const char* tagHTML, const char* attributeHTML,
  630. nsString& aOutString, PRUint32& openTags)
  631. {
  632. /* We're searching for the following pattern:
  633. LT_DELIMITER - "*" - ALPHA -
  634. [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
  635. <strong> is only inserted, if existence of a pair could be verified
  636. We use the first opening/closing tag, if we can choose */
  637. const PRUnichar * newOffset = aInString;
  638. PRInt32 newLength = aInStringLength;
  639. if (!col0) // skip the first element?
  640. {
  641. newOffset = &aInString[1];
  642. newLength = aInStringLength - 1;
  643. }
  644. // opening tag
  645. if
  646. (
  647. ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
  648. (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
  649. && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
  650. LT_ALPHA, LT_DELIMITER) // remaining closing tags
  651. > openTags
  652. )
  653. {
  654. openTags++;
  655. aOutString.AppendLiteral("<");
  656. aOutString.AppendASCII(tagHTML);
  657. aOutString.Append(PRUnichar(' '));
  658. aOutString.AppendASCII(attributeHTML);
  659. aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
  660. aOutString.Append(tagTXT);
  661. aOutString.AppendLiteral("</span>");
  662. return PR_TRUE;
  663. }
  664. // closing tag
  665. else if (openTags > 0
  666. && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
  667. {
  668. openTags--;
  669. aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
  670. aOutString.Append(tagTXT);
  671. aOutString.AppendLiteral("</span></");
  672. aOutString.AppendASCII(tagHTML);
  673. aOutString.Append(PRUnichar('>'));
  674. return PR_TRUE;
  675. }
  676. return PR_FALSE;
  677. }
  678. PRBool
  679. mozTXTToHTMLConv::SmilyHit(const PRUnichar * aInString, PRInt32 aLength, PRBool col0,
  680. const char* tagTXT, const char* imageName,
  681. nsString& outputHTML, PRInt32& glyphTextLen)
  682. {
  683. if ( !aInString || !tagTXT || !imageName )
  684. return PR_FALSE;
  685. PRInt32 tagLen = nsCRT::strlen(tagTXT);
  686. PRUint32 delim = (col0 ? 0 : 1) + tagLen;
  687. if
  688. (
  689. (col0 || IsSpace(aInString[0]))
  690. &&
  691. (
  692. aLength <= PRInt32(delim) ||
  693. IsSpace(aInString[delim]) ||
  694. aLength > PRInt32(delim + 1)
  695. &&
  696. (
  697. aInString[delim] == '.' ||
  698. aInString[delim] == ',' ||
  699. aInString[delim] == ';' ||
  700. aInString[delim] == '8' ||
  701. aInString[delim] == '>' ||
  702. aInString[delim] == '!' ||
  703. aInString[delim] == '?'
  704. )
  705. && IsSpace(aInString[delim + 1])
  706. )
  707. && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
  708. col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
  709. // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
  710. )
  711. {
  712. if (!col0)
  713. {
  714. outputHTML.Truncate();
  715. outputHTML.Append(PRUnichar(' '));
  716. }
  717. outputHTML.AppendLiteral("<span class=\""); // <span class="
  718. AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown
  719. outputHTML.AppendLiteral("\" title=\""); // " title="
  720. AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip
  721. outputHTML.AppendLiteral("\"><span>"); // "><span>
  722. AppendASCIItoUTF16(tagTXT, outputHTML); // original text
  723. outputHTML.AppendLiteral("</span></span>"); // </span></span>
  724. glyphTextLen = (col0 ? 0 : 1) + tagLen;
  725. return PR_TRUE;
  726. }
  727. return PR_FALSE;
  728. }
  729. // the glyph is appended to aOutputString instead of the original string...
  730. PRBool
  731. mozTXTToHTMLConv::GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, PRBool col0,
  732. nsString& aOutputString, PRInt32& glyphTextLen)
  733. {
  734. PRUnichar text0 = aInString[0];
  735. PRUnichar text1 = aInString[1];
  736. PRUnichar firstChar = (col0 ? text0 : text1);
  737. // temporary variable used to store the glyph html text
  738. nsAutoString outputHTML;
  739. PRBool bTestSmilie;
  740. PRBool bArg;
  741. int i;
  742. // refactor some of this mess to avoid code duplication and speed execution a bit
  743. // there are two cases that need to be tried one after another. To avoid a lot of
  744. // duplicate code, rolling into a loop
  745. i = 0;
  746. while ( i < 2 )
  747. {
  748. bTestSmilie = PR_FALSE;
  749. if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
  750. {
  751. // first test passed
  752. bTestSmilie = PR_TRUE;
  753. bArg = col0;
  754. }
  755. if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
  756. {
  757. // second test passed
  758. bTestSmilie = PR_TRUE;
  759. bArg = PR_FALSE;
  760. }
  761. if ( bTestSmilie && (
  762. SmilyHit(aInString, aInLength, bArg,
  763. ":-)",
  764. "moz-smiley-s1", // smile
  765. outputHTML, glyphTextLen) ||
  766. SmilyHit(aInString, aInLength, bArg,
  767. ":)",
  768. "moz-smiley-s1", // smile
  769. outputHTML, glyphTextLen) ||
  770. SmilyHit(aInString, aInLength, bArg,
  771. ":-D",
  772. "moz-smiley-s5", // laughing
  773. outputHTML, glyphTextLen) ||
  774. SmilyHit(aInString, aInLength, bArg,
  775. ":-(",
  776. "moz-smiley-s2", // frown
  777. outputHTML, glyphTextLen) ||
  778. SmilyHit(aInString, aInLength, bArg,
  779. ":(",
  780. "moz-smiley-s2", // frown
  781. outputHTML, glyphTextLen) ||
  782. SmilyHit(aInString, aInLength, bArg,
  783. ":-[",
  784. "moz-smiley-s6", // embarassed
  785. outputHTML, glyphTextLen) ||
  786. SmilyHit(aInString, aInLength, bArg,
  787. ";-)",
  788. "moz-smiley-s3", // wink
  789. outputHTML, glyphTextLen) ||
  790. SmilyHit(aInString, aInLength, col0,
  791. ";)",
  792. "moz-smiley-s3", // wink
  793. outputHTML, glyphTextLen) ||
  794. SmilyHit(aInString, aInLength, bArg,
  795. ":-\\",
  796. "moz-smiley-s7", // undecided
  797. outputHTML, glyphTextLen) ||
  798. SmilyHit(aInString, aInLength, bArg,
  799. ":-P",
  800. "moz-smiley-s4", // tongue
  801. outputHTML, glyphTextLen) ||
  802. SmilyHit(aInString, aInLength, bArg,
  803. ";-P",
  804. "moz-smiley-s4", // tongue
  805. outputHTML, glyphTextLen) ||
  806. SmilyHit(aInString, aInLength, bArg,
  807. "=-O",
  808. "moz-smiley-s8", // surprise
  809. outputHTML, glyphTextLen) ||
  810. SmilyHit(aInString, aInLength, bArg,
  811. ":-*",
  812. "moz-smiley-s9", // kiss
  813. outputHTML, glyphTextLen) ||
  814. SmilyHit(aInString, aInLength, bArg,
  815. ">:o",
  816. "moz-smiley-s10", // yell
  817. outputHTML, glyphTextLen) ||
  818. SmilyHit(aInString, aInLength, bArg,
  819. ">:-o",
  820. "moz-smiley-s10", // yell
  821. outputHTML, glyphTextLen) ||
  822. SmilyHit(aInString, aInLength, bArg,
  823. "8-)",
  824. "moz-smiley-s11", // cool
  825. outputHTML, glyphTextLen) ||
  826. SmilyHit(aInString, aInLength, bArg,
  827. ":-$",
  828. "moz-smiley-s12", // money
  829. outputHTML, glyphTextLen) ||
  830. SmilyHit(aInString, aInLength, bArg,
  831. ":-!",
  832. "moz-smiley-s13", // foot
  833. outputHTML, glyphTextLen) ||
  834. SmilyHit(aInString, aInLength, bArg,
  835. "O:-)",
  836. "moz-smiley-s14", // innocent
  837. outputHTML, glyphTextLen) ||
  838. SmilyHit(aInString, aInLength, bArg,
  839. ":'(",
  840. "moz-smiley-s15", // cry
  841. outputHTML, glyphTextLen) ||
  842. SmilyHit(aInString, aInLength, bArg,
  843. ":-X",
  844. "moz-smiley-s16", // sealed
  845. outputHTML, glyphTextLen)
  846. )
  847. )
  848. {
  849. aOutputString.Append(outputHTML);
  850. return PR_TRUE;
  851. }
  852. i++;
  853. }
  854. if (text0 == '\f')
  855. {
  856. aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
  857. glyphTextLen = 1;
  858. return PR_TRUE;
  859. }
  860. if (text0 == '+' || text1 == '+')
  861. {
  862. if (ItMatchesDelimited(aInString, aInLength,
  863. NS_LITERAL_STRING(" +/-").get(), 4,
  864. LT_IGNORE, LT_IGNORE))
  865. {
  866. aOutputString.AppendLiteral(" &plusmn;");
  867. glyphTextLen = 4;
  868. return PR_TRUE;
  869. }
  870. if (col0 && ItMatchesDelimited(aInString, aInLength,
  871. NS_LITERAL_STRING("+/-").get(), 3,
  872. LT_IGNORE, LT_IGNORE))
  873. {
  874. aOutputString.AppendLiteral("&plusmn;");
  875. glyphTextLen = 3;
  876. return PR_TRUE;
  877. }
  878. }
  879. // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5
  880. // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
  881. if
  882. (
  883. text1 == '^'
  884. &&
  885. (
  886. nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
  887. text0 == ')' || text0 == ']' || text0 == '}'
  888. )
  889. &&
  890. (
  891. 2 < aInLength && nsCRT::IsAsciiDigit(aInString[2]) ||
  892. 3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3])
  893. )
  894. )
  895. {
  896. // Find first non-digit
  897. PRInt32 delimPos = 3; // skip "^" and first digit (or '-')
  898. for (; delimPos < aInLength
  899. &&
  900. (
  901. nsCRT::IsAsciiDigit(aInString[delimPos]) ||
  902. aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
  903. nsCRT::IsAsciiDigit(aInString[delimPos + 1])
  904. );
  905. delimPos++)
  906. ;
  907. if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
  908. {
  909. return PR_FALSE;
  910. }
  911. outputHTML.Truncate();
  912. outputHTML += text0;
  913. outputHTML.AppendLiteral("<sup class=\"moz-txt-sup\">");
  914. aOutputString.Append(outputHTML);
  915. aOutputString.Append(&aInString[2], delimPos - 2);
  916. aOutputString.AppendLiteral("</sup>");
  917. glyphTextLen = delimPos /* - 1 + 1 */ ;
  918. return PR_TRUE;
  919. }
  920. /*
  921. The following strings are not substituted:
  922. |TXT |HTML |Reason
  923. +------+---------+----------
  924. -> &larr; Bug #454
  925. => &lArr; dito
  926. <- &rarr; dito
  927. <= &rArr; dito
  928. (tm) &trade; dito
  929. 1/4 &frac14; is triggered by 1/4 Part 1, 2/4 Part 2, ...
  930. 3/4 &frac34; dito
  931. 1/2 &frac12; similar
  932. */
  933. return PR_FALSE;
  934. }
  935. /***************************************************************************
  936. Library-internal Interface
  937. ****************************************************************************/
  938. mozTXTToHTMLConv::mozTXTToHTMLConv()
  939. {
  940. }
  941. mozTXTToHTMLConv::~mozTXTToHTMLConv()
  942. {
  943. }
  944. NS_IMPL_ISUPPORTS4(mozTXTToHTMLConv,
  945. mozITXTToHTMLConv,
  946. nsIStreamConverter,
  947. nsIStreamListener,
  948. nsIRequestObserver)
  949. PRInt32
  950. mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line,
  951. PRUint32& logLineStart)
  952. {
  953. PRInt32 result = 0;
  954. PRInt32 lineLength = nsCRT::strlen(line);
  955. PRBool moreCites = PR_TRUE;
  956. while (moreCites)
  957. {
  958. /* E.g. the following lines count as quote:
  959. > text
  960. //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  961. >text
  962. //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  963. > text
  964. ] text
  965. USER> text
  966. USER] text
  967. //#endif
  968. logLineStart is the position of "t" in this example
  969. */
  970. PRUint32 i = logLineStart;
  971. #ifdef QUOTE_RECOGNITION_AGGRESSIVE
  972. for (; PRInt32(i) < lineLength && IsSpace(line[i]); i++)
  973. ;
  974. for (; PRInt32(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
  975. && nsCRT::IsUpper(line[i]) ; i++)
  976. ;
  977. if (PRInt32(i) < lineLength && (line[i] == '>' || line[i] == ']'))
  978. #else
  979. if (PRInt32(i) < lineLength && line[i] == '>')
  980. #endif
  981. {
  982. i++;
  983. if (PRInt32(i) < lineLength && line[i] == ' ')
  984. i++;
  985. // sendmail/mbox
  986. // Placed here for performance increase
  987. const PRUnichar * indexString = &line[logLineStart];
  988. // here, |logLineStart < lineLength| is always true
  989. PRUint32 minlength = MinInt(6,nsCRT::strlen(indexString));
  990. if (Substring(indexString,
  991. indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
  992. nsCaseInsensitiveStringComparator()))
  993. //XXX RFC2646
  994. moreCites = PR_FALSE;
  995. else
  996. {
  997. result++;
  998. logLineStart = i;
  999. }
  1000. }
  1001. else
  1002. moreCites = PR_FALSE;
  1003. }
  1004. return result;
  1005. }
  1006. void
  1007. mozTXTToHTMLConv::ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString)
  1008. {
  1009. PRBool doURLs = 0 != (whattodo & kURLs);
  1010. PRBool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
  1011. PRBool doStructPhrase = 0 != (whattodo & kStructPhrase);
  1012. PRUint32 structPhrase_strong = 0; // Number of currently open tags
  1013. PRUint32 structPhrase_underline = 0;
  1014. PRUint32 structPhrase_italic = 0;
  1015. PRUint32 structPhrase_code = 0;
  1016. nsAutoString outputHTML; // moved here for performance increase
  1017. for(PRUint32 i = 0; PRInt32(i) < aInStringLength;)
  1018. {
  1019. if (doGlyphSubstitution)
  1020. {
  1021. PRInt32 glyphTextLen;
  1022. if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
  1023. {
  1024. i += glyphTextLen;
  1025. continue;
  1026. }
  1027. }
  1028. if (doStructPhrase)
  1029. {
  1030. const PRUnichar * newOffset = aInString;
  1031. PRInt32 newLength = aInStringLength;
  1032. if (i > 0 ) // skip the first element?
  1033. {
  1034. newOffset = &aInString[i-1];
  1035. newLength = aInStringLength - i + 1;
  1036. }
  1037. switch (aInString[i]) // Performance increase
  1038. {
  1039. case '*':
  1040. if (StructPhraseHit(newOffset, newLength, i == 0,
  1041. NS_LITERAL_STRING("*").get(), 1,
  1042. "b", "class=\"moz-txt-star\"",
  1043. aOutString, structPhrase_strong))
  1044. {
  1045. i++;
  1046. continue;
  1047. }
  1048. break;
  1049. case '/':
  1050. if (StructPhraseHit(newOffset, newLength, i == 0,
  1051. NS_LITERAL_STRING("/").get(), 1,
  1052. "i", "class=\"moz-txt-slash\"",
  1053. aOutString, structPhrase_italic))
  1054. {
  1055. i++;
  1056. continue;
  1057. }
  1058. break;
  1059. case '_':
  1060. if (StructPhraseHit(newOffset, newLength, i == 0,
  1061. NS_LITERAL_STRING("_").get(), 1,
  1062. "span" /* <u> is deprecated */,
  1063. "class=\"moz-txt-underscore\"",
  1064. aOutString, structPhrase_underline))
  1065. {
  1066. i++;
  1067. continue;
  1068. }
  1069. break;
  1070. case '|':
  1071. if (StructPhraseHit(newOffset, newLength, i == 0,
  1072. NS_LITERAL_STRING("|").get(), 1,
  1073. "code", "class=\"moz-txt-verticalline\"",
  1074. aOutString, structPhrase_code))
  1075. {
  1076. i++;
  1077. continue;
  1078. }
  1079. break;
  1080. }
  1081. }
  1082. if (doURLs)
  1083. {
  1084. switch (aInString[i])
  1085. {
  1086. case ':':
  1087. case '@':
  1088. case '.':
  1089. if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
  1090. {
  1091. PRInt32 replaceBefore;
  1092. PRInt32 replaceAfter;
  1093. if (FindURL(aInString, aInStringLength, i, whattodo,
  1094. outputHTML, replaceBefore, replaceAfter)
  1095. && structPhrase_strong + structPhrase_italic +
  1096. structPhrase_underline + structPhrase_code == 0
  1097. /* workaround for bug #19445 */ )
  1098. {
  1099. aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
  1100. aOutString += outputHTML;
  1101. i += replaceAfter + 1;
  1102. continue;
  1103. }
  1104. }
  1105. break;
  1106. } //switch
  1107. }
  1108. switch (aInString[i])
  1109. {
  1110. // Special symbols
  1111. case '<':
  1112. case '>':
  1113. case '&':
  1114. EscapeChar(aInString[i], aOutString, PR_FALSE);
  1115. i++;
  1116. break;
  1117. // Normal characters
  1118. default:
  1119. aOutString += aInString[i];
  1120. i++;
  1121. break;
  1122. }
  1123. }
  1124. }
  1125. void
  1126. mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
  1127. {
  1128. // some common variables we were recalculating
  1129. // every time inside the for loop...
  1130. PRInt32 lengthOfInString = aInString.Length();
  1131. const PRUnichar * uniBuffer = aInString.get();
  1132. #ifdef DEBUG_BenB_Perf
  1133. PRTime parsing_start = PR_IntervalNow();
  1134. #endif
  1135. // Look for simple entities not included in a tags and scan them.
  1136. /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
  1137. or in a tag ("<!--[...]-->").
  1138. Unescape the rest (text between tags) and pass it to ScanTXT. */
  1139. for (PRInt32 i = 0; i < lengthOfInString;)
  1140. {
  1141. if (aInString[i] == '<') // html tag
  1142. {
  1143. PRUint32 start = PRUint32(i);
  1144. if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
  1145. // if a tag, skip until </a>
  1146. {
  1147. i = aInString.Find("</a>", PR_TRUE, i);
  1148. if (i == kNotFound)
  1149. i = lengthOfInString;
  1150. else
  1151. i += 4;
  1152. }
  1153. else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
  1154. aInString[PRUint32(i) + 3] == '-')
  1155. //if out-commended code, skip until -->
  1156. {
  1157. i = aInString.Find("-->", PR_FALSE, i);
  1158. if (i == kNotFound)
  1159. i = lengthOfInString;
  1160. else
  1161. i += 3;
  1162. }
  1163. else // just skip tag (attributes etc.)
  1164. {
  1165. i = aInString.FindChar('>', i);
  1166. if (i == kNotFound)
  1167. i = lengthOfInString;
  1168. else
  1169. i++;
  1170. }
  1171. aOutString.Append(&uniBuffer[start], PRUint32(i) - start);
  1172. }
  1173. else
  1174. {
  1175. PRUint32 start = PRUint32(i);
  1176. i = aInString.FindChar('<', i);
  1177. if (i == kNotFound)
  1178. i = lengthOfInString;
  1179. nsString tempString;
  1180. tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
  1181. UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
  1182. ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
  1183. }
  1184. }
  1185. #ifdef DEBUG_BenB_Perf
  1186. printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
  1187. #endif
  1188. }
  1189. /****************************************************************************
  1190. XPCOM Interface
  1191. *****************************************************************************/
  1192. NS_IMETHODIMP
  1193. mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
  1194. const char *aFromType,
  1195. const char *aToType,
  1196. nsISupports *aCtxt, nsIInputStream **_retval)
  1197. {
  1198. return NS_ERROR_NOT_IMPLEMENTED;
  1199. }
  1200. NS_IMETHODIMP
  1201. mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
  1202. const char *aToType,
  1203. nsIStreamListener *aListener, nsISupports *aCtxt) {
  1204. return NS_ERROR_NOT_IMPLEMENTED;
  1205. }
  1206. NS_IMETHODIMP
  1207. mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
  1208. nsIInputStream *inStr, PRUint32 sourceOffset,
  1209. PRUint32 count)
  1210. {
  1211. return NS_ERROR_NOT_IMPLEMENTED;
  1212. }
  1213. NS_IMETHODIMP
  1214. mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
  1215. {
  1216. return NS_ERROR_NOT_IMPLEMENTED;
  1217. }
  1218. NS_IMETHODIMP
  1219. mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
  1220. nsresult aStatus)
  1221. {
  1222. return NS_ERROR_NOT_IMPLEMENTED;
  1223. }
  1224. NS_IMETHODIMP
  1225. mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line, PRUint32 *logLineStart,
  1226. PRUint32 *_retval)
  1227. {
  1228. if (!logLineStart || !_retval || !line)
  1229. return NS_ERROR_NULL_POINTER;
  1230. *_retval = CiteLevelTXT(line, *logLineStart);
  1231. return NS_OK;
  1232. }
  1233. NS_IMETHODIMP
  1234. mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
  1235. PRUnichar **_retval)
  1236. {
  1237. NS_ENSURE_ARG(text);
  1238. // FIX ME!!!
  1239. nsString outString;
  1240. PRInt32 inLength = nsCRT::strlen(text);
  1241. // by setting a large capacity up front, we save time
  1242. // when appending characters to the output string because we don't
  1243. // need to reallocate and re-copy the characters already in the out String.
  1244. NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
  1245. if (inLength == 0) {
  1246. *_retval = nsCRT::strdup(text);
  1247. return NS_OK;
  1248. }
  1249. outString.SetCapacity(PRUint32(inLength * growthRate));
  1250. ScanTXT(text, inLength, whattodo, outString);
  1251. *_retval = ToNewUnicode(outString);
  1252. return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1253. }
  1254. NS_IMETHODIMP
  1255. mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
  1256. PRUnichar **_retval)
  1257. {
  1258. NS_ENSURE_ARG(text);
  1259. // FIX ME!!!
  1260. nsString outString;
  1261. nsString inString (text); // look at this nasty extra copy of the entire input buffer!
  1262. outString.SetCapacity(PRUint32(inString.Length() * growthRate));
  1263. ScanHTML(inString, whattodo, outString);
  1264. *_retval = ToNewUnicode(outString);
  1265. return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1266. }
  1267. nsresult
  1268. MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
  1269. {
  1270. NS_PRECONDITION(aConv != nsnull, "null ptr");
  1271. if (!aConv)
  1272. return NS_ERROR_NULL_POINTER;
  1273. *aConv = new mozTXTToHTMLConv();
  1274. if (!*aConv)
  1275. return NS_ERROR_OUT_OF_MEMORY;
  1276. NS_ADDREF(*aConv);
  1277. // return (*aConv)->Init();
  1278. return NS_OK;
  1279. }