/alcinoe/source/ALHTML.pas

https://bitbucket.org/sas_team/sas.requires · Pascal · 1374 lines · 1111 code · 120 blank · 143 comment · 148 complexity · b6ea3cfaef7d7505bfb1bd8689a8e5c6 MD5 · raw file

Large files are truncated click here to view the full file

  1. {*************************************************************
  2. www: http://sourceforge.net/projects/alcinoe/
  3. svn: svn checkout svn://svn.code.sf.net/p/alcinoe/code/ alcinoe-code
  4. Author(s): Stéphane Vander Clock (alcinoe@arkadia.com)
  5. Sponsor(s): Arkadia SA (http://www.arkadia.com)
  6. product: Alcinoe HTML Functions
  7. Version: 4.00
  8. Description: Functions to work on HTML tags (extract text, HTML encode,
  9. HTML decode, etc.). The functions ALUTF8HTMLDecode and
  10. ALUTF8HTMLEncode decode and encode HTML entities
  11. like &nbsp;
  12. Legal issues: Copyright (C) 1999-2013 by Arkadia Software Engineering
  13. This software is provided 'as-is', without any express
  14. or implied warranty. In no event will the author be
  15. held liable for any damages arising from the use of
  16. this software.
  17. Permission is granted to anyone to use this software
  18. for any purpose, including commercial applications,
  19. and to alter it and redistribute it freely, subject
  20. to the following restrictions:
  21. 1. The origin of this software must not be
  22. misrepresented, you must not claim that you wrote
  23. the original software. If you use this software in
  24. a product, an acknowledgment in the product
  25. documentation would be appreciated but is not
  26. required.
  27. 2. Altered source versions must be plainly marked as
  28. such, and must not be misrepresented as being the
  29. original software.
  30. 3. This notice may not be removed or altered from any
  31. source distribution.
  32. 4. You must register this software by sending a picture
  33. postcard to the author. Use a nice stamp and mention
  34. your name, street address, EMail address and any
  35. comment you like to say.
  36. Known bugs :
  37. History : 19/10/2005: Make the code independent of the current locale
  38. and work with UTF-8 encoding; also build a more
  39. complete list of HTML entities
  40. 10/09/2007: create ALCompactHtmlTagParams in ALFcnHTML
  41. 10/11/2007: move ALXMLTextElementDecode to ALUTF8XMLTextElementDecode and
  42. to ALUTF8XMLTextElementDecode
  43. add support for numeric references like &#x20AC; and &#39;
  44. 26/06/2012: Add xe2 support
  45. 28/01/2013: Add xe2 ALJavascriptEncode / ALUTF8JavascriptDecode
  46. 05/09/2014: Add ALRunJavascript function
  47. Link :
  48. * Please send all your feedback to alcinoe@arkadia.com
  49. * If you have downloaded this source from a website different from
  50. sourceforge.net, please get the last version on http://sourceforge.net/projects/alcinoe/
  51. * Please, help us to keep the development of these components free by
  52. promoting the sponsor on http://static.arkadia.com/html/alcinoe_like.html
  53. **************************************************************}
  54. unit ALHTML;
  55. interface
  56. {$IF CompilerVersion >= 25} {Delphi XE4}
  57. {$LEGACYIFEND ON} // http://docwiki.embarcadero.com/RADStudio/XE4/en/Legacy_IFEND_(Delphi)
  58. {$IFEND}
  59. uses AlStringList;
  60. procedure ALUTF8ExtractHTMLText(HtmlContent: AnsiString;
  61. LstExtractedResourceText: TALStrings;
  62. Const DecodeHTMLText: Boolean = True); overload;
  63. function ALUTF8ExtractHTMLText(const HtmlContent: AnsiString;
  64. Const DecodeHTMLText: Boolean = True): AnsiString; overload;
  65. function ALXMLCDataElementEncode(const Src: AnsiString): AnsiString;
  66. function ALXMLTextElementEncode(const Src: AnsiString; const useNumericReference: boolean = True): AnsiString;
  67. function ALUTF8XMLTextElementDecode(const Src: AnsiString): AnsiString;
  68. function ALUTF8HTMLEncode(const Src: AnsiString;
  69. const EncodeASCIIHtmlEntities: Boolean = True;
  70. const useNumericReference: boolean = True): AnsiString;
  71. function ALUTF8HTMLDecode(const Src: AnsiString): AnsiString;
  72. function ALJavascriptEncode(const Src: AnsiString; const useNumericReference: boolean = true): AnsiString;
  73. function ALUTF8JavascriptDecode(const Src: AnsiString): AnsiString;
  74. function ALRunJavascript(const aCode: AnsiString): AnsiString;
  75. procedure ALHideHtmlUnwantedTagForHTMLHandleTagfunct(Var HtmlContent: AnsiString;
  76. Const DeleteBodyOfUnwantedTag: Boolean = False;
  77. const ReplaceUnwantedTagCharBy: AnsiChar = #1);
  78. procedure ALCompactHtmlTagParams(TagParams: TALStrings);
  79. implementation
  80. uses {$IF CompilerVersion >= 23} {Delphi XE2}
  81. System.Math,
  82. System.Classes,
  83. System.sysutils,
  84. System.Win.Comobj,
  85. Winapi.Ole2,
  86. {$ELSE}
  87. Math,
  88. Classes,
  89. sysutils,
  90. Comobj,
  91. Ole2,
  92. {$IFEND}
  93. ALString,
  94. ALQuickSortList;
  95. Var vALhtml_LstEntities: TALStrings;
  96. {************************************************************}
  {Reset aLstHtmlEntities and load it with the table of named character
   entities. Each entry keeps the entity name (without the leading "&" and
   the trailing ";") as the string, and the decimal Unicode code point cast
   to a pointer in the entry's object slot. Entries are appended in exactly
   the order written below.}
  procedure ALInitHtmlEntitiesLst(aLstHtmlEntities: TALStrings);

    {Append one entity: name as the string, code point carried in the object slot}
    procedure _Register(const aEntityName: AnsiString; aCodePoint: Integer);
    begin
      aLstHtmlEntities.AddObject(aEntityName, pointer(aCodePoint));
    end;

  begin
    // start from an empty list so that a second call does not duplicate entries
    aLstHtmlEntities.Clear;

    _Register('zwnj', 8204);     // U+200C zero width non-joiner
    _Register('zwj', 8205);      // U+200D zero width joiner
    _Register('zeta', 950);      // U+03B6 greek small letter zeta
    _Register('Zeta', 918);      // U+0396 greek capital letter zeta
    _Register('yuml', 255);      // U+00FF latin small letter y with diaeresis
    _Register('Yuml', 376);      // U+0178 latin capital letter Y with diaeresis
    _Register('yen', 165);       // U+00A5 yen sign = yuan sign
    _Register('yacute', 253);    // U+00FD latin small letter y with acute
    _Register('Yacute', 221);    // U+00DD latin capital letter Y with acute
    _Register('xi', 958);        // U+03BE greek small letter xi
    _Register('Xi', 926);        // U+039E greek capital letter xi
    _Register('weierp', 8472);   // U+2118 script capital P = power set = Weierstrass p
    _Register('uuml', 252);      // U+00FC latin small letter u with diaeresis
    _Register('Uuml', 220);      // U+00DC latin capital letter U with diaeresis
    _Register('upsilon', 965);   // U+03C5 greek small letter upsilon
    _Register('Upsilon', 933);   // U+03A5 greek capital letter upsilon
    _Register('upsih', 978);     // U+03D2 greek upsilon with hook symbol
    _Register('uml', 168);       // U+00A8 diaeresis = spacing diaeresis
    _Register('ugrave', 249);    // U+00F9 latin small letter u with grave
    _Register('Ugrave', 217);    // U+00D9 latin capital letter U with grave
    _Register('ucirc', 251);     // U+00FB latin small letter u with circumflex
    _Register('Ucirc', 219);     // U+00DB latin capital letter U with circumflex
    _Register('uArr', 8657);     // U+21D1 upwards double arrow
    _Register('uarr', 8593);     // U+2191 upwards arrow
    _Register('uacute', 250);    // U+00FA latin small letter u with acute
    _Register('Uacute', 218);    // U+00DA latin capital letter U with acute
    _Register('trade', 8482);    // U+2122 trade mark sign
    _Register('times', 215);     // U+00D7 multiplication sign
    _Register('tilde', 732);     // U+02DC small tilde
    _Register('thorn', 254);     // U+00FE latin small letter thorn
    _Register('THORN', 222);     // U+00DE latin capital letter THORN
    _Register('thinsp', 8201);   // U+2009 thin space
    _Register('thetasym', 977);  // U+03D1 greek small letter theta symbol
    _Register('theta', 952);     // U+03B8 greek small letter theta
    _Register('Theta', 920);     // U+0398 greek capital letter theta
    _Register('there4', 8756);   // U+2234 therefore
    _Register('tau', 964);       // U+03C4 greek small letter tau
    _Register('Tau', 932);       // U+03A4 greek capital letter tau
    _Register('szlig', 223);     // U+00DF latin small letter sharp s = ess-zed
    _Register('supe', 8839);     // U+2287 superset of or equal to
    _Register('sup3', 179);      // U+00B3 superscript three = cubed
    _Register('sup2', 178);      // U+00B2 superscript two = squared
    _Register('sup1', 185);      // U+00B9 superscript one
    _Register('sup', 8835);      // U+2283 superset of
    _Register('sum', 8721);      // U+2211 n-ary summation
    _Register('sube', 8838);     // U+2286 subset of or equal to
    _Register('sub', 8834);      // U+2282 subset of
    _Register('spades', 9824);   // U+2660 black spade suit
    _Register('sim', 8764);      // U+223C tilde operator = varies with = similar to
    _Register('sigmaf', 962);    // U+03C2 greek small letter final sigma
    _Register('sigma', 963);     // U+03C3 greek small letter sigma
    _Register('Sigma', 931);     // U+03A3 greek capital letter sigma
    _Register('shy', 173);       // U+00AD soft hyphen = discretionary hyphen
    _Register('sect', 167);      // U+00A7 section sign
    _Register('sdot', 8901);     // U+22C5 dot operator
    _Register('scaron', 353);    // U+0161 latin small letter s with caron
    _Register('Scaron', 352);    // U+0160 latin capital letter S with caron
    _Register('sbquo', 8218);    // U+201A single low-9 quotation mark
    _Register('rsquo', 8217);    // U+2019 right single quotation mark
    _Register('rsaquo', 8250);   // U+203A single right-pointing angle quotation mark
    _Register('rlm', 8207);      // U+200F right-to-left mark
    _Register('rho', 961);       // U+03C1 greek small letter rho
    _Register('Rho', 929);       // U+03A1 greek capital letter rho
    _Register('rfloor', 8971);   // U+230B right floor
    _Register('reg', 174);       // U+00AE registered sign = registered trade mark sign
    _Register('real', 8476);     // U+211C blackletter capital R = real part symbol
    _Register('rdquo', 8221);    // U+201D right double quotation mark
    _Register('rceil', 8969);    // U+2309 right ceiling
    _Register('rArr', 8658);     // U+21D2 rightwards double arrow
    _Register('rarr', 8594);     // U+2192 rightwards arrow
    _Register('raquo', 187);     // U+00BB right-pointing double angle quotation mark
    _Register('rang', 9002);     // U+232A right-pointing angle bracket = ket
    _Register('radic', 8730);    // U+221A square root = radical sign
    _Register('quot', 34);       // U+0022 quotation mark = APL quote
    _Register('psi', 968);       // U+03C8 greek small letter psi
    _Register('Psi', 936);       // U+03A8 greek capital letter psi
    _Register('prop', 8733);     // U+221D proportional to
    _Register('prod', 8719);     // U+220F n-ary product = product sign
    _Register('Prime', 8243);    // U+2033 double prime = seconds = inches
    _Register('prime', 8242);    // U+2032 prime = minutes = feet
    _Register('pound', 163);     // U+00A3 pound sign
    _Register('plusmn', 177);    // U+00B1 plus-minus sign
    _Register('piv', 982);       // U+03D6 greek pi symbol
    _Register('pi', 960);        // U+03C0 greek small letter pi
    _Register('Pi', 928);        // U+03A0 greek capital letter pi
    _Register('phi', 966);       // U+03C6 greek small letter phi
    _Register('Phi', 934);       // U+03A6 greek capital letter phi
    _Register('perp', 8869);     // U+22A5 up tack = orthogonal to = perpendicular
    _Register('permil', 8240);   // U+2030 per mille sign
    _Register('part', 8706);     // U+2202 partial differential
    _Register('para', 182);      // U+00B6 pilcrow sign = paragraph sign
    _Register('ouml', 246);      // U+00F6 latin small letter o with diaeresis
    _Register('Ouml', 214);      // U+00D6 latin capital letter O with diaeresis
    _Register('otimes', 8855);   // U+2297 circled times = vector product
    _Register('otilde', 245);    // U+00F5 latin small letter o with tilde
    _Register('Otilde', 213);    // U+00D5 latin capital letter O with tilde
    _Register('oslash', 248);    // U+00F8 latin small letter o with stroke
    _Register('Oslash', 216);    // U+00D8 latin capital letter O with stroke
    _Register('ordm', 186);      // U+00BA masculine ordinal indicator
    _Register('ordf', 170);      // U+00AA feminine ordinal indicator
    _Register('or', 8744);       // U+2228 logical or = vee
    _Register('oplus', 8853);    // U+2295 circled plus = direct sum
    _Register('omicron', 959);   // U+03BF greek small letter omicron
    _Register('Omicron', 927);   // U+039F greek capital letter omicron
    _Register('omega', 969);     // U+03C9 greek small letter omega
    _Register('Omega', 937);     // U+03A9 greek capital letter omega
    _Register('oline', 8254);    // U+203E overline = spacing overscore
    _Register('ograve', 242);    // U+00F2 latin small letter o with grave
    _Register('Ograve', 210);    // U+00D2 latin capital letter O with grave
    _Register('oelig', 339);     // U+0153 latin small ligature oe
    _Register('OElig', 338);     // U+0152 latin capital ligature OE
    _Register('ocirc', 244);     // U+00F4 latin small letter o with circumflex
    _Register('Ocirc', 212);     // U+00D4 latin capital letter O with circumflex
    _Register('oacute', 243);    // U+00F3 latin small letter o with acute
    _Register('Oacute', 211);    // U+00D3 latin capital letter O with acute
    _Register('nu', 957);        // U+03BD greek small letter nu
    _Register('Nu', 925);        // U+039D greek capital letter nu
    _Register('ntilde', 241);    // U+00F1 latin small letter n with tilde
    _Register('Ntilde', 209);    // U+00D1 latin capital letter N with tilde
    _Register('nsub', 8836);     // U+2284 not a subset of
    _Register('notin', 8713);    // U+2209 not an element of
    _Register('not', 172);       // U+00AC not sign
    _Register('ni', 8715);       // U+220B contains as member
    _Register('ne', 8800);       // U+2260 not equal to
    _Register('ndash', 8211);    // U+2013 en dash
    _Register('nbsp', 160);      // U+00A0 no-break space = non-breaking space
    _Register('nabla', 8711);    // U+2207 nabla = backward difference
    _Register('mu', 956);        // U+03BC greek small letter mu
    _Register('Mu', 924);        // U+039C greek capital letter mu
    _Register('minus', 8722);    // U+2212 minus sign
    _Register('middot', 183);    // U+00B7 middle dot = Georgian comma = Greek middle dot
    _Register('micro', 181);     // U+00B5 micro sign
    _Register('mdash', 8212);    // U+2014 em dash
    _Register('macr', 175);      // U+00AF macron = spacing macron = overline
    _Register('lt', 60);         // U+003C less-than sign
    _Register('lsquo', 8216);    // U+2018 left single quotation mark
    _Register('lsaquo', 8249);   // U+2039 single left-pointing angle quotation mark
    _Register('lrm', 8206);      // U+200E left-to-right mark
    _Register('loz', 9674);      // U+25CA lozenge
    _Register('lowast', 8727);   // U+2217 asterisk operator
    _Register('lfloor', 8970);   // U+230A left floor = apl downstile
    _Register('le', 8804);       // U+2264 less-than or equal to
    _Register('ldquo', 8220);    // U+201C left double quotation mark
    _Register('lceil', 8968);    // U+2308 left ceiling = apl upstile
    _Register('lArr', 8656);     // U+21D0 leftwards double arrow
    _Register('larr', 8592);     // U+2190 leftwards arrow
    _Register('laquo', 171);     // U+00AB left-pointing double angle quotation mark
    _Register('lang', 9001);     // U+2329 left-pointing angle bracket = bra
    _Register('lambda', 955);    // U+03BB greek small letter lambda
    _Register('Lambda', 923);    // U+039B greek capital letter lambda
    _Register('kappa', 954);     // U+03BA greek small letter kappa
    _Register('Kappa', 922);     // U+039A greek capital letter kappa
    _Register('iuml', 239);      // U+00EF latin small letter i with diaeresis
    _Register('Iuml', 207);      // U+00CF latin capital letter I with diaeresis
    _Register('isin', 8712);     // U+2208 element of
    _Register('iquest', 191);    // U+00BF inverted question mark
    _Register('iota', 953);      // U+03B9 greek small letter iota
    _Register('Iota', 921);      // U+0399 greek capital letter iota
    _Register('int', 8747);      // U+222B integral
    _Register('infin', 8734);    // U+221E infinity
    _Register('image', 8465);    // U+2111 blackletter capital I = imaginary part
    _Register('igrave', 236);    // U+00EC latin small letter i with grave
    _Register('Igrave', 204);    // U+00CC latin capital letter I with grave
    _Register('iexcl', 161);     // U+00A1 inverted exclamation mark
    _Register('icirc', 238);     // U+00EE latin small letter i with circumflex
    _Register('Icirc', 206);     // U+00CE latin capital letter I with circumflex
    _Register('iacute', 237);    // U+00ED latin small letter i with acute
    _Register('Iacute', 205);    // U+00CD latin capital letter I with acute
    _Register('hellip', 8230);   // U+2026 horizontal ellipsis = three dot leader
    _Register('hearts', 9829);   // U+2665 black heart suit = valentine
    _Register('hArr', 8660);     // U+21D4 left right double arrow
    _Register('harr', 8596);     // U+2194 left right arrow
    _Register('gt', 62);         // U+003E greater-than sign
    _Register('ge', 8805);       // U+2265 greater-than or equal to
    _Register('gamma', 947);     // U+03B3 greek small letter gamma
    _Register('Gamma', 915);     // U+0393 greek capital letter gamma
    _Register('frasl', 8260);    // U+2044 fraction slash
    _Register('frac34', 190);    // U+00BE vulgar fraction three quarters
    _Register('frac14', 188);    // U+00BC vulgar fraction one quarter
    _Register('frac12', 189);    // U+00BD vulgar fraction one half
    _Register('forall', 8704);   // U+2200 for all
    _Register('fnof', 402);      // U+0192 latin small f with hook = function = florin
    _Register('exist', 8707);    // U+2203 there exists
    _Register('euro', 8364);     // U+20AC euro sign
    _Register('euml', 235);      // U+00EB latin small letter e with diaeresis
    _Register('Euml', 203);      // U+00CB latin capital letter E with diaeresis
    _Register('eth', 240);       // U+00F0 latin small letter eth
    _Register('ETH', 208);       // U+00D0 latin capital letter ETH
    _Register('eta', 951);       // U+03B7 greek small letter eta
    _Register('Eta', 919);       // U+0397 greek capital letter eta
    _Register('equiv', 8801);    // U+2261 identical to
    _Register('epsilon', 949);   // U+03B5 greek small letter epsilon
    _Register('Epsilon', 917);   // U+0395 greek capital letter epsilon
    _Register('ensp', 8194);     // U+2002 en space
    _Register('emsp', 8195);     // U+2003 em space
    _Register('empty', 8709);    // U+2205 empty set = null set = diameter
    _Register('egrave', 232);    // U+00E8 latin small letter e with grave
    _Register('Egrave', 200);    // U+00C8 latin capital letter E with grave
    _Register('ecirc', 234);     // U+00EA latin small letter e with circumflex
    _Register('Ecirc', 202);     // U+00CA latin capital letter E with circumflex
    _Register('eacute', 233);    // U+00E9 latin small letter e with acute
    _Register('Eacute', 201);    // U+00C9 latin capital letter E with acute
    _Register('divide', 247);    // U+00F7 division sign
    _Register('diams', 9830);    // U+2666 black diamond suit
    _Register('delta', 948);     // U+03B4 greek small letter delta
    _Register('Delta', 916);     // U+0394 greek capital letter delta
    _Register('deg', 176);       // U+00B0 degree sign
    _Register('dArr', 8659);     // U+21D3 downwards double arrow
    _Register('darr', 8595);     // U+2193 downwards arrow
    _Register('Dagger', 8225);   // U+2021 double dagger
    _Register('dagger', 8224);   // U+2020 dagger
    _Register('curren', 164);    // U+00A4 currency sign
    _Register('cup', 8746);      // U+222A union = cup
    _Register('crarr', 8629);    // U+21B5 downwards arrow with corner leftwards = carriage return
    _Register('copy', 169);      // U+00A9 copyright sign
    _Register('cong', 8773);     // U+2245 approximately equal to
    _Register('clubs', 9827);    // U+2663 black club suit = shamrock
    _Register('circ', 710);      // U+02C6 modifier letter circumflex accent
    _Register('chi', 967);       // U+03C7 greek small letter chi
    _Register('Chi', 935);       // U+03A7 greek capital letter chi
    _Register('cent', 162);      // U+00A2 cent sign
    _Register('cedil', 184);     // U+00B8 cedilla = spacing cedilla
    _Register('ccedil', 231);    // U+00E7 latin small letter c with cedilla
    _Register('Ccedil', 199);    // U+00C7 latin capital letter C with cedilla
    _Register('cap', 8745);      // U+2229 intersection = cap
    _Register('bull', 8226);     // U+2022 bullet = black small circle
    _Register('brvbar', 166);    // U+00A6 broken bar = broken vertical bar
    _Register('beta', 946);      // U+03B2 greek small letter beta
    _Register('Beta', 914);      // U+0392 greek capital letter beta
    _Register('bdquo', 8222);    // U+201E double low-9 quotation mark
    _Register('auml', 228);      // U+00E4 latin small letter a with diaeresis
    _Register('Auml', 196);      // U+00C4 latin capital letter A with diaeresis
    _Register('atilde', 227);    // U+00E3 latin small letter a with tilde
    _Register('Atilde', 195);    // U+00C3 latin capital letter A with tilde
    _Register('asymp', 8776);    // U+2248 almost equal to = asymptotic to
    _Register('aring', 229);     // U+00E5 latin small letter a with ring above
    _Register('Aring', 197);     // U+00C5 latin capital letter A with ring above
    _Register('ang', 8736);      // U+2220 angle
    _Register('and', 8743);      // U+2227 logical and = wedge
    _Register('amp', 38);        // U+0026 ampersand
    _Register('alpha', 945);     // U+03B1 greek small letter alpha
    _Register('Alpha', 913);     // U+0391 greek capital letter alpha
    _Register('alefsym', 8501);  // U+2135 alef symbol = first transfinite cardinal
    _Register('agrave', 224);    // U+00E0 latin small letter a with grave
    _Register('Agrave', 192);    // U+00C0 latin capital letter A with grave
    _Register('aelig', 230);     // U+00E6 latin small letter ae = latin small ligature ae
    _Register('AElig', 198);     // U+00C6 latin capital letter AE = latin capital ligature AE
    _Register('acute', 180);     // U+00B4 acute accent = spacing acute
    _Register('acirc', 226);     // U+00E2 latin small letter a with circumflex
    _Register('Acirc', 194);     // U+00C2 latin capital letter A with circumflex
    _Register('aacute', 225);    // U+00E1 latin small letter a with acute
    _Register('Aacute', 193);    // U+00C1 latin capital letter A with acute
  end;
  353. {*******************************************************************}
  354. function ALXMLCDataElementEncode(const Src: AnsiString): AnsiString;
  355. Begin
  356. // The preferred approach to using CDATA sections for encoding text that contains the triad "]]>" is to use multiple CDATA sections by splitting each
  357. // occurrence of the triad just before the ">". For example, to encode "]]>" one would write:
  358. // <![CDATA[]]]]><![CDATA[>]]>
  359. // This means that to encode "]]>" in the middle of a CDATA section, replace all occurrences of "]]>" with the following:
  360. // ]]]]><![CDATA[>
  361. Result := alStringReplace(Src,']]>',']]]]><![CDATA[>',[rfReplaceAll]);
  362. End;
  363. {*************************************************}
  364. {we use useNumericReference by default because it's
  365. compatible with XHTML, especially because of the &apos; entity}
  366. function ALXMLTextElementEncode(const Src: AnsiString; const useNumericReference: boolean = True): AnsiString;
  367. var i, l: integer;
  368. Buf, P: PAnsiChar;
  369. ch: Integer;
  370. begin
  371. Result := '';
  372. L := Length(src);
  373. if L = 0 then exit;
  374. GetMem(Buf, L * 6); // to be on the *very* safe side
  375. try
  376. P := Buf;
  377. for i := 1 to L do begin
  378. ch := Ord(src[i]);
  379. case ch of
  380. 34: begin // quot "
  381. if useNumericReference then begin
  382. ALStrMove('&#34;', P, 5);
  383. Inc(P, 5);
  384. end
  385. else begin
  386. ALStrMove('&quot;', P, 6);
  387. Inc(P, 6);
  388. end;
  389. end;
  390. 38: begin // amp &
  391. if useNumericReference then begin
  392. ALStrMove('&#38;', P, 5);
  393. Inc(P, 5);
  394. end
  395. else begin
  396. ALStrMove('&amp;', P, 5);
  397. Inc(P, 5);
  398. end;
  399. end;
  400. 39: begin // apos '
  401. if useNumericReference then begin
  402. ALStrMove('&#39;', P, 5);
  403. Inc(P, 5);
  404. end
  405. else begin
  406. ALStrMove('&apos;', P, 6); // !! warning this entity not work in HTML nor in XHTML under IE !!
  407. Inc(P, 6);
  408. end;
  409. end;
  410. 60: begin // lt <
  411. if useNumericReference then begin
  412. ALStrMove('&#60;', P, 5);
  413. Inc(P, 5);
  414. end
  415. else begin
  416. ALStrMove('&lt;', P, 4);
  417. Inc(P, 4);
  418. end;
  419. end;
  420. 62: begin // gt >
  421. if useNumericReference then begin
  422. ALStrMove('&#62;', P, 5);
  423. Inc(P, 5);
  424. end
  425. else begin
  426. ALStrMove('&gt;', P, 4);
  427. Inc(P, 4);
  428. end;
  429. end;
  430. else Begin
  431. P^:= AnsiChar(ch);
  432. Inc(P);
  433. end;
  434. end;
  435. end;
  436. SetString(Result, Buf, P - Buf);
  437. finally
  438. FreeMem(Buf);
  439. end;
  440. end;
  441. {*********************************************************************}
  442. function ALUTF8XMLTextElementDecode(const Src: AnsiString): AnsiString;
  443. var CurrentSrcPos, CurrentResultPos : Integer;
  444. j: integer;
  445. aTmpInteger: Integer;
  446. SrcLength: integer;
  447. aEntity: AnsiString;
  448. {---------------------------------------}
  449. procedure _CopyCurrentSrcPosCharToResult;
  450. Begin
  451. result[CurrentResultPos] := src[CurrentSrcPos];
  452. inc(CurrentResultPos);
  453. inc(CurrentSrcPos);
  454. end;
  455. {----------------------------------------------------------------------------------}
  456. procedure _CopyCharToResult(aUnicodeOrdEntity: Integer; aNewCurrentSrcPos: integer);
  457. Var aUTF8String: AnsiString;
  458. K: integer;
  459. Begin
  460. aUTF8String := UTF8Encode(WideChar(aUnicodeOrdEntity));
  461. For k := 1 to length(aUTF8String) do begin
  462. result[CurrentResultPos] := aUTF8String[k];
  463. inc(CurrentResultPos);
  464. end;
  465. CurrentSrcPos := aNewCurrentSrcPos;
  466. end;
  467. begin
  468. {init var}
  469. CurrentSrcPos := 1;
  470. CurrentResultPos := 1;
  471. SrcLength := Length(src);
  472. SetLength(Result,SrcLength);
  473. {start loop}
  474. while (CurrentSrcPos <= SrcLength) do begin
  475. {HTMLentity detected}
  476. If src[CurrentSrcPos]='&' then begin
  477. {extract the HTML entity}
  478. j := CurrentSrcPos;
  479. while (J <= SrcLength) and (src[j] <> ';') and (j-CurrentSrcPos<=12) do inc(j);
  480. {HTML entity is valid}
  481. If (J<=SrcLength) and (j-CurrentSrcPos<=12) then Begin
  482. {HTML entity is numeric}
  483. IF (Src[CurrentSrcPos+1] = '#') then begin
  484. {HTML entity is hexa}
  485. IF (Src[CurrentSrcPos+2] = 'x') then begin
  486. if ALTryStrToInt('$' + ALCopyStr(Src,
  487. CurrentSrcPos+3,
  488. j-CurrentSrcPos-3),
  489. aTmpInteger)
  490. then _CopyCharToResult(aTmpInteger, J+1)
  491. else _CopyCurrentSrcPosCharToResult;
  492. end
  493. {HTML entity is numeric}
  494. else begin
  495. {numeric HTML entity is valid}
  496. if ALTryStrToInt(ALCopyStr(Src,
  497. CurrentSrcPos+2,
  498. j-CurrentSrcPos-2),
  499. aTmpInteger)
  500. then _CopyCharToResult(aTmpInteger, J+1)
  501. else _CopyCurrentSrcPosCharToResult;
  502. end;
  503. end
  504. {HTML entity is litteral}
  505. else begin
  506. //amp
  507. aEntity := ALCopyStr(Src,
  508. CurrentSrcPos+1,
  509. j-CurrentSrcPos-1);
  510. If aEntity ='quot' then _CopyCharToResult(34, J+1) // "
  511. else if aEntity = 'apos' then _CopyCharToResult(39, J+1) // '
  512. else if aEntity = 'amp' then _CopyCharToResult(38, J+1) // &
  513. else if aEntity = 'lt' then _CopyCharToResult(60, J+1) // <
  514. else if aEntity = 'gt' then _CopyCharToResult(62, J+1) // >
  515. else _CopyCurrentSrcPosCharToResult;
  516. end;
  517. end
  518. else _CopyCurrentSrcPosCharToResult;
  519. end
  520. else _CopyCurrentSrcPosCharToResult;
  521. end;
  522. setLength(Result,CurrentResultPos-1);
  523. end;
  524. {**********************************************}
  525. function ALUTF8HTMLEncode(const Src: AnsiString;
  526. const EncodeASCIIHtmlEntities: Boolean = True;
  527. const useNumericReference: boolean = True): AnsiString;
  528. var i, k, l: integer;
  529. Buf, P: PAnsiChar;
  530. aEntityStr: AnsiString;
  531. aEntityInt: Integer;
  532. aIndex: integer;
  533. aTmpWideString: WideString;
  534. LstUnicodeEntitiesNumber: TALIntegerList;
  535. begin
  536. Result := '';
  537. If Src='' then Exit;
  538. LstUnicodeEntitiesNumber := TALIntegerList.create;
  539. Try
  540. if not useNumericReference then begin
  541. LstUnicodeEntitiesNumber.Duplicates := DupIgnore;
  542. LstUnicodeEntitiesNumber.Sorted := True;
  543. For i := 0 to vALhtml_LstEntities.Count - 1 do
  544. LstUnicodeEntitiesNumber.AddObject(integer(vALhtml_LstEntities.Objects[i]),pointer(i));
  545. end;
  546. {$IFDEF UNICODE}
  547. aTmpWideString := UTF8ToWideString(Src);
  548. {$ELSE}
  549. aTmpWideString := UTF8Decode(Src);
  550. {$ENDIF}
  551. L := length(aTmpWideString);
  552. If L=0 then Exit;
  553. GetMem(Buf, length(Src) * 12); // to be on the *very* safe side
  554. try
  555. P := Buf;
  556. For i := 1 to L do begin
  557. aEntityInt := Integer(aTmpWideString[i]);
  558. Case aEntityInt of
  559. 34: begin // quot "
  560. If EncodeASCIIHtmlEntities then begin
  561. if useNumericReference then begin
  562. ALStrMove('&#34;', P, 5);
  563. Inc(P, 5);
  564. end
  565. else begin
  566. ALStrMove('&quot;', P, 6);
  567. Inc(P, 6);
  568. end;
  569. end
  570. else Begin
  571. P^ := '"';
  572. Inc(P, 1);
  573. end;
  574. end;
  575. 38: begin // amp &
  576. If EncodeASCIIHtmlEntities then begin
  577. if useNumericReference then begin
  578. ALStrMove('&#38;', P, 5);
  579. Inc(P, 5);
  580. end
  581. else begin
  582. ALStrMove('&amp;', P, 5);
  583. Inc(P, 5);
  584. end;
  585. end
  586. else Begin
  587. P^ := '&';
  588. Inc(P, 1);
  589. end;
  590. end;
  591. 39: begin // '
  592. If EncodeASCIIHtmlEntities then begin
  593. ALStrMove('&#39;', P, 5);
  594. Inc(P, 5);
  595. end
  596. else Begin
  597. P^ := '''';
  598. Inc(P, 1);
  599. end;
  600. end;
  601. 60: begin // lt <
  602. If EncodeASCIIHtmlEntities then begin
  603. if useNumericReference then begin
  604. ALStrMove('&#60;', P, 5);
  605. Inc(P, 5);
  606. end
  607. else begin
  608. ALStrMove('&lt;', P, 4);
  609. Inc(P, 4);
  610. end;
  611. end
  612. else Begin
  613. P^ := '<';
  614. Inc(P, 1);
  615. end;
  616. end;
  617. 62: begin // gt >
  618. If EncodeASCIIHtmlEntities then begin
  619. if useNumericReference then begin
  620. ALStrMove('&#62;', P, 5);
  621. Inc(P, 5);
  622. end
  623. else begin
  624. ALStrMove('&gt;', P, 4);
  625. Inc(P, 4);
  626. end;
  627. end
  628. else Begin
  629. P^ := '>';
  630. Inc(P, 1);
  631. end;
  632. end;
  633. else begin
  634. if (aEntityInt > 127) then begin
  635. if useNumericReference then aEntityStr := '&#'+ALIntToStr(aEntityInt)+';'
  636. else begin
  637. aIndex := LstUnicodeEntitiesNumber.IndexOf(aEntityInt);
  638. If aIndex >= 0 Then begin
  639. aEntityStr := vALhtml_LstEntities[integer(LstUnicodeEntitiesNumber.Objects[aIndex])];
  640. If aEntityStr <> '' then aEntityStr := '&' + aEntityStr + ';'
  641. else aEntityStr := '&#'+ALIntToStr(aEntityInt)+';'
  642. end
  643. else aEntityStr := '&#'+ALIntToStr(aEntityInt)+';'
  644. end;
  645. end
  646. else aEntityStr := ansistring(aTmpWideString[i]);
  647. for k := 1 to Length(aEntityStr) do begin
  648. P^ := aEntityStr[k];
  649. Inc(P)
  650. end;
  651. end;
  652. end;
  653. end;
  654. SetString(Result, Buf, P - Buf);
  655. finally
  656. FreeMem(Buf);
  657. end;
  658. finally
  659. LstUnicodeEntitiesNumber.free;
  660. end;
  661. end;
  662. {***********************************************************}
  663. function ALUTF8HTMLDecode(const Src: AnsiString): AnsiString;
  664. var CurrentSrcPos, CurrentResultPos : Integer;
  665. {---------------------------------------}
  666. procedure _CopyCurrentSrcPosCharToResult;
  667. Begin
  668. result[CurrentResultPos] := src[CurrentSrcPos];
  669. inc(CurrentResultPos);
  670. inc(CurrentSrcPos);
  671. end;
  672. {----------------------------------------------------------------------------------}
  673. procedure _CopyCharToResult(aUnicodeOrdEntity: Integer; aNewCurrentSrcPos: integer);
  674. Var aUTF8String: AnsiString;
  675. K: integer;
  676. Begin
  677. aUTF8String := UTF8Encode(WideChar(aUnicodeOrdEntity));
  678. For k := 1 to length(aUTF8String) do begin
  679. result[CurrentResultPos] := aUTF8String[k];
  680. inc(CurrentResultPos);
  681. end;
  682. CurrentSrcPos := aNewCurrentSrcPos;
  683. end;
  684. var j: integer;
  685. aTmpInteger: Integer;
  686. SrcLength: integer;
  687. begin
  688. {init var}
  689. CurrentSrcPos := 1;
  690. CurrentResultPos := 1;
  691. SrcLength := Length(src);
  692. SetLength(Result,SrcLength);
  693. {start loop}
  694. while (CurrentSrcPos <= SrcLength) do begin
  695. {HTMLentity detected}
  696. If src[CurrentSrcPos]='&' then begin
  697. {extract the HTML entity}
  698. j := CurrentSrcPos;
  699. while (J <= SrcLength) and (src[j] <> ';') and (j-CurrentSrcPos<=12) do inc(j);
  700. {HTML entity is valid}
  701. If (J<=SrcLength) and (j-CurrentSrcPos<=12) then Begin
  702. {HTML entity is numeric}
  703. IF (Src[CurrentSrcPos+1] = '#') then begin
  704. {HTML entity is hexa}
  705. IF (Src[CurrentSrcPos+2] = 'x') then begin
  706. if ALTryStrToInt('$' + ALCopyStr(Src,
  707. CurrentSrcPos+3,
  708. j-CurrentSrcPos-3),
  709. aTmpInteger)
  710. then _CopyCharToResult(aTmpInteger, J+1)
  711. else _CopyCurrentSrcPosCharToResult;
  712. end
  713. {HTML entity is numeric}
  714. else begin
  715. {numeric HTML entity is valid}
  716. if ALTryStrToInt(ALCopyStr(Src,
  717. CurrentSrcPos+2,
  718. j-CurrentSrcPos-2),
  719. aTmpInteger)
  720. then _CopyCharToResult(aTmpInteger, J+1)
  721. else _CopyCurrentSrcPosCharToResult;
  722. end;
  723. end
  724. {HTML entity is litteral}
  725. else begin
  726. aTmpInteger := vALhtml_LstEntities.IndexOf(ALCopyStr(Src,
  727. CurrentSrcPos+1,
  728. j-CurrentSrcPos-1));
  729. If aTmpInteger >= 0 then _CopyCharToResult(integer(vALhtml_LstEntities.Objects[aTmpInteger]),J+1)
  730. else _CopyCurrentSrcPosCharToResult;
  731. end;
  732. end
  733. else _CopyCurrentSrcPosCharToResult;
  734. end
  735. else _CopyCurrentSrcPosCharToResult;
  736. end;
  737. setLength(Result,CurrentResultPos-1);
  738. end;
  739. {******************************************************************************************}
  740. // https://developer.mozilla.org/en-US/docs/JavaScript/Guide/Values,_variables,_and_literals
  741. function ALJavascriptEncode(const Src: AnsiString; const useNumericReference: boolean = True): AnsiString;
  742. var i, l: integer;
  743. Buf, P: PAnsiChar;
  744. ch: Integer;
  745. begin
  746. Result := '';
  747. L := Length(src);
  748. if L = 0 then exit;
  749. if useNumericReference then GetMem(Buf, L * 6) // to be on the *very* safe side
  750. else GetMem(Buf, L * 2); // to be on the *very* safe side
  751. try
  752. P := Buf;
  753. for i := 1 to L do begin
  754. ch := Ord(src[i]);
  755. case ch of
  756. 8: begin // Backspace
  757. if useNumericReference then begin
  758. ALStrMove('\u0008', P, 6);
  759. Inc(P, 6);
  760. end
  761. else begin
  762. ALStrMove('\b', P, 2);
  763. Inc(P, 2);
  764. end;
  765. end;
  766. 9: begin // Tab
  767. if useNumericReference then begin
  768. ALStrMove('\u0009', P, 6);
  769. Inc(P, 6);
  770. end
  771. else begin
  772. ALStrMove('\t', P, 2);
  773. Inc(P, 2);
  774. end;
  775. end;
  776. 10: begin // New line
  777. if useNumericReference then begin
  778. ALStrMove('\u000A', P, 6);
  779. Inc(P, 6);
  780. end
  781. else begin
  782. ALStrMove('\n', P, 2);
  783. Inc(P, 2);
  784. end;
  785. end;
  786. 11: begin // Vertical tab
  787. if useNumericReference then begin
  788. ALStrMove('\u000B', P, 6);
  789. Inc(P, 6);
  790. end
  791. else begin
  792. ALStrMove('\v', P, 2);
  793. Inc(P, 2);
  794. end;
  795. end;
  796. 12: begin // Form feed
  797. if useNumericReference then begin
  798. ALStrMove('\u000C', P, 6);
  799. Inc(P, 6);
  800. end
  801. else begin
  802. ALStrMove('\f', P, 2);
  803. Inc(P, 2);
  804. end;
  805. end;
  806. 13: begin // Carriage return
  807. if useNumericReference then begin
  808. ALStrMove('\u000D', P, 6);
  809. Inc(P, 6);
  810. end
  811. else begin
  812. ALStrMove('\r', P, 2);
  813. Inc(P, 2);
  814. end;
  815. end;
  816. 34: begin // Double quote
  817. if useNumericReference then begin
  818. ALStrMove('\u0022', P, 6);
  819. Inc(P, 6);
  820. end
  821. else begin
  822. ALStrMove('\"', P, 2);
  823. Inc(P, 2);
  824. end;
  825. end;
  826. 38: begin // & ... we need to encode it because in javascript &#39; or &amp; will be converted to ' and error unterminated string
  827. ALStrMove('\u0026', P, 6);
  828. Inc(P, 6);
  829. end;
  830. 39: begin // Apostrophe or single quote
  831. if useNumericReference then begin
  832. ALStrMove('\u0027', P, 6);
  833. Inc(P, 6);
  834. end
  835. else begin
  836. ALStrMove('\''', P, 2);
  837. Inc(P, 2);
  838. end;
  839. end;
  840. 60: begin // < ... mostly to hide all </script> tag inside javascript.
  841. // http://www.wwco.com/~wls/blog/2007/04/25/using-script-in-a-javascript-literal/
  842. ALStrMove('\u003C', P, 6);
  843. Inc(P, 6);
  844. end;
  845. 62: begin // > ... mostly to hide all HTML tag inside javascript.
  846. ALStrMove('\u003E', P, 6);
  847. Inc(P, 6);
  848. end;
  849. 92: begin // Backslash character (\).
  850. if useNumericReference then begin
  851. ALStrMove('\u005C', P, 6);
  852. Inc(P, 6);
  853. end
  854. else begin
  855. ALStrMove('\\', P, 2);
  856. Inc(P, 2);
  857. end;
  858. end;
  859. else Begin
  860. P^:= AnsiChar(ch);
  861. Inc(P);
  862. end;
  863. end;
  864. end;
  865. SetString(Result, Buf, P - Buf);