PageRenderTime 28ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/ExtLibs/wxWidgets/src/html/htmltag.cpp

https://bitbucket.org/lennonchan/cafu
C++ | 644 lines | 528 code | 58 blank | 58 comment | 128 complexity | 3e0c04a731f87f55646ba12bbc6a5d11 MD5 | raw file
  1. /////////////////////////////////////////////////////////////////////////////
  2. // Name: src/html/htmltag.cpp
  3. // Purpose: wxHtmlTag class (represents single tag)
  4. // Author: Vaclav Slavik
  5. // RCS-ID: $Id$
  6. // Copyright: (c) 1999 Vaclav Slavik
  7. // Licence: wxWindows licence
  8. /////////////////////////////////////////////////////////////////////////////
  9. #include "wx/wxprec.h"
  10. #ifdef __BORLANDC__
  11. #pragma hdrstop
  12. #endif
  13. #if wxUSE_HTML
  14. #include "wx/html/htmltag.h"
  15. #ifndef WX_PRECOMP
  16. #include "wx/colour.h"
  17. #include "wx/wxcrtvararg.h"
  18. #endif
  19. #include "wx/html/htmlpars.h"
  20. #include "wx/html/styleparams.h"
  21. #include "wx/vector.h"
  22. #include <stdio.h> // for vsscanf
  23. #include <stdarg.h>
  24. //-----------------------------------------------------------------------------
  25. // wxHtmlTagsCache
  26. //-----------------------------------------------------------------------------
  27. struct wxHtmlCacheItem
  28. {
  29. // this is "pos" value passed to wxHtmlTag's constructor.
  30. // it is position of '<' character of the tag
  31. wxString::const_iterator Key;
  32. // Tag type
  33. enum Type
  34. {
  35. Type_Normal, // normal tag with a matching ending tag
  36. Type_NoMatchingEndingTag, // there's no ending tag for this tag
  37. Type_EndingTag // this is ending tag </..>
  38. };
  39. Type type;
  40. // end positions for the tag:
  41. // end1 is '<' of ending tag,
  42. // end2 is '>' or both are
  43. wxString::const_iterator End1, End2;
  44. // name of this tag
  45. wxChar *Name;
  46. };
// Storage for the tags cache: a plain vector of wxHtmlCacheItem entries.
// NB: this is an empty class and not typedef because of forward declaration
class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
{
};
  51. bool wxIsCDATAElement(const wxChar *tag)
  52. {
  53. return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
  54. (wxStrcmp(tag, wxT("STYLE")) == 0);
  55. }
  56. bool wxIsCDATAElement(const wxString& tag)
  57. {
  58. return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
  59. (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
  60. }
// Builds the cache by scanning 'source' once from start to end.
//
// Every '<' that is not the start of a comment and that is followed,
// somewhere later, by a '>' produces one cache entry keyed by the position
// of that '<'. Opening tags start out as Type_NoMatchingEndingTag; when a
// closing tag with the same (upper-cased) name is met later, the nearest
// such earlier entry is promoted to Type_Normal and gets the position of the
// closing tag stored in End1/End2.
//
// The iterators stored in the cache point into 'source'.
wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
{
    m_Cache = new wxHtmlTagsCacheData;
    m_CachePos = 0;

    // Scratch buffer for the upper-cased tag name; names longer than 255
    // characters are truncated by the copy loop below.
    wxChar tagBuffer[256];

    const wxString::const_iterator end = source.end();
    for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
    {
        if (*pos != wxT('<'))
            continue;

        // possible tag start found:

        // don't cache comment tags
        if ( wxHtmlParser::SkipCommentTag(pos, end) )
            continue;

        // Remember the starting tag position.
        wxString::const_iterator stpos = pos++;

        // And look for the ending one.
        // While doing so, copy the upper-cased name (everything up to the
        // first whitespace or '>') into tagBuffer. For a closing tag the
        // leading '/' is copied too, which is why tagBuffer+1 is used when
        // comparing names further down.
        int i;
        for ( i = 0;
              pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
              *pos != wxT('>') && !wxIsspace(*pos);
              ++i, ++pos )
        {
            tagBuffer[i] = (wxChar)wxToupper(*pos);
        }
        tagBuffer[i] = wxT('\0');

        // Skip the rest of the tag (its parameters, if any) up to '>'.
        while (pos < end && *pos != wxT('>'))
            ++pos;

        if ( pos == end )
        {
            // We didn't find a closing bracket, this is not a valid tag after
            // all. Notice that we need to roll back pos to avoid creating an
            // invalid iterator when "++pos" is done in the loop statement.
            --pos;
            continue;
        }

        // We have a valid tag, add it to the cache.
        size_t tg = Cache().size();
        Cache().push_back(wxHtmlCacheItem());
        Cache()[tg].Key = stpos;
        // i is the name length here, the extra character is the terminating NUL
        Cache()[tg].Name = new wxChar[i+1];
        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));

        if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
            // find matching begin tag:
            // walk backwards from the entry just added and take the nearest
            // still-unmatched opening tag with the same name
            for (i = tg; i >= 0; i--)
            {
                if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
                {
                    Cache()[i].type = wxHtmlCacheItem::Type_Normal;
                    // End1 is the '<' of this closing tag, End2 is one past
                    // its '>'
                    Cache()[i].End1 = stpos;
                    Cache()[i].End2 = pos + 1;
                    break;
                }
            }
        }
        else
        {
            // Opening tag: unmatched until its closing tag is seen.
            Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;

            // For SCRIPT and STYLE, jump ahead to the closing tag so that
            // any '<' inside the element's content is not cached as a tag.
            if (wxIsCDATAElement(tagBuffer))
            {
                // store the orig pos in case we are missing the closing
                // tag (see below)
                const wxString::const_iterator old_pos = pos;
                bool foundCloseTag = false;

                // find next matching tag
                int tag_len = wxStrlen(tagBuffer);
                while (pos < end)
                {
                    // find the ending tag
                    // (i.e. advance to the next "</" or to the last
                    // character of the input)
                    while (pos + 1 < end &&
                           (*pos != '<' || *(pos+1) != '/'))
                        ++pos;
                    if (*pos == '<')
                        ++pos;

                    // see if it matches
                    // match_pos counts how many characters of the tag name
                    // have been matched so far; the scan stops at the next
                    // '<' or '>'
                    int match_pos = 0;
                    while (pos < end && match_pos < tag_len )
                    {
                        wxChar c = *pos;
                        if ( c == '>' || c == '<' )
                            break;

                        // cast to wxChar needed to suppress warning in
                        // Unicode build
                        if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
                        {
                            ++match_pos;
                        }
                        else if (c == wxT(' ') || c == wxT('\n') ||
                                 c == wxT('\r') || c == wxT('\t'))
                        {
                            // need to skip over these
                        }
                        else
                        {
                            // mismatch (this includes the '/' right after
                            // '<'): start matching the name from scratch
                            match_pos = 0;
                        }
                        ++pos;
                    }

                    // found a match
                    if (match_pos == tag_len)
                    {
                        // Rewind by the name length plus 3. For a closing
                        // tag written as "</NAME" this leaves pos on the
                        // character before its '<', so the "++pos" of the
                        // outer loop lands on the '<' and the closing tag is
                        // cached normally.
                        // NOTE(review): if whitespace was skipped while
                        // matching, the rewind presumably falls short of the
                        // '<' -- confirm whether that case matters.
                        pos = pos - tag_len - 3;
                        foundCloseTag = true;
                        break;
                    }
                    else // keep looking for the closing tag
                    {
                        ++pos;
                    }
                }
                if (!foundCloseTag)
                {
                    // we didn't find closing tag; this means the markup
                    // is incorrect and the best thing we can do is to
                    // ignore the unclosed tag and continue parsing as if
                    // it didn't exist:
                    pos = old_pos;
                }
            }
        }
    }

    // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
    for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
          i != Cache().end(); ++i )
    {
        wxDELETEA(i->Name);
    }
}
// Releases the cache data allocated in the constructor. The per-item Name
// buffers are freed at the end of the constructor, not here.
wxHtmlTagsCache::~wxHtmlTagsCache()
{
    delete m_Cache;
}
  195. void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
  196. const wxString::const_iterator& inputEnd,
  197. wxString::const_iterator *end1,
  198. wxString::const_iterator *end2,
  199. bool *hasEnding)
  200. {
  201. if (Cache().empty())
  202. {
  203. *end1 =
  204. *end2 = inputEnd;
  205. *hasEnding = true;
  206. return;
  207. }
  208. if (Cache()[m_CachePos].Key != at)
  209. {
  210. int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
  211. do
  212. {
  213. m_CachePos += delta;
  214. if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
  215. {
  216. if ( m_CachePos < 0 )
  217. m_CachePos = 0;
  218. else
  219. m_CachePos = Cache().size() - 1;
  220. // something is very wrong with HTML, give up by returning an
  221. // impossibly large value which is going to be ignored by the
  222. // caller
  223. *end1 =
  224. *end2 = inputEnd;
  225. *hasEnding = true;
  226. return;
  227. }
  228. }
  229. while (Cache()[m_CachePos].Key != at);
  230. }
  231. switch ( Cache()[m_CachePos].type )
  232. {
  233. case wxHtmlCacheItem::Type_Normal:
  234. *end1 = Cache()[m_CachePos].End1;
  235. *end2 = Cache()[m_CachePos].End2;
  236. *hasEnding = true;
  237. break;
  238. case wxHtmlCacheItem::Type_EndingTag:
  239. wxFAIL_MSG("QueryTag called for ending tag - can't be");
  240. // but if it does happen, fall through, better than crashing
  241. case wxHtmlCacheItem::Type_NoMatchingEndingTag:
  242. // If input HTML is invalid and there's no closing tag for this
  243. // one, pretend that it runs all the way to the end of input
  244. *end1 = inputEnd;
  245. *end2 = inputEnd;
  246. *hasEnding = false;
  247. break;
  248. }
  249. }
  250. //-----------------------------------------------------------------------------
  251. // wxHtmlTag
  252. //-----------------------------------------------------------------------------
// Constructs the tag whose '<' is at 'pos', links it into the tag tree and
// parses its name and parameters.
//
// parent     parent tag or NULL; if given, the new tag is appended as the
//            parent's last child
// source     the string being parsed (dereferenced unconditionally)
// pos        position of the '<' character of this tag
// end_pos    parsing never reads at or beyond this position; the tag's end
//            positions are clamped to it as well
// cache      tags cache built for 'source', queried for the closing tag
//            position (dereferenced unconditionally)
// entParser  if not NULL, parameter values are passed through its Parse()
//            before being stored
wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
                     const wxString *source,
                     const wxString::const_iterator& pos,
                     const wxString::const_iterator& end_pos,
                     wxHtmlTagsCache *cache,
                     wxHtmlEntitiesParser *entParser)
{
    /* Setup DOM relations */

    m_Next = NULL;
    m_FirstChild = m_LastChild = NULL;
    m_Parent = parent;
    if (parent)
    {
        // append this tag after the parent's current last child, if any
        m_Prev = m_Parent->m_LastChild;
        if (m_Prev == NULL)
            m_Parent->m_FirstChild = this;
        else
            m_Prev->m_Next = this;
        m_Parent->m_LastChild = this;
    }
    else
        m_Prev = NULL;

    /* Find parameters and their values: */

    wxChar c wxDUMMY_INITIALIZE(0);

    // fill-in name, params and begin pos:
    wxString::const_iterator i(pos+1);

    // find tag's name and convert it to uppercase:
    // (the name ends at whitespace, '>' or '/'; because of the post-increment
    // 'i' is left one past the terminating character)
    while ((i < end_pos) &&
           ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
             c != wxT('\n') && c != wxT('\t') &&
             c != wxT('>') && c != wxT('/')))
    {
        // only 'a'..'z' are upper-cased here
        if ((c >= wxT('a')) && (c <= wxT('z')))
            c -= (wxT('a') - wxT('A'));
        m_Name << c;
    }

    // if the tag has parameters, read them and "normalize" them,
    // i.e. convert to uppercase, replace whitespaces by spaces and
    // remove whitespaces around '=':
    // (the check below skips this when the last character consumed by the
    // name loop was already the closing '>')
    if (*(i-1) != wxT('>'))
    {
        #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
                             c == wxT('\n') || c == wxT('\t'))
        wxString pname, pvalue;
        // opening quote character of the value being read, 0 if unquoted
        wxChar quote;
        // states of the parameter parser
        enum
        {
            ST_BEFORE_NAME = 1,
            ST_NAME,
            ST_BEFORE_EQ,
            ST_BEFORE_VALUE,
            ST_VALUE
        } state;

        quote = 0;
        state = ST_BEFORE_NAME;
        while (i < end_pos)
        {
            c = *(i++);

            // '>' ends the tag unless it occurs inside a quoted value
            if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
            {
                if (state == ST_BEFORE_EQ || state == ST_NAME)
                {
                    // pending name without a value
                    m_ParamNames.Add(pname);
                    m_ParamValues.Add(wxGetEmptyString());
                }
                else if (state == ST_VALUE && quote == 0)
                {
                    // pending unquoted value
                    // NOTE(review): unlike an unquoted value terminated by
                    // whitespace (see ST_VALUE below), this one is stored
                    // without MakeUpper() -- confirm this is intended.
                    m_ParamNames.Add(pname);
                    if (entParser)
                        m_ParamValues.Add(entParser->Parse(pvalue));
                    else
                        m_ParamValues.Add(pvalue);
                }
                break;
            }
            switch (state)
            {
                case ST_BEFORE_NAME:
                    // first non-whitespace character starts a name
                    if (!IS_WHITE(c))
                    {
                        pname = c;
                        state = ST_NAME;
                    }
                    break;
                case ST_NAME:
                    if (IS_WHITE(c))
                        state = ST_BEFORE_EQ;
                    else if (c == wxT('='))
                        state = ST_BEFORE_VALUE;
                    else
                        pname << c;
                    break;
                case ST_BEFORE_EQ:
                    if (c == wxT('='))
                        state = ST_BEFORE_VALUE;
                    else if (!IS_WHITE(c))
                    {
                        // another name follows directly: the previous one
                        // had no value
                        m_ParamNames.Add(pname);
                        m_ParamValues.Add(wxGetEmptyString());
                        pname = c;
                        state = ST_NAME;
                    }
                    break;
                case ST_BEFORE_VALUE:
                    if (!IS_WHITE(c))
                    {
                        // a value starting with " or ' is quoted by that
                        // character, otherwise c is its first character
                        if (c == wxT('"') || c == wxT('\''))
                            quote = c, pvalue = wxGetEmptyString();
                        else
                            quote = 0, pvalue = c;
                        state = ST_VALUE;
                    }
                    break;
                case ST_VALUE:
                    // a quoted value ends at the matching quote, an unquoted
                    // one at the first whitespace
                    if ((quote != 0 && c == quote) ||
                        (quote == 0 && IS_WHITE(c)))
                    {
                        m_ParamNames.Add(pname);
                        if (quote == 0)
                        {
                            // VS: backward compatibility, no real reason,
                            //     but wxHTML code relies on this... :(
                            pvalue.MakeUpper();
                        }
                        if (entParser)
                            m_ParamValues.Add(entParser->Parse(pvalue));
                        else
                            m_ParamValues.Add(pvalue);
                        state = ST_BEFORE_NAME;
                    }
                    else
                        pvalue << c;
                    break;
            }
        }

        #undef IS_WHITE
    }
    // 'i' is where parsing of the opening tag stopped: just past its '>'
    // when one was found
    m_Begin = i;

    // ask the cache where the closing tag is and clamp the answer to end_pos
    cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
    if (m_End1 > end_pos) m_End1 = end_pos;
    if (m_End2 > end_pos) m_End2 = end_pos;

#if WXWIN_COMPATIBILITY_2_8
    m_sourceStart = source->begin();
#endif

    // Try to parse any style parameters that can be handled simply by
    // converting them to the equivalent HTML 3 attributes: this is a far cry
    // from perfect but better than nothing.
    static const struct EquivAttr
    {
        const char *style;
        const char *attr;
    } equivAttrs[] =
    {
        { "text-align",     "ALIGN"     },
        { "width",          "WIDTH"     },
        { "vertical-align", "VALIGN"    },
        { "background",     "BGCOLOR"   },
    };

    wxHtmlStyleParams styleParams(*this);
    for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
    {
        const EquivAttr& ea = equivAttrs[n];
        // an attribute given explicitly on the tag is never overridden by
        // the style
        if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
        {
            m_ParamNames.Add(ea.attr);
            m_ParamValues.Add(styleParams.GetParam(ea.style));
        }
    }
}
  422. wxHtmlTag::~wxHtmlTag()
  423. {
  424. wxHtmlTag *t1, *t2;
  425. t1 = m_FirstChild;
  426. while (t1)
  427. {
  428. t2 = t1->GetNextSibling();
  429. delete t1;
  430. t1 = t2;
  431. }
  432. }
  433. bool wxHtmlTag::HasParam(const wxString& par) const
  434. {
  435. return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
  436. }
  437. wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
  438. {
  439. int index = m_ParamNames.Index(par, false);
  440. if (index == wxNOT_FOUND)
  441. return wxGetEmptyString();
  442. if (with_quotes)
  443. {
  444. // VS: backward compatibility, seems to be never used by wxHTML...
  445. wxString s;
  446. s << wxT('"') << m_ParamValues[index] << wxT('"');
  447. return s;
  448. }
  449. else
  450. return m_ParamValues[index];
  451. }
  452. int wxHtmlTag::ScanParam(const wxString& par,
  453. const char *format,
  454. void *param) const
  455. {
  456. wxString parval = GetParam(par);
  457. return wxSscanf(parval, format, param);
  458. }
  459. int wxHtmlTag::ScanParam(const wxString& par,
  460. const wchar_t *format,
  461. void *param) const
  462. {
  463. wxString parval = GetParam(par);
  464. return wxSscanf(parval, format, param);
  465. }
  466. /* static */
  467. bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
  468. {
  469. wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
  470. // handle colours defined in HTML 4.0 first:
  471. if (str.length() > 1 && str[0] != wxT('#'))
  472. {
  473. #define HTML_COLOUR(name, r, g, b) \
  474. if (str.IsSameAs(wxS(name), false)) \
  475. { clr->Set(r, g, b); return true; }
  476. HTML_COLOUR("black", 0x00,0x00,0x00)
  477. HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
  478. HTML_COLOUR("gray", 0x80,0x80,0x80)
  479. HTML_COLOUR("white", 0xFF,0xFF,0xFF)
  480. HTML_COLOUR("maroon", 0x80,0x00,0x00)
  481. HTML_COLOUR("red", 0xFF,0x00,0x00)
  482. HTML_COLOUR("purple", 0x80,0x00,0x80)
  483. HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
  484. HTML_COLOUR("green", 0x00,0x80,0x00)
  485. HTML_COLOUR("lime", 0x00,0xFF,0x00)
  486. HTML_COLOUR("olive", 0x80,0x80,0x00)
  487. HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
  488. HTML_COLOUR("navy", 0x00,0x00,0x80)
  489. HTML_COLOUR("blue", 0x00,0x00,0xFF)
  490. HTML_COLOUR("teal", 0x00,0x80,0x80)
  491. HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
  492. #undef HTML_COLOUR
  493. }
  494. // then try to parse #rrggbb representations or set from other well
  495. // known names (note that this doesn't strictly conform to HTML spec,
  496. // but it doesn't do real harm -- but it *must* be done after the standard
  497. // colors are handled above):
  498. if (clr->Set(str))
  499. return true;
  500. return false;
  501. }
  502. bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
  503. {
  504. const wxString str = GetParam(par);
  505. return !str.empty() && ParseAsColour(str, clr);
  506. }
  507. bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
  508. {
  509. if ( !HasParam(par) )
  510. return false;
  511. long i;
  512. if ( !GetParam(par).ToLong(&i) )
  513. return false;
  514. *clr = (int)i;
  515. return true;
  516. }
  517. wxString wxHtmlTag::GetAllParams() const
  518. {
  519. // VS: this function is for backward compatibility only,
  520. // never used by wxHTML
  521. wxString s;
  522. size_t cnt = m_ParamNames.GetCount();
  523. for (size_t i = 0; i < cnt; i++)
  524. {
  525. s << m_ParamNames[i];
  526. s << wxT('=');
  527. if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
  528. s << wxT('\'') << m_ParamValues[i] << wxT('\'');
  529. else
  530. s << wxT('"') << m_ParamValues[i] << wxT('"');
  531. }
  532. return s;
  533. }
  534. wxHtmlTag *wxHtmlTag::GetFirstSibling() const
  535. {
  536. if (m_Parent)
  537. return m_Parent->m_FirstChild;
  538. else
  539. {
  540. wxHtmlTag *cur = (wxHtmlTag*)this;
  541. while (cur->m_Prev)
  542. cur = cur->m_Prev;
  543. return cur;
  544. }
  545. }
  546. wxHtmlTag *wxHtmlTag::GetLastSibling() const
  547. {
  548. if (m_Parent)
  549. return m_Parent->m_LastChild;
  550. else
  551. {
  552. wxHtmlTag *cur = (wxHtmlTag*)this;
  553. while (cur->m_Next)
  554. cur = cur->m_Next;
  555. return cur;
  556. }
  557. }
  558. wxHtmlTag *wxHtmlTag::GetNextTag() const
  559. {
  560. if (m_FirstChild) return m_FirstChild;
  561. if (m_Next) return m_Next;
  562. wxHtmlTag *cur = m_Parent;
  563. if (!cur) return NULL;
  564. while (cur->m_Parent && !cur->m_Next)
  565. cur = cur->m_Parent;
  566. return cur->m_Next;
  567. }
  568. #endif