/tags/sp-opensp_1_5_1/lib/Text.cxx

# · C++ · 410 lines · 364 code · 32 blank · 14 comment · 103 complexity · 2ac1e98e25c4fc1096f9ebf2e99f7d8d MD5 · raw file

  1. // Copyright (c) 1994 James Clark
  2. // See the file COPYING for copying permission.
  3. #ifdef __GNUG__
  4. #pragma implementation
  5. #endif
  6. #include "splib.h"
  7. #include "Text.h"
  8. // for memcmp()
  9. #include <string.h>
  10. #ifdef SP_NAMESPACE
  11. namespace SP_NAMESPACE {
  12. #endif
  13. Text::Text()
  14. {
  15. }
  16. Text::~Text()
  17. {
  18. }
  19. void Text::addChar(Char c, const Location &loc)
  20. {
  21. if (items_.size() == 0
  22. || items_.back().type != TextItem::data
  23. || loc.origin().pointer() != items_.back().loc.origin().pointer()
  24. || loc.index() != (items_.back().loc.index()
  25. + (chars_.size() - items_.back().index))) {
  26. items_.resize(items_.size() + 1);
  27. items_.back().loc = loc;
  28. items_.back().type = TextItem::data;
  29. items_.back().index = chars_.size();
  30. }
  31. chars_ += c;
  32. }
  33. void Text::addChars(const Char *p, size_t length, const Location &loc)
  34. {
  35. if (items_.size() == 0
  36. || items_.back().type != TextItem::data
  37. || loc.origin().pointer() != items_.back().loc.origin().pointer()
  38. || loc.index() != (items_.back().loc.index()
  39. + (chars_.size() - items_.back().index))) {
  40. items_.resize(items_.size() + 1);
  41. items_.back().loc = loc;
  42. items_.back().type = TextItem::data;
  43. items_.back().index = chars_.size();
  44. }
  45. chars_.append(p, length);
  46. }
  47. void Text::addCdata(const StringC &str,
  48. const ConstPtr<Origin> &origin)
  49. {
  50. addSimple(TextItem::cdata, Location(origin, 0));
  51. chars_.append(str.data(), str.size());
  52. }
  53. void Text::addSdata(const StringC &str,
  54. const ConstPtr<Origin> &origin)
  55. {
  56. addSimple(TextItem::sdata, Location(origin, 0));
  57. chars_.append(str.data(), str.size());
  58. }
  59. void Text::addNonSgmlChar(Char c, const Location &loc)
  60. {
  61. addSimple(TextItem::nonSgml, loc);
  62. chars_ += c;
  63. }
  64. void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
  65. Char space)
  66. {
  67. Location loci(loc);
  68. // FIXME speed this up
  69. for (size_t i = 0; i < n; loci += 1, i++) {
  70. if (str[i] == space && (size() == 0 || lastChar() == space))
  71. ignoreChar(str[i], loci);
  72. else
  73. addChar(str[i], loci);
  74. }
  75. }
  76. void Text::tokenize(Char space, Text &text) const
  77. {
  78. TextIter iter(*this);
  79. TextItem::Type type;
  80. const Char *p;
  81. size_t n;
  82. const Location *loc;
  83. while (iter.next(type, p, n, loc)) {
  84. switch (type) {
  85. case TextItem::data:
  86. text.addCharsTokenize(p, n, *loc, space);
  87. break;
  88. case TextItem::sdata:
  89. case TextItem::cdata:
  90. {
  91. text.addEntityStart(*loc);
  92. text.addCharsTokenize(p, n, *loc, space);
  93. Location tem(*loc);
  94. tem += n;
  95. text.addEntityEnd(tem);
  96. }
  97. break;
  98. case TextItem::ignore:
  99. text.ignoreChar(*p, *loc);
  100. break;
  101. default:
  102. text.addSimple(type, *loc);
  103. break;
  104. }
  105. }
  106. if (text.size() > 0 && text.lastChar() == space)
  107. text.ignoreLastChar();
  108. }
  109. void Text::addSimple(TextItem::Type type, const Location &loc)
  110. {
  111. items_.resize(items_.size() + 1);
  112. items_.back().loc = loc;
  113. items_.back().type = type;
  114. items_.back().index = chars_.size();
  115. }
  116. void Text::ignoreChar(Char c, const Location &loc)
  117. {
  118. items_.resize(items_.size() + 1);
  119. items_.back().loc = loc;
  120. items_.back().type = TextItem::ignore;
  121. items_.back().c = c;
  122. items_.back().index = chars_.size();
  123. }
  124. void Text::ignoreLastChar()
  125. {
  126. size_t lastIndex = chars_.size() - 1;
  127. size_t i;
  128. for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
  129. ;
  130. // lastIndex >= items_[i].index
  131. if (items_[i].index != lastIndex) {
  132. items_.resize(items_.size() + 1);
  133. i++;
  134. for (size_t j = items_.size() - 1; j > i; j--)
  135. items_[j] = items_[j - 1];
  136. items_[i].index = lastIndex;
  137. items_[i].loc = items_[i - 1].loc;
  138. items_[i].loc += lastIndex - items_[i - 1].index;
  139. }
  140. items_[i].c = chars_[chars_.size() - 1];
  141. items_[i].type = TextItem::ignore;
  142. for (size_t j = i + 1; j < items_.size(); j++)
  143. items_[j].index = lastIndex;
  144. chars_.resize(chars_.size() - 1);
  145. }
  146. // All characters other than spaces are substed.
  147. void Text::subst(const SubstTable &table, Char space)
  148. {
  149. for (size_t i = 0; i < items_.size(); i++)
  150. if (items_[i].type == TextItem::data) {
  151. size_t lim = (i + 1 < items_.size()
  152. ? items_[i + 1].index
  153. : chars_.size());
  154. size_t j;
  155. for (j = items_[i].index; j < lim; j++) {
  156. Char c = chars_[j];
  157. if (c != space && c != table[c])
  158. break;
  159. }
  160. if (j < lim) {
  161. size_t start = items_[i].index;
  162. StringC origChars(chars_.data() + start, lim - start);
  163. for (; j < lim; j++)
  164. if (chars_[j] != space)
  165. table.subst(chars_[j]);
  166. items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
  167. origChars),
  168. 0);
  169. }
  170. }
  171. }
  172. void Text::clear()
  173. {
  174. chars_.resize(0);
  175. items_.clear();
  176. }
  177. Boolean Text::startDelimLocation(Location &loc) const
  178. {
  179. if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
  180. return 0;
  181. loc = items_[0].loc;
  182. return 1;
  183. }
  184. Boolean Text::endDelimLocation(Location &loc) const
  185. {
  186. if (items_.size() == 0)
  187. return 0;
  188. switch (items_.back().type) {
  189. case TextItem::endDelim:
  190. case TextItem::endDelimA:
  191. break;
  192. default:
  193. return 0;
  194. }
  195. loc = items_.back().loc;
  196. return 1;
  197. }
  198. Boolean Text::delimType(Boolean &lita) const
  199. {
  200. if (items_.size() == 0)
  201. return 0;
  202. switch (items_.back().type) {
  203. case TextItem::endDelim:
  204. lita = 0;
  205. return 1;
  206. case TextItem::endDelimA:
  207. lita = 1;
  208. return 1;
  209. default:
  210. break;
  211. }
  212. return 0;
  213. }
  214. TextItem::TextItem()
  215. {
  216. }
  217. void Text::swap(Text &to)
  218. {
  219. items_.swap(to.items_);
  220. chars_.swap(to.chars_);
  221. }
  222. TextIter::TextIter(const Text &text)
  223. : ptr_(text.items_.begin()), text_(&text)
  224. {
  225. }
  226. const Char *TextIter::chars(size_t &length) const
  227. {
  228. if (ptr_->type == TextItem::ignore) {
  229. length = 1;
  230. return &ptr_->c;
  231. }
  232. else {
  233. const StringC &chars = text_->chars_;
  234. size_t charsIndex = ptr_->index;
  235. if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
  236. length = ptr_[1].index - charsIndex;
  237. else
  238. length = chars.size() - charsIndex;
  239. return chars.data() + charsIndex;
  240. }
  241. }
  242. Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
  243. const Location *&loc)
  244. {
  245. const TextItem *end = text_->items_.begin() + text_->items_.size();
  246. if (ptr_ == end)
  247. return 0;
  248. type = ptr_->type;
  249. loc = &ptr_->loc;
  250. if (type == TextItem::ignore) {
  251. str = &ptr_->c;
  252. length = 1;
  253. }
  254. else {
  255. const StringC &chars = text_->chars_;
  256. size_t charsIndex = ptr_->index;
  257. str = chars.data() + charsIndex;
  258. if (ptr_ + 1 != end)
  259. length = ptr_[1].index - charsIndex;
  260. else
  261. length = chars.size() - charsIndex;
  262. }
  263. ptr_++;
  264. return 1;
  265. }
  266. void Text::insertChars(const StringC &s, const Location &loc)
  267. {
  268. chars_.insert(0, s);
  269. items_.resize(items_.size() + 1);
  270. for (size_t i = items_.size() - 1; i > 0; i--) {
  271. items_[i] = items_[i - 1];
  272. items_[i].index += s.size();
  273. }
  274. items_[0].loc = loc;
  275. items_[0].type = TextItem::data;
  276. items_[0].index = 0;
  277. }
  278. size_t Text::normalizedLength(size_t normsep) const
  279. {
  280. size_t n = size();
  281. n += normsep;
  282. for (size_t i = 0; i < items_.size(); i++)
  283. switch (items_[i].type) {
  284. case TextItem::sdata:
  285. case TextItem::cdata:
  286. n += normsep;
  287. break;
  288. default:
  289. break;
  290. }
  291. return n;
  292. }
  293. // This is used to determine for a FIXED CDATA attribute
  294. // whether a specified value if equal to the default value.
  295. Boolean Text::fixedEqual(const Text &text) const
  296. {
  297. if (string() != text.string())
  298. return 0;
  299. size_t j = 0;
  300. for (size_t i = 0; i < items_.size(); i++)
  301. switch (items_[i].type) {
  302. case TextItem::cdata:
  303. case TextItem::sdata:
  304. for (;;) {
  305. if (j >= text.items_.size())
  306. return 0;
  307. if (text.items_[j].type == TextItem::nonSgml)
  308. return 0;
  309. if (text.items_[j].type == TextItem::cdata
  310. || text.items_[j].type == TextItem::sdata)
  311. break;
  312. j++;
  313. }
  314. if (text.items_[j].index != items_[i].index
  315. || (text.items_[j].loc.origin()->entityDecl()
  316. != items_[i].loc.origin()->entityDecl()))
  317. return 0;
  318. break;
  319. case TextItem::nonSgml:
  320. for (;;) {
  321. if (j >= text.items_.size())
  322. return 0;
  323. if (text.items_[j].type == TextItem::cdata
  324. || text.items_[j].type == TextItem::sdata)
  325. return 0;
  326. if (text.items_[j].type == TextItem::nonSgml)
  327. break;
  328. j++;
  329. }
  330. if (text.items_[j].index != items_[i].index)
  331. return 0;
  332. break;
  333. default:
  334. break;
  335. }
  336. for (; j < text.items_.size(); j++)
  337. switch (text.items_[j].type) {
  338. case TextItem::cdata:
  339. case TextItem::sdata:
  340. case TextItem::nonSgml:
  341. return 0;
  342. default:
  343. break;
  344. }
  345. return 1;
  346. }
  347. Boolean Text::charLocation(size_t ind, const ConstPtr<Origin> *&origin, Index &index) const
  348. {
  349. // Find the last item whose index <= ind.
  350. // Invariant:
  351. // indexes < i implies index <= ind
  352. // indexes >= lim implies index > ind
  353. // The first item will always have index 0.
  354. size_t i = 1;
  355. size_t lim = items_.size();
  356. while (i < lim) {
  357. size_t mid = i + (lim - i)/2;
  358. if (items_[mid].index > ind)
  359. lim = mid;
  360. else
  361. i = mid + 1;
  362. }
  363. #if 0
  364. for (size_t i = 1; i < items_.size(); i++)
  365. if (items_[i].index > ind)
  366. break;
  367. #endif
  368. i--;
  369. // If items_.size() == 0, then i == lim.
  370. if (i < lim) {
  371. origin = &items_[i].loc.origin();
  372. index = items_[i].loc.index() + (ind - items_[i].index);
  373. }
  374. return 1;
  375. }
  376. #ifdef SP_NAMESPACE
  377. }
  378. #endif