/projects/xerces-2.10.0/src/org/apache/xerces/impl/xpath/regex/RangeToken.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus · Java · 623 lines · 494 code · 29 blank · 100 comment · 190 complexity · 457392883e3ee4fcb9618395ef812aec MD5 · raw file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.xerces.impl.xpath.regex;
  18. /**
  19. * This class represents a character class such as [a-z] or a period.
  20. *
  21. * @xerces.internal
  22. *
  23. * @version $Id: RangeToken.java 928735 2010-03-29 11:54:01Z mrglavas $
  24. */
  25. final class RangeToken extends Token implements java.io.Serializable {
  /** Serialization version identifier. */
  private static final long serialVersionUID = -553983121197679934L;
  /**
   * Flat list of inclusive [start, end] pairs: even indices hold range starts,
   * odd indices hold the matching ends. Remains null until ranges are assigned.
   */
  int[] ranges;
  /** True when the pairs in ranges are known to be ordered by start, then by end. */
  boolean sorted;
  /** Set once compaction has been performed; cleared whenever sorted is cleared. */
  boolean compacted;
  /**
   * Cached result of getCaseInsensitiveToken(); reset to null by addRange,
   * mergeRanges, subtractRanges and intersectRanges.
   */
  RangeToken icaseCache = null;
  /** Bitmap of the code points below MAPSIZE that are listed in ranges; built lazily by createMap(). */
  int[] map = null;
  /** Index into ranges at which match() starts scanning for code points not covered by the bitmap. */
  int nonMapIndex;
  /**
   * Creates a token that holds no ranges yet.
   *
   * @param type token type handed to the Token constructor; the rest of this
   *             class compares it against RANGE and NRANGE
   */
  RangeToken(int type) {
    super(type);
    // A fresh token is neither sorted nor compacted.
    this.sorted = false;
    this.compacted = false;
  }
  37. // for RANGE or NRANGE
  38. protected void addRange(int start, int end) {
  39. this.icaseCache = null;
  40. //System.err.println("Token#addRange(): "+start+" "+end);
  41. int r1, r2;
  42. if (start <= end) {
  43. r1 = start;
  44. r2 = end;
  45. } else {
  46. r1 = end;
  47. r2 = start;
  48. }
  49. int pos = 0;
  50. if (this.ranges == null) {
  51. this.ranges = new int[2];
  52. this.ranges[0] = r1;
  53. this.ranges[1] = r2;
  54. this.setSorted(true);
  55. } else {
  56. pos = this.ranges.length;
  57. if (this.ranges[pos-1]+1 == r1) {
  58. this.ranges[pos-1] = r2;
  59. return;
  60. }
  61. int[] temp = new int[pos+2];
  62. System.arraycopy(this.ranges, 0, temp, 0, pos);
  63. this.ranges = temp;
  64. if (this.ranges[pos-1] >= r1)
  65. this.setSorted(false);
  66. this.ranges[pos++] = r1;
  67. this.ranges[pos] = r2;
  68. if (!this.sorted)
  69. this.sortRanges();
  70. }
  71. }
  72. private final boolean isSorted() {
  73. return this.sorted;
  74. }
  75. private final void setSorted(boolean sort) {
  76. this.sorted = sort;
  77. if (!sort) this.compacted = false;
  78. }
  79. private final boolean isCompacted() {
  80. return this.compacted;
  81. }
  82. private final void setCompacted() {
  83. this.compacted = true;
  84. }
  85. protected void sortRanges() {
  86. if (this.isSorted())
  87. return;
  88. if (this.ranges == null)
  89. return;
  90. //System.err.println("Do sorting: "+this.ranges.length);
  91. // Bubble sort
  92. // Why? -- In many cases,
  93. // this.ranges has few elements.
  94. for (int i = this.ranges.length-4; i >= 0; i -= 2) {
  95. for (int j = 0; j <= i; j += 2) {
  96. if (this.ranges[j] > this.ranges[j+2]
  97. || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
  98. int tmp;
  99. tmp = this.ranges[j+2];
  100. this.ranges[j+2] = this.ranges[j];
  101. this.ranges[j] = tmp;
  102. tmp = this.ranges[j+3];
  103. this.ranges[j+3] = this.ranges[j+1];
  104. this.ranges[j+1] = tmp;
  105. }
  106. }
  107. }
  108. this.setSorted(true);
  109. }
  110. /**
  111. * this.ranges is sorted.
  112. */
  113. protected void compactRanges() {
  114. boolean DEBUG = false;
  115. if (this.ranges == null || this.ranges.length <= 2)
  116. return;
  117. if (this.isCompacted())
  118. return;
  119. int base = 0; // Index of writing point
  120. int target = 0; // Index of processing point
  121. while (target < this.ranges.length) {
  122. if (base != target) {
  123. this.ranges[base] = this.ranges[target++];
  124. this.ranges[base+1] = this.ranges[target++];
  125. } else
  126. target += 2;
  127. int baseend = this.ranges[base+1];
  128. while (target < this.ranges.length) {
  129. if (baseend+1 < this.ranges[target])
  130. break;
  131. if (baseend+1 == this.ranges[target]) {
  132. if (DEBUG)
  133. System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
  134. +", "+this.ranges[base+1]
  135. +"], ["+this.ranges[target]
  136. +", "+this.ranges[target+1]
  137. +"] -> ["+this.ranges[base]
  138. +", "+this.ranges[target+1]
  139. +"]");
  140. this.ranges[base+1] = this.ranges[target+1];
  141. baseend = this.ranges[base+1];
  142. target += 2;
  143. } else if (baseend >= this.ranges[target+1]) {
  144. if (DEBUG)
  145. System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
  146. +", "+this.ranges[base+1]
  147. +"], ["+this.ranges[target]
  148. +", "+this.ranges[target+1]
  149. +"] -> ["+this.ranges[base]
  150. +", "+this.ranges[base+1]
  151. +"]");
  152. target += 2;
  153. } else if (baseend < this.ranges[target+1]) {
  154. if (DEBUG)
  155. System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
  156. +", "+this.ranges[base+1]
  157. +"], ["+this.ranges[target]
  158. +", "+this.ranges[target+1]
  159. +"] -> ["+this.ranges[base]
  160. +", "+this.ranges[target+1]
  161. +"]");
  162. this.ranges[base+1] = this.ranges[target+1];
  163. baseend = this.ranges[base+1];
  164. target += 2;
  165. } else {
  166. throw new RuntimeException("Token#compactRanges(): Internel Error: ["
  167. +this.ranges[base]
  168. +","+this.ranges[base+1]
  169. +"] ["+this.ranges[target]
  170. +","+this.ranges[target+1]+"]");
  171. }
  172. } // while
  173. base += 2;
  174. }
  175. if (base != this.ranges.length) {
  176. int[] result = new int[base];
  177. System.arraycopy(this.ranges, 0, result, 0, base);
  178. this.ranges = result;
  179. }
  180. this.setCompacted();
  181. }
  182. protected void mergeRanges(Token token) {
  183. RangeToken tok = (RangeToken)token;
  184. this.sortRanges();
  185. tok.sortRanges();
  186. if (tok.ranges == null)
  187. return;
  188. this.icaseCache = null;
  189. this.setSorted(true);
  190. if (this.ranges == null) {
  191. this.ranges = new int[tok.ranges.length];
  192. System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
  193. return;
  194. }
  195. int[] result = new int[this.ranges.length+tok.ranges.length];
  196. for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
  197. if (i >= this.ranges.length) {
  198. result[k++] = tok.ranges[j++];
  199. result[k++] = tok.ranges[j++];
  200. } else if (j >= tok.ranges.length) {
  201. result[k++] = this.ranges[i++];
  202. result[k++] = this.ranges[i++];
  203. } else if (tok.ranges[j] < this.ranges[i]
  204. || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
  205. result[k++] = tok.ranges[j++];
  206. result[k++] = tok.ranges[j++];
  207. } else {
  208. result[k++] = this.ranges[i++];
  209. result[k++] = this.ranges[i++];
  210. }
  211. }
  212. this.ranges = result;
  213. }
  214. protected void subtractRanges(Token token) {
  215. if (token.type == NRANGE) {
  216. this.intersectRanges(token);
  217. return;
  218. }
  219. RangeToken tok = (RangeToken)token;
  220. if (tok.ranges == null || this.ranges == null)
  221. return;
  222. this.icaseCache = null;
  223. this.sortRanges();
  224. this.compactRanges();
  225. tok.sortRanges();
  226. tok.compactRanges();
  227. //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
  228. int[] result = new int[this.ranges.length+tok.ranges.length];
  229. int wp = 0, src = 0, sub = 0;
  230. while (src < this.ranges.length && sub < tok.ranges.length) {
  231. int srcbegin = this.ranges[src];
  232. int srcend = this.ranges[src+1];
  233. int subbegin = tok.ranges[sub];
  234. int subend = tok.ranges[sub+1];
  235. if (srcend < subbegin) { // Not overlapped
  236. // src: o-----o
  237. // sub: o-----o
  238. // res: o-----o
  239. // Reuse sub
  240. result[wp++] = this.ranges[src++];
  241. result[wp++] = this.ranges[src++];
  242. } else if (srcend >= subbegin
  243. && srcbegin <= subend) { // Overlapped
  244. // src: o--------o
  245. // sub: o----o
  246. // sub: o----o
  247. // sub: o----o
  248. // sub: o------------o
  249. if (subbegin <= srcbegin && srcend <= subend) {
  250. // src: o--------o
  251. // sub: o------------o
  252. // res: empty
  253. // Reuse sub
  254. src += 2;
  255. } else if (subbegin <= srcbegin) {
  256. // src: o--------o
  257. // sub: o----o
  258. // res: o-----o
  259. // Reuse src(=res)
  260. this.ranges[src] = subend+1;
  261. sub += 2;
  262. } else if (srcend <= subend) {
  263. // src: o--------o
  264. // sub: o----o
  265. // res: o-----o
  266. // Reuse sub
  267. result[wp++] = srcbegin;
  268. result[wp++] = subbegin-1;
  269. src += 2;
  270. } else {
  271. // src: o--------o
  272. // sub: o----o
  273. // res: o-o o-o
  274. // Reuse src(=right res)
  275. result[wp++] = srcbegin;
  276. result[wp++] = subbegin-1;
  277. this.ranges[src] = subend+1;
  278. sub += 2;
  279. }
  280. } else if (subend < srcbegin) {
  281. // Not overlapped
  282. // src: o-----o
  283. // sub: o----o
  284. sub += 2;
  285. } else {
  286. throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
  287. +","+this.ranges[src+1]
  288. +"] - ["+tok.ranges[sub]
  289. +","+tok.ranges[sub+1]
  290. +"]");
  291. }
  292. }
  293. while (src < this.ranges.length) {
  294. result[wp++] = this.ranges[src++];
  295. result[wp++] = this.ranges[src++];
  296. }
  297. this.ranges = new int[wp];
  298. System.arraycopy(result, 0, this.ranges, 0, wp);
  299. // this.ranges is sorted and compacted.
  300. }
  301. /**
  302. * @param tok Ignore whether it is NRANGE or not.
  303. */
  304. protected void intersectRanges(Token token) {
  305. RangeToken tok = (RangeToken)token;
  306. if (tok.ranges == null || this.ranges == null)
  307. return;
  308. this.icaseCache = null;
  309. this.sortRanges();
  310. this.compactRanges();
  311. tok.sortRanges();
  312. tok.compactRanges();
  313. int[] result = new int[this.ranges.length+tok.ranges.length];
  314. int wp = 0, src1 = 0, src2 = 0;
  315. while (src1 < this.ranges.length && src2 < tok.ranges.length) {
  316. int src1begin = this.ranges[src1];
  317. int src1end = this.ranges[src1+1];
  318. int src2begin = tok.ranges[src2];
  319. int src2end = tok.ranges[src2+1];
  320. if (src1end < src2begin) { // Not overlapped
  321. // src1: o-----o
  322. // src2: o-----o
  323. // res: empty
  324. // Reuse src2
  325. src1 += 2;
  326. } else if (src1end >= src2begin
  327. && src1begin <= src2end) { // Overlapped
  328. // src1: o--------o
  329. // src2: o----o
  330. // src2: o----o
  331. // src2: o----o
  332. // src2: o------------o
  333. if (src2begin <= src1begin && src1end <= src2end) {
  334. // src1: o--------o
  335. // src2: o------------o
  336. // res: o--------o
  337. // Reuse src2
  338. result[wp++] = src1begin;
  339. result[wp++] = src1end;
  340. src1 += 2;
  341. } else if (src2begin <= src1begin) {
  342. // src1: o--------o
  343. // src2: o----o
  344. // res: o--o
  345. // Reuse the rest of src1
  346. result[wp++] = src1begin;
  347. result[wp++] = src2end;
  348. this.ranges[src1] = src2end+1;
  349. src2 += 2;
  350. } else if (src1end <= src2end) {
  351. // src1: o--------o
  352. // src2: o----o
  353. // res: o--o
  354. // Reuse src2
  355. result[wp++] = src2begin;
  356. result[wp++] = src1end;
  357. src1 += 2;
  358. } else {
  359. // src1: o--------o
  360. // src2: o----o
  361. // res: o----o
  362. // Reuse the rest of src1
  363. result[wp++] = src2begin;
  364. result[wp++] = src2end;
  365. this.ranges[src1] = src2end+1;
  366. }
  367. } else if (src2end < src1begin) {
  368. // Not overlapped
  369. // src1: o-----o
  370. // src2: o----o
  371. src2 += 2;
  372. } else {
  373. throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
  374. +this.ranges[src1]
  375. +","+this.ranges[src1+1]
  376. +"] & ["+tok.ranges[src2]
  377. +","+tok.ranges[src2+1]
  378. +"]");
  379. }
  380. }
  381. while (src1 < this.ranges.length) {
  382. result[wp++] = this.ranges[src1++];
  383. result[wp++] = this.ranges[src1++];
  384. }
  385. this.ranges = new int[wp];
  386. System.arraycopy(result, 0, this.ranges, 0, wp);
  387. // this.ranges is sorted and compacted.
  388. }
  389. /**
  390. * for RANGE: Creates complement.
  391. * for NRANGE: Creates the same meaning RANGE.
  392. */
  393. static Token complementRanges(Token token) {
  394. if (token.type != RANGE && token.type != NRANGE)
  395. throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
  396. RangeToken tok = (RangeToken)token;
  397. tok.sortRanges();
  398. tok.compactRanges();
  399. int len = tok.ranges.length+2;
  400. if (tok.ranges[0] == 0)
  401. len -= 2;
  402. int last = tok.ranges[tok.ranges.length-1];
  403. if (last == UTF16_MAX)
  404. len -= 2;
  405. RangeToken ret = Token.createRange();
  406. ret.ranges = new int[len];
  407. int wp = 0;
  408. if (tok.ranges[0] > 0) {
  409. ret.ranges[wp++] = 0;
  410. ret.ranges[wp++] = tok.ranges[0]-1;
  411. }
  412. for (int i = 1; i < tok.ranges.length-2; i += 2) {
  413. ret.ranges[wp++] = tok.ranges[i]+1;
  414. ret.ranges[wp++] = tok.ranges[i+1]-1;
  415. }
  416. if (last != UTF16_MAX) {
  417. ret.ranges[wp++] = last+1;
  418. ret.ranges[wp] = UTF16_MAX;
  419. }
  420. ret.setCompacted();
  421. return ret;
  422. }
  423. synchronized RangeToken getCaseInsensitiveToken() {
  424. if (this.icaseCache != null)
  425. return this.icaseCache;
  426. RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
  427. for (int i = 0; i < this.ranges.length; i += 2) {
  428. for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
  429. if (ch > 0xffff)
  430. uppers.addRange(ch, ch);
  431. else {
  432. char uch = Character.toUpperCase((char)ch);
  433. uppers.addRange(uch, uch);
  434. }
  435. }
  436. }
  437. RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
  438. for (int i = 0; i < uppers.ranges.length; i += 2) {
  439. for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
  440. if (ch > 0xffff)
  441. lowers.addRange(ch, ch);
  442. else {
  443. char uch = Character.toLowerCase((char)ch);
  444. lowers.addRange(uch, uch);
  445. }
  446. }
  447. }
  448. lowers.mergeRanges(uppers);
  449. lowers.mergeRanges(this);
  450. lowers.compactRanges();
  451. this.icaseCache = lowers;
  452. return lowers;
  453. }
  454. void dumpRanges() {
  455. System.err.print("RANGE: ");
  456. if (this.ranges == null) {
  457. System.err.println(" NULL");
  458. return;
  459. }
  460. for (int i = 0; i < this.ranges.length; i += 2) {
  461. System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
  462. }
  463. System.err.println("");
  464. }
  465. boolean match(int ch) {
  466. if (this.map == null) this.createMap();
  467. boolean ret;
  468. if (this.type == RANGE) {
  469. if (ch < MAPSIZE)
  470. return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
  471. ret = false;
  472. for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
  473. if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
  474. return true;
  475. }
  476. } else {
  477. if (ch < MAPSIZE)
  478. return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
  479. ret = true;
  480. for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
  481. if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
  482. return false;
  483. }
  484. }
  485. return ret;
  486. }
  487. private static final int MAPSIZE = 256;
  488. private void createMap() {
  489. int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
  490. int [] map = new int[asize];
  491. int nonMapIndex = this.ranges.length;
  492. for (int i = 0; i < asize; ++i) {
  493. map[i] = 0;
  494. }
  495. for (int i = 0; i < this.ranges.length; i += 2) {
  496. int s = this.ranges[i];
  497. int e = this.ranges[i+1];
  498. if (s < MAPSIZE) {
  499. for (int j = s; j <= e && j < MAPSIZE; j++) {
  500. map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
  501. }
  502. }
  503. else {
  504. nonMapIndex = i;
  505. break;
  506. }
  507. if (e >= MAPSIZE) {
  508. nonMapIndex = i;
  509. break;
  510. }
  511. }
  512. this.map = map;
  513. this.nonMapIndex = nonMapIndex;
  514. //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
  515. }
  516. public String toString(int options) {
  517. String ret;
  518. if (this.type == RANGE) {
  519. if (this == Token.token_dot)
  520. ret = ".";
  521. else if (this == Token.token_0to9)
  522. ret = "\\d";
  523. else if (this == Token.token_wordchars)
  524. ret = "\\w";
  525. else if (this == Token.token_spaces)
  526. ret = "\\s";
  527. else {
  528. StringBuffer sb = new StringBuffer();
  529. sb.append("[");
  530. for (int i = 0; i < this.ranges.length; i += 2) {
  531. if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
  532. if (this.ranges[i] == this.ranges[i+1]) {
  533. sb.append(escapeCharInCharClass(this.ranges[i]));
  534. } else {
  535. sb.append(escapeCharInCharClass(this.ranges[i]));
  536. sb.append((char)'-');
  537. sb.append(escapeCharInCharClass(this.ranges[i+1]));
  538. }
  539. }
  540. sb.append("]");
  541. ret = sb.toString();
  542. }
  543. } else {
  544. if (this == Token.token_not_0to9)
  545. ret = "\\D";
  546. else if (this == Token.token_not_wordchars)
  547. ret = "\\W";
  548. else if (this == Token.token_not_spaces)
  549. ret = "\\S";
  550. else {
  551. StringBuffer sb = new StringBuffer();
  552. sb.append("[^");
  553. for (int i = 0; i < this.ranges.length; i += 2) {
  554. if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
  555. if (this.ranges[i] == this.ranges[i+1]) {
  556. sb.append(escapeCharInCharClass(this.ranges[i]));
  557. } else {
  558. sb.append(escapeCharInCharClass(this.ranges[i]));
  559. sb.append('-');
  560. sb.append(escapeCharInCharClass(this.ranges[i+1]));
  561. }
  562. }
  563. sb.append("]");
  564. ret = sb.toString();
  565. }
  566. }
  567. return ret;
  568. }
  569. private static String escapeCharInCharClass(int ch) {
  570. String ret;
  571. switch (ch) {
  572. case '[': case ']': case '-': case '^':
  573. case ',': case '\\':
  574. ret = "\\"+(char)ch;
  575. break;
  576. case '\f': ret = "\\f"; break;
  577. case '\n': ret = "\\n"; break;
  578. case '\r': ret = "\\r"; break;
  579. case '\t': ret = "\\t"; break;
  580. case 0x1b: ret = "\\e"; break;
  581. //case 0x0b: ret = "\\v"; break;
  582. default:
  583. if (ch < 0x20) {
  584. String pre = "0"+Integer.toHexString(ch);
  585. ret = "\\x"+pre.substring(pre.length()-2, pre.length());
  586. } else if (ch >= 0x10000) {
  587. String pre = "0"+Integer.toHexString(ch);
  588. ret = "\\v"+pre.substring(pre.length()-6, pre.length());
  589. } else
  590. ret = ""+(char)ch;
  591. }
  592. return ret;
  593. }
  594. }