PageRenderTime 71ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/Transforms/Obfuscation/antlr/src/misc/IntervalSet.cpp

https://bitbucket.org/HanyunseopEverspin/eversafe_ios_obfuscator_archive
C++ | 521 lines | 395 code | 84 blank | 42 comment | 111 complexity | 3b4e84b5646fca5c605c3db52daa8d42 MD5 | raw file
Possible License(s): AGPL-3.0
  1. /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  2. * Use of this file is governed by the BSD 3-clause license that
  3. * can be found in the LICENSE.txt file in the project root.
  4. */
  5. #include "misc/MurmurHash.h"
  6. #include "Lexer.h"
  7. #include "Exceptions.h"
  8. #include "Vocabulary.h"
  9. #include "misc/IntervalSet.h"
  10. using namespace antlr4;
  11. using namespace antlr4::misc;
  12. IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
  13. IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
  14. IntervalSet const IntervalSet::EMPTY_SET;
  15. IntervalSet::IntervalSet() : _intervals() {
  16. }
  17. IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
  18. _intervals = set._intervals;
  19. }
  20. IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
  21. }
  22. IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
  23. }
  24. IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
  25. _intervals = other._intervals;
  26. return *this;
  27. }
  28. IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
  29. _intervals = move(other._intervals);
  30. return *this;
  31. }
  32. IntervalSet IntervalSet::of(ssize_t a) {
  33. return IntervalSet({ Interval(a, a) });
  34. }
  35. IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
  36. return IntervalSet({ Interval(a, b) });
  37. }
  38. void IntervalSet::clear() {
  39. _intervals.clear();
  40. }
  41. void IntervalSet::add(ssize_t el) {
  42. add(el, el);
  43. }
  44. void IntervalSet::add(ssize_t a, ssize_t b) {
  45. add(Interval(a, b));
  46. }
  47. void IntervalSet::add(const Interval &addition) {
  48. if (addition.b < addition.a) {
  49. return;
  50. }
  51. // find position in list
  52. for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
  53. Interval r = *iterator;
  54. if (addition == r) {
  55. return;
  56. }
  57. if (addition.adjacent(r) || !addition.disjoint(r)) {
  58. // next to each other, make a single larger interval
  59. Interval bigger = addition.Union(r);
  60. *iterator = bigger;
  61. // make sure we didn't just create an interval that
  62. // should be merged with next interval in list
  63. while (iterator + 1 != _intervals.end()) {
  64. Interval next = *++iterator;
  65. if (!bigger.adjacent(next) && bigger.disjoint(next)) {
  66. break;
  67. }
  68. // if we bump up against or overlap next, merge
  69. iterator = _intervals.erase(iterator);// remove this one
  70. --iterator; // move backwards to what we just set
  71. *iterator = bigger.Union(next); // set to 3 merged ones
  72. // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
  73. }
  74. return;
  75. }
  76. if (addition.startsBeforeDisjoint(r)) {
  77. // insert before r
  78. //--iterator;
  79. _intervals.insert(iterator, addition);
  80. return;
  81. }
  82. // if disjoint and after r, a future iteration will handle it
  83. }
  84. // ok, must be after last interval (and disjoint from last interval)
  85. // just add it
  86. _intervals.push_back(addition);
  87. }
  88. IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
  89. IntervalSet result;
  90. for (auto &s : sets) {
  91. result.addAll(s);
  92. }
  93. return result;
  94. }
  95. IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
  96. // walk set and add each interval
  97. for (auto const& interval : set._intervals) {
  98. add(interval);
  99. }
  100. return *this;
  101. }
  102. IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
  103. return complement(IntervalSet::of(minElement, maxElement));
  104. }
  105. IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
  106. return vocabulary.subtract(*this);
  107. }
  108. IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
  109. return subtract(*this, other);
  110. }
  111. IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
  112. if (left.isEmpty()) {
  113. return IntervalSet();
  114. }
  115. if (right.isEmpty()) {
  116. // right set has no elements; just return the copy of the current set
  117. return left;
  118. }
  119. IntervalSet result(left);
  120. size_t resultI = 0;
  121. size_t rightI = 0;
  122. while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
  123. Interval &resultInterval = result._intervals[resultI];
  124. const Interval &rightInterval = right._intervals[rightI];
  125. // operation: (resultInterval - rightInterval) and update indexes
  126. if (rightInterval.b < resultInterval.a) {
  127. rightI++;
  128. continue;
  129. }
  130. if (rightInterval.a > resultInterval.b) {
  131. resultI++;
  132. continue;
  133. }
  134. Interval beforeCurrent;
  135. Interval afterCurrent;
  136. if (rightInterval.a > resultInterval.a) {
  137. beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
  138. }
  139. if (rightInterval.b < resultInterval.b) {
  140. afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
  141. }
  142. if (beforeCurrent.a > -1) { // -1 is the default value
  143. if (afterCurrent.a > -1) {
  144. // split the current interval into two
  145. result._intervals[resultI] = beforeCurrent;
  146. result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
  147. resultI++;
  148. rightI++;
  149. } else {
  150. // replace the current interval
  151. result._intervals[resultI] = beforeCurrent;
  152. resultI++;
  153. }
  154. } else {
  155. if (afterCurrent.a > -1) {
  156. // replace the current interval
  157. result._intervals[resultI] = afterCurrent;
  158. rightI++;
  159. } else {
  160. // remove the current interval (thus no need to increment resultI)
  161. result._intervals.erase(result._intervals.begin() + resultI);
  162. }
  163. }
  164. }
  165. // If rightI reached right.intervals.size(), no more intervals to subtract from result.
  166. // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
  167. // Either way, we are done.
  168. return result;
  169. }
  170. IntervalSet IntervalSet::Or(const IntervalSet &a) const {
  171. IntervalSet result;
  172. result.addAll(*this);
  173. result.addAll(a);
  174. return result;
  175. }
  176. IntervalSet IntervalSet::And(const IntervalSet &other) const {
  177. IntervalSet intersection;
  178. size_t i = 0;
  179. size_t j = 0;
  180. // iterate down both interval lists looking for nondisjoint intervals
  181. while (i < _intervals.size() && j < other._intervals.size()) {
  182. Interval mine = _intervals[i];
  183. Interval theirs = other._intervals[j];
  184. if (mine.startsBeforeDisjoint(theirs)) {
  185. // move this iterator looking for interval that might overlap
  186. i++;
  187. } else if (theirs.startsBeforeDisjoint(mine)) {
  188. // move other iterator looking for interval that might overlap
  189. j++;
  190. } else if (mine.properlyContains(theirs)) {
  191. // overlap, add intersection, get next theirs
  192. intersection.add(mine.intersection(theirs));
  193. j++;
  194. } else if (theirs.properlyContains(mine)) {
  195. // overlap, add intersection, get next mine
  196. intersection.add(mine.intersection(theirs));
  197. i++;
  198. } else if (!mine.disjoint(theirs)) {
  199. // overlap, add intersection
  200. intersection.add(mine.intersection(theirs));
  201. // Move the iterator of lower range [a..b], but not
  202. // the upper range as it may contain elements that will collide
  203. // with the next iterator. So, if mine=[0..115] and
  204. // theirs=[115..200], then intersection is 115 and move mine
  205. // but not theirs as theirs may collide with the next range
  206. // in thisIter.
  207. // move both iterators to next ranges
  208. if (mine.startsAfterNonDisjoint(theirs)) {
  209. j++;
  210. } else if (theirs.startsAfterNonDisjoint(mine)) {
  211. i++;
  212. }
  213. }
  214. }
  215. return intersection;
  216. }
  217. bool IntervalSet::contains(size_t el) const {
  218. return contains(symbolToNumeric(el));
  219. }
  220. bool IntervalSet::contains(ssize_t el) const {
  221. if (_intervals.empty())
  222. return false;
  223. if (el < _intervals[0].a) // list is sorted and el is before first interval; not here
  224. return false;
  225. for (auto &interval : _intervals) {
  226. if (el >= interval.a && el <= interval.b) {
  227. return true; // found in this interval
  228. }
  229. }
  230. return false;
  231. }
  232. bool IntervalSet::isEmpty() const {
  233. return _intervals.empty();
  234. }
  235. ssize_t IntervalSet::getSingleElement() const {
  236. if (_intervals.size() == 1) {
  237. if (_intervals[0].a == _intervals[0].b) {
  238. return _intervals[0].a;
  239. }
  240. }
  241. return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
  242. }
  243. ssize_t IntervalSet::getMaxElement() const {
  244. if (_intervals.empty()) {
  245. return Token::INVALID_TYPE;
  246. }
  247. return _intervals.back().b;
  248. }
  249. ssize_t IntervalSet::getMinElement() const {
  250. if (_intervals.empty()) {
  251. return Token::INVALID_TYPE;
  252. }
  253. return _intervals[0].a;
  254. }
  255. std::vector<Interval> const& IntervalSet::getIntervals() const {
  256. return _intervals;
  257. }
  258. size_t IntervalSet::hashCode() const {
  259. size_t hash = MurmurHash::initialize();
  260. for (auto &interval : _intervals) {
  261. hash = MurmurHash::update(hash, interval.a);
  262. hash = MurmurHash::update(hash, interval.b);
  263. }
  264. return MurmurHash::finish(hash, _intervals.size() * 2);
  265. }
  266. bool IntervalSet::operator == (const IntervalSet &other) const {
  267. if (_intervals.empty() && other._intervals.empty())
  268. return true;
  269. if (_intervals.size() != other._intervals.size())
  270. return false;
  271. return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
  272. }
  273. std::string IntervalSet::toString() const {
  274. return toString(false);
  275. }
  276. std::string IntervalSet::toString(bool elemAreChar) const {
  277. if (_intervals.empty()) {
  278. return "{}";
  279. }
  280. std::stringstream ss;
  281. size_t effectiveSize = size();
  282. if (effectiveSize > 1) {
  283. ss << "{";
  284. }
  285. bool firstEntry = true;
  286. for (auto &interval : _intervals) {
  287. if (!firstEntry)
  288. ss << ", ";
  289. firstEntry = false;
  290. ssize_t a = interval.a;
  291. ssize_t b = interval.b;
  292. if (a == b) {
  293. if (a == -1) {
  294. ss << "<EOF>";
  295. } else if (elemAreChar) {
  296. ss << "'" << static_cast<char>(a) << "'";
  297. } else {
  298. ss << a;
  299. }
  300. } else {
  301. if (elemAreChar) {
  302. ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
  303. } else {
  304. ss << a << ".." << b;
  305. }
  306. }
  307. }
  308. if (effectiveSize > 1) {
  309. ss << "}";
  310. }
  311. return ss.str();
  312. }
  313. std::string IntervalSet::toString(const std::vector<std::string> &tokenNames) const {
  314. return toString(dfa::Vocabulary::fromTokenNames(tokenNames));
  315. }
  316. std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
  317. if (_intervals.empty()) {
  318. return "{}";
  319. }
  320. std::stringstream ss;
  321. size_t effectiveSize = size();
  322. if (effectiveSize > 1) {
  323. ss << "{";
  324. }
  325. bool firstEntry = true;
  326. for (auto &interval : _intervals) {
  327. if (!firstEntry)
  328. ss << ", ";
  329. firstEntry = false;
  330. ssize_t a = interval.a;
  331. ssize_t b = interval.b;
  332. if (a == b) {
  333. ss << elementName(vocabulary, a);
  334. } else {
  335. for (ssize_t i = a; i <= b; i++) {
  336. if (i > a) {
  337. ss << ", ";
  338. }
  339. ss << elementName(vocabulary, i);
  340. }
  341. }
  342. }
  343. if (effectiveSize > 1) {
  344. ss << "}";
  345. }
  346. return ss.str();
  347. }
  348. std::string IntervalSet::elementName(const std::vector<std::string> &tokenNames, ssize_t a) const {
  349. return elementName(dfa::Vocabulary::fromTokenNames(tokenNames), a);
  350. }
  351. std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
  352. if (a == -1) {
  353. return "<EOF>";
  354. } else if (a == -2) {
  355. return "<EPSILON>";
  356. } else {
  357. return vocabulary.getDisplayName(a);
  358. }
  359. }
  360. size_t IntervalSet::size() const {
  361. size_t result = 0;
  362. for (auto &interval : _intervals) {
  363. result += size_t(interval.b - interval.a + 1);
  364. }
  365. return result;
  366. }
  367. std::vector<ssize_t> IntervalSet::toList() const {
  368. std::vector<ssize_t> result;
  369. for (auto &interval : _intervals) {
  370. ssize_t a = interval.a;
  371. ssize_t b = interval.b;
  372. for (ssize_t v = a; v <= b; v++) {
  373. result.push_back(v);
  374. }
  375. }
  376. return result;
  377. }
  378. std::set<ssize_t> IntervalSet::toSet() const {
  379. std::set<ssize_t> result;
  380. for (auto &interval : _intervals) {
  381. ssize_t a = interval.a;
  382. ssize_t b = interval.b;
  383. for (ssize_t v = a; v <= b; v++) {
  384. result.insert(v);
  385. }
  386. }
  387. return result;
  388. }
  389. ssize_t IntervalSet::get(size_t i) const {
  390. size_t index = 0;
  391. for (auto &interval : _intervals) {
  392. ssize_t a = interval.a;
  393. ssize_t b = interval.b;
  394. for (ssize_t v = a; v <= b; v++) {
  395. if (index == i) {
  396. return v;
  397. }
  398. index++;
  399. }
  400. }
  401. return -1;
  402. }
  403. void IntervalSet::remove(size_t el) {
  404. remove(symbolToNumeric(el));
  405. }
  406. void IntervalSet::remove(ssize_t el) {
  407. for (size_t i = 0; i < _intervals.size(); ++i) {
  408. Interval &interval = _intervals[i];
  409. ssize_t a = interval.a;
  410. ssize_t b = interval.b;
  411. if (el < a) {
  412. break; // list is sorted and el is before this interval; not here
  413. }
  414. // if whole interval x..x, rm
  415. if (el == a && el == b) {
  416. _intervals.erase(_intervals.begin() + (long)i);
  417. break;
  418. }
  419. // if on left edge x..b, adjust left
  420. if (el == a) {
  421. interval.a++;
  422. break;
  423. }
  424. // if on right edge a..x, adjust right
  425. if (el == b) {
  426. interval.b--;
  427. break;
  428. }
  429. // if in middle a..x..b, split interval
  430. if (el > a && el < b) { // found in this interval
  431. ssize_t oldb = interval.b;
  432. interval.b = el - 1; // [a..x-1]
  433. add(el + 1, oldb); // add [x+1..b]
  434. break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
  435. }
  436. }
  437. }