PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/petitparser-core/src/main/java/org/petitparser/parser/Parser.java

http://github.com/renggli/PetitParserJava
Java | 526 lines | 216 code | 53 blank | 257 comment | 9 complexity | ea56bb742c543e812e73280c73eec65d MD5 | raw file
Possible License(s): MIT
  1. package org.petitparser.parser;
  2. import static org.petitparser.parser.primitive.CharacterParser.any;
  3. import org.petitparser.context.Context;
  4. import org.petitparser.context.Result;
  5. import org.petitparser.context.Token;
  6. import org.petitparser.parser.actions.ActionParser;
  7. import org.petitparser.parser.actions.ContinuationParser;
  8. import org.petitparser.parser.actions.FlattenParser;
  9. import org.petitparser.parser.actions.TokenParser;
  10. import org.petitparser.parser.actions.TrimmingParser;
  11. import org.petitparser.parser.combinators.AndParser;
  12. import org.petitparser.parser.combinators.ChoiceParser;
  13. import org.petitparser.parser.combinators.EndOfInputParser;
  14. import org.petitparser.parser.combinators.NotParser;
  15. import org.petitparser.parser.combinators.OptionalParser;
  16. import org.petitparser.parser.combinators.SequenceParser;
  17. import org.petitparser.parser.combinators.SettableParser;
  18. import org.petitparser.parser.primitive.CharacterParser;
  19. import org.petitparser.parser.repeating.GreedyRepeatingParser;
  20. import org.petitparser.parser.repeating.LazyRepeatingParser;
  21. import org.petitparser.parser.repeating.PossessiveRepeatingParser;
  22. import org.petitparser.parser.repeating.RepeatingParser;
  23. import org.petitparser.utils.Functions;
  24. import java.util.ArrayList;
  25. import java.util.Collections;
  26. import java.util.HashSet;
  27. import java.util.List;
  28. import java.util.Objects;
  29. import java.util.Set;
  30. import java.util.function.Function;
  31. /**
  32. * An abstract parser that forms the root of all parsers in this package.
  33. */
  34. public abstract class Parser {
  35. /**
  36. * Primitive method doing the actual parsing.
  37. *
  38. * <p>The method is overridden in concrete subclasses to implement the parser
  39. * specific logic. The methods takes a parse {@code context} and returns the
  40. * resulting context, which is either a
  41. * {@link org.petitparser.context.Success}
  42. * or {@link org.petitparser.context.Failure} context.
  43. */
  44. public abstract Result parseOn(Context context);
  45. /**
  46. * Primitive method doing the actual parsing.
  47. *
  48. * <p>This method is an optimized version of {@link #parseOn(Context)} that
  49. * is getting its speed advantage by avoiding any unnecessary memory
  50. * allocations.
  51. *
  52. * <p>The method is overridden in most concrete subclasses to implement the
  53. * optimized logic. As an input the method takes a {@code buffer} and the
  54. * current {@code position} in that buffer. It returns a new (positive)
  55. * position in case of a successful parse, or `-1` in case of a failure.
  56. *
  57. * <p>Subclasses don't necessarily have to override this method, since it is
  58. * emulated using its slower brother.
  59. */
  60. public int fastParseOn(String buffer, int position) {
  61. Result result = parseOn(new Context(buffer, position));
  62. return result.isSuccess() ? result.getPosition() : -1;
  63. }
  64. /**
  65. * Returns the parse result of the {@code input}.
  66. */
  67. public Result parse(String input) {
  68. return parseOn(new Context(input, 0));
  69. }
  70. /**
  71. * Tests if the {@code input} can be successfully parsed.
  72. */
  73. public boolean accept(String input) {
  74. return fastParseOn(input, 0) >= 0;
  75. }
  76. /**
  77. * Returns a list of all successful overlapping parses of the {@code input}.
  78. */
  79. @SuppressWarnings("unchecked")
  80. public <T> List<T> matches(String input) {
  81. List<Object> list = new ArrayList<>();
  82. and().mapWithSideEffects(list::add).seq(any()).or(any()).star()
  83. .fastParseOn(input, 0);
  84. return (List<T>) list;
  85. }
  86. /**
  87. * Returns a list of all successful non-overlapping parses of the {@code
  88. * input}.
  89. */
  90. @SuppressWarnings("unchecked")
  91. public <T> List<T> matchesSkipping(String input) {
  92. List<Object> list = new ArrayList<>();
  93. mapWithSideEffects(list::add).or(any()).star().fastParseOn(input, 0);
  94. return (List<T>) list;
  95. }
  96. /**
  97. * Returns new parser that accepts the receiver, if possible. The resulting
  98. * parser returns the result of the receiver, or {@code null} if not
  99. * applicable.
  100. */
  101. public Parser optional() {
  102. return optional(null);
  103. }
  104. /**
  105. * Returns new parser that accepts the receiver, if possible. The returned
  106. * value can be provided as {@code otherwise}.
  107. */
  108. public Parser optional(Object otherwise) {
  109. return new OptionalParser(this, otherwise);
  110. }
  111. /**
  112. * Returns a parser that accepts the receiver zero or more times. The
  113. * resulting parser returns a list of the parse results of the receiver.
  114. *
  115. * <p>This is a greedy and blind implementation that tries to consume as much
  116. * input as possible and that does not consider what comes afterwards.
  117. */
  118. public Parser star() {
  119. return repeat(0, RepeatingParser.UNBOUNDED);
  120. }
  121. /**
  122. * Returns a parser that parses the receiver zero or more times until it
  123. * reaches a {@code limit}.
  124. *
  125. * <p>This is a greedy non-blind implementation of the {@link Parser#star()}
  126. * operator. The {@code limit} is not consumed.
  127. */
  128. public Parser starGreedy(Parser limit) {
  129. return repeatGreedy(limit, 0, RepeatingParser.UNBOUNDED);
  130. }
  131. /**
  132. * Returns a parser that parses the receiver zero or more times until it
  133. * reaches a {@code limit}.
  134. *
  135. * <p>This is a lazy non-blind implementation of the {@link Parser#star()}
  136. * operator. The {@code limit} is not consumed.
  137. */
  138. public Parser starLazy(Parser limit) {
  139. return repeatLazy(limit, 0, RepeatingParser.UNBOUNDED);
  140. }
  141. /**
  142. * Returns a parser that accepts the receiver one or more times. The resulting
  143. * parser returns a list of the parse results of the receiver.
  144. *
  145. * <p>This is a greedy and blind implementation that tries to consume as much
  146. * input as possible and that does not consider what comes afterwards.
  147. */
  148. public Parser plus() {
  149. return repeat(1, RepeatingParser.UNBOUNDED);
  150. }
  151. /**
  152. * Returns a parser that parses the receiver one or more times until it
  153. * reaches {@code limit}.
  154. *
  155. * <p>This is a greedy non-blind implementation of the {@link Parser#plus()}
  156. * operator. The {@code limit} is not consumed.
  157. */
  158. public Parser plusGreedy(Parser limit) {
  159. return repeatGreedy(limit, 1, RepeatingParser.UNBOUNDED);
  160. }
  161. /**
  162. * Returns a parser that parses the receiver one or more times until it
  163. * reaches a {@code limit}.
  164. *
  165. * <p>This is a lazy non-blind implementation of the {@link Parser#plus()}
  166. * operator. The {@code limit} is not consumed.
  167. */
  168. public Parser plusLazy(Parser limit) {
  169. return repeatLazy(limit, 1, RepeatingParser.UNBOUNDED);
  170. }
  171. /**
  172. * Returns a parser that accepts the receiver between {@code min} and {@code
  173. * max} times. The resulting parser returns a list of the parse results of the
  174. * receiver.
  175. *
  176. * <p>This is a greedy and blind implementation that tries to consume as much
  177. * input as possible and that does not consider what comes afterwards.
  178. */
  179. public Parser repeat(int min, int max) {
  180. return new PossessiveRepeatingParser(this, min, max);
  181. }
  182. /**
  183. * Returns a parser that parses the receiver at least {@code min} and at most
  184. * {@code max} times until it reaches a {@code limit}.
  185. *
  186. * <p>This is a greedy non-blind implementation of the {@link
  187. * Parser#repeat(int, int)} operator. The {@code limit} is not consumed.
  188. */
  189. public Parser repeatGreedy(Parser limit, int min, int max) {
  190. return new GreedyRepeatingParser(this, limit, min, max);
  191. }
  192. /**
  193. * Returns a parser that parses the receiver at least {@code min} and at most
  194. * {@code max} times until it reaches a {@code limit}.
  195. *
  196. * <p>This is a lazy non-blind implementation of the {@link
  197. * Parser#repeat(int, int)} operator. The {@code limit} is not consumed.
  198. */
  199. public Parser repeatLazy(Parser limit, int min, int max) {
  200. return new LazyRepeatingParser(this, limit, min, max);
  201. }
  202. /**
  203. * Returns a parser that accepts the receiver exactly {@code count} times. The
  204. * resulting parser eturns a list of the parse results of the receiver.
  205. */
  206. public Parser times(int count) {
  207. return repeat(count, count);
  208. }
  209. /**
  210. * Returns a parser that accepts the receiver followed by {@code others}. The
  211. * resulting parser returns a list of the parse result of the receiver
  212. * followed by the parse result of {@code others}.
  213. *
  214. * <p>Calling this method on an existing sequence code not nest this sequence
  215. * into a new one, but instead augments the existing sequence with {@code
  216. * others}.
  217. */
  218. public SequenceParser seq(Parser... others) {
  219. Parser[] parsers = new Parser[1 + others.length];
  220. parsers[0] = this;
  221. System.arraycopy(others, 0, parsers, 1, others.length);
  222. return new SequenceParser(parsers);
  223. }
  224. /**
  225. * Returns a parser that accepts the receiver or {@code other}. The resulting
  226. * parser returns the parse result of the receiver, if the receiver fails it
  227. * returns the parse result of {@code other} (exclusive ordered choice).
  228. */
  229. public ChoiceParser or(Parser... others) {
  230. Parser[] parsers = new Parser[1 + others.length];
  231. parsers[0] = this;
  232. System.arraycopy(others, 0, parsers, 1, others.length);
  233. return new ChoiceParser(parsers);
  234. }
  235. /**
  236. * Returns a parser (logical and-predicate) that succeeds whenever the
  237. * receiver does, but never consumes input.
  238. */
  239. public Parser and() {
  240. return new AndParser(this);
  241. }
  242. /**
  243. * Returns a parser that is called with its current continuation.
  244. */
  245. public Parser callCC(ContinuationParser.ContinuationHandler handler) {
  246. return new ContinuationParser(this, handler);
  247. }
  248. /**
  249. * Returns a parser (logical not-predicate) that succeeds whenever the
  250. * receiver fails, but never consumes input.
  251. */
  252. public Parser not() {
  253. return not("unexpected");
  254. }
  255. /**
  256. * Returns a parser (logical not-predicate) that succeeds whenever the
  257. * receiver fails, but never consumes input.
  258. */
  259. public Parser not(String message) {
  260. return new NotParser(this, message);
  261. }
  262. /**
  263. * Returns a parser that consumes any input token (character), but the
  264. * receiver.
  265. */
  266. public Parser neg() {
  267. return neg(this + " not expected");
  268. }
  269. /**
  270. * Returns a parser that consumes any input token (character), but the
  271. * receiver.
  272. */
  273. public Parser neg(String message) {
  274. return not(message).seq(CharacterParser.any()).pick(1);
  275. }
  276. /**
  277. * Returns a parser that discards the result of the receiver, and instead
  278. * returns a sub-string of the consumed range in the buffer being parsed.
  279. */
  280. public Parser flatten() {
  281. return new FlattenParser(this);
  282. }
  283. /**
  284. * Returns a parser that discards the result of the receiver, and instead
  285. * returns a sub-string of the consumed range in the buffer being parsed.
  286. * Reports the provided {@code message} in case of an error.
  287. */
  288. public Parser flatten(String message) {
  289. return new FlattenParser(this, message);
  290. }
  291. /**
  292. * Returns a parser that returns a {@link Token}. The token carries the parsed
  293. * value of the receiver {@link Token#getValue()}, as well as the consumed
  294. * input {@link Token#getInput()} from {@link Token#getStart()} to {@link
  295. * Token#getStop()} of the input being parsed.
  296. */
  297. public Parser token() {
  298. return new TokenParser(this);
  299. }
  300. /**
  301. * Returns a parser that consumes whitespace before and after the receiver.
  302. */
  303. public Parser trim() {
  304. return trim(CharacterParser.whitespace());
  305. }
  306. /**
  307. * Returns a parser that consumes input on {@code both} sides of the
  308. * receiver.
  309. */
  310. public Parser trim(Parser both) {
  311. return trim(both, both);
  312. }
  313. /**
  314. * Returns a parser that consumes input {@code before} and {@code after} the
  315. * receiver.
  316. */
  317. public Parser trim(Parser before, Parser after) {
  318. return new TrimmingParser(this, before, after);
  319. }
  320. /**
  321. * Returns a parser that succeeds only if the receiver consumes the complete
  322. * input.
  323. */
  324. public Parser end() {
  325. return end("end of input expected");
  326. }
  327. /**
  328. * Returns a parser that succeeds only if the receiver consumes the complete
  329. * input, otherwise return a failure with the {@code message}.
  330. */
  331. public Parser end(String message) {
  332. return new SequenceParser(this, new EndOfInputParser(message)).pick(0);
  333. }
  334. /**
  335. * Returns a parser that points to the receiver, but can be changed to point
  336. * to something else at a later point in time.
  337. */
  338. public SettableParser settable() {
  339. return SettableParser.with(this);
  340. }
  341. /**
  342. * Returns a parser that evaluates a {@code function} as the production action
  343. * on success of the receiver.
  344. *
  345. * @param function production action without side-effects.
  346. */
  347. public <A, B> Parser map(Function<A, B> function) {
  348. return new ActionParser<>(this, function);
  349. }
  350. /**
  351. * Returns a parser that evaluates a {@code function} as the production action
  352. * on success of the receiver.
  353. *
  354. * @param function production action with possible side-effects.
  355. */
  356. public <A, B> Parser mapWithSideEffects(Function<A, B> function) {
  357. return new ActionParser<>(this, function, true);
  358. }
  359. /**
  360. * Returns a parser that transform a successful parse result by returning the
  361. * element at {@code index} of a list. A negative index can be used to access
  362. * the elements from the back of the list.
  363. */
  364. public Parser pick(int index) {
  365. return map(Functions.nthOfList(index));
  366. }
  367. /**
  368. * Returns a parser that transforms a successful parse result by returning the
  369. * permuted elements at {@code indexes} of a list. Negative indexes can be
  370. * used to access the elements from the back of the list.
  371. */
  372. public Parser permute(int... indexes) {
  373. return this.map(Functions.permutationOfList(indexes));
  374. }
  375. /**
  376. * Returns a new parser that parses the receiver one or more times, separated
  377. * by a {@code separator}.
  378. */
  379. public Parser separatedBy(Parser separator) {
  380. return new SequenceParser(this, new SequenceParser(separator, this).star())
  381. .map((List<List<List<Object>>> input) -> {
  382. List<Object> result = new ArrayList<>();
  383. result.add(input.get(0));
  384. input.get(1).forEach(result::addAll);
  385. return result;
  386. });
  387. }
  388. /**
  389. * Returns a new parser that parses the receiver one or more times, separated
  390. * and possibly ended by a {@code separator}."
  391. */
  392. public Parser delimitedBy(Parser separator) {
  393. return separatedBy(separator).seq(separator.optional())
  394. .map((List<List<Object>> input) -> {
  395. List<Object> result = new ArrayList<>(input.get(0));
  396. if (input.get(1) != null) {
  397. result.add(input.get(1));
  398. }
  399. return result;
  400. });
  401. }
  402. /**
  403. * Returns a shallow copy of the receiver.
  404. */
  405. public abstract Parser copy();
  406. /**
  407. * Recursively tests for structural similarity of two parsers.
  408. *
  409. * <p>The code can automatically deals with recursive parsers and parsers
  410. * that refer to other parsers. This code is supposed to be overridden by
  411. * parsers that add other state.
  412. */
  413. public boolean isEqualTo(Parser other) {
  414. return isEqualTo(other, new HashSet<>());
  415. }
  416. /**
  417. * Recursively tests for structural similarity of two parsers.
  418. */
  419. protected boolean isEqualTo(Parser other, Set<Parser> seen) {
  420. if (this.equals(other) || seen.contains(this)) {
  421. return true;
  422. }
  423. seen.add(this);
  424. return Objects.equals(getClass(), other.getClass()) &&
  425. hasEqualProperties(other) && hasEqualChildren(other, seen);
  426. }
  427. /**
  428. * Compares the properties of two parsers.
  429. *
  430. * <p>Override this method in all subclasses that add new state.
  431. */
  432. protected boolean hasEqualProperties(Parser other) {
  433. return true;
  434. }
  435. /**
  436. * Compares the children of two parsers.
  437. *
  438. * <p>Normally subclasses should not override this method, but instead {@link
  439. * #getChildren()}.
  440. */
  441. protected boolean hasEqualChildren(Parser other, Set<Parser> seen) {
  442. List<Parser> thisChildren = this.getChildren();
  443. List<Parser> otherChildren = other.getChildren();
  444. if (thisChildren.size() != otherChildren.size()) {
  445. return false;
  446. }
  447. for (int i = 0; i < thisChildren.size(); i++) {
  448. if (!thisChildren.get(i).isEqualTo(otherChildren.get(i), seen)) {
  449. return false;
  450. }
  451. }
  452. return true;
  453. }
  454. /**
  455. * Returns a list of directly referring parsers.
  456. */
  457. public List<Parser> getChildren() {
  458. return Collections.emptyList();
  459. }
  460. /**
  461. * Replaces the referring parser {@code source} with {@code target}. Does
  462. * nothing if the parser does not exist.
  463. */
  464. public void replace(Parser source, Parser target) {
  465. // no referring parsers
  466. }
  467. /**
  468. * Returns a human readable string identifying this parser.
  469. */
  470. public String toString() {
  471. return getClass().getSimpleName();
  472. }
  473. }