PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/antlr-3.0/runtime/C/include/antlr3input.h

http://perseph.googlecode.com/
C Header | 223 lines | 49 code | 41 blank | 133 comment | 0 complexity | 24a7512155f6745de86f101704ebcc85 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0
  1. /** \file
  2. * Defines the basic structures used to manipulate character
  3. * streams from any input source. The first implementation of
  4. * this stream was ASCII 8 bit, but any character size and encoding
  5. * can in theory be used, so long as they can return a 32 bit Integer
  6. * representation of their characters and efficiently mark and revert
  7. * to specific offsets into their input streams.
  8. */
  9. #ifndef _ANTLR3_INPUT_H
  10. #define _ANTLR3_INPUT_H
  11. #include <antlr3defs.h>
  12. #include <antlr3string.h>
  13. #include <antlr3commontoken.h>
  14. #include <antlr3intstream.h>
  15. /** \brief Master context structure for an ANTLR3
  16. * C runtime based input stream.
  17. */
  18. typedef struct ANTLR3_INPUT_STREAM_struct
  19. {
  20. /** Interfaces that provide streams must all provide
  21. * a generic ANTLR3_INT_STREAM interface and an ANTLR3_INPUT_STREAM
  22. * is no different.
  23. */
  24. pANTLR3_INT_STREAM istream;
  25. /** Whatever super structure is providing the INPUT stream needs a pointer to itself
  26. * so that this can be passed back to it whenever the API functions
  27. * are called back from this interface.
  28. */
  29. void * super;
  30. /** Pointer to the start of the input string; characters may be
  31. * taken as offsets from here and are in the original input format encoding.
  32. */
  33. void * data;
  34. /** Indicates if the data pointer was allocated by us, and so should be freed
  35. * when the stream dies.
  36. */
  37. int isAllocated;
  38. /** String factory for this input stream.
  39. */
  40. pANTLR3_STRING_FACTORY strFactory;
  41. /** Pointer to the next character to be consumed from the input data.
  42. * This is cast to point at the encoding of the original file that
  43. * was read by the functions installed as pointers in this input stream
  44. * context instance at file/string/whatever load time.
  45. */
  46. void * nextChar;
  47. /** Number of characters that can be consumed at this point in time.
  48. * Mostly this is just what is left in the pre-read buffer, but if the
  49. * input source is a stream such as a socket or something then we may
  50. * call special read code to wait for more input.
  51. */
  52. ANTLR3_UINT64 sizeBuf;
  53. /** The line number we are traversing in the input file. This gets incremented
  54. * by a newline() call in the lexer grammar actions.
  55. */
  56. ANTLR3_UINT64 line;
  57. /** Pointer into the input buffer where the current line
  58. * started.
  59. */
  60. void * currentLine;
  61. /** The offset within the current line of the current character.
  62. */
  63. ANTLR3_INT32 charPositionInLine;
  64. /** Tracks how deep mark() calls are nested.
  65. */
  66. ANTLR3_UINT64 markDepth;
  67. /** List of mark() points in the input stream.
  68. */
  69. pANTLR3_VECTOR markers;
  70. /** File name string; set it to a pointer to allocated memory if
  71. * you set it manually, as it will be free()d.
  72. */
  73. pANTLR3_STRING fileName;
  74. /** File number, needs to be set manually to some file index of your devising.
  75. */
  76. ANTLR3_UINT32 fileNo;
  77. /** Character that automatically causes an internal line count
  78. * increment.
  79. */
  80. ANTLR3_UCHAR newlineChar;
  81. /* API */
  82. /** Pointer to function that closes the input stream.
  83. */
  84. void (*close) (struct ANTLR3_INPUT_STREAM_struct * input);
  85. /** Pointer to function that resets the input stream.
  86. */
  87. void (*reset) (struct ANTLR3_INPUT_STREAM_struct * input);
  88. /**
  89. * Pointer to function that installs a version of LA that always
  90. * returns upper case. Only valid for character streams and creates a case
  91. * insensitive lexer if the lexer tokens are described in upper case. The
  92. * tokens will preserve case in the token text.
  93. */
  94. void (*setUcaseLA) (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag);
  95. /** Pointer to function to return input stream element at 1 based
  96. * offset from nextChar. Same as _LA for char stream, but token
  97. * streams etc. have one of these that does other stuff of course.
  98. */
  99. void * (*_LT) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_INT64 lt);
  100. /** Pointer to function to return the total size of the input buffer. For streams
  101. * this may be just the total we have available so far. This means of course that
  102. * the input stream must be careful to accumulate enough input so that any backtracking
  103. * can be satisfied.
  104. */
  105. ANTLR3_UINT64 (*size) (struct ANTLR3_INPUT_STREAM_struct * input);
  106. /** Pointer to function to return a substring of the input stream. String is returned in allocated
  107. * memory and is in the same encoding as the input stream itself, NOT internal ANTLR3_UCHAR form.
  108. */
  109. pANTLR3_STRING (*substr) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_INT64 start, ANTLR3_INT64 stop);
  110. /** Pointer to function to return the current line number in the input stream.
  111. */
  112. ANTLR3_UINT64 (*getLine) (struct ANTLR3_INPUT_STREAM_struct * input);
  113. /** Pointer to function to return the current line buffer in the input stream.
  114. * The pointer returned is directly into the input stream so you must copy
  115. * it if you wish to manipulate it without damaging the input stream. Encoding
  116. * is obviously in the same form as the input stream.
  117. * \remark
  118. * - Note that this function will be inaccurate if setLine is called, as there
  119. * is no way at the moment to position the input stream at a particular line
  120. * number offset.
  121. */
  122. void * (*getLineBuf) (struct ANTLR3_INPUT_STREAM_struct * input);
  123. /** Pointer to function to return the current offset in the current input stream line.
  124. */
  125. ANTLR3_UINT32 (*getCharPositionInLine) (struct ANTLR3_INPUT_STREAM_struct * input);
  126. /** Pointer to function to set the current line number in the input stream.
  127. */
  128. void (*setLine) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 line);
  129. /** Pointer to function to set the current position in the current line.
  130. */
  131. void (*setCharPositionInLine) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 position);
  132. /** Pointer to function to override the default newline character that the input stream
  133. * looks for to trigger the line and offset and line buffer recording information.
  134. * \remark
  135. * - By default the character '\n' will be installed as the newline trigger character. When this
  136. * character is seen by the consume() function then the current line number is incremented and the
  137. * current line offset is reset to 0. The pointer for the line of input we are consuming
  138. * is updated to point to the next character after this one in the input stream (which means it
  139. * may become invalid if the last newline character in the file is seen, so watch out).
  140. * - If for some reason you do not want the counters and pointers to be reset, you can set the
  141. * character to some impossible character such as '\0' or whatever.
  142. * - This is a single character only, so choose the last character in a sequence of two or more.
  143. * - This is only a simple aid to error reporting - if you have a complicated binary input structure
  144. * it may not be adequate, but you can always override every function in the input stream with your
  145. * own of course, and can even write your own complete input stream set if you like.
  146. * - It is your responsibility to set a valid character for the input stream type. There is no point
  147. * setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII as this will just be truncated and never
  148. * trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF
  149. */
  150. void (*SetNewLineChar) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 newlineChar);
  151. }
  152. ANTLR3_INPUT_STREAM;
  153. /** \brief Structure for tracking lexer input states as part of mark()
  154. * and rewind() of the lexer.
  155. */
  156. typedef struct ANTLR3_LEX_STATE_struct
  157. {
  158. /** Pointer to the next character to be consumed from the input data.
  159. * This is cast to point at the encoding of the original file that
  160. * was read by the functions installed as pointers in this input stream
  161. * context instance at file/string/whatever load time.
  162. */
  163. void * nextChar;
  164. /** The line number we are traversing in the input file. This gets incremented
  165. * by a newline() call in the lexer grammar actions.
  166. */
  167. ANTLR3_UINT64 line;
  168. /** Pointer into the input buffer where the current line
  169. * started.
  170. */
  171. void * currentLine;
  172. /** The offset within the current line of the current character.
  173. */
  174. ANTLR3_INT32 charPositionInLine;
  175. }
  176. ANTLR3_LEX_STATE;
  177. /* Prototypes
  178. */
  179. void antlr3AsciiSetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 type);
  180. void antlr3UCS2SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 type);
  181. void antlr3GenericSetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 type);
  182. #endif /* _ANTLR3_INPUT_H */