PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/e_editor/regex.c

https://bitbucket.org/nrnhines/nrngpu
C | 402 lines | 320 code | 46 blank | 36 comment | 150 complexity | a1ca91b36943cf5778d4f003c199d42c MD5 | raw file
Possible License(s): GPL-2.0
  1. /* regex.c: regular expression interface routines for the ed line editor. */
  2. /* GNU ed - The GNU line editor.
  3. Copyright (C) 1993, 1994, 2006, 2007, 2008, 2009, 2010
  4. Free Software Foundation, Inc.
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. */
  16. #include <stddef.h>
  17. #include <errno.h>
  18. #include <sys/types.h>
  19. #include <regex.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include "ed.h"
  /* Pattern reserved by new_compiled_pattern; NULL until one is stored. */
  static regex_t * global_pat = NULL;
  /* If set, get_compiled_pattern does not free the pattern it holds. */
  static bool patlock = false;

  /* Substitution template: buffer, allocated size and current length. */
  static char * stbuf = NULL;
  static int stbufsz = 0;
  static int stlen = 0;

  /* Output buffer used by replace_matching_text and its allocated size. */
  static char * rbuf = NULL;
  static int rbufsz = 0;
  31. bool prev_pattern( void ) { return global_pat != 0; }
  /* Translate characters in a buffer.
     Every byte equal to 'from' among the first 'len' bytes at 'p' is
     overwritten with 'to'.  The buffer is treated as raw bytes, so an
     embedded NUL does not stop the scan.  A 'len' of zero or less leaves
     the buffer untouched.
     The previous loop condition ( --len > 0 ) stopped one byte early and
     never examined the last byte of the buffer. */
  static void translit_text( char * p, int len, const char from, const char to )
  {
    int i;

    for( i = 0; i < len; ++i )
      if( p[i] == from ) p[i] = to;
  }
  /* Overwrite newlines with ASCII NULs in the 'len'-byte buffer 's'
     (delegates to translit_text). */
  static void newline_to_nul( char * const s, const int len )
  {
    translit_text( s, len, '\n', '\0' );
  }
  /* Overwrite ASCII NULs with newlines in the 'len'-byte buffer 's'
     (delegates to translit_text). */
  static void nul_to_newline( char * const s, const int len )
  {
    translit_text( s, len, '\0', '\n' );
  }
  /* Scan a bracket expression.
     'p' points just past the opening '['.  A leading '^' and a ']' in
     first position are skipped as ordinary characters.  Nested "[. .]",
     "[: :]" and "[= =]" elements are skipped as a unit, so a ']' inside
     them does not end the expression.
     Returns a pointer to the closing ']', or NULL if a newline is reached
     first. */
  static const char * parse_char_class( const char * p )
  {
    if( *p == '^' ) ++p;
    if( *p == ']' ) ++p;
    while( *p != ']' && *p != '\n' )
    {
      if( *p == '[' )
      {
        const char kind = p[1];         /* '.', ':' or '=' opens an element */
        if( kind == '.' || kind == ':' || kind == '=' )
        {
          char prev;
          p += 2;                       /* step over "[." / "[:" / "[=" */
          prev = *p;
          /* the element ends at a ']' directly preceded by 'kind' */
          while( *p != ']' || prev != kind )
          {
            prev = *p;
            if( prev == '\n' ) return NULL;
            ++p;
          }
        }
      }
      ++p;
    }
    return ( *p == ']' ) ? p : NULL;
  }
  /* Copy a pattern string from the command buffer.
     The pattern runs from *ibufpp up to the first unescaped 'delimiter'
     found outside a bracket expression, or up to a newline.  On success
     *ibufpp is advanced to that terminating character and a pointer to a
     NUL-terminated copy is returned; the copy lives in a static buffer that
     is reused by the next call.  Returns NULL (with an error message set,
     where the failure is a syntax error) on unbalanced brackets, a trailing
     backslash, or buffer resize failure. */
  static char * extract_pattern( const char ** const ibufpp, const char delimiter )
  {
    static char * buf = NULL;
    static int bufsz = 0;
    const char * const start = *ibufpp;
    const char * nd = start;
    int len;

    for( ; *nd != delimiter && *nd != '\n'; ++nd )
    {
      if( *nd == '[' )
      {
        /* jump to the closing ']' of the bracket expression */
        nd = parse_char_class( nd + 1 );
        if( !nd )
        {
          set_error_msg( "Unbalanced brackets ([])" );
          return NULL;
        }
      }
      else if( *nd == '\\' )
      {
        ++nd;                           /* keep the escaped character */
        if( *nd == '\n' )
        {
          set_error_msg( "Trailing backslash (\\)" );
          return NULL;
        }
      }
    }

    len = nd - start;
    if( !resize_buffer( &buf, &bufsz, len + 1 ) ) return NULL;
    memcpy( buf, start, len );
    buf[len] = 0;
    *ibufpp = nd;
    if( isbinary() ) nul_to_newline( buf, len );
    return buf;
  }
  86. /* return pointer to compiled pattern from command buffer */
  87. static regex_t * get_compiled_pattern( const char ** const ibufpp )
  88. {
  89. static regex_t * exp = 0;
  90. const char * exps;
  91. const char delimiter = **ibufpp;
  92. int n;
  93. if( delimiter == ' ' )
  94. { set_error_msg( "Invalid pattern delimiter" ); return 0; }
  95. if( delimiter == '\n' || *++*ibufpp == '\n' || **ibufpp == delimiter )
  96. {
  97. if( !exp ) set_error_msg( "No previous pattern" );
  98. return exp;
  99. }
  100. exps = extract_pattern( ibufpp, delimiter );
  101. if( !exps ) return 0;
  102. /* buffer alloc'd && not reserved */
  103. if( exp && !patlock ) regfree( exp );
  104. else
  105. {
  106. exp = (regex_t *) malloc( sizeof (regex_t) );
  107. if( !exp )
  108. {
  109. show_strerror( 0, errno );
  110. set_error_msg( "Memory exhausted" );
  111. return 0;
  112. }
  113. }
  114. patlock = false;
  115. n = regcomp( exp, exps, 0 );
  116. if( n )
  117. {
  118. char buf[80];
  119. regerror( n, exp, buf, sizeof buf );
  120. set_error_msg( buf );
  121. free( exp );
  122. exp = 0;
  123. }
  124. return exp;
  125. }
  126. /* add line matching a pattern to the global-active list */
  127. bool build_active_list( const char ** const ibufpp, const int first_addr,
  128. const int second_addr, const bool match )
  129. {
  130. const regex_t * pat;
  131. const line_t * lp;
  132. int addr;
  133. const char delimiter = **ibufpp;
  134. if( delimiter == ' ' || delimiter == '\n' )
  135. { set_error_msg( "Invalid pattern delimiter" ); return false; }
  136. pat = get_compiled_pattern( ibufpp );
  137. if( !pat ) return false;
  138. if( **ibufpp == delimiter ) ++*ibufpp;
  139. clear_active_list();
  140. lp = search_line_node( first_addr );
  141. for( addr = first_addr; addr <= second_addr; ++addr, lp = lp->q_forw )
  142. {
  143. char * const s = get_sbuf_line( lp );
  144. if( !s ) return false;
  145. if( isbinary() ) nul_to_newline( s, lp->len );
  146. if( !regexec( pat, s, 0, 0, 0 ) == match && !set_active_node( lp ) )
  147. return false;
  148. }
  149. return true;
  150. }
  151. /* return pointer to copy of substitution template in the command buffer */
  152. static char * extract_subst_template( const char ** const ibufpp,
  153. const bool isglobal )
  154. {
  155. int i = 0, n = 0;
  156. char c;
  157. const char delimiter = **ibufpp;
  158. ++*ibufpp;
  159. if( **ibufpp == '%' && (*ibufpp)[1] == delimiter )
  160. {
  161. ++*ibufpp;
  162. if( !stbuf ) set_error_msg( "No previous substitution" );
  163. return stbuf;
  164. }
  165. while( **ibufpp != delimiter )
  166. {
  167. if( !resize_buffer( &stbuf, &stbufsz, i + 2 ) ) return 0;
  168. c = stbuf[i++] = *(*ibufpp)++;
  169. if( c == '\n' && **ibufpp == 0 ) { --i, --*ibufpp; break; }
  170. if( c == '\\' && ( stbuf[i++] = *(*ibufpp)++ ) == '\n' && !isglobal )
  171. {
  172. while( ( *ibufpp = get_tty_line( &n ) ) &&
  173. ( n == 0 || ( n > 0 && (*ibufpp)[n-1] != '\n' ) ) )
  174. clearerr( stdin );
  175. if( !*ibufpp ) return 0;
  176. }
  177. }
  178. if( !resize_buffer( &stbuf, &stbufsz, i + 1 ) ) return 0;
  179. stbuf[stlen = i] = 0;
  180. return stbuf;
  181. }
  182. /* extract substitution tail from the command buffer */
  183. bool extract_subst_tail( const char ** const ibufpp, int * const gflagsp,
  184. int * const snump, const bool isglobal )
  185. {
  186. const char delimiter = **ibufpp;
  187. *gflagsp = *snump = 0;
  188. if( delimiter == '\n' ) { stlen = 0; *gflagsp = GPR; return true; }
  189. if( !extract_subst_template( ibufpp, isglobal ) ) return false;
  190. if( **ibufpp == '\n' ) { *gflagsp = GPR; return true; }
  191. if( **ibufpp == delimiter ) ++*ibufpp;
  192. if( **ibufpp >= '1' && **ibufpp <= '9' )
  193. return parse_int( snump, *ibufpp, ibufpp );
  194. if( **ibufpp == 'g' ) { ++*ibufpp; *gflagsp = GSG; }
  195. return true;
  196. }
  197. /* return the address of the next line matching a pattern in a given
  198. direction. wrap around begin/end of editor buffer if necessary */
  199. int next_matching_node_addr( const char ** const ibufpp, const bool forward )
  200. {
  201. const regex_t * const pat = get_compiled_pattern( ibufpp );
  202. int addr = current_addr();
  203. if( !pat ) return -1;
  204. do {
  205. addr = ( forward ? inc_addr( addr ) : dec_addr( addr ) );
  206. if( addr )
  207. {
  208. const line_t * const lp = search_line_node( addr );
  209. char * const s = get_sbuf_line( lp );
  210. if( !s ) return -1;
  211. if( isbinary() ) nul_to_newline( s, lp->len );
  212. if( !regexec( pat, s, 0, 0, 0 ) ) return addr;
  213. }
  214. }
  215. while( addr != current_addr() );
  216. set_error_msg( "No match" );
  217. return -1;
  218. }
  219. bool new_compiled_pattern( const char ** const ibufpp )
  220. {
  221. regex_t * tpat;
  222. disable_interrupts();
  223. tpat = get_compiled_pattern( ibufpp );
  224. if( tpat && tpat != global_pat )
  225. {
  226. if( global_pat ) { regfree( global_pat ); free( global_pat ); }
  227. global_pat = tpat;
  228. patlock = true; /* reserve pattern */
  229. }
  230. enable_interrupts();
  231. return ( tpat ? true : false );
  232. }
  233. /* modify text according to a substitution template; return offset to
  234. end of modified text */
  235. static int apply_subst_template( const char * const boln,
  236. const regmatch_t * const rm, int offset,
  237. const int re_nsub )
  238. {
  239. const char * sub = stbuf;
  240. for( ; sub - stbuf < stlen; ++sub )
  241. {
  242. int n;
  243. if( *sub == '&' )
  244. {
  245. int j = rm[0].rm_so; int k = rm[0].rm_eo;
  246. if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
  247. while( j < k ) rbuf[offset++] = boln[j++];
  248. }
  249. else if( *sub == '\\' && *++sub >= '1' && *sub <= '9' &&
  250. ( n = *sub - '0' ) <= re_nsub )
  251. {
  252. int j = rm[n].rm_so; int k = rm[n].rm_eo;
  253. if( !resize_buffer( &rbuf, &rbufsz, offset + k - j ) ) return -1;
  254. while( j < k ) rbuf[offset++] = boln[j++];
  255. }
  256. else
  257. {
  258. if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
  259. rbuf[offset++] = *sub;
  260. }
  261. }
  262. if( !resize_buffer( &rbuf, &rbufsz, offset + 1 ) ) return -1;
  263. rbuf[offset] = 0;
  264. return offset;
  265. }
  266. /* replace text matched by a pattern according to a substitution
  267. template; return length of the modified text */
  268. static int replace_matching_text( const line_t * const lp, const int gflags,
  269. const int snum )
  270. {
  271. enum { se_max = 30 }; /* max subexpressions in a regular expression */
  272. regmatch_t rm[se_max];
  273. char * txt = get_sbuf_line( lp );
  274. const char * eot;
  275. int i = 0, offset = 0;
  276. bool changed = false;
  277. if( !txt ) return -1;
  278. if( isbinary() ) nul_to_newline( txt, lp->len );
  279. eot = txt + lp->len;
  280. if( !regexec( global_pat, txt, se_max, rm, 0 ) )
  281. {
  282. int matchno = 0;
  283. do {
  284. if( !snum || snum == ++matchno )
  285. {
  286. changed = true; i = rm[0].rm_so;
  287. if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
  288. if( isbinary() ) newline_to_nul( txt, rm[0].rm_eo );
  289. memcpy( rbuf + offset, txt, i ); offset += i;
  290. offset = apply_subst_template( txt, rm, offset, global_pat->re_nsub );
  291. if( offset < 0 ) return -1;
  292. }
  293. else
  294. {
  295. i = rm[0].rm_eo;
  296. if( !resize_buffer( &rbuf, &rbufsz, offset + i ) ) return -1;
  297. if( isbinary() ) newline_to_nul( txt, i );
  298. memcpy( rbuf + offset, txt, i ); offset += i;
  299. }
  300. txt += rm[0].rm_eo;
  301. }
  302. while( *txt && ( !changed || ( ( gflags & GSG ) && rm[0].rm_eo ) ) &&
  303. !regexec( global_pat, txt, se_max, rm, REG_NOTBOL ) );
  304. i = eot - txt;
  305. if( !resize_buffer( &rbuf, &rbufsz, offset + i + 2 ) ) return -1;
  306. if( i > 0 && !rm[0].rm_eo && ( gflags & GSG ) )
  307. { set_error_msg( "Infinite substitution loop" ); return -1; }
  308. if( isbinary() ) newline_to_nul( txt, i );
  309. memcpy( rbuf + offset, txt, i );
  310. memcpy( rbuf + offset + i, "\n", 2 );
  311. }
  312. return ( changed ? offset + i + 1 : 0 );
  313. }
  314. /* for each line in a range, change text matching a pattern according to
  315. a substitution template; return false if error */
  316. bool search_and_replace( const int first_addr, const int second_addr,
  317. const int gflags, const int snum, const bool isglobal )
  318. {
  319. int lc;
  320. bool match_found = false;
  321. set_current_addr( first_addr - 1 );
  322. for( lc = 0; lc <= second_addr - first_addr; ++lc )
  323. {
  324. const line_t * const lp = search_line_node( inc_current_addr() );
  325. int len = replace_matching_text( lp, gflags, snum );
  326. if( len < 0 ) return false;
  327. if( len )
  328. {
  329. const char * txt = rbuf;
  330. const char * const eot = rbuf + len;
  331. undo_t * up = 0;
  332. disable_interrupts();
  333. if( !delete_lines( current_addr(), current_addr(), isglobal ) )
  334. { enable_interrupts(); return false; }
  335. do {
  336. txt = put_sbuf_line( txt, current_addr() );
  337. if( !txt ) { enable_interrupts(); return false; }
  338. if( up ) up->tail = search_line_node( current_addr() );
  339. else
  340. {
  341. up = push_undo_atom( UADD, current_addr(), current_addr() );
  342. if( !up ) { enable_interrupts(); return false; }
  343. }
  344. }
  345. while( txt != eot );
  346. enable_interrupts();
  347. match_found = true;
  348. }
  349. }
  350. if( !match_found && !( gflags & GLB ) )
  351. { set_error_msg( "No match" ); return false; }
  352. return true;
  353. }