PageRenderTime 70ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/build48/harbour/source/rtl/hbregex.c

#
C | 563 lines | 430 code | 47 blank | 86 comment | 78 complexity | 77fe17dfda4e6aeb81d4210f9cf9cd41 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
  1. /*
  2. * $Id: hbregex.c 7530 2007-06-06 13:40:09Z druzus $
  3. */
  4. /*
  5. * Harbour Project source code:
  6. *
  7. *
  8. * Copyright 2007 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
  9. * www - http://www.harbour-project.org
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2, or (at your option)
  14. * any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this software; see the file COPYING. If not, write to
  23. * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  24. * Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
  25. *
  26. * As a special exception, the Harbour Project gives permission for
  27. * additional uses of the text contained in its release of Harbour.
  28. *
  29. * The exception is that, if you link the Harbour libraries with other
  30. * files to produce an executable, this does not by itself cause the
  31. * resulting executable to be covered by the GNU General Public License.
  32. * Your use of that executable is in no way restricted on account of
  33. * linking the Harbour library code into it.
  34. *
  35. * This exception does not however invalidate any other reasons why
  36. * the executable file might be covered by the GNU General Public License.
  37. *
  38. * This exception applies only to the code released by the Harbour
  39. * Project under the name Harbour. If you copy code from other
  40. * Harbour Project or Free Software Foundation releases into a copy of
  41. * Harbour, as the General Public License permits, the exception does
  42. * not apply to the code that you add in this way. To avoid misleading
  43. * anyone as to the status of such modified files, you must delete
  44. * this exception notice from them.
  45. *
  46. * If you write modifications of your own for Harbour, it is your choice
  47. * whether to permit this exception to apply to your modifications.
  48. * If you do not wish that, delete this exception notice.
  49. *
  50. */
  51. /* #define HB_PCRE_REGEX */
  52. #define _HB_REGEX_INTERNAL_
  53. #include "hbregex.h"
  54. #include "hbapiitm.h"
  55. #include "hbapierr.h"
  56. #include "hbinit.h"
  57. static void hb_regfree( PHB_REGEX pRegEx )
  58. {
  59. #if defined( HB_PCRE_REGEX )
  60. ( pcre_free )( pRegEx->re_pcre );
  61. #elif defined( HB_POSIX_REGEX )
  62. regfree( &pRegEx->reg );
  63. #else
  64. HB_SYMBOL_UNUSED( pRegEx );
  65. #endif
  66. }
  67. static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
  68. {
  69. #if defined( HB_PCRE_REGEX )
  70. const unsigned char * pCharTable = NULL;
  71. const char *szError = NULL;
  72. int iErrOffset = 0;
  73. int iCFlags = ( ( pRegEx->iFlags & HBREG_ICASE ) ? PCRE_CASELESS : 0 ) |
  74. ( ( pRegEx->iFlags & HBREG_NEWLINE ) ? PCRE_MULTILINE : 0 ) |
  75. ( ( pRegEx->iFlags & HBREG_DOTALL ) ? PCRE_DOTALL : 0 );
  76. pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) |
  77. ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 );
  78. pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError,
  79. &iErrOffset, pCharTable );
  80. return pRegEx->re_pcre ? 0 : -1;
  81. #elif defined( HB_POSIX_REGEX )
  82. int iCFlags = REG_EXTENDED |
  83. ( ( pRegEx->iFlags & HBREG_ICASE ) ? REG_ICASE : 0 ) |
  84. ( ( pRegEx->iFlags & HBREG_NEWLINE ) ? REG_NEWLINE : 0 ) |
  85. ( ( pRegEx->iFlags & HBREG_NOSUB ) ? REG_NOSUB : 0 );
  86. pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? REG_NOTBOL : 0 ) |
  87. ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? REG_NOTEOL : 0 );
  88. return regcomp( &pRegEx->reg, szRegEx, iCFlags );
  89. #else
  90. HB_SYMBOL_UNUSED( pRegEx );
  91. HB_SYMBOL_UNUSED( szRegEx );
  92. return -1;
  93. #endif
  94. }
  95. static int hb_regexec( PHB_REGEX pRegEx, const char * szString, ULONG ulLen,
  96. int iMatches, HB_REGMATCH * aMatches )
  97. {
  98. #if defined( HB_PCRE_REGEX )
  99. int iResult, i;
  100. iResult = pcre_exec( pRegEx->re_pcre, NULL /* pcre_extra */,
  101. szString, ulLen, 0 /* startoffset */,
  102. pRegEx->iEFlags, aMatches, HB_REGMATCH_SIZE( iMatches ) );
  103. if( iResult == 0 )
  104. {
  105. for( i = 0; i < iMatches; i++ )
  106. {
  107. if( HB_REGMATCH_EO( aMatches, i ) != -1 )
  108. iResult = i + 1;
  109. }
  110. }
  111. return iResult;
  112. #elif defined( HB_POSIX_REGEX )
  113. char * szBuffer = NULL;
  114. int iResult, i;
  115. if( szString[ ulLen ] != 0 )
  116. {
  117. szBuffer = hb_strndup( szString, ulLen );
  118. szString = szBuffer;
  119. }
  120. for( i = 0; i < iMatches; i++ )
  121. HB_REGMATCH_EO( aMatches, i ) = -1;
  122. iResult = regexec( &pRegEx->reg, szString, iMatches, aMatches, pRegEx->iEFlags );
  123. if( iResult == 0 )
  124. {
  125. for( i = 0; i < iMatches; i++ )
  126. {
  127. if( HB_REGMATCH_EO( aMatches, i ) != -1 )
  128. iResult = i + 1;
  129. }
  130. }
  131. else
  132. iResult = -1;
  133. if( szBuffer )
  134. hb_xfree( szBuffer );
  135. return iResult;
  136. #else
  137. HB_SYMBOL_UNUSED( pRegEx );
  138. HB_SYMBOL_UNUSED( szString );
  139. HB_SYMBOL_UNUSED( ulLen );
  140. HB_SYMBOL_UNUSED( iMatches );
  141. HB_SYMBOL_UNUSED( aMatches );
  142. return -1;
  143. #endif
  144. }
  145. HB_FUNC( HB_REGEXCOMP )
  146. {
  147. ULONG ulLen = hb_parclen( 1 );
  148. if( ulLen == 0 )
  149. hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameter count/type",
  150. &hb_errFuncName, HB_ERR_ARGS_BASEPARAMS );
  151. else
  152. {
  153. int iFlags = HBREG_EXTENDED;
  154. PHB_REGEX pRegEx;
  155. if( ISLOG( 2 ) && !hb_parl( 2 ) )
  156. iFlags |= HBREG_ICASE;
  157. if( hb_parl( 3 ) )
  158. iFlags |= HBREG_NEWLINE;
  159. pRegEx = hb_regexCompile( hb_parc( 1 ), ulLen, iFlags );
  160. if( pRegEx )
  161. {
  162. pRegEx->fFree = FALSE;
  163. hb_retptrGC( pRegEx );
  164. hb_gcUnlock( pRegEx );
  165. }
  166. }
  167. }
  168. HB_FUNC( HB_ISREGEX )
  169. {
  170. hb_retl( hb_parptrGC( hb_regexRelease, 1 ) != NULL );
  171. }
  172. HB_FUNC( HB_ATX )
  173. {
  174. char * pszString;
  175. ULONG ulLen, ulStart, ulEnd;
  176. PHB_REGEX pRegEx;
  177. PHB_ITEM pString;
  178. int iPCount = hb_pcount();
  179. pString = hb_param( 2, HB_IT_STRING );
  180. if( !pString )
  181. {
  182. hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameters",
  183. &hb_errFuncName, HB_ERR_ARGS_BASEPARAMS );
  184. return;
  185. }
  186. pszString = hb_itemGetCPtr( pString );
  187. ulLen = hb_itemGetCLen( pString );
  188. pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
  189. ISLOG( 3 ) && !hb_parl( 3 ) ? HBREG_ICASE : 0 );
  190. if( !pRegEx )
  191. return;
  192. ulStart = hb_parnl( 4 );
  193. ulEnd = hb_parnl( 5 );
  194. if( ulLen && ulStart <= ulLen && ulStart <= ulEnd )
  195. {
  196. HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( 1 ) ];
  197. if( ulEnd < ulLen )
  198. ulLen = ulEnd;
  199. if( ulStart )
  200. {
  201. --ulStart;
  202. ulLen -= ulStart;
  203. }
  204. if( hb_regexec( pRegEx, pszString + ulStart, ulLen, 1, aMatches ) > 0 )
  205. {
  206. ulStart += HB_REGMATCH_SO( aMatches, 0 ) + 1;
  207. ulLen = HB_REGMATCH_EO( aMatches, 0 ) - HB_REGMATCH_SO( aMatches, 0 );
  208. hb_retclen( pszString + ulStart - 1, ulLen );
  209. }
  210. else
  211. ulStart = ulLen = 0;
  212. }
  213. else
  214. ulStart = ulLen = 0;
  215. hb_regexFree( pRegEx );
  216. if( iPCount > 3 )
  217. {
  218. hb_stornl( ulStart, 4 );
  219. if( iPCount > 4 )
  220. hb_stornl( ulLen, 5 );
  221. }
  222. }
  223. static BOOL hb_regex( int iRequest )
  224. {
  225. HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( REGEX_MAX_GROUPS ) ];
  226. PHB_ITEM pRetArray, pMatch, pString;
  227. int i, iMatches, iMaxMatch;
  228. BOOL fResult = FALSE;
  229. PHB_REGEX pRegEx;
  230. char * pszString;
  231. ULONG ulLen;
  232. pString = hb_param( 2, HB_IT_STRING );
  233. if( !pString )
  234. {
  235. hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameters",
  236. &hb_errFuncName, HB_ERR_ARGS_BASEPARAMS );
  237. return FALSE;
  238. }
  239. pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
  240. ( ISLOG( 3 ) && !hb_parl( 3 ) ? HBREG_ICASE : 0 ) |
  241. ( hb_parl( 4 ) ? HBREG_NEWLINE : 0 ) );
  242. if( !pRegEx )
  243. return FALSE;
  244. pszString = hb_itemGetCPtr( pString );
  245. ulLen = hb_itemGetCLen( pString );
  246. iMaxMatch = iRequest == 0 || iRequest == 4 || iRequest == 5 ?
  247. REGEX_MAX_GROUPS : 1;
  248. iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches );
  249. if( iMatches > 0 )
  250. {
  251. switch ( iRequest )
  252. {
  253. case 0:
  254. pRetArray = hb_itemArrayNew( iMatches );
  255. for( i = 0; i < iMatches; i++ )
  256. {
  257. if( HB_REGMATCH_EO( aMatches, i ) > -1 )
  258. hb_itemPutCL( hb_arrayGetItemPtr( pRetArray, i + 1 ),
  259. pszString + HB_REGMATCH_SO( aMatches, i ),
  260. HB_REGMATCH_EO( aMatches, i ) -
  261. HB_REGMATCH_SO( aMatches, i ) );
  262. else
  263. hb_itemPutCL( hb_arrayGetItemPtr( pRetArray, i + 1 ), "", 0 );
  264. }
  265. hb_itemRelease( hb_itemReturnForward( pRetArray ) );
  266. fResult = TRUE;
  267. break;
  268. case 1: /* LIKE */
  269. fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
  270. ( ULONG ) HB_REGMATCH_EO( aMatches, 0 ) == ulLen;
  271. break;
  272. case 2: /* MATCH ( HAS ) */
  273. fResult = TRUE;
  274. break;
  275. case 3: /* SPLIT */
  276. iMaxMatch = hb_parni( 5 );
  277. pRetArray = hb_itemArrayNew( 0 );
  278. pMatch = hb_itemNew( NULL );
  279. iMatches = 0;
  280. do
  281. {
  282. hb_itemPutCL( pMatch, pszString, HB_REGMATCH_SO( aMatches, 0 ) );
  283. hb_arrayAddForward( pRetArray, pMatch );
  284. ulLen -= HB_REGMATCH_EO( aMatches, 0 );
  285. pszString += HB_REGMATCH_EO( aMatches, 0 );
  286. iMatches++;
  287. }
  288. while( HB_REGMATCH_EO( aMatches, 0 ) > 0 && ulLen &&
  289. ( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
  290. hb_regexec( pRegEx, pszString, ulLen, 1, aMatches ) > 0 );
  291. /* last match must be done also in case that pszString is empty;
  292. this would mean an empty split field at the end of the string */
  293. /* if( ulLen ) */
  294. {
  295. hb_itemPutCL( pMatch, pszString, ulLen );
  296. hb_arrayAddForward( pRetArray, pMatch );
  297. }
  298. hb_itemRelease( pMatch );
  299. hb_itemRelease( hb_itemReturnForward( pRetArray ) );
  300. fResult = TRUE;
  301. break;
  302. case 4: /* results AND positions */
  303. pRetArray = hb_itemArrayNew( iMatches );
  304. for( i = 0; i < iMatches; i++ )
  305. {
  306. int iSO = HB_REGMATCH_SO( aMatches, i ),
  307. iEO = HB_REGMATCH_EO( aMatches, i );
  308. pMatch = hb_arrayGetItemPtr( pRetArray, i + 1 );
  309. hb_arrayNew( pMatch, 3 );
  310. if( iEO != -1 )
  311. {
  312. /* matched string */
  313. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
  314. /* begin of match */
  315. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), iSO + 1 );
  316. /* End of match */
  317. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), iEO );
  318. }
  319. else
  320. {
  321. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), "", 0 );
  322. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), 0 );
  323. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), 0 );
  324. }
  325. }
  326. hb_itemRelease( hb_itemReturnForward( pRetArray ) );
  327. fResult = TRUE;
  328. break;
  329. case 5: /* _ALL_ results AND positions */
  330. {
  331. PHB_ITEM pAtxArray;
  332. int iMax = hb_parni( 5 ); /* max nuber of matches I want, 0 = unlimited */
  333. int iGetMatch = hb_parni( 6 ); /* Gets if want only one single match or a sub-match */
  334. BOOL fOnlyMatch = !ISLOG( 7 ) || hb_parl( 7 ); /* if TRUE returns only matches and sub-matches, not positions */
  335. ULONG ulOffSet = 0;
  336. int iCount = 0;
  337. int iSO, iEO;
  338. /* Set new array */
  339. pRetArray = hb_itemArrayNew( 0 );
  340. do
  341. {
  342. /* If I want all matches */
  343. if( iGetMatch == 0 || // Check boundaries
  344. ( iGetMatch < 0 || iGetMatch > iMatches ) )
  345. {
  346. pAtxArray = hb_itemArrayNew( iMatches );
  347. for( i = 0; i < iMatches; i++ )
  348. {
  349. iSO = HB_REGMATCH_SO( aMatches, i );
  350. iEO = HB_REGMATCH_EO( aMatches, i );
  351. pMatch = hb_arrayGetItemPtr( pAtxArray, i + 1 );
  352. if( !fOnlyMatch )
  353. {
  354. hb_arrayNew( pMatch, 3 );
  355. if ( iEO != -1 )
  356. {
  357. /* matched string */
  358. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
  359. /* begin of match */
  360. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + iSO + 1 );
  361. /* End of match */
  362. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + iEO );
  363. }
  364. else
  365. {
  366. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), "", 0 );
  367. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), 0 );
  368. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), 0 );
  369. }
  370. }
  371. else
  372. {
  373. if( iEO != -1 )
  374. /* matched string */
  375. hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
  376. else
  377. hb_itemPutCL( pMatch, "", 0 );
  378. }
  379. }
  380. hb_arrayAddForward( pRetArray, pAtxArray );
  381. hb_itemRelease( pAtxArray );
  382. }
  383. else /* Here I get only single matches */
  384. {
  385. i = iGetMatch - 1;
  386. iSO = HB_REGMATCH_SO( aMatches, i );
  387. iEO = HB_REGMATCH_EO( aMatches, i );
  388. pMatch = hb_itemNew( NULL );
  389. if( !fOnlyMatch )
  390. {
  391. hb_arrayNew( pMatch, 3 );
  392. if( iEO != -1 )
  393. {
  394. /* matched string */
  395. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
  396. /* begin of match */
  397. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + iSO + 1 );
  398. /* End of match */
  399. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + iEO );
  400. }
  401. else
  402. {
  403. hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), "", 0 );
  404. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), 0 );
  405. hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), 0 );
  406. }
  407. }
  408. else
  409. {
  410. if( iEO != -1 )
  411. /* matched string */
  412. hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
  413. else
  414. hb_itemPutCL( pMatch, "", 0 );
  415. }
  416. hb_arrayAddForward( pRetArray, pMatch );
  417. hb_itemRelease( pMatch );
  418. }
  419. iEO = HB_REGMATCH_EO( aMatches, 0 );
  420. if( iEO == -1 )
  421. break;
  422. ulLen -= iEO;
  423. pszString += iEO;
  424. ulOffSet += iEO;
  425. iCount++;
  426. }
  427. while( iEO && ulLen && ( iMax == 0 || iCount < iMax ) &&
  428. ( iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches ) ) > 0 );
  429. hb_itemRelease( hb_itemReturnForward( pRetArray ) );
  430. fResult = TRUE;
  431. break;
  432. }
  433. }
  434. }
  435. else if( iRequest == 3 )
  436. {
  437. pRetArray = hb_itemArrayNew( 1 );
  438. hb_arraySet( pRetArray, 1, pString );
  439. hb_itemRelease( hb_itemReturnForward( pRetArray ) );
  440. fResult = TRUE;
  441. }
  442. hb_regexFree( pRegEx );
  443. return fResult;
  444. }
  445. /* Returns array of Match + Sub-Matches. */
  446. HB_FUNC( HB_REGEX )
  447. {
  448. hb_regex( 0 );
  449. }
  450. /* Returns just .T. if match found or .F. otherwise. */
  451. HB_FUNC( HB_REGEXMATCH )
  452. {
  453. hb_retl( hb_regex( hb_parl( 3 ) ? 1 /* LIKE */ : 2 /* HAS */ ) );
  454. }
  455. HB_FUNC( HB_REGEXLIKE )
  456. {
  457. hb_retl( hb_regex( 1 ) );
  458. }
  459. HB_FUNC( HB_REGEXHAS )
  460. {
  461. hb_retl( hb_regex( 2 ) );
  462. }
  463. /* Splits the string in an array of matched expressions */
  464. HB_FUNC( HB_REGEXSPLIT )
  465. {
  466. hb_regex( 3 );
  467. }
  468. /* Returns array of { Match, start, end }, { Sub-Matches, start, end } */
  469. HB_FUNC( HB_REGEXATX )
  470. {
  471. hb_regex( 4 );
  472. }
  473. /* 2005-12-16 - Francesco Saverio Giudice
  474. HB_RegExAll( cRegex, cString, lCaseSensitive, lNewLine, nMaxMatches, nGetMatch, lOnlyMatch ) -> aAllRegexMatches
  475. This function return all matches from a Regex search.
  476. It is a mix from hb_RegEx() and hb_RegExAtX()
  477. PARAMETERS:
  478. cRegex - Regex pattern string or precompiled Regex
  479. cString - The string you want to search
  480. lCaseSensitive - default = FALSE
  481. lNewLine - default = FALSE
  482. nMaxMatches - default = unlimited, this limit number of matches that have to return
  483. nGetMatch - default = unlimited, this returns only one from Match + Sub-Matches
  484. lOnlyMatch - default = TRUE, if TRUE returns Matches, otherwise it returns also start and end positions
  485. */
  486. HB_FUNC( HB_REGEXALL )
  487. {
  488. hb_regex( 5 );
  489. }
  490. #if defined( HB_PCRE_REGEX )
  491. static void * hb_pcre_grab( size_t size )
  492. {
  493. return hb_xgrab( size );
  494. }
  495. #endif
  496. HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
  497. #if defined( HB_PCRE_REGEX )
  498. pcre_malloc = hb_pcre_grab;
  499. pcre_free = hb_xfree;
  500. #endif
  501. hb_regexInit( hb_regfree, hb_regcomp, hb_regexec );
  502. HB_CALL_ON_STARTUP_END( _hb_regex_init_ )
  503. #if defined(HB_PRAGMA_STARTUP)
  504. #pragma startup _hb_regex_init_
  505. #elif defined(HB_MSC_STARTUP)
  506. #if _MSC_VER >= 1010
  507. #pragma data_seg( ".CRT$XIY" )
  508. #pragma comment( linker, "/Merge:.CRT=.data" )
  509. #else
  510. #pragma data_seg( "XIY" )
  511. #endif
  512. static HB_$INITSYM hb_vm_auto_regex_init_ = _hb_regex_init_;
  513. #pragma data_seg()
  514. #endif