PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/harbour-3.0.0/src/rtl/hbregex.c

#
C | 574 lines | 435 code | 51 blank | 88 comment | 75 complexity | c7ff5a760b7271c8d72b12e35cb27720 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
  1. /*
  2. * $Id: hbregex.c 15024 2010-07-06 21:32:46Z vszakats $
  3. */
  4. /*
  5. * Harbour Project source code:
  6. *
  7. *
  8. * Copyright 2007 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
  9. * www - http://harbour-project.org
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2, or (at your option)
  14. * any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this software; see the file COPYING. If not, write to
  23. * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  24. * Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
  25. *
  26. * As a special exception, the Harbour Project gives permission for
  27. * additional uses of the text contained in its release of Harbour.
  28. *
  29. * The exception is that, if you link the Harbour libraries with other
  30. * files to produce an executable, this does not by itself cause the
  31. * resulting executable to be covered by the GNU General Public License.
  32. * Your use of that executable is in no way restricted on account of
  33. * linking the Harbour library code into it.
  34. *
  35. * This exception does not however invalidate any other reasons why
  36. * the executable file might be covered by the GNU General Public License.
  37. *
  38. * This exception applies only to the code released by the Harbour
  39. * Project under the name Harbour. If you copy code from other
  40. * Harbour Project or Free Software Foundation releases into a copy of
  41. * Harbour, as the General Public License permits, the exception does
  42. * not apply to the code that you add in this way. To avoid misleading
  43. * anyone as to the status of such modified files, you must delete
  44. * this exception notice from them.
  45. *
  46. * If you write modifications of your own for Harbour, it is your choice
  47. * whether to permit this exception to apply to your modifications.
  48. * If you do not wish that, delete this exception notice.
  49. *
  50. */
  51. #define _HB_REGEX_INTERNAL_
  52. #include "hbregex.h"
  53. #include "hbapiitm.h"
  54. #include "hbapierr.h"
  55. #include "hbinit.h"
  56. static void hb_regfree( PHB_REGEX pRegEx )
  57. {
  58. #if defined( HB_HAS_PCRE )
  59. ( pcre_free )( pRegEx->re_pcre );
  60. #elif defined( HB_POSIX_REGEX )
  61. regfree( &pRegEx->reg );
  62. #else
  63. HB_SYMBOL_UNUSED( pRegEx );
  64. #endif
  65. }
  66. static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
  67. {
  68. #if defined( HB_HAS_PCRE )
  69. const unsigned char * pCharTable = NULL;
  70. const char *szError = NULL;
  71. int iErrOffset = 0;
  72. int iCFlags = ( ( pRegEx->iFlags & HBREG_ICASE ) ? PCRE_CASELESS : 0 ) |
  73. ( ( pRegEx->iFlags & HBREG_NEWLINE ) ? PCRE_MULTILINE : 0 ) |
  74. ( ( pRegEx->iFlags & HBREG_DOTALL ) ? PCRE_DOTALL : 0 );
  75. pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) |
  76. ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 );
  77. pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError,
  78. &iErrOffset, pCharTable );
  79. return pRegEx->re_pcre ? 0 : -1;
  80. #elif defined( HB_POSIX_REGEX )
  81. int iCFlags = REG_EXTENDED |
  82. ( ( pRegEx->iFlags & HBREG_ICASE ) ? REG_ICASE : 0 ) |
  83. ( ( pRegEx->iFlags & HBREG_NEWLINE ) ? REG_NEWLINE : 0 ) |
  84. ( ( pRegEx->iFlags & HBREG_NOSUB ) ? REG_NOSUB : 0 );
  85. pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? REG_NOTBOL : 0 ) |
  86. ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? REG_NOTEOL : 0 );
  87. return regcomp( &pRegEx->reg, szRegEx, iCFlags );
  88. #else
  89. HB_SYMBOL_UNUSED( pRegEx );
  90. HB_SYMBOL_UNUSED( szRegEx );
  91. return -1;
  92. #endif
  93. }
  94. static int hb_regexec( PHB_REGEX pRegEx, const char * szString, HB_SIZE nLen,
  95. int iMatches, HB_REGMATCH * aMatches )
  96. {
  97. #if defined( HB_HAS_PCRE )
  98. int iResult, i;
  99. iResult = pcre_exec( pRegEx->re_pcre, NULL /* pcre_extra */,
  100. szString, ( int ) nLen, 0 /* startoffset */,
  101. pRegEx->iEFlags, aMatches, HB_REGMATCH_SIZE( iMatches ) );
  102. if( iResult == 0 )
  103. {
  104. for( i = 0; i < iMatches; i++ )
  105. {
  106. if( HB_REGMATCH_EO( aMatches, i ) != -1 )
  107. iResult = i + 1;
  108. }
  109. }
  110. return iResult;
  111. #elif defined( HB_POSIX_REGEX )
  112. char * szBuffer = NULL;
  113. int iResult, i;
  114. if( szString[ nLen ] != 0 )
  115. {
  116. szBuffer = hb_strndup( szString, nLen );
  117. szString = szBuffer;
  118. }
  119. for( i = 0; i < iMatches; i++ )
  120. HB_REGMATCH_EO( aMatches, i ) = -1;
  121. iResult = regexec( &pRegEx->reg, szString, iMatches, aMatches, pRegEx->iEFlags );
  122. if( iResult == 0 )
  123. {
  124. for( i = 0; i < iMatches; i++ )
  125. {
  126. if( HB_REGMATCH_EO( aMatches, i ) != -1 )
  127. iResult = i + 1;
  128. }
  129. }
  130. else
  131. iResult = -1;
  132. if( szBuffer )
  133. hb_xfree( szBuffer );
  134. return iResult;
  135. #else
  136. HB_SYMBOL_UNUSED( pRegEx );
  137. HB_SYMBOL_UNUSED( szString );
  138. HB_SYMBOL_UNUSED( nLen );
  139. HB_SYMBOL_UNUSED( iMatches );
  140. HB_SYMBOL_UNUSED( aMatches );
  141. return -1;
  142. #endif
  143. }
  144. HB_FUNC( HB_REGEXCOMP )
  145. {
  146. HB_SIZE nLen = hb_parclen( 1 );
  147. if( nLen == 0 )
  148. hb_errRT_BASE_SubstR( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
  149. else
  150. {
  151. int iFlags = HBREG_EXTENDED;
  152. PHB_REGEX pRegEx;
  153. if( ! hb_parldef( 2, 1 ) )
  154. iFlags |= HBREG_ICASE;
  155. if( hb_parl( 3 ) )
  156. iFlags |= HBREG_NEWLINE;
  157. pRegEx = hb_regexCompile( hb_parc( 1 ), nLen, iFlags );
  158. if( pRegEx )
  159. {
  160. pRegEx->fFree = HB_FALSE;
  161. hb_retptrGC( pRegEx );
  162. }
  163. }
  164. }
  165. HB_FUNC( HB_ISREGEX )
  166. {
  167. hb_retl( hb_regexIs( hb_param( 1, HB_IT_ANY ) ) );
  168. }
  169. HB_FUNC( HB_ATX )
  170. {
  171. PHB_ITEM pString = hb_param( 2, HB_IT_STRING );
  172. if( pString )
  173. {
  174. PHB_REGEX pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
  175. ! hb_parldef( 3, 1 ) ? HBREG_ICASE : 0 );
  176. if( pRegEx )
  177. {
  178. HB_SIZE nLen = hb_itemGetCLen( pString );
  179. HB_SIZE nStart = hb_parns( 4 );
  180. HB_SIZE nEnd = hb_parnsdef( 5, nLen );
  181. if( nLen && nStart <= nLen && nStart <= nEnd )
  182. {
  183. const char * pszString = hb_itemGetCPtr( pString );
  184. HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( 1 ) ];
  185. if( nEnd < nLen )
  186. nLen = nEnd;
  187. if( nStart )
  188. {
  189. --nStart;
  190. nLen -= nStart;
  191. }
  192. if( hb_regexec( pRegEx, pszString + nStart, nLen, 1, aMatches ) > 0 )
  193. {
  194. nStart += HB_REGMATCH_SO( aMatches, 0 ) + 1;
  195. nLen = HB_REGMATCH_EO( aMatches, 0 ) - HB_REGMATCH_SO( aMatches, 0 );
  196. hb_retclen( pszString + nStart - 1, nLen );
  197. }
  198. else
  199. nStart = nLen = 0;
  200. }
  201. else
  202. nStart = nLen = 0;
  203. hb_regexFree( pRegEx );
  204. hb_storns( nStart, 4 );
  205. hb_storns( nLen, 5 );
  206. }
  207. else
  208. {
  209. hb_storns( 0, 4 );
  210. hb_storns( 0, 5 );
  211. }
  212. }
  213. else
  214. hb_errRT_BASE_SubstR( EG_ARG, 3013, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
  215. }
  216. static HB_BOOL hb_regex( int iRequest )
  217. {
  218. HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( REGEX_MAX_GROUPS ) ];
  219. PHB_ITEM pRetArray, pMatch, pString;
  220. int i, iMatches, iMaxMatch;
  221. HB_BOOL fResult = HB_FALSE;
  222. PHB_REGEX pRegEx;
  223. const char * pszString;
  224. HB_SIZE nLen;
  225. pString = hb_param( 2, HB_IT_STRING );
  226. if( !pString )
  227. {
  228. hb_errRT_BASE_SubstR( EG_ARG, 3014, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
  229. return HB_FALSE;
  230. }
  231. pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
  232. ( !hb_parldef( 3, 1 ) ? HBREG_ICASE : 0 ) |
  233. ( hb_parl( 4 ) ? HBREG_NEWLINE : 0 ) );
  234. if( !pRegEx )
  235. return HB_FALSE;
  236. pszString = hb_itemGetCPtr( pString );
  237. nLen = hb_itemGetCLen( pString );
  238. iMaxMatch = iRequest == 0 || iRequest == 4 || iRequest == 5 ?
  239. REGEX_MAX_GROUPS : 1;
  240. iMatches = hb_regexec( pRegEx, pszString, nLen, iMaxMatch, aMatches );
  241. if( iMatches > 0 )
  242. {
  243. switch( iRequest )
  244. {
  245. case 0:
  246. pRetArray = hb_itemArrayNew( iMatches );
  247. for( i = 0; i < iMatches; i++ )
  248. {
  249. if( HB_REGMATCH_EO( aMatches, i ) > -1 )
  250. hb_arraySetCL( pRetArray, i + 1,
  251. pszString + HB_REGMATCH_SO( aMatches, i ),
  252. HB_REGMATCH_EO( aMatches, i ) -
  253. HB_REGMATCH_SO( aMatches, i ) );
  254. else
  255. hb_arraySetCL( pRetArray, i + 1, NULL, 0 );
  256. }
  257. hb_itemReturnRelease( pRetArray );
  258. fResult = HB_TRUE;
  259. break;
  260. case 1: /* LIKE */
  261. fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
  262. ( HB_SIZE ) HB_REGMATCH_EO( aMatches, 0 ) == nLen;
  263. break;
  264. case 2: /* MATCH ( HAS ) */
  265. fResult = HB_TRUE;
  266. break;
  267. case 3: /* SPLIT */
  268. iMaxMatch = hb_parni( 5 );
  269. pRetArray = hb_itemArrayNew( 0 );
  270. pMatch = hb_itemNew( NULL );
  271. iMatches = 0;
  272. do
  273. {
  274. hb_itemPutCL( pMatch, pszString, HB_REGMATCH_SO( aMatches, 0 ) );
  275. hb_arrayAddForward( pRetArray, pMatch );
  276. nLen -= HB_REGMATCH_EO( aMatches, 0 );
  277. pszString += HB_REGMATCH_EO( aMatches, 0 );
  278. iMatches++;
  279. }
  280. while( HB_REGMATCH_EO( aMatches, 0 ) > 0 && nLen &&
  281. ( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
  282. hb_regexec( pRegEx, pszString, nLen, 1, aMatches ) > 0 );
  283. /* last match must be done also in case that pszString is empty;
  284. this would mean an empty split field at the end of the string */
  285. /* if( nLen ) */
  286. {
  287. hb_itemPutCL( pMatch, pszString, nLen );
  288. hb_arrayAddForward( pRetArray, pMatch );
  289. }
  290. hb_itemRelease( pMatch );
  291. hb_itemReturnRelease( pRetArray );
  292. fResult = HB_TRUE;
  293. break;
  294. case 4: /* results AND positions */
  295. pRetArray = hb_itemArrayNew( iMatches );
  296. for( i = 0; i < iMatches; i++ )
  297. {
  298. int iSO = HB_REGMATCH_SO( aMatches, i ),
  299. iEO = HB_REGMATCH_EO( aMatches, i );
  300. pMatch = hb_arrayGetItemPtr( pRetArray, i + 1 );
  301. hb_arrayNew( pMatch, 3 );
  302. if( iEO != -1 )
  303. {
  304. /* matched string */
  305. hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
  306. /* begin of match */
  307. hb_arraySetNS( pMatch, 2, iSO + 1 );
  308. /* End of match */
  309. hb_arraySetNS( pMatch, 3, iEO );
  310. }
  311. else
  312. {
  313. hb_arraySetCL( pMatch, 1, NULL, 0 );
  314. hb_arraySetNS( pMatch, 2, 0 );
  315. hb_arraySetNS( pMatch, 3, 0 );
  316. }
  317. }
  318. hb_itemReturnRelease( pRetArray );
  319. fResult = HB_TRUE;
  320. break;
  321. case 5: /* _ALL_ results AND positions */
  322. {
  323. PHB_ITEM pAtxArray;
  324. int iMax = hb_parni( 5 ); /* max nuber of matches I want, 0 = unlimited */
  325. int iGetMatch = hb_parni( 6 ); /* Gets if want only one single match or a sub-match */
  326. HB_BOOL fOnlyMatch = hb_parldef( 7, 1 ); /* if HB_TRUE returns only matches and sub-matches, not positions */
  327. HB_SIZE nOffset = 0;
  328. int iCount = 0;
  329. int iSO, iEO;
  330. /* Set new array */
  331. pRetArray = hb_itemArrayNew( 0 );
  332. do
  333. {
  334. /* If I want all matches */
  335. if( iGetMatch == 0 || /* Check boundaries */
  336. ( iGetMatch < 0 || iGetMatch > iMatches ) )
  337. {
  338. pAtxArray = hb_itemArrayNew( iMatches );
  339. for( i = 0; i < iMatches; i++ )
  340. {
  341. iSO = HB_REGMATCH_SO( aMatches, i );
  342. iEO = HB_REGMATCH_EO( aMatches, i );
  343. pMatch = hb_arrayGetItemPtr( pAtxArray, i + 1 );
  344. if( !fOnlyMatch )
  345. {
  346. hb_arrayNew( pMatch, 3 );
  347. if( iEO != -1 )
  348. {
  349. /* matched string */
  350. hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
  351. /* begin of match */
  352. hb_arraySetNS( pMatch, 2, nOffset + iSO + 1 );
  353. /* End of match */
  354. hb_arraySetNS( pMatch, 3, nOffset + iEO );
  355. }
  356. else
  357. {
  358. hb_arraySetCL( pMatch, 1, NULL, 0 );
  359. hb_arraySetNS( pMatch, 2, 0 );
  360. hb_arraySetNS( pMatch, 3, 0 );
  361. }
  362. }
  363. else
  364. {
  365. if( iEO != -1 )
  366. /* matched string */
  367. hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
  368. else
  369. hb_itemPutC( pMatch, NULL );
  370. }
  371. }
  372. hb_arrayAddForward( pRetArray, pAtxArray );
  373. hb_itemRelease( pAtxArray );
  374. }
  375. else /* Here I get only single matches */
  376. {
  377. i = iGetMatch - 1;
  378. iSO = HB_REGMATCH_SO( aMatches, i );
  379. iEO = HB_REGMATCH_EO( aMatches, i );
  380. pMatch = hb_itemNew( NULL );
  381. if( !fOnlyMatch )
  382. {
  383. hb_arrayNew( pMatch, 3 );
  384. if( iEO != -1 )
  385. {
  386. /* matched string */
  387. hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
  388. /* begin of match */
  389. hb_arraySetNS( pMatch, 2, nOffset + iSO + 1 );
  390. /* End of match */
  391. hb_arraySetNS( pMatch, 3, nOffset + iEO );
  392. }
  393. else
  394. {
  395. hb_arraySetCL( pMatch, 1, NULL, 0 );
  396. hb_arraySetNS( pMatch, 2, 0 );
  397. hb_arraySetNS( pMatch, 3, 0 );
  398. }
  399. }
  400. else
  401. {
  402. if( iEO != -1 )
  403. /* matched string */
  404. hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
  405. else
  406. hb_itemPutC( pMatch, NULL );
  407. }
  408. hb_arrayAddForward( pRetArray, pMatch );
  409. hb_itemRelease( pMatch );
  410. }
  411. iEO = HB_REGMATCH_EO( aMatches, 0 );
  412. if( iEO == -1 )
  413. break;
  414. nLen -= iEO;
  415. pszString += iEO;
  416. nOffset += iEO;
  417. iCount++;
  418. }
  419. while( iEO && nLen && ( iMax == 0 || iCount < iMax ) &&
  420. ( iMatches = hb_regexec( pRegEx, pszString, nLen, iMaxMatch, aMatches ) ) > 0 );
  421. hb_itemReturnRelease( pRetArray );
  422. fResult = HB_TRUE;
  423. break;
  424. }
  425. }
  426. }
  427. else if( iRequest == 3 )
  428. {
  429. pRetArray = hb_itemArrayNew( 1 );
  430. hb_arraySet( pRetArray, 1, pString );
  431. hb_itemReturnRelease( pRetArray );
  432. fResult = HB_TRUE;
  433. }
  434. hb_regexFree( pRegEx );
  435. return fResult;
  436. }
  437. /* Returns array of Match + Sub-Matches. */
  438. HB_FUNC( HB_REGEX )
  439. {
  440. if( ! hb_regex( 0 ) )
  441. hb_reta( 0 );
  442. }
  443. /* Returns just .T. if match found or .F. otherwise. */
  444. /* NOTE: Deprecated compatibility function.
  445. Please use HB_REGEXLIKE() and HB_REGEXHAS() instead. */
  446. #if defined( HB_LEGACY_LEVEL4 )
  447. HB_FUNC( HB_REGEXMATCH )
  448. {
  449. hb_retl( hb_regex( hb_parl( 5 ) ? 1 /* LIKE */ : 2 /* HAS */ ) );
  450. }
  451. #endif
  452. HB_FUNC( HB_REGEXLIKE )
  453. {
  454. hb_retl( hb_regex( 1 ) );
  455. }
  456. HB_FUNC( HB_REGEXHAS )
  457. {
  458. hb_retl( hb_regex( 2 ) );
  459. }
  460. /* Splits the string in an array of matched expressions */
  461. HB_FUNC( HB_REGEXSPLIT )
  462. {
  463. if( ! hb_regex( 3 ) )
  464. hb_reta( 0 );
  465. }
  466. /* Returns array of { Match, start, end }, { Sub-Matches, start, end } */
  467. HB_FUNC( HB_REGEXATX )
  468. {
  469. if( ! hb_regex( 4 ) )
  470. hb_reta( 0 );
  471. }
/* 2005-12-16 - Francesco Saverio Giudice
   HB_RegExAll( cRegex, cString, lCaseSensitive, lNewLine, nMaxMatches, nGetMatch, lOnlyMatch ) -> aAllRegexMatches
   This function returns all matches from a regex search.
   It is a mix of hb_RegEx() and hb_RegExAtX().
   PARAMETERS:
   cRegex - Regex pattern string or precompiled regex
   cString - The string you want to search
   lCaseSensitive - default = TRUE
   lNewLine - default = FALSE
   nMaxMatches - default = 0 (unlimited); limits the number of matches returned
   nGetMatch - default = 0 (all); otherwise returns only that one item of Match + Sub-Matches
   lOnlyMatch - default = TRUE; if TRUE returns only the matches, otherwise it also returns their start and end positions
*/
  485. HB_FUNC( HB_REGEXALL )
  486. {
  487. if( ! hb_regex( 5 ) )
  488. hb_reta( 0 );
  489. }
  490. #if defined( HB_HAS_PCRE )
  491. static void * hb_pcre_grab( size_t size )
  492. {
  493. return hb_xgrab( size );
  494. }
  495. static void hb_pcre_free( void * ptr )
  496. {
  497. hb_xfree( ptr );
  498. }
  499. #endif
  500. HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
  501. #if defined( HB_HAS_PCRE )
  502. /* Hack to force linking newer PCRE versions not the one included in BCC RTL */
  503. # if defined( __BORLANDC__ )
  504. {
  505. int iUTF8Enabled;
  506. pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled );
  507. }
  508. # endif
  509. pcre_malloc = hb_pcre_grab;
  510. pcre_free = hb_pcre_free;
  511. pcre_stack_malloc = hb_pcre_grab;
  512. pcre_stack_free = hb_pcre_free;
  513. #endif
  514. hb_regexInit( hb_regfree, hb_regcomp, hb_regexec );
  515. HB_CALL_ON_STARTUP_END( _hb_regex_init_ )
  516. #if defined( HB_PRAGMA_STARTUP )
  517. #pragma startup _hb_regex_init_
  518. #elif defined( HB_DATASEG_STARTUP )
  519. #define HB_DATASEG_BODY HB_DATASEG_FUNC( _hb_regex_init_ )
  520. #include "hbiniseg.h"
  521. #endif