PageRenderTime 56ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/amanda/branches/amanda-252/common-src/match.c

#
C | 654 lines | 485 code | 68 blank | 101 comment | 237 complexity | 907b9954bac6a6add55b6d82723e789b MD5 | raw file
  1. /*
  2. * Amanda, The Advanced Maryland Automatic Network Disk Archiver
  3. * Copyright (c) 1991-1998 University of Maryland at College Park
  4. * All Rights Reserved.
  5. *
  6. * Permission to use, copy, modify, distribute, and sell this software and its
  7. * documentation for any purpose is hereby granted without fee, provided that
  8. * the above copyright notice appear in all copies and that both that
  9. * copyright notice and this permission notice appear in supporting
  10. * documentation, and that the name of U.M. not be used in advertising or
  11. * publicity pertaining to distribution of the software without specific,
  12. * written prior permission. U.M. makes no representations about the
  13. * suitability of this software for any purpose. It is provided "as is"
  14. * without express or implied warranty.
  15. *
  16. * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18. * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20. * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21. * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22. *
  23. * Authors: the Amanda Development Team. Its members are listed in a
  24. * file named AUTHORS, in the root directory of this distribution.
  25. */
  26. /*
  27. * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $
  28. *
  29. * functions for checking and matching regular expressions
  30. */
  31. #include "amanda.h"
  32. #include <regex.h>
  33. static int match_word(const char *glob, const char *word, const char separator);
  34. char *
  35. validate_regexp(
  36. const char * regex)
  37. {
  38. regex_t regc;
  39. int result;
  40. static char errmsg[STR_SIZE];
  41. if ((result = regcomp(&regc, regex,
  42. REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  43. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  44. return errmsg;
  45. }
  46. regfree(&regc);
  47. return NULL;
  48. }
  49. char *
  50. clean_regex(
  51. const char * regex)
  52. {
  53. char *result;
  54. int j;
  55. size_t i;
  56. result = alloc(2*strlen(regex)+1);
  57. for(i=0,j=0;i<strlen(regex);i++) {
  58. if(!isalnum((int)regex[i]))
  59. result[j++]='\\';
  60. result[j++]=regex[i];
  61. }
  62. result[j] = '\0';
  63. return result;
  64. }
  65. int
  66. match(
  67. const char * regex,
  68. const char * str)
  69. {
  70. regex_t regc;
  71. int result;
  72. char errmsg[STR_SIZE];
  73. if((result = regcomp(&regc, regex,
  74. REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  75. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  76. error("regex \"%s\": %s", regex, errmsg);
  77. /*NOTREACHED*/
  78. }
  79. if((result = regexec(&regc, str, 0, 0, 0)) != 0
  80. && result != REG_NOMATCH) {
  81. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  82. error("regex \"%s\": %s", regex, errmsg);
  83. /*NOTREACHED*/
  84. }
  85. regfree(&regc);
  86. return result == 0;
  87. }
  88. char *
  89. validate_glob(
  90. const char * glob)
  91. {
  92. char *regex;
  93. regex_t regc;
  94. int result;
  95. static char errmsg[STR_SIZE];
  96. regex = glob_to_regex(glob);
  97. if ((result = regcomp(&regc, regex,
  98. REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  99. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  100. amfree(regex);
  101. return errmsg;
  102. }
  103. regfree(&regc);
  104. amfree(regex);
  105. return NULL;
  106. }
  107. int
  108. match_glob(
  109. const char * glob,
  110. const char * str)
  111. {
  112. char *regex;
  113. regex_t regc;
  114. int result;
  115. char errmsg[STR_SIZE];
  116. regex = glob_to_regex(glob);
  117. if((result = regcomp(&regc, regex,
  118. REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  119. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  120. error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
  121. /*NOTREACHED*/
  122. }
  123. if((result = regexec(&regc, str, 0, 0, 0)) != 0
  124. && result != REG_NOMATCH) {
  125. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  126. error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
  127. /*NOTREACHED*/
  128. }
  129. regfree(&regc);
  130. amfree(regex);
  131. return result == 0;
  132. }
  133. char *
  134. glob_to_regex(
  135. const char * glob)
  136. {
  137. char *regex;
  138. char *r;
  139. size_t len;
  140. int ch;
  141. int last_ch;
  142. /*
  143. * Allocate an area to convert into. The worst case is a five to
  144. * one expansion.
  145. */
  146. len = strlen(glob);
  147. regex = alloc(1 + len * 5 + 1 + 1);
  148. /*
  149. * Do the conversion:
  150. *
  151. * ? -> [^/]
  152. * * -> [^/]*
  153. * [!...] -> [^...]
  154. *
  155. * The following are given a leading backslash to protect them
  156. * unless they already have a backslash:
  157. *
  158. * ( ) { } + . ^ $ |
  159. *
  160. * Put a leading ^ and trailing $ around the result. If the last
  161. * non-escaped character is \ leave the $ off to cause a syntax
  162. * error when the regex is compiled.
  163. */
  164. r = regex;
  165. *r++ = '^';
  166. last_ch = '\0';
  167. for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
  168. if (last_ch == '\\') {
  169. *r++ = (char)ch;
  170. ch = '\0'; /* so last_ch != '\\' next time */
  171. } else if (last_ch == '[' && ch == '!') {
  172. *r++ = '^';
  173. } else if (ch == '\\') {
  174. *r++ = (char)ch;
  175. } else if (ch == '*' || ch == '?') {
  176. *r++ = '[';
  177. *r++ = '^';
  178. *r++ = '/';
  179. *r++ = ']';
  180. if (ch == '*') {
  181. *r++ = '*';
  182. }
  183. } else if (ch == '('
  184. || ch == ')'
  185. || ch == '{'
  186. || ch == '}'
  187. || ch == '+'
  188. || ch == '.'
  189. || ch == '^'
  190. || ch == '$'
  191. || ch == '|') {
  192. *r++ = '\\';
  193. *r++ = (char)ch;
  194. } else {
  195. *r++ = (char)ch;
  196. }
  197. }
  198. if (last_ch != '\\') {
  199. *r++ = '$';
  200. }
  201. *r = '\0';
  202. return regex;
  203. }
  204. int
  205. match_tar(
  206. const char * glob,
  207. const char * str)
  208. {
  209. char *regex;
  210. regex_t regc;
  211. int result;
  212. char errmsg[STR_SIZE];
  213. regex = tar_to_regex(glob);
  214. if((result = regcomp(&regc, regex,
  215. REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  216. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  217. error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
  218. /*NOTREACHED*/
  219. }
  220. if((result = regexec(&regc, str, 0, 0, 0)) != 0
  221. && result != REG_NOMATCH) {
  222. regerror(result, &regc, errmsg, SIZEOF(errmsg));
  223. error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
  224. /*NOTREACHED*/
  225. }
  226. regfree(&regc);
  227. amfree(regex);
  228. return result == 0;
  229. }
  230. char *
  231. tar_to_regex(
  232. const char * glob)
  233. {
  234. char *regex;
  235. char *r;
  236. size_t len;
  237. int ch;
  238. int last_ch;
  239. /*
  240. * Allocate an area to convert into. The worst case is a five to
  241. * one expansion.
  242. */
  243. len = strlen(glob);
  244. regex = alloc(1 + len * 5 + 1 + 1);
  245. /*
  246. * Do the conversion:
  247. *
  248. * ? -> [^/]
  249. * * -> .*
  250. * [!...] -> [^...]
  251. *
  252. * The following are given a leading backslash to protect them
  253. * unless they already have a backslash:
  254. *
  255. * ( ) { } + . ^ $ |
  256. *
  257. * Put a leading ^ and trailing $ around the result. If the last
  258. * non-escaped character is \ leave the $ off to cause a syntax
  259. * error when the regex is compiled.
  260. */
  261. r = regex;
  262. *r++ = '^';
  263. last_ch = '\0';
  264. for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
  265. if (last_ch == '\\') {
  266. *r++ = (char)ch;
  267. ch = '\0'; /* so last_ch != '\\' next time */
  268. } else if (last_ch == '[' && ch == '!') {
  269. *r++ = '^';
  270. } else if (ch == '\\') {
  271. *r++ = (char)ch;
  272. } else if (ch == '*') {
  273. *r++ = '.';
  274. *r++ = '*';
  275. } else if (ch == '?') {
  276. *r++ = '[';
  277. *r++ = '^';
  278. *r++ = '/';
  279. *r++ = ']';
  280. } else if (ch == '('
  281. || ch == ')'
  282. || ch == '{'
  283. || ch == '}'
  284. || ch == '+'
  285. || ch == '.'
  286. || ch == '^'
  287. || ch == '$'
  288. || ch == '|') {
  289. *r++ = '\\';
  290. *r++ = (char)ch;
  291. } else {
  292. *r++ = (char)ch;
  293. }
  294. }
  295. if (last_ch != '\\') {
  296. *r++ = '$';
  297. }
  298. *r = '\0';
  299. return regex;
  300. }
/*
 * Match a separator-delimited word (for example a host name with '.' or
 * a disk name with '/') against a glob.
 *
 * The word is first copied into nword so that it begins and ends with
 * the separator; a word that is only the separator becomes two
 * separators.  The glob is converted to a regular expression that is
 * likewise bounded by escaped separators unless the glob supplies them,
 * and the two are compared with match().
 *
 * Returns the result of match(regex, nword).  All three temporary
 * buffers are released before returning.
 */
static int
match_word(
    const char *glob,
    const char *word,
    const char separator)
{
    char *regex;
    char *r;
    size_t len;
    int ch;
    int last_ch;
    int next_ch;
    size_t lenword;
    char *nword;
    char *nglob;
    char *g;
    const char *w;
    int i;

    /* Room for the word, one separator on each side, and the NUL. */
    lenword = strlen(word);
    nword = (char *)alloc(lenword + 3);

    r = nword;
    w = word;
    if(lenword == 1 && *w == separator) {
        /* A lone separator is represented as two separators. */
        *r++ = separator;
        *r++ = separator;
    }
    else {
        /* Add a leading/trailing separator only where one is missing. */
        if(*w != separator)
            *r++ = separator;
        while(*w != '\0')
            *r++ = *w++;
        if(*(r-1) != separator)
            *r++ = separator;
    }
    *r = '\0';

    /*
     * Allocate an area to convert into. The worst case is a six to
     * one expansion.
     */
    len = strlen(glob);
    regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2);
    r = regex;
    nglob = stralloc(glob);
    g = nglob;

    /*
     * A glob that is just the separator, optionally wrapped in ^ and/or
     * $, becomes ^\sep\sep$ -- the regex for the two-separator form
     * that nword takes for a lone separator.
     */
    if((len == 1 && nglob[0] == separator) ||
       (len == 2 && nglob[0] == '^' && nglob[1] == separator) ||
       (len == 2 && nglob[0] == separator && nglob[1] == '$') ||
       (len == 3 && nglob[0] == '^' && nglob[1] == separator &&
        nglob[2] == '$')) {
        *r++ = '^';
        *r++ = '\\';
        *r++ = separator;
        *r++ = '\\';
        *r++ = separator;
        *r++ = '$';
    }
    else {
        /*
         * Do the conversion:
         *
         *  ?      -> [^\separator]
         *  *      -> [^\separator]*
         *  [!...] -> [^...]
         *  **     -> .*
         *
         * The following are given a leading backslash to protect them
         * unless they already have a backslash:
         *
         *   ( ) { } + . ^ $ |
         *
         * If the last
         * non-escaped character is \ leave it to cause a syntax
         * error when the regex is compiled.
         */

        if(*g == '^') {
            /* Anchored glob: emit ^\sep and skip a separator in the glob. */
            *r++ = '^';
            *r++ = '\\';        /* escape the separator */
            *r++ = separator;
            g++;
            if(*g == separator) g++;
        }
        else if(*g != separator) {
            *r++ = '\\';        /* add a leading \separator */
            *r++ = separator;
        }
        last_ch = '\0';
        for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) {
            next_ch = *g;       /* one-character look-ahead */
            if (last_ch == '\\') {
                *r++ = (char)ch;
                ch = '\0';      /* so last_ch != '\\' next time */
            } else if (last_ch == '[' && ch == '!') {
                *r++ = '^';
            } else if (ch == '\\') {
                *r++ = (char)ch;
            } else if (ch == '*' || ch == '?') {
                if(ch == '*' && next_ch == '*') {
                    /* "**": emit '.' here, consume the second '*'. */
                    *r++ = '.';
                    g++;
                }
                else {
                    *r++ = '[';
                    *r++ = '^';
                    *r++ = '\\';
                    *r++ = separator;
                    *r++ = ']';
                }
                if (ch == '*') {
                    *r++ = '*';
                }
            } else if (ch == '$' && next_ch == '\0') {
                /* Trailing $: make sure a separator precedes the anchor. */
                if(last_ch != separator) {
                    *r++ = '\\';
                    *r++ = separator;
                }
                *r++ = (char)ch;
            } else if (   ch == '('
                       || ch == ')'
                       || ch == '{'
                       || ch == '}'
                       || ch == '+'
                       || ch == '.'
                       || ch == '^'
                       || ch == '$'
                       || ch == '|') {
                *r++ = '\\';
                *r++ = (char)ch;
            } else {
                *r++ = (char)ch;
            }
        }
        if(last_ch != '\\') {
            if(last_ch != separator && last_ch != '$') {
                *r++ = '\\';
                *r++ = separator;       /* add a trailing \separator */
            }
        }
    }
    *r = '\0';

    i = match(regex,nword);

    amfree(nword);
    amfree(nglob);
    amfree(regex);
    return i;
}
  446. int
  447. match_host(
  448. const char * glob,
  449. const char * host)
  450. {
  451. char *lglob, *lhost;
  452. char *c;
  453. const char *d;
  454. int i;
  455. lglob = (char *)alloc(strlen(glob)+1);
  456. c = lglob, d=glob;
  457. while( *d != '\0')
  458. *c++ = (char)tolower(*d++);
  459. *c = *d;
  460. lhost = (char *)alloc(strlen(host)+1);
  461. c = lhost, d=host;
  462. while( *d != '\0')
  463. *c++ = (char)tolower(*d++);
  464. *c = *d;
  465. i = match_word(lglob, lhost, (int)'.');
  466. amfree(lglob);
  467. amfree(lhost);
  468. return i;
  469. }
  470. int
  471. match_disk(
  472. const char * glob,
  473. const char * disk)
  474. {
  475. return match_word(glob, disk, '/');
  476. }
  477. int
  478. match_datestamp(
  479. const char * dateexp,
  480. const char * datestamp)
  481. {
  482. char *dash;
  483. size_t len, len_suffix;
  484. size_t len_prefix;
  485. char firstdate[100], lastdate[100];
  486. char mydateexp[100];
  487. int match_exact;
  488. if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
  489. error("Illegal datestamp expression %s",dateexp);
  490. /*NOTREACHED*/
  491. }
  492. if(dateexp[0] == '^') {
  493. strncpy(mydateexp, dateexp+1, strlen(dateexp)-1);
  494. mydateexp[strlen(dateexp)-1] = '\0';
  495. }
  496. else {
  497. strncpy(mydateexp, dateexp, strlen(dateexp));
  498. mydateexp[strlen(dateexp)] = '\0';
  499. }
  500. if(mydateexp[strlen(mydateexp)] == '$') {
  501. match_exact = 1;
  502. mydateexp[strlen(mydateexp)] = '\0';
  503. }
  504. else
  505. match_exact = 0;
  506. if((dash = strchr(mydateexp,'-'))) {
  507. if(match_exact == 1) {
  508. error("Illegal datestamp expression %s",dateexp);
  509. /*NOTREACHED*/
  510. }
  511. len = (size_t)(dash - mydateexp);
  512. len_suffix = strlen(dash) - 1;
  513. len_prefix = len - len_suffix;
  514. dash++;
  515. strncpy(firstdate, mydateexp, len);
  516. firstdate[len] = '\0';
  517. strncpy(lastdate, mydateexp, len_prefix);
  518. strncpy(&(lastdate[len_prefix]), dash, len_suffix);
  519. lastdate[len] = '\0';
  520. return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
  521. (strncmp(datestamp, lastdate , strlen(lastdate)) <= 0));
  522. }
  523. else {
  524. if(match_exact == 1) {
  525. return (strcmp(datestamp, mydateexp) == 0);
  526. }
  527. else {
  528. return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
  529. }
  530. }
  531. }
  532. int
  533. match_level(
  534. const char * levelexp,
  535. const char * level)
  536. {
  537. char *dash;
  538. size_t len, len_suffix;
  539. size_t len_prefix;
  540. char lowend[100], highend[100];
  541. char mylevelexp[100];
  542. int match_exact;
  543. if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
  544. error("Illegal level expression %s",levelexp);
  545. /*NOTREACHED*/
  546. }
  547. if(levelexp[0] == '^') {
  548. strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
  549. mylevelexp[strlen(levelexp)-1] = '\0';
  550. }
  551. else {
  552. strncpy(mylevelexp, levelexp, strlen(levelexp));
  553. mylevelexp[strlen(levelexp)] = '\0';
  554. }
  555. if(mylevelexp[strlen(mylevelexp)] == '$') {
  556. match_exact = 1;
  557. mylevelexp[strlen(mylevelexp)] = '\0';
  558. }
  559. else
  560. match_exact = 0;
  561. if((dash = strchr(mylevelexp,'-'))) {
  562. if(match_exact == 1) {
  563. error("Illegal level expression %s",levelexp);
  564. /*NOTREACHED*/
  565. }
  566. len = (size_t)(dash - mylevelexp);
  567. len_suffix = strlen(dash) - 1;
  568. len_prefix = len - len_suffix;
  569. dash++;
  570. strncpy(lowend, mylevelexp, len);
  571. lowend[len] = '\0';
  572. strncpy(highend, mylevelexp, len_prefix);
  573. strncpy(&(highend[len_prefix]), dash, len_suffix);
  574. highend[len] = '\0';
  575. return ((strncmp(level, lowend, strlen(lowend)) >= 0) &&
  576. (strncmp(level, highend , strlen(highend)) <= 0));
  577. }
  578. else {
  579. if(match_exact == 1) {
  580. return (strcmp(level, mylevelexp) == 0);
  581. }
  582. else {
  583. return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0);
  584. }
  585. }
  586. }