/contrib/dict_xsyn/dict_xsyn.c

https://github.com/LuaDist/libpq · C · 262 lines · 191 code · 49 blank · 22 comment · 47 complexity · b7cee4f1448f51139c50e6061a7653a0 MD5 · raw file

  1. /*-------------------------------------------------------------------------
  2. *
  3. * dict_xsyn.c
  4. * Extended synonym dictionary
  5. *
  6. * Copyright (c) 2007-2012, PostgreSQL Global Development Group
  7. *
  8. * IDENTIFICATION
  9. * contrib/dict_xsyn/dict_xsyn.c
  10. *
  11. *-------------------------------------------------------------------------
  12. */
  13. #include "postgres.h"
  14. #include <ctype.h>
  15. #include "commands/defrem.h"
  16. #include "tsearch/ts_locale.h"
  17. #include "tsearch/ts_utils.h"
  18. PG_MODULE_MAGIC;
  19. typedef struct
  20. {
  21. char *key; /* Word */
  22. char *value; /* Unparsed list of synonyms, including the
  23. * word itself */
  24. } Syn;
  25. typedef struct
  26. {
  27. int len;
  28. Syn *syn;
  29. bool matchorig;
  30. bool keeporig;
  31. bool matchsynonyms;
  32. bool keepsynonyms;
  33. } DictSyn;
  34. PG_FUNCTION_INFO_V1(dxsyn_init);
  35. Datum dxsyn_init(PG_FUNCTION_ARGS);
  36. PG_FUNCTION_INFO_V1(dxsyn_lexize);
  37. Datum dxsyn_lexize(PG_FUNCTION_ARGS);
  38. static char *
  39. find_word(char *in, char **end)
  40. {
  41. char *start;
  42. *end = NULL;
  43. while (*in && t_isspace(in))
  44. in += pg_mblen(in);
  45. if (!*in || *in == '#')
  46. return NULL;
  47. start = in;
  48. while (*in && !t_isspace(in))
  49. in += pg_mblen(in);
  50. *end = in;
  51. return start;
  52. }
  53. static int
  54. compare_syn(const void *a, const void *b)
  55. {
  56. return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
  57. }
  58. static void
  59. read_dictionary(DictSyn *d, char *filename)
  60. {
  61. char *real_filename = get_tsearch_config_filename(filename, "rules");
  62. tsearch_readline_state trst;
  63. char *line;
  64. int cur = 0;
  65. if (!tsearch_readline_begin(&trst, real_filename))
  66. ereport(ERROR,
  67. (errcode(ERRCODE_CONFIG_FILE_ERROR),
  68. errmsg("could not open synonym file \"%s\": %m",
  69. real_filename)));
  70. while ((line = tsearch_readline(&trst)) != NULL)
  71. {
  72. char *value;
  73. char *key;
  74. char *pos;
  75. char *end;
  76. if (*line == '\0')
  77. continue;
  78. value = lowerstr(line);
  79. pfree(line);
  80. pos = value;
  81. while ((key = find_word(pos, &end)) != NULL)
  82. {
  83. /* Enlarge syn structure if full */
  84. if (cur == d->len)
  85. {
  86. d->len = (d->len > 0) ? 2 * d->len : 16;
  87. if (d->syn)
  88. d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
  89. else
  90. d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
  91. }
  92. /* Save first word only if we will match it */
  93. if (pos != value || d->matchorig)
  94. {
  95. d->syn[cur].key = pnstrdup(key, end - key);
  96. d->syn[cur].value = pstrdup(value);
  97. cur++;
  98. }
  99. pos = end;
  100. /* Don't bother scanning synonyms if we will not match them */
  101. if (!d->matchsynonyms)
  102. break;
  103. }
  104. pfree(value);
  105. }
  106. tsearch_readline_end(&trst);
  107. d->len = cur;
  108. if (cur > 1)
  109. qsort(d->syn, d->len, sizeof(Syn), compare_syn);
  110. pfree(real_filename);
  111. }
  112. Datum
  113. dxsyn_init(PG_FUNCTION_ARGS)
  114. {
  115. List *dictoptions = (List *) PG_GETARG_POINTER(0);
  116. DictSyn *d;
  117. ListCell *l;
  118. char *filename = NULL;
  119. d = (DictSyn *) palloc0(sizeof(DictSyn));
  120. d->len = 0;
  121. d->syn = NULL;
  122. d->matchorig = true;
  123. d->keeporig = true;
  124. d->matchsynonyms = false;
  125. d->keepsynonyms = true;
  126. foreach(l, dictoptions)
  127. {
  128. DefElem *defel = (DefElem *) lfirst(l);
  129. if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
  130. {
  131. d->matchorig = defGetBoolean(defel);
  132. }
  133. else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
  134. {
  135. d->keeporig = defGetBoolean(defel);
  136. }
  137. else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
  138. {
  139. d->matchsynonyms = defGetBoolean(defel);
  140. }
  141. else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
  142. {
  143. d->keepsynonyms = defGetBoolean(defel);
  144. }
  145. else if (pg_strcasecmp(defel->defname, "RULES") == 0)
  146. {
  147. /* we can't read the rules before parsing all options! */
  148. filename = defGetString(defel);
  149. }
  150. else
  151. {
  152. ereport(ERROR,
  153. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  154. errmsg("unrecognized xsyn parameter: \"%s\"",
  155. defel->defname)));
  156. }
  157. }
  158. if (filename)
  159. read_dictionary(d, filename);
  160. PG_RETURN_POINTER(d);
  161. }
  162. Datum
  163. dxsyn_lexize(PG_FUNCTION_ARGS)
  164. {
  165. DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
  166. char *in = (char *) PG_GETARG_POINTER(1);
  167. int length = PG_GETARG_INT32(2);
  168. Syn word;
  169. Syn *found;
  170. TSLexeme *res = NULL;
  171. if (!length || d->len == 0)
  172. PG_RETURN_POINTER(NULL);
  173. /* Create search pattern */
  174. {
  175. char *temp = pnstrdup(in, length);
  176. word.key = lowerstr(temp);
  177. pfree(temp);
  178. word.value = NULL;
  179. }
  180. /* Look for matching syn */
  181. found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
  182. pfree(word.key);
  183. if (!found)
  184. PG_RETURN_POINTER(NULL);
  185. /* Parse string of synonyms and return array of words */
  186. {
  187. char *value = found->value;
  188. char *syn;
  189. char *pos;
  190. char *end;
  191. int nsyns = 0;
  192. res = palloc(sizeof(TSLexeme));
  193. pos = value;
  194. while ((syn = find_word(pos, &end)) != NULL)
  195. {
  196. res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
  197. /* The first word is output only if keeporig=true */
  198. if (pos != value || d->keeporig)
  199. {
  200. res[nsyns].lexeme = pnstrdup(syn, end - syn);
  201. res[nsyns].nvariant = 0;
  202. res[nsyns].flags = 0;
  203. nsyns++;
  204. }
  205. pos = end;
  206. /* Stop if we are not to output the synonyms */
  207. if (!d->keepsynonyms)
  208. break;
  209. }
  210. res[nsyns].lexeme = NULL;
  211. }
  212. PG_RETURN_POINTER(res);
  213. }