/modules/mod_search/mod_search.erl

http://github.com/zotonic/zotonic · Erlang · 648 lines · 466 code · 83 blank · 99 comment · 17 complexity · 0f66ce9f9200270e2629f3b8cb654e47 MD5 · raw file

  1. %% @author Marc Worrell <marc@worrell.nl>
  2. %% @copyright 2009 Marc Worrell
  3. %% Date: 2009-06-09
  4. %% @doc Defines PostgreSQL queries for basic content searches in Zotonic.
  5. %% This module needs to be split in specific PostgreSQL queries and standard SQL queries when you want to
  6. %% support other databases (like MySQL).
  7. %% Copyright 2009 Marc Worrell
  8. %%
  9. %% Licensed under the Apache License, Version 2.0 (the "License");
  10. %% you may not use this file except in compliance with the License.
  11. %% You may obtain a copy of the License at
  12. %%
  13. %% http://www.apache.org/licenses/LICENSE-2.0
  14. %%
  15. %% Unless required by applicable law or agreed to in writing, software
  16. %% distributed under the License is distributed on an "AS IS" BASIS,
  17. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18. %% See the License for the specific language governing permissions and
  19. %% limitations under the License.
  20. -module(mod_search).
  21. -author("Marc Worrell <marc@worrell.nl>").
  22. -behaviour(gen_server).
  23. -mod_title("Search Queries").
  24. -mod_description("Defines PostgreSQL queries for basic content searches in Zotonic.").
  25. -mod_prio(1000).
  26. %% gen_server exports
  27. -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
  28. -export([start_link/1]).
  29. %% interface functions
  30. -export([
  31. observe_search_query/2,
  32. observe_module_activate/2,
  33. to_tsquery/2,
  34. rank_weight/0,
  35. rank_behaviour/1,
  36. find_by_id/2,
  37. find_by_id/3
  38. ]).
  39. -include("zotonic.hrl").
  40. -record(state, {context, query_watches=[]}).
  %% @doc Observer for the `search_query' notification: unpacks the request and
  %% the offset/limit from the notification tuple and hands them to search/3.
  observe_search_query({search_query, Query, Paging}, Context) ->
      search(Query, Paging, Context).
  %% @doc Observer for module activation. When the activated module is this
  %% module, ask its server process to (re)build the query watches; activations
  %% of any other module are ignored.
  observe_module_activate(#module_activate{module=?MODULE, pid=ServerPid}, _Context) ->
      gen_server:cast(ServerPid, init_query_watches);
  observe_module_activate(_OtherActivation, _Context) ->
      ok.
  47. %%====================================================================
  48. %% API
  49. %%====================================================================
  %% @spec start_link(Args) -> {ok,Pid} | ignore | {error,Error}
  %% @doc Start the (unregistered) gen_server for this module. Args must be a
  %% list; init/1 expects it to contain a `{context, Context}' entry.
  start_link(Args) when is_list(Args) ->
      ServerOptions = [],
      gen_server:start_link(?MODULE, Args, ServerOptions).
  54. %%====================================================================
  55. %% gen_server callbacks
  56. %%====================================================================
  %% @spec init(Args) -> {ok, State} |
  %%                     {ok, State, Timeout} |
  %%                     ignore |
  %%                     {stop, Reason}
  %% @doc Initialise the server: trap exits, set the lager metadata, subscribe
  %% to resource update/delete notifications and build the initial state.
  %% Crashes with a badmatch when Args has no `{context, Context}' entry.
  init(Args) ->
      process_flag(trap_exit, true),
      {context, Context} = proplists:lookup(context, Args),
      LogMetadata = [
          {site, z_context:site(Context)},
          {module, ?MODULE}
      ],
      lager:md(LogMetadata),
      %% Watch for changes to resources
      Self = self(),
      lists:foreach(
          fun(Event) -> z_notifier:observe(Event, Self, Context) end,
          [rsc_update_done, rsc_delete]),
      %% The state keeps its own context: a new one derived from the given
      %% context and passed through z_acl:sudo/1.
      ServerContext = z_acl:sudo(z_context:new(Context)),
      {ok, #state{context=ServerContext}}.
  %% @spec handle_call(Request, From, State) -> {reply, Reply, State} |
  %%                                            {reply, Reply, State, Timeout} |
  %%                                            {noreply, State} |
  %%                                            {noreply, State, Timeout} |
  %%                                            {stop, Reason, Reply, State} |
  %%                                            {stop, Reason, State}
  %% @doc No calls are supported: any call stops the server with reason
  %% `{unknown_call, Message}'.
  handle_call(Unknown, _From, State) ->
      StopReason = {unknown_call, Unknown},
      {stop, StopReason, State}.
  %% @spec handle_cast(Msg, State) -> {noreply, State} |
  %%                                  {noreply, State, Timeout} |
  %%                                  {stop, Reason, State}
  %% @doc Casts for updates to resources. Keeps the list of query watches in
  %% sync with the resources in the `query' category and sends notifications
  %% for the watches that match an updated resource.
  %%
  %% A resource was deleted: drop its watch when it was a query.
  handle_cast({#rsc_delete{id=Id, is_a=IsA}, _Ctx}, #state{context=Context, query_watches=Watches} = State) ->
      NewWatches = case lists:member('query', IsA) of
          true -> search_query_notify:watches_remove(Id, Watches, Context);
          false -> Watches
      end,
      {noreply, State#state{query_watches=NewWatches}};
  %% Sent on activation of this module: (re)initialise all watches.
  handle_cast(init_query_watches, State) ->
      NewWatches = search_query_notify:init(State#state.context),
      {noreply, State#state{query_watches=NewWatches}};
  %% The update-done of a delete is ignored here; deletes arrive as #rsc_delete{}.
  handle_cast({#rsc_update_done{action=delete}, _Ctx}, State) ->
      {noreply, State};
  %% Update; categories have not changed (pre and post category lists are equal).
  handle_cast({#rsc_update_done{id=Id, pre_is_a=Cats, post_is_a=Cats}, _Ctx}, #state{query_watches=Watches, context=Context} = State) ->
      NewWatches = case lists:member('query', Cats) of
          true -> search_query_notify:watches_update(Id, Watches, Context);
          false -> Watches
      end,
      %% Item updated; send notifications for matched queries.
      Matches = search_query_notify:check_rsc(Id, NewWatches, Context),
      search_query_notify:send_notifications(Id, Matches, Context),
      {noreply, State#state{query_watches=NewWatches}};
  %% Update; categories *have* changed.
  handle_cast({#rsc_update_done{id=Id, pre_is_a=CatsOld, post_is_a=CatsNew}, _Ctx}, #state{query_watches=Watches, context=Context} = State) ->
      WasQuery = lists:member('query', CatsOld),
      IsQuery = lists:member('query', CatsNew),
      NewWatches = case {WasQuery, IsQuery} of
          {true, true} ->
              %% It still is a query, but it might have changed; update watches.
              search_query_notify:watches_update(Id, Watches, Context);
          {true, false} ->
              %% It is no longer a query; remove from watches.
              search_query_notify:watches_remove(Id, Watches, Context);
          {false, true} ->
              %% It has become a query.
              search_query_notify:watches_update(Id, Watches, Context);
          {false, false} ->
              %% It was not and is not a query.
              Watches
      end,
      Matches = search_query_notify:check_rsc(Id, NewWatches, Context),
      search_query_notify:send_notifications(Id, Matches, Context),
      {noreply, State#state{query_watches=NewWatches}};
  %% Trap unknown casts: stop the server.
  handle_cast(Unknown, State) ->
      {stop, {unknown_cast, Unknown}, State}.
  %% @spec handle_info(Info, State) -> {noreply, State} |
  %%                                   {noreply, State, Timeout} |
  %%                                   {stop, Reason, State}
  %% @doc All non call/cast messages are silently dropped; the state is
  %% returned unchanged.
  handle_info(_Ignored, State) ->
      {noreply, State}.
  %% @spec terminate(Reason, State) -> void()
  %% @doc Called by the gen_server when it is about to terminate. Undoes the
  %% subscriptions made in init/1 by detaching this process from the
  %% rsc_update_done and rsc_delete notifications. The return value is ignored.
  terminate(_Reason, State) ->
      Context = State#state.context,
      Self = self(),
      lists:foreach(
          fun(Event) -> z_notifier:detach(Event, Self, Context) end,
          [rsc_update_done, rsc_delete]),
      ok.
  %% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
  %% @doc Hot code upgrade hook; the process state needs no conversion and is
  %% passed through as-is.
  code_change(_FromVersion, State, _Extra) ->
      {ok, State}.
  153. %%====================================================================
  154. %% support functions
  155. %%====================================================================
  %% Build the query for the resources directly after (`next') or before
  %% (`previous') resource `id' within category `cat', ordered on the `sort'
  %% field (default: publication_start). `limit' defaults to 1.
  %% The `id' and `cat' arguments are required; a missing one is a badmatch.
  %% NOTE(review): the sort field name is concatenated into the SQL text as-is;
  %% presumably it only comes from trusted template arguments -- confirm.
  search_prevnext(Type, Args, Context) ->
      {Operator, Direction} = case Type of
          next -> {" > ", "ASC"};
          previous -> {" < ", "DESC"}
      end,
      Field = z_convert:to_list(proplists:get_value(sort, Args, publication_start)),
      %% Some property names are stored in differently named pivot columns.
      Column = case Field of
          "date_start" -> "pivot_date_start";
          "date_end" -> "pivot_date_end";
          "title" -> "pivot_title";
          Other -> Other
      end,
      Limit = z_convert:to_integer(proplists:get_value(limit, Args, 1)),
      {id, Id} = proplists:lookup(id, Args),
      {cat, Cat} = proplists:lookup(cat, Args),
      %% The value of the sort property of the reference resource.
      PivotValue = m_rsc:p(Id, z_convert:to_atom(Field), Context),
      #search_sql{
          select="r.id",
          from="rsc r",
          where="(" ++ Column ++ " " ++ Operator ++ " $1) and r.id <> $2",
          tables=[{rsc, "r"}],
          cats=[{"r", Cat}],
          args=[PivotValue, z_convert:to_integer(Id), Limit],
          order=Column ++ " " ++ Direction ++ ", id " ++ Direction,
          limit="limit $3"
      }.
  %% Map a search specification `{Name, Args}' onto a query.
  %% Depending on the clause the result is a #search_sql{} record, a ready
  %% #search_result{}, whatever the delegated search module returns, or
  %% `undefined' when the name/argument combination is not handled here.
  %% The argument lists are matched literally: a clause is only selected when
  %% the arguments are present in exactly the order shown in its head.
  %%
  %% Retrieve the previous/next id(s) (on sort field, defaults to publication date)
  search({previous, Args}, _OffsetLimit, Context) ->
      search_prevnext(previous, Args, Context);
  search({next, Args}, _OffsetLimit, Context) ->
      search_prevnext(next, Args, Context);
  %% Keywords (objects of the keyword predicate) with the number of resources
  %% in category `cat' that are connected to them.
  search({keyword_cloud, Props}, _OffsetLimit, Context) ->
      Cat = proplists:get_value(cat, Props),
      KeywordCatName = proplists:get_value(keywordcat, Props, "keyword"),
      KeywordCat = list_to_atom(KeywordCatName),
      KeywordPredName = proplists:get_value(keywordpred, Props, "subject"),
      Subject = m_predicate:name_to_id_check(KeywordPredName, Context),
      #search_sql{
          select="kw.id as id, count(*) as count",
          from="rsc kw, edge e, rsc r",
          where="kw.id = e.object_id AND e.predicate_id = $1 AND e.subject_id = r.id",
          tables=[{rsc, "kw"}, {edge, "e"}, {rsc, "r"}],
          cats=[{"kw", KeywordCat}, {"r", Cat}],
          args=[Subject],
          group_by="kw.id, kw.pivot_title",
          order="kw.pivot_title"
      };
  %% Number of resources per publication year; runs the query directly and adds
  %% an `as_date' property (January 1st of the year) to every row.
  search({archive_year, [{cat,Cat}]}, OffsetLimit, Context) ->
      Q = #search_sql{
          select="date_part('year', r.publication_start)::int as year, count(*) as count",
          from="rsc r",
          tables=[{rsc, "r"}],
          assoc=true,
          cats=[{"r", Cat}],
          group_by="date_part('year', r.publication_start)",
          order="year desc"
      },
      R = z_search:search_result(Q, OffsetLimit, Context),
      Result = [ [{as_date, {{z_convert:to_integer(Y),1,1},{0,0,0}}}|Rest]
                 || Rest = [{year, Y}, {count, _}] <- R#search_result.result ],
      #search_result{result=Result};
  %% Number of resources per publication year and month; rows get a
  %% `month_as_date' property and are grouped on year.
  search({archive_year_month, [{cat,Cat}]}, OffsetLimit, Context) ->
      Q = #search_sql{
          select="date_part('year', r.publication_start)::int as year, date_part('month', r.publication_start)::int as month, count(*) as count",
          from="rsc r",
          tables=[{rsc, "r"}],
          assoc=true,
          cats=[{"r", Cat}],
          group_by="date_part('year', r.publication_start), date_part('month', r.publication_start)",
          order="year desc, month desc"
      },
      R = z_search:search_result(Q, OffsetLimit, Context),
      Result = [ [{month_as_date, {{z_convert:to_integer(Y),z_convert:to_integer(M),1},{0,0,0}}}|Rest]
                 || Rest = [{year, Y}, {month, M}, {count, _}] <- R#search_result.result ],
      #search_result{result=z_utils:group_proplists(year, Result)};
  %% Return the rsc records that have similar objects
  search({match_objects, [{id,Id}]}, _OffsetLimit, Context) ->
      ObjectIds = m_edge:objects(Id, Context),
      MatchTerms = [ ["zpo",integer_to_list(ObjId)] || ObjId <- ObjectIds ],
      TsQuery = lists:flatten(z_utils:combine("|", MatchTerms)),
      case TsQuery of
          [] ->
              %% No objects, so nothing to match on.
              #search_result{};
          _ ->
              #search_sql{
                  select="r.id, ts_rank(pivot_rtsv, query) AS rank",
                  from="rsc r, to_tsquery($1) query",
                  where=" query @@ pivot_rtsv and id <> $2",
                  order="rank desc",
                  args=[TsQuery, z_convert:to_integer(Id)],
                  tables=[{rsc,"r"}]
              }
      end;
  search({match_objects, [{cat,Cat},{id,Id}]}, OffsetLimit, Context) ->
      case search({match_objects, [{id,Id}]}, OffsetLimit, Context) of
          #search_sql{} = Search -> Search#search_sql{cats=[{"r", Cat}]};
          Result -> Result
      end;
  %% Return the rsc records that have similar objects and/or categories
  search({match_objects_cats, [{id,Id}]}, _OffsetLimit, Context) ->
      IsCats = m_rsc:is_a_id(Id, Context),
      CatTerms = [ ["zpc",integer_to_list(CatId)] || CatId <- IsCats ],
      ObjectIds = m_edge:objects(Id, Context),
      ObjectTerms = [ ["zpo",integer_to_list(ObjId)] || ObjId <- ObjectIds ],
      TsQuery = lists:flatten(z_utils:combine("|", CatTerms++ObjectTerms)),
      case TsQuery of
          [] ->
              #search_result{};
          _ ->
              #search_sql{
                  select="r.id, ts_rank(pivot_rtsv, query) AS rank",
                  from="rsc r, to_tsquery($1) query",
                  where=" query @@ pivot_rtsv and id <> $2",
                  order="rank desc",
                  args=[TsQuery, z_convert:to_integer(Id)],
                  tables=[{rsc,"r"}]
              }
      end;
  search({match_objects_cats, [{cat,Cat},{id,Id}]}, OffsetLimit, Context) ->
      case search({match_objects_cats, [{id,Id}]}, OffsetLimit, Context) of
          #search_sql{} = Search -> Search#search_sql{cats=[{"r", Cat}]};
          Result -> Result
      end;
  %% Return a list of resource ids, featured ones first
  search({featured, []}, OffsetLimit, Context) ->
      search({'query', [{sort, "-rsc.is_featured"}, {sort, "-rsc.publication_start"}]}, OffsetLimit, Context);
  %% Return a list of resource ids inside a category, featured ones first
  search({featured, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{cat, Cat}, {sort, "-rsc.is_featured"}, {sort, "-rsc.publication_start"}]}, OffsetLimit, Context);
  %% Return the list of all resource ids, using the default order of the 'query' search
  search({all, []}, OffsetLimit, Context) ->
      search({'query', []}, OffsetLimit, Context);
  %% Return the list of resource ids inside a category
  search({all, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{cat, Cat}]}, OffsetLimit, Context);
  %% Return a list of resource ids inside a category having a object_id as predicate
  search({featured, [{cat,Cat},{object,ObjectId},{predicate,Predicate}]}, OffsetLimit, Context) ->
      search({'query', [{cat, Cat}, {hassubject, [ObjectId, Predicate]}]}, OffsetLimit, Context);
  %% Most recently published first
  search({published, []}, OffsetLimit, Context) ->
      search({'query', [{sort, "-rsc.publication_start"}]}, OffsetLimit, Context);
  search({published, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{cat, Cat}, {sort, "-rsc.publication_start"}]}, OffsetLimit, Context);
  %% Most recently modified first
  search({latest, []}, OffsetLimit, Context) ->
      search({'query', [{sort, "-rsc.modified"}]}, OffsetLimit, Context);
  search({latest, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{cat, Cat}, {sort, "-rsc.modified"}]}, OffsetLimit, Context);
  search({latest, [{creator_id,CreatorId}]}, _OffsetLimit, _Context) ->
      #search_sql{
          select="r.id",
          from="rsc r",
          where="r.creator_id = $1",
          order="r.modified desc",
          args=[z_convert:to_integer(CreatorId)],
          tables=[{rsc,"r"}]
      };
  search({latest, [{cat, Cat}, {creator_id,CreatorId}]}, _OffsetLimit, _Context) ->
      #search_sql{
          select="r.id",
          from="rsc r",
          where="r.creator_id = $1",
          order="r.modified desc",
          args=[z_convert:to_integer(CreatorId)],
          cats=[{"r", Cat}],
          tables=[{rsc,"r"}]
      };
  search({upcoming, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{upcoming, true}, {cat, Cat}, {sort, "rsc.pivot_date_start"}]}, OffsetLimit, Context);
  search({finished, [{cat, Cat}]}, OffsetLimit, Context) ->
      search({'query', [{finished, true}, {cat, Cat}, {sort, '-rsc.pivot_date_start'}]}, OffsetLimit, Context);
  %% Autocomplete on the full text index; "id:..." looks up resources by id or name.
  search({autocomplete, [{text,QueryText}]}, OffsetLimit, Context) ->
      search({autocomplete, [{cat,[]}, {text,QueryText}]}, OffsetLimit, Context);
  search({autocomplete, [{cat,Cat}, {text,QueryText}]}, _OffsetLimit, Context) ->
      case z_string:trim(QueryText) of
          "id:" ++ S ->
              find_by_id(S, true, Context);
          _ ->
              case to_tsquery(QueryText, Context) of
                  %% to_tsquery/2 returns a binary, so an empty query shows up
                  %% as <<>>; do not send an empty tsquery to the database.
                  Empty when Empty =:= undefined; Empty =:= []; Empty =:= <<>> ->
                      #search_result{};
                  TsQuery ->
                      #search_sql{
                          select="r.id, ts_rank_cd("++rank_weight()++", pivot_tsv, $1, $2) AS rank",
                          from="rsc r",
                          where=" $1 @@ r.pivot_tsv",
                          order="rank desc",
                          args=[TsQuery, rank_behaviour(Context)],
                          cats=[{"r", Cat}],
                          tables=[{rsc,"r"}]
                      }
              end
      end;
  %% Full text search; an empty category falls back to the search without category.
  search({fulltext, [{cat,Cat},{text,QueryText}]}, OffsetLimit, Context) when Cat == undefined orelse Cat == [] orelse Cat == <<>> ->
      search({fulltext, [{text,QueryText}]}, OffsetLimit, Context);
  search({fulltext, [{text,QueryText}]}, _OffsetLimit, Context) ->
      case z_string:trim(QueryText) of
          A when A == undefined orelse A == "" orelse A == <<>> ->
              %% No text: list everything, most recently modified first.
              #search_sql{
                  select="r.id, 1 AS rank",
                  from="rsc r",
                  order="r.modified desc",
                  args=[],
                  tables=[{rsc,"r"}]
              };
          "id:" ++ S ->
              find_by_id(S, true, Context);
          _ ->
              TsQuery = to_tsquery(QueryText, Context),
              #search_sql{
                  select="r.id, ts_rank_cd("++rank_weight()++", pivot_tsv, $1, $2) AS rank",
                  from="rsc r",
                  where=" $1 @@ r.pivot_tsv",
                  order="rank desc",
                  args=[TsQuery, rank_behaviour(Context)],
                  tables=[{rsc,"r"}]
              }
      end;
  search({fulltext, [{cat,Cat},{text,QueryText}]}, _OffsetLimit, Context) ->
      case z_string:trim(QueryText) of
          A when A == undefined orelse A == "" orelse A == <<>> ->
              #search_sql{
                  select="r.id, 1 AS rank",
                  from="rsc r",
                  order="r.modified desc",
                  cats=[{"r", Cat}],
                  tables=[{rsc,"r"}]
              };
          "id:" ++ S ->
              find_by_id(S, true, Context);
          _ ->
              TsQuery = to_tsquery(QueryText, Context),
              #search_sql{
                  select="r.id, ts_rank_cd("++rank_weight()++", pivot_tsv, $1, $2) AS rank",
                  from="rsc r",
                  where=" $1 @@ pivot_tsv",
                  order="rank desc",
                  args=[TsQuery, rank_behaviour(Context)],
                  cats=[{"r", Cat}],
                  tables=[{rsc,"r"}]
              }
      end;
  %% Resources (with the predicate) that have an edge pointing to Id.
  search({referrers, [{id,Id}]}, _OffsetLimit, _Context) ->
      #search_sql{
          select="o.id, e.predicate_id",
          from="edge e join rsc o on o.id = e.subject_id",
          where="e.object_id = $1",
          order="e.id desc",
          args=[z_convert:to_integer(Id)],
          tables=[{rsc,"o"}]
      };
  %% Filenames of the media attached to resources in a category.
  search({media_category_image, [{cat,Cat}]}, _OffsetLimit, _Context) ->
      #search_sql{
          select="m.filename",
          from="rsc r, medium m",
          where="m.id = r.id",
          cats=[{"r", Cat}],
          tables=[{rsc,"r"}, {medium, "m"}]
      };
  %% Filenames of the media of the depictions of resources in a category.
  search({media_category_depiction, [{cat,Cat}]}, _OffsetLimit, Context) ->
      PredDepictionId = m_predicate:name_to_id_check(depiction, Context),
      #search_sql{
          select="m.filename",
          from="rsc r, rsc ro, medium m, edge e",
          where="ro.id = e.object_id and e.subject_id = r.id and e.predicate_id = $1 and ro.id = m.id",
          tables=[{rsc,"r"}, {rsc, "ro"}, {medium, "m"}],
          args=[PredDepictionId],
          cats=[{"r", Cat}]
      };
  %% NOTE(review): this selects from table "media" while the other queries in
  %% this module (and the `tables' entry below) use "medium" -- confirm that a
  %% "media" table exists.
  search({media, []}, _OffsetLimit, _Context) ->
      #search_sql{
          select="m.*",
          from="media m",
          tables=[{medium, "m"}],
          order="m.created desc",
          args=[],
          assoc=true
      };
  search({all_bytitle, [{cat, Cat}]}, _OffsetLimit, Context) ->
      search_all_bytitle:search(Cat, all_bytitle, Context);
  search({all_bytitle_featured, [{cat, Cat}]}, _OffsetLimit, Context) ->
      search_all_bytitle:search(Cat, all_bytitle_featured, Context);
  search({all_bytitle, [{cat_is, Cat}]}, _OffsetLimit, Context) ->
      search_all_bytitle:search_cat_is(Cat, all_bytitle, Context);
  search({all_bytitle_featured, [{cat_is, Cat}]}, _OffsetLimit, Context) ->
      search_all_bytitle:search_cat_is(Cat, all_bytitle_featured, Context);
  %% The generic query search, handled by the search_query module.
  search({'query', Args}, _OffsetLimit, Context) ->
      search_query:search(Args, Context);
  %% Resources whose date range overlaps the period Start..End.
  search({events, [{cat, Cat}, {'end', End}, {start, Start}]}, _OffsetLimit, _Context) ->
      #search_sql{
          select="r.id, r.pivot_date_start, r.pivot_date_end",
          from="rsc r",
          where="r.pivot_date_end >= $1 AND r.pivot_date_start <= $2",
          args=[Start, End],
          order="r.pivot_date_start asc",
          cats=[{"r", Cat}],
          tables=[{rsc,"r"}]
      };
  search({events, [{'end', End}, {start, Start}]}, OffsetLimit, Context) ->
      search({events, [{cat, event}, {'end', End}, {start, Start}]}, OffsetLimit, Context);
  %% Not a search known to this module.
  search(_, _, _) ->
      undefined.
  %% @doc Expand a search string like "hello wor" to a PostgreSQL tsquery string.
  %% If the search string ends in a word character then a wildcard is appended
  %% to the last search term. `undefined' and empty input give an empty binary.
  -spec to_tsquery(binary()|string()|undefined, #context{}) -> binary().
  to_tsquery(undefined, _Context) ->
      <<>>;
  to_tsquery(Text, Context) when is_list(Text) ->
      to_tsquery(z_convert:to_binary(Text), Context);
  to_tsquery(<<>>, _Context) ->
      <<>>;
  to_tsquery(Text, Context) when is_binary(Text) ->
      case to_tsquery_1(Text, Context) of
          <<>> ->
              %% Check if the wildcard prefix was a stopword like the dutch "de"
              case is_separator(binary:last(Text)) of
                  true ->
                      <<>>;
                  false ->
                      %% Retry with a nonsense suffix glued to the last word, so
                      %% that it is no longer seen as a stopword, and strip the
                      %% suffix from the resulting query again.
                      Padded = <<Text/binary, "xcvvcx">>,
                      PaddedQuery = to_tsquery_1(Padded, Context),
                      binary:replace(PaddedQuery, <<"xcvvcx">>, <<>>)
              end;
          TsQuery ->
              TsQuery
      end.
  %% Let the database turn the cleaned-up text into a tsquery (using the
  %% stemmer language of the site), then append the prefix wildcard and apply
  %% the stemmer specific fix-ups. The server version is fetched in the same
  %% query because append_wildcard/3 needs it.
  to_tsquery_1(Text, Context) when is_binary(Text) ->
      Stemmer = z_pivot_rsc:stemmer_language(Context),
      Sql = "select plainto_tsquery($2, $1), version()",
      SqlArgs = [z_pivot_rsc:cleanup_tsv_text(Text), Stemmer],
      [{TsQuery, Version}] = z_db:q(Sql, SqlArgs, Context),
      % Version is something like "PostgreSQL 8.3.5 on i386-apple-darwin8.11.1, compiled by ..."
      StemmerName = z_convert:to_list(Stemmer),
      WithWildcard = append_wildcard(Text, TsQuery, Version),
      fixup_tsquery(StemmerName, WithWildcard).
  %% Check if a byte is a separator: everything except ASCII digits, ASCII
  %% letters and values of 128 and above counts as a separator.
  is_separator(C) when C >= $0, C =< $9;
                       C >= $A, C =< $Z;
                       C >= $a, C =< $z;
                       C >= 128 ->
      false;
  is_separator(_) ->
      true.
  %% Append the prefix-match wildcard ":*" to a tsquery when the original text
  %% ends in a word character and the database server supports prefix
  %% matching. An empty tsquery stays empty.
  %% Text is the text the user typed, TsQ the tsquery made from it and Version
  %% the string returned by the SQL function version().
  append_wildcard(_Text, <<>>, _Version) ->
      <<>>;
  append_wildcard(Text, TsQ, Version) ->
      case pg_supports_prefix_match(Version)
          andalso is_wordchar(z_string:last_char(Text))
      of
          true -> <<TsQ/binary, ":*">>;
          false -> TsQ
      end.

  %% Check if the server version is at least PostgreSQL 8.4. The major and
  %% minor numbers are compared numerically: comparing the version strings
  %% themselves sorts "PostgreSQL 10" before "PostgreSQL 8.4". A version
  %% string that can not be parsed is assumed to be a recent server.
  pg_supports_prefix_match(Version) when is_binary(Version); is_list(Version) ->
      case re:run(Version, "^PostgreSQL\\s+(\\d+)\\.?(\\d*)", [{capture, all_but_first, list}]) of
          {match, [Major | MinorPart]} ->
              Minor = case MinorPart of
                  [[_|_] = Digits] -> list_to_integer(Digits);
                  _ -> 0
              end,
              {list_to_integer(Major), Minor} >= {8, 4};
          nomatch ->
              true
      end;
  pg_supports_prefix_match(_Version) ->
      true.
  %% Check if a character is a word character: an ASCII digit, an ASCII letter
  %% or any character above 255. Used to decide if a prefix wildcard may be
  %% appended after the last character of a search text.
  %% The digit range must be the characters $0..$9; the integers 0..9 are
  %% control characters.
  is_wordchar(C) when C >= $0, C =< $9 -> true;
  is_wordchar(C) when C >= $a, C =< $z -> true;
  is_wordchar(C) when C >= $A, C =< $Z -> true;
  is_wordchar(C) when C > 255 -> true;
  is_wordchar(_) -> false.
  % Stemming a word prefix does not always give a prefix of the stemmed word.
  % For now only the one case we found is repaired: for the dutch stemmer a
  % doubled vowel directly before the wildcard is reduced to a single vowel.
  %
  % to_tsquery('dutch', 'overstee') -> 'overstee'
  % to_tsquery('dutch', 'oversteek') -> 'overstek'
  %
  % An empty query and queries for other stemmers are returned unchanged.
  fixup_tsquery(Stemmer, TsQ) ->
      case {Stemmer, TsQ} of
          {_, <<>>} ->
              <<>>;
          {"dutch", _} ->
              re:replace(TsQ, <<"([a-z]([aieou]))\\2':\\*">>, <<"\\1':\\*">>, [{return, binary}]);
          _ ->
              TsQ
      end.
  %% @doc Find one or more resources by id or name, when the resources exist.
  %% Input may be a single token or a comma-separated string.
  %% Search results contain a list of ids.
  -spec find_by_id(string(), #context{}) -> #search_result{}.
  find_by_id(S, Context) ->
      WithRank = false,
      find_by_id(S, WithRank, Context).
  %% @doc As find_by_id/2, but when Rank is true, results contain a list of tuples: {id, 1}.
  %% The input is split on commas and spaces; tokens that do not refer to an
  %% existing resource are skipped and the remaining ids are sorted and made
  %% unique. `total' is only set when at least one resource was found.
  -spec find_by_id(string(), boolean(), #context{}) -> #search_result{}.
  find_by_id(S, Rank, Context) ->
      Tokens = string:tokens(S, ", "),
      Found = [ m_rsc:rid(Token, Context) || Token <- Tokens, m_rsc:exists(Token, Context) ],
      UniqueIds = lists:usort(Found),
      Results = case Rank of
          true -> [ {Id, 1} || Id <- UniqueIds ];
          false -> UniqueIds
      end,
      case Results of
          [] ->
              #search_result{};
          _ ->
              #search_result{
                  result=Results,
                  total=length(Results)
              }
      end.
  %% @doc The ranking behaviour for scoring words in a full text search.
  %% This is the integer passed as last argument to ts_rank_cd in the full
  %% text queries. It is read from the config key mod_search.rank_behaviour
  %% and defaults to `1 bor 4 bor 32' when the key is not set or empty.
  %% See also: http://www.postgresql.org/docs/9.3/static/textsearch-controls.html
  -spec rank_behaviour(#context{}) -> integer().
  rank_behaviour(Context) ->
      case m_config:get_value(mod_search, rank_behaviour, Context) of
          undefined -> 1 bor 4 bor 32;
          <<>> -> 1 bor 4 bor 32;
          Configured -> z_convert:to_integer(Configured)
      end.
  %% @doc The weights for the ranking of the ABCD indexing categories, as a
  %% quoted SQL array literal that is spliced into the ts_rank_cd call of the
  %% full text queries.
  %% See also: http://www.postgresql.org/docs/9.3/static/textsearch-controls.html
  -spec rank_weight() -> string().
  rank_weight() ->
      Weights = "'{0.05, 0.25, 0.5, 1.0}'",
      Weights.