PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/apps/couch/src/couch_httpd_rewrite.erl

http://github.com/cloudant/bigcouch
Erlang | 463 lines | 287 code | 46 blank | 130 comment | 4 complexity | 3b0d076972cd268f549bbbdb4e38f8ce MD5 | raw file
Possible License(s): Apache-2.0
  1. % Licensed under the Apache License, Version 2.0 (the "License"); you may not
  2. % use this file except in compliance with the License. You may obtain a copy of
  3. % the License at
  4. %
  5. % http://www.apache.org/licenses/LICENSE-2.0
  6. %
  7. % Unless required by applicable law or agreed to in writing, software
  8. % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  9. % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  10. % License for the specific language governing permissions and limitations under
  11. % the License.
  12. %
  13. % bind_path is based on bind method from Webmachine
  14. %% @doc Module for URL rewriting by pattern matching.
  15. -module(couch_httpd_rewrite).
  16. -export([handle_rewrite_req/3]).
  17. -include("couch_db.hrl").
  18. -define(SEPARATOR, $\/).
  19. -define(MATCH_ALL, {bind, <<"*">>}).
  20. %% doc The http rewrite handler. All rewriting is done from
  21. %% /dbname/_design/ddocname/_rewrite by default.
  22. %%
  23. %% Each rule should be in the "rewrites" member of the design doc.
  24. %% Ex of a complete rule :
  25. %%
  26. %% {
  27. %% ....
  28. %% "rewrites": [
  29. %% {
  30. %% "from": "",
  31. %% "to": "index.html",
  32. %% "method": "GET",
  33. %% "query": {}
  34. %% }
  35. %% ]
  36. %% }
  37. %%
  38. %% from: is the path rule used to bind current uri to the rule. It
  39. %% use pattern matching for that.
  40. %%
  41. %% to: rule to rewrite an url. It can contain variables depending on binding
  42. %% variables discovered during pattern matching and query args (url args and from
  43. %% the query member.)
  44. %%
  45. %% method: method to bind the request method to the rule. by default "*"
  46. %% query: query args you want to define they can contain dynamic variable
  47. %% by binding the key to the bindings
  48. %%
  49. %%
  50. %% to and from are path with patterns. pattern can be string starting with ":" or
  51. %% "*". ex:
  52. %% /somepath/:var/*
  53. %%
  54. %% This path is converted into an Erlang list by splitting on "/". Each var is
  55. %% converted to an atom. "*" is converted to the '*' atom. The pattern matching is done
  56. %% by splitting the request url on "/" into a list of tokens. A string pattern will
  57. %% match an equal token. The star atom ('*' in single quotes) will match any number
  58. %% of tokens, but may only be present as the last pathterm in a pathspec. If all
  59. %% tokens are matched and all pathterms are used, then the pathspec matches. It works
  60. %% like webmachine. Each identified token will be reused in the "to" rule and in the query.
  61. %%
  62. %% The pattern matching is done by first matching the request method to a rule. By
  63. %% default all methods match a rule (method is equal to "*" by default). Then
  64. %% it will try to match the path to one rule. If no rule matches, then a 404 error
  65. %% is displayed.
  66. %%
  67. %% Once a rule is found we rewrite the request url using the "to" and
  68. %% "query" members. The identified token are matched to the rule and
  69. %% will replace var. if '*' is found in the rule it will contain the remaining
  70. %% part if it exists.
  71. %%
  72. %% Examples:
  73. %%
  74. %% Dispatch rule URL TO Tokens
  75. %%
  76. %% {"from": "/a/b", /a/b?k=v /some/b?k=v var =:= b
  77. %% "to": "/some/"} k = v
  78. %%
  79. %% {"from": "/a/b", /a/b /some/b?var=b var =:= b
  80. %% "to": "/some/:var"}
  81. %%
  82. %% {"from": "/a", /a /some
  83. %% "to": "/some/*"}
  84. %%
  85. %% {"from": "/a/*", /a/b/c /some/b/c
  86. %% "to": "/some/*"}
  87. %%
  88. %% {"from": "/a", /a /some
  89. %% "to": "/some/*"}
  90. %%
  91. %% {"from": "/a/:foo/*", /a/b/c /some/b/c?foo=b foo =:= b
  92. %% "to": "/some/:foo/*"}
  93. %%
  94. %% {"from": "/a/:foo", /a/b /some/?k=b&foo=b foo =:= b
  95. %% "to": "/some",
  96. %% "query": {
  97. %% "k": ":foo"
  98. %% }}
  99. %%
  100. %% {"from": "/a", /a?foo=b /some/b foo =:= b
  101. %% "to": "/some/:foo",
  102. %% }}
  %% Entry point of the rewrite handler for requests below
  %% /dbname/_design/ddocname/_rewrite. The "rewrites" member of the design
  %% document is converted to a dispatch list, the first rule binding the
  %% request method and the remaining path parts is selected, and a new
  %% mochiweb request built from the rewritten path is passed to
  %% couch_httpd:handle_request_int/5. Throws not_found when no rule binds.
  handle_rewrite_req(#httpd{
          path_parts = [DbName, <<"_design">>, DesignName, _Rewrite | PathParts],
          method = Method,
          mochi_req = MochiReq
      } = Req, _Db, DDoc) ->
      % we are in a design handler: rewritten paths are made relative to the
      % design document url
      DDocId = <<"_design/", DesignName/binary>>,
      BasePath = <<"/", DbName/binary, "/", DDocId/binary>>,
      QueryArgs = [decode_query_value(Arg) || Arg <- couch_httpd:qs(Req)],
      #doc{body = {Props}} = DDoc,
      % get rules from ddoc
      case couch_util:get_value(<<"rewrites">>, Props) of
          undefined ->
              couch_httpd:send_error(Req, 404, <<"rewrite_error">>,
                  <<"Invalid path.">>);
          Bin when is_binary(Bin) ->
              couch_httpd:send_error(Req, 400, <<"rewrite_error">>,
                  <<"Rewrite rules are a String. They must be a JSON Array.">>);
          Rules ->
              % create dispatch list from rules
              DispatchList = [make_rule(Rule) || {Rule} <- Rules],
              ReqMethod = couch_util:to_binary(Method),
              % get the raw target path by matching the url to a rule
              Target = case try_bind_path(DispatchList, ReqMethod,
                      PathParts, QueryArgs) of
                  no_dispatch_path ->
                      throw(not_found);
                  {NewPathParts, Bindings} ->
                      Segments = lists:map(fun quote_plus/1, NewPathParts),
                      % re-encode the query args (JSON-encoding them where
                      % needed); no "?" is emitted when nothing is left
                      QueryString = case maybe_encode_bindings(Bindings) of
                          [] -> [];
                          Encoded -> [["?", mochiweb_util:urlencode(Encoded)]]
                      end,
                      Path = binary_to_list(iolist_to_binary(
                          [string:join(Segments, [?SEPARATOR]), QueryString])),
                      % use the safe relative form when mochiweb yields one,
                      % otherwise keep the path as built
                      Relative = case mochiweb_util:safe_relative_path(Path) of
                          undefined -> Path;
                          Safe -> Safe
                      end,
                      ?b2l(BasePath) ++ "/" ++ Relative
              end,
              % normalize final path (fix levels "." and "..")
              NormalizedPath = ?b2l(iolist_to_binary(normalize_path(Target))),
              ?LOG_DEBUG("rewrite to ~p ~n", [NormalizedPath]),
              % build a new mochiweb request
              NewMochiReq = mochiweb_request:new(MochiReq:get(socket),
                  MochiReq:get(method),
                  NormalizedPath,
                  MochiReq:get(version),
                  MochiReq:get(headers)),
              % cleanup forces mochiweb to reparse the raw uri
              NewMochiReq:cleanup(),
              couch_httpd:handle_request_int(NewMochiReq,
                  Req#httpd.default_fun,
                  Req#httpd.url_handlers,
                  Req#httpd.db_url_handlers,
                  Req#httpd.design_url_handlers)
      end.
  %% Url-encode one path segment with mochiweb_util:quote_plus/1. A segment that
  %% is still a {bind, Name} tuple is encoded by its name.
  quote_plus(Segment) ->
      Raw = case Segment of
          {bind, Name} -> Name;
          Other -> Other
      end,
      mochiweb_util:quote_plus(Raw).
  %% @doc Walk the dispatch list and return {NewPathParts, Bindings} for the
  %% first rule whose method and path both bind to the request. Returns
  %% no_dispatch_path when the list is exhausted without a match.
  try_bind_path([], _Method, _PathParts, _QueryList) ->
      no_dispatch_path;
  try_bind_path([Rule | Rules], Method, PathParts, QueryList) ->
      [{FromParts, RuleMethod}, ToParts, RuleQuery, Formats] = Rule,
      % the path is only tried when the method binds
      Bound = case bind_method(RuleMethod, Method) of
          true -> bind_path(FromParts, PathParts, []);
          false -> fail
      end,
      case Bound of
          {ok, Remaining, PathBindings} ->
              AllBindings = PathBindings ++ QueryList,
              % parse the query args of the rule and fill them with the
              % binding values where they reference one
              RewrittenQuery = make_query_list(RuleQuery, AllBindings,
                  Formats, []),
              % keep only the bindings that the rewritten query does not
              % already define
              Kept = lists:flatmap(fun({K, V}) ->
                  Key = to_binding(K),
                  case couch_util:get_value(Key, RewrittenQuery) of
                      undefined -> [{Key, V}];
                      _Defined -> []
                  end
              end, AllBindings),
              FinalBindings = Kept ++ RewrittenQuery,
              NewPathParts = make_new_path(ToParts, FinalBindings,
                  Remaining, []),
              {NewPathParts, FinalBindings};
          fail ->
              try_bind_path(Rules, Method, PathParts, QueryList)
      end.
  %% Rewrite the "query" member of a rule. Every key becomes a binding; binary
  %% and list values go through replace_var/3 so that references to bindings or
  %% url arguments are substituted, any other value (JSON objects included) is
  %% kept untouched. Entries are prepended to Acc, so the result is in reverse
  %% order of the rule's query member.
  make_query_list([], _Bindings, _Formats, Acc) ->
      Acc;
  make_query_list([{Key, Value} | Rest], Bindings, Formats, Acc)
          when is_binary(Value); is_list(Value) ->
      Substituted = replace_var(Value, Bindings, Formats),
      make_query_list(Rest, Bindings, Formats,
          [{to_binding(Key), Substituted} | Acc]);
  make_query_list([{Key, Value} | Rest], Bindings, Formats, Acc) ->
      make_query_list(Rest, Bindings, Formats,
          [{to_binding(Key), Value} | Acc]).
  %% Substitute binding references in a query value. A binary "*" or ":name" is
  %% looked up with get_var/4 (falling back to the value itself); inside a list
  %% only ":name" elements are looked up. Everything else is returned as is.
  replace_var(<<"*">> = Value, Bindings, Formats) ->
      get_var(Value, Bindings, Value, Formats);
  replace_var(<<":", Var/binary>> = Value, Bindings, Formats) ->
      get_var(Var, Bindings, Value, Formats);
  replace_var(Values, Bindings, Formats) when is_list(Values) ->
      Substitute = fun
          (<<":", Var/binary>> = Item) ->
              get_var(Var, Bindings, Item, Formats);
          (Item) ->
              Item
      end,
      lists:map(Substitute, Values);
  replace_var(Value, _Bindings, _Formats) ->
      Value.
  %% JSON-encode the value of the view query keys listed in the guard; values of
  %% any other key are returned unchanged.
  maybe_json(Key, Value) when Key =:= <<"key">>;
                              Key =:= <<"startkey">>;
                              Key =:= <<"start_key">>;
                              Key =:= <<"endkey">>;
                              Key =:= <<"end_key">>;
                              Key =:= <<"keys">> ->
      ?JSON_ENCODE(Value);
  maybe_json(_Key, Value) ->
      Value.
  %% Look VarName up in Props (keyed by bindings), falling back to Default, and
  %% apply the format registered for VarName, if any.
  get_var(VarName, Props, Default, Formats) ->
      Bound = couch_util:get_value(to_binding(VarName), Props, Default),
      maybe_format(VarName, Bound, Formats).
  %% Convert Value with the format registered for VarName in Formats; without a
  %% registered format the value is returned unchanged.
  maybe_format(VarName, Value, Formats) ->
      apply_format(couch_util:get_value(VarName, Formats), Value).

  %% No format registered: keep the value; otherwise delegate to format/2.
  apply_format(undefined, Value) ->
      Value;
  apply_format(Format, Value) ->
      format(Format, Value).
  %% Convert Value according to a format name taken from the "formats" member
  %% of a rule.
  %%   <<"int">>  - integers pass through; binaries and strings holding an
  %%                integer literal become that integer.
  %%   <<"bool">> - binaries and strings equal to "true"/"false" (any case)
  %%                become the corresponding boolean.
  %% A value that cannot be converted, and any value under an unknown format,
  %% is returned exactly as it was given: a binary stays a binary instead of
  %% coming back as a char list.
  format(<<"int">>, Value) when is_integer(Value) ->
      Value;
  format(<<"int">>, Value) when is_binary(Value) ->
      format_int(binary_to_list(Value), Value);
  format(<<"int">>, Value) when is_list(Value) ->
      format_int(Value, Value);
  format(<<"bool">>, Value) when is_binary(Value) ->
      format_bool(binary_to_list(Value), Value);
  format(<<"bool">>, Value) when is_list(Value) ->
      format_bool(Value, Value);
  format(_Format, Value) ->
      Value.

  %% Parse String as an integer; Original is returned when it is not one.
  format_int(String, Original) ->
      try
          list_to_integer(String)
      catch
          error:badarg -> Original
      end.

  %% Parse String as a case-insensitive boolean literal; Original is returned
  %% when it is neither "true" nor "false".
  format_bool(String, Original) ->
      case string:to_lower(String) of
          "true" -> true;
          "false" -> false;
          _ -> Original
      end.
  %% Build the new path from the "to" parts of a rule. Bindings are the query
  %% args (plus the rewritten query of the rule) and the bindings found by
  %% bind_path/3. The first "*" part ends the path with the Remaining request
  %% parts; a variable without a binding becomes the segment "undefined".
  make_new_path([], _Bindings, _Remaining, Acc) ->
      lists:reverse(Acc);
  make_new_path([?MATCH_ALL | _Ignored], _Bindings, Remaining, Acc) ->
      % whatever follows "*" in the rule is dropped
      lists:reverse(Acc, Remaining);
  make_new_path([{bind, _Name} = Var | Rest], Bindings, Remaining, Acc) ->
      Segment = case couch_util:get_value(Var, Bindings) of
          undefined -> <<"undefined">>;
          Bound -> iolist_to_binary(Bound)
      end,
      make_new_path(Rest, Bindings, Remaining, [Segment | Acc]);
  make_new_path([Literal | Rest], Bindings, Remaining, Acc) ->
      make_new_path(Rest, Bindings, Remaining, [Literal | Acc]).
  %% @doc Tell whether the request method fits the method of a rule. A rule
  %% method of "*", which is the default, binds every request method; any
  %% other rule method binds only the identical request method. This is what
  %% allows rules to depend on the HTTP method.
  bind_method(RuleMethod, Method) ->
      RuleMethod =:= ?MATCH_ALL orelse RuleMethod =:= {bind, Method}.
  %% @doc Bind the request path to the "from" parts of a rule. Returns
  %% {ok, Remaining, Bindings} when the parts match and fail otherwise.
  %% A trailing "*" needs at least one request part left: it is bound to the
  %% first of them and all of them are returned as Remaining.
  bind_path(RuleParts, PathParts, Bindings) ->
      case {RuleParts, PathParts} of
          {[], []} ->
              {ok, [], Bindings};
          {[?MATCH_ALL], [First | _]} ->
              {ok, PathParts, [{?MATCH_ALL, First} | Bindings]};
          {[{bind, _Name} = Var | RuleRest], [Segment | PathRest]} ->
              % a variable binds whatever request part is at its position
              bind_path(RuleRest, PathRest, [{Var, Segment} | Bindings]);
          {[Literal | RuleRest], [Literal | PathRest]} ->
              % a literal part must be equal to the request part
              bind_path(RuleRest, PathRest, Bindings);
          _NoMatch ->
              fail
      end.
  %% Normalize a path: split it on "/", resolve the "." and ".." segments with
  %% normalize_path1/2 and join the result again behind a leading "/".
  normalize_path(Path) ->
      Segments = string:tokens(Path, "/"),
      Resolved = normalize_path1(Segments, []),
      [$/ | string:join(Resolved, [?SEPARATOR])].
  %% Resolve "." and ".." in a list of path segments. Acc holds the segments
  %% kept so far, most recent first. "." is dropped; ".." removes the previous
  %% segment, unless there is none or it is itself "..", in which case the
  %% ".." is kept.
  normalize_path1([], Acc) ->
      lists:reverse(Acc);
  normalize_path1(["." | Rest], Acc) ->
      normalize_path1(Rest, Acc);
  normalize_path1([".." | Rest], [Parent | Above]) when Parent =/= ".." ->
      normalize_path1(Rest, Above);
  normalize_path1([Segment | Rest], Acc) ->
      normalize_path1(Rest, [Segment | Acc]).
  %% @doc Transform one JSON rule (a property list) into the Erlang form used
  %% for pattern matching: [{FromParts, Method}, ToParts, QueryArgs, Formats].
  %% "method" defaults to match-all, "from" to a single match-all part,
  %% "query" and "formats" to an empty list. A rule without "to" throws
  %% {error, invalid_rewrite_target}.
  make_rule(Rule) ->
      Field = fun(Name) -> couch_util:get_value(Name, Rule) end,
      Method = case Field(<<"method">>) of
          undefined -> ?MATCH_ALL;
          RuleMethod -> to_binding(RuleMethod)
      end,
      QueryArgs = case Field(<<"query">>) of
          undefined -> [];
          {Args} -> Args
      end,
      FromParts = case Field(<<"from">>) of
          undefined -> [?MATCH_ALL];
          From -> parse_path(From)
      end,
      ToParts = case Field(<<"to">>) of
          undefined -> throw({error, invalid_rewrite_target});
          To -> parse_path(To)
      end,
      Formats = case Field(<<"formats">>) of
          undefined -> [];
          {Fmts} -> Fmts
      end,
      [{FromParts, Method}, ToParts, QueryArgs, Formats].
  %% Split a "from" or "to" path on "/" and convert the pieces with
  %% path_to_list/3, starting with a ".." count of zero.
  parse_path(Path) ->
      Pieces = re:split(Path, <<"\\/">>),
      path_to_list(Pieces, [], 0).
  %% @doc Convert the pieces of a path rule (from or to) to an Erlang list.
  %% Empty pieces are dropped, "*" becomes the match-all binding and a piece
  %% starting with ":" becomes a binding of the name after the colon. ".."
  %% pieces are counted: when two have already been seen, a further one is
  %% only accepted if the "secure_rewrites" setting of the "httpd" section is
  %% "false"; otherwise {insecure_rewrite_rule, _} is thrown.
  path_to_list([], Acc, _DotDotCount) ->
      lists:reverse(Acc);
  path_to_list([<<>> | Rest], Acc, DotDotCount) ->
      path_to_list(Rest, Acc, DotDotCount);
  path_to_list([<<"*">> | Rest], Acc, DotDotCount) ->
      path_to_list(Rest, [?MATCH_ALL | Acc], DotDotCount);
  path_to_list([<<"..">> | Rest], Acc, DotDotCount) ->
      % the configuration is only consulted for the third ".."
      Allowed = DotDotCount /= 2 orelse
          couch_config:get("httpd", "secure_rewrites", "true") =:= "false",
      case Allowed of
          true ->
              path_to_list(Rest, [<<"..">> | Acc], DotDotCount + 1);
          false ->
              ?LOG_INFO("insecure_rewrite_rule ~p blocked", [lists:reverse(Acc) ++ [<<"..">>] ++ Rest]),
              throw({insecure_rewrite_rule, "too many ../.. segments"})
      end;
  path_to_list([<<":", Var/binary>> | Rest], Acc, DotDotCount) ->
      path_to_list(Rest, [to_binding(Var) | Acc], DotDotCount);
  path_to_list([Literal | Rest], Acc, DotDotCount) ->
      path_to_list(Rest, [Literal | Acc], DotDotCount).
  %% Turn the final bindings into {Name, BinaryValue} pairs for the query
  %% string. The match-all binding is left out, values go through maybe_json/2
  %% and are flattened to a binary. The result is in reverse order of Props.
  maybe_encode_bindings(Props) ->
      encode_bindings(Props, []).

  %% Accumulator loop of maybe_encode_bindings/1.
  encode_bindings([], Acc) ->
      Acc;
  encode_bindings([{{bind, <<"*">>}, _Value} | Rest], Acc) ->
      encode_bindings(Rest, Acc);
  encode_bindings([{{bind, Name}, Value} | Rest], Acc) ->
      Encoded = iolist_to_binary(maybe_json(Name, Value)),
      encode_bindings(Rest, [{Name, Encoded} | Acc]).
  %% Convert one {Key, Value} query string pair into a {Binding, Value} pair.
  %% The values of the view keys listed below are JSON-decoded, all other
  %% values are converted to binaries.
  decode_query_value({Key, Value}) ->
      JsonKeys = ["key", "startkey", "start_key",
                  "endkey", "end_key", "keys"],
      Binding = to_binding(Key),
      Decoded = case lists:member(Key, JsonKeys) of
          true -> ?JSON_DECODE(Value);
          false -> ?l2b(Value)
      end,
      {Binding, Decoded}.
  %% Wrap a name into a {bind, Name} tuple. A value that already is a binding is
  %% returned unchanged and a string name is converted to a binary first.
  to_binding({bind, _Name} = Binding) ->
      Binding;
  to_binding(Name) when is_list(Name) ->
      {bind, ?l2b(Name)};
  to_binding(Name) ->
      {bind, Name}.