PageRenderTime 31ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/lang_php/analyze/basic/include_require_php.ml

https://github.com/Tener/pfff
OCaml | 458 lines | 322 code | 70 blank | 66 comment | 12 complexity | 14f8b4fdc6073618b55344a076704ed5 MD5 | raw file
  1. (* Yoann Padioleau
  2. *
  3. * Copyright (C) 2010 Facebook
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public License
  7. * version 2.1 as published by the Free Software Foundation, with the
  8. * special exception on linking described in file license.txt.
  9. *
  10. * This library is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
  13. * license.txt for more details.
  14. *)
  15. open Common
  16. open Ast_php
  17. module Ast = Ast_php
  18. module V = Visitor_php
  19. module Env = Env_php
  20. (*****************************************************************************)
  21. (* Prelude *)
  22. (*****************************************************************************)
  23. (*
  24. * Most of the arguments to require/include are static strings or concatenation
  25. * of known variables (e.g. $_SERVER) to static strings. It is useful to
  26. * statically analyze those arguments, e.g. to detect bugs such as missing
  27. * filenames, and so to resolve statically the filenames, hence this file.
  28. * We just provide a better "view" over the Include | Require | ...
  29. * statements present in Ast_php.
  30. *)
  31. (*****************************************************************************)
  32. (* Types *)
  33. (*****************************************************************************)
  (* Which PHP directive introduced the inclusion. *)
  type increq_kind =
    | Include
    | IncludeOnce
    | Require
    | RequireOnce

  (* The recognized shapes of the argument given to an include/require
   * directive. Anything that is not recognized ends up in Other.
   *)
  type increq_expr =
    (* a plain string literal, e.g. require 'master_include.php'; *)
    | Direct of Common.filename
    (* variable . string, e.g. require $BASEPATH .'/lib/init/ajax.php'; *)
    | ConcatVar of Ast_php.dname * Common.filename
    (* constant . string, e.g. require BASEPATH .'/lib/init/ajax.php'; *)
    | ConcatConstant of Ast_php.name * Common.filename
    (* array access . string,
     * e.g. require $_SERVER['PHP_ROOT'].'/lib/init/ajax.php';
     * The components are the array variable, the string key and the
     * file suffix.
     *)
    | ConcatArrrayVar of Ast_php.dname * string * Common.filename
    (* e.g. require dirname(__FILE__).'/master_include.php'; *)
    | ConcatDirname of Common.filename
    (* e.g. require realpath(dirname(__FILE__)).'/master_include.php';
     * todo: diff with just dirname ??
     *)
    | ConcatRealpathDirname of Common.filename
    (* a lone variable, e.g. require $file; *)
    | SimpleVar of Ast_php.dname
    (* any expression that matches none of the shapes above *)
    | Other of Ast_php.expr

  (* An include/require occurrence: the directive kind, the token attached
   * to the directive in the AST, and the classified argument.
   *)
  type increq = increq_kind * Ast_php.tok * increq_expr
  59. (*****************************************************************************)
  60. (* Helpers *)
  61. (*****************************************************************************)
  62. (* todo? should perhaps try to port that to use sgrep, and to access
  63. * sgrep result from OCaml itself.
  64. *)
  (* Classify the argument expression of an include/require directive
   * into one of the increq_expr shapes.
   *
   * Most of the patterns below were generated by dumping the AST of small
   * PHP test files (see the comment above each case); positions and type
   * annotations are irrelevant here and are matched with wildcards.
   * The cases are tried in order; an expression matching none of them is
   * returned unchanged inside Other.
   *)
  let rec increq_expr_of_expr = function
    (* a plain string literal *)
    | (Sc (C (String (sfilename, _))), _) ->
        Direct sfilename

    (* generated from ./ffi -dump_php_ml ../tests/require_classic.php *)
    | (Binary (
         (Lv (VArrayAccess (
                (Var (darray, _), _),
                (_, Some (Sc (C (String (sfld, _))), _), _)),
              _),
          _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename, _))), _)),
       _) ->
        ConcatArrrayVar (darray, sfld, sfilename)

    (* generated from ./ffi -dump_php_ml ../tests/require_classic_bis.php *)
    | (Binary (
         (Lv (Var (dvar, _), _), _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename, _))), _)),
       _) ->
        ConcatVar (dvar, sfilename)

    (* generated from ./ffi -dump_php_ml ../tests/require_classic2.php
     * Same as the array-access case, with two string suffixes that are
     * glued together.
     *)
    | (Binary (
         (Binary (
            (Lv (VArrayAccess (
                   (Var (darray, _), _),
                   (_, Some (Sc (C (String (sfld, _))), _), _)),
                 _),
             _),
            (BinaryConcat, _),
            (Sc (C (String (sfilename1, _))), _)),
          _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename2, _))), _)),
       _) ->
        ConcatArrrayVar (darray, sfld, sfilename1 ^ sfilename2)

    (* ./ffi -dump_php_ml ../tests/require_dirname.php *)
    | (Binary (
         (Lv (FunCallSimple (
                Name ("dirname", _),
                (_,
                 [Left (Arg (Sc (C (CName (Name ("__FILE__", _)))), _))],
                 _)),
              _),
          _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename, _))), _)),
       _) ->
        ConcatDirname sfilename

    (* ./ffi -dump_php_ml ../tests/require_realpath.php *)
    | (Binary (
         (Lv (FunCallSimple (
                Name ("realpath", _),
                (_,
                 [Left (Arg
                   (Lv (FunCallSimple (
                          Name ("dirname", _),
                          (_,
                           [Left (Arg
                             (Sc (C (CName (Name ("__FILE__", _)))), _))],
                           _)),
                        _),
                    _))],
                 _)),
              _),
          _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename, _))), _)),
       _) ->
        ConcatRealpathDirname sfilename

    (* realpath(dirname(__FILE__) . 'a') . 'b' *)
    | (Binary (
         (Lv (FunCallSimple (
                Name ("realpath", _),
                (_,
                 [Left (Arg
                   (Binary (
                      (Lv (FunCallSimple (
                             Name ("dirname", _),
                             (_,
                              [Left (Arg
                                (Sc (C (CName (Name ("__FILE__", _)))), _))],
                              _)),
                           _),
                       _),
                      (BinaryConcat, _),
                      (Sc (C (String (sfilename1, _))), _)),
                    _))],
                 _)),
              _),
          _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename2, _))), _)),
       _) ->
        ConcatRealpathDirname (sfilename1 ^ sfilename2)

    (* ./ffi -dump_php_ml ../tests/require_realpath3.php *)
    | (Lv (FunCallSimple (
             Name ("realpath", _),
             (_,
              [Left (Arg
                (Binary (
                   (Lv (FunCallSimple (
                          Name ("dirname", _),
                          (_,
                           [Left (Arg
                             (Sc (C (CName (Name ("__FILE__", _)))), _))],
                           _)),
                        _),
                    _),
                   (BinaryConcat, _),
                   (Sc (C (String (sfilename, _))), _)),
                 _))],
              _)),
           _),
       _) ->
        ConcatRealpathDirname sfilename

    (* ./ffi -dump_php_ml ../tests/require_constant_concat.php *)
    | (Binary (
         (Sc (C (CName name)), _),
         (BinaryConcat, _),
         (Sc (C (String (sfilename, _))), _)),
       _) ->
        ConcatConstant (name, sfilename)

    (* ./ffi -dump_php_ml ../tests/require_classic_bis2.php
     * A double-quoted string made of a variable followed by text.
     *)
    | (Sc (Guil (_,
                 [EncapsVar (Var (dname, _), _);
                  EncapsString (sfilename, _)],
                 _)),
       _) ->
        ConcatVar (dname, sfilename)

    (* some simple isomorphisms *)
    | (ParenExpr eparen, _) ->
        increq_expr_of_expr (Ast.unparen eparen)
    | (Lv (Var (dvar, _), _), _) ->
        SimpleVar dvar

    | unrecognized -> Other unrecognized
  222. (* todo: check that the directives are at the toplevel ? *)
  (* View an expression as an include/require directive.
   * Returns Some (kind, tok, classified_argument) when the expression,
   * once passed through Ast.untype, is one of the four Include,
   * IncludeOnce, Require or RequireOnce constructs, and None otherwise.
   *)
  let increq_of_include_stmt e =
    let directive kind tok arg = Some (kind, tok, increq_expr_of_expr arg) in
    match Ast.untype e with
    | Ast.Include (tok, arg) -> directive Include tok arg
    | Ast.IncludeOnce (tok, arg) -> directive IncludeOnce tok arg
    | Ast.Require (tok, arg) -> directive Require tok arg
    | Ast.RequireOnce (tok, arg) -> directive RequireOnce tok arg
    | _ -> None
  (* Join [dir] and [file] with Filename.concat, after dropping one leading
   * '/' from [file] if there is one. PHP code typically writes
   * $ROOT . '/lib/x.php', and keeping that slash would yield a doubled
   * separator in the result.
   *
   * Only a single leading slash is removed; the rest of [file] is used
   * as is (no regexp is involved, so a name containing a newline is not
   * truncated).
   *)
  let filename_concat dir file =
    let len = String.length file in
    let relative =
      if len > 0 && file.[0] = '/'
      then String.sub file 1 (len - 1)
      else file
    in
    Filename.concat dir relative
  234. (*****************************************************************************)
  235. (* Main entry points *)
  236. (*****************************************************************************)
  (* Collect the include/require directives found by visiting an AST
   * element. Built with V.do_visit_with_ref: the expression hook pushes
   * every directive it recognizes onto the reference supplied by that
   * helper. An expression recognized as a directive is not visited
   * further; any other expression is handed to the default continuation.
   *)
  let all_increq_of_any =
    V.do_visit_with_ref (fun aref ->
      let on_expr (k, _bigf) expr =
        match increq_of_include_stmt expr with
        | None ->
            (* do we need to recurse ? *)
            k expr
        | Some increq -> Common.push2 increq aref
      in
      { V.default_visitor with V.kexpr = on_expr })
  (* Include/require directives among the statements returned by
   * Lib_parsing_php.top_statements_of_program. Only expression statements
   * are considered; every other statement is skipped.
   *)
  let top_increq_of_program asts =
    let increq_of_stmt = function
      | ExprStmt (e, _tok) -> increq_of_include_stmt e
      | _ -> None
    in
    Common.map_filter increq_of_stmt
      (Lib_parsing_php.top_statements_of_program asts)
  256. (* note that the strings in increq_expr can contain some '../' and
  257. * so need to resolve also that
  258. *)
  (* Try to turn a classified include/require argument into a filename.
   *
   * [env] supplies the known globals, constants and global arrays;
   * [pwd] is the directory used for relative paths and for the
   * dirname(__FILE__) forms.
   *
   * Returns None when a needed variable, constant or array entry is
   * missing from [env], and for the SimpleVar and Other shapes.
   * An absolute Direct path is returned unchanged after a one-time
   * warning.
   *)
  let resolve_path (env, pwd) incexpr =
    (* Hashtbl.find, with a missing key reported as None *)
    let lookup tbl key =
      try Some (Hashtbl.find tbl key) with Not_found -> None
    in
    let under base filename = Some (filename_concat base filename) in
    match incexpr with
    | Direct filename ->
        if not (Filename.is_relative filename) then begin
          pr2_once (spf "should not use absolute path in include/require: %s"
                      filename);
          Some filename
        end
        else Some (Filename.concat pwd filename)

    | ConcatVar (dname, filename) ->
        let s = Ast.dname dname in
        (match lookup env.Env.globals s with
         | Some base -> under base filename
         | None ->
             (* maybe a dynamic var like $BASE_PATH *)
             (match env.Env.globals_specials s pwd with
              | Some base -> under base filename
              | None -> None))

    | ConcatConstant (name, filename) ->
        (match lookup env.Env.constants (Ast.name name) with
         | Some base -> under base filename
         | None -> None)

    | ConcatArrrayVar (dname, fld, filename) ->
        (match lookup env.Env.global_arrays (Ast.dname dname) with
         | None -> None
         | Some fields ->
             (match lookup fields fld with
              | Some base -> under base filename
              | None -> None))

    | ConcatDirname filename
    | ConcatRealpathDirname filename ->
        under pwd filename

    | SimpleVar _
    | Other _ ->
        None
  306. (* note: copy pasted in flib.ml *)
  (* Parse [file] and return the filenames of the include/require
   * directives it contains that could be resolved, using the directory
   * of [file] as the current directory. A directive that cannot be
   * resolved is reported once through pr2_once and left out of the
   * result.
   *)
  let includes_of_file env file =
    let ast = Parse_php.parse_program file in
    let dir = dirname file in
    let resolve (_kind, tok, incexpr) =
      match resolve_path (env, dir) incexpr with
      | Some _ as resolved -> resolved
      | None ->
          pr2_once (spf "includes_of_file: could not resolve path at\t %s"
                      (Ast.string_of_info tok));
          None
    in
    Common.map_filter resolve (all_increq_of_any (Program ast))
  (* Traversal strategy for following includes: depth first or
   * breadth first.
   *)
  type algo =
    | Dfs
    | Bfs

  (* The strategy recursive_included_files_of_file dispatches on. *)
  let default_algo = Dfs
  (* Follow includes transitively, starting from [file], and return the
   * contents of the table of visited files via Common.hashset_to_list
   * (the starting file is recorded too, unless the depth limit is
   * already reached at depth 0).
   *
   * ?verbose: print progress with pr2.
   * ?depth_limit: Some n stops the depth-first walk at depth n; None
   *   means no limit. Only the depth-first walk honours it.
   * ?includes_of_file: the function computing the direct includes of a
   *   file; defaults to the includes_of_file defined above.
   *
   * The walk used is selected by default_algo. In the depth-first walk,
   * an exception raised while computing the includes of a file is
   * re-raised after the chain of including files has been printed.
   *)
  let recursive_included_files_of_file
      ?(verbose=false)
      ?(depth_limit = None)
      ?(includes_of_file=includes_of_file)
      env file =
    let visited = Hashtbl.create 101 in
    let mark_visited path = Hashtbl.add visited path true in

    (* only for the dfs for now *)
    let too_deep depth =
      match depth_limit with
      | Some limit -> depth >= limit
      | None -> false
    in

    (* print each file on the current inclusion chain with its includes *)
    let dump_trace trace =
      List.iter (fun (parent, children) ->
        pr2 (spf " %s:" parent);
        List.iter (fun child -> pr2 (spf " %s" child)) children
      ) trace
    in

    let rec aux_dfs depth current trace =
      if not (Hashtbl.mem visited current || too_deep depth) then begin
        mark_visited current;
        (* could be good to have a readable path here.
         * info is in env ? :)
         *)
        if verbose then begin
          Common._tab_level_print := depth;
          pr2 (spf "->%s" current);
        end;
        let children =
          try includes_of_file env current
          with exn ->
            pr2 (spf "PB processing %s, exn = %s. Trace = "
                   current (Common.exn_to_s exn));
            dump_trace trace;
            raise exn
        in
        let extended_trace = (current, children) :: trace in
        List.iter
          (fun child -> aux_dfs (depth + 1) child extended_trace)
          children
      end
    in

    let aux_bfs () =
      (* process one file of the current wave; [pending] accumulates the
       * files of the next wave *)
      let expand pending path =
        if Hashtbl.mem visited path then pending
        else begin
          mark_visited path;
          let children = includes_of_file env path in
          if verbose then
            pr2 (spf "Found %d includes (implicit or explicit) in\n\t%s"
                   (List.length children) path);
          children ++ pending
        end
      in
      let rec process_wave wave =
        if wave <> [] then process_wave (List.fold_left expand [] wave)
      in
      process_wave [file]
    in

    (match default_algo with
     | Bfs -> aux_bfs ()
     | Dfs ->
         Common.save_excursion Common._tab_level_print 1 (fun () ->
           aux_dfs 0 file []
         )
    );
    Common.hashset_to_list visited