PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/lang_php/analyze/foundation/unit_static_analysis_php.ml

https://github.com/facebook/pfff
OCaml | 467 lines | 328 code | 57 blank | 82 comment | 2 complexity | 33cb2e26e2f88600909a8195a62b14a5 MD5 | raw file
  1. open Common
  2. open OUnit
  3. open Env_interpreter_php
  4. module Env = Env_interpreter_php
  5. module Interp = Abstract_interpreter_php.Interp (Tainting_fake_php.Taint)
  6. module Db = Database_juju_php
  7. module CG = Callgraph_php2
  8. (*****************************************************************************)
  9. (* Prelude *)
  10. (*****************************************************************************)
  11. (* See also tests/php/ia/*.php *)
  12. (*****************************************************************************)
  13. (* Run analysis *)
  14. (*****************************************************************************)
  (* Turn PHP source text into the two inputs the abstract interpreter needs.
   *
   * [content] is dumped into a temporary PHP file; a code database is built
   * from that single file, an empty interpreter environment is created for
   * it, and the file is parsed and converted to the simple AST.
   * Returns the pair (environment, simple AST).
   *)
  let prepare content =
    let php_file = Parse_php.tmp_php_file_from_string content in
    let juju_db = Db.juju_db_of_files [php_file] in
    let code_db = Db.code_database_of_juju_db juju_db in
    (* the environment is built before parsing, as in the original order *)
    let env = Env.empty_env code_db php_file in
    let simple_ast =
      Parse_php.parse_program php_file +> Ast_php_simple_build.program
    in
    (env, simple_ast)
  (* Run the abstract interpreter on the PHP program [content] and return
   * what the interpreter recorded in
   * Abstract_interpreter_php._checkpoint_heap (destructured by the callers
   * as a (heap, vars) pair).
   *
   * The run is done with extract_paths = false and strict = true; both
   * settings are restored afterwards by Common.save_excursion.
   *
   * Raises Failure if nothing was recorded during this run, which means
   * the PHP program never called checkpoint().
   *)
  let heap_of_program_at_checkpoint content =
    let (env, ast) = prepare content in
    Common.save_excursion Abstract_interpreter_php.extract_paths false (fun () ->
    Common.save_excursion Abstract_interpreter_php.strict true (fun () ->
      (* The checkpoint slot is a global: clear it first, otherwise a program
       * that forgets checkpoint() would silently return the heap left over
       * by a previous test instead of failing below. *)
      Abstract_interpreter_php._checkpoint_heap := None;
      let _heap = Interp.program env Env.empty_heap ast in
      match !Abstract_interpreter_php._checkpoint_heap with
      | None -> failwith "use checkpoint() in your unit test"
      | Some x -> x
    ))
  (* less: use Callgraph_php_build.create_graph *)
  (* Run the abstract interpreter on the PHP program [content] with
   * extract_paths = true and strict = true, and return the content of
   * Abstract_interpreter_php.graph after the run. The global graph is reset
   * to the empty map before interpreting.
   *)
  let callgraph_generation content =
    let (env, ast) = prepare content in
    let interpret_and_collect () =
      Abstract_interpreter_php.graph := Map_poly.empty;
      ignore (Interp.program env Env.empty_heap ast);
      !(Abstract_interpreter_php.graph)
    in
    Common.save_excursion Abstract_interpreter_php.extract_paths true (fun () ->
      Common.save_excursion Abstract_interpreter_php.strict true
        interpret_and_collect)
  43. (*****************************************************************************)
  44. (* Examine value *)
  45. (*****************************************************************************)
  (* Follow the pointers starting at value [v] and return every value met on
   * the way, starting with [v] itself and ending with the first value that
   * is neither a Vptr nor a Vref.
   *
   * A Vptr is dereferenced through heap.ptrs; for a Vref one address is
   * picked from its set with ISet.choose and followed as a Vptr.
   * IMap.find / ISet.choose raise Not_found on a dangling address or an
   * empty set.
   *)
  let rec chain_ptrs heap v =
    match v with
    | Vptr addr ->
        let pointee = IMap.find addr heap.ptrs in
        v :: chain_ptrs heap pointee
    | Vref addrs ->
        let addr = ISet.choose addrs in
        v :: chain_ptrs heap (Vptr addr)
    | _ ->
        [v]
  (* Look up variable [s] in [vars] and return its chain of values as
   * computed by chain_ptrs (the variable's own pointer first, the final
   * value last).
   *
   * Fails the current OUnit test, via assert_failure, when the variable is
   * not bound in [vars] or when it is not bound to a Vptr.
   *)
  let value_of_var s vars heap =
    let v =
      (* report a missing variable as a readable test failure naming the
       * variable, rather than letting a bare Not_found escape *)
      try SMap.find s vars
      with Not_found ->
        assert_failure (spf "variable %s is not defined" s)
    in
    match v with
    | Vptr _ -> chain_ptrs heap v
    | _ -> assert_failure "variable is not a Vptr"
  (* Render the first value of the chain [v] with Env.string_of_value,
   * for use in failure messages. [v] must not be empty (List.hd). *)
  let info heap v =
    let first = List.hd v in
    Env.string_of_value heap first
  62. (*****************************************************************************)
  63. (* Assert helpers *)
  64. (*****************************************************************************)
  (* Interpret the PHP program [file], fetch the value chain of variable
   * [var] at the checkpoint, and fail the test unless the predicate
   * [fpattern] accepts that chain. *)
  let assert_value_at_checkpoint var file fpattern =
    let (heap, vars) = heap_of_program_at_checkpoint file in
    let chain = value_of_var var vars heap in
    match fpattern chain with
    | true -> ()
    | false ->
        assert_failure (spf "wrong value for %s: %s " var (info heap chain))
  (* Specialization of assert_value_at_checkpoint: the chain of [var] must be
   * exactly two pointers followed by a value equal (=*=) to [v]. *)
  let assert_final_value_at_checkpoint var file v =
    let ends_on_expected chain =
      match chain with
      | [Vptr _; Vptr _; final] -> final =*= v
      | _ -> false
    in
    assert_value_at_checkpoint var file ends_on_expected
  (* todo: a pathdown, pathup specialization? *)
  (* Build the callgraph of the PHP program [file] and check it against [xs],
   * a list of (caller, expected callees) pairs given as strings.
   *
   * For each pair, the caller is converted with CG.node_of_string, its
   * callees are read from the graph and converted back to strings, and the
   * two lists are compared after sorting (so order does not matter).
   * A Not_found raised while handling a pair is reported as a test failure
   * naming the caller.
   *)
  let assert_graph file xs =
    let g = callgraph_generation file in
    (* printer so that a mismatch shows both callee lists *)
    let show_callees callees = "[" ^ String.concat "; " callees ^ "]" in
    xs +> List.iter (fun (s, expected) ->
      try
        let n = CG.node_of_string s in
        let actual_child =
          Map_poly.find n g
          +> Set_poly.elements
          +> List.map CG.string_of_node
        in
        assert_equal
          ~msg:"it should have the expected callees"
          ~printer:show_callees
          (sort expected)
          (sort actual_child)
      with Not_found ->
        assert_failure (spf "could not find callees for %s" s)
    )
    (* todo? assert all the nodes are there *)
  (* Sugar to write a graph as an adjacency list: caller --> callees is
   * just the pair (caller, callees). *)
  let (-->) caller callees = caller, callees
  99. (*****************************************************************************)
  100. (* Abstract interpreter *)
  101. (*****************************************************************************)
  (* OUnit suite for the abstract interpreter. Each case embeds a small PHP
   * program as a string literal and checks either the abstract value of a
   * variable at the checkpoint() call (first half) or the edges of the
   * generated callgraph (second half).
   *)
  102. let abstract_interpreter_unittest =
  103. "abstract interpreter" >::: [
  104. (*-------------------------------------------------------------------------*)
  105. (* Basic types and dataflow *)
  106. (*-------------------------------------------------------------------------*)
  107. "basic" >:: (fun () ->
  108. let file ="
  109. $x = 42;
  110. checkpoint(); // x:42
  111. " in
  112. (* note: I don't use assert_final_value_at_checkpoint for teaching
  113. * purpose here *)
  114. let (heap, vars) = heap_of_program_at_checkpoint file in
  115. match value_of_var "$x" vars heap with
  116. (* variables in PHP are pointers to a pointer to a value ... *)
  117. | [Vptr _n1; Vptr _n2; Vint 42] -> ()
  118. | v -> assert_failure ("wrong value for $x: " ^ info heap v)
  119. );
  (* a heredoc literal is expected to show up as a plain Vstring *)
  120. "unsugaring" >:: (fun () ->
  121. let file ="
  122. $x = <<<END
  123. hello
  124. END;
  125. checkpoint(); // x:'hello'
  126. " in
  127. assert_value_at_checkpoint "$x" file (function
  128. (* todo? it should maybe be "hello" without the newline *)
  129. | [Vptr _n1; Vptr _n2; Vstring "hello\n"] -> true | _ -> false)
  130. );
  (* after a reference assignment both variables are expected to go through
   * a Vref and to end on the same inner pointer, while keeping distinct
   * outer pointers *)
  131. "aliasing" >:: (fun () ->
  132. let file ="
  133. $x = 42;
  134. $y =& $x;
  135. checkpoint();
  136. " in
  137. let (heap, vars) = heap_of_program_at_checkpoint file in
  138. let x = value_of_var "$x" vars heap in
  139. let y = value_of_var "$y" vars heap in
  140. match x, y with
  141. | [Vptr ix1; Vref _set; Vptr ix2; Vint 42],
  142. [Vptr iy1; Vref _set2; Vptr iy2; Vint 42]
  143. ->
  144. assert_equal
  145. ~msg:"it should share the second pointer"
  146. ix2 iy2;
  147. assert_bool
  148. "variables should have different original pointers"
  149. (ix1 <> iy1)
  150. | _ -> assert_failure (spf "wrong value for $x: %s, $y = %s "
  151. (info heap x) (info heap y))
  152. );
  153. "abstraction when if" >:: (fun () ->
  154. let file ="
  155. $x = 1;
  156. $y = true; // path sensitivity would detect it's always $x = 2 ...
  157. if($y) { $x = 2;} else { $x = 3; }
  158. checkpoint(); // x: int
  159. " in
  160. (* there is no range, we go from a very precise value to a
  161. * very general abstraction (the type) very quickly.
  162. * If we forget the initial $x = 1; then $x will be instead
  163. * a 'choice(null,int)'.
  164. *)
  165. assert_final_value_at_checkpoint "$x" file (Vabstr Tint);
  166. );
  (* same program as above but starting from null: the expected result is
   * the sum of null and the abstract int *)
  167. "union types" >:: (fun () ->
  168. let file ="
  169. $x = null;
  170. $y = true;
  171. if($y) { $x = 2;} else { $x = 3; }
  172. checkpoint(); // x: null | int
  173. " in
  174. assert_final_value_at_checkpoint "$x" file (Vsum [Vnull; Vabstr Tint]);
  175. );
  (* $x is assigned twice before being copied into $y; the value expected
   * for $y is the abstract int, not a precise constant *)
  176. "simple dataflow" >:: (fun () ->
  177. let file ="
  178. $x = 2;
  179. $x = 3;
  180. $y = $x;
  181. checkpoint(); // y:int
  182. " in
  183. assert_final_value_at_checkpoint "$y" file (Vabstr Tint);
  184. );
  (* a variable initialized from a constant is expected to keep the precise
   * value of that constant (Vint 2 here) *)
  185. "constants" >:: (fun () ->
  186. let file ="
  187. const CST = 2;
  188. $x = CST;
  189. checkpoint(); // x:int
  190. " in
  191. assert_final_value_at_checkpoint "$x" file (Vint 2);
  192. );
  193. (*-------------------------------------------------------------------------*)
  194. (* Error handling *)
  195. (*-------------------------------------------------------------------------*)
  196. "use of undefined" >:: (fun () ->
  197. let file ="
  198. const CST = 2;
  199. $x = ANOTHER_CST;
  200. checkpoint(); // x:int
  201. " in
  (* the assert_failure below is reached only when the interpreter raised
   * nothing; the handler matches only UnknownConstant for ANOTHER_CST, so
   * anything else propagates to OUnit *)
  202. try
  203. let _ = heap_of_program_at_checkpoint file in
  204. assert_failure
  205. "it should raise exns in strict mode on undefined entities"
  206. with Abstract_interpreter_php.UnknownConstant "ANOTHER_CST" -> ()
  207. );
  208. (*-------------------------------------------------------------------------*)
  209. (* Fixpoint *)
  210. (*-------------------------------------------------------------------------*)
  211. (* TODO while loop, dowhile, recursion, iterate 2 times is enough?
  212. * Because of the abstraction we've chosen (no int range for instance),
  213. * we achieve the fixpoint in one step so probably has
  214. * no fixpoint issues.
  215. *)
  216. (*-------------------------------------------------------------------------*)
  217. (* Interprocedural dataflow *)
  218. (*-------------------------------------------------------------------------*)
  219. "interprocedural dataflow" >:: (fun () ->
  220. let file ="
  221. $x = 2;
  222. function foo($a) { return $a + 1; }
  223. $y = foo($x);
  224. checkpoint(); // y: int
  225. " in
  226. assert_final_value_at_checkpoint "$y" file (Vabstr Tint);
  227. );
  (* B::foo() is inherited from A and its self::bar() is expected to yield
   * the int of A::bar, whereas B::bar() is the override defined in B and
   * is expected to yield a bool *)
  228. "interprocedural dataflow with static methods" >:: (fun () ->
  229. let file ="
  230. class A {
  231. static function foo() { return self::bar(); }
  232. static function bar() { return 1+1; }
  233. }
  234. class B extends A {
  235. static function bar() { return false || false; }
  236. }
  237. $x = B::foo();
  238. $y = B::bar();
  239. checkpoint(); // x: int, y: bool
  240. " in
  (* note: each assertion below re-runs the interpreter on the program *)
  241. assert_final_value_at_checkpoint "$x" file (Vabstr Tint);
  242. assert_final_value_at_checkpoint "$y" file (Vabstr Tbool);
  243. );
  244. "interprocedural dataflow with normal methods" >:: (fun () ->
  245. let file ="
  246. $x = 2;
  247. class A { function foo($a) { return $a + 1; } }
  248. class B extends A { }
  249. $o = new B();
  250. $y = $o->foo($x);
  251. checkpoint(); // y: int
  252. " in
  253. assert_final_value_at_checkpoint "$y" file (Vabstr Tint);
  254. );
  255. (*****************************************************************************)
  256. (* Callgraph *)
  257. (*****************************************************************************)
  258. (* less: move in unit_callgraph_php.ml *)
  259. (*-------------------------------------------------------------------------*)
  260. (* Callgraph and functions *)
  261. (*-------------------------------------------------------------------------*)
  262. "basic callgraph for direct functions" >:: (fun () ->
  263. let file = "
  264. function foo() { }
  265. function bar() { foo(); }
  266. " in
  267. (* note: I don't use assert_graph for teaching purpose here *)
  268. let g = callgraph_generation file in
  269. let xs = Map_poly.find (CG.Function "bar") g +> Set_poly.elements in
  270. assert_equal
  271. ~msg:"it should handle simple direct calls:"
  272. [CG.Function "foo"]
  273. xs;
  (* second program: the callee is not defined anywhere *)
  274. let file = "
  275. function bar() { foo(); }
  276. " in
  277. try
  278. let _ = callgraph_generation file in
  279. assert_failure "it should throw an exception for unknown function"
  280. with (Abstract_interpreter_php.UnknownFunction "foo") -> ()
  281. );
  282. (* todo: call_user_func, id wrapper preserve graph, ?? *)
  283. (*-------------------------------------------------------------------------*)
  284. (* Callgraph and static methods *)
  285. (*-------------------------------------------------------------------------*)
  286. "simple static method call" >:: (fun () ->
  287. let file = "
  288. class A { static function a() { } }
  289. function b() { A::a(); }
  290. " in
  291. assert_graph file ["b" --> ["A::a"]];
  (* second program: the static method does not exist in the class *)
  292. let file = "
  293. class A { static function a() { } }
  294. function b() { A::unknown(); }
  295. " in
  296. try
  297. let _ = callgraph_generation file in
  298. assert_failure "it should throw an exception for unknown static method"
  299. with (Abstract_interpreter_php.UnknownMember ("unknown", "A", _)) -> ()
  300. );
  301. (* In PHP it is ok to call B::foo() even if B does not define
  302. * a static method 'foo' provided that B inherits from a class
  303. * that defines such a foo.
  304. *)
  305. "lookup even for static method call" >:: (fun () ->
  306. let file ="
  307. class A { static function a() { } }
  308. class B extends A { }
  309. function b() { B::a(); }
  310. " in
  311. assert_graph file ["b" --> ["A::a"]]
  312. );
  (* self:: inside A::foo is expected to resolve to A::bar even when foo is
   * reached through B, which overrides bar *)
  313. "static lookup self in parent" >:: (fun () ->
  314. let file ="
  315. class A {
  316. static function foo() { self::bar(); }
  317. static function bar() { }
  318. }
  319. class B extends A {
  320. static function bar() { }
  321. }
  322. function b() { B::foo(); }
  323. function c() { B::bar(); }
  324. " in
  325. assert_graph file
  326. ["b" --> ["A::foo"];"c" --> ["B::bar"];"A::foo" --> ["A::bar"]]
  327. );
  328. "static method call with self:: and parent::" >:: (fun () ->
  329. let file = "
  330. class A {
  331. static function a() { }
  332. static function a2() { self::a(); }
  333. }
  334. class B extends A {
  335. function b() { parent::a(); }
  336. }" in
  337. assert_graph file ["A::a2" --> ["A::a"]; "B::b" --> ["A::a"]]
  338. );
  339. (* PHP is very permissive regarding static method calls as one can
  340. * do $this->foo() even if foo is a static method. PHP does not
  341. * impose the X::foo() syntax, which IMHO is just wrong.
  342. *)
  343. "static method call and $this" >:: (fun () ->
  344. let file = "
  345. class A {
  346. static function a() { }
  347. function a2() { $this->a(); }
  348. } " in
  349. assert_graph file ["A::a2" --> ["A::a"]];
  350. );
  351. (*-------------------------------------------------------------------------*)
  352. (* Callgraph and normal methods *)
  353. (*-------------------------------------------------------------------------*)
  354. "lookup normal method" >:: (fun () ->
  355. let file ="
  356. class A { function foo() { } }
  357. class B extends A { }
  358. function b() {
  359. $o = new B();
  360. $y = $o->foo();
  361. }
  362. " in
  363. assert_graph file ["b" --> ["A::foo"]];
  (* second program: the method exists neither in B nor in its parent A *)
  364. let file = "
  365. class A { function foo() { } }
  366. class B extends A { }
  367. function b() {
  368. $o = new B();
  369. $y = $o->unknown();
  370. }
  371. " in
  372. try
  373. let _ = callgraph_generation file in
  374. assert_failure "it should throw an exception for unknown method"
  375. with (Abstract_interpreter_php.UnknownMember ("unknown", _, _)) -> ()
  376. );
  377. (* I used to have a very simple method analysis that did some gross over
  378. * approximation. With a call like $x->foo(), the analysis considered
  379. * any method foo in any class as a viable candidate. We can now
  380. * do better thx to the abstract interpreter.
  381. *)
  382. "method call no over approximation" >:: (fun () ->
  383. let file = "
  384. class A { function foo() { } }
  385. class B { function foo() { } }
  386. function c() { $a = new A(); $a->foo(); }
  387. " in
  388. (* no B::foo, no over approximation! *)
  389. assert_graph file ["c" --> ["A::foo"]];
  390. );
  (* a method called on the result of an XHP literal is expected to resolve
   * to the method of the corresponding XHP class *)
  391. "XHP method call" >:: (fun () ->
  392. let file = "
  393. class :x:frag { public function foo() { } }
  394. function bar() { $x = <x:frag></x:frag>; $x->foo(); }
  395. " in
  396. assert_graph file ["bar" --> [":x:frag::foo"]];
  397. );
  398. (* todo: example of current limitations of the analysis *)
  399. ]
  400. (*****************************************************************************)
  401. (* Tainting analysis *)
  402. (*****************************************************************************)
  403. (*****************************************************************************)
  404. (* Final suite *)
  405. (*****************************************************************************)
  (* Top-level OUnit suite of this module; it currently groups only the
   * abstract interpreter suite. *)
  let unittest =
    let suites = [abstract_interpreter_unittest] in
    "static_analysis_php" >::: suites