PageRenderTime 41ms CodeModel.GetById 2ms app.highlight 29ms RepoModel.GetById 2ms app.codeStats 0ms

/lang_php/analyze/checker/check_variables_php.ml

https://github.com/llvm-djk/pfff
OCaml | 953 lines | 477 code | 97 blank | 379 comment | 11 complexity | ecbc59fac84b19242acc41c260fced27 MD5 | raw file
  1(* Yoann Padioleau
  2 *
  3 * Copyright (C) 2010, 2012 Facebook
  4 *
  5 * This library is free software; you can redistribute it and/or
  6 * modify it under the terms of the GNU Lesser General Public License
  7 * version 2.1 as published by the Free Software Foundation, with the
  8 * special exception on linking described in file license.txt.
  9 *
 10 * This library is distributed in the hope that it will be useful, but
 11 * WITHOUT ANY WARRANTY; without even the implied warranty of
 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 13 * license.txt for more details.
 14 *)
 15open Common
 16
 17open Ast_php_simple
 18module A = Ast_php_simple
 19module E = Error_php
 20module S = Scope_code
 21module Ent = Database_code
 22
 23(*****************************************************************************)
 24(* Prelude *)
 25(*****************************************************************************)
 26(*
 27 * This module helps find stupid PHP mistakes related to variables. See
 28 * tests/php/scheck/variables.php for examples of bugs currently
 29 * detected by this checker. This module not only checks but also annotates
 30 * the AST with scoping information as a side effect. This is useful
 31 * in codemap to display differently references to parameters, local vars,
 32 * global vars, etc.
 33 *
 34 * This file mostly deals with scoping issues. Scoping is different
 35 * from typing! Those are two orthogonal programming language concepts.
 36 * old: This file is concerned with variables, that is Ast_php.dname
 37 *  entities, so for completness C-s for dname in ast_php.ml and
 38 *  see if all uses of it are covered. Other files are more concerned
 39 *  about checks related to entities, that is Ast_php.name.
 40 *
 41 * The errors detected here are mostly:
 42 *  - UseOfUndefinedVariable
 43 *  - UnusedVariable
 44 * Some similar checks are done by JSlint.
 45 *
 46 * Some issues:
 47 *  - detecting variable-related mistakes is made slightly more complicated
 48 *    by PHP because of the lack of declaration in the language;
 49 *    the first assignment "declares" the variable (on the other side
 50 *    the PHP language forces people to explicitly declared
 51 *    the use of global variables (via the 'global' statement) which
 52 *    makes certain things easier).
 53 *
 54 *  - variables passed by reference can look like UseOfUndefinedVariable
 55 *    bugs but they are not. One way to fix it is to do a global analysis that
 56 *    remembers what are all the functions taking arguments by reference
 57 *    and whitelist them here. But it has a cost. One can optimize a little
 58 *    this by using an entity_finder computed semi lazily a la cmf multi
 59 *    level approach (recursive a la cpp, flib-map, git-grep, git head).
 60 *    Another way is to force programmers to actually declare such variables
 61 *    before those kinds of function calls (this is what Evan advocated).
 62 *
 63 *  - people abuse assignements in function calls to emulate "keyword arguments"
 64 *    as in 'foo($color = "red", $width = 10)'. Such assignments looks
 65 *    like UnusedVariable but they are not. One can fix that by detecting
 66 *    such code patterns.
 67 *
 68 *  - functions like extract(), param_get(), param_post()
 69 *    or variable variables like $$x introduce some false positives.
 70 *    Regarding the param_get/param_post(), one way to fix it is to just
 71 *    not analyse toplevel code. Another solution is to hardcode a few
 72 *    analysis that recognizes the arguments of those functions. Finally
 73 *    for the extract() and $$x one can just bailout of such code or
 74 *    as Evan did remember the first line where such code happen and
 75 *    don't do any analysis pass this point.
 76 *
 77 *  - any analysis will probably flag lots of warnings on an existing PHP
 78 *    codebase. Some programmers may find legitimate certain things,
 79 *    for instance having variables declared in a foreach to escape its
 80 *    foreach scope. This would then hinder the whole analysis because
 81 *    people would just not run the analysis. You need the approval of
 82 *    the PHP developers on such analysis first and get them ok to change
 83 *    their coding styles rules. One way to fix this problem is to have
 84 *    a strict mode where only certain checks are enabled. A good
 85 *    alternative is also to rank errors. A final trick is to report
 86 *    only new errors.
 87 *
 88 * Here are some extra notes by Evan in his own variable linter:
 89 *
 90 * "These things declare variables in a function":
 91 * - DONE Explicit parameters
 92 * - DONE Assignment (pad: variable mentionned for the first time)
 93 * - DONE Assignment via list()
 94 * - DONE Static, Global
 95 * - DONE foreach()
 96 * - DONE catch
 97 * - DONE Builtins ($this)
 98 * - DONE Lexical vars, in php 5.3 lambda expressions
 99 * pad: forgot pass by reference variables
100 *
101 * "These things make lexical scope unknowable":
102 * - DONE Use of extract()
103 * - DONE Assignment or Global with variable variables ($$x)
104 *   (pad: so just bailout on such code)
105 * pad: forgot eval()? but eval can introduce new variables in scope?
106 *
107 * These things don't count as "using" a variable:
108 * - DONE isset() (pad: this should be forbidden, it's a bad way to program)
109 * - DONE empty()
110 * - DONE Static class variables (pad: check done in check_classes instead)
111 *
112 * Here are a few additional checks and features of this checker:
113 *  - when the strict_scope flag is set, check_variables will
114 *    emulate a block-scoped language as in JSLint and flags
115 *    variables used outside their "block".
116 *  - when passed the find_entity hook, check_variables will
117 *    know about functions taking parameters by refs, which removes
118 *    some false positives
119 *
120 * history:
121 *  - sgrimm had the simple idea of just counting the number of occurences
122 *    of a variable in a program, at the token level. If only 1, then
123 *    probably a typo. But sometimes variable names are mentionned in
124 *    interface signatures in which case they occur only once. So you need
125 *    some basic analysis; the token level is not enough. You may not
126 *    need the CFG but at least you need the AST to differentiate the
127 *    different kinds of unused variables.
128 *  - Some of the scoping logic was previously in another file, scoping_php.ml
129 *    but we were kind of duplicating the logic that is in scoping_php.ml.
130 *    PHP has bad scoping rule allowing variables declared through a foreach
131 *    to be used outside the foreach, which is probably wrong.
132 *    Unfortunately, knowing from scoping_php.ml just the status of a
133 *    variable (local, global, or param) is not good enough to find bugs
134 *    related to those weird scoping rules. So I've put all variable scope
135 *    related stuff in this file and removed the duplication in scoping_php.ml.
136 *  - I was using ast_php.ml and a visitor approach but then I rewrote it
137 *    to use ast_php_simple and an "env" approach because the code was
138 *    getting ugly and was containing false positives that were hard to fix.
139 *    As a side effect of the refactoring, some bugs disappeared (nested
140 *    assigns in if, list() not at the toplevel of an expression, undefined
141 *    access to an array), and code regarding lexical variables became
142 *    more clear because localized in one place.
143 *
144 * TODO OTHER:
145 *  - factorize code for the shared_ref thing and create_new_local_if_necessary
146 *  - the old checker was handling correctly globals? was it looking up
147 *    in the top scope? add some unit tests.
148 *  - put back strict block scope
149 *  - factorize in add_var() that adds in env.vars and
150 *    update env.scoped_vars_used too. Removed refs to scope_ref in this file
151 *)
152
153(*****************************************************************************)
154(* Types, constants *)
155(*****************************************************************************)
156type env = {
157  (* The ref in the tuple is to remember the number of uses of the variable,
158   * for the UnusedVariable check.
159   * The ref for the Map.t is to avoid threading the env, because
160   * any stmt/expression can introduce new variables.
161   *
162   * todo? use a list of Map.t to represent nested scopes?
163   * (globals, methods/functions, nested blocks)? when in strict/block mode?
164   *)
165  vars: (string, (Ast_php.tok * Scope_code.scope * int ref)) Map_poly.t ref;
166
167  (* to remember how to annotate Var in ast_php.ml *)
168  scope_vars_used: (Ast_php.tok, Scope_code.scope) Hashtbl.t;
169
170  (* todo: have a globals:? *)
171
172  (* we need to access the definitions of functions/methods to know
173   * if an argument was passed by reference, in which case what looks
174   * like a UseOfUndefinedVariable is actually not (but it would be
175   * better for them to fix the code to introduce/declare this variable
176   * before ...).
177   *)
178  db: Entity_php.entity_finder option;
179  (* when analyze $this->method_call(), we need to know the enclosing class *)
180  in_class: ident option;
181
182  (* for better error message when the variable was inside a long lambda *)
183  in_long_lambda: bool;
184
185  (* when the body of a function contains eval/extract/... we bailout
186   * because we don't want to report false positives
187   *)
188  bailout: bool ref;
189}
190
191(*****************************************************************************)
192(* Helpers *)
193(*****************************************************************************)
194
195let unused_ok s =
196  s =~ "\\$_.*" ||
197  s =~ "\\$ignore.*" ||
198  List.mem s ["$unused";"$dummy";"$guard"] ||
199  (if !E.strict
200   then false
201   else
202    List.mem s [
203      "$res"; "$retval"; "$success"; "$is_error"; "$rs"; "$ret";
204      "$e"; "$ex"; "$exn"; (* exception *)
205    ]
206  )
207
208let lookup_opt s vars =
209  Common2.optionise (fun () -> Map_poly.find s vars)
210
211let s_tok_of_ident name =
212  A.str_of_ident name, A.tok_of_ident name
213
214(* to help debug *)
215let str_of_any any =
216  let v = Meta_ast_php_simple.vof_any any in
217  Ocaml.string_of_v v
218
219(*****************************************************************************)
220(* Vars passed by ref *)
221(*****************************************************************************)
222(*
223 * Detecting variables passed by reference is complicated in PHP because
224 * one does not have to use &$var at the call site (one can though). This is
225 * ugly. So to detect variables passed by reference, we need to look at
226 * the definition of the function/method called, hence the need for a
227 * entity_finder in env.db.
228 *
229 * note that it currently returns an Ast_php.func_def, not
230 * an Ast_php_simple.func_def because the database currently
231 * stores concrete ASTs, not simple ASTs.
232 *)
233let funcdef_of_call_or_new_opt env e =
234  match env.db with
235  | None -> None
236  | Some find_entity ->
237      (match e with
238      | Id [name] ->
239          (* simple function call *)
240            let (s, tok) = s_tok_of_ident name in
241            (match find_entity (Ent.Function, s) with
242            | [Ast_php.FunctionE def] -> Some def
243            (* normally those errors should be triggered in
244             * check_functions_php.ml, but right now this file uses
245             * ast_php.ml and not ast_php_simple.ml, so there are some
246             * differences in the logic so we double check things here.
247             *)
248            | [] ->
249                E.fatal tok (E.UndefinedEntity (Ent.Function, s));
250                None
251            | _x::_y::_xs ->
252                E.fatal tok (E.MultiDefinedEntity (Ent.Function, s, ("","")));
253                None
254            | _ -> raise Impossible
255            )
256      (* dynamic function call *)
257      | Var _ -> None
258
259      (* static method call *)
260      | Class_get (Id name1, Id name2) ->
261          (* todo: name1 can be self/parent in traits, or static: *)
262          let aclass = A.str_of_name name1 in
263          let amethod = A.str_of_name name2 in
264          (try
265              Some (Class_php.lookup_method ~case_insensitive:true
266                       (aclass, amethod) find_entity)
267              (* could not find the method, this is bad, but
268               * it's not our business here; this error will
269               * be reported anyway in check_classes_php.ml anyway
270               *)
271            with
272            | Not_found | Multi_found
273            | Class_php.Use__Call|Class_php.UndefinedClassWhileLookup _ ->
274                None
275          )
276      (* simple object call. If 'this->...()' then we can use lookup_method.
277       * Being complete and handling any method calls like $o->...()
278       * requires to know what is the type of $o which is quite
279       * complicated ... so let's skip that for now.
280       *
281       * todo: special case also id(new ...)-> ?
282       *)
283      | Obj_get (This _, Id [name2]) ->
284          (match env.in_class with
285          | Some name1 ->
286              let aclass = A.str_of_ident name1 in
287              let amethod = A.str_of_ident name2 in
288              (try
289                Some (Class_php.lookup_method ~case_insensitive:true
290                    (aclass, amethod) find_entity)
291               with
292               | Not_found | Multi_found
293               | Class_php.Use__Call|Class_php.UndefinedClassWhileLookup _ ->
294                   None
295              )
296          (* wtf? use of $this outside a class? *)
297          | None -> None
298          )
299
300      (* dynamic call, not much we can do ... *)
301      | _ -> None
302      )
303
304(*****************************************************************************)
305(* Checks *)
306(*****************************************************************************)
307
308let check_defined env name ~incr_count =
309  let (s, tok) = s_tok_of_ident name in
310  match lookup_opt s !(env.vars) with
311  | None ->
312      (* todo? could still issue an error but with the information that
313       * there was an extract/eval/... around?
314       *)
315      if !(env.bailout)
316      then ()
317      else
318        let err =
319          if env.in_long_lambda
320          then E.UseOfUndefinedVariableInLambda s
321          else
322            let allvars =
323              !(env.vars) +> Map_poly.to_list +> List.map fst in
324            let suggest = Suggest_fix_php.suggest s allvars in
325            E.UseOfUndefinedVariable (s, suggest)
326        in
327        E.fatal (A.tok_of_ident name) err
328
329  | Some (_tok, scope, access_count) ->
330      Hashtbl.add env.scope_vars_used tok scope;
331      if incr_count then incr access_count
332
333let check_used env vars =
334  vars +> Map_poly.iter (fun s (tok, scope, aref) ->
335    if !aref = 0
336    then
337      if unused_ok s
338      then ()
339      else
340        (* if you use compact(), some variables may look unused but
341         * they can actually be used. See variables_fp.php.
342         *)
343        if !(env.bailout)
344        then ()
345        else E.fatal tok (E.UnusedVariable (s, scope))
346  )
347
348let create_new_local_if_necessary ~incr_count env name =
349  let (s, tok) = s_tok_of_ident name in
350  match lookup_opt s !(env.vars) with
351  (* new local variable implicit declaration.
352   * todo: add in which nested scope? I would argue to add it
353   * only in the current nested scope. If someone wants to use a
354   * var outside the block, he should have initialized the var
355   * in the outer context. Jslint does the same.
356   *)
357  | None ->
358      env.vars := Map_poly.add s (tok, S.Local, ref 0) !(env.vars);
359      Hashtbl.add env.scope_vars_used tok S.Local;
360
361  | Some (_tok, scope, access_count) ->
362      Hashtbl.add env.scope_vars_used tok scope;
363      if incr_count then incr access_count
364
365(*****************************************************************************)
366(* Main entry point *)
367(*****************************************************************************)
368
369(* For each introduced variable (parameter, foreach variable, exception, etc),
370 * we add the binding in the environment with a counter, a la checkModule.
371 * We then check at use-time if something was declared before. We then
372 * finally check when we exit a scope that all variables were actually used.
373 *)
374let rec program env prog =
375  List.iter (stmt env) prog;
376
377  (* we must check if people used the variables declared at the toplevel
378   * context or via the param_post/param_get calls.
379   * todo: check env.globals instead?
380   *)
381  check_used env !(env.vars)
382
383(* ---------------------------------------------------------------------- *)
384(* Functions/Methods *)
385(* ---------------------------------------------------------------------- *)
386and func_def env def =
387
388  (* should not contain variables anyway, but does not hurt to check *)
389  def.f_params +> List.iter (fun p -> Common.opt (expr env) p.p_default);
390
391  let access_cnt =
392    match def.f_kind with
393    | Function  | AnonLambda | ShortLambda -> 0
394    (* Don't report UnusedParameter for parameters of methods;
395     * people sometimes override a method and don't use all
396     * the parameters, hence the 1 value below.
397     *
398     * less: we we don't want parameters just in method interface
399     *  to not be counted as unused Parameter
400     * less: one day we will have an @override annotation in which
401     *  case we can reconsider the above design decision.
402     *)
403    | Method -> 1
404  in
405  let oldvars = !(env.vars) in
406
407  let enclosing_vars =
408    match def.f_kind with
409    (* for ShortLambda enclosing variables can be accessed  *)
410    | ShortLambda -> Map_poly.to_list oldvars
411    (* fresh new scope *)
412    | _ ->
413      (Env_php.globals_builtins +> List.map (fun s ->
414       "$" ^ s, (Ast_php.fakeInfo s, S.Global, ref 1)
415      )) @
416      (* $this is now implicitly passed in use() for closures *)
417      (try ["$this", Map_poly.find "$this" oldvars]
418       with Not_found -> []
419      )
420  in
421
422  let env = { env with
423    vars = ref ((
424      (def.f_params +> List.map (fun p ->
425        let (s, tok) = s_tok_of_ident p.p_name in
426        s, (tok, S.Param, ref access_cnt)
427      )) @
428      enclosing_vars
429      ) +> Map_poly.of_list);
430
431    (* reinitialize bailout for each function/method *)
432    bailout = ref false;
433  }
434  in
435  def.l_uses +> List.iter (fun (_is_ref, name) ->
436    let (s, tok) = s_tok_of_ident name in
437    check_defined ~incr_count:true { env with vars = ref oldvars} name;
438    (* don't reuse same access count reference; the variable has to be used
439     * again in this new scope.
440     *)
441    env.vars := Map_poly.add s (tok, S.Closed, ref 0) !(env.vars);
442  );
443  (* We put 1 as 'use_count' below because we are not interested
444   * in error message related to $this. It's ok to not use $this
445   * in a method.
446   *)
447  if def.f_kind = Method && not (A.is_static def.m_modifiers)
448  then begin
449     let tok = (Ast_php.fakeInfo "$this") in
450     env.vars := Map_poly.add "$this" (tok, S.Class, ref 1) !(env.vars);
451  end;
452
453  List.iter (stmt env) def.f_body;
454  let newvars = 
455    enclosing_vars +> List.fold_left (fun acc (x, _) -> Map_poly.remove x acc)
456      !(env.vars)
457  in
458  check_used env newvars
459
460(* ---------------------------------------------------------------------- *)
461(* Stmt *)
462(* ---------------------------------------------------------------------- *)
463and stmt env = function
464  | FuncDef def -> func_def env def
465  | ClassDef def -> class_def env def
466  | ConstantDef def -> constant_def env def
467  | TypeDef def -> typedef_def env def
468  | NamespaceDef (qu, _) | NamespaceUse (qu, _) ->
469    raise (Ast_php.TodoNamespace (A.tok_of_name qu))
470
471  | Expr e -> expr env e
472  (* todo: block scope checking when in strict mode? *)
473  | Block xs -> stmtl env xs
474
475  | If (e, st1, st2) ->
476      expr env e;
477      stmtl env [st1;st2]
478
479  | Switch (e, xs) ->
480      expr env e;
481      casel env xs
482
483  | While (e, xs) ->
484      expr env e;
485      stmtl env xs
486  | Do (xs, e) ->
487      stmtl env xs;
488      expr env e
489  | For (es1, es2, es3, xs) ->
490      exprl env (es1 @ es2 @ es3);
491      stmtl env xs
492
493  | Foreach (e1, pattern, xs) ->
494      expr env e1;
495      foreach_pattern env pattern;
496      stmtl env xs
497
498  | Return eopt
499  | Break eopt | Continue eopt ->
500      Common.opt (expr env) eopt
501
502  | Throw e -> expr env e
503  | Try (xs, cs, fs) ->
504      stmtl env xs;
505      catches env (cs);
506      finallys env (fs)
507
508  | StaticVars xs ->
509      xs +> List.iter (fun (name, eopt) ->
510        Common.opt (expr env) eopt;
511        let (s, tok) = s_tok_of_ident name in
512        (* less: check if shadows something? *)
513        env.vars := Map_poly.add s (tok, S.Static, ref 0) !(env.vars);
514      )
515  | Global xs ->
516      xs +> List.iter (fun e ->
517        (* should be an Var most of the time.
518         * todo: should check in .globals that this variable actually exists
519         *)
520        match e with
521        | Var name ->
522            let (s, tok) = s_tok_of_ident name in
523            env.vars := Map_poly.add s (tok, S.Global, ref 0) !(env.vars);
524        (* todo: E.warning tok E.UglyGlobalDynamic *)
525        | _ ->
526            pr2 (str_of_any (Expr2 e));
527            raise Todo
528      )
529
530(* The scope of catch is actually also at the function level in PHP ...
531 *
532 * todo: but for this one it is so ugly that I introduce a new scope
533 * even outside strict mode. It's just too ugly.
534 * todo: check unused
535 * todo? could use local ? could have a UnusedExceptionParameter ?
536 * less: could use ref 1, the exception is often not used
537 *)
538and catch env (_hint_type, name, xs) =
539  let (s, tok) = s_tok_of_ident name in
540  env.vars := Map_poly.add s (tok, S.LocalExn, ref 0) !(env.vars);
541  stmtl env xs
542
543and finally env (xs) =
544  stmtl env xs
545
546and case env = function
547  | Case (e, xs) ->
548      expr env e;
549      stmtl env xs
550  | Default xs ->
551      stmtl env xs
552
553and stmtl env xs = List.iter (stmt env) xs
554and casel env xs = List.iter (case env) xs
555and catches env xs = List.iter (catch env) xs
556and finallys env xs = List.iter (finally env) xs
557
558and foreach_pattern env pattern =
559
560 (* People often use only one of the iterator when
561  * they do foreach like   foreach(... as $k => $v).
562  * We want to make sure that at least one of
563  * the iterator variables is used, hence this trick to
564  * make them share the same access count reference.
565  *)
566  let shared_ref = ref 0 in
567
568  let rec aux e =
569    (* look Ast_php.foreach_pattern to see the kinds of constructs allowed *)
570    match e with
571    | Var name ->
572      let (s, tok) = s_tok_of_ident name in
573      (* todo: if already in scope? shadowing? *)
574      (* todo: if strict then introduce new scope here *)
575      (* todo: scope_ref := S.LocalIterator; *)
576      env.vars := Map_poly.add s (tok, S.LocalIterator, shared_ref) !(env.vars)
577    | Ref x -> aux x
578    | Arrow (e1, e2) -> aux e1; aux e2
579    | List xs -> List.iter aux xs
580    (* other kinds of lvalue are permitted too, but it's a little bit wierd
581     * and very rarely used in www
582     *)
583    | Array_get (e, eopt) ->
584      aux e;
585      eopt +> Common.do_option (expr env)
586    (* todo: E.warning tok E.WeirdForeachNoIteratorVar *)
587    | _ ->
588      failwith ("Warning: unexpected `foreach` value " ^ 
589                   (str_of_any (Expr2 pattern)))
590  in
591  aux pattern
592  
593
594(* ---------------------------------------------------------------------- *)
595(* Expr *)
596(* ---------------------------------------------------------------------- *)
597and expr env = function
598  | Int _ | Double _ | String _ -> ()
599
600  | Var name ->
601      check_defined ~incr_count:true env name
602
603  | Id _name -> ()
604
605  | Assign (None, e1, e2) ->
606      (* e1 should be an lvalue *)
607      (match e1 with
608      | Var name ->
609          (* Does an assignation counts as a use? If you only
610           * assign and never use a variable what is the point?
611           * This should be legal only for parameters passed by reference.
612           * (note that here I talk about parameters, not arguments)
613           *
614           * TODO: hmm if you take a reference to something, then
615           *  assigning something to it should be considered as a use too.
616           *)
617          create_new_local_if_necessary ~incr_count:false env name;
618
619      (* extract all vars, and share the same reference *)
620      | List xs ->
621          (* Use the same trick than for LocalIterator *)
622          let shared_ref = ref 0 in
623
624          let rec aux = function
625            (* should be an lvalue again *)
626            | Var name ->
627                let (s, tok) = s_tok_of_ident name in
628                (match lookup_opt s !(env.vars) with
629                | None ->
630                    env.vars := Map_poly.add s (tok, S.ListBinded, shared_ref)
631                      !(env.vars);
632                | Some (_tok, _scope, _access_cnt) ->
633                    ()
634                )
635            | ((Array_get _ | Obj_get _ | Class_get _) as e) ->
636                expr env e
637            | List xs -> List.iter aux xs
638            | _ ->
639                pr2 (str_of_any (Expr2 (List xs)));
640                raise Todo
641          in
642          List.iter aux xs
643
644      | Array_get (e_arr, e_opt) ->
645          (* make sure the array is declared *)
646          expr env e_arr;
647          Common.opt (expr env) e_opt
648
649      (* checks for use of undefined member should be in check_classes *)
650      | Obj_get (_, _) | Class_get (_, _) ->
651          (* just recurse on the whole thing so the code for Obj_get/Class_get
652           * below will be triggered
653           *)
654          expr env e1
655
656      | Call (Id[(("__builtin__eval_var", _) as name)], _args) ->
657          env.bailout := true;
658          E.warning (tok_of_ident name) E.DynamicCode
659
660      (* can we have another kind of lvalue? *)
661      | e ->
662          pr2 (str_of_any (Expr2 e));
663          failwith "WrongLvalue"
664      );
665      expr env e2
666
667  | Assign (Some _, e1, e2) ->
668      exprl env [e1;e2]
669  | List _xs ->
670      failwith "list(...) should be used only in an Assign context"
671  | Arrow (_e1, _e2) ->
672      failwith "... => ... should be used only in ConsArray or Foreach context"
673
674  (* A mention of a variable in a unset() should not be really
675   * considered as a use of variable. There should be another
676   * statement in the function that actually uses the variable.
677   *)
678  | Call (Id[ ("__builtin__unset", _tok)], args) ->
679      args +> List.iter (function
680        (* should be an lvalue again *)
681        (* less: The use of 'unset' on a variable is still not clear to me. *)
682        | Var name ->
683            check_defined ~incr_count:false env name
684        (* Unsetting a field, seems like a valid use.
685         * Unsetting a prop, not clear why you want that.
686         * Unsetting a class var, not clear why you want that either
687         *)
688        | (Array_get (_, _) | Obj_get _ | Class_get _) as e ->
689            (* make sure that the array used is actually defined *)
690            expr env e
691        | e ->
692            pr2 (str_of_any (Expr2 e));
693            raise Todo
694      )
695  (* special case, could factorize maybe with pass_var_by_ref *)
696  | Call (Id[ ("sscanf", _tok)], x::y::vars) ->
697      (* what if no x and y? wrong number of arguments, not our business here*)
698      expr env x;
699      expr env y;
700      vars +> List.iter (function
701      | Var name ->
702          create_new_local_if_necessary ~incr_count:false env name
703      (* less: wrong, it should be a variable? *)
704      | e -> expr env e
705      )
706
707  (* The right fix is to forbid people to use isset/empty or unset on var.
708   * todo: could have if(isset($x)) { ... code using $x}.
709   *  maybe we should have a bailout_vars and skip further errors on $x.
710   * todo: could have isset(Array_get(...) there too no?
711   *)
712  | Call (Id[ ("__builtin__isset", _tok)], [Var _name]) ->
713      ()
714  (* http://php.net/manual/en/function.empty.php
715   * "empty() does not generate a warning if the variable does not exist."
716   *)
717  | Call (Id[ ("__builtin__empty", _tok)], [Var _name]) ->
718      ()
719
720
721  | Call (Id[ ((("__builtin__eval" | "__builtin__eval_var" |
722               "extract" | "compact"
723       ), _) as name)], _args) ->
724      env.bailout := true;
725      E.warning (tok_of_ident name) E.DynamicCode
726
727      (* facebook specific? should be a hook instead to visit_prog? *)
728  | Call(Id[("param_post"|"param_get"|"param_request"|"param_cookie"as kind,_)],
729        (ConsArray (array_args))::rest_param_xxx_args) ->
730
731      (* have passed a 'prefix' arg, or nothing *)
732      if List.length rest_param_xxx_args <= 1
733      then begin
734        let prefix_opt =
735          match rest_param_xxx_args with
736          | [String(str_prefix, _tok_prefix)] ->
737              Some str_prefix
738          | [] ->
739              (match kind with
740              | "param_post" -> Some "post_"
741              | "param_get" -> Some "get_"
742              | "param_request" -> Some "req_"
743              | "param_cookie" -> Some "cookie_"
744              | _ -> raise Impossible
745              )
746          | _ ->
747              (* less: display an error? weird argument to param_xxx func?*)
748              None
749        in
750        prefix_opt +> Common.do_option (fun prefix ->
751          array_args +> List.iter (function
752          | Arrow(String(param_string, tok_param), _typ_param) ->
753              let s = "$" ^ prefix ^ param_string in
754              let tok = A.tok_of_ident (param_string, tok_param) in
755              env.vars := Map_poly.add s (tok, S.Local, ref 0) !(env.vars);
756              (* less: display an error? weird argument to param_xxx func? *)
757          | _ -> ()
758          )
759        )
760      end
761
762  | Call (e, es) ->
763      expr env e;
764
765      (* getting the def for args passed by ref false positives fix *)
766      let def_opt = funcdef_of_call_or_new_opt env e in
767      let es_with_parameters =
768        match def_opt with
769        | None ->
770            es +> List.map (fun e -> e, None)
771        | Some def ->
772            let params =
773              def.Ast_php.f_params +> Ast_php.unparen +> Ast_php.uncomma_dots
774            in
775            let rec zip args params =
776              match args, params with
777              | [], [] -> []
778              (* more params than arguments, maybe because default parameters *)
779              | [], _y::_ys -> []
780              (* more arguments than params, maybe because func_get_args() *)
781              | x::xs, [] -> (x, None)::zip xs []
782              | x::xs, y::ys -> (x, Some y)::zip xs ys
783            in
784            zip es params
785      in
786
787      es_with_parameters +> List.iter (fun (arg, param_opt) ->
788        match arg, param_opt with
789        (* keyword argument; do not consider this variable as unused.
790         * We consider this variable as a pure comment here and just pass over.
791         * todo: could make sure they are not defined in the current
792         * environment in strict mode? and if they are, shout because of
793         * bad practice?
794         *)
795        | Assign (None, Var _name, e2), _ ->
796            expr env e2
797        (* a variable passed by reference, this can considered a new decl *)
798        | Var name, Some {Ast_php.p_ref = Some _;_} ->
799
800            (* if was already assigned and passed by refs,
801             * increment its use counter then.
802             * less: or should we increase only if inout param?
803             *)
804            create_new_local_if_necessary ~incr_count:true env name
805
806        | _ -> expr env arg
807      )
808
809
810  | This name ->
811      (* when we do use($this) in closures, we create a fresh $this variable
812       * with a refcount of 0, so we need to increment it here.
813       *)
814      check_defined ~incr_count:true env name
815
816  (* array used as an rvalue; the lvalue case should be handled in Assign. *)
817  | Array_get (e, eopt) ->
818      expr env e;
819      Common.opt (expr env) eopt
820
821  | Obj_get (e1, e2) ->
822      expr env e1;
823      (match e2 with
824      (* with 'echo $o->$v' we have a dynamic field, we need to visit
825       * e2 to mark $v as used at least.
826       *)
827      | Id _name  -> ()
828      | _ -> expr env e2
829      )
830
831  | Class_get (e1, e2) ->
832      expr env e1;
833      (match e2 with
834      (* with 'echo A::$v' we should not issue a UseOfUndefinedVariable,
835       * check_classes_php.ml will handle this case.
836       *)
837      | Var _  -> ()
838      | _ -> expr env e2
839      )
840
841  | New (e, es) ->
842      expr env (Call (Class_get(e, Id[ ("__construct", None)]), es))
843
844  | InstanceOf (e1, e2) -> exprl env [e1;e2]
845
846  | Infix (_, e) | Postfix (_, e) | Unop (_, e) -> expr env e
847  | Binop (_, e1, e2) -> exprl env [e1; e2]
848  | Guil xs -> exprl env xs
849
850  | Ref e -> expr env e
851
852  | ConsArray (xs) -> array_valuel env xs
853  | Collection (_n, xs) ->
854    array_valuel env xs
855  | Xhp x -> xml env x
856
857  | CondExpr (e1, e2, e3) -> exprl env [e1; e2; e3]
858  | Cast (_, e) -> expr env e
859
860  | Lambda def ->
861      let in_long_lambda = 
862        match def.f_kind with ShortLambda -> false | _ -> true
863      in
864      func_def { env with in_long_lambda } def
865
866and array_value env x =
867  match x with
868  | Arrow (e1, e2) -> exprl env [e1; e2]
869  | e -> expr env e
870
871and xml env x =
872  x.xml_attrs +> List.iter (fun (_name, xhp_attr) -> expr env xhp_attr);
873  x.xml_body +> List.iter (xhp env)
874
875and xhp env = function
876  | XhpText _s -> ()
877  | XhpExpr e -> expr env e
878  | XhpXml x -> xml env x
879
880and exprl env xs = List.iter (expr env) xs
881and array_valuel env xs = List.iter (array_value env) xs
882
883(* ---------------------------------------------------------------------- *)
884(* Misc *)
885(* ---------------------------------------------------------------------- *)
886
887(* checks for use of undefined members should be in check_classes, but
888 * it's hard to do locally.
889 *
890 * todo: inline the code of traits?
891 *)
892and class_def env def =
893  let env = { env with in_class = Some def.c_name } in
894  List.iter (constant_def env) def.c_constants;
895  List.iter (class_var env) def.c_variables;
896  List.iter (method_def env) def.c_methods
897
898(* cst_body should be a static scalar so there should not be any
899 * variable in it so in theory we don't need to check it ... doesn't
900 * hurt though, one day maybe this will change.
901 *)
902and constant_def env def =
903  expr env def.cst_body
904
905(* type definitions do not contain any variables *)
906and typedef_def _env _def =
907  ()
908
909and class_var env v =
910  Common.opt (expr env) v.cv_value
911
912and method_def env x = func_def env x
913
914(*****************************************************************************)
915(* Main entry point *)
916(*****************************************************************************)
917
918let check_and_annotate_program2 find_entity prog =
919  let env = {
920    (* less: should be a in globals field instead? *)
921    vars = ref (Env_php.globals_builtins +> List.map (fun s ->
922       "$" ^ s, (Ast_php.fakeInfo s, S.Global, ref 1)
923       ) +> Map_poly.of_list);
924    db = find_entity;
925    in_class = None;
926    in_long_lambda = false;
927    bailout = ref false;
928    scope_vars_used = Hashtbl.create 101;
929  }
930  in
931  let ast = Ast_php_simple_build.program_with_position_information prog in
932  program env ast;
933
934  (* annotating the scope of Var *)
935  (Ast_php.Program prog) +>
936    Visitor_php.mk_visitor { Visitor_php.default_visitor with
937    Visitor_php.kexpr = (fun (k, _) x ->
938      match x with
939      | Ast_php.IdVar (dname, aref) ->
940          let tok = Ast_php.info_of_dname dname in
941          (try
942            aref := Hashtbl.find env.scope_vars_used tok
943          (* keep NoScope *)
944          with Not_found -> ()
945          )
946      | _ -> k x
947    );
948  };
949  ()
950
951let check_and_annotate_program a b =
952  Common.profile_code "Checker.variables" (fun () ->
953    check_and_annotate_program2 a b)