PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/code_map/parsing2.ml

https://github.com/facebook/pfff
OCaml | 412 lines | 285 code | 40 blank | 87 comment | 11 complexity | 0ca64feaf6d94ecde38dfbcd5de72a41 MD5 | raw file
  1. (*s: parsing2.ml *)
  2. (*s: Facebook copyright *)
  3. (* Yoann Padioleau
  4. *
  5. * Copyright (C) 2010-2012 Facebook
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public License
  9. * version 2.1 as published by the Free Software Foundation, with the
  10. * special exception on linking described in file license.txt.
  11. *
  12. * This library is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
  15. * license.txt for more details.
  16. *)
  17. (*e: Facebook copyright *)
  18. open Common
  19. open Highlight_code
  20. module FT = File_type
  21. module PI = Parse_info
  22. module HC = Highlight_code
  23. module Db = Database_code
  24. module Flag = Flag_visual
  25. (*****************************************************************************)
  26. (* Prelude *)
  27. (*****************************************************************************)
  28. (*
  29. * The main entry point of this module is tokens_with_categ_of_file
  30. * which is called in Draw_microlevel to "render" the content of a file.
  31. *)
  32. (*****************************************************************************)
  33. (* Parsing helpers *)
  34. (*****************************************************************************)
  35. (* This type is needed if we want to use a single hashtbl to memoize
  36. * all the parsed file.
  37. *)
  38. type ast =
  39. (* functional *)
  40. | ML of Parse_ml.program_and_tokens
  41. | Hs of Parse_hs.program_and_tokens
  42. | Lisp of Parse_lisp.program_and_tokens
  43. | Erlang of Parse_erlang.program_and_tokens
  44. (* web *)
  45. | Html of Parse_html.program_and_tokens
  46. | Js of Parse_js.program_and_tokens
  47. | Php of Parse_php.program_with_comments
  48. | Opa of Parse_opa.program_and_tokens
  49. (* system *)
  50. | Cpp of Parse_cpp.toplevels_and_tokens
  51. | Rust of Parse_rust.program_and_tokens
  52. (* mainstream *)
  53. | Java of Parse_java.program_and_tokens
  54. | Csharp of Parse_csharp.program_and_tokens
  55. (* scripting *)
  56. | Python of Parse_python.program_and_tokens
  57. (* documentation *)
  58. | Noweb of Parse_nw.program_and_tokens
  59. (* less? | Org of Org_mode.org ? *)
  60. let _hmemo_file = Hashtbl.create 101
  61. (* with directories with many files, this is useful *)
  62. let parse_cache parse_in extract file =
  63. Common.profile_code "View.parse_cache" (fun () ->
  64. let mtime = Common2.filemtime file in
  65. let recompute =
  66. if Hashtbl.mem _hmemo_file file
  67. then
  68. let (oldmtime, _ast) = Hashtbl.find _hmemo_file file in
  69. mtime > oldmtime
  70. else true
  71. in
  72. let ast =
  73. if recompute
  74. then begin
  75. let ast = parse_in file in
  76. Hashtbl.replace _hmemo_file file (mtime, ast);
  77. ast
  78. end
  79. else Hashtbl.find _hmemo_file file +> snd
  80. in
  81. extract ast
  82. )
  83. (*****************************************************************************)
  84. (* Semantic enhancement *)
  85. (*****************************************************************************)
  86. let use_arity_of_use_count n =
  87. match () with
  88. (* note that because my PHP object analysis have some threshold
  89. * on the number of callers (see threshold_callers_indirect_db)
  90. * the number for HugeUse can not be more than this one otherwise
  91. * you will miss some cases
  92. *)
  93. | _ when n >= 100 -> HugeUse
  94. | _ when n > 20 -> LotsOfUse
  95. | _ when n >= 10 -> MultiUse
  96. | _ when n >= 2 -> SomeUse
  97. | _ when n = 1 -> UniqueUse
  98. | _ -> NoUse
  99. let rewrite_categ_using_entities s categ file entities =
  100. match Db.entity_kind_of_highlight_category_def categ with
  101. | None -> categ
  102. | Some e_kind ->
  103. let entities =
  104. Hashtbl.find_all entities s +> List.filter (fun e ->
  105. (* we could have the full www dbcode but run the treemap on
  106. * a subdir in which case the root will not be the same.
  107. * It's a good approximation to just look at the basename.
  108. * The only false positive we will get if another file,
  109. * with the same name happened to also define entities
  110. * with the same name, which would be rare.
  111. *
  112. * update: TODO use Model2.readable_to_absolute_filename_under_root ?
  113. *)
  114. Filename.basename e.Db.e_file =$= Filename.basename file &&
  115. (* some file have both a function and class with the same name *)
  116. Database_code.matching_def_short_kind_kind e_kind e.Db.e_kind
  117. )
  118. in
  119. match entities with
  120. | [] -> categ
  121. | [e] ->
  122. let use_cnt = e.Db.e_number_external_users in
  123. let arity = use_arity_of_use_count use_cnt in
  124. if Database_code.is_entity_def_category categ
  125. then HC.rewrap_arity_def2_category arity categ
  126. else categ
  127. | _x::_y::_xs ->
  128. (* TODO: handle __construct directly *)
  129. if not (List.mem s ["__construct"])
  130. then pr2_once (spf "multi def found for %s in %s" s file);
  131. categ
  132. (*****************************************************************************)
  133. (* Helpers *)
  134. (*****************************************************************************)
  135. type ('ast, 'token) for_helper = {
  136. parse: (Common.filename -> ('ast * 'token list) list);
  137. highlight_visit:(tag_hook:(Parse_info.info -> HC.category -> unit) ->
  138. Highlight_code.highlighter_preferences ->
  139. 'ast * 'token list -> unit);
  140. info_of_tok:('token -> Parse_info.info);
  141. }
  142. let tokens_with_categ_of_file_helper
  143. {parse; highlight_visit; info_of_tok} file prefs hentities =
  144. if !Flag.verbose_visual then pr2 (spf "Parsing: %s" file);
  145. let ast2 = parse file in
  146. if !Flag.verbose_visual then pr2 (spf "Highlighting: %s" file);
  147. (* todo: ast2 should not be a list, should just be (ast, toks)
  148. * but right now only a few parsers will satisfy this interface
  149. *)
  150. ast2 +> List.map (fun (ast, toks) ->
  151. let h = Hashtbl.create 101 in
  152. (* computing the token attributes *)
  153. highlight_visit ~tag_hook:(fun info categ -> Hashtbl.add h info categ)
  154. prefs (ast, toks);
  155. (* getting the text *)
  156. toks +> Common.map_filter (fun tok ->
  157. let info = info_of_tok tok in
  158. let s = PI.str_of_info info in
  159. if not (PI.is_origintok info)
  160. then None
  161. else
  162. let categ = Common2.hfind_option info h +> Common2.fmap (fun categ ->
  163. rewrite_categ_using_entities s categ file hentities
  164. ) in
  165. Some (s, categ,{ Common2.l = PI.line_of_info info; c = PI.col_of_info info; })
  166. )) +> List.flatten
  167. (*****************************************************************************)
  168. (* Main entry point *)
  169. (*****************************************************************************)
  170. (* coupling: right now if you add a language here, you need to whitelist it
  171. * also in draw_microlevel.draw_contents2.
  172. *)
  173. let tokens_with_categ_of_file file hentities =
  174. let ftype = FT.file_type_of_file file in
  175. let prefs = Highlight_code.default_highlighter_preferences in
  176. match ftype with
  177. | FT.PL (FT.Web (FT.Php _)) ->
  178. tokens_with_categ_of_file_helper
  179. { parse = (parse_cache (fun file ->
  180. Common.save_excursion Flag_parsing_php.error_recovery true (fun () ->
  181. let ((ast, toks), _stat) = Parse_php.parse file in
  182. (* todo: use database_light if given? we could so that
  183. * variables are better annotated.
  184. * note that database_light will be passed in
  185. * rewrite_categ_using_entities() at least.
  186. *)
  187. let find_entity = None in
  188. (* work by side effect on ast2 too *)
  189. (try
  190. Check_variables_php.check_and_annotate_program
  191. find_entity
  192. ast
  193. with Ast_php.TodoNamespace _ | Common.Impossible -> ()
  194. );
  195. Php ((ast, toks))
  196. ))
  197. (function Php (ast, toks) -> [ast, toks] | _ -> raise Impossible));
  198. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  199. Highlight_php.visit_program ~tag:tag_hook prefs hentities
  200. (ast, toks)
  201. );
  202. info_of_tok = Token_helpers_php.info_of_tok;
  203. }
  204. file prefs hentities
  205. | FT.PL (FT.ML _) ->
  206. tokens_with_categ_of_file_helper
  207. { parse = (parse_cache (fun file ->
  208. Common.save_excursion Flag_parsing_ml.error_recovery true (fun()->
  209. ML (Parse_ml.parse file +> fst))
  210. )
  211. (function
  212. | ML (astopt, toks) ->
  213. let ast = astopt ||| [] in
  214. [ast, toks]
  215. | _ -> raise Impossible));
  216. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  217. Highlight_ml.visit_program ~tag_hook prefs (ast, toks));
  218. info_of_tok = Token_helpers_ml.info_of_tok;
  219. }
  220. file prefs hentities
  221. | FT.PL (FT.Haskell _) ->
  222. tokens_with_categ_of_file_helper
  223. { parse = (parse_cache
  224. (fun file -> Hs (Parse_hs.parse file +> fst))
  225. (function Hs (ast, toks) -> [ast, toks] | _ -> raise Impossible));
  226. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  227. Highlight_hs.visit_program ~tag_hook prefs (ast, toks));
  228. info_of_tok = Parser_hs.info_of_tok;
  229. }
  230. file prefs hentities
  231. | FT.PL (FT.Python) ->
  232. tokens_with_categ_of_file_helper
  233. { parse = (parse_cache
  234. (fun file -> Python (Parse_python.parse file +> fst))
  235. (function Python x -> [x] | _ -> raise Impossible));
  236. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  237. Highlight_python.visit_toplevel ~tag_hook prefs (ast, toks));
  238. info_of_tok = Token_helpers_python.info_of_tok;
  239. }
  240. file prefs hentities
  241. | FT.PL (FT.Csharp) ->
  242. tokens_with_categ_of_file_helper
  243. { parse = (parse_cache
  244. (fun file -> Csharp (Parse_csharp.parse file +> fst))
  245. (function Csharp (ast, toks) -> [ast, toks] | _ -> raise Impossible));
  246. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  247. Highlight_csharp.visit_program ~tag_hook prefs (ast, toks));
  248. info_of_tok = Token_helpers_csharp.info_of_tok;
  249. }
  250. file prefs hentities
  251. | FT.PL (FT.Rust) ->
  252. tokens_with_categ_of_file_helper
  253. { parse = (parse_cache
  254. (fun file -> Rust (Parse_rust.parse file +> fst))
  255. (function Rust (ast, toks) -> [ast, toks] | _ -> raise Impossible));
  256. highlight_visit = (fun ~tag_hook prefs (ast, toks) ->
  257. Highlight_rust.visit_program ~tag_hook prefs (ast, toks));
  258. info_of_tok = Token_helpers_rust.info_of_tok;
  259. }
  260. file prefs hentities
  261. | FT.PL (FT.Opa) ->
  262. tokens_with_categ_of_file_helper
  263. { parse = (parse_cache
  264. (fun file -> Opa (Parse_opa.parse_just_tokens file))
  265. (function
  266. | Opa (ast, toks) -> [ast, toks]
  267. | _ -> raise Impossible));
  268. highlight_visit = Highlight_opa.visit_toplevel;
  269. info_of_tok = Token_helpers_opa.info_of_tok;
  270. }
  271. file prefs hentities
  272. | FT.PL (FT.Erlang) ->
  273. tokens_with_categ_of_file_helper
  274. { parse = (parse_cache
  275. (fun file -> Erlang (Parse_erlang.parse file +> fst))
  276. (function Erlang x -> [x] | _ -> raise Impossible));
  277. highlight_visit = Highlight_erlang.visit_program;
  278. info_of_tok = Token_helpers_erlang.info_of_tok;
  279. }
  280. file prefs hentities
  281. | FT.PL (FT.Java) ->
  282. tokens_with_categ_of_file_helper
  283. { parse = (parse_cache
  284. (fun file -> Java (Parse_java.parse file +> fst))
  285. (function
  286. | Java (ast, toks) -> [Common2.some ast, (toks)]
  287. | _ -> raise Impossible));
  288. highlight_visit = Highlight_java.visit_toplevel;
  289. info_of_tok = Token_helpers_java.info_of_tok;
  290. }
  291. file prefs hentities
  292. | FT.PL (FT.Lisp _) ->
  293. tokens_with_categ_of_file_helper
  294. { parse = (parse_cache
  295. (fun file -> Lisp (Parse_lisp.parse file +> fst))
  296. (function
  297. | Lisp (ast, toks) -> [Common2.some ast, toks]
  298. | _ -> raise Impossible));
  299. highlight_visit = Highlight_lisp.visit_toplevel;
  300. info_of_tok = Parser_lisp.info_of_tok;
  301. }
  302. file prefs hentities
  303. | FT.Text ("nw" | "tex" | "texi" | "web") ->
  304. tokens_with_categ_of_file_helper
  305. { parse = (parse_cache
  306. (fun file -> Noweb (Parse_nw.parse file +> fst))
  307. (function Noweb x -> [x] | _ -> raise Impossible));
  308. highlight_visit = Highlight_nw.visit_program;
  309. info_of_tok = Token_helpers_nw.info_of_tok;
  310. }
  311. file prefs hentities
  312. | FT.PL (FT.Cplusplus _ | FT.C _ | FT.Thrift | FT.ObjectiveC _) ->
  313. tokens_with_categ_of_file_helper
  314. { parse = (parse_cache
  315. (fun file ->
  316. let (ast2, _stat) = Parse_cpp.parse file in
  317. let ast = Parse_cpp.program_of_program2 ast2 in
  318. (* work by side effect on ast2 too *)
  319. Check_variables_cpp.check_and_annotate_program
  320. ast;
  321. Cpp ast2
  322. )
  323. (function Cpp x -> x | _ -> raise Impossible));
  324. highlight_visit = Highlight_cpp.visit_toplevel;
  325. info_of_tok = Token_helpers_cpp.info_of_tok;
  326. }
  327. file prefs hentities
  328. | FT.PL (FT.Web (FT.Js)) ->
  329. tokens_with_categ_of_file_helper
  330. { parse = (parse_cache
  331. (fun file ->
  332. Common.save_excursion Flag_parsing_js.error_recovery true (fun () ->
  333. Js (Parse_js.parse file +> fst))
  334. )
  335. (function
  336. | Js (astopt, toks) ->
  337. let ast = astopt ||| [] in
  338. [ast, toks]
  339. | _ -> raise Impossible
  340. ));
  341. highlight_visit = Highlight_js.visit_program;
  342. (* TODO?
  343. let s = Token_helpers_js.str_of_tok tok in
  344. Ast_js.remove_quotes_if_present s
  345. *)
  346. info_of_tok = Token_helpers_js.info_of_tok;
  347. }
  348. file prefs hentities
  349. | FT.PL (FT.Web (FT.Html)) ->
  350. tokens_with_categ_of_file_helper
  351. { parse = (parse_cache
  352. (fun file -> Html (Parse_html.parse file))
  353. (function
  354. | Html (ast, toks) -> [ast, toks]
  355. | _ -> raise Impossible));
  356. highlight_visit = Highlight_html.visit_toplevel;
  357. info_of_tok = Token_helpers_html.info_of_tok;
  358. }
  359. file prefs hentities
  360. | FT.Text ("org") ->
  361. let org = Org_mode.parse file in
  362. Org_mode.highlight org
  363. (* ugly, hardcoded, should instead look at the head of the file for a
  364. * # -*- org indication.
  365. * very pad and code-overlay specific.
  366. *)
  367. | FT.Text ("txt") when Common2.basename file =$= "info.txt" ->
  368. let org = Org_mode.parse file in
  369. Org_mode.highlight org
  370. | _ -> failwith
  371. "impossible: should be called only when file has good file_kind"
  372. (*e: parsing2.ml *)