PageRenderTime 32ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/h_program-lang/tags_file.ml

https://github.com/facebook/pfff
OCaml | 237 lines | 133 code | 23 blank | 81 comment | 10 complexity | 58bd59c11d7922a46bc361f8faed2d0f MD5 | raw file
  1. (* Yoann Padioleau
  2. *
  3. * Copyright (C) 2010-2012 Facebook
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public License
  7. * version 2.1 as published by the Free Software Foundation, with the
  8. * special exception on linking described in file license.txt.
  9. *
  10. * This library is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
  13. * license.txt for more details.
  14. *)
  15. open Common
  16. module PI = Parse_info
  17. module E = Entity_code
  18. (*****************************************************************************)
  19. (* Prelude *)
  20. (*****************************************************************************)
  21. (*
  22. * Generating TAGS file (for emacs or vim)
  23. *
  24. * Supposed syntax for emacs TAGS (.tags) files, as analysed from output
  25. * of etags, read in etags.c and discussed with Francesco Potorti.
  26. * src: otags readme:
  27. *
  28. * <file> ::= <page>+
  29. * <page> ::= <header><body>
  30. * <header> ::= <NP><CR><file-name>,<body-length><CR>
  31. * <body> ::= <tag-line>*
  32. * <tag-line> ::= <prefix><DEL><tag><SOH><line-number>,<begin-char-index><CR>
  33. * pad: when tag is already at the beginning of the line:
  34. * <tag-line> ::=<tag><DEL><line-number>,<begin-char-index><CR>
  35. *
  36. * <NP> ::= ascii NP, (emacs ^L)
  37. * <DEL> ::= ascii DEL, (emacs ^?)
  38. * <SOH> ::= ascii SOH, (emacs ^A)
  39. * <CR> ::= end of line; this module emits "\n" (ascii LF), despite the CR name
  40. *
  41. * See also http://en.wikipedia.org/wiki/Ctags#Tags_file_formats
  42. *)
  43. (*****************************************************************************)
  44. (* Types *)
  45. (*****************************************************************************)
  (* Page separator written at the start of an emacs TAGS file:
   * a form feed (\x0c, emacs ^L) followed by a newline.
   * see http://en.wikipedia.org/wiki/Ctags#Tags_file_formats
   *)
  let header : string = "\x0c\n"

  (* Empty footer string; it is not referenced in the visible part of
   * this file.
   *)
  let footer : string = ""
  (* One entry of a tags file. *)
  type tag = {
    (* text of the source line containing the definition *)
    tag_definition_text : string;

    (* name under which the definition is looked up *)
    tagname : string;

    (* line of the definition in its file *)
    line_number : int;

    (* offset of beginning of tag_definition_text, when have 0-indexed
     * filepos
     *)
    byte_offset : int;

    (* only used by vim *)
    kind : Entity_code.entity_kind;
  }
  (* Positional constructor for [tag]. Arguments are, in order: the
   * definition text, the tag name, the line number, the byte offset
   * and the entity kind.
   *)
  let mk_tag tag_definition_text tagname line_number byte_offset kind =
    { tag_definition_text; tagname; line_number; byte_offset; kind }
  65. (*****************************************************************************)
  66. (* Helpers *)
  67. (*****************************************************************************)
  (* Renders a tag as one line of an emacs TAGS page:
   * <definition text><DEL><tag name><SOH><line number>,<byte offset>
   * followed by a newline. The [kind] field is not part of this format.
   *)
  let string_of_tag
      { tag_definition_text; tagname; line_number; byte_offset; kind = _ } =
    spf "%s\x7f%s\x01%d,%d\n"
      tag_definition_text tagname line_number byte_offset
  74. (* of tests/misc/functions.php *)
  75. (*
  76. let fake_defs = [
  77. mk_tag "function a() {" "a" 3 7;
  78. mk_tag "function b() {" "b" 7 32;
  79. mk_tag "function c() {" "c" 14 65;
  80. mk_tag "function d() {" "d" 20 107;
  81. ]
  82. *)
  (* helpers used externally by language taggers *)

  (* Builds the tag for the token described by [info].
   *
   * The tag name is the string of the token, the definition text is
   * [filelines.(line)] where [line] is the line of the token, and the
   * byte offset is the token position minus its column, i.e. the
   * position at which the token's line starts.
   *
   * NOTE(review): assumes [filelines] is indexed with the same line
   * numbering as [PI.line_of_info] -- confirm against callers.
   *)
  let tag_of_info filelines info kind =
    let line_number = PI.line_of_info info in
    let token_pos = PI.pos_of_info info in
    let token_col = PI.col_of_info info in
    let tagname = PI.str_of_info info in
    let tag_definition_text = filelines.(line_number) in
    { tag_definition_text;
      tagname;
      line_number;
      byte_offset = token_pos - token_col;
      kind;
    }
  (* Letter written in the "kind" column of a vim tags line for the given
   * entity kind; the empty string for the kinds that get no letter here.
   * C-s for "kind" in http://ctags.sourceforge.net/FORMAT
   *)
  let vim_tag_kind_str = function
    | E.Function | E.Method -> "f"
    | E.Class -> "c"
    | E.Constant -> "d"
    | E.Type -> "t"
    | E.Field -> "m"
    (* every remaining kind is listed explicitly so that the compiler
     * flags any constructor added to Entity_code later on *)
    | E.Module | E.Package
    | E.Global | E.Macro
    | E.TopStmts
    | E.Other _
    | E.ClassConstant
    | E.Constructor
    | E.File | E.Dir | E.MultiDirs
    | E.Exception
    | E.Prototype | E.GlobalExtern ->
      ""
  (* vim uses '/' as a marker for the tag definition text, so if this
   * text contains '/' they must be escaped. The text is used by vim as
   * a search pattern, in which a backslash introduces an escape
   * sequence, so literal backslashes must be escaped too; otherwise a
   * definition line such as 'use Foo\Bar;' yields a pattern that does
   * not match its own source line.
   *
   * Returns [str] with every '/' and every '\' preceded by a '\'.
   *)
  let vim_escape_slash str =
    (* Str does no escape processing inside a character class, so this
     * class contains exactly the two characters '/' and '\' *)
    let special = Str.regexp "[/\\]" in
    Str.global_substitute special
      (fun whole -> "\\" ^ Str.matched_string whole)
      str
  (* For methods, in addition to the tag for the precise 'class::method'
   * name, it can be convenient to generate another tag with just the
   * 'method' name so people can quickly jump to some code with just the
   * method name. Of course if there is also a function somewhere using the
   * same name then this function could be hard to reach so we generate
   * an (imprecise) method tag only when there is no ambiguity.
   *
   * Takes and returns a list of (file, tags) pairs. Files and tags keep
   * their order; the extra short tag, when generated, comes right after
   * the method tag it was derived from.
   *
   * Raises Failure if the name of a tag of kind E.Method does not
   * contain '::'.
   *)
  let add_method_tags_when_unambiguous files_and_defs =
    (* 'method' part of a 'class::method' tag name. The leading '.*' is
     * greedy so what is kept is the text after the last '::'. *)
    let short_method_name t =
      if t.tagname =~ ".*::\\(.*\\)"
      then Common.matched1 t.tagname
      else failwith ("method tag should contain '::', got: " ^ t.tagname)
    in
    let all_tags = files_and_defs +> List.map snd +> List.flatten in

    (* step1: global analysis on all defs, remember all names and methods *)
    let h_toplevel_names =
      all_tags +> Common.map_filter (fun t ->
        match t.kind with
        | E.Class | E.Function | E.Constant -> Some t.tagname
        | _ -> None
      ) +> Common.hashset_of_list
    in
    (* could skip the group_assoc_bykey and do Hashtbl.find_all below
     * instead *)
    let h_grouped_methods =
      all_tags +> Common.map_filter (fun t ->
        match t.kind with
        | E.Method -> Some (short_method_name t, t)
        | _ -> None
      ) +> Common.group_assoc_bykey_eff +> Common.hash_of_list
    in
    (* a short name is usable when no class, function or constant has
     * that name and exactly one method in the whole set has it *)
    let is_unambiguous methodname =
      not (Hashtbl.mem h_toplevel_names methodname) &&
      (match Hashtbl.find h_grouped_methods methodname with
       | [_] -> true
       | _ -> false)
    in

    (* step2: add method tag when no ambiguity *)
    files_and_defs +> List.map (fun (file, tags) ->
      file,
      tags +> List.map (fun t ->
        match t.kind with
        | E.Method ->
          let methodname = short_method_name t in
          if is_unambiguous methodname
          then [t; { t with tagname = methodname }]
          else [t]
        | _ -> [t]
      ) +> List.flatten
    )
  162. (*****************************************************************************)
  163. (* Main entry point *)
  164. (*****************************************************************************)
  (* Longest definition text, in bytes, that generate_TAGS_file accepts;
   * tags with a longer text are left out of the TAGS file.
   *)
  let threshold_long_line : int = 1000
  (* Writes an emacs TAGS file to [tags_file] from a list of
   * (file, tags) pairs.
   *
   * The output starts with [header]. Then, for each file, come a
   * '<file>,<size>' line where <size> is the length in bytes of the
   * tag lines of that file, the tag lines themselves, and a form feed
   * plus newline.
   *
   * A tag whose definition text is longer than [threshold_long_line] is
   * left out, after a warning issued through pr2_once.
   *)
  let generate_TAGS_file tags_file files_and_defs =
    (* the line for one tag, or None (plus a warning) when it is too long *)
    let line_of_tag file tag =
      if String.length tag.tag_definition_text <= threshold_long_line
      then Some (string_of_tag tag)
      else begin
        pr2_once (spf "WEIRD long string in %s, passing the tag" file);
        None
      end
    in
    Common.with_open_outfile tags_file (fun (pr_no_nl, _chan) ->
      let write_page (file, defs) =
        (* the body is built first because its size goes in the page
         * header, ahead of the body itself *)
        let body =
          defs +> Common.map_filter (line_of_tag file) +> Common.join "" in
        pr_no_nl (spf "%s,%d\n" file (String.length body));
        pr_no_nl body;
        pr_no_nl "\x0c\n"
      in
      pr_no_nl header;
      List.iter write_page files_and_defs
    );
    ()
  (* Writes a vi/vim tags file to [tags_file] from a list of
   * (file, tags) pairs.
   * http://vimdoc.sourceforge.net/htmldoc/tagsrch.html#tags-file-format
   *
   * The tags of all the files are merged and sorted by tag name, then
   * written one per line. A tag whose definition text is longer than
   * 300 bytes is left out, after a warning issued through pr2.
   *)
  let generate_vi_tags_file tags_file files_and_defs =
    (* the tags of one file, each keyed by its name for the sort below *)
    let keyed_tags_of_file (file, defs) =
      defs +> Common.map_filter (fun tag ->
        if String.length tag.tag_definition_text <= 300
        then Some (tag.tagname, (tag, file))
        else begin
          pr2 (spf "WEIRD long string in %s, passing the tag" file);
          None
        end
      )
    in
    Common.with_open_outfile tags_file (fun (pr_no_nl, _chan) ->
      let sorted_tags =
        files_and_defs
        +> List.map keyed_tags_of_file
        +> List.flatten
        +> Common.sort_by_key_lowfirst
      in
      (* {tagname}<Tab>{tagfile}<Tab>{tagaddress}
       * "The two characters semicolon and double quote [...] are
       * interpreted by Vi as the start of a comment, which makes the
       * following be ignored."
       * The kind letter is written after that comment marker.
       *)
      let write_entry (_tagname, (tag, file)) =
        pr_no_nl (spf "%s\t%s\t/%s/;\"\t%s\n"
                    tag.tagname
                    file
                    (vim_escape_slash tag.tag_definition_text)
                    (vim_tag_kind_str tag.kind))
      in
      List.iter write_entry sorted_tags
    )