/lang_php/analyze/foundation/comment_php.ml

https://github.com/repos-ocaml/pfff · OCaml · 188 lines · 102 code · 22 blank · 64 comment · 12 complexity · 67443617c49c17836ceb572a22c61721 MD5 · raw file

  1. (* Yoann Padioleau
  2. *
  3. * Copyright (C) 2010 Facebook
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public License
  7. * version 2.1 as published by the Free Software Foundation, with the
  8. * special exception on linking described in file license.txt.
  9. *
  10. * This library is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
  13. * license.txt for more details.
  14. *)
  15. open Common
  16. (*****************************************************************************)
  17. (* Prelude *)
  18. (*****************************************************************************)
  19. (*
  20. * A few types and helpers related to comment analysis.
  21. * Should perhaps at one point parse even more comments and have
  22. * tokens such as TWord, TAnnot, etc.
  23. *
  24. * Note that the T_COMMENT and T_DOC_COMMENT tokens do not contain
  25. * the final newline character. This will be tokenized as a separate
  26. * TNewline.
  27. *)
  28. (*****************************************************************************)
  29. (* Types *)
  30. (*****************************************************************************)
  (*
   * We sometimes need to analyze comments and modify them, but this can be
   * painful because PHP supports several forms of comments, and people use
   * different styles within them. We want any modification to be harmonious
   * with the rest of the comment, hence the need for better parsing of
   * comments.
   *)
  (* Structured form of a PHP comment. Each constructor records which
   * surface syntax the comment used, so that an edited comment can be
   * written back in the same style. *)
  type comment =
    (* Block opened by '/**'. Carries the body lines without their
     * leading ' * ' (the opening '/**' and the closing mark are not part
     * of the list) and a flag that is true when the end mark is '*/'
     * and false when it is '**/'. *)
    | DocBlock of string list * bool
    (* Block opened by '/*' and spanning several lines; the body lines
     * are stored without their leading ' * '. *)
    | MultiLineSlashStar of string list
    (* One-line comment, stored without the enclosing '/* ... */'. *)
    | SingleLineSlashStar of string
    (* One-line comment, stored without the leading '// '. *)
    | SingleLineSlashSlash of string
    (* Anything else; the raw text is kept untouched. *)
    | OtherStyle of string
  45. (*****************************************************************************)
  46. (* Helpers *)
  47. (*****************************************************************************)
  (* Remove the decoration found at the start of a comment line and return
   * the remaining text: either a leading '//' followed by optional spaces,
   * or otherwise any leading run of spaces and stars. When neither pattern
   * applies the input is returned unchanged. *)
  let strip_comment_marks s =
    if s =~ "^//[ ]*\\(.*\\)"
    then Common.matched1 s
    else if s =~ "^[ *]*\\(.*\\)"
    then Common.matched1 s
    else s
  53. (*
  54. let _ = example (strip_comment_marks "// @emails" = "@emails")
  55. (* when the comment is part of a multiline comment, people use '*' for
  56. * esthetic reason, actually just like in this comment *)
  57. let _ = example (strip_comment_marks "* @emails" = "@emails")
  58. let _ = example (strip_comment_marks " @emails foo" = "@emails foo")
  59. *)
  60. (*****************************************************************************)
  61. (* Parsing *)
  62. (*****************************************************************************)
  (* Classify the raw text of a comment.
   *
   * The forms are tried in this order:
   *  - '// text' gives SingleLineSlashSlash
   *  - '/* text */' gives SingleLineSlashStar
   *  - text starting with '/*' is split into lines; with more than two
   *    lines, a first line equal to '/**' or '/*', a last line equal to
   *    ' */' or ' **/', and every middle line carrying the ' *' gutter,
   *    it gives DocBlock or MultiLineSlashStar
   * Everything else is reported through pr2 and returned as OtherStyle
   * with the raw text. *)
  let (parse_comment: string -> comment) = fun s ->
    (* Warn with the given prefix followed by the raw comment, then give up
     * on structured parsing. *)
    let reject prefix =
      pr2 (prefix ^ s);
      OtherStyle s
    in
    (* Drop the ' * ' gutter of one body line. A line made only of the
     * gutter becomes the empty string; a line without gutter yields None. *)
    let strip_gutter line =
      if line =~ " \\*$"
      then Some ""
      else if line =~ " \\* \\(.*\\)"
      then Some (Common.matched1 line)
      else None
    in
    if s =~ "^// \\(.*\\)"
    then SingleLineSlashSlash (Common.matched1 s)
    else if s =~ "^/\\* \\(.*\\) \\*/"
    then SingleLineSlashStar (Common.matched1 s)
    else if not (s =~ "^/\\*.*")
    then reject "unknown comment format: "
    else
      let xs = Common.lines s in
      if List.length xs <= 2
      then reject "wrong docblock comment: "
      else
        let (opener, ys, closer) = Common.head_middle_tail xs in
        let well_delimited =
          List.mem opener ["/**"; "/*"] && List.mem closer [" */"; " **/"]
        in
        if not well_delimited
        then reject "wrong comment: "
        else
          let body = Common.map_filter strip_gutter ys in
          (* a shorter result means some middle line had no gutter *)
          if List.length ys <> List.length body
          then reject "wrong comment: "
          else if opener = "/*"
          then MultiLineSlashStar body
          else
            (* opener is '/**' here and closer is one of the two accepted
             * end marks, so the flag only has to tell them apart *)
            DocBlock (body, closer = " */")
  109. (*
  110. let _ = example(parse_comment "/**\n * foo\n */" = (DocBlock (["foo"],true)))
  111. let _ = example(parse_comment "/*\n * foo\n */" = (MultiLineSlashStar ["foo"]))
  112. *)
  113. (*****************************************************************************)
  114. (* UnParsing *)
  115. (*****************************************************************************)
  (* Build the indentation prefix: the one-space string repeated indent
   * times and concatenated. *)
  let gen_space indent =
    let blanks = Common.repeat " " indent in
    Common.join "" blanks
  (* Render a comment back to text.
   *
   * Only DocBlock is supported; every other constructor raises Todo.
   *
   * indent (default 0) is the number of spaces put in front of every line
   * except the opening '/**', which is emitted without any prefix. Blank
   * body lines get the same prefix as the other lines, so the star gutter
   * stays aligned when indent > 0; they used to be emitted as a bare ' *'.
   * The boolean of the DocBlock selects the end mark: '*/' when true,
   * '**/' when false. The lines are assembled with Common.unlines. *)
  let (unparse_comment: ?indent:int -> comment -> string) =
    fun ?(indent=0) m ->
    match m with
    | DocBlock (xs, b) ->
        (* computed in this branch only, so the unsupported cases still
         * raise Todo before anything else is evaluated *)
        let margin = gen_space indent in
        let body = xs +> List.map (fun s ->
          if s = ""
          then spf "%s *" margin
          else spf "%s * %s" margin s)
        in
        let closing =
          if b
          then spf "%s */" margin
          else spf "%s **/" margin
        in
        (["/**"] ++ body ++ [closing]) +> Common.unlines
    | MultiLineSlashStar _
    | SingleLineSlashStar _
    | SingleLineSlashSlash _
    | OtherStyle _ ->
        raise Todo
  135. (*
  136. let _ = example(unparse_comment (DocBlock (["foo"],true)) = "/**\n * foo\n */\n" )
  137. let _ = example(unparse_comment (DocBlock ([""],true)) = "/**\n *\n */\n" )
  138. *)
  139. (*****************************************************************************)
  140. (* aux *)
  141. (*****************************************************************************)
  (* Prefix to put in front of a new line added to a comment of the given
   * style: ' * ' for both block styles, '// ' for slash-slash comments.
   * The remaining styles are not handled and raise Todo. *)
  let (comment_style_new_line: comment -> string) = function
    | DocBlock _ | MultiLineSlashStar _ -> " * "
    | SingleLineSlashSlash _ -> "// "
    | SingleLineSlashStar _ | OtherStyle _ -> raise Todo
  148. (* hmmm could also use an array ... *)
  (* Pair each text line of a comment with its line index.
   * A slash-slash comment has its single line at index 0. For the two
   * block styles the body lines are numbered by Common.index_list_1 and
   * each pair is flipped to (index, line). Other styles raise Todo. *)
  let (index_comment: comment -> (int * string) list) = function
    | SingleLineSlashSlash s -> [(0, s)]
    | DocBlock (body, _) | MultiLineSlashStar body ->
        let flip (line, i) = (i, line) in
        List.map flip (Common.index_list_1 body)
    | SingleLineSlashStar _ | OtherStyle _ -> raise Todo
  155. (*
  156. let _ = example(index_comment (SingleLineSlashSlash "foo") = [0, "foo"])
  157. let _ = example(index_comment (DocBlock (["foo"],true)) = [1, "foo"])
  158. *)
  (* Tokenize file with Parse_php.tokens and return, in token order, the
   * payload of every T_COMMENT and T_DOC_COMMENT token; all other tokens
   * are dropped. *)
  let comments_of_file file =
    let comment_info = function
      | Parser_php.T_COMMENT info | Parser_php.T_DOC_COMMENT info -> Some info
      | _ -> None
    in
    let toks = Parse_php.tokens file in
    Common.map_filter comment_info toks