/src/Semgrep/Languages/Python.hs

https://github.com/jb55/semgrep · Haskell · 315 lines · 174 code · 48 blank · 93 comment · 5 complexity · bd013cf38dff9972ea92f94f1d545149 MD5 · raw file

  1. {-# LANGUAGE ViewPatterns #-}
  2. module Semgrep.Languages.Python where
  3. import qualified Language.Python.Common.AST as P
  4. import Language.Python.Common.SrcLocation
  5. import Language.Python.Version2.Parser as P2
  6. import Language.Python.Version3.Parser as P3
  7. import qualified Language.Python.Common.Pretty as Pr
  8. import Language.Python.Common.PrettyAST()
  9. import Semgrep.Languages.Generic
  10. import Control.Monad
  11. import Data.Generics
  12. import Data.Monoid
  13. import Data.Maybe(maybeToList)
  14. import System.IO
  15. import Semgrep ( PythonVersion(..)
  16. )
  17. type PyAnno = SrcSpan
  18. type PyStmt = P.Statement PyAnno
  19. type PyAssignOp = P.AssignOp PyAnno
  20. type PyExpr = P.Expr PyAnno
  21. type PyModule = P.Module PyAnno
  22. --------------------------------------------------------------------------------
  23. -- | Binary operators
  24. --------------------------------------------------------------------------------
  25. fromPyOp :: (Data anno) => P.Op anno -> BinOp
  26. fromPyOp (P.LessThan _) = LeOp
  27. fromPyOp (P.GreaterThan _) = GrOp
  28. fromPyOp (P.Equality _) = EqOp
  29. fromPyOp o = UnkOp (gshow o)
  30. --------------------------------------------------------------------------------
  31. -- | Pretty print to maybe string
  32. --------------------------------------------------------------------------------
  33. justShowP :: (Pr.Pretty a) => a -> Maybe String
  34. justShowP = Just . show . Pr.pretty
  35. --------------------------------------------------------------------------------
  36. -- | Build a NInfo given a Pr.pretty printable node and SrcLocation
  37. --------------------------------------------------------------------------------
  38. fromSpan :: PyAnno -> Maybe Position
  39. fromSpan (SpanCoLinear f r cs ce) = Just $ PosSpanLine f r cs ce
  40. fromSpan (SpanMultiLine f rs cs re ce) = Just $ PosSpanLines f rs re cs ce
  41. fromSpan (SpanPoint f r c) = Just $ PosPoint f r c
  42. fromSpan _ = Nothing
  43. --------------------------------------------------------------------------------
  44. -- | Identifiers
  45. --------------------------------------------------------------------------------
  46. fromPyIdent :: P.Ident PyAnno -> Identifier
  47. fromPyIdent n@(P.Ident s _) = Ident s Nothing (fromPyInfo n)
  48. --------------------------------------------------------------------------------
  49. -- | Pretty print a list of nodes
  50. --------------------------------------------------------------------------------
  51. prettyNodes :: (P.Annotated n, Pr.Pretty (n PyAnno)) => [n PyAnno] -> String
  52. prettyNodes = show . mconcat . map Pr.pretty
  53. --------------------------------------------------------------------------------
  54. -- | Get NInfo out of an annotated Python node
  55. --------------------------------------------------------------------------------
  56. fromPyInfo :: (P.Annotated n, Pr.Pretty (n PyAnno)) => n PyAnno -> NInfo
  57. fromPyInfo n = NInfo pos prt
  58. where
  59. pos = fromSpan . P.annot $ n
  60. prt = Just . show . Pr.pretty $ n
  61. infoForNodes :: (P.Annotated n, Pr.Pretty (n PyAnno)) => [n PyAnno] -> NInfo
  62. infoForNodes nodes = NInfo pos prt
  63. where
  64. pos = spanNodes nodes
  65. prt = Just $ prettyNodes nodes
  66. spanNodes :: (P.Annotated n, Pr.Pretty (n PyAnno))
  67. => [n PyAnno]
  68. -> Maybe Position
  69. spanNodes = mergeSpan' . map (fromSpan . P.annot)
  70. --------------------------------------------------------------------------------
  71. -- | expressions
  72. --------------------------------------------------------------------------------
  73. fromPyExpr :: PyExpr -> Node
  74. fromPyExpr n@(P.BinaryOp op e1 e2 _) = BinaryOp (fromPyOp op)
  75. (fromPyExpr e1)
  76. (fromPyExpr e2)
  77. (fromPyInfo n)
  78. expression
  79. --------------------------------------------------------------------------------
  80. -- | Variables
  81. --------------------------------------------------------------------------------
  82. fromPyExpr n@(P.Var ident _) = Var (fromPyIdent ident)
  83. (fromPyInfo n)
  84. expression
  85. --------------------------------------------------------------------------------
  86. -- | Int literals
  87. --------------------------------------------------------------------------------
  88. fromPyExpr n@(P.Int val lit _) = Literal (IntLiteral $ fromInteger val)
  89. (Just lit)
  90. (fromPyInfo n)
  91. expression
  92. --------------------------------------------------------------------------------
  93. -- | String literals
  94. --------------------------------------------------------------------------------
  95. fromPyExpr n@(P.Strings strs _) = Compound lits ann expression
  96. where
  97. ann = fromPyInfo n
  98. litInfo c = NInfo Nothing (Just c)
  99. makeLiteral c = Literal (StringLiteral c) Nothing (litInfo c) expression
  100. lits = map makeLiteral strs
  101. --------------------------------------------------------------------------------
  102. -- | Function application
  103. --------------------------------------------------------------------------------
  104. fromPyExpr n@(P.Call e args _) = Call (fromPyExpr e) []
  105. (fromPyInfo n)
  106. expression
  107. --------------------------------------------------------------------------------
  108. -- | Unknown expressions
  109. --------------------------------------------------------------------------------
  110. fromPyExpr e = UnkNode "" (fromPyInfo e) expression
  111. --------------------------------------------------------------------------------
  112. -- | Convert Python if/elif to generic if statement
  113. --------------------------------------------------------------------------------
  114. fromPyElIf :: (PyExpr, [PyStmt]) -> Node
  115. fromPyElIf (expr, stmts) = If e1 block Nothing i expression
  116. where
  117. e1 = fromPyExpr expr
  118. i = fromPyInfo expr
  119. pr = Just $ prettyNodes stmts
  120. blockInfo = NInfo (spanNodes stmts) Nothing
  121. block = toBlock stmts blockInfo Statement
  122. --------------------------------------------------------------------------------
  123. -- | Python 'Suite' to generic Block
  124. --------------------------------------------------------------------------------
  125. toBlock :: [PyStmt] -> NInfo -> NKind -> Node
  126. toBlock stmts = Block (map fromPyStmt stmts)
  127. --------------------------------------------------------------------------------
  128. -- | Convert a list of python if/elif statements into a single generic If
  129. --------------------------------------------------------------------------------
  130. fromPyIf :: Maybe PyStmt -> [(PyExpr, [PyStmt])] -> Maybe Node -> Maybe Node
  131. -- If our if/elif list is empty, just return the else block (if it exists)
  132. fromPyIf _ [] el = el
  133. fromPyIf cond (t:ts) el =
  134. -- Extract one if/elif expression and block statement
  135. let (If e cs _ _ _) = fromPyElIf t
  136. -- Build a new if or elif, recursively applying this function
  137. -- for further elif/else statements
  138. in Just $ If e cs (fromPyIf Nothing ts el)
  139. (case fmap fromPyInfo cond of
  140. Nothing -> nullInfo
  141. Just a -> a)
  142. Statement
  143. --------------------------------------------------------------------------------
  144. -- | Function declarations
  145. --------------------------------------------------------------------------------
  146. fromPyStmt :: PyStmt -> Node
  147. fromPyStmt n@(P.Fun name' args result body _) =
  148. Function ident block i Declaration {
  149. decl_init = Nothing
  150. , kind_props = declProps
  151. }
  152. where
  153. declProps = maybeToList $ fmap (Result . fromPyExpr) result
  154. block = toBlock body blockInfo Statement
  155. blockInfo = NInfo blockSpan Nothing
  156. blockSpan = spanNodes body
  157. ident = Just $ fromPyIdent name'
  158. i = fromPyInfo n
  159. --------------------------------------------------------------------------------
  160. -- | Conditional if/elif/el statements
  161. --------------------------------------------------------------------------------
  162. fromPyStmt n@(P.Conditional ifs el a) =
  163. let maybeElse = case el of
  164. [] -> Nothing
  165. _ -> Just $ toBlock el (fromPyInfo $ head el) Statement
  166. in case fromPyIf (Just n) ifs maybeElse of
  167. Nothing -> error "empty conditional"
  168. Just iff -> iff
  169. --------------------------------------------------------------------------------
  170. -- | expression statements
  171. --------------------------------------------------------------------------------
  172. fromPyStmt n@(P.StmtExpr expr _) = Singleton (fromPyExpr expr)
  173. (fromPyInfo n)
  174. Statement
  175. --------------------------------------------------------------------------------
  176. -- | Assignment statements
  177. --------------------------------------------------------------------------------
  178. fromPyStmt n@(P.Assign exprs exprFrom _) =
  179. let ann = fromPyInfo n
  180. exprFrom' = fromPyExpr exprFrom
  181. in case exprs of
  182. [] -> error "empty assign"
  183. [a] -> Assign DefaultAssign (fromPyExpr a) exprFrom' ann Statement
  184. _ -> DestructuringAssign (map fromPyExpr exprs) exprFrom' ann Statement
  185. --------------------------------------------------------------------------------
  186. -- | Augmented assignment statements (eg. +=, -=, etc)
  187. --------------------------------------------------------------------------------
  188. fromPyStmt n@(P.AugmentedAssign e1 op e2 _) = Assign (fromPyAssignOp op)
  189. (fromPyExpr e1)
  190. (fromPyExpr e2)
  191. (fromPyInfo n)
  192. Statement
  193. --------------------------------------------------------------------------------
  194. -- | Class statements
  195. --------------------------------------------------------------------------------
  196. fromPyStmt n@(P.Class name args body _) = Class (fromPyIdent name)
  197. (map fromPyStmt body)
  198. (fromPyInfo n)
  199. Statement
  200. --------------------------------------------------------------------------------
  201. -- | Return statements
  202. --------------------------------------------------------------------------------
  203. fromPyStmt n@(P.Return mExpr _) = Return (fmap fromPyExpr mExpr)
  204. (fromPyInfo n)
  205. Statement
  206. --------------------------------------------------------------------------------
  207. -- | Import statements
  208. --------------------------------------------------------------------------------
  209. fromPyStmt n@(P.Import items _) = Import (map fromPyImportItem items)
  210. Nothing
  211. (fromPyInfo n)
  212. Statement
  213. --------------------------------------------------------------------------------
  214. -- | Unknown statements
  215. --------------------------------------------------------------------------------
  216. fromPyStmt n = UnkNode "" (fromPyInfo n) Statement
  217. --------------------------------------------------------------------------------
  218. -- | Convert a Python import item to a generic statement
  219. --------------------------------------------------------------------------------
  220. fromPyImportItem :: P.ImportItem PyAnno -> ImportItem
  221. fromPyImportItem n@(P.ImportItem names maybeIdent _) =
  222. ImportItem (map fromPyIdent names)
  223. (fmap fromPyIdent maybeIdent)
  224. (fromPyInfo n)
  225. --------------------------------------------------------------------------------
  226. -- | Python 'augmented' assignment operators
  227. --------------------------------------------------------------------------------
  228. fromPyAssignOp :: PyAssignOp -> AssignOp
  229. fromPyAssignOp (P.PlusAssign _) = PlusAssign
  230. fromPyAssignOp (P.DivAssign _) = DivAssign
  231. fromPyAssignOp (P.MultAssign _) = MulAssign
  232. fromPyAssignOp (P.MinusAssign _) = MinusAssign
  233. fromPyAssignOp (P.ModAssign _) = ModAssign
  234. fromPyAssignOp (P.PowAssign _) = PowAssign
  235. fromPyAssignOp (P.BinAndAssign _) = BinAndAssign
  236. fromPyAssignOp (P.BinOrAssign _) = BinOrAssign
  237. fromPyAssignOp (P.BinXorAssign _) = BinXorAssign
  238. fromPyAssignOp (P.LeftShiftAssign _) = LeftShiftAssign
  239. fromPyAssignOp (P.RightShiftAssign _) = RightShiftAssign
  240. fromPyAssignOp (P.FloorDivAssign _) = FloorDivAssign
  241. fromPyAssignOp n = UnkAssign (show n)
  242. --------------------------------------------------------------------------------
  243. -- | Convert a Python module to a generic module
  244. --------------------------------------------------------------------------------
  245. fromPyModule :: PyModule -> Module
  246. fromPyModule a@(P.Module stmts) = Module (map fromPyStmt stmts)
  247. Nothing
  248. nullInfo
  249. --------------------------------------------------------------------------------
  250. -- | Get a list of all Python expressions from a Python module
  251. --------------------------------------------------------------------------------
  252. allPyExprs :: (Data a) => P.Module a -> [P.Expr a]
  253. allPyExprs = listify (const True)
  254. --------------------------------------------------------------------------------
  255. -- | Parse a Python AST to a generic AST given the python version and
  256. -- file path
  257. --------------------------------------------------------------------------------
  258. parse :: PythonVersion -> FilePath -> IO (Either String Project)
  259. parse ver f = do
  260. content <- openFile f ReadMode >>= hGetContents
  261. let parser = case ver of
  262. Python2 -> P2.parseModule
  263. Python3 -> P3.parseModule
  264. let result = parser content f
  265. return $ case result of
  266. Left pe -> Left (show pe)
  267. Right (mod, comments) -> Right $ Project [fromPyModule mod]