PageRenderTime 55ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/pentest/metagoofil/extractors/metadataMSOfficeXML.py

https://github.com/sullivanmatt/Raspberry-Pwn
Python | 329 lines | 328 code | 1 blank | 0 comment | 0 complexity | 64c7e9cdac8738ed2c6069bcfd5ecfb1 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, MPL-2.0-no-copyleft-exception, GPL-2.0, GPL-3.0
  1. import unzip
  2. import zipfile
  3. import sys
  4. import re
  5. import os
  6. import random
  7. class metaInfoMS:
  8. def __init__(self):
  9. self.template =""
  10. self.totalTime =""
  11. self.pages =""
  12. self.words =""
  13. self.characters =""
  14. self.application =""
  15. self.docSecurity =""
  16. self.lines =""
  17. self.paragraphs =""
  18. self.scaleCrop =""
  19. self.company =""
  20. self.linksUpToDate =""
  21. self.charactersWithSpaces =""
  22. self.shareDoc =""
  23. self.hyperlinksChanged =""
  24. self.appVersion =""
  25. self.title =""
  26. self.subject =""
  27. self.creator =""
  28. self.keywords =""
  29. self.lastModifiedBy =""
  30. self.revision =""
  31. self.createdDate =""
  32. self.modifiedDate =""
  33. self.userscomments =""
  34. self.thumbnailPath =""
  35. self.comments= "ok"
  36. def __init__(self,filepath):
  37. print filepath
  38. self.template =""
  39. self.totalTime =""
  40. self.pages =""
  41. self.words =""
  42. self.characters =""
  43. self.application =""
  44. self.docSecurity =""
  45. self.lines =""
  46. self.paragraphs =""
  47. self.scaleCrop =""
  48. self.company =""
  49. self.linksUpToDate =""
  50. self.charactersWithSpaces =""
  51. self.shareDoc =""
  52. self.hyperlinksChanged =""
  53. self.appVersion =""
  54. self.title =""
  55. self.subject =""
  56. self.creator =""
  57. self.keywords =""
  58. self.lastModifiedBy =""
  59. self.revision =""
  60. self.createdDate =""
  61. self.modifiedDate =""
  62. self.thumbnailPath =""
  63. rnd = str(random.randrange(0, 1001, 3))
  64. zip = zipfile.ZipFile(filepath, 'r')
  65. file('app'+rnd+'.xml', 'w').write(zip.read('docProps/app.xml'))
  66. file('core'+rnd+'.xml', 'w').write(zip.read('docProps/core.xml'))
  67. try:
  68. file('comments'+rnd+'.xml', 'w').write(zip.read('word/comments.xml'))
  69. self.comments="ok"
  70. except:
  71. self.comments="error"
  72. thumbnailPath = ""
  73. #try:
  74. #file('thumbnail'+rnd+'.jpeg', 'w').write(zip.read('docProps/thumbnail.jpeg'))
  75. #thumbnailPath = 'thumbnail'+rnd+'.jpeg'
  76. #except:
  77. # pass
  78. zip.close()
  79. # primero algunas estadisticas del soft usado para la edicion y del documento
  80. f = open ('app'+rnd+'.xml','r')
  81. app = f.read()
  82. self.cargaApp(app)
  83. f.close()
  84. if self.comments=="ok":
  85. f = open ('comments'+rnd+'.xml','r')
  86. comm = f.read()
  87. self.cargaComm(comm)
  88. f.close()
  89. # datos respecto a autor, etc
  90. f = open ('core'+rnd+'.xml','r')
  91. core = f.read()
  92. self.cargaCore(core)
  93. self.thumbnailPath = thumbnailPath
  94. f.close()
  95. # borramos todo menos el thumbnail
  96. os.remove('app'+rnd+'.xml')
  97. os.remove('core'+rnd+'.xml')
  98. os.remove('comments'+rnd+'.xml')
  99. #self.toString()
  100. def toString(self):
  101. print "--- Metadata app ---"
  102. print " template: " + str(self.template)
  103. print " totalTime: " + str(self.totalTime)
  104. print " pages: "+ str(self.pages)
  105. print " words: "+ str(self.words)
  106. print " characters: "+ str(self.characters)
  107. print " application: "+ str(self.application)
  108. print " docSecurity: "+ str(self.docSecurity)
  109. print " lines: "+ str(self.lines)
  110. print " paragraphs: "+ str(self.paragraphs)
  111. print " scaleCrop: " + str(self.scaleCrop)
  112. print " company: "+ str(self.company)
  113. print " linksUpToDate: " + str(self.linksUpToDate)
  114. print " charactersWithSpaces: "+ str(self.charactersWithSpaces)
  115. print " shareDoc:" + str(self.shareDoc)
  116. print " hyperlinksChanged:" + str(self.hyperlinksChanged)
  117. print " appVersion:" + str(self.appVersion)
  118. print "\n --- Metadata core ---"
  119. print " title:" + str(self.title)
  120. print " subject:" + str(self.subject)
  121. print " creator:" + str(self.creator)
  122. print " keywords:" + str(self.keywords)
  123. print " lastModifiedBy:" + str(self.lastModifiedBy)
  124. print " revision:" + str(self.revision)
  125. print " createdDate:" + str(self.createdDate)
  126. print " modifiedDate:" + str(self.modifiedDate)
  127. print "\n thumbnailPath:" + str(self.thumbnailPath)
  128. def cargaComm(self,datos):
  129. try:
  130. p = re.compile('w:author="(.*?)" w')
  131. self.userscomments = p.findall(datos)
  132. except:
  133. pass
  134. def cargaApp(self,datos):
  135. try:
  136. p = re.compile('<Template>(.*)</Template>')
  137. self.template = str (p.findall(datos)[0])
  138. except:
  139. pass
  140. try:
  141. p = re.compile('<TotalTime>(.*)</TotalTime>')
  142. self.totalTime = str (p.findall(datos)[0])
  143. except:
  144. pass
  145. try:
  146. p = re.compile('<Pages>(.*)</Pages>')
  147. self.pages = str (p.findall(datos)[0])
  148. except:
  149. pass
  150. try:
  151. p = re.compile('<Words>(.*)</Words>')
  152. self.words = str (p.findall(datos)[0])
  153. except:
  154. pass
  155. try:
  156. p = re.compile('<Characters>(.*)</Characters>')
  157. self.characters = str (p.findall(datos)[0])
  158. except:
  159. pass
  160. try:
  161. p = re.compile('<Application>(.*)</Application>')
  162. self.application = str (p.findall(datos)[0])
  163. except:
  164. pass
  165. try:
  166. p = re.compile('<DocSecurity>(.*)</DocSecurity>')
  167. self.docSecurity = str (p.findall(datos)[0])
  168. except:
  169. pass
  170. try:
  171. p = re.compile('<Lines>(.*)</Lines>')
  172. self.lines = str (p.findall(datos)[0])
  173. except:
  174. pass
  175. try:
  176. p = re.compile('<Paragraphs>(.*)</Paragraphs>')
  177. self.paragraphs = str (p.findall(datos)[0])
  178. except:
  179. pass
  180. try:
  181. p = re.compile('<ScaleCrop>(.*)</ScaleCrop>')
  182. self.scaleCrop = str (p.findall(datos)[0])
  183. except:
  184. pass
  185. try:
  186. p = re.compile('<Company>(.*)</Company>')
  187. self.company = str (p.findall(datos)[0])
  188. except:
  189. pass
  190. try:
  191. p = re.compile('<LinksUpToDate>(.*)</LinksUpToDate>')
  192. self.linksUpToDate = str (p.findall(datos)[0])
  193. except:
  194. pass
  195. try:
  196. p = re.compile('<CharactersWithSpaces>(.*)</CharactersWithSpaces>')
  197. self.charactersWithSpaces = str (p.findall(datos)[0])
  198. except:
  199. pass
  200. try:
  201. p = re.compile('<SharedDoc>(.*)</SharedDoc>')
  202. self.sharedDoc = str (p.findall(datos)[0])
  203. except:
  204. pass
  205. try:
  206. p = re.compile('<HyperlinksChanged>(.*)</HyperlinksChanged>')
  207. self.hyperlinksChanged = str (p.findall(datos)[0])
  208. except:
  209. pass
  210. try:
  211. p = re.compile('<AppVersion>(.*)</AppVersion>')
  212. self.appVersion = str (p.findall(datos)[0])
  213. except:
  214. pass
  215. def cargaCore(self,datos):
  216. try:
  217. p = re.compile('<dc:title>(.*)</dc:title>')
  218. self.title = str (p.findall(datos)[0])
  219. except:
  220. pass
  221. try:
  222. p = re.compile('<dc:subject>(.*)</dc:subject>')
  223. self.subject = str (p.findall(datos)[0])
  224. except:
  225. pass
  226. try:
  227. p = re.compile('<dc:creator>(.*)</dc:creator>')
  228. self.creator = str (p.findall(datos)[0])
  229. except:
  230. pass
  231. try:
  232. p = re.compile('<cp:keywords>(.*)</cp:keywords>')
  233. self.keywords = str (p.findall(datos)[0])
  234. except:
  235. pass
  236. try:
  237. p = re.compile('<cp:lastModifiedBy>(.*)</cp:lastModifiedBy>')
  238. self.lastModifiedBy = str (p.findall(datos)[0])
  239. except:
  240. pass
  241. try:
  242. p = re.compile('<cp:revision>(.*)</cp:revision>')
  243. self.revision = str (p.findall(datos)[0])
  244. except:
  245. pass
  246. try:
  247. p = re.compile('<dcterms:created xsi:type=".*">(.*)</dcterms:created>')
  248. self.createdDate = str (p.findall(datos)[0])
  249. except:
  250. pass
  251. try:
  252. p = re.compile('<dcterms:modified xsi:type=".*">(.*)</dcterms:modified>')
  253. self.modifiedDate = str (p.findall(datos)[0])
  254. except:
  255. pass
  256. def getData(self):
  257. return "ok"
  258. def getRaw(self):
  259. raw = "Not implemented yet"
  260. return raw
  261. def getUsers(self):
  262. res=[]
  263. temporal=[]
  264. res.append(self.creator)
  265. res.append(self.lastModifiedBy)
  266. if self.comments == "ok":
  267. res.extend(self.userscomments)
  268. else:
  269. pass
  270. for x in res:
  271. if temporal.count(x) ==0:
  272. temporal.append(x)
  273. else:
  274. pass
  275. return temporal
  276. def getPaths(self):
  277. res=[]
  278. #res.append(self.revision)
  279. return res
  280. def getSoftware(self):
  281. res=[]
  282. res.append(self.application)
  283. return res