PageRenderTime 34ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/src/bat/checks.py

https://gitlab.com/hook/binary-analysis-tool
Python | 328 lines | 286 code | 8 blank | 34 comment | 20 complexity | d2878b599d0013c83e60cdac1750ccc1 MD5 | raw file
  1. #!/usr/bin/python
  2. ## Binary Analysis Tool
  3. ## Copyright 2009-2016 Armijn Hemel for Tjaldur Software Governance Solutions
  4. ## Licensed under Apache 2.0, see LICENSE file for details
  5. '''
  6. This file contains a few methods that check for the presence of marker
  7. strings that are likely to be found in certain programs. It is far from fool
  8. proof and false positives are likely, so either check the results, or replace
  9. it with your own more robust checks.
  10. '''
  11. import string, re, os, magic, subprocess, sys, tempfile, copy
  12. import extractor, elfcheck
  13. ## generic searcher for certain marker strings
  14. ## TODO: implement overlap between subsequent buffer reads
  15. def genericSearch(filename, markerDict, blacklist=[], unpacktempdir=None):
  16. results = []
  17. ## first see if the entire file has been blacklisted
  18. filesize = os.stat(filename).st_size
  19. carved = False
  20. foundmarkers = set()
  21. if blacklist != []:
  22. if extractor.inblacklist(0, blacklist) == filesize:
  23. return None
  24. datafile = open(filename, 'rb')
  25. lastindex = 0
  26. datafile.seek(lastindex)
  27. ## make a copy and add a bogus value for the last
  28. ## byte to a temporary blacklist to make the loop work
  29. ## well.
  30. blacklist_tmp = copy.deepcopy(blacklist)
  31. blacklist_tmp.append((filesize,filesize))
  32. for i in blacklist_tmp:
  33. if i[0] == lastindex:
  34. lastindex = i[1] - 1
  35. datafile.seek(lastindex)
  36. continue
  37. if i[0] > lastindex:
  38. ## read the data, then search for markers
  39. data = datafile.read(i[0] - lastindex)
  40. if len(data) <=1:
  41. ## set lastindex to the next
  42. lastindex = i[1] - 1
  43. datafile.seek(lastindex)
  44. continue
  45. for marker in markerDict.keys():
  46. if marker in foundmarkers:
  47. continue
  48. for markerstring in markerDict[marker]:
  49. markeroffset = data.find(markerstring)
  50. if markeroffset != -1:
  51. results.append(marker)
  52. foundmarkers.add(marker)
  53. break
  54. ## set lastindex to the next
  55. lastindex = i[1] - 1
  56. datafile.seek(lastindex)
  57. datafile.close()
  58. else:
  59. datafile = open(filename, 'rb')
  60. databuffer = []
  61. offset = 0
  62. datafile.seek(offset)
  63. databuffer = datafile.read(100000)
  64. while databuffer != '':
  65. for marker in markerDict.keys():
  66. if marker in foundmarkers:
  67. continue
  68. for markerstring in markerDict[marker]:
  69. markeroffset = databuffer.find(markerstring)
  70. if markeroffset != -1:
  71. results.append(marker)
  72. foundmarkers.add(marker)
  73. break
  74. ## move the offset 100000
  75. datafile.seek(offset + 100000)
  76. databuffer = datafile.read(100000)
  77. offset = offset + len(databuffer)
  78. datafile.close()
  79. if results != []:
  80. return list(set(results))
  81. return None
  82. ## The result of this method is a list of library names that the file dynamically links
  83. ## with. The path of these libraries is not given, since this is usually not recorded
  84. ## in the binary (unless RPATH is used) but determined at runtime: it is dependent on
  85. ## the dynamic linker configuration on the device. With some mixing and matching it is
  86. ## nearly always to determine which library in which path is used, since most installations
  87. ## don't change the default search paths.
  88. def searchDynamicLibs(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  89. if not 'elf' in tags:
  90. return
  91. dynamicres = elfcheck.getDynamicLibs(filename, scandebug)
  92. if dynamicres == {} or dynamicres == None:
  93. return None
  94. if 'needed_libs' in dynamicres:
  95. return (['libs'], dynamicres['needed_libs'])
  96. ## This method determines the architecture of the executable file.
  97. ## This is necessary because sometimes leftovers from different products (and
  98. ## different architectures) can be found in one firmware.
  99. def scanArchitecture(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  100. if not 'elf' in tags:
  101. return
  102. archres = elfcheck.getArchitecture(filename, tags)
  103. if archres != None:
  104. return (['architecture'], archres)
  105. ## search markers for various open source programs
  106. ## This search is not accurate, but might come in handy in some situations
  107. def searchMarker(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  108. markerStrings = {
  109. 'loadlin': [ 'Ooops..., size of "setup.S" has become too long for LOADLIN,'
  110. , 'LOADLIN started from $'
  111. ],
  112. 'iptables':[ 'iptables who? (do you need to insmod?)'
  113. , 'Will be implemented real soon. I promise ;)'
  114. , 'can\'t initialize iptables table `%s\': %s'
  115. ],
  116. 'dproxy': [ '# dproxy monitors this file to determine when the machine is'
  117. , '# If you want dproxy to log debug info specify a file here.'
  118. ],
  119. 'ez-ipupdate': [ 'ez-ipupdate Version %s, Copyright (C) 1998-'
  120. , '%s says that your IP address has not changed since the last update'
  121. , 'you must provide either an interface or an address'
  122. ],
  123. 'libusb': [ 'Check that you have permissions to write to %s/%s and, if you don\'t, that you set up hotplug (http://linux-hotplug.sourceforge.net/) correctly.'
  124. , 'usb_os_find_busses: Skipping non bus directory %s'
  125. , 'usb_os_init: couldn\'t find USB VFS in USB_DEVFS_PATH'
  126. ],
  127. 'vsftpd': [ 'vsftpd: version'
  128. , '(vsFTPd '
  129. , 'VSFTPD_LOAD_CONF'
  130. , 'run two copies of vsftpd for IPv4 and IPv6'
  131. ],
  132. 'hostapd': [ 'hostapd v'],
  133. 'wpasupplicant': [ 'wpa_supplicant v'],
  134. 'iproute': [ 'Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }'
  135. , 'tc utility, iproute2-ss%s'
  136. , 'Option "%s" is unknown, try "tc -help".'
  137. ],
  138. 'wireless-tools': [ "Driver has no Wireless Extension version information."
  139. , "Wireless Extension version too old."
  140. , "Wireless-Tools version"
  141. , "Wireless Extension, while we are using version %d."
  142. , "Currently compiled with Wireless Extension v%d."
  143. ],
  144. 'redboot': ["Display RedBoot version information"],
  145. 'uboot': [ "run script starting at addr"
  146. , "Hit any key to stop autoboot: %2d"
  147. , "## Binary (kermit) download aborted"
  148. , "## Ready for binary (ymodem) download "
  149. ]}
  150. res = genericSearch(filename, markerStrings, blacklist)
  151. if res != None:
  152. return (res, res)
  153. '''
  154. ## What actually do these dependencies mean?
  155. ## Are they dependencies of the installer itself, or of the programs that are
  156. ## installed by the installer?
  157. def searchWindowsDependencies(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  158. ## first determine if we are dealing with a MS Windows executable
  159. ms = magic.open(magic.MAGIC_NONE)
  160. ms.load()
  161. mstype = ms.file(filename)
  162. ms.close()
  163. if not 'PE32 executable for MS Windows' in mstype and not "PE32+ executable for MS Windows" in mstype:
  164. return None
  165. deps = extractor.searchAssemblyDeps(filename)
  166. if deps == None:
  167. return None
  168. if deps == []:
  169. return None
  170. else:
  171. return (['windowsdependencies'], deps)
  172. '''
  173. ## method to extract meta information from PDF files
  174. def scanPDF(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  175. ## Only consider whole PDF files. If anything has been carved from
  176. ## it, skip it. Blacklists are a good indicator.
  177. if blacklist != []:
  178. return None
  179. if not 'pdf' in tags:
  180. return None
  181. p = subprocess.Popen(['pdfinfo', "%s" % (filename,)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  182. (stanout, stanerr) = p.communicate()
  183. if p.returncode != 0:
  184. return
  185. pdfinfo = {}
  186. pdflines = stanout.rstrip().split("\n")
  187. for pdfline in pdflines:
  188. pdfsplit = pdfline.split(":", 1)
  189. if len(pdfsplit) != 2:
  190. continue
  191. (tag, value) = pdfsplit
  192. if tag == "Title":
  193. pdfinfo['title'] = value.strip()
  194. if tag == "Author":
  195. pdfinfo['author'] = value.strip()
  196. if tag == "Creator":
  197. pdfinfo['creator'] = value.strip()
  198. if tag == "CreationDate":
  199. pdfinfo['creationdate'] = value.strip()
  200. if tag == "Producer":
  201. pdfinfo['producer'] = value.strip()
  202. if tag == "Tagged":
  203. pdfinfo['tagged'] = value.strip()
  204. if tag == "Pages":
  205. pdfinfo['pages'] = int(value.strip())
  206. if tag == "Page size":
  207. pdfinfo['pagesize'] = value.strip()
  208. if tag == "Encrypted":
  209. pdfinfo['encrypted'] = value.strip()
  210. if tag == "Optimized":
  211. pdfinfo['optimized'] = value.strip()
  212. if tag == "PDF version":
  213. pdfinfo['version'] = value.strip()
  214. return (['pdfinfo'], pdfinfo)
  215. ## scan for mentions of licenses
  216. ######################################
  217. ## !!! WARNING WARNING WARNING !!! ###
  218. ######################################
  219. ## This should only be used as an indicator for further investigation,
  220. ## never as proof that a binary is actually licensed under a license!
  221. def scanLicenses(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  222. licenseidentifiers = {}
  223. ## identifiers for any GNU license (could apply to multiple licenses)
  224. licenseidentifiers['GNU'] = ["General Public License", "http://www.gnu.org/licenses/", "http://gnu.org/licenses/", "http://www.gnu.org/gethelp/", "http://www.gnu.org/software/"]
  225. ## identifiers for a version of GNU GPL
  226. licenseidentifiers['GPL'] = ["http://gnu.org/licenses/gpl.html", "http://www.gnu.org/licenses/gpl.html",
  227. "http://www.gnu.org/licenses/gpl.txt", "http://www.opensource.org/licenses/gpl-license.php",
  228. "http://www.gnu.org/copyleft/gpl.html"]
  229. ## identifiers specifically for GPLv2
  230. licenseidentifiers['GPL-2.0'] = ["http://gnu.org/licenses/gpl-2.0.html", "http://www.gnu.org/licenses/old-licenses/gpl-2.0.html"]
  231. ## identifiers specifically for LGPLv2.1
  232. licenseidentifiers['LGPL-2.1'] = ["http://gnu.org/licenses/old-licenses/lgpl-2.1.html"]
  233. ## identifiers specifically for Apache 2.0
  234. licenseidentifiers['Apache-2.0'] = ["http://www.apache.org/licenses/LICENSE-2.0", "http://opensource.org/licenses/apache2.0.php"]
  235. ## identifiers for MPL license
  236. licenseidentifiers['MPL'] = ["http://www.mozilla.org/MPL/"]
  237. ## identifiers for MIT license
  238. licenseidentifiers['MIT'] = ["http://www.opensource.org/licenses/mit-license.php"]
  239. ## identifiers for BSD license
  240. licenseidentifiers['BSD'] = ["http://www.opensource.org/licenses/bsd-license.php"]
  241. ## identifiers specifically for OpenOffice
  242. licenseidentifiers['OpenOffice'] = ["http://www.openoffice.org/license.html"]
  243. ## identifiers specifically for BitTorrent
  244. licenseidentifiers['BitTorrent'] = ["http://www.bittorrent.com/license/"]
  245. ## identifiers specifically for Tizen
  246. licenseidentifiers['Tizen'] = ["http://www.tizenopensource.org/license"]
  247. ## identifiers specifically for OpenSSL
  248. licenseidentifiers['OpenSSL'] = ["http://www.openssl.org/source/license.html"]
  249. ## identifiers specifically for Boost
  250. licenseidentifiers['BSL-1.0'] = ["http://www.boost.org/LICENSE_1_0.txt", "http://pocoproject.org/license.html"]
  251. ## identifiers specifically for zlib
  252. licenseidentifiers['Zlib'] = ["http://www.zlib.net/zlib_license.html"]
  253. ## identifiers specifically for jQuery
  254. licenseidentifiers['jQuery'] = ["http://jquery.org/license"]
  255. ## identifiers specifically for libxml
  256. licenseidentifiers['libxml'] = ["http://xmlsoft.org/FAQ.html#License"]
  257. ## identifiers specifically for ICU
  258. licenseidentifiers['ICU'] = ["http://source.icu-project.org/repos/icu/icu/trunk/license.html"]
  259. licenseresults = genericSearch(filename, licenseidentifiers, blacklist)
  260. if licenseresults != None:
  261. return (['licenses'], licenseresults)
  262. else:
  263. return None
  264. ## scan for mentions of several forges
  265. ## Some of the URLs of the forges no longer work or are redirected, but they
  266. ## might still pop up in binaries.
  267. def scanForges(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None):
  268. forgeidentifiers = {}
  269. forgeidentifiers['sourceforge.net'] = ["sourceforge.net"]
  270. forgeidentifiers['freedesktop.org'] = ["http://cvs.freedesktop.org/", "http://cgit.freedesktop.org/"]
  271. forgeidentifiers['code.google.com'] = ["code.google.com", "googlecode.com"]
  272. forgeidentifiers['savannah.gnu.org'] = ["savannah.gnu.org/"]
  273. forgeidentifiers['github.com'] = ["github.com", "github.io"]
  274. forgeidentifiers['bitbucket.org'] = ["bitbucket.org"]
  275. forgeidentifiers['tigris.org'] = ["tigris.org"]
  276. forgeidentifiers['svn.apache.org'] = ["http://svn.apache.org/"]
  277. forgeidentifiers['launchpad.net'] = ["https://git.launchpad.net/", "launchpad.net"]
  278. ## various gits:
  279. ## http://git.fedoraproject.org/git/
  280. ## https://fedorahosted.org/
  281. forgeresults = genericSearch(filename, forgeidentifiers, blacklist)
  282. if forgeresults != None:
  283. return (['forges'], forgeresults)
  284. else:
  285. return None