PageRenderTime 53ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/preprocess/worker_process.py

https://bitbucket.org/oebeling/tracectory
Python | 190 lines | 153 code | 22 blank | 15 comment | 17 complexity | 12656abe04b6dfdb8d44a85cea50a166 MD5 | raw file
  1. # This file implements the worker processes, which take in lines of trace input,
  2. # parse it, disassemble the instruction using the memory dump and produce a description
  3. # of the dataflow of this instruction. This description is sent to the result sink.
  4. import sys
  5. sys.path.append(".")
  6. sys.path.append("./src")
  7. import os
  8. from data_sources import *
  9. from threading import Lock
  10. from time import time as systemtime
  11. import time
  12. from analysis_funcs import *
  13. from EffectAnalyzer import *
  14. #saveName = "t206"
  15. import multiprocessing
  16. import traceback
  17. from pymongo import Connection as MongoClient
# Module-wide MongoDB connection. The concrete database is selected later:
# main() picks it from the ventilator's 'loadDump' control message.
client = MongoClient()
db = None
# Prefer simplejson (C-accelerated on Python 2); fall back to the stdlib json.
try:
    import simplejson as json
except ImportError:
    import json
  24. def log(s): print time.strftime("%b %d %Y %H:%M:%S"), "[worker, pid=%d]" % os.getpid(), s
  25. def normalize(x):
  26. if "numpy" in str(x.__class__):
  27. return int(x)
  28. return x
def analyzeInstruction(eip, regs, memorySpaceStream):
    """Disassemble the instruction at `eip` and describe its dataflow.

    eip               -- instruction pointer (int) of the traced instruction
    regs              -- dict mapping register name -> value at this point
    memorySpaceStream -- stream over the memory dump used for disassembly

    Returns a record dict with keys 'PC', 'disassembly', 'engineVersion',
    'regs' and 'changes', or None when neither engine produced a change
    matrix.  Re-raises the new engine's exception when the module-level
    `suppressErrors` flag is false.
    """
    global affectCache, instrCache
    # Disassemble the instruction and get its expression object.
    # (We use cached values, if available)
    if instrCache.has_key(eip):
        affects = affectCache[eip]
        instr = instrCache[eip]
    else:
        # miasm disassembly + symbolic "affect" expressions for this
        # instruction; cache both keyed by EIP since traces revisit addresses.
        instr = asmbloc.dis_i(x86_mn,memorySpaceStream, eip, symbol_pool)
        origAffects = get_instr_expr(instr, instr.getnextflow(), [])
        affects = []
        for a in origAffects:
            affects += processAffect(a)
        affectCache[eip] = affects
        instrCache[eip] = instr
    # Try the new dataflow engine first; fall back to the old one on failure.
    engineVersion = 2
    try:
        changeMatrix = buildMatrix_new(affects, regs)
    except:
        changeMatrix = None
        #print traceback.format_exc()
        if not suppressErrors:
            raise
    #Use old engine
    if changeMatrix is None:
        changeMatrix = buildMatrix_old(affects, regs)
        engineVersion = 1
    if changeMatrix is None: return None
    #Normalize the change matrix (numpy scalars -> plain ints) so it can be
    # JSON-serialized downstream.
    newMatrix = {}
    for key,value in changeMatrix.items():
        k = normalize(key)
        newMatrix[k] = list([normalize(x) for x in value])
    #Build a record of the instruction and return it
    record = { 'PC' : eip,
               'disassembly' : str(instr),
               'engineVersion' : engineVersion,
               'regs' : regs,
               'changes' : newMatrix }
    return record
  69. def processLine(line):
  70. #Parse the line
  71. eipData = line[6:line.find(" ",6)]
  72. if eipData.startswith("-"): return None
  73. try:
  74. eip = int(eipData, 16)
  75. except ValueError:
  76. #print "Warning: Couldn't parse %s\n" % eipData
  77. return None
  78. regData = line[line.find("EAX="):].split(",")
  79. regs = {}
  80. for val in regData:
  81. reg, value = val.split("=")
  82. regs[reg.strip()] = int(value,16)
  83. #Parse the info to actual analyzer
  84. return analyzeInstruction(eip, regs, memorySpace)
  85. def subprocessInit(dumpFile, useNewEngine, doSuppressErrors):
  86. global memorySpace, in_str
  87. global affectCache, instrCache
  88. global newEngine, suppressErrors
  89. newEngine = useNewEngine
  90. suppressErrors = doSuppressErrors
  91. affectCache = {}
  92. instrCache = {}
  93. log("Loading memory dump %s " % dumpFile)
  94. memorySpace = FossileStream(dumpFile)
  95. in_str = bin_stream_file(memorySpace)
  96. import zmq
def main():
    """Worker main loop: pull trace lines over ZeroMQ, analyze, store in Mongo.

    Listens on three sockets: a PULL socket for tasks, a SUB socket for
    control messages from the ventilator, and a PUSH socket used to
    acknowledge the quit pass-through message.
    """
    #Receive the tasks (trace lines)
    context = zmq.Context.instance()
    receive = context.socket(zmq.PULL)
    receive.connect("tcp://127.0.0.1:5555")
    #Receive control commands from the ventilator
    workerControl = context.socket(zmq.SUB)
    workerControl.connect("tcp://127.0.0.1:5559")
    workerControl.setsockopt(zmq.SUBSCRIBE, "")
    readySignalSend = context.socket(zmq.PUSH)
    readySignalSend.connect("tcp://127.0.0.1:5557")
    poller = zmq.Poller()
    poller.register(workerControl, zmq.POLLIN)
    poller.register(receive, zmq.POLLIN)
    while True:
        socks = dict(poller.poll())
        #Did we get a control message?
        if socks.get(workerControl) == zmq.POLLIN:
            message = workerControl.recv_json()
            log("got control message: " + json.dumps(message))
            if message['type'] == 'loadDump':
                #Open the dump
                dump = message['dump']
                subprocessInit(dump, True, True)
                #open the database
                # NOTE(review): this binds a *local* `db`, shadowing the
                # module-level `db = None`; it works because all uses below
                # are in this same function, but tasks arriving before a
                # 'loadDump' message would hit an UnboundLocalError.
                db = client[message['db']]
            elif message['type'] == 'shutdown':
                break
            else:
                raise ValueError, "Unknown workerControl message"
        #Did we get a task?
        if socks.get(receive) == zmq.POLLIN:
            curTime, curLine = receive.recv_json()
            if curTime == -1:
                #Pass-through message: acknowledge and exit.
                readySignalSend.send_json( { 'quit' : 1 , 'pid' : os.getpid()} )
                break
            record = None
            try:
                record = processLine(curLine)
            except:
                # Best-effort: a single bad line must not kill the worker.
                log("Exception while processing t=%d" % curTime)
            if record is not None:
                record['time'] = curTime
                record = json.loads(json.dumps(record)) #XXX: Hack, converts keys to string
                db.instructions.insert(record)
                #Add indexes to DB as well: numeric change-matrix keys are
                #memory writes; numeric values are memory reads.
                if record.has_key("changes") and record['changes'] is not None:
                    for key,value in record['changes'].items():
                        if str(key).isdigit():
                            db.writes.insert( {'addr': int(key), 'time' : curTime})
                        for v in value:
                            if str(v).isdigit():
                                db.reads.insert( {'addr': int(v), 'time' : curTime})
  151. if __name__ == '__main__':
  152. if len(sys.argv)==3:
  153. subprocessInit(sys.argv[1], True, False)
  154. print processLine(sys.argv[2])
  155. else:
  156. main()