PageRenderTime 89ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/selfdrive/manager.py

https://gitlab.com/jerdog/openpilot
Python | 358 lines | 259 code | 61 blank | 38 comment | 73 complexity | 992494a95d2278f8fbf7775c1ea3e177 MD5 | raw file
  1. #!/usr/bin/env python
  2. import os
  3. import sys
  4. import time
  5. import importlib
  6. import subprocess
  7. import signal
  8. import traceback
  9. import usb1
  10. from multiprocessing import Process
  11. from common.services import service_list
  12. import zmq
  13. from setproctitle import setproctitle
  14. from selfdrive.swaglog import cloudlog
  15. import selfdrive.messaging as messaging
  16. from selfdrive.thermal import read_thermal
  17. from selfdrive.registration import register
  18. import common.crash
  19. from common.params import Params
  20. from selfdrive.loggerd.config import ROOT
  21. # comment out anything you don't want to run
  22. managed_processes = {
  23. "uploader": "selfdrive.loggerd.uploader",
  24. "controlsd": "selfdrive.controls.controlsd",
  25. "radard": "selfdrive.controls.radard",
  26. "calibrationd": "selfdrive.calibrationd.calibrationd",
  27. "loggerd": "selfdrive.loggerd.loggerd",
  28. "logmessaged": "selfdrive.logmessaged",
  29. "logcatd": ("logcatd", ["./logcatd"]),
  30. "boardd": ("boardd", ["./boardd"]), # switch to c++ boardd
  31. "ui": ("ui", ["./ui"]),
  32. "visiond": ("visiond", ["./visiond"]),
  33. "sensord": ("sensord", ["./sensord"]), }
  34. running = {}
  35. # due to qualcomm kernel bugs SIGKILLing visiond sometimes causes page table corruption
  36. unkillable_processes = ['visiond']
  37. # processes to end with SIGINT instead of SIGTERM
  38. interrupt_processes = ['loggerd']
  39. car_started_processes = ['controlsd', 'loggerd', 'sensord', 'radard', 'calibrationd', 'visiond']
  40. def register_managed_process(name, desc, car_started=False):
  41. global managed_processes, car_started_processes
  42. print "registering", name
  43. managed_processes[name] = desc
  44. if car_started:
  45. car_started_processes.append(name)
  46. # ****************** process management functions ******************
  47. def launcher(proc, gctx):
  48. try:
  49. # import the process
  50. mod = importlib.import_module(proc)
  51. # rename the process
  52. setproctitle(proc)
  53. # exec the process
  54. mod.main(gctx)
  55. except KeyboardInterrupt:
  56. cloudlog.info("child %s got ctrl-c" % proc)
  57. except Exception:
  58. # can't install the crash handler becuase sys.excepthook doesn't play nice
  59. # with threads, so catch it here.
  60. common.crash.capture_exception()
  61. raise
  62. def nativelauncher(pargs, cwd):
  63. # exec the process
  64. os.chdir(cwd)
  65. # because when extracted from pex zips permissions get lost -_-
  66. os.chmod(pargs[0], 0o700)
  67. os.execvp(pargs[0], pargs)
  68. def start_managed_process(name):
  69. if name in running or name not in managed_processes:
  70. return
  71. proc = managed_processes[name]
  72. if isinstance(proc, basestring):
  73. cloudlog.info("starting python %s" % proc)
  74. running[name] = Process(name=name, target=launcher, args=(proc, gctx))
  75. else:
  76. pdir, pargs = proc
  77. cwd = os.path.dirname(os.path.realpath(__file__))
  78. if pdir is not None:
  79. cwd = os.path.join(cwd, pdir)
  80. cloudlog.info("starting process %s" % name)
  81. running[name] = Process(name=name, target=nativelauncher, args=(pargs, cwd))
  82. running[name].start()
  83. def kill_managed_process(name):
  84. if name not in running or name not in managed_processes:
  85. return
  86. cloudlog.info("killing %s" % name)
  87. if running[name].exitcode is None:
  88. if name in interrupt_processes:
  89. os.kill(running[name].pid, signal.SIGINT)
  90. else:
  91. running[name].terminate()
  92. # give it 5 seconds to die
  93. running[name].join(5.0)
  94. if running[name].exitcode is None:
  95. if name in unkillable_processes:
  96. cloudlog.critical("unkillable process %s failed to exit! rebooting in 15 if it doesn't die" % name)
  97. running[name].join(15.0)
  98. if running[name].exitcode is None:
  99. cloudlog.critical("FORCE REBOOTING PHONE!")
  100. os.system("date > /sdcard/unkillable_reboot")
  101. os.system("reboot")
  102. raise RuntimeError
  103. else:
  104. cloudlog.info("killing %s with SIGKILL" % name)
  105. os.kill(running[name].pid, signal.SIGKILL)
  106. running[name].join()
  107. cloudlog.info("%s is dead with %d" % (name, running[name].exitcode))
  108. del running[name]
  109. def cleanup_all_processes(signal, frame):
  110. cloudlog.info("caught ctrl-c %s %s" % (signal, frame))
  111. for name in running.keys():
  112. kill_managed_process(name)
  113. sys.exit(0)
  114. # ****************** run loop ******************
  115. def manager_init():
  116. global gctx
  117. reg_res = register()
  118. if reg_res:
  119. dongle_id, dongle_secret = reg_res
  120. else:
  121. raise Exception("server registration failed")
  122. # set dongle id
  123. cloudlog.info("dongle id is " + dongle_id)
  124. os.environ['DONGLE_ID'] = dongle_id
  125. version = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "common", "version.h")).read().split('"')[1]
  126. cloudlog.bind_global(dongle_id=dongle_id, version=version)
  127. common.crash.bind_user(dongle_id=dongle_id, version=version)
  128. # set gctx
  129. gctx = {
  130. "calibration": {
  131. "initial_homography": [1.15728010e+00, -4.69379619e-02, 7.46450623e+01,
  132. 7.99253014e-02, 1.06372458e+00, 5.77762553e+01,
  133. 9.35543519e-05, -1.65429898e-04, 9.98062699e-01]
  134. }
  135. }
  136. def manager_thread():
  137. # now loop
  138. context = zmq.Context()
  139. thermal_sock = messaging.pub_sock(context, service_list['thermal'].port)
  140. health_sock = messaging.sub_sock(context, service_list['health'].port)
  141. cloudlog.info("manager start")
  142. cloudlog.info(dict(os.environ))
  143. start_managed_process("logmessaged")
  144. start_managed_process("logcatd")
  145. start_managed_process("uploader")
  146. start_managed_process("ui")
  147. panda = False
  148. if os.getenv("NOBOARD") is None:
  149. # *** wait for the board ***
  150. panda = wait_for_device() == 0x2300
  151. # flash the device
  152. if os.getenv("NOPROG") is None:
  153. boarddir = os.path.dirname(os.path.abspath(__file__))+"/../board/"
  154. mkfile = "Makefile.panda" if panda else "Makefile"
  155. print "using", mkfile
  156. os.system("cd %s && make -f %s" % (boarddir, mkfile))
  157. start_managed_process("boardd")
  158. started = False
  159. logger_dead = False
  160. count = 0
  161. # set 5 second timeout on health socket
  162. # 5x slower than expected
  163. health_sock.RCVTIMEO = 5000
  164. while 1:
  165. # get health of board, log this in "thermal"
  166. td = messaging.recv_sock(health_sock, wait=True)
  167. print td
  168. # replace thermald
  169. msg = read_thermal()
  170. # loggerd is gated based on free space
  171. statvfs = os.statvfs(ROOT)
  172. avail = (statvfs.f_bavail * 1.0)/statvfs.f_blocks
  173. # thermal message now also includes free space
  174. msg.thermal.freeSpace = avail
  175. with open("/sys/class/power_supply/battery/capacity") as f:
  176. msg.thermal.batteryPercent = int(f.read())
  177. with open("/sys/class/power_supply/battery/status") as f:
  178. msg.thermal.batteryStatus = f.read().strip()
  179. thermal_sock.send(msg.to_bytes())
  180. print msg
  181. # TODO: add car battery voltage check
  182. max_temp = max(msg.thermal.cpu0, msg.thermal.cpu1,
  183. msg.thermal.cpu2, msg.thermal.cpu3) / 10.0
  184. # uploader is gated based on the phone temperature
  185. if max_temp > 85.0:
  186. cloudlog.info("over temp: %r", max_temp)
  187. kill_managed_process("uploader")
  188. elif max_temp < 70.0:
  189. start_managed_process("uploader")
  190. if avail < 0.05:
  191. logger_dead = True
  192. # start constellation of processes when the car starts
  193. # with 2% left, we killall, otherwise the phone is bricked
  194. if td is not None and td.health.started and avail > 0.02:
  195. if not started:
  196. Params().car_start()
  197. started = True
  198. for p in car_started_processes:
  199. if p == "loggerd" and logger_dead:
  200. kill_managed_process(p)
  201. else:
  202. start_managed_process(p)
  203. else:
  204. started = False
  205. logger_dead = False
  206. for p in car_started_processes:
  207. kill_managed_process(p)
  208. # shutdown if the battery gets lower than 10%, we aren't running, and we are discharging
  209. if msg.thermal.batteryPercent < 5 and msg.thermal.batteryStatus == "Discharging":
  210. os.system('LD_LIBRARY_PATH="" svc power shutdown')
  211. # check the status of all processes, did any of them die?
  212. for p in running:
  213. cloudlog.debug(" running %s %s" % (p, running[p]))
  214. # report to server once per minute
  215. if (count%60) == 0:
  216. cloudlog.event("STATUS_PACKET",
  217. running=running.keys(),
  218. count=count,
  219. health=(td.to_dict() if td else None),
  220. thermal=msg.to_dict())
  221. count += 1
  222. # optional, build the c++ binaries and preimport the python for speed
  223. def manager_prepare():
  224. # build cereal first
  225. subprocess.check_call(["make", "-j4"], cwd="../cereal")
  226. os.chdir(os.path.dirname(os.path.abspath(__file__)))
  227. for p in managed_processes:
  228. proc = managed_processes[p]
  229. if isinstance(proc, basestring):
  230. # import this python
  231. cloudlog.info("preimporting %s" % proc)
  232. importlib.import_module(proc)
  233. else:
  234. # build this process
  235. cloudlog.info("building %s" % (proc,))
  236. try:
  237. subprocess.check_call(["make", "-j4"], cwd=proc[0])
  238. except subprocess.CalledProcessError:
  239. # make clean if the build failed
  240. cloudlog.info("building %s failed, make clean" % (proc, ))
  241. subprocess.check_call(["make", "clean"], cwd=proc[0])
  242. subprocess.check_call(["make", "-j4"], cwd=proc[0])
  243. def wait_for_device():
  244. while 1:
  245. try:
  246. context = usb1.USBContext()
  247. for device in context.getDeviceList(skip_on_error=True):
  248. if (device.getVendorID() == 0xbbaa and device.getProductID() == 0xddcc) or \
  249. (device.getVendorID() == 0x0483 and device.getProductID() == 0xdf11):
  250. bcd = device.getbcdDevice()
  251. handle = device.open()
  252. handle.claimInterface(0)
  253. cloudlog.info("found board")
  254. handle.close()
  255. return bcd
  256. except Exception as e:
  257. print "exception", e,
  258. print "waiting..."
  259. time.sleep(1)
  260. def main():
  261. if os.getenv("NOLOG") is not None:
  262. del managed_processes['loggerd']
  263. if os.getenv("NOUPLOAD") is not None:
  264. del managed_processes['uploader']
  265. if os.getenv("NOVISION") is not None:
  266. del managed_processes['visiond']
  267. if os.getenv("NOBOARD") is not None:
  268. del managed_processes['boardd']
  269. if os.getenv("LEAN") is not None:
  270. del managed_processes['uploader']
  271. del managed_processes['loggerd']
  272. del managed_processes['logmessaged']
  273. del managed_processes['logcatd']
  274. if os.getenv("NOCONTROL") is not None:
  275. del managed_processes['controlsd']
  276. del managed_processes['radard']
  277. # support additional internal only extensions
  278. try:
  279. import selfdrive.manager_extensions
  280. selfdrive.manager_extensions.register(register_managed_process)
  281. except ImportError:
  282. pass
  283. params = Params()
  284. params.manager_start()
  285. manager_init()
  286. manager_prepare()
  287. if os.getenv("PREPAREONLY") is not None:
  288. sys.exit(0)
  289. try:
  290. manager_thread()
  291. except Exception:
  292. traceback.print_exc()
  293. common.crash.capture_exception()
  294. finally:
  295. cleanup_all_processes(None, None)
  296. if __name__ == "__main__":
  297. main()