PageRenderTime 118ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/bacula/src/stored/job.c

https://github.com/halgandd/bacula
C | 427 lines | 286 code | 40 blank | 101 comment | 57 complexity | ebe481a80c2cdce0e88f4a668e13d46d MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0, LGPL-2.1
  1. /*
  2. Bacula® - The Network Backup Solution
  3. Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
  4. The main author of Bacula is Kern Sibbald, with contributions from
  5. many others, a complete list can be found in the file AUTHORS.
  6. This program is Free Software; you can redistribute it and/or
  7. modify it under the terms of version two of the GNU General Public
  8. License as published by the Free Software Foundation and included
  9. in the file LICENSE.
  10. This program is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. 02110-1301, USA.
  18. Bacula® is a registered trademark of Kern Sibbald.
  19. The licensor of Bacula is the Free Software Foundation Europe
  20. (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  21. Switzerland, email:ftf@fsfeurope.org.
  22. */
  23. /*
  24. * Job control and execution for Storage Daemon
  25. *
  26. * Kern Sibbald, MM
  27. *
  28. * Version $Id$
  29. *
  30. */
  31. #include "bacula.h"
  32. #include "stored.h"
  33. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  34. /* Imported variables */
  35. extern uint32_t VolSessionTime;
  36. /* Imported functions */
  37. extern uint32_t newVolSessionId();
  38. extern bool do_mac(JCR *jcr);
  /*
   * Requests from the Director daemon.
   *
   * Every %s conversion carries an explicit maximum field width so that
   * scanning a malformed or hostile message can never write past the end
   * of the destination buffer.  SpoolSize is scanned into a 30 byte
   * buffer in job_cmd(), hence the limit of 29 characters (plus the
   * terminating NUL).
   *
   * oldjobcmd is the previous layout of the same command, which has no
   * SpoolSize field.
   */
  static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
     "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
     "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%29s\n";
  static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
     "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
     "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";

  /* Responses sent to Director daemon */
  static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
  static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";

  /* Responses used only by query_cmd(), which is currently compiled out */
  //static char OK_query[] = "3001 OK query\n";
  //static char NO_query[] = "3918 Query failed\n";
  //static char BAD_query[] = "3917 Bad query command: %s\n";
  52. /*
  53. * Director requests us to start a job
  54. * Basic tasks done here:
  55. * - We pickup the JobId to be run from the Director.
  56. * - We pickup the device, media, and pool from the Director
  57. * - Wait for a connection from the File Daemon (FD)
  58. * - Accept commands from the FD (i.e. run the job)
  59. * - Return when the connection is terminated or
  60. * there is an error.
  61. */
  62. bool job_cmd(JCR *jcr)
  63. {
  64. int JobId;
  65. char auth_key[100];
  66. char spool_size[30];
  67. char seed[100];
  68. BSOCK *dir = jcr->dir_bsock;
  69. POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
  70. int JobType, level, spool_attributes, no_attributes, spool_data;
  71. int write_part_after_job, PreferMountedVols;
  72. int stat;
  73. JCR *ojcr;
  74. /*
  75. * Get JobId and permissions from Director
  76. */
  77. Dmsg1(100, "<dird: %s", dir->msg);
  78. bstrncpy(spool_size, "0", sizeof(spool_size));
  79. stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
  80. client_name.c_str(),
  81. &JobType, &level, fileset_name.c_str(), &no_attributes,
  82. &spool_attributes, fileset_md5.c_str(), &spool_data,
  83. &write_part_after_job, &PreferMountedVols, spool_size);
  84. if (stat != 14) {
  85. /* Try old version */
  86. stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
  87. client_name.c_str(),
  88. &JobType, &level, fileset_name.c_str(), &no_attributes,
  89. &spool_attributes, fileset_md5.c_str(), &spool_data,
  90. &write_part_after_job, &PreferMountedVols);
  91. if (stat != 13) {
  92. pm_strcpy(jcr->errmsg, dir->msg);
  93. dir->fsend(BAD_job, stat, jcr->errmsg);
  94. Dmsg1(100, ">dird: %s", dir->msg);
  95. set_jcr_job_status(jcr, JS_ErrorTerminated);
  96. return false;
  97. }
  98. }
  99. /*
  100. * Since this job could be rescheduled, we
  101. * check to see if we have it already. If so
  102. * free the old jcr and use the new one.
  103. */
  104. ojcr = get_jcr_by_full_name(job.c_str());
  105. if (ojcr && !ojcr->authenticated) {
  106. Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
  107. free_jcr(ojcr);
  108. }
  109. jcr->JobId = JobId;
  110. jcr->VolSessionId = newVolSessionId();
  111. jcr->VolSessionTime = VolSessionTime;
  112. bstrncpy(jcr->Job, job, sizeof(jcr->Job));
  113. unbash_spaces(job_name);
  114. jcr->job_name = get_pool_memory(PM_NAME);
  115. pm_strcpy(jcr->job_name, job_name);
  116. unbash_spaces(client_name);
  117. jcr->client_name = get_pool_memory(PM_NAME);
  118. pm_strcpy(jcr->client_name, client_name);
  119. unbash_spaces(fileset_name);
  120. jcr->fileset_name = get_pool_memory(PM_NAME);
  121. pm_strcpy(jcr->fileset_name, fileset_name);
  122. jcr->set_JobType(JobType);
  123. jcr->set_JobLevel(level);
  124. jcr->no_attributes = no_attributes;
  125. jcr->spool_attributes = spool_attributes;
  126. jcr->spool_data = spool_data;
  127. jcr->spool_size = str_to_int64(spool_size);
  128. jcr->write_part_after_job = write_part_after_job;
  129. jcr->fileset_md5 = get_pool_memory(PM_NAME);
  130. pm_strcpy(jcr->fileset_md5, fileset_md5);
  131. jcr->PreferMountedVols = PreferMountedVols;
  132. jcr->authenticated = false;
  133. jcr->need_fd = true;
  134. /*
  135. * Pass back an authorization key for the File daemon
  136. */
  137. bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
  138. make_session_key(auth_key, seed, 1);
  139. dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
  140. Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
  141. jcr->sd_auth_key = bstrdup(auth_key);
  142. memset(auth_key, 0, sizeof(auth_key));
  143. generate_daemon_event(jcr, "JobStart");
  144. return true;
  145. }
  146. bool run_cmd(JCR *jcr)
  147. {
  148. struct timeval tv;
  149. struct timezone tz;
  150. struct timespec timeout;
  151. int errstat = 0;
  152. Dsm_check(1);
  153. Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
  154. /* If we do not need the FD, we are doing a migrate, copy, or virtual
  155. * backup.
  156. */
  157. if (!jcr->need_fd) {
  158. do_mac(jcr);
  159. return false;
  160. }
  161. set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
  162. dir_send_job_status(jcr);
  163. gettimeofday(&tv, &tz);
  164. timeout.tv_nsec = tv.tv_usec * 1000;
  165. timeout.tv_sec = tv.tv_sec + me->client_wait;
  166. Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
  167. jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
  168. /*
  169. * Wait for the File daemon to contact us to start the Job,
  170. * when he does, we will be released, unless the 30 minutes
  171. * expires.
  172. */
  173. P(mutex);
  174. while ( !jcr->authenticated && !job_canceled(jcr) ) {
  175. errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
  176. if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
  177. break;
  178. }
  179. }
  180. Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
  181. job_canceled(jcr), errstat);
  182. V(mutex);
  183. memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
  184. if (jcr->authenticated && !job_canceled(jcr)) {
  185. Dmsg1(50, "Running job %s\n", jcr->Job);
  186. run_job(jcr); /* Run the job */
  187. }
  188. return false;
  189. }
  190. /*
  191. * After receiving a connection (in dircmd.c) if it is
  192. * from the File daemon, this routine is called.
  193. */
  194. void handle_filed_connection(BSOCK *fd, char *job_name)
  195. {
  196. JCR *jcr;
  197. /*
  198. * With the following bmicrosleep on, running the
  199. * SD under the debugger fails.
  200. */
  201. // bmicrosleep(0, 50000); /* wait 50 millisecs */
  202. if (!(jcr=get_jcr_by_full_name(job_name))) {
  203. Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
  204. Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
  205. fd->close();
  206. return;
  207. }
  208. Dmsg1(50, "Found Job %s\n", job_name);
  209. if (jcr->authenticated) {
  210. Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
  211. (uint32_t)jcr->JobId, jcr->Job);
  212. Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
  213. (uint32_t)jcr->JobId, jcr->Job);
  214. fd->close();
  215. free_jcr(jcr);
  216. return;
  217. }
  218. jcr->file_bsock = fd;
  219. jcr->file_bsock->set_jcr(jcr);
  220. /*
  221. * Authenticate the File daemon
  222. */
  223. if (jcr->authenticated || !authenticate_filed(jcr)) {
  224. Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
  225. Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
  226. } else {
  227. jcr->authenticated = true;
  228. Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
  229. }
  230. if (!jcr->authenticated) {
  231. set_jcr_job_status(jcr, JS_ErrorTerminated);
  232. }
  233. pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
  234. free_jcr(jcr);
  235. return;
  236. }
  #ifdef needed
  /*
   * Query Device command from Director.
   *
   * Parses a device name from dir->msg and looks for it first among
   * the device resources, then among the autochanger resources.  For
   * a match, the Director is updated (dir_update_device() or
   * dir_update_changer()) and OK_query or NO_query is sent; the
   * result of that exchange is returned.
   *
   * When nothing matches (NO_device is sent) or the command cannot be
   * parsed (BAD_query is sent) the function returns true, so that the
   * line is not closed on an error.
   */
  bool query_cmd(JCR *jcr)
  {
     POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
     BSOCK *dir = jcr->dir_bsock;
     DEVRES *device;
     AUTOCHANGER *changer;
     bool ok;

     Dmsg1(100, "Query_cmd: %s", dir->msg);
     ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
     Dmsg1(100, "<dird: %s\n", dir->msg);

     /* Unparsable command: report it and keep the connection open */
     if (!ok) {
        unbash_spaces(dir->msg);
        pm_strcpy(jcr->errmsg, dir->msg);
        dir->fsend(BAD_query, jcr->errmsg);
        Dmsg1(100, ">dird: %s\n", dir->msg);
        return true;
     }

     unbash_spaces(dev_name);

     /* First try the plain device resources */
     foreach_res(device, R_DEVICE) {
        if (strcmp(dev_name.c_str(), device->hdr.name) != 0) {
           continue;
        }
        /* Found the resource; make sure we are able to open it */
        if (!device->dev) {
           device->dev = init_dev(jcr, device);
        }
        if (!device->dev) {
           break;                      /* go on to the autochanger search */
        }
        ok = dir_update_device(jcr, device->dev);
        if (!ok) {
           dir->fsend(NO_query);
        } else {
           ok = dir->fsend(OK_query);
        }
        return ok;
     }

     /* Then the autochanger resources */
     foreach_res(changer, R_AUTOCHANGER) {
        if (strcmp(dev_name.c_str(), changer->hdr.name) != 0) {
           continue;
        }
        if (!changer->device || changer->device->size() == 0) {
           continue;                   /* no devices */
        }
        ok = dir_update_changer(jcr, changer);
        if (!ok) {
           dir->fsend(NO_query);
        } else {
           ok = dir->fsend(OK_query);
        }
        return ok;
     }

     /* If we get here, the device/autochanger was not found */
     unbash_spaces(dir->msg);
     pm_strcpy(jcr->errmsg, dir->msg);
     dir->fsend(NO_device, dev_name.c_str());
     Dmsg1(100, ">dird: %s\n", dir->msg);
     return true;
  }
  #endif
  305. /*
  306. * Destroy the Job Control Record and associated
  307. * resources (sockets).
  308. */
  309. void stored_free_jcr(JCR *jcr)
  310. {
  311. Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
  312. if (jcr->file_bsock) {
  313. jcr->file_bsock->close();
  314. jcr->file_bsock = NULL;
  315. }
  316. if (jcr->job_name) {
  317. free_pool_memory(jcr->job_name);
  318. }
  319. if (jcr->client_name) {
  320. free_memory(jcr->client_name);
  321. jcr->client_name = NULL;
  322. }
  323. if (jcr->fileset_name) {
  324. free_memory(jcr->fileset_name);
  325. }
  326. if (jcr->fileset_md5) {
  327. free_memory(jcr->fileset_md5);
  328. }
  329. if (jcr->bsr) {
  330. free_bsr(jcr->bsr);
  331. jcr->bsr = NULL;
  332. }
  333. /* Free any restore volume list created */
  334. free_restore_volume_list(jcr);
  335. if (jcr->RestoreBootstrap) {
  336. unlink(jcr->RestoreBootstrap);
  337. free_pool_memory(jcr->RestoreBootstrap);
  338. jcr->RestoreBootstrap = NULL;
  339. }
  340. if (jcr->next_dev || jcr->prev_dev) {
  341. Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
  342. }
  343. pthread_cond_destroy(&jcr->job_start_wait);
  344. if (jcr->dcrs) {
  345. delete jcr->dcrs;
  346. }
  347. jcr->dcrs = NULL;
  348. /* Avoid a double free */
  349. if (jcr->dcr == jcr->read_dcr) {
  350. jcr->read_dcr = NULL;
  351. }
  352. if (jcr->dcr) {
  353. free_dcr(jcr->dcr);
  354. jcr->dcr = NULL;
  355. }
  356. if (jcr->read_dcr) {
  357. free_dcr(jcr->read_dcr);
  358. jcr->read_dcr = NULL;
  359. }
  360. if (jcr->read_store) {
  361. DIRSTORE *store;
  362. foreach_alist(store, jcr->read_store) {
  363. delete store->device;
  364. delete store;
  365. }
  366. delete jcr->read_store;
  367. jcr->read_store = NULL;
  368. }
  369. if (jcr->write_store) {
  370. DIRSTORE *store;
  371. foreach_alist(store, jcr->write_store) {
  372. delete store->device;
  373. delete store;
  374. }
  375. delete jcr->write_store;
  376. jcr->write_store = NULL;
  377. }
  378. Dsm_check(1);
  379. if (jcr->JobId != 0)
  380. write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
  381. return;
  382. }