/src/plugins/sched/wiki2/job_modify.c

https://github.com/cfenoy/slurm · C · 615 lines · 508 code · 39 blank · 68 comment · 150 complexity · 5d0dc709103f4cd9ae817127e7520b02 MD5 · raw file

  1. /*****************************************************************************\
  2. * job_modify.c - Process Wiki job modify request
  3. *****************************************************************************
  4. * Copyright (C) 2006-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  7. * Written by Morris Jette <jette1@llnl.gov>
  8. * CODE-OCEC-09-009. All rights reserved.
  9. *
  10. * This file is part of SLURM, a resource management program.
  11. * For details, see <http://www.schedmd.com/slurmdocs/>.
  12. * Please also read the included file: DISCLAIMER.
  13. *
  14. * SLURM is free software; you can redistribute it and/or modify it under
  15. * the terms of the GNU General Public License as published by the Free
  16. * Software Foundation; either version 2 of the License, or (at your option)
  17. * any later version.
  18. *
  19. * In addition, as a special exception, the copyright holders give permission
  20. * to link the code of portions of this program with the OpenSSL library under
  21. * certain conditions as described in each individual source file, and
  22. * distribute linked combinations including the two. You must obey the GNU
  23. * General Public License in all respects for all of the code used other than
  24. * OpenSSL. If you modify file(s) with this exception, you may extend this
  25. * exception to your version of the file(s), but you are not obligated to do
  26. * so. If you do not wish to do so, delete this exception statement from your
  27. * version. If you delete this exception statement from all source files in
  28. * the program, then also delete it here.
  29. *
  30. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  31. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  32. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  33. * details.
  34. *
  35. * You should have received a copy of the GNU General Public License along
  36. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  37. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  38. \*****************************************************************************/
  39. #include "./msg.h"
  40. #include <strings.h>
  41. #include "src/common/gres.h"
  42. #include "src/common/node_select.h"
  43. #include "src/common/slurm_accounting_storage.h"
  44. #include "src/slurmctld/job_scheduler.h"
  45. #include "src/slurmctld/locks.h"
  46. #include "src/slurmctld/slurmctld.h"
  47. /* Given a string, replace the first space found with '\0' */
  48. extern void null_term(char *str)
  49. {
  50. char *tmp_ptr;
  51. for (tmp_ptr=str; ; tmp_ptr++) {
  52. if (tmp_ptr[0] == '\0')
  53. break;
  54. if (isspace(tmp_ptr[0])) {
  55. tmp_ptr[0] = '\0';
  56. break;
  57. }
  58. }
  59. }
  60. static int _job_modify(uint32_t jobid, char *bank_ptr,
  61. char *depend_ptr, char *new_hostlist,
  62. uint32_t new_node_cnt, char *part_name_ptr,
  63. uint32_t new_time_limit, char *name_ptr,
  64. char *start_ptr, char *feature_ptr, char *env_ptr,
  65. char *comment_ptr, char *gres_ptr, char *wckey_ptr)
  66. {
  67. struct job_record *job_ptr;
  68. time_t now = time(NULL);
  69. bool update_accounting = false;
  70. job_ptr = find_job_record(jobid);
  71. if (job_ptr == NULL) {
  72. error("wiki: MODIFYJOB has invalid jobid %u", jobid);
  73. return ESLURM_INVALID_JOB_ID;
  74. }
  75. if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
  76. info("wiki: MODIFYJOB jobid %u is finished", jobid);
  77. return ESLURM_DISABLED;
  78. }
  79. if (comment_ptr) {
  80. info("wiki: change job %u comment %s", jobid, comment_ptr);
  81. xfree(job_ptr->comment);
  82. job_ptr->comment = xstrdup(comment_ptr);
  83. last_job_update = now;
  84. }
  85. if (depend_ptr) {
  86. int rc = update_job_dependency(job_ptr, depend_ptr);
  87. if (rc == SLURM_SUCCESS) {
  88. info("wiki: changed job %u dependency to %s",
  89. jobid, depend_ptr);
  90. } else {
  91. error("wiki: changing job %u dependency to %s",
  92. jobid, depend_ptr);
  93. return EINVAL;
  94. }
  95. }
  96. if (env_ptr) {
  97. bool have_equal = false;
  98. char old_sep[1];
  99. int begin = 0, i;
  100. if (job_ptr->batch_flag == 0) {
  101. error("wiki: attempt to set environment variables "
  102. "for non-batch job %u", jobid);
  103. return ESLURM_DISABLED;
  104. }
  105. for (i=0; ; i++) {
  106. if (env_ptr[i] == '=') {
  107. if (have_equal) {
  108. error("wiki: setting job %u invalid "
  109. "environment variables: %s",
  110. jobid, env_ptr);
  111. return EINVAL;
  112. }
  113. have_equal = true;
  114. if (env_ptr[i+1] == '\"') {
  115. for (i+=2; ; i++) {
  116. if (env_ptr[i] == '\0') {
  117. error("wiki: setting job %u "
  118. "invalid environment "
  119. "variables: %s",
  120. jobid, env_ptr);
  121. return EINVAL;
  122. }
  123. if (env_ptr[i] == '\"') {
  124. i++;
  125. break;
  126. }
  127. if (env_ptr[i] == '\\') {
  128. i++;
  129. }
  130. }
  131. } else if (env_ptr[i+1] == '\'') {
  132. for (i+=2; ; i++) {
  133. if (env_ptr[i] == '\0') {
  134. error("wiki: setting job %u "
  135. "invalid environment "
  136. "variables: %s",
  137. jobid, env_ptr);
  138. return EINVAL;
  139. }
  140. if (env_ptr[i] == '\'') {
  141. i++;
  142. break;
  143. }
  144. if (env_ptr[i] == '\\') {
  145. i++;
  146. }
  147. }
  148. }
  149. }
  150. if (isspace(env_ptr[i]) || (env_ptr[i] == ',')) {
  151. if (!have_equal) {
  152. error("wiki: setting job %u invalid "
  153. "environment variables: %s",
  154. jobid, env_ptr);
  155. return EINVAL;
  156. }
  157. old_sep[0] = env_ptr[i];
  158. env_ptr[i] = '\0';
  159. xrealloc(job_ptr->details->env_sup,
  160. sizeof(char *) *
  161. (job_ptr->details->env_cnt+1));
  162. job_ptr->details->env_sup
  163. [job_ptr->details->env_cnt++] =
  164. xstrdup(&env_ptr[begin]);
  165. info("wiki: for job %u add env: %s",
  166. jobid, &env_ptr[begin]);
  167. env_ptr[i] = old_sep[0];
  168. if (isspace(old_sep[0]))
  169. break;
  170. begin = i + 1;
  171. have_equal = false;
  172. }
  173. }
  174. }
  175. if (new_time_limit) {
  176. time_t old_time = job_ptr->time_limit;
  177. job_ptr->time_limit = new_time_limit;
  178. info("wiki: change job %u time_limit to %u",
  179. jobid, new_time_limit);
  180. /* Update end_time based upon change
  181. * to preserve suspend time info */
  182. job_ptr->end_time = job_ptr->end_time +
  183. ((job_ptr->time_limit -
  184. old_time) * 60);
  185. last_job_update = now;
  186. }
  187. if (bank_ptr &&
  188. (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
  189. return EINVAL;
  190. }
  191. if (feature_ptr) {
  192. if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
  193. info("wiki: change job %u features to %s",
  194. jobid, feature_ptr);
  195. job_ptr->details->features = xstrdup(feature_ptr);
  196. last_job_update = now;
  197. } else {
  198. error("wiki: MODIFYJOB features of non-pending "
  199. "job %u", jobid);
  200. return ESLURM_DISABLED;
  201. }
  202. }
  203. if (start_ptr) {
  204. char *end_ptr;
  205. uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
  206. if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
  207. info("wiki: change job %u begin time to %u",
  208. jobid, begin_time);
  209. job_ptr->details->begin_time = begin_time;
  210. last_job_update = now;
  211. update_accounting = true;
  212. } else {
  213. error("wiki: MODIFYJOB begin_time of non-pending "
  214. "job %u", jobid);
  215. return ESLURM_DISABLED;
  216. }
  217. }
  218. if (name_ptr) {
  219. if (IS_JOB_PENDING(job_ptr)) {
  220. info("wiki: change job %u name %s", jobid, name_ptr);
  221. xfree(job_ptr->name);
  222. job_ptr->name = xstrdup(name_ptr);
  223. last_job_update = now;
  224. update_accounting = true;
  225. } else {
  226. error("wiki: MODIFYJOB name of non-pending job %u",
  227. jobid);
  228. return ESLURM_DISABLED;
  229. }
  230. }
  231. if (new_hostlist) {
  232. int rc = 0, task_cnt;
  233. hostlist_t hl;
  234. char *tasklist;
  235. if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
  236. /* Job is done, nothing to reset */
  237. if (new_hostlist == '\0')
  238. goto host_fini;
  239. error("wiki: MODIFYJOB hostlist of non-pending "
  240. "job %u", jobid);
  241. return ESLURM_DISABLED;
  242. }
  243. xfree(job_ptr->details->req_nodes);
  244. FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
  245. if (new_hostlist == '\0')
  246. goto host_fini;
  247. tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
  248. if (tasklist == NULL) {
  249. rc = 1;
  250. goto host_fini;
  251. }
  252. hl = hostlist_create(tasklist);
  253. if (hl == 0) {
  254. rc = 1;
  255. goto host_fini;
  256. }
  257. hostlist_uniq(hl);
  258. hostlist_sort(hl);
  259. job_ptr->details->req_nodes =
  260. hostlist_ranged_string_xmalloc(hl);
  261. hostlist_destroy(hl);
  262. if (job_ptr->details->req_nodes == NULL) {
  263. rc = 1;
  264. goto host_fini;
  265. }
  266. if (node_name2bitmap(job_ptr->details->req_nodes, false,
  267. &job_ptr->details->req_node_bitmap)) {
  268. rc = 1;
  269. goto host_fini;
  270. }
  271. host_fini: if (rc) {
  272. info("wiki: change job %u invalid hostlist %s",
  273. jobid, new_hostlist);
  274. xfree(job_ptr->details->req_nodes);
  275. return EINVAL;
  276. } else {
  277. info("wiki: change job %u hostlist %s",
  278. jobid, new_hostlist);
  279. update_accounting = true;
  280. }
  281. }
  282. if (part_name_ptr) {
  283. struct part_record *part_ptr;
  284. if (!IS_JOB_PENDING(job_ptr)) {
  285. error("wiki: MODIFYJOB partition of non-pending "
  286. "job %u", jobid);
  287. return ESLURM_DISABLED;
  288. }
  289. part_ptr = find_part_record(part_name_ptr);
  290. if (part_ptr == NULL) {
  291. error("wiki: MODIFYJOB has invalid partition %s",
  292. part_name_ptr);
  293. return ESLURM_INVALID_PARTITION_NAME;
  294. }
  295. info("wiki: change job %u partition %s",
  296. jobid, part_name_ptr);
  297. xfree(job_ptr->partition);
  298. job_ptr->partition = xstrdup(part_name_ptr);
  299. job_ptr->part_ptr = part_ptr;
  300. last_job_update = now;
  301. update_accounting = true;
  302. }
  303. if (new_node_cnt) {
  304. job_desc_msg_t job_desc;
  305. #ifdef HAVE_BG
  306. uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
  307. static uint16_t cpus_per_node = 0;
  308. if (!cpus_per_node) {
  309. select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
  310. &cpus_per_node);
  311. }
  312. #endif
  313. if(!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
  314. error("wiki: MODIFYJOB node count of non-pending "
  315. "job %u", jobid);
  316. return ESLURM_DISABLED;
  317. }
  318. memset(&job_desc, 0, sizeof(job_desc_msg_t));
  319. job_desc.min_nodes = new_node_cnt;
  320. job_desc.max_nodes = NO_VAL;
  321. job_desc.select_jobinfo = select_g_select_jobinfo_alloc();
  322. select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);
  323. select_g_select_jobinfo_free(job_desc.select_jobinfo);
  324. job_ptr->details->min_nodes = job_desc.min_nodes;
  325. if (job_ptr->details->max_nodes &&
  326. (job_ptr->details->max_nodes < job_desc.min_nodes))
  327. job_ptr->details->max_nodes = job_desc.min_nodes;
  328. info("wiki: change job %u min_nodes to %u",
  329. jobid, new_node_cnt);
  330. #ifdef HAVE_BG
  331. job_ptr->details->min_cpus = job_desc.min_cpus;
  332. job_ptr->details->max_cpus = job_desc.max_cpus;
  333. job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;
  334. new_node_cnt = job_ptr->details->min_cpus;
  335. if (cpus_per_node)
  336. new_node_cnt /= cpus_per_node;
  337. /* This is only set up so accounting is set up correctly */
  338. select_g_select_jobinfo_set(job_ptr->select_jobinfo,
  339. SELECT_JOBDATA_NODE_CNT,
  340. &new_node_cnt);
  341. /* reset geo since changing this makes any geo
  342. potentially invalid */
  343. select_g_select_jobinfo_set(job_ptr->select_jobinfo,
  344. SELECT_JOBDATA_GEOMETRY,
  345. geometry);
  346. #endif
  347. last_job_update = now;
  348. update_accounting = true;
  349. }
  350. if (gres_ptr) {
  351. char *orig_gres;
  352. if (!IS_JOB_PENDING(job_ptr)) {
  353. error("wiki: MODIFYJOB GRES of non-pending job %u",
  354. jobid);
  355. return ESLURM_DISABLED;
  356. }
  357. orig_gres = job_ptr->gres;
  358. job_ptr->gres = NULL;
  359. if (gres_ptr[0])
  360. job_ptr->gres = xstrdup(gres_ptr);
  361. if (gres_plugin_job_state_validate(job_ptr->gres,
  362. &job_ptr->gres_list)) {
  363. error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
  364. xfree(job_ptr->gres);
  365. job_ptr->gres = orig_gres;
  366. return ESLURM_INVALID_GRES;
  367. }
  368. xfree(orig_gres);
  369. }
  370. if (wckey_ptr) {
  371. int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
  372. if (rc != SLURM_SUCCESS) {
  373. error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
  374. return rc;
  375. }
  376. }
  377. if (update_accounting) {
  378. if (job_ptr->details && job_ptr->details->begin_time) {
  379. /* Update job record in accounting to reflect
  380. * the changes */
  381. jobacct_storage_g_job_start(acct_db_conn, job_ptr);
  382. }
  383. }
  384. return SLURM_SUCCESS;
  385. }
  386. /* Modify a job:
  387. * CMD=MODIFYJOB ARG=<jobid>
  388. * [BANK=<name>;]
  389. * [COMMENT=<whatever>;]
  390. * [DEPEND=afterany:<jobid>;]
  391. * [JOBNAME=<name>;]
  392. * [MINSTARTTIME=<uts>;]
  393. * [NODES=<number>;]
  394. * [PARTITION=<name>;]
  395. * [RFEATURES=<features>;]
  396. * [TIMELIMT=<seconds>;]
  397. * [VARIABLELIST=<env_vars>;]
  398. * [GRES=<name:value>;]
  399. * [WCKEY=<name>;]
  400. *
  401. * RET 0 on success, -1 on failure */
  402. extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg)
  403. {
  404. char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr, *start_ptr;
  405. char *host_ptr, *name_ptr, *part_ptr, *time_ptr, *tmp_char;
  406. char *comment_ptr, *feature_ptr, *env_ptr, *gres_ptr, *wckey_ptr;
  407. int i, slurm_rc;
  408. uint32_t jobid, new_node_cnt = 0, new_time_limit = 0;
  409. static char reply_msg[128];
  410. /* Locks: write job, read node and partition info */
  411. slurmctld_lock_t job_write_lock = {
  412. NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
  413. arg_ptr = strstr(cmd_ptr, "ARG=");
  414. if (arg_ptr == NULL) {
  415. *err_code = -300;
  416. *err_msg = "MODIFYJOB lacks ARG=";
  417. error("wiki: MODIFYJOB lacks ARG=");
  418. return -1;
  419. }
  420. /* Change all parsed "=" to ":" then search for remaining "="
  421. * and report results as unrecognized options */
  422. arg_ptr[3] = ':';
  423. arg_ptr += 4;
  424. jobid = strtoul(arg_ptr, &tmp_char, 10);
  425. if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
  426. *err_code = -300;
  427. *err_msg = "Invalid ARG value";
  428. error("wiki: MODIFYJOB has invalid jobid");
  429. return -1;
  430. }
  431. bank_ptr = strstr(cmd_ptr, "BANK=");
  432. comment_ptr = strstr(cmd_ptr, "COMMENT=");
  433. depend_ptr = strstr(cmd_ptr, "DEPEND=");
  434. gres_ptr = strstr(cmd_ptr, "GRES=");
  435. host_ptr = strstr(cmd_ptr, "HOSTLIST=");
  436. name_ptr = strstr(cmd_ptr, "JOBNAME=");
  437. start_ptr = strstr(cmd_ptr, "MINSTARTTIME=");
  438. nodes_ptr = strstr(cmd_ptr, "NODES=");
  439. part_ptr = strstr(cmd_ptr, "PARTITION=");
  440. feature_ptr = strstr(cmd_ptr, "RFEATURES=");
  441. time_ptr = strstr(cmd_ptr, "TIMELIMIT=");
  442. env_ptr = strstr(cmd_ptr, "VARIABLELIST=");
  443. wckey_ptr = strstr(cmd_ptr, "WCKEY=");
  444. if (bank_ptr) {
  445. bank_ptr[4] = ':';
  446. bank_ptr += 5;
  447. null_term(bank_ptr);
  448. }
  449. if (comment_ptr) {
  450. comment_ptr[7] = ':';
  451. comment_ptr += 8;
  452. if (comment_ptr[0] == '\"') {
  453. comment_ptr++;
  454. for (i=0; ; i++) {
  455. if (comment_ptr[i] == '\0')
  456. break;
  457. if (comment_ptr[i] == '\"') {
  458. comment_ptr[i] = '\0';
  459. break;
  460. }
  461. }
  462. } else if (comment_ptr[0] == '\'') {
  463. comment_ptr++;
  464. for (i=0; ; i++) {
  465. if (comment_ptr[i] == '\0')
  466. break;
  467. if (comment_ptr[i] == '\'') {
  468. comment_ptr[i] = '\0';
  469. break;
  470. }
  471. }
  472. } else
  473. null_term(comment_ptr);
  474. }
  475. if (depend_ptr) {
  476. depend_ptr[6] = ':';
  477. depend_ptr += 7;
  478. null_term(depend_ptr);
  479. }
  480. if (feature_ptr) {
  481. feature_ptr[9] = ':';
  482. feature_ptr += 10;
  483. null_term(feature_ptr);
  484. }
  485. if (gres_ptr) {
  486. gres_ptr[4] = ':';
  487. gres_ptr += 5;
  488. null_term(gres_ptr);
  489. }
  490. if (host_ptr) {
  491. host_ptr[8] = ':';
  492. host_ptr += 9;
  493. null_term(host_ptr);
  494. }
  495. if (name_ptr) {
  496. name_ptr[7] = ':';
  497. name_ptr += 8;
  498. if (name_ptr[0] == '\"') {
  499. name_ptr++;
  500. for (i=0; ; i++) {
  501. if (name_ptr[i] == '\0')
  502. break;
  503. if (name_ptr[i] == '\"') {
  504. name_ptr[i] = '\0';
  505. break;
  506. }
  507. }
  508. } else if (name_ptr[0] == '\'') {
  509. name_ptr++;
  510. for (i=0; ; i++) {
  511. if (name_ptr[i] == '\0')
  512. break;
  513. if (name_ptr[i] == '\'') {
  514. name_ptr[i] = '\0';
  515. break;
  516. }
  517. }
  518. } else
  519. null_term(name_ptr);
  520. }
  521. if (start_ptr) {
  522. start_ptr[12] = ':';
  523. start_ptr += 13;
  524. null_term(start_ptr);
  525. }
  526. if (nodes_ptr) {
  527. nodes_ptr[5] = ':';
  528. nodes_ptr += 6;
  529. new_node_cnt = strtoul(nodes_ptr, NULL, 10);
  530. }
  531. if (part_ptr) {
  532. part_ptr[9] = ':';
  533. part_ptr += 10;
  534. null_term(part_ptr);
  535. }
  536. if (time_ptr) {
  537. time_ptr[9] = ':';
  538. time_ptr += 10;
  539. new_time_limit = strtoul(time_ptr, NULL, 10);
  540. }
  541. if (env_ptr) {
  542. env_ptr[12] = ':';
  543. env_ptr += 13;
  544. null_term(env_ptr);
  545. }
  546. if (wckey_ptr) {
  547. wckey_ptr[5] = ':';
  548. wckey_ptr += 6;
  549. null_term(wckey_ptr);
  550. }
  551. /* Look for any un-parsed "=" ignoring anything after VARIABLELIST
  552. * which is expected to contain "=" in its value*/
  553. tmp_char = strchr(cmd_ptr, '=');
  554. if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) {
  555. tmp_char[0] = '\0';
  556. while (tmp_char[-1] && (!isspace(tmp_char[-1])))
  557. tmp_char--;
  558. error("wiki: Invalid MODIFYJOB option %s", tmp_char);
  559. }
  560. lock_slurmctld(job_write_lock);
  561. slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr,
  562. new_node_cnt, part_ptr, new_time_limit, name_ptr,
  563. start_ptr, feature_ptr, env_ptr, comment_ptr,
  564. gres_ptr, wckey_ptr);
  565. unlock_slurmctld(job_write_lock);
  566. if (slurm_rc != SLURM_SUCCESS) {
  567. *err_code = -700;
  568. *err_msg = slurm_strerror(slurm_rc);
  569. error("wiki: Failed to modify job %u (%m)", jobid);
  570. return -1;
  571. }
  572. snprintf(reply_msg, sizeof(reply_msg),
  573. "job %u modified successfully", jobid);
  574. *err_msg = reply_msg;
  575. return 0;
  576. }