PageRenderTime 62ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/src/plugins/jobcomp/mysql/jobcomp_mysql.c

https://github.com/cfenoy/slurm
C | 462 lines | 313 code | 52 blank | 97 comment | 71 complexity | 53d9d31dabb9b8685d4d7512f211b985 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * jobcomp_mysql.c - Store/Get all information in a mysql storage.
  3. *
  4. * $Id: storage_mysql.c 10893 2007-01-29 21:53:48Z da $
  5. *****************************************************************************
  6. * Copyright (C) 2004-2007 The Regents of the University of California.
  7. * Copyright (C) 2008-2009 Lawrence Livermore National Security.
  8. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  9. * Written by Danny Auble <da@llnl.gov>
  10. *
  11. * This file is part of SLURM, a resource management program.
  12. * For details, see <http://www.schedmd.com/slurmdocs/>.
  13. * Please also read the included file: DISCLAIMER.
  14. *
  15. * SLURM is free software; you can redistribute it and/or modify it under
  16. * the terms of the GNU General Public License as published by the Free
  17. * Software Foundation; either version 2 of the License, or (at your option)
  18. * any later version.
  19. *
  20. * In addition, as a special exception, the copyright holders give permission
  21. * to link the code of portions of this program with the OpenSSL library under
  22. * certain conditions as described in each individual source file, and
  23. * distribute linked combinations including the two. You must obey the GNU
  24. * General Public License in all respects for all of the code used other than
  25. * OpenSSL. If you modify file(s) with this exception, you may extend this
  26. * exception to your version of the file(s), but you are not obligated to do
  27. * so. If you do not wish to do so, delete this exception statement from your
  28. * version. If you delete this exception statement from all source files in
  29. * the program, then also delete it here.
  30. *
  31. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  32. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  33. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  34. * details.
  35. *
  36. * You should have received a copy of the GNU General Public License along
  37. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  38. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  39. \*****************************************************************************/
  40. #include "mysql_jobcomp_process.h"
  41. #include <pwd.h>
  42. #include <grp.h>
  43. #include <sys/types.h>
  44. #include "src/common/parse_time.h"
  45. #include "src/common/node_select.h"
  46. #include "src/common/uid.h"
  /*
   * Symbols required by the generic plugin interface.  A plugin that does
   * not define them is ignored by the plugin loader.
   *
   * plugin_name    - human-readable description of the plugin.  There is no
   *                  maximum length, but the symbol must refer to a valid
   *                  string.
   *
   * plugin_type    - string of the form
   *
   *                      <application>/<method>
   *
   *                  where <application> names the intended use of the plugin
   *                  (here "jobcomp", i.e. SLURM job completion logging) and
   *                  <method> describes how the plugin provides it.  Job
   *                  completion logging plugins are expected to carry the
   *                  prefix "jobcomp/".  (With the low-level plugin API the
   *                  contents of this string are unimportant.)
   *
   * plugin_version - unsigned 32-bit version number of the plugin.  If major
   *                  and minor revisions are desired, the major number may be
   *                  multiplied by a suitable magnitude constant such as 100
   *                  or 1000.  Various SLURM versions will likely require a
   *                  certain minimum version as the job completion API
   *                  matures.
   */
  const char     plugin_name[]  = "Job completion MYSQL plugin";
  const char     plugin_type[]  = "jobcomp/mysql";
  const uint32_t plugin_version = 100;
  79. mysql_conn_t *jobcomp_mysql_conn = NULL;
  80. char *jobcomp_table = "jobcomp_table";
  81. storage_field_t jobcomp_table_fields[] = {
  82. { "jobid", "int not null" },
  83. { "uid", "smallint unsigned not null" },
  84. { "user_name", "tinytext not null" },
  85. { "gid", "smallint unsigned not null" },
  86. { "group_name", "tinytext not null" },
  87. { "name", "tinytext not null" },
  88. { "state", "smallint not null" },
  89. { "partition", "tinytext not null" },
  90. { "timelimit", "tinytext not null" },
  91. { "starttime", "int unsigned default 0 not null" },
  92. { "endtime", "int unsigned default 0 not null" },
  93. { "nodelist", "text" },
  94. { "nodecnt", "int unsigned not null" },
  95. { "proc_cnt", "int unsigned not null" },
  96. { "connect_type", "tinytext" },
  97. { "reboot", "tinytext" },
  98. { "rotate", "tinytext" },
  99. { "maxprocs", "int unsigned default 0 not null" },
  100. { "geometry", "tinytext" },
  101. { "start", "tinytext" },
  102. { "blockid", "tinytext" },
  103. { NULL, NULL}
  104. };
  /* One entry of the plugin's error-number to message table. */
  typedef struct {
      int xe_number;      /* error number to match */
      char *xe_message;   /* text returned for that number */
  } slurm_errtab_t;

  /* Plugin-specific error strings, searched by _lookup_slurm_api_errtab(). */
  static slurm_errtab_t slurm_errtab[] = {
      { .xe_number =  0, .xe_message = "No error" },
      { .xe_number = -1, .xe_message = "Unspecified error" }
  };
  114. /* A plugin-global errno. */
  115. static int plugin_errno = SLURM_SUCCESS;
  116. /* File descriptor used for logging */
  117. static pthread_mutex_t jobcomp_lock = PTHREAD_MUTEX_INITIALIZER;
  118. static int _mysql_jobcomp_check_tables()
  119. {
  120. if(mysql_db_create_table(jobcomp_mysql_conn, jobcomp_table,
  121. jobcomp_table_fields, ")") == SLURM_ERROR)
  122. return SLURM_ERROR;
  123. return SLURM_SUCCESS;
  124. }
  125. /* get the user name for the give user_id */
  126. static char *_get_user_name(uint32_t user_id)
  127. {
  128. static uint32_t cache_uid = 0;
  129. static char cache_name[32] = "root", *uname;
  130. char *ret_name = NULL;
  131. slurm_mutex_lock(&jobcomp_lock);
  132. if (user_id != cache_uid) {
  133. uname = uid_to_string((uid_t) user_id);
  134. snprintf(cache_name, sizeof(cache_name), "%s", uname);
  135. xfree(uname);
  136. cache_uid = user_id;
  137. }
  138. ret_name = xstrdup(cache_name);
  139. slurm_mutex_unlock(&jobcomp_lock);
  140. return ret_name;
  141. }
  142. /* get the group name for the give group_id */
  143. static char *_get_group_name(uint32_t group_id)
  144. {
  145. static uint32_t cache_gid = 0;
  146. static char cache_name[32] = "root", *gname;
  147. char *ret_name = NULL;
  148. slurm_mutex_lock(&jobcomp_lock);
  149. if (group_id != cache_gid) {
  150. gname = gid_to_string((gid_t) group_id);
  151. snprintf(cache_name, sizeof(cache_name), "%s", gname);
  152. xfree(gname);
  153. cache_gid = group_id;
  154. }
  155. ret_name = xstrdup(cache_name);
  156. slurm_mutex_unlock(&jobcomp_lock);
  157. return ret_name;
  158. }
  159. /*
  160. * Linear search through table of errno values and strings,
  161. * returns NULL on error, string on success.
  162. */
  163. static char *_lookup_slurm_api_errtab(int errnum)
  164. {
  165. char *res = NULL;
  166. int i;
  167. for (i = 0; i < sizeof(slurm_errtab) / sizeof(slurm_errtab_t); i++) {
  168. if (slurm_errtab[i].xe_number == errnum) {
  169. res = slurm_errtab[i].xe_message;
  170. break;
  171. }
  172. }
  173. return res;
  174. }
  175. /*
  176. * init() is called when the plugin is loaded, before any other functions
  177. * are called. Put global initialization here.
  178. */
  179. extern int init ( void )
  180. {
  181. static int first = 1;
  182. if(first) {
  183. /* since this can be loaded from many different places
  184. only tell us once. */
  185. verbose("%s loaded", plugin_name);
  186. first = 0;
  187. } else {
  188. debug4("%s loaded", plugin_name);
  189. }
  190. return SLURM_SUCCESS;
  191. }
  192. extern int fini ( void )
  193. {
  194. if (jobcomp_mysql_conn) {
  195. destroy_mysql_conn(jobcomp_mysql_conn);
  196. jobcomp_mysql_conn = NULL;
  197. }
  198. return SLURM_SUCCESS;
  199. }
  200. extern int slurm_jobcomp_set_location(char *location)
  201. {
  202. mysql_db_info_t *db_info = create_mysql_db_info(SLURM_MYSQL_PLUGIN_JC);
  203. int rc = SLURM_SUCCESS;
  204. char *db_name = NULL;
  205. int i = 0;
  206. if (jobcomp_mysql_conn && mysql_db_ping(jobcomp_mysql_conn) == 0)
  207. return SLURM_SUCCESS;
  208. if(!location)
  209. db_name = slurm_get_jobcomp_loc();
  210. else {
  211. while(location[i]) {
  212. if(location[i] == '.' || location[i] == '/') {
  213. debug("%s doesn't look like a database "
  214. "name using %s",
  215. location, DEFAULT_JOB_COMP_DB);
  216. break;
  217. }
  218. i++;
  219. }
  220. if(location[i])
  221. db_name = xstrdup(DEFAULT_JOB_COMP_DB);
  222. else
  223. db_name = xstrdup(location);
  224. }
  225. debug2("mysql_connect() called for db %s", db_name);
  226. jobcomp_mysql_conn = create_mysql_conn(0, 0, NULL);
  227. mysql_db_get_db_connection(jobcomp_mysql_conn, db_name, db_info);
  228. xfree(db_name);
  229. rc = _mysql_jobcomp_check_tables();
  230. destroy_mysql_db_info(db_info);
  231. if(rc == SLURM_SUCCESS)
  232. debug("Jobcomp database init finished");
  233. else
  234. debug("Jobcomp database init failed");
  235. return rc;
  236. }
  237. extern int slurm_jobcomp_log_record(struct job_record *job_ptr)
  238. {
  239. int rc = SLURM_SUCCESS;
  240. char *usr_str = NULL, *grp_str = NULL, lim_str[32];
  241. char *connect_type = NULL, *reboot = NULL, *rotate = NULL,
  242. *geometry = NULL, *start = NULL,
  243. *blockid = NULL;
  244. enum job_states job_state;
  245. char *query = NULL;
  246. uint32_t time_limit, start_time, end_time;
  247. if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) {
  248. char *loc = slurm_get_jobcomp_loc();
  249. if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) {
  250. xfree(loc);
  251. return SLURM_ERROR;
  252. }
  253. xfree(loc);
  254. }
  255. usr_str = _get_user_name(job_ptr->user_id);
  256. grp_str = _get_group_name(job_ptr->group_id);
  257. if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
  258. time_limit = job_ptr->part_ptr->max_time;
  259. else
  260. time_limit = job_ptr->time_limit;
  261. if (time_limit == INFINITE)
  262. strcpy(lim_str, "UNLIMITED");
  263. else {
  264. snprintf(lim_str, sizeof(lim_str), "%lu",
  265. (unsigned long) time_limit);
  266. }
  267. /* Job will typically be COMPLETING when this is called.
  268. * We remove the flags to get the eventual completion state:
  269. * JOB_FAILED, JOB_TIMEOUT, etc. */
  270. if (IS_JOB_RESIZING(job_ptr)) {
  271. job_state = JOB_RESIZING;
  272. if (job_ptr->resize_time)
  273. start_time = job_ptr->resize_time;
  274. else
  275. start_time = job_ptr->start_time;
  276. end_time = time(NULL);
  277. } else {
  278. job_state = job_ptr->job_state & JOB_STATE_BASE;
  279. if (job_ptr->resize_time)
  280. start_time = job_ptr->resize_time;
  281. else if (job_ptr->start_time > job_ptr->end_time) {
  282. /* Job cancelled while pending and
  283. * expected start time is in the future. */
  284. start_time = 0;
  285. } else
  286. start_time = job_ptr->start_time;
  287. end_time = job_ptr->end_time;
  288. }
  289. connect_type = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  290. SELECT_PRINT_CONNECTION);
  291. reboot = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  292. SELECT_PRINT_REBOOT);
  293. rotate = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  294. SELECT_PRINT_ROTATE);
  295. geometry = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  296. SELECT_PRINT_GEOMETRY);
  297. start = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  298. SELECT_PRINT_START);
  299. #ifdef HAVE_BG
  300. blockid = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  301. SELECT_PRINT_BG_ID);
  302. #else
  303. blockid = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo,
  304. SELECT_PRINT_RESV_ID);
  305. #endif
  306. query = xstrdup_printf(
  307. "insert into %s (jobid, uid, user_name, gid, group_name, "
  308. "name, state, proc_cnt, partition, timelimit, "
  309. "starttime, endtime, nodecnt",
  310. jobcomp_table);
  311. if(job_ptr->nodes)
  312. xstrcat(query, ", nodelist");
  313. if(connect_type)
  314. xstrcat(query, ", connect_type");
  315. if(reboot)
  316. xstrcat(query, ", reboot");
  317. if(rotate)
  318. xstrcat(query, ", rotate");
  319. if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL))
  320. xstrcat(query, ", maxprocs");
  321. if(geometry)
  322. xstrcat(query, ", geometry");
  323. if(start)
  324. xstrcat(query, ", start");
  325. if(blockid)
  326. xstrcat(query, ", blockid");
  327. xstrfmtcat(query, ") values (%u, %u, '%s', %u, '%s', \"%s\", %d, %u, "
  328. "'%s', \"%s\", %u, %u, %u",
  329. job_ptr->job_id, job_ptr->user_id, usr_str,
  330. job_ptr->group_id, grp_str, job_ptr->name,
  331. job_state, job_ptr->total_cpus, job_ptr->partition, lim_str,
  332. start_time, end_time, job_ptr->node_cnt);
  333. if(job_ptr->nodes)
  334. xstrfmtcat(query, ", '%s'", job_ptr->nodes);
  335. if(connect_type) {
  336. xstrfmtcat(query, ", '%s'", connect_type);
  337. xfree(connect_type);
  338. }
  339. if(reboot) {
  340. xstrfmtcat(query, ", '%s'", reboot);
  341. xfree(reboot);
  342. }
  343. if(rotate) {
  344. xstrfmtcat(query, ", '%s'", rotate);
  345. xfree(rotate);
  346. }
  347. if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL))
  348. xstrfmtcat(query, ", '%u'", job_ptr->details->max_cpus);
  349. if(geometry) {
  350. xstrfmtcat(query, ", '%s'", geometry);
  351. xfree(geometry);
  352. }
  353. if(start) {
  354. xstrfmtcat(query, ", '%s'", start);
  355. xfree(start);
  356. }
  357. if(blockid) {
  358. xstrfmtcat(query, ", '%s'", blockid);
  359. xfree(blockid);
  360. }
  361. xstrcat(query, ")");
  362. //info("query = %s", query);
  363. rc = mysql_db_query(jobcomp_mysql_conn, query);
  364. xfree(usr_str);
  365. xfree(grp_str);
  366. return rc;
  367. }
  368. extern int slurm_jobcomp_get_errno(void)
  369. {
  370. return plugin_errno;
  371. }
  /*
   * slurm_jobcomp_strerror - translate an error number into a message.
   * IN errnum - error number
   * RET the plugin's own message when errnum is in slurm_errtab, otherwise
   *	whatever strerror() returns for errnum.
   */
  extern char *slurm_jobcomp_strerror(int errnum)
  {
      char *plugin_msg = _lookup_slurm_api_errtab(errnum);

      if (plugin_msg)
          return plugin_msg;

      return strerror(errnum);
  }
  377. /*
  378. * get info from the storage
  379. * in/out job_list List of job_rec_t *
  380. * note List needs to be freed when called
  381. */
  382. extern List slurm_jobcomp_get_jobs(slurmdb_job_cond_t *job_cond)
  383. {
  384. List job_list = NULL;
  385. if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) {
  386. char *loc = slurm_get_jobcomp_loc();
  387. if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) {
  388. xfree(loc);
  389. return job_list;
  390. }
  391. xfree(loc);
  392. }
  393. job_list = mysql_jobcomp_process_get_jobs(job_cond);
  394. return job_list;
  395. }
  396. /*
  397. * expire old info from the storage
  398. */
  399. extern int slurm_jobcomp_archive(slurmdb_archive_cond_t *arch_cond)
  400. {
  401. if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) {
  402. char *loc = slurm_get_jobcomp_loc();
  403. if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) {
  404. xfree(loc);
  405. return SLURM_ERROR;
  406. }
  407. xfree(loc);
  408. }
  409. return mysql_jobcomp_process_archive(arch_cond);
  410. }