/src/resmom/linux/mom_mach.c
https://github.com/itkovian/torque · C · 5345 lines · 3417 code · 1458 blank · 470 comment · 820 complexity · 8cf49e16f2eb3c479a19208755979b78 MD5 · raw file
- #include "license_pbs.h" /* See here for the software license */
- #include <pbs_config.h> /* the master config generated by configure */
- #include "lib_mom.h" /* header */
- #include <assert.h>
- #include <limits.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <dirent.h>
- #include <errno.h>
- #include <strings.h>
- #include <mntent.h>
- #include <asm/types.h>
- #include <time.h>
- #include <sys/quota.h>
- #include <sys/time.h>
- #include <sys/procfs.h>
- #include <sys/param.h>
- #include <sys/stat.h>
- #include <sys/vfs.h>
- #include <sys/sysmacros.h>
- #include <sys/resource.h>
- #include <signal.h>
- #include <syscall.h>
- #include <ctype.h>
- #include <string.h>
- #include <csv.h>
- #include <fcntl.h>
- /* needed for oom_adj */
- #include <linux/limits.h>
- #ifdef Q_6_5_QUOTAON
- /* remap dqblk for SUSE 9.0 */
- #define dqblk if_dqblk
- #endif /* Q_6_5_QUOTAON */
- /*
- #ifndef dqblk
- #include <linux/quotaio_v1.h>
- #define dqblk v1_disk_dqblk
- #endif
- */
- #include "pbs_error.h"
- #include "portability.h"
- #include "list_link.h"
- #include "server_limits.h"
- #include "attribute.h"
- #include "resource.h"
- #include "pbs_job.h"
- #include "log.h"
- #include "mom_mach.h"
- #include "mom_func.h"
- #include "resmon.h"
- #include "utils.h"
- #include "../rm_dep.h"
- #include "pbs_nodes.h"
- #ifdef PENABLE_LINUX26_CPUSETS
- #include "pbs_cpuset.h"
- #endif
- #include "mom_config.h"
- /*
- ** System dependent code to gather information for the resource
- ** monitor for a Linux i386 machine.
- **
- ** Resources known by this code:
- ** cput cpu time for a pid or session
- ** mem memory size for a pid or session in KB
- ** resi resident memory size for a pid or session in KB
- ** sessions list of sessions in the system
- ** pids list of pids in a session
- ** nsessions number of sessions in the system
- ** nusers number of users in the system
- ** totmem total memory size in KB
- ** availmem available memory size in KB
- ** ncpus number of cpus
- ** physmem physical memory size in KB
- ** size size of a file or filesystem
- ** idletime seconds of idle time
- ** walltime wall clock time for a pid
- ** loadave current load average
- ** quota quota information (sizes in kb)
- ** netload number of bytes transferred for all interfaces
- */
- #ifndef MAX_LINE
- #define MAX_LINE 1024
- #endif
- #ifndef TRUE
- #define FALSE 0
- #define TRUE 1
- #endif /* TRUE */
- static char procfs[] = "/proc";
- static DIR *pdir = NULL;
- static int pagesize;
- extern char *ret_string;
- extern time_t time_now;
- #define TBL_INC 200 /* initial proc table */
- #define PMEMBUF_SIZE 2048
- static proc_stat_t *proc_array = NULL;
- static int nproc = 0;
- static int max_proc = 0;
- /*
- ** external functions and data
- */
- extern tlist_head svr_alljobs;
- extern struct config *search(struct config *,char *);
- extern struct rm_attribute *momgetattr(char *);
- extern long system_ncpus;
- #ifdef NUMA_SUPPORT
- extern int num_node_boards;
- extern nodeboard node_boards[];
- extern int numa_index;
- #else
- extern char path_meminfo[MAX_LINE];
- #endif /* NUMA_SUPPORT */
- /*
- ** local functions and data
- */
- static const char *resi (struct rm_attribute *);
- static const char *totmem (struct rm_attribute *);
- static const char *availmem (struct rm_attribute *);
- static const char *physmem (struct rm_attribute *);
- static const char *ncpus (struct rm_attribute *);
- static const char *walltime (struct rm_attribute *);
- static const char *quota (struct rm_attribute *);
- static const char *netload (struct rm_attribute *);
- #ifdef NUMA_SUPPORT
- const char *cpuact (struct rm_attribute *);
- #endif
- #ifdef USELIBMEMACCT
- #ifdef __cplusplus
- extern "C"
- {
- #endif
- long long get_memacct_resi(pid_t pid);
- extern long get_weighted_memory_size(pid_t);
- #ifdef __cplusplus
- }
- #endif
- #endif
- #ifndef mbool_t
- #define mbool_t char
- #endif /* mbool_t */
- mbool_t ProcIsChild(char *,pid_t,char *);
- extern const char *loadave(struct rm_attribute *);
- extern const char *nullproc(struct rm_attribute *);
- time_t wait_time = 10;
- #ifdef NUMA_SUPPORT
- typedef struct proc_cpu
- {
- unsigned long long idle_total;
- unsigned long long busy_total;
- } proc_cpu_t;
- static proc_cpu_t *cpu_array = NULL;
- #endif
- /*
- ** local resource array
- */
- struct config dependent_config[] =
- {
- { "resi", {resi} },
- { "totmem", {totmem} },
- { "availmem", {availmem} },
- { "physmem", {physmem} },
- { "ncpus", {ncpus} },
- #ifdef NUMA_SUPPORT
- { "loadave", {cpuact} },
- #else
- { "loadave", {loadave} },
- #endif
- { "walltime", {walltime} },
- { "quota", {quota} },
- { "netload", {netload} },
- { "size", {size} },
- { NULL, {nullproc} }
- };
/* value of the "btime" line of /proc/stat; set by proc_get_btime() */
unsigned linux_time = 0;

/*
 * support routine for getting system time -- sets linux_time
 *
 * Scans /proc/stat one line at a time, comparing the first
 * whitespace-delimited word of each line with "btime".  When found, the
 * unsigned number that follows is stored in linux_time.  If the file
 * cannot be opened, the label is never found, or the number cannot be
 * parsed, linux_time is left unchanged.
 */

void proc_get_btime(void)
{
  FILE *fp;
  char  label[256];

  if ((fp = fopen("/proc/stat", "r")) == NULL)
  {
    return;
  }

  /* the field width keeps an over-long token from overrunning label[] */
  while (fscanf(fp, "%255s", label) == 1)
  {
    if (strcmp(label, "btime") == 0)
    {
      if (fscanf(fp, "%u", &linux_time) != 1)
      {
        /* unparsable value: keep whatever linux_time already held */
      }

      break;
    }

    /*
     * Not the line we want: discard the rest of it.  With only suppressed
     * conversions fscanf() returns 0 unless it hits end-of-file/error.
     */
    if (fscanf(fp, "%*[^\n]%*c") != 0)
    {
      break;
    }
  }  /* END while (fscanf()) */

  fclose(fp);

  return;
}  /* END proc_get_btime() */
- /* NOTE: see 'man 5 proc' for /proc/pid/stat format and description */
- /* NOTE: leading '*' indicates that field should be ignored */
- /* FORMAT: <PID> <COMM> <STATE> <PPID> <PGRP> <SESSION> [<TTY_NR>] [<TPGID>] <FLAGS> [<MINFLT>] [<CMINFLT>] [<MAJFLT>] [<CMAJFLT>] <UTIME> <STIME> <CUTIME> <CSTIME> [<PRIORITY>] [<NICE>] [<0>] [<ITREALVALUE>] <STARTTIME> <VSIZE> <RSS> [<RLIM>] [<STARTCODE>] ... */
- static char stat_str[] = " %c %d %d %d %*d %*d %u %*u \
- %*u %*u %*u %lu %lu %lu %lu %*ld %*ld %*u %*ld %lu %llu %lld %*lu %*lu \
- %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu";
- /*
- * Convert jiffies to seconds.
- *
- * Hertz is sysconf(_SC_CLK_TCK) in get_proc_stat()
- */
- #define JTOS(x) (x) / Hertz;
- /*
- * Linux /proc status routine.
- *
- * Returns a pointer to a static proc_stat_t structure given
- * a process number, or NULL if there is an error. Takes the
- * place of the ioctl call PIOCSTATUS in the irix imp of mom_mach.c
- *
- */
- proc_stat_t *get_proc_stat(
- int pid) /* I */
- {
- static proc_stat_t ps;
- static char path[MAXLINE];
- static char readbuf[MAXLINE << 2];
- static char *lastbracket;
- FILE *fd;
- unsigned long jstarttime; /* number of jiffies since OS start time when process started */
- struct stat sb;
- static int Hertz = 0;
- int Hertz_errored = 0;
- if (Hertz <= 0)
- {
- Hertz = sysconf(_SC_CLK_TCK); /* returns 0 on error */
- if (Hertz <= 0)
- {
- /* FAILURE */
- if (!Hertz_errored)
- log_err(errno, "get_proc_stat", "sysconf(_SC_CLK_TCK) failed, unable to monitor processes");
- Hertz_errored = 1;
- return(NULL);
- }
- }
- Hertz_errored = 0;
- sprintf(path, "/proc/%d/stat",
- pid);
- if ((fd = fopen(path, "r")) == NULL)
- {
- /* FAILURE */
- return(NULL);
- }
- /* use 'man 5 proc' for /proc/pid/stat format */
- if (!fgets(readbuf, sizeof(readbuf), fd))
- {
- fclose(fd);
- return(NULL);
- }
- lastbracket = strrchr(readbuf, ')');
- if (lastbracket == NULL)
- {
- fclose(fd);
- return(NULL);
- }
- *lastbracket = '\0'; /* We basically split the string here, overwriting the ')'. */
- lastbracket++;
- if (sscanf(readbuf,"%d (%[^\n]",&ps.pid,path) != 2)
- {
- /* FAILURE */
- fclose(fd);
- return(NULL);
- }
- /* see stat_str[] value for mapping 'stat' format */
- if (sscanf(lastbracket,stat_str,
- &ps.state, /* state (one of RSDZTW) */
- &ps.ppid, /* ppid */
- &ps.pgrp, /* pgrp */
- &ps.session, /* session id */
- &ps.flags, /* flags - kernel flags of the process, see the PF_* in <linux/sched.h> */
- &ps.utime, /* utime - jiffies that this process has been scheduled in user mode */
- &ps.stime, /* stime - jiffies that this process has been scheduled in kernel mode */
- &ps.cutime, /* cutime - jiffies that this processâs waited-for children have been scheduled in user mode */
- &ps.cstime, /* cstime - jiffies that this processâs waited-for children have been scheduled in kernel mode */
- &jstarttime, /* starttime */
- &ps.vsize, /* vsize */
- &ps.rss) != 12) /* rss */
- {
- /* FAILURE */
- fclose(fd);
- return(NULL);
- }
- if (fstat(fileno(fd), &sb) == -1)
- {
- /* FAILURE */
- fclose(fd);
- return(NULL);
- }
- ps.uid = sb.st_uid;
- ps.start_time = linux_time + JTOS(jstarttime);
- ps.name = path;
- ps.utime = JTOS(ps.utime);
- ps.stime = JTOS(ps.stime);
- ps.cutime = JTOS(ps.cutime);
- ps.cstime = JTOS(ps.cstime);
- /* SUCCESS */
- fclose(fd);
- return(&ps);
- } /* END get_proc_stat() */
- #ifdef USELIBMEMACCT
- /*
- * Retrieve weighted RSS value for process with pid from memacctd.
- * Returns the value in bytes on success, returns -1 on failure.
- */
- long long get_memacct_resi(pid_t pid)
- {
- long long w_rss;
- if ((w_rss = get_weighted_memory_size(pid)) == -1)
- {
- sprintf(log_buffer, "get_weighted_memory_size(%d) failed", pid);
- log_err(errno, __func__, log_buffer);
- }
- return(w_rss);
- } /* END get_memacct_resi() */
- #endif
- /*
- * get_proc_mem_from_path()
- * @returns a pointer to a struct containing the memory information
- * @pre-cond: path must point to a valid path of a meminfo system file
- */
- proc_mem_t *get_proc_mem_from_path(
- const char *path)
- {
- proc_mem_t *mm;
- FILE *fp;
- char str[32];
- long long bfsz = -1;
- long long casz = -1;
- long long fcasz = -1;
- if ((fp = fopen(path,"r")) == NULL)
- {
- return(NULL);
- }
- mm = (proc_mem_t *)calloc(1, sizeof(proc_mem_t));
- if (fscanf(fp,"%30s",str) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- if (!strncmp(str,"total:",sizeof(str)))
- {
- /* old format */
- if (fscanf(fp,"%*[^\n]%*c") != 0) /* remove text header */
- {
- fclose(fp);
- return(NULL);
- }
- /* umu vmem patch */
- if (fscanf(fp, "%*s %llu %llu %llu %*u %lld %lld",
- &mm->mem_total,
- &mm->mem_used,
- &mm->mem_free,
- &bfsz,
- &casz) != 5)
- {
- fclose(fp);
- return(NULL);
- }
- mm->mem_free += casz + bfsz;
- if (fscanf(fp, "%*s %llu %llu %llu %*[^\n]%*c",
- &mm->swap_total,
- &mm->swap_used,
- &mm->swap_free) != 3)
- {
- fclose(fp);
- return(NULL);
- }
- }
- else
- {
- do
- {
- /* new format (kernel > 2.4) the first 'str' has been read */
- if (!strncmp(str, "MemTotal:", sizeof(str)))
- {
- if (fscanf(fp, "%llu",
- &mm->mem_total) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- mm->mem_total *= 1024; /* the unit is kB */
- }
- else if (!strncmp(str, "MemFree:", sizeof(str)))
- {
- if (fscanf(fp, "%llu",
- &mm->mem_free) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- mm->mem_free *= 1024;
- }
- else if (!strncmp(str, "Buffers:", sizeof(str)))
- {
- if (fscanf(fp, "%lld",
- &bfsz) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- bfsz *= 1024;
- }
- else if (!strncmp(str, "Cached:", sizeof(str)))
- {
- if (fscanf(fp, "%lld",
- &casz) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- casz *= 1024;
- }
- else if (!strncmp(str, "FilePages:", sizeof(str)))
- {
- if (fscanf(fp, "%lld",
- &fcasz) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- fcasz *= 1024;
- }
- else if (!strncmp(str, "SwapTotal:", sizeof(str)))
- {
- if (fscanf(fp, "%llu",
- &mm->swap_total) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- mm->swap_total *= 1024;
- }
- else if (!strncmp(str, "SwapFree:", sizeof(str)))
- {
- if (fscanf(fp, "%llu",
- &mm->swap_free) != 1)
- {
- fclose(fp);
- return(NULL);
- }
- mm->swap_free *= 1024;
- }
- }
- while (fscanf(fp, "%30s", str) == 1);
- } /* END else */
- fclose(fp);
- if (bfsz >= 0 || casz >= 0)
- {
- if (bfsz > 0)
- mm->mem_free += bfsz;
- if (casz > 0)
- mm->mem_free += casz;
- }
- else if (fcasz > 0)
- {
- mm->mem_free += fcasz;
- }
- return(mm);
- } /* END get_proc_mem_from_path() */
- proc_mem_t *get_proc_mem(void)
- {
- static proc_mem_t ret_mm;
- #ifdef NUMA_SUPPORT
- int i;
- #else
- proc_mem_t *mem;
- #endif
- #ifdef NUMA_SUPPORT
- ret_mm.mem_total = 0;
- ret_mm.mem_used = 0;
- ret_mm.mem_free = 0;
- ret_mm.swap_total = 0;
- ret_mm.swap_used = 0;
- ret_mm.swap_free = 0;
- for (i = 0; i < node_boards[numa_index].num_nodes; i++)
- {
- proc_mem_t *node_mem = get_proc_mem_from_path(node_boards[numa_index].path_meminfo[i]);
- if (node_mem == NULL)
- return(NULL);
- ret_mm.mem_total += node_mem->mem_total;
- ret_mm.mem_used += node_mem->mem_used;
- ret_mm.mem_free += node_mem->mem_free;
- ret_mm.swap_total += node_mem->swap_total;
- ret_mm.swap_used += node_mem->swap_used;
- ret_mm.swap_free += node_mem->swap_free;
- free(node_mem);
- }
- #else
- mem = get_proc_mem_from_path(path_meminfo);
-
- if(mem == NULL)
- return (NULL);
- ret_mm.mem_total = mem->mem_total;
- ret_mm.mem_used = mem->mem_used;
- ret_mm.mem_free = mem->mem_free;
- ret_mm.swap_total = mem->swap_total;
- ret_mm.swap_used = mem->swap_used;
- ret_mm.swap_free = mem->swap_free;
- free(mem);
- #endif
- return(&ret_mm);
- } /* END get_proc_mem() */
- #ifdef PNOT
- proc_mem_t *get_proc_mem(void)
- {
- static proc_mem_t mm;
- FILE *fp;
- unsigned long m_tot, m_use, m_free;
- unsigned long s_tot, s_use, s_free;
- if ((fp = fopen(path_meminfo, "r")) == NULL)
- {
- return(NULL);
- }
- fscanf(fp, "%*[^\n]%*c"); /* remove text header */;
- fscanf(fp, "%*s %lu %lu %lu %*[^\n]%*c",
- &m_tot,
- &m_use,
- &m_free);
- fscanf(fp, "%*s %lu %lu %lu %*[^\n]%*c",
- &s_tot,
- &s_use,
- &s_free);
- mm.total = m_tot + s_tot;
- mm.used = m_use + s_use;
- mm.free = m_free + s_free;
- fclose(fp);
- return(&mm);
- } /* END get_proc_mem() */
- #endif /* PNOT */
/*
 * sets oom_adj score for current process
 * requires root privileges or CAP_SYS_RESOURCE to succeed
 *
 * The score is written as decimal text to /proc/<pid>/oom_adj of the
 * calling process.
 *
 * @param score  value to write; only -17 .. 15 is accepted
 * @return the result of write() (number of bytes written, or -1), or -1 if
 *         the score is out of range, a string could not be formatted, or
 *         the file could not be opened
 */

static int oom_adj(int score)
{
  char oom_adj_path[PATH_MAX] = "";
  char adj_value[128] = "";
  int  fd;
  int  rc;

  /* valid values are -17 to 15 */

  if (score > 15 || score < -17)
    return -1;

  if (snprintf(oom_adj_path, sizeof(oom_adj_path), "/proc/%d/oom_adj", (int)getpid()) < 0)
    return -1;

  /* format the value before opening the file so that no failure path
   * below has to remember to close the descriptor */

  if (snprintf(adj_value, sizeof(adj_value), "%d", score) < 0)
    return -1;

  if ((fd = open(oom_adj_path, O_RDWR)) == -1)
    return -1;

  rc = (int)write(fd, adj_value, strlen(adj_value));

  close(fd);

  return rc;
}
- void dep_initialize(void)
- {
- pagesize = getpagesize();
- if ((pdir = opendir(procfs)) == NULL)
- {
- log_err(errno, __func__, "opendir");
-
- return;
- }
- /* NOTE: /proc/<pid>/oom_adj tunable is linux specific */
- /* LKF: make pbs_mom processes immune to oom killer's killing frenzy if requested*/
- if (mom_oom_immunize != 0)
- {
-
- if (oom_adj(-17) < 0)
- {
- log_record(
- PBSEVENT_SYSTEM,
- PBS_EVENTCLASS_SERVER,
- __func__,
- "failed to make pbs_mom oom-killer immune");
- }
- else
- {
- log_record(
- PBSEVENT_SYSTEM,
- PBS_EVENTCLASS_SERVER,
- __func__,
- "mom is now oom-killer safe");
- }
- }
- proc_get_btime();
- return;
- } /* END dep_initialize() */
- void dep_cleanup(void)
- {
- log_record(PBSEVENT_SYSTEM, 0, __func__, "dependent cleanup");
- if (pdir)
- {
- closedir(pdir);
- pdir = NULL;
- }
- return;
- }
/*
 * Hook invoked once per pass of the main loop.
 *
 * This platform has no periodic work to do, so the routine is
 * intentionally empty.
 */

void dep_main_loop_cycle(void)
{
  return;
}
- /*
- * Internal size decoding routine.
- *
- * Accepts a resource pointer and a pointer to the unsigned long integer
- * to receive the decoded value. It returns a PBS error code, and the
- * decoded value in the unsigned long integer.
- *
- * sizeof(word) = sizeof(int)
- */
- static int mm_getsize(
- resource *pres, /* I */
- unsigned long *ret) /* O */
- {
- unsigned long value;
- if (pres->rs_value.at_type != ATR_TYPE_SIZE)
- {
- return(PBSE_ATTRTYPE);
- }
- value = pres->rs_value.at_val.at_size.atsv_num;
- if (pres->rs_value.at_val.at_size.atsv_units == ATR_SV_WORDSZ)
- {
- if (value > ULONG_MAX / sizeof(int))
- {
- return(PBSE_BADATVAL);
- }
- value *= sizeof(int);
- }
- if (value > (ULONG_MAX >> pres->rs_value.at_val.at_size.atsv_shift))
- {
- return(PBSE_BADATVAL);
- }
- *ret = (value << pres->rs_value.at_val.at_size.atsv_shift);
- return(PBSE_NONE);
- } /* END mm_getsize() */
- /*
- * Internal time decoding routine.
- *
- * Accepts a resource pointer and a pointer to the unsigned long integer
- * to receive the decoded value. It returns a PBS error code, and the
- * decoded value of time in seconds in the unsigned long integer.
- */
- static int mm_gettime(
- resource *pres,
- unsigned long *ret)
- {
- if (pres->rs_value.at_type != ATR_TYPE_LONG)
- {
- return(PBSE_ATTRTYPE);
- }
- if (pres->rs_value.at_val.at_long < 0)
- {
- return(PBSE_BADATVAL);
- }
- *ret = pres->rs_value.at_val.at_long;
- return(PBSE_NONE);
- }
- static int injob(
- job *pjob,
- pid_t sid)
- {
- task *ptask;
- pid_t pid;
- #ifdef PENABLE_LINUX26_CPUSETS
- struct pidl *pids = NULL;
- struct pidl *pp;
- #else
- proc_stat_t *ps;
- #endif /* PENABLE_LINUX26_CPUSETS */
- for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
- ptask != NULL;
- ptask = (task *)GET_NEXT(ptask->ti_jobtask))
- {
- if (ptask->ti_qs.ti_sid <= 1)
- continue;
- if (ptask->ti_qs.ti_sid == sid)
- {
- return(TRUE);
- }
- }
- /* processes with a different sessionid are not necessarily not part of the
- job: the job can call setsid; need to check whether one of the parent
- processes has a sessionid that is in the job */
- #ifdef PENABLE_LINUX26_CPUSETS
- /* check whether the sid is in the job's cpuset */
- pids = get_cpuset_pidlist(pjob->ji_qs.ji_jobid, pids);
- pp = pids;
- while (pp != NULL)
- {
- pid = pp->pid;
- pp = pp->next;
- if (pid == sid)
- {
- free_pidlist(pids);
- return(TRUE);
- }
- }
- free_pidlist(pids);
- #else
- /* get the parent process id of the sid and check whether it is part of
- the job; iterate */
- pid = sid;
- while (pid > 1)
- {
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat", pid);
- log_err(errno, __func__, log_buffer);
- }
- return(FALSE);
- }
- pid = getsid(ps->ppid);
- for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
- ptask != NULL;
- ptask = (task *)GET_NEXT(ptask->ti_jobtask))
- {
- if (ptask->ti_qs.ti_sid <= 1)
- continue;
- if (ptask->ti_qs.ti_sid == pid)
- {
- return(TRUE);
- }
- }
- }
- #endif /* PENABLE_LINUX26_CPUSETS */
- return(FALSE);
- } /* END injob() */
- /*
- * Internal session CPU time decoding routine.
- *
- * Accepts a job pointer. Returns the sum of all cpu time
- * consumed for all tasks executed by the job, in seconds,
- * adjusted by cputfactor.
- */
- static unsigned long cput_sum(
- job *pjob) /* I */
- {
- ulong cputime;
- int nps = 0;
- int i;
- proc_stat_t *ps;
- cputime = 0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %s",
- pjob->ji_qs.ji_jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if ((LOGLEVEL >= 6) && (ps == NULL))
- {
- sprintf(log_buffer, "proc_array loop end - nproc=%d, i=%d, ps is null",
- nproc,
- i);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- if (!injob(pjob, ps->session))
- continue;
- nps++;
- cputime += (ps->utime + ps->stime + ps->cutime + ps->cstime);
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: session=%d pid=%d cputime=%lu (cputfactor=%f)",
- __func__,
- ps->session,
- ps->pid,
- cputime,
- cputfactor);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- } /* END for (i) */
- if (nps == 0)
- pjob->ji_flags |= MOM_NO_PROC;
- else
- pjob->ji_flags &= ~MOM_NO_PROC;
- return((unsigned long)((double)cputime * cputfactor));
- } /* END cput_sum() */
- /*
- * Return TRUE if any process in the job is over limit for cputime usage.
- */
- static int overcpu_proc(
- job *pjob,
- unsigned long limit) /* I */
- {
- ulong cputime;
- pid_t pid;
- proc_stat_t *ps;
- #ifdef PENABLE_LINUX26_CPUSETS
- struct pidl *pids = NULL;
- struct pidl *pp;
- #else
- struct dirent *dent;
- #endif /* PENABLE_LINUX26_CPUSETS */
- #ifdef PENABLE_LINUX26_CPUSETS
- /* Instead of collect stats of all processes running on a large SMP system,
- * collect stats of processes running in and below the cpuset of the job, only. */
- pids = get_cpuset_pidlist(pjob->ji_qs.ji_jobid, pids);
- pp = pids;
- while (pp != NULL)
- {
- pid = pp->pid;
- pp = pp->next;
- #else
- rewinddir(pdir);
- while ((dent = readdir(pdir)) != NULL)
- {
- if (!isdigit(dent->d_name[0]))
- continue;
- pid = atoi(dent->d_name);
- #endif /* PENABLE_LINUX26_CPUSETS */
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat", pid);
- log_err(errno, __func__, log_buffer);
- }
- continue;
- }
- #ifndef PENABLE_LINUX26_CPUSETS
- /* if it was in the cpuset, its part of the job, no need to check */
- if (!injob(pjob, ps->session))
- continue;
- #endif /* PENABLE_LINUX26_CPUSETS */
- /* change from ps->cutime to ps->utime, and ps->cstime to ps->stime */
- cputime = (ulong)((double)(ps->utime + ps->stime) * cputfactor);
- if (cputime > limit)
- {
- #ifdef PENABLE_LINUX26_CPUSETS
- free_pidlist(pids);
- #endif
- return(TRUE);
- }
- }
- #ifdef PENABLE_LINUX26_CPUSETS
- free_pidlist(pids);
- #endif
- return(FALSE);
- } /* END overcpu_proc() */
- /*
- * Internal session virtual memory usage function.
- *
- * Returns the total number of bytes of address
- * space consumed by all current processes within the job.
- */
- static unsigned long long mem_sum(
- job *pjob)
- {
- int i;
- unsigned long long segadd;
- proc_stat_t *ps;
- segadd = 0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %s",
- pjob->ji_qs.ji_jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (!injob(pjob, ps->session))
- continue;
- segadd += ps->vsize;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: session=%d pid=%d vsize=%llu sum=%llu",
- __func__,
- ps->session,
- ps->pid,
- ps->vsize,
- segadd);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- } /* END for (i) */
- return(segadd);
- } /* END mem_sum() */
- /*
- * Internal session memory usage function.
- *
- * Returns the total number of bytes of resident memory
- * consumed by all current processes within the job.
- */
- static unsigned long long resi_sum(
- job *pjob)
- {
- int i;
- unsigned long long resisize;
- proc_stat_t *ps;
- #ifdef USELIBMEMACCT
- long long w_rss;
- #endif
- resisize = 0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %s",
- pjob->ji_qs.ji_jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (!injob(pjob, ps->session))
- continue;
- #ifdef USELIBMEMACCT
- /* Ask memacctd for weighted rss of pid, use this instead of ps->rss */
- w_rss = get_memacct_resi(ps->pid);
- if (w_rss == -1)
- resisize += ps->rss * pagesize;
- else
- resisize += w_rss;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: session=%d pid=%d rss=%llu w_rss=%ld sum=%llu",
- __func__,
- ps->session,
- ps->pid,
- ps->rss * pagesize,
- w_rss,
- resisize);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- #else
- resisize += ps->rss * pagesize;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: session=%d pid=%d rss=%llu sum=%llu",
- __func__,
- ps->session,
- ps->pid,
- ps->rss * pagesize,
- resisize);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- #endif
- } /* END for (i) */
- return(resisize);
- } /* END resi_sum() */
- /*
- * Return TRUE if any process in the job is over limit for virtual memory usage.
- */
- static int overmem_proc(
- job *pjob, /* I */
- unsigned long long limit) /* I */
- {
- int i;
- proc_stat_t *ps;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %s",
- pjob->ji_qs.ji_jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (!injob(pjob, ps->session))
- continue;
- if (ps->vsize > limit)
- {
- return(TRUE);
- }
- } /* END for (i) */
- return(FALSE);
- } /* END overmem_proc() */
- extern char *msg_momsetlim;
- /*
- * Internal error routine
- */
- int error(
- const char *string,
- int value)
- {
- char *message;
- assert(string != NULL);
- assert(*string != '\0');
- message = pbse_to_txt(value);
- assert(message != NULL);
- assert(*message != '\0');
- fprintf(stderr, msg_momsetlim, string, message);
- fflush(stderr);
- return(value);
- } /* END error() */
- /*
- * Establish system-enforced limits for the job.
- *
- * Run through the resource list, checking the values for all items
- * we recognize.
- *
- * If set_mode is SET_LIMIT_SET, then also set hard limits for the
- * system enforced limits (not-polled).
- * If anything goes wrong with the process, return a PBS error code
- * and print a message on standard error. A zero-length resource list
- * is not an error.
- *
- * If set_mode is SET_LIMIT_SET the entry conditions are:
- * 1. MOM has already forked, and we are called from the child.
- * 2. The child is still running as root.
- * 3. Standard error is open to the user's file.
- *
- * If set_mode is SET_LIMIT_ALTER, we are being called to modify
- * existing limits. Cannot alter those set by setrlimit (kernel)
- * because we are the wrong process.
- */
- int mom_set_limits(
- job *pjob, /* I */
- int set_mode) /* SET_LIMIT_SET or SET_LIMIT_ALTER */
- {
- const char *pname = NULL;
- int retval;
- unsigned long value; /* place in which to build resource value */
- resource *pres;
- struct rlimit reslim;
- unsigned long vmem_limit = 0;
- unsigned long mem_limit = 0;
- /* NOTE: log_buffer is exported */
- if (LOGLEVEL >= 2)
- {
- sprintf(log_buffer, "%s(%s,%s) entered",
- __func__,
- (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
- (set_mode == SET_LIMIT_SET) ? "set" : "alter");
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- log_buffer[0] = '\0';
- }
- assert(pjob != NULL);
- assert(pjob->ji_wattr[JOB_ATR_resource].at_type == ATR_TYPE_RESC);
- pres = (resource *)GET_NEXT(pjob->ji_wattr[JOB_ATR_resource].at_val.at_list);
- /*
- * cycle through all the resource specifications,
- * setting limits appropriately.
- */
- memset(&reslim, 0, sizeof(reslim));
- /* set oom_adj score for the starting job */
- /* if immunize mode is set to on, we have to set child score to 0 */
- if ( (set_mode == SET_LIMIT_SET) && ( job_oom_score_adjust != 0 || mom_oom_immunize != 0 ) )
- {
- retval = oom_adj(job_oom_score_adjust);
- if ( LOGLEVEL >= 2 )
- {
- sprintf(log_buffer, "setting oom_adj '%s'",
- (retval != -1) ? "succeeded" : "failed");
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- };
- while (pres != NULL)
- {
- if (pres->rs_defin != NULL)
- pname = pres->rs_defin->rs_name;
- else
- pname = NULL;
- if (LOGLEVEL >= 2)
- {
- sprintf(log_buffer, "setting limit for attribute '%s'",
- (pname != NULL) ? pname : "NULL");
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- log_buffer[0] = '\0';
- }
- assert(pres->rs_defin != NULL);
- assert(pname != NULL);
- assert(pname[0] != '\0');
- if (!strcmp(pname, "cput"))
- {
- if (igncput == FALSE)
- {
- /* cpu time - check, if less than pcput use it */
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "cput mm_gettime failed in %s", __func__);
- return(error(pname, retval));
- }
- }
- }
- else if (!strcmp(pname, "pcput"))
- {
- if (igncput == FALSE)
- {
- if (set_mode == SET_LIMIT_SET)
- {
- /* process cpu time - set */
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "pcput mm_gettime failed in %s", __func__);
- return(error(pname, retval));
- }
- reslim.rlim_cur = reslim.rlim_max =
- (unsigned long)((double)value / cputfactor);
- if (LOGLEVEL >= 2)
- {
- sprintf(log_buffer, "setting cpu time limit to %ld for job %s",
- (long int)reslim.rlim_cur,
- pjob->ji_qs.ji_jobid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- log_buffer[0] = '\0';
- }
- /* NOTE: some versions of linux have a bug which causes the parent
- process to receive a SIGKILL if the child's cpu limit is exceeded */
- if (setrlimit(RLIMIT_CPU, &reslim) < 0)
- {
- sprintf(log_buffer, "setrlimit for RLIMIT_CPU failed in %s, errno=%d (%s)",
- __func__,
- errno, strerror(errno));
- return(error("RLIMIT_CPU", PBSE_SYSTEM));
- }
- } /* END if (set_mode == SET_LIMIT_SET) */
- }
- }
- else if (!strcmp(pname, "file"))
- {
- /* set */
- if (set_mode == SET_LIMIT_SET)
- {
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "mm_getsize() failed for file in %s",
- __func__);
- return(error(pname, retval));
- }
- if (value > ULONG_MAX)
- {
- if (LOGLEVEL >= 0)
- {
- sprintf(log_buffer, "cannot set file limit to %ld for job %s (value too large)",
- (long int)reslim.rlim_cur,
- pjob->ji_qs.ji_jobid);
- log_err(-1, __func__, log_buffer);
- log_buffer[0] = '\0';
- }
- return(error(pname, PBSE_BADATVAL));
- }
- reslim.rlim_cur = reslim.rlim_max = value;
- if (setrlimit(RLIMIT_FSIZE, &reslim) < 0)
- {
- sprintf(log_buffer, "cannot set file limit to %ld for job %s (setrlimit failed - check default user limits)",
- (long int)reslim.rlim_max,
- pjob->ji_qs.ji_jobid);
- log_err(errno, __func__, log_buffer);
- log_buffer[0] = '\0';
- return(error(pname, PBSE_SYSTEM));
- }
- }
- }
- else if (!strcmp(pname, "vmem"))
- {
- if (ignvmem == FALSE)
- {
- /* check */
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "mm_getsize() failed for vmem in %s", __func__);
- return(error(pname, retval));
- }
- if ((vmem_limit == 0) || (value < vmem_limit))
- vmem_limit = value;
- }
- }
- else if (!strcmp(pname, "pvmem"))
- {
- if (ignvmem == FALSE)
- {
- /* set */
- if (set_mode == SET_LIMIT_SET)
- {
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "mm_getsize() failed for pvmem in %s",
- __func__);
- return(error(pname, retval));
- }
- if (value > ULONG_MAX)
- {
- log_buffer[0] = '\0';
- sprintf(log_buffer, "invalid value returned by mm_getsize() for pvmem in %s",
- __func__);
- return(error(pname, PBSE_BADATVAL));
- }
- if ((vmem_limit == 0) || (value < vmem_limit))
- vmem_limit = value;
- }
- }
- }
- else if ((!strcmp(pname,"mem") && (pjob->ji_numnodes != 1)) ||
- !strcmp(pname,"mppmem"))
- {
- /* ignore. If we ever get rid of support for the UNICOS OS then we can
- remove the ATR_DFLAG_MOM | ATR_DFLAG_ALTRUN flags from mppmem */
- }
- else if ((!strcmp(pname, "mem") && (pjob->ji_numnodes == 1)) ||
- !strcmp(pname, "pmem"))
- {
- if (ignmem == FALSE)
- {
- /* set */
- if (set_mode == SET_LIMIT_SET)
- {
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "mm_getsize() failed for mem/pmem in %s",
- __func__);
- return(error(pname, retval));
- }
- reslim.rlim_cur = reslim.rlim_max = value;
- if (setrlimit(RLIMIT_DATA, &reslim) < 0)
- {
- sprintf(log_buffer, "cannot set data limit to %ld for job %s (setrlimit failed w/errno=%d (%s) - check default user limits)",
- (long int)reslim.rlim_max,
- pjob->ji_qs.ji_jobid,
- errno,
- strerror(errno));
- return(error("RLIMIT_DATA", PBSE_SYSTEM));
- }
- if (setrlimit(RLIMIT_RSS, &reslim) < 0)
- {
- sprintf(log_buffer, "cannot set RSS limit to %ld for job %s (setrlimit failed w/errno=%d (%s) - check default user limits)",
- (long int)reslim.rlim_max,
- pjob->ji_qs.ji_jobid,
- errno,
- strerror(errno));
- return(error("RLIMIT_RSS", PBSE_SYSTEM));
- }
- #ifdef __GATECH
- /* NOTE: best patch may be to change to 'vmem_limit = value;' */
- if (setrlimit(RLIMIT_STACK, &reslim) < 0)
- {
- sprintf(log_buffer, "cannot set stack limit to %ld for job %s (setrlimit failed w/errno=%d (%s) - check default user limits)",
- (long int)reslim.rlim_max,
- pjob->ji_qs.ji_jobid,
- errno,
- strerror(errno));
- return(error("RLIMIT_STACK", PBSE_SYSTEM));
- }
- /* set address space */
- if (setrlimit(RLIMIT_AS, &reslim) < 0)
- {
- sprintf(log_buffer, "cannot set AS limit to %ld for job %s (setrlimit failed w/errno=%d (%s) - check default user limits)",
- (long int)reslim.rlim_max,
- pjob->ji_qs.ji_jobid,
- errno,
- strerror(errno));
- return(error("RLIMIT_AS", PBSE_SYSTEM));
- }
- #endif /* __GATECH */
- mem_limit = value;
- if (getrlimit(RLIMIT_STACK, &reslim) >= 0)
- {
- /* NOTE: mem_limit no longer used with UMU patch in place */
- mem_limit = value + reslim.rlim_cur;
- }
- }
- }
- } /* END else if (!strcmp(pname,"mem") && ... */
- else if (!strcmp(pname, "walltime"))
- {
- /* check */
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- {
- sprintf(log_buffer, "mm_gettime() failed for walltime in %s\n",
- __func__);
- return(error(pname, retval));
- }
- }
- else if (!strcmp(pname, "nice"))
- {
- /* set nice */
- if (set_mode == SET_LIMIT_SET)
- {
- errno = 0;
- if ((nice((int)pres->rs_value.at_val.at_long) == -1) && (errno != 0))
- {
- sprintf(log_buffer, "nice() failed w/errno=%d (%s) in %s\n",
- errno,
- strerror(errno),
- __func__);
- return(error(pname, PBSE_BADATVAL));
- }
- }
- }
- else if (!strcmp(pname, "size"))
- {
- /* ignore */
- /* NO-OP */
- }
- else if (!strcmp(pname, "prologue"))
- {
- }
- else if (!strcmp(pname, "epilogue"))
- {
- }
- else if ((!strcmp(pname, "mppdepth")) ||
- (!strcmp(pname, "mppnodect")) ||
- (!strcmp(pname, "mppwidth")) ||
- (!strcmp(pname, "mppnppn")) ||
- (!strcmp(pname, "mppnodes")) ||
- (!strcmp(pname, "mpplabels")) ||
- (!strcmp(pname, "mpparch")) ||
- (!strcmp(pname, "mpplabel")))
- {
- /* NO-OP */
- }
- else if ((pres->rs_defin->rs_flags & ATR_DFLAG_RMOMIG) == 0)
- {
- /* don't recognize and not marked as ignore by mom */
- sprintf(log_buffer, "do not know how to process resource '%s' in %s\n",
- pname,
- __func__);
- return(error(pname, PBSE_UNKRESC));
- }
- pres = (resource *)GET_NEXT(pres->rs_link);
- }
- if (set_mode == SET_LIMIT_SET)
- {
- /* if either of vmem or pvmem was given, set sys limit to lesser */
- if (vmem_limit != 0)
- {
- /* Don't make (p)vmem < pmem */
- if (mem_limit > vmem_limit)
- {
- vmem_limit = mem_limit;
- }
- reslim.rlim_cur = reslim.rlim_max = vmem_limit;
- if ((ignvmem == 0) && (setrlimit(RLIMIT_AS, &reslim) < 0))
- {
- sprintf(log_buffer, "setrlimit() failed setting AS for vmem_limit mod in %s\n",
- __func__);
- return(error("RLIMIT_AS", PBSE_SYSTEM));
- }
- /* UMU vmem patch sets RLIMIT_AS rather than RLIMIT_DATA and RLIMIT_STACK */
- /*
- reslim.rlim_cur = reslim.rlim_max = mem_limit;
- if (setrlimit(RLIMIT_DATA,&reslim) < 0)
- {
- sprintf(log_buffer,"setrlimit() failed setting data for vmem_limit mod in %s\n",
- id);
- return(error("RLIMIT_DATA",PBSE_SYSTEM));
- }
- if (setrlimit(RLIMIT_STACK,&reslim) < 0)
- {
- sprintf(log_buffer,"setrlimit() failed setting stack for vmem_limit mod in %s\n",
- id);
- return(error("RLIMIT_STACK",PBSE_SYSTEM));
- }
- */
- }
- }
- if (LOGLEVEL >= 5)
- {
- sprintf(log_buffer, "%s(%s,%s) completed",
- __func__,
- (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
- (set_mode == SET_LIMIT_SET) ? "set" : "alter");
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- log_buffer[0] = '\0';
- }
- /* SUCCESS */
- return(PBSE_NONE);
- } /* END mom_set_limits() */
- /*
- * State whether MOM main loop has to poll this job to determine if some
- * limits are being exceeded.
- *
- * Sets flag TRUE if polling is necessary, FALSE otherwise. Actual
- * polling is done using the mom_over_limit machine-dependent function.
- */
- int mom_do_poll(
- job *pjob) /* I */
- {
- const char *pname;
- resource *pres;
- assert(pjob != NULL);
- if (LOGLEVEL >= 4)
- {
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- pjob->ji_qs.ji_jobid,
- "evaluating limits for job");
- }
- assert(pjob != NULL);
- assert(pjob->ji_wattr[JOB_ATR_resource].at_type == ATR_TYPE_RESC);
- pres = (resource *)GET_NEXT(
- pjob->ji_wattr[JOB_ATR_resource].at_val.at_list);
- while (pres != NULL)
- {
- assert(pres->rs_defin != NULL);
- pname = pres->rs_defin->rs_name;
- assert(pname != NULL);
- assert(*pname != '\0');
- if (strcmp(pname, "walltime") == 0 ||
- strcmp(pname, "cput") == 0 ||
- strcmp(pname, "pcput") == 0 ||
- strcmp(pname, "mem") == 0 ||
- strcmp(pname, "pvmem") == 0 ||
- strcmp(pname, "vmem") == 0)
- {
- return(TRUE);
- }
- pres = (resource *)GET_NEXT(pres->rs_link);
- }
- return(FALSE);
- } /* END mom_do_poll() */
- /*
- * Setup for polling.
- *
- * Open kernel device and get namelist info.
- */
- int mom_open_poll(void)
- {
- if (LOGLEVEL >= 6)
- {
- log_record(PBSEVENT_SYSTEM, 0, __func__, "started");
- }
- pagesize = getpagesize();
- proc_array = (proc_stat_t *)calloc(TBL_INC, sizeof(proc_stat_t));
- if (proc_array == NULL)
- {
- log_err(errno, __func__, "calloc");
- return(PBSE_SYSTEM);
- }
- max_proc = TBL_INC;
- return(PBSE_NONE);
- } /* END mom_open_poll() */
- /*
- * Declare start of polling loop.
- *
- * This function caches information about all of processes
- * on the compute node (pbs_mom calls this function). Each process
- * in /proc/ is queried by looking at the 'stat' file. Statistics like
- * CPU usage time, memory consumption, etc. are gathered in the proc_array
- * list. This list is then used throughout the pbs_mom to get information
- * about tasks it is monitoring.
- *
- * This function is called from the main MOM loop once every "check_poll_interval"
- * seconds.
- *
- * @see get_proc_stat() - child
- * @see mom_set_use() - Aggregates data collected here
- *
- * NOTE: populates global 'proc_array[]' variable.
- * NOTE: reallocs proc_array[] as needed to accomodate processes.
- *
- * @see mom_open_poll() - allocs proc_array table.
- * @see mom_close_poll() - frees procs_array.
- * @see setup_program_environment() - parent - called at pbs_mom start
- * @see main_loop() - parent - called once per iteration
- * @see mom_set_use() - populate job structure with usage data for local use or to send to mother superior
- */
- int mom_get_sample(void)
- {
- proc_stat_t *pi;
- proc_stat_t *ps;
- pid_t pid;
- #ifdef PENABLE_LINUX26_CPUSETS
- struct pidl *pids = NULL;
- struct pidl *pp;
- #else
- struct dirent *dent;
- #endif
- if (proc_array == NULL)
- mom_open_poll();
- nproc = 0;
- pi = proc_array;
- if (LOGLEVEL >= 6)
- {
- log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, __func__, "proc_array load started");
- }
- #ifdef PENABLE_LINUX26_CPUSETS
- /* Instead of collect stats of all processes running on a large SMP system,
- * collect stats of processes running in and below the Torque cpuset, only
- * This relies on reliable process starters for MPI, which bind their tasks
- * to the cpuset of the job. */
- #ifdef USELIBCPUSET
- pids = get_cpuset_pidlist(TTORQUECPUSET_BASE, pids);
- #else
- pids = get_cpuset_pidlist(TTORQUECPUSET_PATH, pids);
- #endif
- pp = pids;
- while (pp != NULL)
- {
- pid = pp->pid;
- pp = pp->next;
- #else
- if (pdir == NULL)
- {
- if ((pdir = opendir(procfs)) == NULL)
- return(PBSE_SYSTEM);
- }
-
- rewinddir(pdir);
- while ((dent = readdir(pdir)) != NULL)
- {
- if (!isdigit(dent->d_name[0]))
- continue;
- pid = atoi(dent->d_name);
- #endif
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat", pid);
- log_err(errno, __func__, log_buffer);
- }
- continue;
- }
- /* nproc++; -- we need to increment AFTER assigning this ps to
- the proc_array--otherwise we could skip it in for loops */
- if ((nproc + 1) >= max_proc)
- {
- proc_stat_t *hold;
- if (LOGLEVEL >= 9)
- {
- log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, __func__, "alloc more proc_array");
- }
- max_proc *= 2;
- hold = (proc_stat_t *)calloc(1, max_proc * sizeof(proc_stat_t));
- if (hold == NULL)
- {
- log_err(errno, __func__, "unable to realloc space for proc_array sample");
- return(PBSE_SYSTEM);
- }
- memcpy(hold, proc_array, sizeof(proc_stat_t) * max_proc / 2);
- free(proc_array);
- proc_array = hold;
- } /* END if ((nproc+1) == max_proc) */
- pi = &proc_array[nproc++];
- memcpy(pi, ps, sizeof(proc_stat_t));
- } /* END while (...) != NULL) */
- #ifdef PENABLE_LINUX26_CPUSETS
- free_pidlist(pids);
- #endif
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loaded - nproc=%d",
- nproc);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- return(PBSE_NONE);
- } /* END mom_get_sample() */
- /*
- * Measure job resource usage and compare with its limits.
- *
- * If it has exceeded any well-formed polled limit return the limit that
- * it exceeded.
- * Otherwise, return PBSE_NONE. log_buffer is populated with failure.
- */
- int mom_over_limit(
- job *pjob) /* I */
- {
- const char *pname;
- int retval;
- unsigned long value;
- unsigned long num;
- unsigned long long numll;
- resource *pres;
- assert(pjob != NULL);
- assert(pjob->ji_wattr[JOB_ATR_resource].at_type == ATR_TYPE_RESC);
- pres = (resource *)GET_NEXT(
- pjob->ji_wattr[JOB_ATR_resource].at_val.at_list);
- for (;pres != NULL;pres = (resource *)GET_NEXT(pres->rs_link))
- {
- assert(pres->rs_defin != NULL);
- pname = pres->rs_defin->rs_name;
- assert(pname != NULL);
- assert(*pname != '\0');
- if ((igncput == FALSE) && (strcmp(pname, "cput") == 0))
- {
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- continue;
- if ((num = cput_sum(pjob)) > value)
- {
- sprintf(log_buffer, "cput %lu exceeded limit %lu",
- num,
- value);
- return(JOB_EXEC_OVERLIMIT_CPUT);
- }
- }
- else if ((igncput == FALSE) && (strcmp(pname, "pcput") == 0))
- {
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- continue;
- if (overcpu_proc(pjob, value))
- {
- sprintf(log_buffer, "pcput exceeded limit %lu",
- value);
- return(JOB_EXEC_OVERLIMIT_CPUT);
- }
- }
- else if (strcmp(pname, "vmem") == 0)
- {
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- continue;
- if ((ignvmem == 0) && ((numll = mem_sum(pjob)) > value))
- {
- sprintf(log_buffer, "vmem %llu exceeded limit %lu",
- numll,
- value);
- return(JOB_EXEC_OVERLIMIT_MEM);
- }
- }
- else if (strcmp(pname, "pvmem") == 0)
- {
- unsigned long long valuell;
- retval = mm_getsize(pres, &value);
- if (retval != PBSE_NONE)
- continue;
- valuell = (unsigned long long)value;
- if ((ignvmem == 0) && (overmem_proc(pjob, valuell)))
- {
- sprintf(log_buffer, "pvmem exceeded limit %llu",
- valuell);
- return(JOB_EXEC_OVERLIMIT_MEM);
- }
- }
- else if (ignwalltime == 0 && strcmp(pname, "walltime") == 0)
- {
- /* no need to check walltime on sisters, MS will get it */
- if (am_i_mother_superior(*pjob) == false)
- continue;
- retval = mm_gettime(pres, &value);
- if (retval != PBSE_NONE)
- continue;
- num = (unsigned long)((double)(time_now - pjob->ji_qs.ji_stime) *
- wallfactor);
- if (num > value)
- {
- sprintf(log_buffer, "walltime %ld exceeded limit %ld",
- num,
- value);
- return(JOB_EXEC_OVERLIMIT_WT);
- }
- }
- } /* END for (pres) */
- #ifdef PENABLE_LINUX26_CPUSETS
- /* Check memory_pressure */
- if (memory_pressure_threshold > 0)
- {
- /*
- * If last recorded memory_pressure is over threshold, increment counter.
- * If duration is enabled, throw over_limit if counter reaches duration.
- */
- if (pjob->ji_mempressure_curr < memory_pressure_threshold)
- {
- pjob->ji_mempressure_cnt = 0; /* reset */
- }
- else
- {
- pjob->ji_mempressure_cnt++; /* count */
- sprintf(log_buffer, "job %s memory_pressure is over %d for %d (%d) cycles",
- pjob->ji_qs.ji_jobid,
- memory_pressure_threshold,
- pjob->ji_mempressure_cnt,
- memory_pressure_duration);
- log_ext(-1, __func__, log_buffer,LOG_ALERT);
- if (memory_pressure_duration && (pjob->ji_mempressure_cnt >= memory_pressure_duration))
- {
- sprintf(log_buffer, "swap rate due to memory oversubscription is too high");
- return(JOB_EXEC_OVERLIMIT_MEM);
- }
- }
- }
- #endif
- return(PBSE_NONE);
- } /* END mom_over_limit() */
- /*
- * job_expected_resc_found: logs an error if an expected resource was not found
- */
- int job_expected_resc_found(
- const resource *pres,
- const resource_def *rd,
- const char *jobid)
- {
- if (!pres)
- {
- char log_buf[2048];
- snprintf(log_buf, sizeof(log_buf), "job %s missing expected resource %s for resource usage calculation",
- jobid, rd->rs_name);
- log_err(-1, __func__, log_buf);
- return -1;
- }
- return PBSE_NONE;
- }
- /*
- * Update the job attribute for resources used.
- *
- * The first time this function is called for a job,
- * it sets up resource entries for
- * each resource that can be reported for this machine.
- *
- * Subsequent calls update the resource usage information based on
- * stats gathered by the mom_get_sample() function. This function
- * is often called by "im_request()" as a result of POLL_JOB query
- * from the mother superior.
- *
- * @see im_request() - parent - respond to poll_job request from mother superior
- * @see examine_all_running_jobs() - parent - update local use on mother superior
- * @see TMomFinalizeJob1() - parent - update serial job immediately at job start
- *
- * @return An error code if something goes wrong.
- */
- int mom_set_use(
- job *pjob) /* I (modified) */
- {
- resource *pres;
- pbs_attribute *at;
- resource_def *rd;
- unsigned long *lp;
- unsigned long lnum;
- #ifdef PENABLE_LINUX26_CPUSETS
- int inum;
- #endif
- assert(pjob != NULL);
- at = &pjob->ji_wattr[JOB_ATR_resc_used];
- assert(at->at_type == ATR_TYPE_RESC);
- #ifdef USESAVEDRESOURCES
- /* don't update jobs that are marked as recovery */
- if (pjob->ji_flags & MOM_JOB_RECOVERY)
- {
- return(PBSE_NONE);
- }
- #endif /* USESAVEDRESOURCES */
- at->at_flags |= ATR_VFLAG_MODIFY;
- if ((at->at_flags & ATR_VFLAG_SET) == 0)
- {
- /* initialize usage structures */
- at->at_flags |= ATR_VFLAG_SET;
- rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);
- assert(rd != NULL);
- pres = add_resource_entry(at, rd);
- pres->rs_value.at_flags |= ATR_VFLAG_SET;
- pres->rs_value.at_type = ATR_TYPE_LONG;
- rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);
- assert(rd != NULL);
- pres = add_resource_entry(at, rd);
- pres->rs_value.at_flags |= ATR_VFLAG_SET;
- pres->rs_value.at_type = ATR_TYPE_SIZE;
- pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */
- pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
- rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);
- assert(rd != NULL);
- pres = add_resource_entry(at, rd);
- pres->rs_value.at_flags |= ATR_VFLAG_SET;
- pres->rs_value.at_type = ATR_TYPE_LONG;
- rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);
- assert(rd != NULL);
- pres = add_resource_entry(at, rd);
- pres->rs_value.at_flags |= ATR_VFLAG_SET;
- pres->rs_value.at_type = ATR_TYPE_SIZE;
- pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */
- pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
- } /* END if ((at->at_flags & ATR_VFLAG_SET) == 0) */
- /* get cputime */
- rd = find_resc_def(svr_resc_def, "cput", svr_resc_size);
- assert(rd != NULL);
- pres = find_resc_entry(at, rd);
- if (job_expected_resc_found(pres, rd, pjob->ji_qs.ji_jobid))
- return -1;
- lp = (unsigned long *) & pres->rs_value.at_val.at_long;
- lnum = cput_sum(pjob);
- *lp = MAX(*lp, lnum);
- /* get swap */
- rd = find_resc_def(svr_resc_def, "vmem", svr_resc_size);
- assert(rd != NULL);
- pres = find_resc_entry(at, rd);
- if (job_expected_resc_found(pres, rd, pjob->ji_qs.ji_jobid))
- return -1;
- lp = &pres->rs_value.at_val.at_size.atsv_num;
- lnum = (mem_sum(pjob) + 1023) >> pres->rs_value.at_val.at_size.atsv_shift; /* as KB */
- *lp = MAX(*lp, lnum);
- /* get walltime */
- rd = find_resc_def(svr_resc_def, "walltime", svr_resc_size);
- assert(rd != NULL);
- pres = find_resc_entry(at, rd);
- if (job_expected_resc_found(pres, rd, pjob->ji_qs.ji_jobid))
- return -1;
- /* NOTE: starting jobs can come through here before stime is recorded */
- if (pjob->ji_qs.ji_stime == 0)
- pres->rs_value.at_val.at_long = 0;
- else
- pres->rs_value.at_val.at_long =
- (long)((double)(time_now - pjob->ji_qs.ji_stime) * wallfactor);
- /* get memory */
- rd = find_resc_def(svr_resc_def, "mem", svr_resc_size);
- assert(rd != NULL);
- pres = find_resc_entry(at, rd);
- if (job_expected_resc_found(pres, rd, pjob->ji_qs.ji_jobid))
- return -1;
- lp = &pres->rs_value.at_val.at_size.atsv_num;
- lnum = (resi_sum(pjob) + 1023) >> pres->rs_value.at_val.at_size.atsv_shift; /* as KB */
- *lp = MAX(*lp, lnum);
- #ifdef PENABLE_LINUX26_CPUSETS
- /* get memory_pressure */
- if (memory_pressure_threshold > 0)
- {
- inum = get_cpuset_mempressure(pjob->ji_qs.ji_jobid);
- /* Store if success */
- if (inum != -1)
- pjob->ji_mempressure_curr = inum;
- /* Alert if there is pressure */
- if (inum > 0)
- {
- sprintf(log_buffer, "job %s causes memory_pressure %d", pjob->ji_qs.ji_jobid, inum);
- log_ext(-1, __func__, log_buffer, LOG_ALERT);
- }
- }
- else
- {
- pjob->ji_mempressure_curr = 0;
- }
- #endif
- return(PBSE_NONE);
- } /* END mom_set_use() */
- /**
- * Kill a task session.
- * Call with the task pointer and a signal number.
- *
- * @return number of tasks signalled (0 = failure)
- *
- * @see kill_job() - parent
- *
- * NOTE: should support killpg() or killpidtree() - (NYI)
- * may be required for suspend/resume
- */
- int kill_task(
- task *ptask, /* I */
- int sig, /* I */
- int pg) /* I (1=signal process group, 0=signal master process only) */
- {
- int ct = 0; /* num of processes killed */
- int ctThisIteration = 0;
- int ctCleanIterations = 0;
- int loopCt = 0;
- int NumProcessesFound = 0; /* number of processes found with session ID */
- #ifdef PENABLE_LINUX26_CPUSETS
- struct pidl *pids = NULL;
- struct pidl *pp;
- #else
- struct dirent *dent;
- #endif
- pid_t pid;
- proc_stat_t *ps;
- int sesid;
- pid_t mompid;
- sesid = ptask->ti_qs.ti_sid;
- mompid = getpid();
- if (LOGLEVEL >= 5)
- {
- sprintf(log_buffer, "%s: sending signal %d to task %d, session %d",
- __func__,
- sig,
- ptask->ti_qs.ti_task,
- sesid);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- }
- if (sesid <= 1)
- {
- if (LOGLEVEL >= 3)
- {
- sprintf(log_buffer, "cannot send signal %d to task (no session id)",
- sig);
- log_record(
- PBSEVENT_ERROR,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- }
- /* FAILURE */
- return(0);
- }
- do
- {
- ctThisIteration = 0;
- /* NOTE: do not use cached proc-buffer since we need up-to-date info */
- #ifdef PENABLE_LINUX26_CPUSETS
- /* Instead of collecting stats of all processes running on a large SMP system,
- * collect stats of processes running in and below the Torque cpuset, only
- * This relies on reliable process starters for MPI, which bind their tasks
- * to the cpuset of the job. */
-
- #ifdef USELIBCPUSET
- pids = get_cpuset_pidlist(TTORQUECPUSET_BASE, pids);
- #else
- pids = get_cpuset_pidlist(TTORQUECPUSET_PATH, pids);
- #endif /* USELIBCPUSET */
- pp = pids;
- while (pp != NULL)
- {
- pid = pp->pid;
- pp = pp->next;
- #else
- if (pdir == NULL)
- {
- if ((pdir = opendir(procfs)) == NULL)
- return(PBSE_SYSTEM);
- }
- /* pdir is global */
- rewinddir(pdir);
- while ((dent = readdir(pdir)) != NULL)
- {
- if (!isdigit(dent->d_name[0]))
- continue;
- pid = atoi(dent->d_name);
- #endif /* PENABLE_LINUX26_CPUSETS */
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat", pid);
- log_err(errno, __func__, log_buffer);
- }
- continue;
- }
- if ((sesid == ps->session) ||
- (ProcIsChild(procfs,pid,ptask->ti_job->ji_qs.ji_jobid) == TRUE))
- {
- NumProcessesFound++;
- if ((ps->state == 'Z') || (ps->pid == 0))
- {
- /*
- * Killing a zombie is sure death! Its pid is zero,
- * which to kill(2) means 'every process in the process
- * group of the current process'.
- */
- sprintf(log_buffer, "%s: not killing process (pid=%d/state=%c) with sig %d",
- __func__,
- ps->pid,
- ps->state,
- sig);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- } /* END if ((ps->state == 'Z') || (ps->pid == 0)) */
- else
- {
- int i = 0;
- if (ps->pid == mompid)
- {
- /*
- * there is a race condition with newly started jobs that
- * can be killed before they've established their own
- * session id. This means the child tasks still have MOM's
- * session id. We check this to make sure MOM doesn't kill
- * herself.
- */
- if (LOGLEVEL >= 3)
- {
- sprintf(log_buffer, "%s: not killing process %d. Avoid sending signal because child task still has MOM's session id", __func__, ps->pid);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- }
- if((sig == SIGKILL)||(sig == SIGTERM))
- {
- ++ctThisIteration; //Ultimately this is task that will need to be killed.
- }
- continue;
- } /* END if (ps->pid == mompid) */
- if((sig == SIGKILL)||(sig == SIGTERM))
- {
- ++ctThisIteration; //Only count for killing don't count for any other signal.
- }
- if (sig == SIGKILL)
- {
- struct timespec req;
- req.tv_sec = 0;
- req.tv_nsec = 250000000; /* .25 seconds */
- /* give the process some time to quit gracefully first (up to .25*20=5 seconds) */
- sprintf(log_buffer, "%s: killing pid %d task %d gracefully with sig %d",
- __func__,
- ps->pid,
- ptask->ti_qs.ti_task,
- SIGTERM);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- if (pg == 0)
- kill(ps->pid, SIGTERM);
- else
- killpg(ps->pid, SIGTERM);
- for (i = 0;i < 20;i++)
- {
- /* check if process is gone */
- if ((ps = get_proc_stat(ps->pid)) == NULL)
- {
- break;
- }
- else
- {
- sprintf(log_buffer, "%s: process (pid=%d/state=%c) after sig %d",
- __func__,
- ps->pid,
- ps->state,
- SIGTERM);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- if (ps->state == 'Z')
- break;
- }
- /* try to kill again */
- if (kill(ps->pid, 0) == -1)
- break;
- nanosleep(&req, NULL);
- } /* END for (i = 0) */
- } /* END if (sig == SIGKILL) */
- else
- {
- i = 20;
- }
- if (i >= 20)
- {
- /* NOTE: handle race-condition where process goes zombie as a result of previous SIGTERM */
- /* update proc info from /proc/<PID>/stat */
- if ((ps = get_proc_stat(ps->pid)) != NULL)
- {
- if (ps->state == 'Z')
- {
- /*
- * Killing a zombie is sure death! Its pid is zero,
- * which to kill(2) means 'every process in the process
- * group of the current process'.
- */
- sprintf(log_buffer, "%s: not killing process (pid=%d/state=%c) with sig %d",
- __func__,
- ps->pid,
- ps->state,
- sig);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- } /* END if ((ps->state == 'Z') || (ps->pid == 0)) */
- else
- {
- /* kill process hard */
- /* why is this not killing with SIGKILL? */
- sprintf(log_buffer, "%s: killing pid %d task %d with sig %d",
- __func__,
- ps->pid,
- ptask->ti_qs.ti_task,
- sig);
- log_record(
- PBSEVENT_JOB,
- PBS_EVENTCLASS_JOB,
- ptask->ti_job->ji_qs.ji_jobid,
- log_buffer);
- if (pg == 0)
- kill(ps->pid, sig);
- else
- killpg(ps->pid, sig);
- }
- } /* END if ((ps = get_proc_stat(ps->pid)) != NULL) */
- } /* END if (i >= 20) */
- ++ct;
- } /* END else ((ps->state == 'Z') || (ps->pid == 0)) */
- } /* END if (sesid == ps->session) */
- } /* END while (...) != NULL) */
- #ifdef PENABLE_LINUX26_CPUSETS
- free_pidlist(pids);
- pids = NULL;
- #endif
- if(ctThisIteration == 0)
- {
- ctCleanIterations++;
- }
- else
- {
- ctCleanIterations=0;
- }
- }while((ctCleanIterations <= 5)&&(loopCt++ < 20));
- /* NOTE: to fix bad state situations resulting from a hard crash, the logic
- below should be triggered any time no processes are found (NYI) */
- if (IS_ADOPTED_TASK(ptask->ti_qs.ti_task) && (NumProcessesFound == 0))
- {
- /* no process was found, but for an adopted task this is OK (we don't find
- * out about the adopted task's termination via waitpid()--so we can safely
- * say that we have "killed" the task, even though the task was killed/died
- * some other way */
- ct++;
- /* do code to mark task as finished (borrowed from Linux scan_for_terminated())... */
- ptask->ti_qs.ti_exitstat = 0; /* assume successful completion */
- ptask->ti_qs.ti_status = TI_STATE_EXITED;
- task_save(ptask);
- sprintf(log_buffer,
- "%s: job %s adopted task %d was marked as terminated because task's PID was no longer found, sid=%d",
- __func__,
- ptask->ti_job->ji_qs.ji_jobid,
- ptask->ti_qs.ti_task,
- ptask->ti_qs.ti_sid);
- log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, ptask->ti_job->ji_qs.ji_jobid, log_buffer);
- }
- if ((NumProcessesFound == 0) && (ct <= 0))
- {
- /* we can't find any processes belonging to given session, so we can safely say
- * that we "killed" the task and have TORQUE clean it up */
- ct++;
- /* do code to mark task as finished (borrowed from Linux scan_for_terminated())... */
- ptask->ti_qs.ti_exitstat = 0; /* assume successful completion */
- ptask->ti_qs.ti_status = TI_STATE_EXITED;
- task_save(ptask);
- if (LOGLEVEL >= 5)
- {
- sprintf(log_buffer,
- "%s: could not send signal %d to task %d (session %d)--no process was found with this session ID (marking task as killed)!",
- __func__,
- sig,
- ptask->ti_qs.ti_task,
- sesid);
- log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, ptask->ti_job->ji_qs.ji_jobid, log_buffer);
- }
- }
- /* SUCCESS */
- return(ct);
- } /* END kill_task() */
- /*
- * Clean up everything related to polling.
- */
- int mom_close_poll(void)
- {
- if (LOGLEVEL >= 6)
- {
- log_record(PBSEVENT_SYSTEM, 0, __func__, "entered");
- }
- if (pdir != NULL)
- {
- if (closedir(pdir) != 0)
- {
- log_err(errno, __func__, "closedir");
- return(PBSE_SYSTEM);
- }
- pdir = NULL;
- }
- if (proc_array != NULL)
- {
- free(proc_array);
- proc_array = NULL;
- nproc = 0;
- max_proc = TBL_INC;
- }
- return(PBSE_NONE);
- } /* END mom_close_poll() */
- /*
- * mom_does_checkpoint
- *
- * @returns CST values as described in resmon.h.
- */
- int mom_does_checkpoint(void)
- {
- return(CST_BLCR); /* Use the BLCR checkpointing system. */
- }
- /*
- * Checkpoint the job.
- *
- * If abort is true, kill it too.
- */
- int mach_checkpoint(
- task *ptask, /* I */
- char *file, /* I */
- int abort) /* I */
- {
- return(-1);
- } /* END mach_checkpoint() */
- /*
- * Restart the job from the checkpoint file.
- *
- * Return -1 on error or sid if okay.
- */
- long mach_restart(
- task *ptask,
- char *file)
- {
- return(-1);
- }
- #define dsecs(val) ( (double)(val) )
- char *cput_job(
- pid_t jobid)
- {
- int found = 0;
- int i;
- double cputime, addtime;
- proc_stat_t *ps;
- cputime = 0.0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %d",
- jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (jobid != ps->session)
- continue;
- found = 1;
- /* add utime and stime (AKE) */
- addtime =
- dsecs(ps->utime) +
- dsecs(ps->stime) +
- dsecs(ps->cutime) +
- dsecs(ps->cstime);
- cputime += addtime;
- DBPRT(("%s: total %.2f pid %d %.2f\n",
- __func__,
- cputime,
- ps->pid,
- addtime))
- } /* END for (i) */
- if (!found)
- {
- rm_errno = RM_ERR_EXIST;
- return(NULL);
- }
- sprintf(ret_string, "%.2f",
- cputime * cputfactor);
- return(ret_string);
- } /* END cput_job() */
- char *cput_proc(
- pid_t pid)
- {
- double cputime;
- proc_stat_t *ps;
- cputime = 0.0;
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat",
- pid);
- log_err(errno, __func__, log_buffer);
- }
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- cputime = dsecs(ps->utime) + dsecs(ps->stime);
- sprintf(ret_string, "%.2f",
- cputime * cputfactor);
- return(ret_string);
- } /* END cput_proc() */
- const char *cput(
- struct rm_attribute *attrib)
- {
- int value;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if ((value = atoi(attrib->a_value)) == 0)
- {
- sprintf(log_buffer, "bad param: %s",
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "session") == 0)
- {
- return(cput_job((pid_t)value));
- }
- if (strcmp(attrib->a_qualifier, "proc") == 0)
- {
- return(cput_proc((pid_t)value));
- }
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- } /* END cput() */
- char *mem_job(
- pid_t sid) /* I */
- {
- unsigned long long memsize;
- int i;
- proc_stat_t *ps;
- /* max memsize ??? */
- memsize = 0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - sid = %d",
- sid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (sid != ps->session)
- continue;
- memsize += ps->vsize;
- } /* END for (i) */
- if (memsize == 0)
- {
- rm_errno = RM_ERR_EXIST;
- return(NULL);
- }
- sprintf(ret_string, "%llukb",
- memsize >> 10); /* KB */
- return(ret_string);
- } /* END mem_job() */
- char *mem_proc(
- pid_t pid)
- {
- proc_stat_t *ps;
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat",
- pid);
- log_err(errno, __func__, log_buffer);
- }
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- sprintf(ret_string, "%llukb",
- (unsigned long long)ps->vsize >> 10); /* KB */
- return(ret_string);
- } /* END mem_proc() */
- const char *mem(
- struct rm_attribute *attrib)
- {
- int value;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if ((value = atoi(attrib->a_value)) == 0)
- {
- sprintf(log_buffer, "bad param: %s",
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "session") == 0)
- {
- return(mem_job((pid_t)value));
- }
- else if (strcmp(attrib->a_qualifier, "proc") == 0)
- {
- return(mem_proc((pid_t)value));
- }
- else
- {
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- return(NULL);
- } /* END mem() */
- static char *resi_job(
- pid_t jobid)
- {
- int i;
- int found = 0;
- unsigned long long resisize;
- proc_stat_t *ps;
- #ifdef USELIBMEMACCT
- long long w_rss;
- #endif
- resisize = 0;
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "proc_array loop start - jobid = %d",
- jobid);
- log_record(PBSEVENT_DEBUG, 0, __func__, log_buffer);
- }
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (jobid != ps->session)
- continue;
- found = 1;
- #ifdef USELIBMEMACCT
- /* Ask memacctd for weighted rss of pid, use this instead of ps->rss */
- w_rss = get_memacct_resi(ps->pid);
- if (w_rss == -1)
- resisize += ps->rss * pagesize;
- else
- resisize += w_rss;
- #else
- resisize += ps->rss;
- #endif
- } /* END for (i) */
- if (found)
- {
- /* in KB */
- #ifdef USELIBMEMACCT
- sprintf(ret_string, "%llukb", resisize >> 10);
- #else
- sprintf(ret_string, "%llukb",
- (resisize * (unsigned long long)pagesize) >> 10);
- #endif
- return(ret_string);
- }
- rm_errno = RM_ERR_EXIST;
- return(NULL);
- } /* END resi_job() */
- static char *resi_proc(
- pid_t pid)
- {
- proc_stat_t *ps;
- #ifdef USELIBMEMACCT
- long long w_rss;
- #endif
- if ((ps = get_proc_stat(pid)) == NULL)
- {
- if (errno != ENOENT)
- {
- sprintf(log_buffer, "%d: get_proc_stat(PIOCPSINFO)",
- pid);
- log_err(errno, __func__, log_buffer);
- }
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- #ifdef USELIBMEMACCT
-
- /* Ask memacctd for weighted rss of pid, use this instead of ps->rss */
-
- if ((w_rss = get_memacct_resi(ps->pid)) == -1)
- sprintf(ret_string, "%llukb", (ps->rss * (unsigned long long)pagesize) >> 10);
- else
- sprintf(ret_string, "%ldkb", w_rss >> 10);
- #else
- /* in KB */
- sprintf(ret_string, "%lukb",
- ((ulong)ps->rss * (ulong)pagesize) >> 10);
- #endif
- return(ret_string);
- } /* END resi_proc() */
- static const char *resi(
- struct rm_attribute *attrib)
- {
- int value;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if ((value = atoi(attrib->a_value)) == 0)
- {
- sprintf(log_buffer,
- "bad param: %s",
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "session") == 0)
- {
- return(resi_job((pid_t)value));
- }
- if (strcmp(attrib->a_qualifier, "proc") == 0)
- {
- return(resi_proc((pid_t)value));
- }
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- } /* END resi() */
- const char *sessions(
- struct rm_attribute *attrib) /* I */
- {
- int nsids = 0;
- pid_t sid;
- char *s;
- #ifdef NUMA_SUPPORT
- char mom_check_name[PBS_MAXSERVERNAME];
- job *pjob;
- task *ptask;
- #else
- proc_stat_t *ps;
- struct pidl *sids = NULL, *sl = NULL, *sp;
- int i;
- #endif
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- ret_string[0] = '\0';
- #ifdef NUMA_SUPPORT
- /* Initialize the node name to check for for this NUMA node */
- strcpy(mom_check_name, mom_host);
- if ((s = strchr(mom_check_name, '.')) != NULL)
- *s = '\0';
- sprintf(mom_check_name + strlen(mom_check_name), "-%d/", numa_index);
- /* Initialize the return string */
- s = ret_string;
- /* Walk through job list, look for jobs running on this NUMA node */
- for (pjob = (job *)GET_NEXT(svr_alljobs);
- pjob != NULL;
- pjob = (job *)GET_NEXT(pjob->ji_alljobs))
- {
- if (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, mom_check_name) == NULL)
- continue;
- /* Show all tasks registered for this job */
- for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
- ptask != NULL;
- ptask = (task *)GET_NEXT(ptask->ti_jobtask))
- {
- if (ptask->ti_qs.ti_status != TI_STATE_RUNNING)
- continue;
- sid = ptask->ti_qs.ti_sid;
- if (LOGLEVEL >= 9)
- {
- sprintf(log_buffer, "%s[%d]: job %s on %s? sid %d",
- __func__,
- nsids,
- pjob->ji_qs.ji_jobid,
- mom_check_name,
- sid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- checkret(&s, 100);
- sprintf(s, "%s%d", (ret_string[0] != '\0') ? " " : "", sid);
- s += strlen(s);
- nsids++;
- } /* END for(ptask) */
- } /* END for(pjob) */
- #else
- /* Walk through proc_array, store unique session IDs in the pids list */
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (ps->uid == 0)
- continue;
- if ((sid = ps->session) == 0)
- continue;
- if (LOGLEVEL >= 9)
- {
- sprintf(log_buffer, "%s[%d]: pid %d sid %d", __func__, nsids, ps->pid, sid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- sp = sids;
- while (sp)
- {
- if (sp->pid == sid) /* found */
- break;
- sp = sp->next;
- }
- if (sp)
- continue;
- /* not found */
- if ((sp = (struct pidl *)calloc(1, sizeof(struct pidl))) == NULL)
- {
- log_err(errno, __func__, "no memory");
- rm_errno = RM_ERR_SYSTEM;
- if (sids)
- free_pidlist(sids);
- return(NULL);
- }
- sp->pid = sid;
- sp->next = NULL;
- nsids++;
- if (sl)
- sl->next = sp;
- else
- sids = sp;
- sl = sp;
- } /* END for(i) */
- /*
- * Assemble return string.
- * Return empty string if no sessions.
- */
- s = ret_string;
- sp = sids;
- while (sp)
- {
- checkret(&s, 100);
- if (sp == sids)
- sprintf(s, "%d", sp->pid);
- else
- sprintf(s, " %d", sp->pid);
- s += strlen(s);
- sp = sp->next;
- } /* END while(sp) */
- /* Done */
- if (sids)
- free_pidlist(sids);
- #endif
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "nsessions=%d", nsids);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- return(ret_string);
- }
- const char *nsessions(
- struct rm_attribute *attrib)
- {
- const char *result;
- const char *ch;
- int num;
- if ((result = sessions(attrib)) == NULL)
- return(result);
- if (result[0] == '\0')
- {
- num = 0;
- }
- else
- {
- num = 1;
- for (ch = result;*ch;ch++)
- if (*ch == ' ') /* count blanks */
- num++;
- } /* END for (ch) */
- sprintf(ret_string, "%d",
- num);
- return(ret_string);
- } /* END nsessions() */
- const char *pids(
- struct rm_attribute *attrib) /* I */
- {
- pid_t jobid;
- proc_stat_t *ps;
- char *fmt;
- int i;
- int num_pids = 0;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if ((jobid = (pid_t)atoi(attrib->a_value)) == 0)
- {
- sprintf(log_buffer, "bad param: %s",
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "session") != 0)
- {
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- /* Search for members of session */
- fmt = ret_string;
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (LOGLEVEL >= 6)
- {
- DBPRT(("%s[%d]: pid: %d sid: %d\n",
- __func__,
- num_pids,
- ps->pid,
- ps->session))
- }
- if (jobid != ps->session)
- continue;
- sprintf(fmt, "%d ",
- ps->pid);
- fmt += strlen(fmt);
- num_pids++;
- } /* END for (i) */
- if (num_pids == 0)
- {
- rm_errno = RM_ERR_EXIST;
- return(NULL);
- }
- return(ret_string);
- } /* END pids() */
- const char *nusers(
- struct rm_attribute *attrib)
- {
- int j;
- int nuids = 0;
- uid_t *uids, *hold;
- static int maxuid = 200;
- register uid_t uid;
- #ifdef NUMA_SUPPORT
- char mom_check_name[PBS_MAXSERVERNAME], *s;
- job *pjob;
- #else
- int i;
- proc_stat_t *ps;
- #endif
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((uids = (uid_t *)calloc(maxuid, sizeof(uid_t))) == NULL)
- {
- log_err(errno, __func__, "no memory");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- #ifdef NUMA_SUPPORT
- /* Initialize the node name to check for for this NUMA node */
- strcpy(mom_check_name, mom_host);
- if ((s = strchr(mom_check_name, '.')) != NULL)
- *s = '\0';
- sprintf(mom_check_name + strlen(mom_check_name), "-%d/", numa_index);
- /* Walk through job list, look for jobs running on this NUMA node */
- for (pjob = (job *)GET_NEXT(svr_alljobs);
- pjob != NULL;
- pjob = (job *)GET_NEXT(pjob->ji_alljobs))
- {
- if (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, mom_check_name) == NULL)
- continue;
- /* Store uid of job owner */
- uid = pjob->ji_qs.ji_un.ji_momt.ji_exuid;
- if (LOGLEVEL >= 9)
- {
- sprintf(log_buffer, "%s[%d]: job %s on %s? uid %d",
- __func__,
- nuids,
- pjob->ji_qs.ji_jobid,
- mom_check_name,
- uid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- #else
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if ((uid = ps->uid) == 0)
- continue;
- if (LOGLEVEL >= 9)
- {
- sprintf(log_buffer, "%s[%d]: pid %d uid %d",
- __func__,
- nuids,
- ps->pid,
- uid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- #endif
- for (j = 0;j < nuids;j++)
- {
- if (uids[j] == uid)
- break;
- }
- if (j == nuids)
- {
- /* not found */
- if (nuids == maxuid)
- {
- /* need more space */
- maxuid += 100;
- hold = (uid_t *)realloc(uids, maxuid);
- if (hold == NULL)
- {
- log_err(errno, __func__, "realloc");
- rm_errno = RM_ERR_SYSTEM;
- free(uids);
- return(NULL);
- }
- memset(hold+(maxuid-100), 0, 100*sizeof(uid_t));
- if (LOGLEVEL >= 7)
- {
- sprintf(log_buffer, "%s[%d]: need more space: %d", __func__, nuids, maxuid);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- hold[nuids++] = uid; /* add uid to list */
- uids = hold;
- }
- else
- {
- uids[nuids++] = uid; /* add uid to list */
- }
- }
- } /* END for (i) */
- sprintf(ret_string, "%d",
- nuids);
- free(uids);
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "nusers=%d", nuids);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- return(ret_string);
- } /* END nusers() */
- const char *totmem(
- struct rm_attribute *attrib)
- {
- proc_mem_t *mm;
- if (attrib)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((mm = get_proc_mem()) == NULL)
- {
- log_err(errno, __func__, "get_proc_mem");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: total mem=%llu",
- __func__,
- mm->mem_total + mm->swap_total);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- sprintf(ret_string, "%lukb",
- (ulong)((mm->mem_total >> 10) + (mm->swap_total >> 10))); /* KB */
- return(ret_string);
- } /* END totmem() */
- const char *availmem(
- struct rm_attribute *attrib)
- {
- proc_mem_t *mm;
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((mm = get_proc_mem()) == NULL)
- {
- log_err(errno, __func__, "get_proc_mem");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- } /* END availmem() */
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "%s: free mem=%llu",
- __func__,
- mm->mem_free + mm->swap_free);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
- }
- sprintf(ret_string, "%lukb",
- (ulong)((mm->mem_free >> 10) + (mm->swap_free >> 10))); /* KB */
- return(ret_string);
- } /* END availmem() */
- const char *ncpus(
- struct rm_attribute *attrib)
- {
- #ifdef NUMA_SUPPORT
- /* report the configured ncpus for this numa node */
- sprintf(ret_string,"%d",node_boards[numa_index].num_cpus);
- #else
- char label[128];
- FILE *fp;
- int procs;
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
- {
- return(NULL);
- }
- procs = 0;
- while (!feof(fp))
- {
- if (fscanf(fp, "%s %*[^\n]%*c", label) == 0)
- {
- getc(fp); /* must do something to get to eof */
- }
- else if (strcmp("processor", label) == 0)
- procs++;
- }
- sprintf(ret_string, "%d", procs);
- system_ncpus = procs;
- fclose(fp);
- #endif /* NUMA_SUPPORT */
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "ncpus=%s", ret_string);
- log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_NODE, "ncpus", log_buffer);
- }
- return(ret_string);
- } /* END ncpus() */
- /* find_file checks for the existence of filename
- * in the ':' delimited path string
- * Return TRUE if file exists
- * FALSE if file not found
- */
- int find_file(
-
- char *path,
- char *filename)
- {
- char *ptr1, *ptr2;
- char buf[RETURN_STRING_SIZE];
- int rc;
- struct stat statBuf;
- if (path == NULL)
- {
- return(FALSE);
- }
- if (filename == NULL)
- {
- return(FALSE);
- }
- memset(buf, 0, RETURN_STRING_SIZE);
- ptr1 = path;
- ptr2 = buf;
- do
- {
- *ptr2 = *ptr1;
- ptr1++;
- if (*ptr1 == ':' || *ptr1 == '\0')
- {
- /* check for the forward slash at the end of the path variable */
- if (*ptr2 != '/')
- {
- ptr2++;
- *ptr2 = '/';
- }
- strcat(buf, filename);
- rc = stat(buf, &statBuf);
- if (rc == 0)
- {
- return(TRUE);
- }
- /* Advance the pointer in the path */
- ptr1++;
- /* reset ptr2 to the beginning of buf and get the
- next directory */
- memset(buf, 0, RETURN_STRING_SIZE);
- ptr2 = buf;
- }
- else
- ptr2++; /* advance ptr2 to the next element in buf */
- }while(*ptr1 != '\0');
- return(FALSE);
- }
- static const char *physmem(
- struct rm_attribute *attrib)
- {
- char tmpBuf[PMEMBUF_SIZE];
- char *BPtr;
- int BSpace;
- unsigned long long mem;
- unsigned long long mem_total;
- FILE *fp;
- #ifdef NUMA_SUPPORT
- int i;
- #endif
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- mem_total = 0;
- #ifdef NUMA_SUPPORT
- for (i = 0; i < node_boards[numa_index].num_nodes; i++)
- #endif /* NUMA_SUPPORT */
- {
- #ifdef NUMA_SUPPORT
- if (!(fp = fopen(node_boards[numa_index].path_meminfo[i],"r")))
- #else
- if (!(fp = fopen(path_meminfo, "r")))
- #endif
- {
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- BPtr = tmpBuf;
- BSpace = sizeof(tmpBuf);
- BPtr[0] = '\0';
- while (!feof(fp))
- {
- if (fgets(BPtr, BSpace, fp) == NULL)
- {
- break;
- }
- BSpace -= strlen(BPtr);
- BPtr += strlen(BPtr);
- }
- fclose(fp);
- /* FORMAT: '...\nMemTotal: XXX kB\n' */
- if ((BPtr = strstr(tmpBuf, "MemTotal:")) != NULL)
- {
- BPtr += strlen("MemTotal:");
- if (sscanf(BPtr, "%llu",
- &mem) != 1)
- {
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- /* value specified in kb */
- }
- else
- {
- /* attempt to load first numeric value */
- if (sscanf(BPtr, "%*s %llu",
- &mem) != 1)
- {
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- /* value specified in bytes */
- mem >>= 10;
- }
- mem_total += mem;
- }
- sprintf(ret_string, "%llukb",
- mem_total);
- return(ret_string);
- } /* END physmem() */
- char *size_fs(
- char *param)
- {
- struct statfs fsbuf;
- if (param[0] != '/')
- {
- sprintf(log_buffer, "%s: not full path filesystem name: %s",
- __func__,
- param);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (statfs(param, &fsbuf) == -1)
- {
- log_err(errno, __func__, "statfs");
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- #ifdef RPT_BAVAIL
- #define RPT_STATFS_MEMBER f_bavail
- #else
- #define RPT_STATFS_MEMBER f_bfree
- #endif
- sprintf(ret_string, "%lukb:%lukb",
- (ulong)(((double)fsbuf.f_bsize * (double)fsbuf.RPT_STATFS_MEMBER) / 1024.0),
- (ulong)(((double)fsbuf.f_bsize * (double)fsbuf.f_blocks) / 1024.0)); /* KB */
- return(ret_string);
- } /* END size_fs() */
- char *size_file(
- char *param)
- {
- struct stat sbuf;
- if (param[0] != '/')
- {
- sprintf(log_buffer, "%s: not full path filesystem name: %s",
- __func__, param);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (stat(param, &sbuf) == -1)
- {
- log_err(errno, __func__, "stat");
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- sprintf(ret_string, "%lukb",
- (unsigned long)sbuf.st_size >> 10); /* KB */
- return(ret_string);
- } /* END size_file() */
- const char *size(
- struct rm_attribute *attrib)
- {
- char *param;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- param = attrib->a_value;
- if (strcmp(attrib->a_qualifier, "file") == 0)
- {
- return(size_file(param));
- }
- if (strcmp(attrib->a_qualifier, "fs") == 0)
- {
- return(size_fs(param));
- }
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- } /* END size() */
- /*
- * For a recovering (-p) mom, look through existing tasks in existing
- * jobs for things that have exited that are not owned by us through a
- * parent-child relationship. Otherwise we cannot report back to tm
- * clients when tasks have exited.
- */
- void scan_non_child_tasks(void)
- {
- job *pJob;
- static int first_time = TRUE;
- DIR *pdir; /* use local pdir to prevent race conditions associated w/global pdir (VPAC) */
- pdir = opendir(procfs);
- for (pJob = (job *)(GET_NEXT(svr_alljobs));
- pJob != (job *)NULL;pJob = (job *)(GET_NEXT(pJob->ji_alljobs)))
- {
- task *pTask;
- long job_start_time = 0;
- long job_session_id = 0;
- long session_start_time = 0;
- proc_stat_t *ps = NULL;
- if(pJob->ji_wattr[JOB_ATR_system_start_time].at_flags&ATR_VFLAG_SET)
- {
- job_start_time = pJob->ji_wattr[JOB_ATR_system_start_time].at_val.at_long;
- }
- if(pJob->ji_wattr[JOB_ATR_session_id].at_flags&ATR_VFLAG_SET)
- {
- job_session_id = pJob->ji_wattr[JOB_ATR_session_id].at_val.at_long;
- }
- if((ps = get_proc_stat(job_session_id)) != NULL)
- {
- session_start_time = (long)ps->start_time;
- }
- for (pTask = (task *)(GET_NEXT(pJob->ji_tasks));
- pTask != NULL;
- pTask = (task *)(GET_NEXT(pTask->ti_jobtask)))
- {
- #ifdef PENABLE_LINUX26_CPUSETS
- struct pidl *pids = NULL;
- struct pidl *pp;
- #else
- struct dirent *dent;
- #endif
- pid_t pid;
- int found;
- /*
- * Check for tasks that were exiting when mom went down, set back to
- * running so we can reprocess them and send the obit
- */
- if ((first_time) && (pTask->ti_qs.ti_sid != 0) &&
- ((pTask->ti_qs.ti_status == TI_STATE_EXITED) ||
- (pTask->ti_qs.ti_status == TI_STATE_DEAD)))
- {
- if (LOGLEVEL >= 7)
- {
- sprintf(log_buffer, "marking task %d as TI_STATE_RUNNING was %d",
- pTask->ti_qs.ti_task,
- pTask->ti_qs.ti_status);
- log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pJob->ji_qs.ji_jobid, log_buffer);
- }
- pTask->ti_qs.ti_status = TI_STATE_RUNNING;
- }
- /* only check on tasks that we think should still be around */
- if (pTask->ti_qs.ti_status != TI_STATE_RUNNING)
- continue;
- /* look for processes with this session id */
- found = 0;
- /* NOTE: on linux systems, the session master should have pid == sessionid */
- if (kill(pTask->ti_qs.ti_sid, 0) != -1)
- {
- if((job_start_time != 0)&&
- (session_start_time != 0))
- {
- if(job_start_time == session_start_time)
- {
- found = 1;
- }
- }
- else
- {
- found = 1;
- }
- }
- if(!found)
- {
- /* session master cannot be found, look for other pid in session */
- #ifdef PENABLE_LINUX26_CPUSETS
- pids = get_cpuset_pidlist(pJob->ji_qs.ji_jobid, pids);
- pp = pids;
- while (pp != NULL)
- {
- pid = pp->pid;
- pp = pp->next;
- #else
- if (pdir == NULL)
- {
- if ((pdir = opendir(procfs)) == NULL)
- return;
- }
-
- rewinddir(pdir);
- while ((dent = readdir(pdir)) != NULL)
- {
- if (!isdigit(dent->d_name[0]))
- continue;
- pid = atoi(dent->d_name);
- #endif /* PENABLE_LINUX26_CPUSETS */
- if ((ps = get_proc_stat(pid)) == NULL)
- continue;
- if (ps->session == pTask->ti_qs.ti_sid)
- {
- if(pJob->ji_wattr[JOB_ATR_system_start_time].at_flags&ATR_VFLAG_SET)
- {
- proc_stat_t *ts = get_proc_stat(ps->session);
- if(ts == NULL)
- continue;
- if(ts->start_time == (unsigned long)pJob->ji_wattr[JOB_ATR_system_start_time].at_val.at_long)
- {
- found = 1;
- break;
- }
- }
- else
- {
- found = 1;
- break;
- }
- }
- } /* END while ((dent) != NULL) */
- #ifdef PENABLE_LINUX26_CPUSETS
- free_pidlist(pids);
- #endif
- }
- if (!found)
- {
- char buf[MAXLINE];
- extern int exiting_tasks;
- sprintf(buf, "found exited session %d for task %d in job %s",
- pTask->ti_qs.ti_sid,
- pTask->ti_qs.ti_task,
- pJob->ji_qs.ji_jobid);
- log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, buf);
- pTask->ti_qs.ti_exitstat = 0; /* actually unknown */
- pTask->ti_qs.ti_status = TI_STATE_EXITED;
- task_save(pTask);
- #ifdef USESAVEDRESOURCES
- if (first_time)
- {
- pJob->ji_flags |= MOM_JOB_RECOVERY;
- if (LOGLEVEL >= 7)
- {
- sprintf(buf, "marking job as MOM_JOB_RECOVERY for task %d",
- pTask->ti_qs.ti_task);
- log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pJob->ji_qs.ji_jobid, buf);
- }
- }
- #endif /* USESAVEDRESOURCES */
- exiting_tasks = 1;
- }
- }
- } /* END for (job = GET_NEXT(svr_alljobs)) */
- if (pdir != NULL)
- closedir(pdir);
- first_time = FALSE;
- return;
- } /* END scan_non_child_tasks() */
/* most recent access time seen so far by setmax() */
time_t maxtm;

/*
 * setmax - raise maxtm to the access time of dev if that is later.
 *
 * A path that cannot be stat()ed leaves maxtm untouched.
 */
void setmax(

  const char *dev)

{
  struct stat info;

  if ((stat(dev, &info) != -1) && (info.st_atime > maxtm))
    maxtm = info.st_atime;
} /* END setmax() */
- const char *idletime(
- struct rm_attribute *attrib)
- {
- DIR *dp;
- struct dirent *de;
- char ttyname[50];
- time_t curtm;
- if (attrib)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((dp = opendir("/dev")) == NULL)
- {
- log_err(errno, __func__, "opendir /dev");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- maxtm = 0;
- curtm = time(NULL);
- setmax("/dev/mouse");
- while ((de = readdir(dp)) != NULL)
- {
- if (maxtm >= curtm)
- break;
- if (strncmp(de->d_name, "tty", 3))
- continue;
- sprintf(ttyname, "/dev/%s",
- de->d_name);
- setmax(ttyname);
- }
- closedir(dp);
- sprintf(ret_string, "%ld",
- (long)MAX(0, curtm - maxtm));
- return(ret_string);
- } /* END idletime() */
- static const char *walltime(
- struct rm_attribute *attrib)
- {
- int value, job, found = 0;
- time_t now, start;
- proc_stat_t *ps;
- int i;
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if ((value = atoi(attrib->a_value)) == 0)
- {
- sprintf(log_buffer, "bad param: %s",
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (momgetattr(NULL))
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "proc") == 0)
- {
- job = 0;
- }
- else if (strcmp(attrib->a_qualifier, "session") == 0)
- {
- job = 1;
- }
- else
- {
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((now = time(NULL)) <= 0)
- {
- log_err(errno, __func__, "time");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- start = now;
- for (i = 0;i < nproc;i++)
- {
- ps = &proc_array[i];
- if (job != 0)
- {
- if (value != ps->session)
- continue;
- }
- else
- {
- if (value != ps->pid)
- continue;
- }
- found = 1;
- start = MIN((unsigned)start, ps->start_time);
- } /* END for (i) */
- if (found)
- {
- sprintf(ret_string, "%ld",
- (long)((double)(now - start) * wallfactor));
- return(ret_string);
- }
- rm_errno = RM_ERR_EXIST;
- return(NULL);
- } /* END walltime() */
- /* Get the load average for this node */
- int get_la(
- double *rv) /* O */
- {
- FILE *fp;
- float load;
- if ((fp = fopen("/proc/loadavg", "r")) == NULL)
- {
- rm_errno = RM_ERR_SYSTEM;
- return(rm_errno);
- }
- if (fscanf(fp, "%f",
- &load) != 1)
- {
- log_err(errno, __func__, "fscanf of load in /proc/loadavg");
- fclose(fp);
- rm_errno = RM_ERR_SYSTEM;
- return(rm_errno);
- }
- *rv = (double)load;
- fclose(fp);
- return(0);
- } /* END get_la() */
- #ifdef NUMA_SUPPORT
- /*
- * Calculate cpu activities for numa nodeboards.
- *
- * This is a very preliminary attempt to provide useful load data for NUMA nodeboards.
- * Instead of a load average, we report the cpu activities of all cpus of a NUMA board.
- * Calculated numbers range from 0.0 (no CPU activity) to the number of
- * CPUs of a NUMA board (all CPUs are busy to 100%).
- *
- * Note that this is NOT the load average. However, it almost looks the same.
- *
- * The activity of a cpu is calculated from the content of /proc/stat like done
- * by top and related tools.
- */
-
- void collect_cpuact(void)
- {
- FILE *fp;
- char label[128];
- long procs;
- int cpu_id;
- int i;
- unsigned long long usr, nice, sys, idle, wait;
- unsigned long long totidle, totbusy, prevtot;
- unsigned long long dtot, dbusy;
- /*
- * Allocate cpu_array, if not already done.
- * Need to figure out number of cpus in the system, first.
- */
- if (cpu_array == NULL)
- {
- if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
- /* Failure */
- return;
- procs = 0;
- while (! feof(fp))
- {
- if (fscanf(fp, "%s %*[^\n]%*c", label) == 0)
- getc(fp);
- else if (strcmp("processor", label) == 0)
- procs++;
- }
- fclose(fp);
- system_ncpus = procs;
- sprintf(log_buffer, "system contains %ld CPUs", system_ncpus);
- log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
-
- if (system_ncpus)
- {
- if ((cpu_array = (proc_cpu_t *)calloc(system_ncpus, sizeof(proc_cpu_t))) == NULL)
- {
- log_err(errno, __func__, "failed to allocate memory");
- return;
- }
- }
- }
- /* Zero out cpu_array */
- memset(cpu_array, 0, system_ncpus * sizeof(proc_cpu_t));
- /* Parse CPU counters from /proc/stat */
- if ((fp = fopen("/proc/stat", "r")) != NULL)
- {
- while (! feof(fp))
- {
- if (fscanf(fp, "%s", label) != 1)
- /* Format error */
- break;
-
- if (sscanf(label, "cpu%d", &cpu_id) != 1)
- /* Line does not report cpu activities */
- continue;
- if (cpu_id >= system_ncpus)
- /* Ups, more cpus than found in /proc/cpuinfo */
- break;
- if (fscanf(fp, " %llu %llu %llu %llu %llu", &usr, &nice, &sys, &idle, &wait) != 5)
- /* Format error */
- break;
-
- cpu_array[cpu_id].idle_total = idle;
- cpu_array[cpu_id].busy_total = usr + nice + sys + wait;
- }
- fclose(fp);
- } /* END if (fp) */
- /* Calculate cpu activity for each nodeboard */
- for (i = 0; i < num_node_boards; i++)
- {
-
- /* Sum up cpu counters of relevant CPUs */
- totidle = totbusy = 0;
- hwloc_bitmap_foreach_begin(cpu_id, node_boards[i].cpuset)
- {
- totidle += cpu_array[cpu_id].idle_total;
- totbusy += cpu_array[cpu_id].busy_total;
- }
- hwloc_bitmap_foreach_end();
-
- /* If there are counters from a previous call, evaluate */
- if ((prevtot = node_boards[i].pstat_idle + node_boards[i].pstat_busy) != 0)
- {
- dbusy = totbusy - node_boards[i].pstat_busy; /* diff busy counter sum */
- dtot = totbusy + totidle - prevtot; /* diff total counter sum */
- node_boards[i].cpuact = (float)(node_boards[i].num_cpus * dbusy / (double)dtot);
- }
- else
- {
- node_boards[i].cpuact = 0;
- }
- /* Remember counter sums */
- node_boards[i].pstat_idle = totidle;
- node_boards[i].pstat_busy = totbusy;
- } /* END for(i) */
- return;
- } /* END collect_cpuact() */
- const char *cpuact(
- struct rm_attribute *attrib)
-
- {
- if (attrib != NULL)
- {
- log_err(-1, __func__, extra_parm);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- sprintf(ret_string, "%.2f", node_boards[numa_index].cpuact);
- if (LOGLEVEL >= 6)
- {
- sprintf(log_buffer, "cpuact=%s", ret_string);
- log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_NODE, __func__, log_buffer);
- }
- return(ret_string);
- } /* END cpuact() */
- #endif
/*
 * gracetime - seconds remaining until an absolute time.
 *
 * secs is a point in time expressed in seconds on the same clock as
 * time().  Returns the number of seconds from now until then, or 0 when
 * that time is not in the future.
 */
u_long gracetime(

  u_long secs)

{
  const time_t now = time((time_t *)NULL);

  /* time is in the future only when it is strictly later than now */
  return((secs > (u_long)now) ? (u_long)(secs - now) : 0);
} /* END gracetime() */
- static const char *quota(
- struct rm_attribute *attrib)
- {
- int type;
- dev_t dirdev;
- uid_t uid;
- struct stat sb;
- struct mntent *me;
- struct dqblk qi;
- FILE *m;
- struct passwd *pw;
- static const char *type_array[] =
- {
- "harddata",
- "softdata",
- "currdata",
- "hardfile",
- "softfile",
- "currfile",
- "timedata",
- "timefile",
- NULL
- };
- enum type_name
- {
- harddata,
- softdata,
- currdata,
- hardfile,
- softfile,
- currfile,
- timedata,
- timefile,
- type_end
- };
- if (attrib == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "type"))
- {
- sprintf(log_buffer, "unknown qualifier %s",
- attrib->a_qualifier);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- for (type = 0;type < type_end;type++)
- {
- if (strcmp(attrib->a_value, type_array[type]) == 0)
- break;
- }
- if (type == type_end)
- {
- /* check to see if command is legal */
- sprintf(log_buffer, "bad param: %s=%s",
- attrib->a_qualifier,
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if ((attrib = momgetattr(NULL)) == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return(NULL);
- }
- if (strcmp(attrib->a_qualifier, "dir") != 0)
- {
- sprintf(log_buffer, "bad param: %s=%s",
- attrib->a_qualifier,
- attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return(NULL);
- }
- if (attrib->a_value[0] != '/') /* must be absolute path */
- {
- sprintf(log_buffer,
- "not an absolute path: %s", attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return NULL;
- }
- if (stat(attrib->a_value, &sb) == -1)
- {
- sprintf(log_buffer, "stat: %s", attrib->a_value);
- log_err(errno, __func__, log_buffer);
- rm_errno = RM_ERR_EXIST;
- return NULL;
- }
- dirdev = (dev_t)sb.st_dev;
- DBPRT(("dir has devnum %d\n", (int)dirdev))
- if ((m = setmntent(MOUNTED, "r")) == NULL)
- {
- log_err(errno, __func__, "setmntent");
- rm_errno = RM_ERR_SYSTEM;
- return NULL;
- }
- while ((me = getmntent(m)) != NULL)
- {
- if (strcmp(me->mnt_type, MNTTYPE_IGNORE) == 0)
- continue;
- if (stat(me->mnt_dir, &sb) == -1)
- {
- sprintf(log_buffer, "stat: %s", me->mnt_dir);
- log_err(errno, __func__, log_buffer);
- continue;
- }
- if (LOGLEVEL >= 6)
- DBPRT(("%s\t%s\t%d\n", me->mnt_fsname, me->mnt_dir, (int)dirdev))
- if (!memcmp(&sb.st_dev, &dirdev, sizeof(dev_t)))
- break;
- }
- endmntent(m);
- if (me == NULL)
- {
- sprintf(log_buffer,
- "filesystem %s not found", attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_EXIST;
- return NULL;
- }
- #if defined(MNTOPT_NOQUOTA)
- if (hasmntopt(me, MNTOPT_NOQUOTA) != NULL)
- {
- sprintf(log_buffer,
- "no quotas on filesystem %s", me->mnt_dir);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_EXIST;
- return NULL;
- }
- #endif /* MNTOPT_NOQUOTA */
- if ((attrib = momgetattr(NULL)) == NULL)
- {
- log_err(-1, __func__, no_parm);
- rm_errno = RM_ERR_NOPARAM;
- return NULL;
- }
- if (strcmp(attrib->a_qualifier, "user") != 0)
- {
- sprintf(log_buffer, "bad param: %s=%s",
- attrib->a_qualifier, attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_BADPARAM;
- return NULL;
- }
- if ((uid = (uid_t)atoi(attrib->a_value)) == 0)
- {
- if ((pw = getpwnam_ext(attrib->a_value)) == NULL)
- {
- sprintf(log_buffer,
- "user not found: %s", attrib->a_value);
- log_err(-1, __func__, log_buffer);
- rm_errno = RM_ERR_EXIST;
- return NULL;
- }
- uid = pw->pw_uid;
- }
- if (syscall(
- SYS_quotactl,
- QCMD(Q_GETQUOTA, USRQUOTA),
- me->mnt_fsname,
- uid,
- (caddr_t)&qi) == -1)
- {
- log_err(errno, __func__, "quotactl");
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- /* sizes in KB */
- switch (type)
- {
- case harddata:
- sprintf(ret_string, "%lukb",
- (u_long)qi.dqb_bhardlimit >> 10);
- break;
- case softdata:
- sprintf(ret_string, "%lukb",
- (u_long)qi.dqb_bsoftlimit >> 10);
- break;
- case currdata:
- #if defined(TENABLEQUOTA)
- #if _LINUX_QUOTA_VERSION < 2
- sprintf(ret_string, "%lukb",
- (u_long)qi.dqb_curblocks >> 10);
- #else /* _LINUX_QUOTA_VERSION < 2 */
- sprintf(ret_string, "%lukb",
- (u_long)qi.dqb_curspace >> 10);
- #endif /* _LINUX_QUOTA_VERSION < 2 */
- #endif /* TENABLEQUOTA */
- break;
- case hardfile:
- sprintf(ret_string, "%lu",
- (u_long)qi.dqb_ihardlimit);
- break;
- case softfile:
- sprintf(ret_string, "%lu",
- (u_long)qi.dqb_isoftlimit);
- break;
- case currfile:
- sprintf(ret_string, "%lu",
- (u_long)qi.dqb_curinodes);
- break;
- case timedata:
- sprintf(ret_string, "%lu",
- gracetime((u_long)qi.dqb_btime));
- break;
- case timefile:
- sprintf(ret_string, "%lu",
- gracetime((u_long)qi.dqb_itime));
- break;
- } /* END switch() */
- return(ret_string);
- } /* END quota() */
- /* tested for linux 2.4 kernel (not tested on 2.6) */
- #define MAX_INTERFACES 10 /*the maximum number of interfaces*/
- #define HEADER_STR "%*[^\n]\n%*[^\n]\n"
- #define INTERFACE_STR "%[^:]:%lu %*d %*d %*d %*d %*d %*d %*d %lu %*d %*d %*d %*d %*d %*d %*d\n"
- static const char *netload(
- struct rm_attribute *attrib)
- {
- #ifdef NUMA_SUPPORT
- /* there's no way to determine these numbers for a numa node */
- return(NULL);
- #else
- FILE *fp;
- int rc; /*read count*/
- char interfaceName[MAX_INTERFACES][32];
- unsigned long int bytesRX[MAX_INTERFACES + 1];
- unsigned long int bytesTX[MAX_INTERFACES + 1];
- int interface = 0;
- /* int ethNum = 0; */
- if ((fp = fopen("/proc/net/dev", "r")) == NULL)
- {
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- rc = fscanf(fp, HEADER_STR); /*strip off header lines*/
- if (rc < 0)
- {
- log_err(errno, __func__, "fscanf of header lines in /proc/net/dev");
- fclose(fp);
- rm_errno = RM_ERR_SYSTEM;
- return(NULL);
- }
- /* read in interface stats until we can't */
- /* sum all interface stats, excluding 'lo'*/
- memset(bytesRX, 0, sizeof(bytesRX));
- memset(bytesTX, 0, sizeof(bytesTX));
- for (interface = 0;interface < MAX_INTERFACES;interface++)
- {
- rc = fscanf(fp, INTERFACE_STR,
- interfaceName[interface],
- &bytesRX[interface],
- &bytesTX[interface]);
- if (rc != 3)
- {
- interface++; /*adjust counter for future decrement*/
- break;
- }
- if (strcmp(interfaceName[interface], "lo") != 0) /* don't count 'lo' interfaces' stats */
- {
- /* For singling out ethernet interfaces */
- /*
- if (strncmp(interfaceName[interface],"eth",3) == 0)
- {
- rc = sscanf(interfaceName[interface],"eth%d",
- ðNum);
- }
- */
- bytesRX[MAX_INTERFACES] += bytesRX[interface];
- bytesTX[MAX_INTERFACES] += bytesTX[interface];
- }
- } /* END for (interface) */
- /* remove lo from interface count */
- --interface;
- fclose(fp);
- sprintf(ret_string, "%lu",
- bytesRX[MAX_INTERFACES] + bytesTX[MAX_INTERFACES]);
- return(ret_string);
- #endif /* NUMA_SUPPORT */
- } /* END netload() */
- mbool_t ProcIsChild(
- char *Dir, /* I */
- pid_t PID, /* I */
- char *JobID) /* I */
- {
- return(FALSE);
- } /* END ProcIsChild() */
- /* END mom_mach.c */