PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/nordugrid-arc-2.0.0/src/services/a-rex/infoproviders/Fork.pm

#
Perl | 350 lines | 244 code | 67 blank | 39 comment | 37 complexity | 6e1aec7f1750ec3c17deffa1e5e74a27 MD5 | raw file
Possible License(s): Apache-2.0
  1. package Fork;
  2. use POSIX qw(ceil floor);
  3. use Sys::Hostname;
  4. @ISA = ('Exporter');
  5. @EXPORT_OK = ('cluster_info',
  6. 'queue_info',
  7. 'jobs_info',
  8. 'users_info');
  9. use LogUtils ( 'start_logging', 'error', 'warning', 'debug' );
  10. use strict;
  11. ##########################################
  12. # Saved private variables
  13. ##########################################
  14. our (%lrms_queue);
  15. our $running = undef; # total running jobs in a queue
  16. # the queue passed in the latest call to queue_info, jobs_info or users_info
  17. my $currentqueue = undef;
  18. # Resets queue-specific global variables if
  19. # the queue has changed since the last call
  20. sub init_globals($) {
  21. my $qname = shift;
  22. if (not defined $currentqueue or $currentqueue ne $qname) {
  23. $currentqueue = $qname;
  24. %lrms_queue = ();
  25. $running = undef;
  26. }
  27. }
  28. ##########################################
  29. # Private subs
  30. ##########################################
  31. sub cpu_threads_cores_sockets {
  32. my $nsockets; # total number of physical cpu sockets
  33. my $ncores; # total number of cpu cores
  34. my $nthreads; # total number of hardware execution threads
  35. if (-f "/proc/cpuinfo") {
  36. # Linux variant
  37. my %sockets; # cpu socket IDs
  38. my %cores; # cpu core IDs
  39. open (CPUINFO, "</proc/cpuinfo")
  40. or warning("Failed opening /proc/cpuinfo: $!");
  41. while ( my $line = <CPUINFO> ) {
  42. if ($line=~/^processor\s*:\s+(\d+)$/) {
  43. ++$nthreads;
  44. } elsif ($line=~/^physical id\s*:\s+(\d+)$/) {
  45. ++$sockets{$1};
  46. } elsif ($line=~/^core id\s*:\s+(\d+)$/) {
  47. ++$cores{$1};
  48. }
  49. }
  50. close CPUINFO;
  51. # count total cpu cores and sockets
  52. $ncores = scalar keys %cores;
  53. $nsockets = scalar keys %sockets;
  54. if ($nthreads) {
  55. # if /proc/cpuinfo does not provide socket and core IDs,
  56. # assume every thread represents a separate cpu
  57. $ncores = $nthreads unless $ncores;
  58. $nsockets = $nthreads unless $nsockets;
  59. } else {
  60. warning("Failed parsing /proc/cpuinfo");
  61. }
  62. } elsif (-x "/usr/sbin/system_profiler") {
  63. # OS X
  64. my @lines = `system_profiler SPHardwareDataType`;
  65. warning("Failed running system_profiler: $!") if $?;
  66. for my $line ( @lines ) {
  67. if ($line =~ /Number Of Processors:\s*(.+)/) {
  68. $nsockets = $1;
  69. } elsif ($line =~ /Total Number Of Cores:\s*(.+)/) {
  70. $ncores = $1;
  71. $nthreads = $1; # Assume 1 execution thread per core
  72. }
  73. }
  74. unless ($nsockets and $ncores) {
  75. warning("Failed parsing output of system_profiler");
  76. }
  77. } elsif (-x "/usr/bin/kstat" ) {
  78. # Solaris
  79. my %chips;
  80. eval {
  81. require Sun::Solaris::Kstat;
  82. my $ks = Sun::Solaris::Kstat->new();
  83. my $cpuinfo = $ks->{cpu_info};
  84. die "key not found: cpu_info" unless defined $cpuinfo;
  85. for my $id (keys %$cpuinfo) {
  86. my $info = $cpuinfo->{$id}{"cpu_info$id"};
  87. die "key not found: cpu_info$id" unless defined $info;
  88. $chips{$info->{chip_id}}++;
  89. $nthreads++;
  90. }
  91. };
  92. if ($@) {
  93. error("Failed running module Sun::Solaris::Kstat: $@");
  94. }
  95. # assume each core is in a separate socket
  96. $nsockets = $ncores = scalar keys %chips;
  97. } else {
  98. warning("Cannot query CPU info: unsupported operating system");
  99. }
  100. return ($nthreads,$ncores,$nsockets);
  101. }
  102. # Produces stats for all processes on the system
  103. sub process_info() {
  104. my $command = "ps -e -o ppid,pid,vsz,time,etime,user,comm";
  105. my @pslines = `$command`;
  106. if ($? != 0) {
  107. warning("Failed running (non-zero exit status): $command");
  108. return ();
  109. }
  110. shift @pslines; # drop header line
  111. my @procinfo;
  112. for my $line (@pslines) {
  113. my ($ppid,$pid,$vsize,$cputime,$etime,$user,$comm) = split ' ', $line, 7;
  114. # matches time formats like: 21:29.44, 12:21:29, 3-12:21:29
  115. if ($cputime =~ /^(?:(?:(\d+)-)?(\d+):)?(\d+):(\d\d(?:\.\d+)?)$/) {
  116. my ($days,$hours,$minutes,$seconds) = (($1||0), ($2||0), $3, $4);
  117. $cputime = $seconds + 60*($minutes + 60*($hours + 24*$days));
  118. } else {
  119. warning("Invalid cputime: $cputime");
  120. $cputime = 0;
  121. }
  122. # matches time formats like: 21:29.44, 12:21:29, 3-12:21:29
  123. if ($etime =~ /^(?:(?:(\d+)-)?(\d+):)?(\d+):(\d\d(?:\.\d+)?)$/) {
  124. my ($days,$hours,$minutes,$seconds) = (($1||0), ($2||0), $3, $4);
  125. $etime = $seconds + 60*($minutes + 60*($hours + 24*$days));
  126. } elsif ($etime eq '-') {
  127. $etime = 0; # a zombie ?
  128. } else {
  129. warning("Invalid etime: $etime");
  130. $etime = 0;
  131. }
  132. my $pi = { ppid => $ppid, pid => $pid, vsize => $vsize, user => $user,
  133. cputime => $cputime, etime => $etime, comm => $comm };
  134. push @procinfo, $pi,
  135. }
  136. return @procinfo;
  137. }
  138. ############################################
  139. # Public subs
  140. #############################################
  141. sub cluster_info ($) {
  142. my ($config) = shift;
  143. my (%lrms_cluster);
  144. $lrms_cluster{lrms_type} = "fork";
  145. $lrms_cluster{lrms_version} = "1";
  146. # only enforcing per-process cputime limit
  147. $lrms_cluster{has_total_cputime_limit} = 0;
  148. my ($cputhreads) = cpu_threads_cores_sockets();
  149. $lrms_cluster{totalcpus} = $cputhreads;
  150. # Since fork is a single machine backend all there will only be one machine available
  151. $lrms_cluster{cpudistribution} = $lrms_cluster{totalcpus}."cpu:1";
  152. # usedcpus on a fork machine is determined from the 1min cpu
  153. # loadaverage and cannot be larger than the totalcpus
  154. if (`uptime` =~ /load averages?:\s+([.\d]+),?\s+([.\d]+),?\s+([.\d]+)/) {
  155. $lrms_cluster{usedcpus} = ($1 <= $lrms_cluster{totalcpus})
  156. ? floor(0.5+$1) : $lrms_cluster{totalcpus};
  157. } else {
  158. error("Failed getting load averages");
  159. $lrms_cluster{usedcpus} = 0;
  160. }
  161. #Fork does not support parallel jobs
  162. $lrms_cluster{runningjobs} = $lrms_cluster{usedcpus};
  163. # no LRMS queuing jobs on a fork machine, fork has no queueing ability
  164. $lrms_cluster{queuedcpus} = 0;
  165. $lrms_cluster{queuedjobs} = 0;
  166. $lrms_cluster{queue} = [ ];
  167. return %lrms_cluster;
  168. }
  169. sub queue_info ($$) {
  170. my ($config) = shift;
  171. my ($qname) = shift;
  172. init_globals($qname);
  173. if (defined $running) {
  174. # job_info was already called, we know exactly how many grid jobs
  175. # are running
  176. $lrms_queue{running} = $running;
  177. } else {
  178. # assuming that the submitted grid jobs are cpu hogs, approximate
  179. # the number of running jobs with the number of running processes
  180. $lrms_queue{running}= 0;
  181. unless (open PSCOMMAND, "ps axr |") {
  182. error("error in executing ps axr");
  183. }
  184. while(my $line = <PSCOMMAND>) {
  185. chomp($line);
  186. next if ($line =~ m/PID TTY/);
  187. next if ($line =~ m/ps axr/);
  188. next if ($line =~ m/cluster-fork/);
  189. $lrms_queue{running}++;
  190. }
  191. close PSCOMMAND;
  192. }
  193. my ($cputhreads) = cpu_threads_cores_sockets();
  194. $lrms_queue{totalcpus} = $cputhreads;
  195. $lrms_queue{status} = $lrms_queue{totalcpus}-$lrms_queue{running};
  196. # reserve negative numbers for error states
  197. # Fork is not real LRMS, and cannot be in error state
  198. if ($lrms_queue{status}<0) {
  199. debug("lrms_queue{status} = $lrms_queue{status}");
  200. $lrms_queue{status} = 0;
  201. }
  202. my $job_limit;
  203. if ( not $$config{fork_job_limit} ) {
  204. $job_limit = 1;
  205. } elsif ($$config{fork_job_limit} eq "cpunumber") {
  206. $job_limit = $lrms_queue{totalcpus};
  207. } else {
  208. $job_limit = $$config{fork_job_limit};
  209. }
  210. $lrms_queue{maxrunning} = $job_limit;
  211. $lrms_queue{maxuserrun} = $job_limit;
  212. $lrms_queue{maxqueuable} = $job_limit;
  213. chomp( my $maxcputime = `ulimit "-t"` );
  214. if ($maxcputime =~ /^\d+$/) {
  215. $lrms_queue{maxcputime} = $maxcputime;
  216. } elsif ($maxcputime eq 'unlimited') {
  217. $lrms_queue{maxcputime} = "";
  218. } else {
  219. warning("Could not determine max cputime with ulimit -t");
  220. $lrms_queue{maxcputime} = "";
  221. }
  222. $lrms_queue{queued} = 0;
  223. $lrms_queue{mincputime} = "";
  224. $lrms_queue{defaultcput} = "";
  225. $lrms_queue{minwalltime} = "";
  226. $lrms_queue{defaultwallt} = "";
  227. $lrms_queue{maxwalltime} = $lrms_queue{maxcputime};
  228. return %lrms_queue;
  229. }
  230. sub jobs_info ($$@) {
  231. my ($config) = shift;
  232. my ($qname) = shift;
  233. my ($jids) = shift;
  234. init_globals($qname);
  235. my (%lrms_jobs);
  236. my @procinfo = process_info();
  237. foreach my $id (@$jids){
  238. $lrms_jobs{$id}{nodes} = [ hostname ];
  239. my ($proc) = grep { $id == $_->{pid} } @procinfo;
  240. if ($proc) {
  241. # number of running jobs. Will be used in queue_info
  242. ++$running;
  243. # sum cputime of all child processes
  244. my $cputime = $proc->{cputime};
  245. $_->{ppid} == $id and $cputime += $_->{cputime} for @procinfo;
  246. $lrms_jobs{$id}{mem} = $proc->{vsize};
  247. $lrms_jobs{$id}{walltime} = ceil $proc->{etime}/60;
  248. $lrms_jobs{$id}{cputime} = ceil $cputime/60;
  249. $lrms_jobs{$id}{status} = 'R';
  250. $lrms_jobs{$id}{comment} = [ "LRMS: Running under fork" ];
  251. } else {
  252. $lrms_jobs{$id}{mem} = '';
  253. $lrms_jobs{$id}{walltime} = '';
  254. $lrms_jobs{$id}{cputime} = '';
  255. $lrms_jobs{$id}{status} = ''; # job is EXECUTED
  256. $lrms_jobs{$id}{comment} = [ "LRMS: no longer running" ];
  257. }
  258. $lrms_jobs{$id}{reqwalltime} = "";
  259. $lrms_jobs{$id}{reqcputime} = "";
  260. $lrms_jobs{$id}{rank} = "0";
  261. #Fork backend does not support parallel jobs
  262. $lrms_jobs{$id}{cpus} = "1";
  263. }
  264. return %lrms_jobs;
  265. }
  266. sub users_info($$@) {
  267. my ($config) = shift;
  268. my ($qname) = shift;
  269. my ($accts) = shift;
  270. init_globals($qname);
  271. my (%lrms_users);
  272. # freecpus
  273. # queue length
  274. if ( ! exists $lrms_queue{status} ) {
  275. %lrms_queue = queue_info( $config, $qname );
  276. }
  277. foreach my $u ( @{$accts} ) {
  278. $lrms_users{$u}{freecpus} = $lrms_queue{maxuserrun} - $lrms_queue{running};
  279. $lrms_users{$u}{queuelength} = "$lrms_queue{queued}";
  280. }
  281. return %lrms_users;
  282. }
  283. 1;