/lib/Net/Hadoop/Oozie/TheJudge.pm

https://github.com/Perl-Hadoop/Net-Hadoop-Oozie · Perl · 215 lines · 120 code · 32 blank · 63 comment · 19 complexity · d0baefc5bfcdb35e5c3cd24384e6b4d9 MD5 · raw file

  1. package Net::Hadoop::Oozie::TheJudge;
  2. use 5.010;
  3. use strict;
  4. use warnings;
  5. use Constant::FromGlobal DEBUG => { int => 1, default => 0, env => 1 };
  6. use Moo;
  7. use Net::Hadoop::Oozie;
  # The Net::Hadoop::Oozie instance used to fetch information. Built on first
  # access when the caller did not supply one.
  has oozie => (
      is      => 'rw',
      lazy    => 1,
      default => sub { return Net::Hadoop::Oozie->new },
  );

  # The name of the cluster; shown in the generated report and debug output.
  has badge => (
      is      => 'rw',
      lazy    => 1,
      default => sub { return 'x' },
  );

  # The name of the program; used as the prefix of debug messages.
  has name => (
      is      => 'rw',
      default => sub { return 'TheJudge' },
  );
  # Examine the most recent actions of a coordinator and decide whether the
  # coordinator should be suspended or killed.
  #
  # Options (a flat key/value list or a single hashref):
  #   coord   - coordinator job ID (required)
  #   len     - how many of the latest actions to fetch (default 1000)
  #   suspend - soft limit: this many of the latest finished actions must all
  #             be KILLED before any sentence is passed (default 10)
  #   kill    - hard limit: when this many of the latest finished actions are
  #             all KILLED the sentence is 'killed' instead of 'suspended'
  #             (default 20)
  #
  # Returns:
  #   nothing (bare return)           - the job could not be retrieved; a
  #                                     warning is emitted
  #   { verdict => 'R.I.P. citizen' } - the coordinator is already KILLED
  #   { verdict => 'free' }           - the job carries no actions
  #   {}                              - fewer than 'suspend' of the latest
  #                                     actions are KILLED
  #   { guilty => 1, sentence => 'killed' | 'suspended', text => $report }
  #
  # Dies when the options are not a hash, when the limits are inconsistent or
  # when no coordinator ID is given.
  sub question {
      my $self = shift;
      my $opt  = @_ > 1 ? {@_} : $_[0];

      my $name  = $self->name;
      my $oozie = $self->oozie;

      die 'Not an options hashref' if ref $opt ne 'HASH';

      # Retrieve the last $len actions of the coordinator for analysis; this
      # should be large enough to allow the front of the queue to be discarded
      # if there are many actions in READY state.
      my $len = $opt->{len} || 1000;

      # If there is no success in the last threshold(s) that actually ran,
      # raise an alert.
      my $soft_limit = $opt->{suspend} || 10;
      my $hard_limit = $opt->{kill}    || 20;

      # The messages name the real option keys so the caller can tell which
      # argument to fix.
      if ( $len < $soft_limit || $len < $hard_limit ) {
          die "'len' should be higher than 'suspend' and 'kill'; there's no point otherwise";
      }

      if ( $soft_limit > $hard_limit ) {
          die "'suspend' should be lower than 'kill'; there's no point otherwise";
      }

      my $job_id = $opt->{coord} || die "No coordinator ID!";

      my ( $job, $job_error );

      # The trailing "1;" makes the eval true on success, so the "or do" branch
      # only runs when the call died, even if $@ was clobbered.
      eval {
          $job = $oozie->job_exists(
                      $job_id,
                      {
                          len   => $len,
                          order => 'desc',
                      },
                  );
          1;
      } or do {
          $job_error = $@ || 'Zombie error';
      };

      if ( !$job ) {
          warn sprintf 'Could not retrieve details for coord id %s. %s',
                          $job_id,
                          $job_error ? "Error: $job_error" : 'Job does not exist.',
          ;
          return;
      }

      if ( ( $job->{status} // '' ) eq 'KILLED' ) {
          return { verdict => 'R.I.P. citizen' };
      }

      my $all_actions = $job->{actions} || return { verdict => "free" };

      # Work on a copy so the structure handed back by the Oozie client is not
      # modified by the shift/splice below.
      my @actions = @{ $all_actions };

      # Take the actions in order; discard all the READY/WAITING/PREP/RUNNING
      # ones at the front of the queue, then check the rest.
      while ( my $ax = shift @actions ) {
          if ( ( $ax->{status} // '' ) !~ /READY|WAITING|PREP|RUNNING/ ) {
              unshift @actions, $ax;
              last;
          }
      }

      # Keep at most $hard_limit elements. Only splice when there is something
      # to cut: an offset past the end of the array triggers a warning.
      splice @actions, $hard_limit if @actions > $hard_limit;

      # Bail out unless the first $soft_limit actions are all KILLED. The slice
      # is clamped to the actions that really exist, so no undefined
      # placeholder elements are dereferenced when the history is short.
      my $window = @actions < $soft_limit ? scalar @actions : $soft_limit;

      my $total_killed = grep { ( $_->{status} // '' ) eq 'KILLED' }
                              @actions[ 0 .. $window - 1 ]
      ;

      if ( $total_killed < $soft_limit ) {
          DEBUG && printf STDERR "[%s] KILLED: %s < %s\t- [%s] %s (%s)\n",
                                  $name,
                                  $total_killed,
                                  $soft_limit,
                                  $self->badge,
                                  $job_id,
                                  $job->{coordJobName},
          ;
          return {};
      }

      # Count the retained actions per status, plus a grand total.
      my $stats;
      for my $action ( @actions ) {
          $stats->{ $action->{status} }++;
          $stats->{total}++;
      }

      # Only a full window of $hard_limit KILLED actions earns the harsher
      # sentence.
      my $sentence = ( $stats->{KILLED} // 0 ) == $hard_limit ? 'KILLED' : 'SUSPENDED';

      my $out = sprintf "[%s] Coordinator %s (%s) should be %s\n",
                          $self->badge,
                          $job_id,
                          $job->{coordJobName},
                          $sentence,
      ;

      $out .= sprintf "Latest %s actions are:\n", $stats->{total};

      for my $status ( qw(SUCCEEDED KILLED) ) {
          $out .= sprintf " %-10s: %.2f%%\n",
                          $status,
                          ( $stats->{$status} || 0 ) / $stats->{total} * 100,
          ;
      }

      for my $action ( @actions ) {
          $out .= sprintf "action %s (%s) %s\n",
                          @{$action}{qw/ actionNumber lastModifiedTime status/ },
          ;
      }

      $out .= sprintf qq{\n\nOozie console:\n\n%s/?job=%s\n},
                      $oozie->oozie_uri,
                      $job_id,
      ;

      return {
          guilty   => 1,
          sentence => lc $sentence,
          text     => $out,
      };

      # We could check the % KILLED and % SUCCEEDED over a longer period,
      # optionally with a weight like (0.9 ^ days(time - lastModifiedEpoch)).
  }
  130. 'Drokk';
  131. __END__
  132. =pod
  133. =encoding utf8
  134. =head1 NAME
  135. Net::Hadoop::Oozie::TheJudge - Will tell you the verdict on coordinators
  136. =head1 SYNOPSIS
  137. my $verdict = Net::Hadoop::Oozie::TheJudge->new->question(
  138. len => 1000,
  139. kill => 20,
  140. suspend => 10,
  141. coord => shift(),
  142. );
  143. print $verdict->{text} if $verdict->{guilty};
  144. =head1 DESCRIPTION
  145. Part of the Perl Oozie interface.
  146. It is named after Judge Dredd, who is empowered to summarily arrest,
  147. convict, sentence, and execute criminals.
  148. =head1 SYNOPSIS
  149. use Net::Hadoop::Oozie::TheJudge;
  150. # TODO
  151. =head1 ATTRIBUTES
  152. =head2 oozie
  153. The L<Net::Hadoop::Oozie> instance used to fetch information.
  154. =head2 badge
  155. The name of the cluster.
  156. =head2 name
  157. The name of the program.
  158. =head1 METHODS
  159. =head2 question
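  160. Accepts the options C<coord> (coordinator ID, required), C<len> (number of recent actions to fetch, default 1000), C<suspend> (soft limit, default 10) and C<kill> (hard limit, default 20), either as a list or as a hashref. Returns nothing if the coordinator cannot be retrieved; otherwise returns a hashref, which contains the keys C<guilty>, C<sentence> (C<suspended> or C<killed>) and C<text> when the latest actions that ran were all KILLED.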
  161. =cut