
/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java

http://github.com/apache/hive
Possible License(s): Apache-2.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Shell;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkEnv;
import org.apache.spark.SparkFiles;
/**
 * ScriptOperator. Pipes each input row to an external script (Hive's
 * TRANSFORM clause): serialized rows are written to the child process's
 * stdin, and its stdout and stderr are consumed on separate threads.
 */
public class ScriptOperator extends Operator<ScriptDesc> implements
    Serializable {

  private static final long serialVersionUID = 1L;

  /**
   * Counter.
   */
  public static enum Counter {
    DESERIALIZE_ERRORS, SERIALIZE_ERRORS
  }

  private final transient LongWritable deserialize_error_count = new LongWritable();
  private final transient LongWritable serialize_error_count = new LongWritable();

  transient Thread outThread = null;
  transient Thread errThread = null;
  transient Process scriptPid = null;
  transient Configuration hconf;
  // Input to the script
  transient Serializer scriptInputSerializer;
  // Output from the script
  transient Deserializer scriptOutputDeserializer;
  transient volatile Throwable scriptError = null;
  transient RecordWriter scriptOutWriter = null;

  // List of conf entries not to turn into env vars
  transient Set<String> blackListedConfEntries = null;

  static final String IO_EXCEPTION_BROKEN_PIPE_STRING = "Broken pipe";
  static final String IO_EXCEPTION_STREAM_CLOSED = "Stream closed";

  /**
   * Sends periodic reports back to the tracker.
   */
  transient AutoProgressor autoProgressor;

  // Set until the first row arrives: the child process should only be started
  // if necessary, as starting it eagerly may conflict with some user
  // assumptions.
  transient boolean firstRow;
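
  /**
   * Maps a configuration name to a legal environment variable name by
   * replacing every character outside [A-Za-z0-9] with '_', e.g.
   * "hive.script.operator.id.env.var" becomes
   * "hive_script_operator_id_env_var".
   */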
  String safeEnvVarName(String name) {
    StringBuilder safe = new StringBuilder();
    int len = name.length();
    for (int i = 0; i < len; i++) {
      char c = name.charAt(i);
      char s;
      if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z')
          || (c >= 'a' && c <= 'z')) {
        s = c;
      } else {
        s = '_';
      }
      safe.append(s);
    }
    return safe.toString();
  }
  /**
   * Most UNIX implementations impose some limit on the total size of environment
   * variables and on the size of individual strings. To fit within this limit we
   * sometimes need to truncate strings. Also, some values tend to be long and are
   * meaningless to scripts, so strain them out.
   *
   * @param value environment variable value to check
   * @param name name of variable (used only for logging purposes)
   * @param truncate whether to truncate the value
   * @return the original value, or a truncated one if its length is more than
   *         20KB and the truncate flag is set
   * @see <a href="http://www.kernel.org/doc/man-pages/online/pages/man2/execve.2.html">Linux
   *      man page</a> for more details
   */
  String safeEnvVarValue(String value, String name, boolean truncate) {
    final int lenLimit = 20 * 1024;
    if (truncate && value.length() > lenLimit) {
      value = value.substring(0, lenLimit);
      LOG.warn("Length of environment variable " + name + " was truncated to " + lenLimit
          + " bytes to fit system limits.");
    }
    return value;
  }
  /**
   * Checks whether a given configuration name is blacklisted and should not be
   * converted to an environment variable.
   */
  boolean blackListed(Configuration conf, String name) {
    if (blackListedConfEntries == null) {
      blackListedConfEntries = new HashSet<String>();
      if (conf != null) {
        String bl = conf.get(HiveConf.ConfVars.HIVESCRIPT_ENV_BLACKLIST.toString(),
            HiveConf.ConfVars.HIVESCRIPT_ENV_BLACKLIST.getDefaultValue());
        if (bl != null && !bl.isEmpty()) {
          String[] bls = bl.split(",");
          Collections.addAll(blackListedConfEntries, bls);
        }
      }
    }
    return blackListedConfEntries.contains(name);
  }
  /**
   * addJobConfToEnvironment is mostly shamelessly copied from Hadoop Streaming,
   * with an additional check on environment variable length.
   */
  void addJobConfToEnvironment(Configuration conf, Map<String, String> env) {
    Iterator<Map.Entry<String, String>> it = conf.iterator();
    while (it.hasNext()) {
      Map.Entry<String, String> en = it.next();
      String name = en.getKey();
      if (!blackListed(conf, name)) {
        // String value = (String) en.getValue(); // does not apply variable
        // expansion
        String value = conf.get(name); // does variable expansion
        name = safeEnvVarName(name);
        boolean truncate = conf
            .getBoolean(HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV.toString(), false);
        value = safeEnvVarValue(value, name, truncate);
        env.put(name, value);
      }
    }
  }
  /**
   * Maps a relative pathname to an absolute pathname using the PATH environment.
   */
  public class PathFinder {
    String pathenv; // a string of pathnames
    String pathSep; // the path separator
    String fileSep; // the file separator in a directory

    /**
     * Construct a PathFinder object using the path from the specified system
     * environment variable.
     */
    public PathFinder(String envpath) {
      pathenv = System.getenv(envpath);
      pathSep = System.getProperty("path.separator");
      fileSep = System.getProperty("file.separator");
    }

    /**
     * Prepends the specified component to the path list.
     */
    public void prependPathComponent(String str) {
      pathenv = str + pathSep + pathenv;
    }

    /**
     * Returns the full path name of this file if it is listed in the path.
     */
    public File getAbsolutePath(String filename) {
      if (pathenv == null || pathSep == null || fileSep == null) {
        return null;
      }
      int val = -1;
      String classvalue = pathenv + pathSep;
      while (((val = classvalue.indexOf(pathSep)) >= 0)
          && classvalue.length() > 0) {
        // Extract each entry from the pathenv
        String entry = classvalue.substring(0, val).trim();
        File f = new File(entry);
        try {
          if (f.isDirectory()) {
            // this entry in the pathenv is a directory;
            // see if the required file is in this directory
            f = new File(entry + fileSep + filename);
          }
          // see if the filename matches and we can read it
          if (f.isFile() && f.canRead()) {
            return f;
          }
        } catch (Exception exp) {
          // ignore unreadable entries and keep scanning the path
        }
        classvalue = classvalue.substring(val + 1).trim();
      }
      return null;
    }
  }
  /** Kryo ctor. */
  protected ScriptOperator() {
    super();
  }

  public ScriptOperator(CompilationOpContext ctx) {
    super(ctx);
  }

  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    firstRow = true;
    statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count);
    statsMap.put(Counter.SERIALIZE_ERRORS.toString(), serialize_error_count);
    try {
      this.hconf = hconf;
      AbstractSerDe outputSerDe = conf.getScriptOutputInfo().getSerDeClass().newInstance();
      outputSerDe.initialize(hconf, conf.getScriptOutputInfo().getProperties(), null);
      AbstractSerDe inputSerde = conf.getScriptInputInfo().getSerDeClass().newInstance();
      inputSerde.initialize(hconf, conf.getScriptInputInfo().getProperties(), null);
      scriptOutputDeserializer = outputSerDe;
      scriptInputSerializer = inputSerde;
      outputObjInspector = scriptOutputDeserializer.getObjectInspector();
    } catch (Exception e) {
      throw new HiveException(ErrorMsg.SCRIPT_INIT_ERROR.getErrorCodedMsg(), e);
    }
  }
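
  /**
   * Returns true if the IOException's message matches one of the messages the
   * JDK produces when the child process has closed its end of the pipe
   * ("Broken pipe" or "Stream closed"), i.e. the script exited before
   * consuming all of its input.
   */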
  boolean isBrokenPipeException(IOException e) {
    return (e.getMessage().equalsIgnoreCase(IO_EXCEPTION_BROKEN_PIPE_STRING) ||
        e.getMessage().equalsIgnoreCase(IO_EXCEPTION_STREAM_CLOSED));
  }

  boolean allowPartialConsumption() {
    return HiveConf.getBoolVar(hconf, HiveConf.ConfVars.ALLOWPARTIALCONSUMP);
  }

  void displayBrokenPipeInfo() {
    LOG.info("The script did not consume all input data. This is considered an error.");
    LOG.info("set " + HiveConf.ConfVars.ALLOWPARTIALCONSUMP.toString() + "=true; to ignore it.");
  }
  private transient String tableName;
  private transient String partitionName;

  @Override
  public void setInputContext(String tableName, String partitionName) {
    this.tableName = tableName;
    this.partitionName = partitionName;
    super.setInputContext(tableName, partitionName);
  }
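
  /**
   * Processes one input row. On the first row this lazily launches the child
   * process (resolving the script on PATH, applying the configured script
   * wrapper, and exporting the job conf as environment variables), then starts
   * one daemon thread to consume the script's stdout and another for its
   * stderr. Every row is serialized and written to the script's stdin.
   */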
  @Override
  public void process(Object row, int tag) throws HiveException {
    // initialize the user's process only when we receive the first row
    if (firstRow) {
      firstRow = false;
      SparkConf sparkConf = null;
      try {
        String[] cmdArgs = splitArgs(conf.getScriptCmd());
        String prog = cmdArgs[0];
        File currentDir = new File(".").getAbsoluteFile();

        if (!new File(prog).isAbsolute()) {
          PathFinder finder = new PathFinder("PATH");
          finder.prependPathComponent(currentDir.toString());
          // In Spark local mode, we need to search added files in the root directory.
          if (HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
            sparkConf = SparkEnv.get().conf();
            finder.prependPathComponent(SparkFiles.getRootDirectory());
          }
          File f = finder.getAbsolutePath(prog);
          if (f != null) {
            cmdArgs[0] = f.getAbsolutePath();
          }
          f = null;
        }

        String[] wrappedCmdArgs = addWrapper(cmdArgs);
        LOG.info("Executing " + Arrays.asList(wrappedCmdArgs));
        LOG.info("tablename=" + tableName);
        LOG.info("partname=" + partitionName);
        LOG.info("alias=" + alias);

        ProcessBuilder pb = new ProcessBuilder(wrappedCmdArgs);
        Map<String, String> env = pb.environment();
        addJobConfToEnvironment(hconf, env);
        env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String
            .valueOf(alias));

        // Create an environment variable that uniquely identifies this script
        // operator
        String idEnvVarName = HiveConf.getVar(hconf,
            HiveConf.ConfVars.HIVESCRIPTIDENVVAR);
        String idEnvVarVal = getOperatorId();
        env.put(safeEnvVarName(idEnvVarName), idEnvVarVal);

        // For Spark, in non-local mode, any added dependencies are stored at
        // SparkFiles::getRootDirectory, which is the executor's working directory.
        // In local mode, we need to manually point the process's working directory
        // to it, in order to make the dependencies accessible.
        if (sparkConf != null) {
          String master = sparkConf.get("spark.master");
          if (master.equals("local") || master.startsWith("local[")) {
            pb.directory(new File(SparkFiles.getRootDirectory()));
          }
        }

        scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs);

        DataOutputStream scriptOut = new DataOutputStream(
            new BufferedOutputStream(scriptPid.getOutputStream()));
        DataInputStream scriptIn = new DataInputStream(new BufferedInputStream(
            scriptPid.getInputStream()));
        DataInputStream scriptErr = new DataInputStream(
            new BufferedInputStream(scriptPid.getErrorStream()));

        scriptOutWriter = conf.getInRecordWriterClass().newInstance();
        scriptOutWriter.initialize(scriptOut, hconf);

        RecordReader scriptOutputReader = conf.getOutRecordReaderClass()
            .newInstance();
        scriptOutputReader.initialize(scriptIn, hconf, conf
            .getScriptOutputInfo().getProperties());

        outThread = new StreamThread(scriptOutputReader,
            new OutputStreamProcessor(scriptOutputDeserializer
                .getObjectInspector()), "OutputProcessor");

        RecordReader scriptErrReader = conf.getErrRecordReaderClass()
            .newInstance();
        scriptErrReader.initialize(scriptErr, hconf, conf.getScriptErrInfo()
            .getProperties());

        errThread = new StreamThread(scriptErrReader, new ErrorStreamProcessor(
            HiveConf.getIntVar(hconf, HiveConf.ConfVars.SCRIPTERRORLIMIT)),
            "ErrorProcessor");

        if (HiveConf
            .getBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTAUTOPROGRESS)) {
          autoProgressor = new AutoProgressor(this.getClass().getName(),
              reporter, Utilities.getDefaultNotificationInterval(hconf),
              HiveConf.getTimeVar(
                  hconf, HiveConf.ConfVars.HIVES_AUTO_PROGRESS_TIMEOUT, TimeUnit.MILLISECONDS));
          autoProgressor.go();
        }

        outThread.start();
        errThread.start();
      } catch (Exception e) {
        throw new HiveException(ErrorMsg.SCRIPT_INIT_ERROR.getErrorCodedMsg(), e);
      }
    }

    if (scriptError != null) {
      throw new HiveException(ErrorMsg.SCRIPT_GENERIC_ERROR.getErrorCodedMsg(), scriptError);
    }

    try {
      Writable res = scriptInputSerializer.serialize(row,
          inputObjInspectors[tag]);
      scriptOutWriter.write(res);
    } catch (SerDeException e) {
      LOG.error("Error in serializing the row: " + e.getMessage());
      scriptError = e;
      serialize_error_count.set(serialize_error_count.get() + 1);
      throw new HiveException(e);
    } catch (IOException e) {
      if (isBrokenPipeException(e) && allowPartialConsumption()) {
        // Give the outThread a chance to finish before marking the operator as done
        try {
          scriptPid.waitFor();
        } catch (InterruptedException interruptedException) {
          // ignore and continue with best-effort shutdown
        }
        // best-effort attempt to write all output from the script before marking
        // the operator as done
        try {
          if (outThread != null) {
            outThread.join(0);
          }
        } catch (Exception e2) {
          LOG.warn("Exception in closing outThread", e2);
        }
        setDone(true);
        LOG.warn("Got broken pipe during write: ignoring exception and setting operator to done");
      } else {
        LOG.error("Error in writing to script: " + e.getMessage());
        if (isBrokenPipeException(e)) {
          displayBrokenPipeInfo();
        }
        scriptError = e;
        throw new HiveException(ErrorMsg.SCRIPT_IO_ERROR.getErrorCodedMsg(), e);
      }
    }
  }
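
  /**
   * Shuts down the child process. On a clean close it closes the script's
   * stdin, waits for the process to exit, and treats a non-zero exit code as
   * an abort. On an abortive close it still tries to collect the exit code,
   * but gives up after one second. In both cases the stdout/stderr threads are
   * joined and the process is destroyed on a best-effort basis.
   */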
  @Override
  public void close(boolean abort) throws HiveException {
    boolean new_abort = abort;
    if (!abort) {
      if (scriptError != null) {
        throw new HiveException(ErrorMsg.SCRIPT_GENERIC_ERROR.getErrorCodedMsg(), scriptError);
      }
      // everything ok. try normal shutdown
      try {
        try {
          if (scriptOutWriter != null) {
            scriptOutWriter.close();
          }
        } catch (IOException e) {
          if (isBrokenPipeException(e) && allowPartialConsumption()) {
            LOG.warn("Got broken pipe: ignoring exception");
          } else {
            if (isBrokenPipeException(e)) {
              displayBrokenPipeInfo();
            }
            throw e;
          }
        }
        int exitVal = 0;
        if (scriptPid != null) {
          exitVal = scriptPid.waitFor();
        }
        if (exitVal != 0) {
          LOG.error("Script failed with code " + exitVal);
          new_abort = true;
        }
      } catch (IOException e) {
        LOG.error("Got exception", e);
        new_abort = true;
      } catch (InterruptedException e) {
        // ignore; fall through to the best-effort cleanup below
      }
    } else {
      // Error already occurred, but we still want to get the
      // error code of the child process if possible.
      try {
        // Interrupt the current thread after 1 second
        final Thread mythread = Thread.currentThread();
        Timer timer = new Timer(true);
        timer.schedule(new TimerTask() {
          @Override
          public void run() {
            mythread.interrupt();
          }
        }, 1000);
        // Wait for the child process to finish
        int exitVal = 0;
        if (scriptPid != null) {
          exitVal = scriptPid.waitFor();
        }
        // Cancel the timer
        timer.cancel();
        // Output the exit code
        LOG.error("Script exited with code " + exitVal);
      } catch (InterruptedException e) {
        // Ignore
        LOG.error("Script has not exited yet. It will be killed.");
      }
    }

    // try these best effort
    try {
      if (outThread != null) {
        outThread.join(0);
      }
    } catch (Exception e) {
      LOG.warn("Exception in closing outThread", e);
    }

    try {
      if (errThread != null) {
        errThread.join(0);
      }
    } catch (Exception e) {
      LOG.warn("Exception in closing errThread", e);
    }

    try {
      if (scriptPid != null) {
        scriptPid.destroy();
      }
    } catch (Exception e) {
      LOG.warn("Exception in destroying scriptPid", e);
    }

    super.close(new_abort);

    if (new_abort && !abort) {
      throw new HiveException(ErrorMsg.SCRIPT_CLOSING_ERROR.getErrorCodedMsg());
    }
  }
  interface StreamProcessor {
    void processLine(Writable line) throws HiveException;

    void close() throws HiveException;
  }
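
  /**
   * The processor for the script's stdout: deserializes each line with the
   * script-output SerDe and forwards the resulting row to the child operators.
   * Rows that fail to deserialize are counted and skipped.
   */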
  class OutputStreamProcessor implements StreamProcessor {
    Object row;
    ObjectInspector rowInspector;

    public OutputStreamProcessor(ObjectInspector rowInspector) {
      this.rowInspector = rowInspector;
    }

    @Override
    public void processLine(Writable line) throws HiveException {
      try {
        row = scriptOutputDeserializer.deserialize(line);
      } catch (SerDeException e) {
        deserialize_error_count.set(deserialize_error_count.get() + 1);
        return;
      }
      forward(row, rowInspector);
    }

    @Override
    public void close() {
    }
  }
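
  /**
   * Recognizes counter and status updates that the script emits on stderr.
   * A line of the form reporterPrefix + "counter:" + group + "," + name + ","
   * + amount increments a counter, and reporterPrefix + "status:" + message
   * sets the task status; the prefix comes from
   * HiveConf.ConfVars.STREAMREPORTERPERFIX.
   */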
  class CounterStatusProcessor {

    private final String reporterPrefix;
    private final String counterPrefix;
    private final String statusPrefix;
    private final Reporter reporter;

    CounterStatusProcessor(Configuration hconf, Reporter reporter) {
      this.reporterPrefix = HiveConf.getVar(hconf, HiveConf.ConfVars.STREAMREPORTERPERFIX);
      this.counterPrefix = reporterPrefix + "counter:";
      this.statusPrefix = reporterPrefix + "status:";
      this.reporter = reporter;
    }

    private boolean process(String line) {
      if (line.startsWith(reporterPrefix)) {
        if (line.startsWith(counterPrefix)) {
          incrCounter(line);
        }
        if (line.startsWith(statusPrefix)) {
          setStatus(line);
        }
        return true;
      } else {
        return false;
      }
    }

    private void incrCounter(String line) {
      String trimmedLine = line.substring(counterPrefix.length()).trim();
      String[] columns = trimmedLine.split(",");
      if (columns.length == 3) {
        try {
          reporter.incrCounter(columns[0], columns[1], Long.parseLong(columns[2]));
        } catch (NumberFormatException e) {
          LOG.warn("Cannot parse counter increment '" + columns[2] +
              "' from line " + line);
        }
      } else {
        LOG.warn("Cannot parse counter line: " + line);
      }
    }

    private void setStatus(String line) {
      reporter.setStatus(line.substring(statusPrefix.length()).trim());
    }
  }
  /**
   * The processor for the stderr stream.
   */
  class ErrorStreamProcessor implements StreamProcessor {
    private long bytesCopied = 0;
    private final long maxBytes;
    private long lastReportTime;
    private CounterStatusProcessor counterStatus;

    public ErrorStreamProcessor(int maxBytes) {
      this.maxBytes = maxBytes;
      lastReportTime = 0;
      if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.STREAMREPORTERENABLED)) {
        counterStatus = new CounterStatusProcessor(hconf, reporter);
      }
    }

    @Override
    public void processLine(Writable line) throws HiveException {
      String stringLine = line.toString();
      int len = 0;

      if (line instanceof Text) {
        len = ((Text) line).getLength();
      } else if (line instanceof BytesWritable) {
        len = ((BytesWritable) line).getSize();
      }

      // Report progress for each stderr line, but no more frequently than once
      // per minute.
      long now = System.currentTimeMillis();
      // reporter is a member variable of the Operator class.
      if (now - lastReportTime > 60 * 1000 && reporter != null) {
        LOG.info("ErrorStreamProcessor calling reporter.progress()");
        lastReportTime = now;
        reporter.progress();
      }

      if (reporter != null) {
        if (counterStatus != null) {
          if (counterStatus.process(stringLine)) {
            return;
          }
        }
      }

      if ((maxBytes < 0) || (bytesCopied < maxBytes)) {
        System.err.println(stringLine);
      }
      if (bytesCopied < maxBytes && bytesCopied + len >= maxBytes) {
        System.err.println("Operator " + id + " " + getName()
            + ": exceeding stderr limit of " + maxBytes
            + " bytes, will truncate stderr messages.");
      }
      bytesCopied += len;
    }

    @Override
    public void close() {
    }
  }
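
  /**
   * A daemon thread that drains one of the script's output streams: it reads
   * records with the configured RecordReader and hands each one to a
   * StreamProcessor until EOF. Any throwable is recorded in scriptError so the
   * main processing thread can surface it.
   */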
  class StreamThread extends Thread {
    RecordReader in;
    StreamProcessor proc;
    String name;

    StreamThread(RecordReader in, StreamProcessor proc, String name) {
      this.in = in;
      this.proc = proc;
      this.name = name;
      setDaemon(true);
    }

    @Override
    public void run() {
      try {
        Writable row = in.createRow();

        while (true) {
          long bytes = in.next(row);
          if (bytes <= 0) {
            break;
          }
          proc.processLine(row);
        }
        LOG.info("StreamThread " + name + " done");
      } catch (Throwable th) {
        scriptError = th;
        LOG.warn("Exception in StreamThread.run()", th);
      } finally {
        try {
          if (in != null) {
            in.close();
          }
        } catch (Exception e) {
          LOG.warn(name + ": error in closing ..", e);
        }
        try {
          if (null != proc) {
            proc.close();
          }
        } catch (Exception e) {
          LOG.warn(name + ": error in closing ..", e);
        }
      }
    }
  }
  /**
   * Wrap the script in a wrapper that allows admins to control how scripts are
   * executed: if a wrapper command is configured, its arguments are prepended
   * to the original command line.
   */
  protected String[] addWrapper(String[] inArgs) {
    String wrapper = HiveConf.getVar(hconf, HiveConf.ConfVars.SCRIPTWRAPPER);
    if (wrapper == null) {
      return inArgs;
    }

    String[] wrapComponents = splitArgs(wrapper);
    int totallength = wrapComponents.length + inArgs.length;
    String[] finalArgv = new String[totallength];
    for (int i = 0; i < wrapComponents.length; i++) {
      finalArgv[i] = wrapComponents[i];
    }
    for (int i = 0; i < inArgs.length; i++) {
      finalArgv[wrapComponents.length + i] = inArgs[i];
    }
    return finalArgv;
  }

  // Code below shamelessly borrowed from Hadoop Streaming
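
  /**
   * Splits a command line into arguments on unquoted spaces, honoring single
   * and double quotes and stripping the quote characters themselves, e.g.
   * splitArgs("grep 'a b'") returns {"grep", "a b"}.
   */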
  public static String[] splitArgs(String args) {
    // Tokenizer states: outside any quotes, inside single quotes, inside
    // double quotes.
    final int OUTSIDE = 1;
    final int SINGLEQ = 2;
    final int DOUBLEQ = 3;

    ArrayList<String> argList = new ArrayList<String>();
    char[] ch = args.toCharArray();
    int clen = ch.length;
    int state = OUTSIDE;
    int argstart = 0;
    for (int c = 0; c <= clen; c++) {
      boolean last = (c == clen);
      int lastState = state;
      boolean endToken = false;
      if (!last) {
        if (ch[c] == '\'') {
          if (state == OUTSIDE) {
            state = SINGLEQ;
          } else if (state == SINGLEQ) {
            state = OUTSIDE;
          }
          endToken = (state != lastState);
        } else if (ch[c] == '"') {
          if (state == OUTSIDE) {
            state = DOUBLEQ;
          } else if (state == DOUBLEQ) {
            state = OUTSIDE;
          }
          endToken = (state != lastState);
        } else if (ch[c] == ' ') {
          if (state == OUTSIDE) {
            endToken = true;
          }
        }
      }
      if (last || endToken) {
        if (c == argstart) {
          // unquoted space
        } else {
          String a;
          a = args.substring(argstart, c);
          argList.add(a);
        }
        argstart = c + 1;
        lastState = state;
      }
    }
    return argList.toArray(new String[0]);
  }
  @Override
  public String getName() {
    return ScriptOperator.getOperatorName();
  }

  public static String getOperatorName() {
    return "SCR";
  }

  @Override
  public OperatorType getType() {
    return OperatorType.SCRIPT;
  }
}