PageRenderTime 51ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/data/src/main/scala/scalanlp/pipes/Pipes.scala

http://github.com/scalanlp/scalanlp-core
Scala | 564 lines | 292 code | 91 blank | 181 comment | 53 complexity | cfb2bed0c86c28d50866043322bfc693 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Distributed as part of ScalaRA, a scientific research tool.
  3. *
  4. * Copyright (C) 2007 Daniel Ramage
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110 USA
  17. */
  18. package scalanlp.pipes;
  19. import java.io.File
  20. import java.io.InputStream
  21. import java.io.OutputStream
  22. import java.lang.Process
  23. import java.lang.ProcessBuilder
  24. import scala.concurrent.ops._
  /**
   * Utilities for executing shell commands and reading from files in a
   * way that resembles unix shell piping. Each instance carries its own
   * working directory, default stdin/stdout/stderr streams and a mutable
   * copy of the environment that is handed to launched processes.
   *
   * To get started with a global pipes shell, use:
   *
   *   import scalanlp.pipes.Pipes.global._
   *
   * and see the examples in PipesExample.main.
   *
   * @author dramage
   */
  class Pipes {
    /**
     * Throws a PipesException carrying the given error message.
     *
     * @param message description of the failure
     * @throws PipesException always
     */
    protected def error(message : String) : Unit = {
      throw new PipesException(message)
    }

    //
    // state variables
    //

    /** Absolute form of the directory the java process was launched in. */
    protected val _sysCwd : File = new File(new File("").getAbsolutePath)

    /** Current directory for this pipes instance. */
    protected var _cwd : File = _sysCwd

    /** Default stream that process output is sent to. */
    protected var _stdout : OutputStream = java.lang.System.out

    /** Default stream that process error output is sent to. */
    protected var _stderr : OutputStream = java.lang.System.err

    /** Default input stream of this context. */
    protected var _stdin : InputStream = java.lang.System.in

    /**
     * Mutable map containing the current environmental variables
     * as seen by invoked processes. Initialized from the system
     * environment.
     */
    protected val _env : scala.collection.mutable.Map[String,String] = {
      // Copy the Java map entry by entry instead of relying on the
      // deprecated implicit JavaConversions.
      val vars = scala.collection.mutable.Map[String,String]()
      val entries = java.lang.System.getenv.entrySet.iterator
      while (entries.hasNext) {
        val entry = entries.next()
        vars(entry.getKey) = entry.getValue
      }
      vars
    }

    //
    // context properties
    //

    /** Returns the default stdout used in this context. */
    def stdout = _stdout

    /** Sets the default stdout used in this context. */
    def setStdout(stream : OutputStream) : Unit = _stdout = stream

    /** Returns the default stderr used in this context. */
    def stderr = _stderr

    /** Sets the default stderr used in this context. */
    def setStderr(stream : OutputStream) : Unit = _stderr = stream

    /** Returns the default stdin used in this context. */
    def stdin = _stdin

    /** Sets the default stdin used in this context. */
    def setStdin(stream : InputStream) : Unit = _stdin = stream

    //
    // path and directory access and update
    //

    /** Returns the current working directory. */
    def cwd : File = _cwd

    /**
     * Changes to the given directory.
     *
     * @param folder the directory to make current
     * @throws PipesException if the folder does not exist, is not a
     *   directory, or cannot be read
     */
    def cd(folder : File) : Unit = {
      if (!folder.exists) {
        error("Folder "+folder+" does not exist.")
      } else if (!folder.isDirectory) {
        error("Folder "+folder+" is not a directory")
      } else if (!folder.canRead) {
        error("Cannot access folder "+folder)
      }

      // Normalize back to the launch directory object so that file()
      // can recognize "no directory change" with a cheap comparison.
      _cwd = if (folder.getAbsolutePath == _sysCwd.getAbsolutePath) _sysCwd else folder
    }

    /**
     * Returns a file referring to the given name. The returned file
     * is relative to the current directory (cwd) if the path is not absolute.
     * When cwd lies below the launch directory the result is expressed
     * relative to the launch directory.
     *
     * @param path absolute or cwd-relative path name
     */
    implicit def file(path : String) : File = {
      val direct = new File(path)
      if (direct.isAbsolute || _cwd == _sysCwd) {
        direct
      } else {
        val sysPath = _sysCwd.getAbsolutePath
        val cwdPath = _cwd.getAbsolutePath
        // Only treat cwd as nested when the launch directory is followed
        // by a separator; a bare startsWith would also match a sibling
        // such as "/home/username" against "/home/user".
        val prefix =
          if (sysPath.endsWith(File.separator)) sysPath else sysPath + File.separator
        if (cwdPath == sysPath) {
          direct
        } else if (cwdPath.startsWith(prefix)) {
          new File(cwdPath.substring(prefix.length), path)
        } else {
          new File(cwd, path)
        }
      }
    }

    /**
     * Returns a file relative to the given base, which itself is either
     * absolute or relative to the current working directory.
     */
    def file(base : String, path : String) : File =
      file(file(base), path)

    /**
     * Returns a file relative to the given base file. This method is
     * not affected by the current working directory.
     */
    def file(base : java.io.File, path : String) : File =
      new File(base, path)

    //
    // environmental variables
    //

    /** An immutable snapshot of this instance's current environment. */
    def env : Map[String,String] =
      Map() ++ _env

    /** Sets the given environmental variable key to the given value. */
    def env(key : String, value : String) : Unit =
      _env(key) = value

    /**
     * Returns the current value associated with the given environmental
     * variable; the underlying map lookup fails if the key is not set.
     */
    def env(key : String) : String =
      _env(key)

    //
    // process invocation
    //

    /**
     * Runs the given command (via the system command shell) in the
     * current directory. Because the system command shell is used to
     * parse the arguments, all standard escaping and quoting mechanisms
     * of the system are used to determine how to split the command
     * string into the appropriate arguments for invoking the program.
     * Uses this instance's environment as the full process execution
     * environment.
     *
     * @param command the command line handed to the shell
     * @return the started process
     */
    def sh(command : String) : java.lang.Process = {
      val builder = new ProcessBuilder().directory(_cwd)

      // Replace the inherited environment wholesale with ours.
      val childEnv = builder.environment()
      childEnv.clear()
      for ((key, value) <- env) {
        childEnv.put(key, value)
      }

      val os = System.getProperty("os.name")
      if (os == "Windows 95" || os == "Windows 98" || os == "Windows ME") {
        // The Windows 9x family ships command.com as its interpreter.
        builder.command("command.com", "/C", command)
      } else if (os.startsWith("Windows")) {
        builder.command("cmd.exe", "/C", command)
      } else {
        builder.command("/bin/sh", "-c", command)
      }

      builder.start()
    }

    //
    // implicit conversions
    //

    /** Wraps a process so that it can take part in pipe expressions. */
    implicit def iPipeProcess(process : Process) : PipeProcess =
      new PipeProcess(process)(this)

    /** Wraps an input stream so that it can take part in pipe expressions. */
    implicit def iPipeInputStream(stream : InputStream) : PipeInputStream =
      new PipeInputStream(stream)

    /** Opens the given file (see iInputStream) as a pipeable input stream. */
    implicit def iPipeInputStream(file : File) : PipeInputStream =
      new PipeInputStream(iInputStream(file))

    /**
     * Gets a buffered FileInputStream for the given file. If the filename
     * ends with .gz, automatically wraps the returned stream with
     * a java.util.zip.GZIPInputStream.
     */
    implicit def iInputStream(file : File) : InputStream = {
      val raw = new java.io.BufferedInputStream(new java.io.FileInputStream(file))
      if (file.getName.toLowerCase.endsWith(".gz")) {
        // The GZIP constructor reads the header and may fail; do not
        // leak the already opened file in that case.
        try {
          new java.util.zip.GZIPInputStream(raw)
        } catch {
          case e : java.io.IOException =>
            raw.close()
            throw e
        }
      } else {
        raw
      }
    }

    /**
     * Gets a buffered FileOutputStream for the given file. If the filename
     * ends with .gz, automatically wraps the returned stream with
     * a java.util.zip.GZIPOutputStream.
     */
    implicit def iOutputStream(file : File) : OutputStream = {
      val raw = new java.io.BufferedOutputStream(new java.io.FileOutputStream(file))
      if (file.getName.toLowerCase.endsWith(".gz")) {
        // The GZIP constructor writes the header and may fail; do not
        // leak the already opened file in that case.
        try {
          new java.util.zip.GZIPOutputStream(raw)
        } catch {
          case e : java.io.IOException =>
            raw.close()
            throw e
        }
      } else {
        raw
      }
    }

    /** Wraps an iterator of lines so that it can be piped onward. */
    implicit def iPipeIterator(lines : Iterator[String]) : PipeIterator =
      new PipeIterator(lines)(this)

    /** Wraps a collection of lines so that it can be piped onward. */
    implicit def iPipeIterator(lines : Iterable[String]) : PipeIterator =
      new PipeIterator(lines.iterator)(this)
  }
  /**
   * Companion of the Pipes class. To get started with a global pipes
   * shell, use:
   *
   *   import scalanlp.pipes.Pipes.global._
   *
   * and take a look at the example code in PipesExample.main.
   */
  object Pipes {
    /** Structural type of anything that can hand out its lines. */
    private[pipes] type HasLines = {
      def getLines() : Iterator[String]
    }

    /** A global instance for easy imports */
    val global = Pipes()

    /** Creates a fresh instance with default settings. */
    def apply() : Pipes = new Pipes()

    /**
     * Copy constructor: creates a new instance whose working directory,
     * default streams and environment are taken from the given one.
     */
    def apply(ref : Pipes) : Pipes = {
      val copy = Pipes()
      copy._cwd = ref._cwd
      copy._stdout = ref._stdout
      copy._stderr = ref._stderr
      copy._stdin = ref._stdin
      // Start from an empty environment, then take over every entry
      // of the reference instance.
      copy._env.clear()
      ref._env.foreach { case (key, value) => copy._env(key) = value }
      copy
    }
  }
  /**
   * Demonstrates the pipe operators using the global Pipes instance.
   * The shell commands used here (echo, sleep, cat, grep, egrep, perl,
   * ls) are run through the platform shell selected by sh.
   */
  object PipesExample {
    import Pipes.global._;

    /** Runs each example in order; the stdin example reads the console. */
    def main(argv : Array[String]) : Unit = {
      // plain stdout redirection
      sh("echo '(no sleep) prints 1st'") | stdout;
      sh("sleep 1; echo '(sleep 1) prints 2nd'") | stdout;

      // stderr redirection, directly and through another process
      sh("echo '(stderr redirect) should show up on stdout' | cat >&2") |& stdout;
      sh("echo '(stderr redirect) should also show up on stdout' | cat >&2") |& sh("cat") | stdout;

      // process-to-process pipes
      sh("echo '(pipe test line 1) should be printed'; echo '(pipe test line 2) should not be printed'") | sh("grep 1") | stdout;
      sh("echo '(translation test) should sound funny'") | sh("perl -pe 's/(a|e|i|o|u)+/oi/g';") | stdout;

      // stream and function endpoints
      stdin | sh("egrep '[0-9]'") | stdout;
      sh("ls") | ((name : String) => name.toUpperCase) | stdout;
      (1 to 10).map(_.toString) | stderr;

      // iterating over the lines a process prints
      sh("ls").getLines.foreach { entry =>
        println(entry.toUpperCase);
      };
    }
  }
  /**
   * Helper methods for PipeProcess
   *
   * @author dramage
   */
  object PipeIO {
    /**
     * Read all bytes from the given input stream to the given output
     * stream, closing the input stream when finished reading. Flushes
     * the output stream once the input is exhausted so that nothing
     * stays behind in a buffered sink, but does not close it.
     *
     * @param in  source of bytes; always closed before returning
     * @param out destination of bytes; flushed, never closed
     */
    def drain(in : InputStream, out : OutputStream) : Unit = {
      val buffer = new Array[Byte](1024)
      try {
        var numRead = in.read(buffer, 0, buffer.length)
        while (numRead >= 0) {
          if (numRead > 0) {
            // read some bytes
            out.write(buffer, 0, numRead)
          } else {
            // read no bytes, but not yet EOF: back off before retrying
            Thread.sleep(100L)
          }
          numRead = in.read(buffer, 0, buffer.length)
        }
        // Callers may never close out, so push everything through now.
        out.flush()
      } finally {
        in.close()
      }
    }

    /**
     * Reads all lines in the given input stream using Java's
     * BufferedReader. The returned lines do not have a trailing
     * newline character. The first line is read eagerly when the
     * iterator is created, and the reader is closed as soon as the end
     * of the stream is reached.
     *
     * @param in the stream to read lines from
     * @return an iterator over the lines; calling next past the end
     *   throws NoSuchElementException
     */
    def readLines(in : InputStream) : Iterator[String] = {
      val reader = new java.io.BufferedReader(new java.io.InputStreamReader(in))

      new Iterator[String] {
        // null (BufferedReader's end-of-stream marker) means exhausted
        private var pending : String = fetch()

        def hasNext : Boolean =
          pending != null

        def next() : String = {
          if (pending == null) {
            // Do not hand out null or touch the already closed reader.
            throw new java.util.NoSuchElementException("no more lines")
          }
          val current = pending
          pending = fetch()
          current
        }

        /** Reads the next line, closing the reader at end of stream. */
        private def fetch() : String = {
          val text = reader.readLine()
          if (text == null) {
            reader.close()
          }
          text
        }
      }
    }
  }
  /**
   * A richer Process object used for linking together in pipes.
   * By default the wrapped process's output and error output go to the
   * stdout and stderr of the given Pipes context; the pipe operators
   * redirect them elsewhere.
   *
   * @param process the process being wrapped
   * @param pipes   context supplying the default stdout and stderr
   * @author dramage
   */
  class PipeProcess(val process : Process)(implicit pipes : Pipes) {
    import PipeIO._

    /** where stdout and stderr go. */
    protected var out : OutputStream = pipes.stdout
    protected var err : OutputStream = pipes.stderr

    /** Waits for the wrapped process to exit and returns its exit value. */
    def waitFor : Int = process.waitFor()

    /** Closes the stream unless it is the context's stdout or stderr. */
    private def closeUnlessStandard(stream : OutputStream) : Unit = {
      if (stream != pipes.stdout && stream != pipes.stderr) {
        stream.close()
      }
    }

    /**
     * Copies the process's output to out and its error output to err
     * concurrently, and returns once both have been drained. A sink
     * that only receives the process output is closed as soon as that
     * output ends, so a downstream consumer sees end-of-input promptly;
     * a sink shared by both streams is closed once, after both ended.
     * The sinks are also closed if draining fails.
     */
    private def pump() : Unit = {
      val stdoutDone = future { drain(process.getInputStream, out) }
      val stderrDone = future { drain(process.getErrorStream, err) }
      var outClosed = false
      try {
        stdoutDone()
        if (out != err) {
          closeUnlessStandard(out)
          outClosed = true
        }
        // Also wait for the error output so that none of it is still
        // in flight when we return or close its sink.
        stderrDone()
      } finally {
        if (!outClosed) {
          closeUnlessStandard(out)
        }
        if (err != out) {
          closeUnlessStandard(err)
        }
      }
    }

    /**
     * Pipes this process's output into the next process. Returns
     * immediately; the copying happens in the background.
     */
    def | (next : PipeProcess) : PipeProcess = {
      // stdout goes to the next process
      this.out = next.process.getOutputStream
      spawn { pump() }
      next
    }

    /**
     * Pipes this process's output and error output into the next
     * process. Returns immediately; the copying happens in the background.
     */
    def |& (next : PipeProcess) : PipeProcess = {
      // stdout and stderr both go to the next process
      this.out = next.process.getOutputStream
      this.err = next.process.getOutputStream
      spawn { pump() }
      next
    }

    /** Piping to a process happens immediately via spawning. */
    def | (process : Process) : PipeProcess = {
      spawn {
        this | process.getOutputStream
      }
      new PipeProcess(process)
    }

    /** Piping to a process happens immediately via spawning. */
    def |& (process : Process) : PipeProcess = {
      spawn {
        this |& process.getOutputStream
      }
      new PipeProcess(process)
    }

    /**
     * Redirects the given input stream as the source for the process.
     * Returns immediately; a background job feeds the stream to the
     * process and then closes the process's input, even if feeding fails.
     */
    def < (instream : InputStream) : Process = {
      spawn {
        val sink = process.getOutputStream
        try {
          drain(instream, sink)
        } finally {
          // Without this the process could wait for more input forever.
          sink.close()
        }
      }
      process
    }

    /**
     * Redirects output from the process to the given output stream.
     * Blocks until both the output and the error output of the process
     * have ended. The given stream is closed afterwards unless it is the
     * context's stdout or stderr.
     */
    def | (outstream : OutputStream) : Process = {
      this.out = outstream
      pump()
      process
    }

    /**
     * Redirects stdout and stderr from the process to the given output stream.
     * Blocks until both the output and the error output of the process
     * have ended. The given stream is closed afterwards unless it is the
     * context's stdout or stderr.
     */
    def |& (outstream : OutputStream) : Process = {
      this.out = outstream
      this.err = outstream
      pump()
      process
    }

    /** Pipes to a function that accepts an InputStream. */
    def |[T](func : (InputStream => T)) : T =
      func(process.getInputStream)

    /** Pipes to a function that is applied to each line of output. */
    def |[T](func : (String => T)) : Iterator[T] =
      for (line <- getLines) yield func(line)

    /** Reads the lines this process writes to its output. */
    def getLines : Iterator[String] =
      readLines(process.getInputStream)
  }
  /**
   * An alternative richer InputStream that can be piped to an OutputStream,
   * Process, or function.
   *
   * @param stream the wrapped input stream
   * @author dramage
   */
  class PipeInputStream(var stream : InputStream) {
    import PipeIO._

    /**
     * Pipe to an OutputStream. Returns when all bytes have been
     * written to out. Flushes out so that a buffered destination does
     * not hold bytes back, but does not close it.
     */
    def |(out : OutputStream) : Unit = {
      drain(stream, out)
      // out stays open; make sure what was written actually gets through.
      out.flush()
    }

    /**
     * Pipe to Process, returning that Process instance. Returns
     * immediately. Spawns a background job to write all bytes
     * from the incoming stream to the process.
     */
    def |(process : PipeProcess) : Process =
      process < stream

    /** Pipes to a function that accepts an InputStream. */
    def |[T](func : (InputStream => T)) : T =
      func(stream)

    /** Returns all lines in this Stream. */
    def getLines : Iterator[String] =
      readLines(stream)
  }
  /**
   * A pipeable iterator of Strings, to be written as lines to a stream.
   *
   * @param lines the lines to write
   * @param pipes context whose stdout and stderr are never closed here
   */
  class PipeIterator(lines : Iterator[String])(implicit pipes : Pipes) {
    /**
     * Writes all lines to the given process. Returns immediately.
     */
    def |(process : PipeProcess) : Process = {
      // Bridge: a background job prints the lines into one end of an
      // in-memory pipe while the process is fed from the other end.
      val pipeIn = new java.io.PipedInputStream()
      val pipeOut = new java.io.PipedOutputStream(pipeIn)
      spawn { this | pipeOut }
      process < pipeIn
    }

    /**
     * Writes all lines to the given OutputStream, closing it when done
     * if it is not the context's stdout or stderr; those two are only
     * flushed. The stream is released this way even when producing the
     * lines fails part-way.
     */
    def |(outstream : OutputStream) : Unit = {
      val keepOpen = outstream == pipes.stdout || outstream == pipes.stderr
      val ps = new java.io.PrintStream(outstream)
      try {
        for (line <- lines) {
          ps.println(line)
        }
      } finally {
        if (keepOpen) {
          ps.flush()
        } else {
          ps.close()
        }
      }
    }
  }
  /**
   * Unchecked exception raised by the Pipes framework.
   *
   * @param message human-readable description of what went wrong
   * @author dramage
   */
  class PipesException(message : String)
    extends RuntimeException(message)