PageRenderTime 52ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/heritrix-1.14.4/src/java/org/archive/io/RecordingInputStream.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Java | 369 lines | 201 code | 39 blank | 129 comment | 24 complexity | 153c0437c2307d9ea3830173e5f0ea04 MD5 | raw file
  1. /* RecordingInputStream
  2. *
  3. * $Id: RecordingInputStream.java 5080 2007-04-13 20:30:49Z gojomo $
  4. *
  5. * Created on Sep 24, 2003
  6. *
  7. * Copyright (C) 2003 Internet Archive.
  8. *
  9. * This file is part of the Heritrix web crawler (crawler.archive.org).
  10. *
  11. * Heritrix is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU Lesser Public License as published by
  13. * the Free Software Foundation; either version 2.1 of the License, or
  14. * any later version.
  15. *
  16. * Heritrix is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU Lesser Public License for more details.
  20. *
  21. * You should have received a copy of the GNU Lesser Public License
  22. * along with Heritrix; if not, write to the Free Software
  23. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  24. */
  25. package org.archive.io;
  26. import java.io.File;
  27. import java.io.FileOutputStream;
  28. import java.io.IOException;
  29. import java.io.InputStream;
  30. import java.net.SocketException;
  31. import java.net.SocketTimeoutException;
  32. import java.security.MessageDigest;
  33. import java.util.logging.Level;
  34. import java.util.logging.Logger;
  35. /**
  36. * Stream which records all data read from it, which it acquires from a wrapped
  37. * input stream.
  38. *
  39. * Makes use of a RecordingOutputStream for recording because of its being
  40. * file backed so we can write massive amounts of data w/o worrying about
  41. * overflowing memory.
  42. *
  43. * @author gojomo
  44. *
  45. */
  46. public class RecordingInputStream
  47. extends InputStream {
  48. protected static Logger logger =
  49. Logger.getLogger("org.archive.io.RecordingInputStream");
  50. /**
  51. * Where we are recording to.
  52. */
  53. private RecordingOutputStream recordingOutputStream;
  54. /**
  55. * Stream to record.
  56. */
  57. private InputStream in = null;
  58. /**
  59. * Reusable buffer to avoid reallocation on each readFullyUntil
  60. */
  61. protected byte[] drainBuffer = new byte[16*1024];
  62. /**
  63. * Create a new RecordingInputStream.
  64. *
  65. * @param bufferSize Size of buffer to use.
  66. * @param backingFilename Name of backing file.
  67. */
  68. public RecordingInputStream(int bufferSize, String backingFilename)
  69. {
  70. this.recordingOutputStream = new RecordingOutputStream(bufferSize,
  71. backingFilename);
  72. }
  73. public void open(InputStream wrappedStream) throws IOException {
  74. logger.fine(Thread.currentThread().getName() + " opening " +
  75. wrappedStream + ", " + Thread.currentThread().getName());
  76. if(isOpen()) {
  77. // error; should not be opening/wrapping in an unclosed
  78. // stream remains open
  79. throw new IOException("RIS already open for "
  80. +Thread.currentThread().getName());
  81. }
  82. this.in = wrappedStream;
  83. this.recordingOutputStream.open();
  84. }
  85. public int read() throws IOException {
  86. if (!isOpen()) {
  87. throw new IOException("Stream closed " +
  88. Thread.currentThread().getName());
  89. }
  90. int b = this.in.read();
  91. if (b != -1) {
  92. assert this.recordingOutputStream != null: "ROS is null " +
  93. Thread.currentThread().getName();
  94. this.recordingOutputStream.write(b);
  95. }
  96. return b;
  97. }
  98. public int read(byte[] b, int off, int len) throws IOException {
  99. if (!isOpen()) {
  100. throw new IOException("Stream closed " +
  101. Thread.currentThread().getName());
  102. }
  103. int count = this.in.read(b,off,len);
  104. if (count > 0) {
  105. assert this.recordingOutputStream != null: "ROS is null " +
  106. Thread.currentThread().getName();
  107. this.recordingOutputStream.write(b,off,count);
  108. }
  109. return count;
  110. }
  111. public int read(byte[] b) throws IOException {
  112. if (!isOpen()) {
  113. throw new IOException("Stream closed " +
  114. Thread.currentThread().getName());
  115. }
  116. int count = this.in.read(b);
  117. if (count > 0) {
  118. assert this.recordingOutputStream != null: "ROS is null " +
  119. Thread.currentThread().getName();
  120. this.recordingOutputStream.write(b,0,count);
  121. }
  122. return count;
  123. }
  124. public void close() throws IOException {
  125. if (logger.isLoggable(Level.FINE)) {
  126. logger.fine(Thread.currentThread().getName() + " closing " +
  127. this.in + ", " + Thread.currentThread().getName());
  128. }
  129. if (this.in != null) {
  130. this.in.close();
  131. this.in = null;
  132. }
  133. this.recordingOutputStream.close();
  134. }
  135. public ReplayInputStream getReplayInputStream() throws IOException {
  136. return this.recordingOutputStream.getReplayInputStream();
  137. }
  138. public ReplayInputStream getContentReplayInputStream() throws IOException {
  139. return this.recordingOutputStream.getContentReplayInputStream();
  140. }
  141. public long readFully() throws IOException {
  142. while(read(drainBuffer) != -1) {
  143. // Empty out stream.
  144. continue;
  145. }
  146. return this.recordingOutputStream.getSize();
  147. }
  148. /**
  149. * Read all of a stream (Or read until we timeout or have read to the max).
  150. * @param softMaxLength Maximum length to read; if zero or < 0, then no
  151. * limit. If met, return normally.
  152. * @param hardMaxLength Maximum length to read; if zero or < 0, then no
  153. * limit. If exceeded, throw RecorderLengthExceededException
  154. * @param timeout Timeout in milliseconds for total read; if zero or
  155. * negative, timeout is <code>Long.MAX_VALUE</code>. If exceeded, throw
  156. * RecorderTimeoutException
  157. * @param maxBytesPerMs How many bytes per millisecond.
  158. * @throws IOException failed read.
  159. * @throws RecorderLengthExceededException
  160. * @throws RecorderTimeoutException
  161. * @throws InterruptedException
  162. */
  163. public void readFullyOrUntil(long softMaxLength)
  164. throws IOException, RecorderLengthExceededException,
  165. RecorderTimeoutException, InterruptedException {
  166. // Check we're open before proceeding.
  167. if (!isOpen()) {
  168. // TODO: should this be a noisier exception-raising error?
  169. return;
  170. }
  171. long totalBytes = 0L;
  172. long bytesRead = -1L;
  173. long maxToRead = -1;
  174. while (true) {
  175. try {
  176. // read no more than soft max
  177. maxToRead = (softMaxLength <= 0)
  178. ? drainBuffer.length
  179. : Math.min(drainBuffer.length, softMaxLength - totalBytes);
  180. // nor more than hard max
  181. maxToRead = Math.min(maxToRead, recordingOutputStream.getRemainingLength());
  182. // but always at least 1 (to trigger hard max exception
  183. maxToRead = Math.max(maxToRead, 1);
  184. bytesRead = read(drainBuffer,0,(int)maxToRead);
  185. if (bytesRead == -1) {
  186. break;
  187. }
  188. totalBytes += bytesRead;
  189. if (Thread.interrupted()) {
  190. throw new InterruptedException("Interrupted during IO");
  191. }
  192. } catch (SocketTimeoutException e) {
  193. // A socket timeout is just a transient problem, meaning
  194. // nothing was available in the configured timeout period,
  195. // but something else might become available later.
  196. // Take this opportunity to check the overall
  197. // timeout (below). One reason for this timeout is
  198. // servers that keep up the connection, 'keep-alive', even
  199. // though we asked them to not keep the connection open.
  200. if (logger.isLoggable(Level.FINE)) {
  201. logger.log(Level.FINE, "socket timeout", e);
  202. }
  203. // check for overall timeout
  204. recordingOutputStream.checkLimits();
  205. } catch (SocketException se) {
  206. throw se;
  207. } catch (NullPointerException e) {
  208. // [ 896757 ] NPEs in Andy's Th-Fri Crawl.
  209. // A crawl was showing NPE's in this part of the code but can
  210. // not reproduce. Adding this rethrowing catch block w/
  211. // diagnostics to help should we come across the problem in the
  212. // future.
  213. throw new NullPointerException("Stream " + this.in + ", " +
  214. e.getMessage() + " " + Thread.currentThread().getName());
  215. }
  216. // if have read 'enough', just finish
  217. if (softMaxLength > 0 && totalBytes >= softMaxLength) {
  218. break; // return
  219. }
  220. }
  221. }
  222. public long getSize() {
  223. return this.recordingOutputStream.getSize();
  224. }
  225. public void markContentBegin() {
  226. this.recordingOutputStream.markContentBegin();
  227. }
  228. public long getContentBegin() {
  229. return this.recordingOutputStream.getContentBegin();
  230. }
  231. public void startDigest() {
  232. this.recordingOutputStream.startDigest();
  233. }
  234. /**
  235. * Convenience method for setting SHA1 digest.
  236. */
  237. public void setSha1Digest() {
  238. this.recordingOutputStream.setSha1Digest();
  239. }
  240. /**
  241. * Sets a digest algorithm which may be applied to recorded data.
  242. * As usually only a subset of the recorded data should
  243. * be fed to the digest, you must also call startDigest()
  244. * to begin digesting.
  245. *
  246. * @param algorithm
  247. */
  248. public void setDigest(String algorithm) {
  249. this.recordingOutputStream.setDigest(algorithm);
  250. }
  251. /**
  252. * Sets a digest function which may be applied to recorded data.
  253. * As usually only a subset of the recorded data should
  254. * be fed to the digest, you must also call startDigest()
  255. * to begin digesting.
  256. *
  257. * @param md
  258. */
  259. public void setDigest(MessageDigest md) {
  260. this.recordingOutputStream.setDigest(md);
  261. }
  262. /**
  263. * Return the digest value for any recorded, digested data. Call
  264. * only after all data has been recorded; otherwise, the running
  265. * digest state is ruined.
  266. *
  267. * @return the digest final value
  268. */
  269. public byte[] getDigestValue() {
  270. return this.recordingOutputStream.getDigestValue();
  271. }
  272. public ReplayCharSequence getReplayCharSequence() throws IOException {
  273. return getReplayCharSequence(null);
  274. }
  275. /**
  276. * @param characterEncoding Encoding of recorded stream.
  277. * @return A ReplayCharSequence Will return null if an IOException. Call
  278. * close on returned RCS when done.
  279. * @throws IOException
  280. */
  281. public ReplayCharSequence getReplayCharSequence(String characterEncoding)
  282. throws IOException {
  283. return this.recordingOutputStream.
  284. getReplayCharSequence(characterEncoding);
  285. }
  286. public long getResponseContentLength() {
  287. return this.recordingOutputStream.getResponseContentLength();
  288. }
  289. public void closeRecorder() throws IOException {
  290. this.recordingOutputStream.closeRecorder();
  291. }
  292. /**
  293. * @param tempFile
  294. * @throws IOException
  295. */
  296. public void copyContentBodyTo(File tempFile) throws IOException {
  297. FileOutputStream fos = new FileOutputStream(tempFile);
  298. ReplayInputStream ris = getContentReplayInputStream();
  299. ris.readFullyTo(fos);
  300. fos.close();
  301. ris.close();
  302. }
  303. /**
  304. * @return True if we've been opened.
  305. */
  306. public boolean isOpen()
  307. {
  308. return this.in != null;
  309. }
  310. @Override
  311. public synchronized void mark(int readlimit) {
  312. this.in.mark(readlimit);
  313. this.recordingOutputStream.mark();
  314. }
  315. @Override
  316. public boolean markSupported() {
  317. return this.in.markSupported();
  318. }
  319. @Override
  320. public synchronized void reset() throws IOException {
  321. this.in.reset();
  322. this.recordingOutputStream.reset();
  323. }
  324. /**
  325. * Set limits to be enforced by internal recording-out
  326. */
  327. public void setLimits(long hardMax, long timeoutMs, long maxRateKBps) {
  328. recordingOutputStream.setLimits(hardMax, timeoutMs, maxRateKBps);
  329. }
  330. }