/src/core/worker.js

http://github.com/mozilla/pdf.js · JavaScript · 859 lines · 694 code · 109 blank · 56 comment · 67 complexity · 942a9a191797c38ba81d1d40a5c984a1 MD5 · raw file

  1. /* Copyright 2012 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import {
  16. AbortException,
  17. arrayByteLength,
  18. arraysToBytes,
  19. createPromiseCapability,
  20. getVerbosityLevel,
  21. info,
  22. InvalidPDFException,
  23. isString,
  24. MissingPDFException,
  25. PasswordException,
  26. setVerbosityLevel,
  27. stringToPDFString,
  28. UnexpectedResponseException,
  29. UnknownErrorException,
  30. UNSUPPORTED_FEATURES,
  31. VerbosityLevel,
  32. warn,
  33. } from "../shared/util.js";
  34. import { clearPrimitiveCaches, Dict, Ref } from "./primitives.js";
  35. import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
  36. import { incrementalUpdate } from "./writer.js";
  37. import { isNodeJS } from "../shared/is_node.js";
  38. import { MessageHandler } from "../shared/message_handler.js";
  39. import { PDFWorkerStream } from "./worker_stream.js";
  40. import { XRefParseException } from "./core_utils.js";
  /**
   * Bookkeeping object for one unit of work running inside the worker. It can
   * be flagged as terminated, and it exposes a promise that settles once the
   * work has been reported as finished.
   */
  class WorkerTask {
    /**
     * @param {string} name - Label stored on the task as `name`.
     */
    constructor(name) {
      this.name = name;
      this.terminated = false;
      this._capability = createPromiseCapability();
    }

    /**
     * Promise that is resolved once `finish` has been called.
     */
    get finished() {
      const { promise } = this._capability;
      return promise;
    }

    /**
     * Resolves the `finished` promise.
     */
    finish() {
      this._capability.resolve();
    }

    /**
     * Flags the task as terminated. This does not resolve `finished`.
     */
    terminate() {
      this.terminated = true;
    }

    /**
     * @throws {Error} If `terminate` has been called on this task.
     */
    ensureNotTerminated() {
      if (!this.terminated) {
        return;
      }
      throw new Error("Worker task was terminated");
    }
  }
  62. class WorkerMessageHandler {
  63. static setup(handler, port) {
  64. let testMessageProcessed = false;
  65. handler.on("test", function wphSetupTest(data) {
  66. if (testMessageProcessed) {
  67. return; // we already processed 'test' message once
  68. }
  69. testMessageProcessed = true;
  70. // check if Uint8Array can be sent to worker
  71. if (!(data instanceof Uint8Array)) {
  72. handler.send("test", null);
  73. return;
  74. }
  75. // making sure postMessage transfers are working
  76. const supportTransfers = data[0] === 255;
  77. handler.postMessageTransfers = supportTransfers;
  78. handler.send("test", { supportTransfers });
  79. });
  80. handler.on("configure", function wphConfigure(data) {
  81. setVerbosityLevel(data.verbosity);
  82. });
  83. handler.on("GetDocRequest", function wphSetupDoc(data) {
  84. return WorkerMessageHandler.createDocumentHandler(data, port);
  85. });
  86. }
  87. static createDocumentHandler(docParams, port) {
  // This context actually holds references to pdfManager and handler,
  // until the latter is destroyed.
  let pdfManager;
  let terminated = false;
  let cancelXHRs = null;
  const WorkerTasks = [];
  const verbosity = getVerbosityLevel();

  // The API (main-thread) and Worker versions must be identical.
  const { apiVersion } = docParams;
  const workerVersion =
    typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING")
      ? PDFJSDev.eval("BUNDLE_VERSION")
      : null;
  if (apiVersion !== workerVersion) {
    throw new Error(
      `The API version "${apiVersion}" does not match ` +
        `the Worker version "${workerVersion}".`
    );
  }

  if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) {
    // Fail early, and predictably, rather than having (some) fonts fail to
    // load/render with slightly cryptic error messages in environments where
    // the `Array.prototype` has been *incorrectly* extended.
    //
    // PLEASE NOTE: We do *not* want to slow down font parsing by adding
    // `hasOwnProperty` checks all over the code-base.
    const unexpectedKeys = [];
    // `for...in` is intentional here: it is exactly what reveals enumerable
    // properties that were added to `Array.prototype`.
    for (const key in []) {
      unexpectedKeys.push(key);
    }
    if (unexpectedKeys.length) {
      throw new Error(
        "The `Array.prototype` contains unexpected enumerable properties: " +
          unexpectedKeys.join(", ") +
          "; thus breaking e.g. `for...in` iteration of `Array`s."
      );
    }

    // Ensure that (primarily) Node.js users won't accidentally attempt to use
    // a non-translated/non-polyfilled build of the library, since that would
    // quickly fail anyway because of missing functionality.
    if (
      (typeof PDFJSDev === "undefined" || PDFJSDev.test("SKIP_BABEL")) &&
      typeof ReadableStream === "undefined"
    ) {
      throw new Error(
        "The browser/environment lacks native support for critical " +
          "functionality used by the PDF.js library (e.g. `ReadableStream`); " +
          "please use a `legacy`-build instead."
      );
    }
  }

  const { docId, docBaseUrl } = docParams;
  const workerHandlerName = docId + "_worker";
  let handler = new MessageHandler(workerHandlerName, docId, port);

  // Ensure that postMessage transfers are always correctly enabled/disabled,
  // to prevent "DataCloneError" in browsers without transfers support.
  handler.postMessageTransfers = docParams.postMessageTransfers;
  /**
   * Guard used before continuing document work.
   *
   * @throws {Error} If the `terminated` flag of this document handler is set.
   */
  function ensureNotTerminated() {
    if (!terminated) {
      return;
    }
    throw new Error("Worker was terminated");
  }
  /**
   * Appends `task` to `WorkerTasks`, the list of tasks that the "Terminate"
   * handler waits for and flags as terminated.
   *
   * @param {WorkerTask} task
   */
  function startWorkerTask(task) {
    WorkerTasks.push(task);
  }
  /**
   * Marks `task` as finished and removes it from `WorkerTasks`.
   *
   * A task that was never registered (or was already removed) is still
   * finished, but the list is left untouched. Without the index check,
   * `indexOf` returns -1 and `splice(-1, 1)` would drop the *last* task in the
   * list, i.e. an unrelated one.
   *
   * @param {WorkerTask} task
   */
  function finishWorkerTask(task) {
    task.finish();
    const index = WorkerTasks.indexOf(task);
    if (index !== -1) {
      WorkerTasks.splice(index, 1);
    }
  }
  /**
   * Runs the document loading steps on `pdfManager`, strictly in order, and
   * gathers the information that is sent back with the "GetDoc" message.
   *
   * @param {boolean} recoveryMode - Forwarded to the "parse" step; when set,
   *   the first-page check is skipped.
   * @returns {Promise<Object>} `{ numPages, fingerprints, htmlForXfa }`, where
   *   `htmlForXfa` is `null` unless the document reports itself as pure XFA.
   */
  async function loadDocument(recoveryMode) {
    await pdfManager.ensureDoc("checkHeader");
    await pdfManager.ensureDoc("parseStartXRef");
    await pdfManager.ensureDoc("parse", [recoveryMode]);

    if (!recoveryMode) {
      // Check that at least the first page can be successfully loaded,
      // since otherwise the XRef table is definitely not valid.
      await pdfManager.ensureDoc("checkFirstPage");
    }

    const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
    if (isPureXfa) {
      const fontTask = new WorkerTask("loadXfaFonts");
      startWorkerTask(fontTask);

      const fontsSettled = pdfManager
        .loadXfaFonts(handler, fontTask)
        .catch(() => {
          // Ignore errors, to allow the document to load.
        })
        .then(() => finishWorkerTask(fontTask));
      const imagesLoaded = pdfManager.loadXfaImages();

      await Promise.all([fontsSettled, imagesLoaded]);
    }

    const numPagesPromise = pdfManager.ensureDoc("numPages");
    const fingerprintsPromise = pdfManager.ensureDoc("fingerprints");
    const [numPages, fingerprints] = await Promise.all([
      numPagesPromise,
      fingerprintsPromise,
    ]);

    // Get htmlForXfa after numPages to avoid creating the HTML twice.
    let htmlForXfa = null;
    if (isPureXfa) {
      htmlForXfa = await pdfManager.ensureDoc("htmlForXfa");
    }
    return { numPages, fingerprints, htmlForXfa };
  }
  /**
   * Creates the PDF manager for this document.
   *
   * - When `data.source.data` is set, a `LocalPdfManager` is built from it
   *   right away.
   * - Otherwise the document is read through a `PDFWorkerStream`. If the
   *   response headers report range support, a `NetworkPdfManager` is created
   *   and fed the incoming chunks. If not, all chunks are collected and a
   *   `LocalPdfManager` is built once reading is done.
   *
   * While the stream is being read, `cancelXHRs` is set so that the
   * "Terminate" handler can cancel the outstanding requests.
   *
   * @param {Object} data - Document parameters; only `data.source` is read.
   * @param {Object} evaluatorOptions - Forwarded to the manager constructor.
   * @param {boolean} enableXfa - Forwarded to the manager constructor.
   * @returns {Promise} Resolved with the manager, or rejected with the error
   *   raised while creating it or while reading the stream.
   */
  function getPdfManager(data, evaluatorOptions, enableXfa) {
    const pdfManagerCapability = createPromiseCapability();
    let newPdfManager;
    const { source } = data;

    // Builds a `LocalPdfManager` from `bytes`, and settles the capability
    // with either the manager or the constructor error.
    const settleWithLocalManager = function (bytes) {
      try {
        newPdfManager = new LocalPdfManager(
          docId,
          bytes,
          source.password,
          evaluatorOptions,
          enableXfa,
          docBaseUrl
        );
        pdfManagerCapability.resolve(newPdfManager);
      } catch (ex) {
        pdfManagerCapability.reject(ex);
      }
    };

    if (source.data) {
      settleWithLocalManager(source.data);
      return pdfManagerCapability.promise;
    }

    let pdfStream;
    let cachedChunks = [];
    let bytesLoaded = 0;
    try {
      pdfStream = new PDFWorkerStream(handler);
    } catch (ex) {
      pdfManagerCapability.reject(ex);
      return pdfManagerCapability.promise;
    }

    const fullRequest = pdfStream.getFullReader();
    fullRequest.headersReady
      .then(function () {
        if (!fullRequest.isRangeSupported) {
          return;
        }

        // We don't need auto-fetch when streaming is enabled.
        const disableAutoFetch =
          source.disableAutoFetch || fullRequest.isStreamingSupported;
        const networkArgs = {
          msgHandler: handler,
          password: source.password,
          length: fullRequest.contentLength,
          disableAutoFetch,
          rangeChunkSize: source.rangeChunkSize,
        };
        newPdfManager = new NetworkPdfManager(
          docId,
          pdfStream,
          networkArgs,
          evaluatorOptions,
          enableXfa,
          docBaseUrl
        );

        // `newPdfManager` may not have existed yet during the first few
        // chunk reads; hand over everything that was cached meanwhile.
        for (const chunk of cachedChunks) {
          newPdfManager.sendProgressiveData(chunk);
        }
        cachedChunks = [];

        pdfManagerCapability.resolve(newPdfManager);
        cancelXHRs = null;
      })
      .catch(function (reason) {
        pdfManagerCapability.reject(reason);
        cancelXHRs = null;
      });

    // Called when reading has finished without a manager having been
    // created: the complete file is then available in `cachedChunks`.
    const flushChunks = function () {
      const pdfFile = arraysToBytes(cachedChunks);
      if (source.length && pdfFile.length !== source.length) {
        warn("reported HTTP length is different from actual");
      }
      // The data is an array, instantiate the manager directly from it.
      settleWithLocalManager(pdfFile);
      cachedChunks = [];
    };

    const readPromise = new Promise(function (resolve, reject) {
      const onChunk = function ({ value, done }) {
        try {
          ensureNotTerminated();

          if (done) {
            if (!newPdfManager) {
              flushChunks();
            }
            cancelXHRs = null;
            return;
          }

          bytesLoaded += arrayByteLength(value);
          if (!fullRequest.isStreamingSupported) {
            handler.send("DocProgress", {
              loaded: bytesLoaded,
              total: Math.max(bytesLoaded, fullRequest.contentLength || 0),
            });
          }

          if (newPdfManager) {
            newPdfManager.sendProgressiveData(value);
          } else {
            cachedChunks.push(value);
          }
          fullRequest.read().then(onChunk, reject);
        } catch (e) {
          reject(e);
        }
      };
      fullRequest.read().then(onChunk, reject);
    });
    readPromise.catch(function (e) {
      pdfManagerCapability.reject(e);
      cancelXHRs = null;
    });

    cancelXHRs = function (reason) {
      pdfStream.cancelAllRequests(reason);
    };

    return pdfManagerCapability.promise;
  }
  /**
   * Creates the PDF manager for the document described by `data`, loads the
   * document, and reports the outcome to the main thread: "GetDoc" on
   * success, "DocException" on failure, or "PasswordRequest" when a
   * `PasswordException` was raised.
   *
   * @param {Object} data - Document parameters received with "GetDocRequest".
   */
  function setupDoc(data) {
    function onSuccess(doc) {
      ensureNotTerminated();
      handler.send("GetDoc", { pdfInfo: doc });
    }

    function onFailure(ex) {
      ensureNotTerminated();

      if (ex instanceof PasswordException) {
        // Ask the main thread for a password, and retry once it answers.
        const task = new WorkerTask(`PasswordException: response ${ex.code}`);
        startWorkerTask(task);

        handler
          .sendWithPromise("PasswordRequest", ex)
          .then(function ({ password }) {
            finishWorkerTask(task);
            pdfManager.updatePassword(password);
            pdfManagerReady();
          })
          .catch(function () {
            finishWorkerTask(task);
            handler.send("DocException", ex);
          });
        return;
      }

      // Known exception types are forwarded as-is, anything else is wrapped.
      const isKnownException =
        ex instanceof InvalidPDFException ||
        ex instanceof MissingPDFException ||
        ex instanceof UnexpectedResponseException ||
        ex instanceof UnknownErrorException;
      const reported = isKnownException
        ? ex
        : new UnknownErrorException(ex.message, ex.toString());
      handler.send("DocException", reported);
    }

    function pdfManagerReady() {
      ensureNotTerminated();

      loadDocument(false).then(onSuccess, function (reason) {
        ensureNotTerminated();

        if (reason instanceof XRefParseException) {
          // Try again with recoveryMode == true, once the entire stream
          // has been loaded.
          pdfManager.requestLoadedStream();
          pdfManager.onLoadedStream().then(function () {
            ensureNotTerminated();
            loadDocument(true).then(onSuccess, onFailure);
          });
          return;
        }
        onFailure(reason);
      });
    }

    ensureNotTerminated();

    const {
      maxImageSize,
      disableFontFace,
      ignoreErrors,
      isEvalSupported,
      fontExtraProperties,
      useSystemFonts,
      cMapUrl,
      standardFontDataUrl,
    } = data;
    const evaluatorOptions = {
      maxImageSize,
      disableFontFace,
      ignoreErrors,
      isEvalSupported,
      fontExtraProperties,
      useSystemFonts,
      cMapUrl,
      standardFontDataUrl,
    };

    getPdfManager(data, evaluatorOptions, data.enableXfa)
      .then(function (newPdfManager) {
        if (terminated) {
          // We were in the process of setting up the manager, but the worker
          // got terminated in the middle.
          newPdfManager.terminate(
            new AbortException("Worker was terminated.")
          );
          throw new Error("Worker was terminated");
        }
        pdfManager = newPdfManager;

        pdfManager.onLoadedStream().then(function (stream) {
          handler.send("DataLoaded", { length: stream.bytes.byteLength });
        });
      })
      .then(pdfManagerReady, onFailure);
  }
  // Handler factories for requests that are answered by a single lookup on
  // the catalog or the document. `pdfManager` is read when the request
  // arrives, not when the handler is registered.
  const catalogGetter = prop => () => pdfManager.ensureCatalog(prop);
  const docGetter = prop => () => pdfManager.ensureDoc(prop);

  // Replies with the `rotate`, `ref`, `userUnit` and `view` of one page.
  handler.on("GetPage", function wphSetupGetPage({ pageIndex }) {
    const pageProps = ["rotate", "ref", "userUnit", "view"];
    return pdfManager.getPage(pageIndex).then(function (page) {
      const lookups = pageProps.map(prop => pdfManager.ensure(page, prop));
      return Promise.all(lookups).then(([rotate, ref, userUnit, view]) => ({
        rotate,
        ref,
        userUnit,
        view,
      }));
    });
  });

  handler.on("GetPageIndex", function wphSetupGetPageIndex({ ref }) {
    const pageRef = Ref.get(ref.num, ref.gen);
    return pdfManager.ensureCatalog("getPageIndex", [pageRef]);
  });

  handler.on("GetDestinations", catalogGetter("destinations"));

  handler.on("GetDestination", function wphSetupGetDestination({ id }) {
    return pdfManager.ensureCatalog("getDestination", [id]);
  });

  handler.on("GetPageLabels", catalogGetter("pageLabels"));
  handler.on("GetPageLayout", catalogGetter("pageLayout"));
  handler.on("GetPageMode", catalogGetter("pageMode"));
  handler.on("GetViewerPreferences", catalogGetter("viewerPreferences"));
  handler.on("GetOpenAction", catalogGetter("openAction"));
  handler.on("GetAttachments", catalogGetter("attachments"));
  handler.on("GetJavaScript", catalogGetter("javaScript"));
  handler.on("GetDocJSActions", catalogGetter("jsActions"));

  handler.on("GetPageJSActions", function ({ pageIndex }) {
    return pdfManager
      .getPage(pageIndex)
      .then(page => pdfManager.ensure(page, "jsActions"));
  });

  handler.on("GetOutline", catalogGetter("documentOutline"));
  handler.on("GetOptionalContentConfig", catalogGetter("optionalContentConfig"));
  handler.on("GetPermissions", catalogGetter("permissions"));

  // Replies with a `[documentInfo, metadata]` pair.
  handler.on("GetMetadata", function wphSetupGetMetadata() {
    const documentInfo = pdfManager.ensureDoc("documentInfo");
    const metadata = pdfManager.ensureCatalog("metadata");
    return Promise.all([documentInfo, metadata]);
  });

  handler.on("GetMarkInfo", catalogGetter("markInfo"));

  // Replies with the bytes of the entire document, once it has been loaded.
  handler.on("GetData", function wphSetupGetData() {
    pdfManager.requestLoadedStream();
    return pdfManager.onLoadedStream().then(stream => stream.bytes);
  });

  handler.on("GetStats", function wphSetupGetStats() {
    return pdfManager.ensureXRef("stats");
  });

  handler.on("GetAnnotations", function ({ pageIndex, intent }) {
    return pdfManager
      .getPage(pageIndex)
      .then(page => page.getAnnotationsData(intent));
  });

  handler.on("GetFieldObjects", docGetter("fieldObjects"));
  handler.on("HasJSActions", docGetter("hasJSActions"));
  handler.on("GetCalculationOrderIds", docGetter("calculationOrderIds"));
  // Handles "SaveDocument": collects the data that changed (either the
  // serialized XFA data, or the new objects produced by saving every page)
  // and replies with the bytes of the updated document. When nothing changed,
  // the original bytes are returned unmodified.
  handler.on(
    "SaveDocument",
    function ({ isPureXfa, numPages, annotationStorage, filename }) {
      pdfManager.requestLoadedStream();
      const promises = [
        pdfManager.onLoadedStream(),
        pdfManager.ensureCatalog("acroForm"),
        pdfManager.ensureCatalog("acroFormRef"),
        pdfManager.ensureDoc("xref"),
        pdfManager.ensureDoc("startXRef"),
      ];

      if (isPureXfa) {
        promises.push(pdfManager.serializeXfaData(annotationStorage));
      } else {
        for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
          promises.push(
            pdfManager.getPage(pageIndex).then(async function (page) {
              const task = new WorkerTask(`Save: page ${pageIndex}`);
              // Register the task before saving: `finishWorkerTask` expects
              // to find it in `WorkerTasks`, and the "Terminate" handler only
              // waits for (and flags) registered tasks.
              startWorkerTask(task);
              try {
                return await page.save(handler, task, annotationStorage);
              } finally {
                // Also runs when `save` throws or rejects, so that the task
                // never lingers in `WorkerTasks`.
                finishWorkerTask(task);
              }
            })
          );
        }
      }

      return Promise.all(promises).then(function ([
        stream,
        acroForm,
        acroFormRef,
        xref,
        startXRef,
        ...refs
      ]) {
        let newRefs = [];
        let xfaData = null;
        if (isPureXfa) {
          // `refs` holds exactly one entry here: the serialized XFA data.
          xfaData = refs[0];
          if (!xfaData) {
            return stream.bytes;
          }
        } else {
          // `refs` holds one array per page; flatten them while dropping the
          // `null` entries.
          for (const ref of refs) {
            newRefs = ref
              .filter(x => x !== null)
              .reduce((a, b) => a.concat(b), newRefs);
          }

          if (newRefs.length === 0) {
            // No new refs so just return the initial bytes
            return stream.bytes;
          }
        }

        const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null;
        let xfaDatasetsRef = null;
        let hasXfaDatasetsEntry = false;
        if (Array.isArray(xfa)) {
          // The array consists of (name, value) pairs; look for "datasets".
          for (let i = 0, ii = xfa.length; i < ii; i += 2) {
            if (xfa[i] === "datasets") {
              xfaDatasetsRef = xfa[i + 1];
              acroFormRef = null;
              hasXfaDatasetsEntry = true;
            }
          }
          if (xfaDatasetsRef === null) {
            xfaDatasetsRef = xref.getNewRef();
          }
        } else if (xfa) {
          acroFormRef = null;
          // TODO: Support XFA streams.
          warn("Unsupported XFA type.");
        }

        let newXrefInfo = Object.create(null);
        if (xref.trailer) {
          // Get string info from Info in order to compute fileId.
          const infoObj = Object.create(null);
          const xrefInfo = xref.trailer.get("Info") || null;
          if (xrefInfo instanceof Dict) {
            xrefInfo.forEach((key, value) => {
              if (isString(key) && isString(value)) {
                infoObj[key] = stringToPDFString(value);
              }
            });
          }

          newXrefInfo = {
            rootRef: xref.trailer.getRaw("Root") || null,
            encryptRef: xref.trailer.getRaw("Encrypt") || null,
            newRef: xref.getNewRef(),
            infoRef: xref.trailer.getRaw("Info") || null,
            info: infoObj,
            fileIds: xref.trailer.get("ID") || null,
            startXRef,
            filename,
          };
        }
        xref.resetNewRef();

        return incrementalUpdate({
          originalData: stream.bytes,
          xrefInfo: newXrefInfo,
          newRefs,
          xref,
          hasXfa: !!xfa,
          xfaDatasetsRef,
          hasXfaDatasetsEntry,
          acroFormRef,
          acroForm,
          xfaData,
        });
      });
    }
  );
  // Handles "GetOperatorList": streams the operator list of one page into
  // `sink`, closing the sink on success and erroring it on failure.
  handler.on("GetOperatorList", function wphSetupRenderPage(data, sink) {
    const { pageIndex } = data;

    pdfManager.getPage(pageIndex).then(function (page) {
      const task = new WorkerTask(`GetOperatorList: page ${pageIndex}`);
      startWorkerTask(task);

      // NOTE: Keep this condition in sync with the `info` helper function.
      const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;

      const onOperatorList = function (operatorListInfo) {
        finishWorkerTask(task);

        if (start) {
          info(
            `page=${pageIndex + 1} - getOperatorList: time=` +
              `${Date.now() - start}ms, len=${operatorListInfo.length}`
          );
        }
        sink.close();
      };

      const onError = function (reason) {
        finishWorkerTask(task);
        if (task.terminated) {
          return; // ignoring errors from the terminated thread
        }
        // For compatibility with older behavior, generating unknown
        // unsupported feature notification on errors.
        handler.send("UnsupportedFeature", {
          featureId: UNSUPPORTED_FEATURES.errorOperatorList,
        });

        sink.error(reason);

        // TODO: Should `reason` be re-thrown here (currently that causes
        //       "Uncaught exception: ..." messages in the console)?
      };

      // Pre compile the pdf page and fetch the fonts/images.
      page
        .getOperatorList({
          handler,
          sink,
          task,
          intent: data.intent,
          cacheKey: data.cacheKey,
          annotationStorage: data.annotationStorage,
        })
        .then(onOperatorList, onError);
    });
  });
  // Handles "GetTextContent": streams the text content of one page into
  // `sink`, closing the sink on success and erroring it on failure.
  handler.on("GetTextContent", function wphExtractText(data, sink) {
    const { pageIndex } = data;

    pdfManager.getPage(pageIndex).then(function (page) {
      const task = new WorkerTask(`GetTextContent: page ${pageIndex}`);
      startWorkerTask(task);

      // NOTE: Keep this condition in sync with the `info` helper function.
      const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;

      const onExtracted = function () {
        finishWorkerTask(task);

        if (start) {
          info(
            `page=${pageIndex + 1} - getTextContent: time=` +
              `${Date.now() - start}ms`
          );
        }
        sink.close();
      };

      const onError = function (reason) {
        finishWorkerTask(task);
        if (task.terminated) {
          return; // ignoring errors from the terminated thread
        }
        sink.error(reason);

        // TODO: Should `reason` be re-thrown here (currently that causes
        //       "Uncaught exception: ..." messages in the console)?
      };

      page
        .extractTextContent({
          handler,
          task,
          sink,
          normalizeWhitespace: data.normalizeWhitespace,
          includeMarkedContent: data.includeMarkedContent,
          combineTextItems: data.combineTextItems,
        })
        .then(onExtracted, onError);
    });
  });
  // Replies with the structure tree of the requested page.
  handler.on("GetStructTree", function wphGetStructTree({ pageIndex }) {
    return pdfManager
      .getPage(pageIndex)
      .then(page => pdfManager.ensure(page, "getStructTree"));
  });

  handler.on("FontFallback", function ({ id }) {
    return pdfManager.fontFallback(id, handler);
  });

  handler.on("Cleanup", function wphCleanup() {
    const manuallyTriggered = true;
    return pdfManager.cleanup(manuallyTriggered);
  });
  // Handles "Terminate": flags the document handler and every registered task
  // as terminated, cancels outstanding requests, and destroys the message
  // handler once clean-up and all registered tasks have completed.
  handler.on("Terminate", function wphTerminate(data) {
    terminated = true;

    const pending = [];
    if (!pdfManager) {
      clearPrimitiveCaches();
    } else {
      pdfManager.terminate(new AbortException("Worker was terminated."));
      pending.push(pdfManager.cleanup());
      pdfManager = null;
    }

    if (cancelXHRs) {
      cancelXHRs(new AbortException("Worker was terminated."));
    }

    for (const workerTask of WorkerTasks) {
      pending.push(workerTask.finished);
      workerTask.terminate();
    }

    return Promise.all(pending).then(() => {
      // Note that even though the handler is being destroyed, the resolved
      // response promise must still be sent back.
      handler.destroy();
      handler = null;
    });
  });
  // Handles "Ready": starts loading the document with the parameters that
  // were received when this document handler was created.
  handler.on("Ready", function wphReady() {
    setupDoc(docParams);
    // `docParams` isn't needed anymore; drop the reference to save memory.
    docParams = null;
  });
  728. return workerHandlerName;
  729. }
  730. static initializeFromPort(port) {
  731. const handler = new MessageHandler("worker", "main", port);
  732. WorkerMessageHandler.setup(handler, port);
  733. handler.send("ready", null);
  734. }
  735. }
  /**
   * Duck-type check for something that can be used as a message port: it has
   * a `postMessage` function and an `onmessage` property.
   *
   * @param {Object} maybePort
   * @returns {boolean}
   */
  function isMessagePort(maybePort) {
    if (typeof maybePort.postMessage !== "function") {
      return false;
    }
    return "onmessage" in maybePort;
  }
  // Worker thread (and not Node.js)? If so, start listening on the global
  // scope right away.
  if (typeof window === "undefined" && !isNodeJS) {
    if (typeof self !== "undefined" && isMessagePort(self)) {
      WorkerMessageHandler.initializeFromPort(self);
    }
  }
  750. export { WorkerMessageHandler, WorkerTask };