/src/core/worker.js
http://github.com/mozilla/pdf.js · JavaScript · 859 lines · 694 code · 109 blank · 56 comment · 67 complexity · 942a9a191797c38ba81d1d40a5c984a1 MD5 · raw file
- /* Copyright 2012 Mozilla Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- import {
- AbortException,
- arrayByteLength,
- arraysToBytes,
- createPromiseCapability,
- getVerbosityLevel,
- info,
- InvalidPDFException,
- isString,
- MissingPDFException,
- PasswordException,
- setVerbosityLevel,
- stringToPDFString,
- UnexpectedResponseException,
- UnknownErrorException,
- UNSUPPORTED_FEATURES,
- VerbosityLevel,
- warn,
- } from "../shared/util.js";
- import { clearPrimitiveCaches, Dict, Ref } from "./primitives.js";
- import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
- import { incrementalUpdate } from "./writer.js";
- import { isNodeJS } from "../shared/is_node.js";
- import { MessageHandler } from "../shared/message_handler.js";
- import { PDFWorkerStream } from "./worker_stream.js";
- import { XRefParseException } from "./core_utils.js";
- class WorkerTask {
- constructor(name) {
- this.name = name;
- this.terminated = false;
- this._capability = createPromiseCapability();
- }
- get finished() {
- return this._capability.promise;
- }
- finish() {
- this._capability.resolve();
- }
- terminate() {
- this.terminated = true;
- }
- ensureNotTerminated() {
- if (this.terminated) {
- throw new Error("Worker task was terminated");
- }
- }
- }
- class WorkerMessageHandler {
- static setup(handler, port) {
- let testMessageProcessed = false;
- handler.on("test", function wphSetupTest(data) {
- if (testMessageProcessed) {
- return; // we already processed 'test' message once
- }
- testMessageProcessed = true;
- // check if Uint8Array can be sent to worker
- if (!(data instanceof Uint8Array)) {
- handler.send("test", null);
- return;
- }
- // making sure postMessage transfers are working
- const supportTransfers = data[0] === 255;
- handler.postMessageTransfers = supportTransfers;
- handler.send("test", { supportTransfers });
- });
- handler.on("configure", function wphConfigure(data) {
- setVerbosityLevel(data.verbosity);
- });
- handler.on("GetDocRequest", function wphSetupDoc(data) {
- return WorkerMessageHandler.createDocumentHandler(data, port);
- });
- }
- static createDocumentHandler(docParams, port) {
- // This context is actually holds references on pdfManager and handler,
- // until the latter is destroyed.
- let pdfManager;
- let terminated = false;
- let cancelXHRs = null;
- const WorkerTasks = [];
- const verbosity = getVerbosityLevel();
- const apiVersion = docParams.apiVersion;
- const workerVersion =
- typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING")
- ? PDFJSDev.eval("BUNDLE_VERSION")
- : null;
- if (apiVersion !== workerVersion) {
- throw new Error(
- `The API version "${apiVersion}" does not match ` +
- `the Worker version "${workerVersion}".`
- );
- }
- if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) {
- // Fail early, and predictably, rather than having (some) fonts fail to
- // load/render with slightly cryptic error messages in environments where
- // the `Array.prototype` has been *incorrectly* extended.
- //
- // PLEASE NOTE: We do *not* want to slow down font parsing by adding
- // `hasOwnProperty` checks all over the code-base.
- const enumerableProperties = [];
- for (const property in []) {
- enumerableProperties.push(property);
- }
- if (enumerableProperties.length) {
- throw new Error(
- "The `Array.prototype` contains unexpected enumerable properties: " +
- enumerableProperties.join(", ") +
- "; thus breaking e.g. `for...in` iteration of `Array`s."
- );
- }
- // Ensure that (primarily) Node.js users won't accidentally attempt to use
- // a non-translated/non-polyfilled build of the library, since that would
- // quickly fail anyway because of missing functionality.
- if (
- (typeof PDFJSDev === "undefined" || PDFJSDev.test("SKIP_BABEL")) &&
- typeof ReadableStream === "undefined"
- ) {
- throw new Error(
- "The browser/environment lacks native support for critical " +
- "functionality used by the PDF.js library (e.g. `ReadableStream`); " +
- "please use a `legacy`-build instead."
- );
- }
- }
- const docId = docParams.docId;
- const docBaseUrl = docParams.docBaseUrl;
- const workerHandlerName = docParams.docId + "_worker";
- let handler = new MessageHandler(workerHandlerName, docId, port);
- // Ensure that postMessage transfers are always correctly enabled/disabled,
- // to prevent "DataCloneError" in browsers without transfers support.
- handler.postMessageTransfers = docParams.postMessageTransfers;
- function ensureNotTerminated() {
- if (terminated) {
- throw new Error("Worker was terminated");
- }
- }
- function startWorkerTask(task) {
- WorkerTasks.push(task);
- }
- function finishWorkerTask(task) {
- task.finish();
- const i = WorkerTasks.indexOf(task);
- WorkerTasks.splice(i, 1);
- }
- async function loadDocument(recoveryMode) {
- await pdfManager.ensureDoc("checkHeader");
- await pdfManager.ensureDoc("parseStartXRef");
- await pdfManager.ensureDoc("parse", [recoveryMode]);
- if (!recoveryMode) {
- // Check that at least the first page can be successfully loaded,
- // since otherwise the XRef table is definitely not valid.
- await pdfManager.ensureDoc("checkFirstPage");
- }
- const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
- if (isPureXfa) {
- const task = new WorkerTask("loadXfaFonts");
- startWorkerTask(task);
- await Promise.all([
- pdfManager
- .loadXfaFonts(handler, task)
- .catch(reason => {
- // Ignore errors, to allow the document to load.
- })
- .then(() => finishWorkerTask(task)),
- pdfManager.loadXfaImages(),
- ]);
- }
- const [numPages, fingerprints] = await Promise.all([
- pdfManager.ensureDoc("numPages"),
- pdfManager.ensureDoc("fingerprints"),
- ]);
- // Get htmlForXfa after numPages to avoid to create HTML twice.
- const htmlForXfa = isPureXfa
- ? await pdfManager.ensureDoc("htmlForXfa")
- : null;
- return { numPages, fingerprints, htmlForXfa };
- }
- function getPdfManager(data, evaluatorOptions, enableXfa) {
- const pdfManagerCapability = createPromiseCapability();
- let newPdfManager;
- const source = data.source;
- if (source.data) {
- try {
- newPdfManager = new LocalPdfManager(
- docId,
- source.data,
- source.password,
- evaluatorOptions,
- enableXfa,
- docBaseUrl
- );
- pdfManagerCapability.resolve(newPdfManager);
- } catch (ex) {
- pdfManagerCapability.reject(ex);
- }
- return pdfManagerCapability.promise;
- }
- let pdfStream,
- cachedChunks = [];
- try {
- pdfStream = new PDFWorkerStream(handler);
- } catch (ex) {
- pdfManagerCapability.reject(ex);
- return pdfManagerCapability.promise;
- }
- const fullRequest = pdfStream.getFullReader();
- fullRequest.headersReady
- .then(function () {
- if (!fullRequest.isRangeSupported) {
- return;
- }
- // We don't need auto-fetch when streaming is enabled.
- const disableAutoFetch =
- source.disableAutoFetch || fullRequest.isStreamingSupported;
- newPdfManager = new NetworkPdfManager(
- docId,
- pdfStream,
- {
- msgHandler: handler,
- password: source.password,
- length: fullRequest.contentLength,
- disableAutoFetch,
- rangeChunkSize: source.rangeChunkSize,
- },
- evaluatorOptions,
- enableXfa,
- docBaseUrl
- );
- // There may be a chance that `newPdfManager` is not initialized for
- // the first few runs of `readchunk` block of code. Be sure to send
- // all cached chunks, if any, to chunked_stream via pdf_manager.
- for (let i = 0; i < cachedChunks.length; i++) {
- newPdfManager.sendProgressiveData(cachedChunks[i]);
- }
- cachedChunks = [];
- pdfManagerCapability.resolve(newPdfManager);
- cancelXHRs = null;
- })
- .catch(function (reason) {
- pdfManagerCapability.reject(reason);
- cancelXHRs = null;
- });
- let loaded = 0;
- const flushChunks = function () {
- const pdfFile = arraysToBytes(cachedChunks);
- if (source.length && pdfFile.length !== source.length) {
- warn("reported HTTP length is different from actual");
- }
- // the data is array, instantiating directly from it
- try {
- newPdfManager = new LocalPdfManager(
- docId,
- pdfFile,
- source.password,
- evaluatorOptions,
- enableXfa,
- docBaseUrl
- );
- pdfManagerCapability.resolve(newPdfManager);
- } catch (ex) {
- pdfManagerCapability.reject(ex);
- }
- cachedChunks = [];
- };
- const readPromise = new Promise(function (resolve, reject) {
- const readChunk = function ({ value, done }) {
- try {
- ensureNotTerminated();
- if (done) {
- if (!newPdfManager) {
- flushChunks();
- }
- cancelXHRs = null;
- return;
- }
- loaded += arrayByteLength(value);
- if (!fullRequest.isStreamingSupported) {
- handler.send("DocProgress", {
- loaded,
- total: Math.max(loaded, fullRequest.contentLength || 0),
- });
- }
- if (newPdfManager) {
- newPdfManager.sendProgressiveData(value);
- } else {
- cachedChunks.push(value);
- }
- fullRequest.read().then(readChunk, reject);
- } catch (e) {
- reject(e);
- }
- };
- fullRequest.read().then(readChunk, reject);
- });
- readPromise.catch(function (e) {
- pdfManagerCapability.reject(e);
- cancelXHRs = null;
- });
- cancelXHRs = function (reason) {
- pdfStream.cancelAllRequests(reason);
- };
- return pdfManagerCapability.promise;
- }
- function setupDoc(data) {
- function onSuccess(doc) {
- ensureNotTerminated();
- handler.send("GetDoc", { pdfInfo: doc });
- }
- function onFailure(ex) {
- ensureNotTerminated();
- if (ex instanceof PasswordException) {
- const task = new WorkerTask(`PasswordException: response ${ex.code}`);
- startWorkerTask(task);
- handler
- .sendWithPromise("PasswordRequest", ex)
- .then(function ({ password }) {
- finishWorkerTask(task);
- pdfManager.updatePassword(password);
- pdfManagerReady();
- })
- .catch(function () {
- finishWorkerTask(task);
- handler.send("DocException", ex);
- });
- } else if (
- ex instanceof InvalidPDFException ||
- ex instanceof MissingPDFException ||
- ex instanceof UnexpectedResponseException ||
- ex instanceof UnknownErrorException
- ) {
- handler.send("DocException", ex);
- } else {
- handler.send(
- "DocException",
- new UnknownErrorException(ex.message, ex.toString())
- );
- }
- }
- function pdfManagerReady() {
- ensureNotTerminated();
- loadDocument(false).then(onSuccess, function (reason) {
- ensureNotTerminated();
- // Try again with recoveryMode == true
- if (!(reason instanceof XRefParseException)) {
- onFailure(reason);
- return;
- }
- pdfManager.requestLoadedStream();
- pdfManager.onLoadedStream().then(function () {
- ensureNotTerminated();
- loadDocument(true).then(onSuccess, onFailure);
- });
- });
- }
- ensureNotTerminated();
- const evaluatorOptions = {
- maxImageSize: data.maxImageSize,
- disableFontFace: data.disableFontFace,
- ignoreErrors: data.ignoreErrors,
- isEvalSupported: data.isEvalSupported,
- fontExtraProperties: data.fontExtraProperties,
- useSystemFonts: data.useSystemFonts,
- cMapUrl: data.cMapUrl,
- standardFontDataUrl: data.standardFontDataUrl,
- };
- getPdfManager(data, evaluatorOptions, data.enableXfa)
- .then(function (newPdfManager) {
- if (terminated) {
- // We were in a process of setting up the manager, but it got
- // terminated in the middle.
- newPdfManager.terminate(
- new AbortException("Worker was terminated.")
- );
- throw new Error("Worker was terminated");
- }
- pdfManager = newPdfManager;
- pdfManager.onLoadedStream().then(function (stream) {
- handler.send("DataLoaded", { length: stream.bytes.byteLength });
- });
- })
- .then(pdfManagerReady, onFailure);
- }
- handler.on("GetPage", function wphSetupGetPage(data) {
- return pdfManager.getPage(data.pageIndex).then(function (page) {
- return Promise.all([
- pdfManager.ensure(page, "rotate"),
- pdfManager.ensure(page, "ref"),
- pdfManager.ensure(page, "userUnit"),
- pdfManager.ensure(page, "view"),
- ]).then(function ([rotate, ref, userUnit, view]) {
- return {
- rotate,
- ref,
- userUnit,
- view,
- };
- });
- });
- });
- handler.on("GetPageIndex", function wphSetupGetPageIndex({ ref }) {
- const pageRef = Ref.get(ref.num, ref.gen);
- return pdfManager.ensureCatalog("getPageIndex", [pageRef]);
- });
- handler.on("GetDestinations", function wphSetupGetDestinations(data) {
- return pdfManager.ensureCatalog("destinations");
- });
- handler.on("GetDestination", function wphSetupGetDestination(data) {
- return pdfManager.ensureCatalog("getDestination", [data.id]);
- });
- handler.on("GetPageLabels", function wphSetupGetPageLabels(data) {
- return pdfManager.ensureCatalog("pageLabels");
- });
- handler.on("GetPageLayout", function wphSetupGetPageLayout(data) {
- return pdfManager.ensureCatalog("pageLayout");
- });
- handler.on("GetPageMode", function wphSetupGetPageMode(data) {
- return pdfManager.ensureCatalog("pageMode");
- });
- handler.on("GetViewerPreferences", function (data) {
- return pdfManager.ensureCatalog("viewerPreferences");
- });
- handler.on("GetOpenAction", function (data) {
- return pdfManager.ensureCatalog("openAction");
- });
- handler.on("GetAttachments", function wphSetupGetAttachments(data) {
- return pdfManager.ensureCatalog("attachments");
- });
- handler.on("GetJavaScript", function wphSetupGetJavaScript(data) {
- return pdfManager.ensureCatalog("javaScript");
- });
- handler.on("GetDocJSActions", function wphSetupGetDocJSActions(data) {
- return pdfManager.ensureCatalog("jsActions");
- });
- handler.on("GetPageJSActions", function ({ pageIndex }) {
- return pdfManager.getPage(pageIndex).then(function (page) {
- return pdfManager.ensure(page, "jsActions");
- });
- });
- handler.on("GetOutline", function wphSetupGetOutline(data) {
- return pdfManager.ensureCatalog("documentOutline");
- });
- handler.on("GetOptionalContentConfig", function (data) {
- return pdfManager.ensureCatalog("optionalContentConfig");
- });
- handler.on("GetPermissions", function (data) {
- return pdfManager.ensureCatalog("permissions");
- });
- handler.on("GetMetadata", function wphSetupGetMetadata(data) {
- return Promise.all([
- pdfManager.ensureDoc("documentInfo"),
- pdfManager.ensureCatalog("metadata"),
- ]);
- });
- handler.on("GetMarkInfo", function wphSetupGetMarkInfo(data) {
- return pdfManager.ensureCatalog("markInfo");
- });
- handler.on("GetData", function wphSetupGetData(data) {
- pdfManager.requestLoadedStream();
- return pdfManager.onLoadedStream().then(function (stream) {
- return stream.bytes;
- });
- });
- handler.on("GetStats", function wphSetupGetStats(data) {
- return pdfManager.ensureXRef("stats");
- });
- handler.on("GetAnnotations", function ({ pageIndex, intent }) {
- return pdfManager.getPage(pageIndex).then(function (page) {
- return page.getAnnotationsData(intent);
- });
- });
- handler.on("GetFieldObjects", function (data) {
- return pdfManager.ensureDoc("fieldObjects");
- });
- handler.on("HasJSActions", function (data) {
- return pdfManager.ensureDoc("hasJSActions");
- });
- handler.on("GetCalculationOrderIds", function (data) {
- return pdfManager.ensureDoc("calculationOrderIds");
- });
- handler.on(
- "SaveDocument",
- function ({ isPureXfa, numPages, annotationStorage, filename }) {
- pdfManager.requestLoadedStream();
- const promises = [
- pdfManager.onLoadedStream(),
- pdfManager.ensureCatalog("acroForm"),
- pdfManager.ensureCatalog("acroFormRef"),
- pdfManager.ensureDoc("xref"),
- pdfManager.ensureDoc("startXRef"),
- ];
- if (isPureXfa) {
- promises.push(pdfManager.serializeXfaData(annotationStorage));
- } else {
- for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
- promises.push(
- pdfManager.getPage(pageIndex).then(function (page) {
- const task = new WorkerTask(`Save: page ${pageIndex}`);
- return page
- .save(handler, task, annotationStorage)
- .finally(function () {
- finishWorkerTask(task);
- });
- })
- );
- }
- }
- return Promise.all(promises).then(function ([
- stream,
- acroForm,
- acroFormRef,
- xref,
- startXRef,
- ...refs
- ]) {
- let newRefs = [];
- let xfaData = null;
- if (isPureXfa) {
- xfaData = refs[0];
- if (!xfaData) {
- return stream.bytes;
- }
- } else {
- for (const ref of refs) {
- newRefs = ref
- .filter(x => x !== null)
- .reduce((a, b) => a.concat(b), newRefs);
- }
- if (newRefs.length === 0) {
- // No new refs so just return the initial bytes
- return stream.bytes;
- }
- }
- const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null;
- let xfaDatasetsRef = null;
- let hasXfaDatasetsEntry = false;
- if (Array.isArray(xfa)) {
- for (let i = 0, ii = xfa.length; i < ii; i += 2) {
- if (xfa[i] === "datasets") {
- xfaDatasetsRef = xfa[i + 1];
- acroFormRef = null;
- hasXfaDatasetsEntry = true;
- }
- }
- if (xfaDatasetsRef === null) {
- xfaDatasetsRef = xref.getNewRef();
- }
- } else if (xfa) {
- acroFormRef = null;
- // TODO: Support XFA streams.
- warn("Unsupported XFA type.");
- }
- let newXrefInfo = Object.create(null);
- if (xref.trailer) {
- // Get string info from Info in order to compute fileId.
- const infoObj = Object.create(null);
- const xrefInfo = xref.trailer.get("Info") || null;
- if (xrefInfo instanceof Dict) {
- xrefInfo.forEach((key, value) => {
- if (isString(key) && isString(value)) {
- infoObj[key] = stringToPDFString(value);
- }
- });
- }
- newXrefInfo = {
- rootRef: xref.trailer.getRaw("Root") || null,
- encryptRef: xref.trailer.getRaw("Encrypt") || null,
- newRef: xref.getNewRef(),
- infoRef: xref.trailer.getRaw("Info") || null,
- info: infoObj,
- fileIds: xref.trailer.get("ID") || null,
- startXRef,
- filename,
- };
- }
- xref.resetNewRef();
- return incrementalUpdate({
- originalData: stream.bytes,
- xrefInfo: newXrefInfo,
- newRefs,
- xref,
- hasXfa: !!xfa,
- xfaDatasetsRef,
- hasXfaDatasetsEntry,
- acroFormRef,
- acroForm,
- xfaData,
- });
- });
- }
- );
- handler.on("GetOperatorList", function wphSetupRenderPage(data, sink) {
- const pageIndex = data.pageIndex;
- pdfManager.getPage(pageIndex).then(function (page) {
- const task = new WorkerTask(`GetOperatorList: page ${pageIndex}`);
- startWorkerTask(task);
- // NOTE: Keep this condition in sync with the `info` helper function.
- const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;
- // Pre compile the pdf page and fetch the fonts/images.
- page
- .getOperatorList({
- handler,
- sink,
- task,
- intent: data.intent,
- cacheKey: data.cacheKey,
- annotationStorage: data.annotationStorage,
- })
- .then(
- function (operatorListInfo) {
- finishWorkerTask(task);
- if (start) {
- info(
- `page=${pageIndex + 1} - getOperatorList: time=` +
- `${Date.now() - start}ms, len=${operatorListInfo.length}`
- );
- }
- sink.close();
- },
- function (reason) {
- finishWorkerTask(task);
- if (task.terminated) {
- return; // ignoring errors from the terminated thread
- }
- // For compatibility with older behavior, generating unknown
- // unsupported feature notification on errors.
- handler.send("UnsupportedFeature", {
- featureId: UNSUPPORTED_FEATURES.errorOperatorList,
- });
- sink.error(reason);
- // TODO: Should `reason` be re-thrown here (currently that casues
- // "Uncaught exception: ..." messages in the console)?
- }
- );
- });
- });
- handler.on("GetTextContent", function wphExtractText(data, sink) {
- const pageIndex = data.pageIndex;
- pdfManager.getPage(pageIndex).then(function (page) {
- const task = new WorkerTask("GetTextContent: page " + pageIndex);
- startWorkerTask(task);
- // NOTE: Keep this condition in sync with the `info` helper function.
- const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;
- page
- .extractTextContent({
- handler,
- task,
- sink,
- normalizeWhitespace: data.normalizeWhitespace,
- includeMarkedContent: data.includeMarkedContent,
- combineTextItems: data.combineTextItems,
- })
- .then(
- function () {
- finishWorkerTask(task);
- if (start) {
- info(
- `page=${pageIndex + 1} - getTextContent: time=` +
- `${Date.now() - start}ms`
- );
- }
- sink.close();
- },
- function (reason) {
- finishWorkerTask(task);
- if (task.terminated) {
- return; // ignoring errors from the terminated thread
- }
- sink.error(reason);
- // TODO: Should `reason` be re-thrown here (currently that casues
- // "Uncaught exception: ..." messages in the console)?
- }
- );
- });
- });
- handler.on("GetStructTree", function wphGetStructTree(data) {
- return pdfManager.getPage(data.pageIndex).then(function (page) {
- return pdfManager.ensure(page, "getStructTree");
- });
- });
- handler.on("FontFallback", function (data) {
- return pdfManager.fontFallback(data.id, handler);
- });
- handler.on("Cleanup", function wphCleanup(data) {
- return pdfManager.cleanup(/* manuallyTriggered = */ true);
- });
- handler.on("Terminate", function wphTerminate(data) {
- terminated = true;
- const waitOn = [];
- if (pdfManager) {
- pdfManager.terminate(new AbortException("Worker was terminated."));
- const cleanupPromise = pdfManager.cleanup();
- waitOn.push(cleanupPromise);
- pdfManager = null;
- } else {
- clearPrimitiveCaches();
- }
- if (cancelXHRs) {
- cancelXHRs(new AbortException("Worker was terminated."));
- }
- for (const task of WorkerTasks) {
- waitOn.push(task.finished);
- task.terminate();
- }
- return Promise.all(waitOn).then(function () {
- // Notice that even if we destroying handler, resolved response promise
- // must be sent back.
- handler.destroy();
- handler = null;
- });
- });
- handler.on("Ready", function wphReady(data) {
- setupDoc(docParams);
- docParams = null; // we don't need docParams anymore -- saving memory.
- });
- return workerHandlerName;
- }
- static initializeFromPort(port) {
- const handler = new MessageHandler("worker", "main", port);
- WorkerMessageHandler.setup(handler, port);
- handler.send("ready", null);
- }
- }
- function isMessagePort(maybePort) {
- return (
- typeof maybePort.postMessage === "function" && "onmessage" in maybePort
- );
- }
- // Worker thread (and not Node.js)?
- if (
- typeof window === "undefined" &&
- !isNodeJS &&
- typeof self !== "undefined" &&
- isMessagePort(self)
- ) {
- WorkerMessageHandler.initializeFromPort(self);
- }
- export { WorkerMessageHandler, WorkerTask };