/src/main/java/org/docx4j/samples/CompareDocuments.java

http://github.com/plutext/docx4j · Java · 146 lines · 72 code · 28 blank · 46 comment · 4 complexity · 3f53e4cb9e02418f8f4c67a351b5352b MD5 · raw file

  1. /*
  2. * Copyright 2007-2008, Plutext Pty Ltd.
  3. *
  4. * This file is part of docx4j.
  5. docx4j is licensed under the Apache License, Version 2.0 (the "License");
  6. you may not use this file except in compliance with the License.
  7. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. */
  15. package org.docx4j.samples;
  16. import java.io.OutputStream;
  17. import java.util.ArrayList;
  18. import java.util.Calendar;
  19. import java.util.List;
  20. import javax.xml.bind.JAXBContext;
  21. import javax.xml.bind.JAXBElement;
  22. import javax.xml.bind.JAXBException;
  23. import javax.xml.bind.Unmarshaller;
  24. import org.docx4j.convert.out.pdf.viaXSLFO.PdfSettings;
  25. import org.docx4j.diff.Differencer;
  26. import org.docx4j.fonts.IdentityPlusMapper;
  27. import org.docx4j.openpackaging.io.LoadFromZipFile;
  28. import org.docx4j.openpackaging.io.SaveToZipFile;
  29. import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
  30. import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
  31. import org.docx4j.openpackaging.parts.relationships.Namespaces;
  32. import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
  33. import org.docx4j.relationships.Relationship;
  34. import org.docx4j.wml.Body;
  35. import org.docx4j.wml.Document;
  36. import org.docx4j.wml.SdtContentBlock;
  37. /**
  38. * This sample compares the 2 input documents, and renders
  39. * the result using PDF viewer.
  40. *
  41. */
  42. public class CompareDocuments {
  43. public static JAXBContext context = org.docx4j.jaxb.Context.jc;
  44. /**
  45. * @param args
  46. */
  47. public static void main(String[] args) throws Exception {
  48. String newerfilepath = System.getProperty("user.dir") + "/sample-docs/word/sample-docxv2.docx";
  49. String olderfilepath = System.getProperty("user.dir") + "/sample-docs/word/sample-docx.docx";
  50. // 1. Load the Packages
  51. WordprocessingMLPackage newerPackage = WordprocessingMLPackage.load(new java.io.File(newerfilepath));
  52. WordprocessingMLPackage olderPackage = WordprocessingMLPackage.load(new java.io.File(olderfilepath));
  53. Body newerBody = ((Document)newerPackage.getMainDocumentPart().getJaxbElement()).getBody();
  54. Body olderBody = ((Document)olderPackage.getMainDocumentPart().getJaxbElement()).getBody();
  55. System.out.println("Differencing..");
  56. // 2. Do the differencing
  57. java.io.StringWriter sw = new java.io.StringWriter();
  58. javax.xml.transform.stream.StreamResult result = new javax.xml.transform.stream.StreamResult(
  59. sw);
  60. Calendar changeDate = null;
  61. Differencer pd = new Differencer();
  62. pd.setRelsDiffIdentifier("blagh"); // not necessary in this case
  63. pd.diff(newerBody, olderBody, result, "someone", changeDate,
  64. newerPackage.getMainDocumentPart().getRelationshipsPart(),
  65. olderPackage.getMainDocumentPart().getRelationshipsPart()
  66. );
  67. // 3. Get the result
  68. String contentStr = sw.toString();
  69. System.out.println("Result: \n\n " + contentStr);
  70. Body newBody = (Body) org.docx4j.XmlUtils
  71. .unmarshalString(contentStr);
  72. // 4. Display the result as a PDF
  73. // To do this, we'll replace the body in the newer document
  74. ((Document)newerPackage.getMainDocumentPart().getJaxbElement()).setBody(newBody);
  75. RelationshipsPart rp = newerPackage.getMainDocumentPart().getRelationshipsPart();
  76. handleRels(pd, rp);
  77. newerPackage.setFontMapper(new IdentityPlusMapper());
  78. org.docx4j.convert.out.pdf.PdfConversion c
  79. = new org.docx4j.convert.out.pdf.viaXSLFO.Conversion(newerPackage);
  80. OutputStream os = new java.io.FileOutputStream(System.getProperty("user.dir") + "/OUT_CompareDocuments.pdf");
  81. c.output(os, new PdfSettings() );
  82. System.out.println("Saved " + System.getProperty("user.dir") + "/OUT_CompareDocuments.pdf");
  83. }
  84. /**
  85. In the general case, you need to handle relationships.
  86. Although not necessary in this simple example,
  87. we do it anyway for the purposes of illustration.
  88. */
  89. private static void handleRels(Differencer pd, RelationshipsPart rp) {
  90. // Since we are going to add rels appropriate to the docs being
  91. // compared, for neatness and to avoid duplication
  92. // (duplication of internal part names is fatal in Word,
  93. // and export xslt makes images internal, though it does avoid duplicating
  94. // a part ),
  95. // remove any existing rels which point to images
  96. List<Relationship> relsToRemove = new ArrayList<Relationship>();
  97. for (Relationship r : rp.getRelationships().getRelationship() ) {
  98. // Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
  99. if (r.getType().equals(Namespaces.IMAGE)) {
  100. relsToRemove.add(r);
  101. }
  102. }
  103. for (Relationship r : relsToRemove) {
  104. rp.removeRelationship(r);
  105. }
  106. // Now add the rels we composed
  107. List<Relationship> newRels = pd.getComposedRels();
  108. for (Relationship nr : newRels) {
  109. rp.addRelationship(nr);
  110. }
  111. }
  112. }