/plugin/src/main/java/com/atlassian/confluence/extra/masterdetail/services/DetailsMacroBodyHandlerFastParse.java
Java | 317 lines | 240 code | 52 blank | 25 comment | 52 complexity | 6a7feeb15dbe8673f4d730457b644540 MD5 | raw file
- package com.atlassian.confluence.extra.masterdetail.services;
- import com.atlassian.confluence.content.render.xhtml.Namespace;
- import com.atlassian.confluence.content.render.xhtml.XhtmlConstants;
- import com.atlassian.confluence.extra.masterdetail.DetailsSummaryMacro;
- import com.atlassian.confluence.extra.masterdetail.analytics.DetailsSummaryMacroMetricsEvent;
- import com.atlassian.confluence.plugins.pageproperties.api.model.PageProperty;
- import com.atlassian.confluence.xhtml.api.MacroDefinition;
- import com.google.common.collect.ImmutableList;
- import com.google.common.collect.ImmutableMap;
- import com.google.common.collect.Lists;
- import com.google.common.collect.Maps;
- import org.apache.commons.lang3.StringUtils;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import javax.xml.namespace.QName;
- import javax.xml.stream.XMLEventReader;
- import javax.xml.stream.XMLStreamConstants;
- import javax.xml.stream.XMLStreamException;
- import javax.xml.stream.events.Characters;
- import javax.xml.stream.events.StartElement;
- import javax.xml.stream.events.XMLEvent;
- import java.io.IOException;
- import java.io.Reader;
- import java.io.StringReader;
- import java.io.StringWriter;
- import java.io.UnsupportedEncodingException;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.concurrent.ConcurrentHashMap;
- import static com.atlassian.confluence.content.render.xhtml.XhtmlConstants.XHTML_NAMESPACE_URI;
- import static org.apache.commons.text.StringEscapeUtils.escapeHtml4;
- import static org.apache.commons.text.StringEscapeUtils.unescapeHtml4;
- /**
- * DetailsMacroBodyHandler with StAX parsing logic, resulting in around a 4x speedup over legacy SAXReader parsing.
- *
- * @since 5.3.0
- */
- public class DetailsMacroBodyHandlerFastParse implements DetailsMacroBodyHandler {
- private static final Logger LOG = LoggerFactory.getLogger(DetailsMacroBodyHandlerFastParse.class);
- public static final String CHARSET_UTF8 = "UTF-8";
- private final Map<String, ImmutableList<ImmutableMap<String, PageProperty>>> detailsById;
- private static final String XHTML_NAMESPACE_PREFIX = "xhtml";
- private static final String XPATH_TBODY = "//" + XHTML_NAMESPACE_PREFIX + ":tbody";
- private static final QName TR_QNAME = new QName(XHTML_NAMESPACE_URI, "tr");
- private static final QName TD_QNAME = new QName(XHTML_NAMESPACE_URI, "td");
- private static final QName TH_QNAME = new QName(XHTML_NAMESPACE_URI, "th");
- private static final Map<String, String> NAMESPACE_MAP;
- static {
- NAMESPACE_MAP = new ConcurrentHashMap<String, String>(XhtmlConstants.STORAGE_NAMESPACES.size());
- for (Namespace namespace : XhtmlConstants.STORAGE_NAMESPACES) {
- NAMESPACE_MAP.put(namespace.getPrefix() != null ? namespace.getPrefix() : XHTML_NAMESPACE_PREFIX, namespace.getUri());
- }
- }
- private final DetailsSummaryMacroMetricsEvent.Builder metrics;
- @FunctionalInterface
- public interface XMLEventReaderSupplier {
- XMLEventReader supplyXMLEventReader(Reader xml) throws XMLStreamException;
- }
- private final XMLEventReaderSupplier xmlEventReaderSupplier;
/**
 * Creates a handler with no details collected yet.
 *
 * @param metrics                metrics builder notified around each extraction
 * @param xmlEventReaderSupplier factory for the event readers used to parse macro bodies
 */
DetailsMacroBodyHandlerFastParse(final DetailsSummaryMacroMetricsEvent.Builder metrics,
                                 XMLEventReaderSupplier xmlEventReaderSupplier) {
    this.xmlEventReaderSupplier = xmlEventReaderSupplier;
    this.metrics = metrics;
    this.detailsById = Maps.newHashMap();
}
- public static String readElementBody(XMLEventReader eventReader)
- throws XMLStreamException {
- StringWriter buf = new StringWriter(1024);
- int depth = 0;
- while (eventReader.hasNext()) {
- XMLEvent xmlEvent = eventReader.peek();
- if (xmlEvent.isStartElement()) {
- ++depth;
- } else if (xmlEvent.isEndElement()) {
- --depth;
- if (depth < 0)
- break;
- }
- xmlEvent = eventReader.nextEvent();
- if (xmlEvent.isCharacters()) {
- Characters xmlEventCharacters = xmlEvent.asCharacters();
- if (xmlEventCharacters.isCData()) {
- buf.append("<![CDATA[").append(xmlEventCharacters.getData()).append("]]>");
- } else {
- buf.append(escapeHtml4(xmlEventCharacters.getData()));
- }
- } else {
- xmlEvent.writeAsEncodedUnicode(buf);
- }
- }
- return buf.getBuffer().toString();
- }
- public static String readElementBodyCharacters(XMLEventReader eventReader)
- throws XMLStreamException {
- StringWriter buf = new StringWriter(1024);
- int depth = 0;
- while (eventReader.hasNext()) {
- XMLEvent xmlEvent = eventReader.peek();
- if (xmlEvent.isStartElement()) {
- ++depth;
- } else if (xmlEvent.isEndElement()) {
- --depth;
- if (depth < 0)
- break;
- }
- xmlEvent = eventReader.nextEvent();
- if (xmlEvent.isCharacters()) {
- xmlEvent.asCharacters().writeAsEncodedUnicode(buf);
- }
- }
- String body = buf.getBuffer().toString();
- return escapeHtml4(body);
- }
- public void handle(MacroDefinition macroDefinition) {
- if (!"details".equals(macroDefinition.getName()))
- return;
- String bodyText = macroDefinition.getBodyText();
- String detailsId = StringUtils.trim(macroDefinition.getParameter(DetailsSummaryMacro.PARAM_ID));
- if (detailsId == null)
- detailsId = "";
- if (StringUtils.isBlank(bodyText)) {
- addToDetails(detailsId, ImmutableMap.of());
- return;
- }
- try {
- metrics.detailsExtractionStart();
- ImmutableMap<String, PageProperty> extractedDetails = extractDetails(bodyText);
- metrics.detailsExtractionFinish(extractedDetails.size());
- addToDetails(detailsId, extractedDetails);
- } catch (Exception e) {
- LOG.error(String.format("Unable to parse detailsById in detailsById macro\n%s", bodyText), e);
- }
- }
- /**
- * Get the detailsById extracted from the content. It is a mapping of ids to a map of heading/values.
- * The default without an id mapping is keyed with the empty string. Note null is returned if no details
- * for the given id are found.
- */
- public List<? extends Map<String, PageProperty>> getDetails(String detailsId) {
- return detailsById.get(detailsId);
- }
- /**
- * @return an ImmutableMap keyed by detail ID to a map of details, where details is a map keyed by heading text to
- * ExtractedDetail objects
- */
- public ImmutableMap<String, ImmutableList<ImmutableMap<String, PageProperty>>> getDetails() {
- return ImmutableMap.copyOf(detailsById);
- }
- private void addToDetails(String id, ImmutableMap<String, PageProperty> details) {
- List<ImmutableMap<String, PageProperty>> newDetails;
- List<ImmutableMap<String, PageProperty>> currentDetails = detailsById.get(id);
- if (currentDetails == null) {
- newDetails = Lists.newArrayList();
- } else {
- newDetails = Lists.newArrayList(currentDetails);
- }
- // Note that this means different keys in different PP macros won't be combined into a single map - that
- // happens later.
- newDetails.add(details);
- detailsById.put(id, ImmutableList.copyOf(newDetails));
- }
- private ImmutableMap<String, PageProperty> extractDetails(String macroBodyXhtml)
- throws IOException, XMLStreamException {
- final XMLEventReader reader = getEventReader(macroBodyXhtml);
- List<List<String>> outer = Lists.newArrayList();
- List<String> inner = Lists.newArrayList();
- int rowIndex = -1;
- int columnIndex = -1;
- // Any row/column can be a header, but if the first row is a header then consider vertical properties (default is horizontal)
- // E.g. [ firstRowIsThs = true ] [ firstRowIsThs = false ] [ firstRowIsThs = false ]
- // <th>Key1</th><th>Key2</th> vs. <th>Key1</th><td>Val1</td> vs. <td>Key1</td><td>Val1</td>
- // <td>Val1</td><td>Val1</td> <th>Key2</th><td>Val2</td> <td>Key2</td><td>Val2</td>
- boolean firstRowIsThs = false;
- while (reader.hasNext()) {
- XMLEvent event = reader.nextEvent();
- switch (event.getEventType()) {
- case XMLStreamConstants.START_ELEMENT:
- StartElement startElement = event.asStartElement();
- if (TR_QNAME.equals(startElement.getName())) {
- // New row
- rowIndex++;
- } else if (TD_QNAME.equals(startElement.getName())) {
- if (rowIndex == 0 && firstRowIsThs) {
- // We are on the first row, but it is not entirely heading elements
- firstRowIsThs = false;
- }
- columnIndex++;
- inner.add(readElementBody(reader));
- } else if (TH_QNAME.equals(startElement.getName())) {
- columnIndex++;
- inner.add(readElementBody(reader));
- if (rowIndex == 0 && columnIndex == 0) {
- firstRowIsThs = true;
- } else if (columnIndex == 0 && firstRowIsThs) {
- // We are on the nth row, and we have a header in the first column
- firstRowIsThs = false;
- }
- }
- break;
- case XMLStreamConstants.END_ELEMENT:
- if (TR_QNAME.equals(event.asEndElement().getName())) {
- outer.add(inner);
- inner = Lists.newArrayList();
- columnIndex = -1;
- }
- break;
- default:
- break;
- }
- }
- reader.close();
- final Map<String, PageProperty> results = new HashMap<>();
- if (firstRowIsThs) {
- List<String> keys = outer.get(0); // first row are heading/keys
- List<String> values = outer.size() > 1 ? outer.get(1) : keys; // second row are values for above keys
- for (int i = 0; i < keys.size(); i++) {
- String key = keys.get(i);
- String value = values.get(i);
- String keyText = getKeyText(key);
- results.put(keyText, new PageProperty(value, key));
- }
- } else {
- for (List<String> row : outer) {
- String key = row.get(0);
- String value = (row.size() > 1) ? row.get(1) : key; // If only single column is provided, keys = values
- String keyText = getKeyText(key);
- if (!results.containsKey(key)) {
- results.put(keyText, new PageProperty(value, key));
- }
- }
- }
- return ImmutableMap.copyOf(results);
- }
- private String getKeyText(String keyMarkup) {
- final String keyMarkupNoNBSP = StringUtils.remove(keyMarkup, " ");
- if (!keyMarkup.contains("<")) {
- // If key is not an xml element, then it is just characters
- return keyMarkupNoNBSP;
- }
- try {
- return readElementBodyCharacters(getEventReader(unescapeHtml4(keyMarkupNoNBSP)));
- } catch (Exception e) {
- return keyMarkupNoNBSP;
- }
- }
- private XMLEventReader getEventReader(final String macroBodyXhtml) throws XMLStreamException, UnsupportedEncodingException {
- StringBuilder builder = new StringBuilder();
- // Add internal DTD declaration to allow support for entities in attribute vales.
- builder.append("<!DOCTYPE xml>").append("<xml");
- for (Namespace namespace : XhtmlConstants.STORAGE_NAMESPACES) {
- builder.append(" xmlns");
- if (!namespace.isDefaultNamespace())
- builder.append(":").append(namespace.getPrefix());
- builder.append("=\"").append(namespace.getUri()).append("\"");
- if (namespace.isDefaultNamespace()) {
- builder.append(" xmlns:xhtml=\"").append(namespace.getUri()).append("\"");
- }
- }
- builder.append(">")
- .append(macroBodyXhtml)
- .append("</xml>");
- StringReader xmlStringReader = new StringReader(builder.toString());
- return xmlEventReaderSupplier.supplyXMLEventReader(xmlStringReader);
- }
- }