PageRenderTime 29ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/portal-impl/src/com/liferay/portlet/wiki/importers/mediawiki/MediaWikiImporter.java

https://github.com/viktorkovacs/liferay-portal-trunk
Java | 690 lines | 499 code | 174 blank | 17 comment | 60 complexity | 1eecb8a79fc3bc4e5bcbc30878ab9fa5 MD5 | raw file
  1. /**
  2. * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
  3. *
  4. * This library is free software; you can redistribute it and/or modify it under
  5. * the terms of the GNU Lesser General Public License as published by the Free
  6. * Software Foundation; either version 2.1 of the License, or (at your option)
  7. * any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  12. * details.
  13. */
  14. package com.liferay.portlet.wiki.importers.mediawiki;
  15. import com.liferay.documentlibrary.service.DLLocalServiceUtil;
  16. import com.liferay.portal.NoSuchUserException;
  17. import com.liferay.portal.kernel.exception.PortalException;
  18. import com.liferay.portal.kernel.exception.SystemException;
  19. import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
  20. import com.liferay.portal.kernel.log.Log;
  21. import com.liferay.portal.kernel.log.LogFactoryUtil;
  22. import com.liferay.portal.kernel.util.ArrayUtil;
  23. import com.liferay.portal.kernel.util.MapUtil;
  24. import com.liferay.portal.kernel.util.ObjectValuePair;
  25. import com.liferay.portal.kernel.util.ProgressTracker;
  26. import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
  27. import com.liferay.portal.kernel.util.StringBundler;
  28. import com.liferay.portal.kernel.util.StringPool;
  29. import com.liferay.portal.kernel.util.StringUtil;
  30. import com.liferay.portal.kernel.util.Validator;
  31. import com.liferay.portal.kernel.xml.Document;
  32. import com.liferay.portal.kernel.xml.DocumentException;
  33. import com.liferay.portal.kernel.xml.Element;
  34. import com.liferay.portal.kernel.xml.SAXReaderUtil;
  35. import com.liferay.portal.kernel.zip.ZipReader;
  36. import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
  37. import com.liferay.portal.model.User;
  38. import com.liferay.portal.service.ServiceContext;
  39. import com.liferay.portal.service.UserLocalServiceUtil;
  40. import com.liferay.portal.util.PropsValues;
  41. import com.liferay.portlet.asset.NoSuchTagException;
  42. import com.liferay.portlet.asset.model.AssetTag;
  43. import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
  44. import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
  45. import com.liferay.portlet.asset.util.AssetUtil;
  46. import com.liferay.portlet.wiki.ImportFilesException;
  47. import com.liferay.portlet.wiki.NoSuchPageException;
  48. import com.liferay.portlet.wiki.importers.WikiImporter;
  49. import com.liferay.portlet.wiki.importers.WikiImporterKeys;
  50. import com.liferay.portlet.wiki.model.WikiNode;
  51. import com.liferay.portlet.wiki.model.WikiPage;
  52. import com.liferay.portlet.wiki.model.WikiPageConstants;
  53. import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
  54. import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
  55. import java.io.File;
  56. import java.io.FileReader;
  57. import java.io.IOException;
  58. import java.util.ArrayList;
  59. import java.util.Collections;
  60. import java.util.HashMap;
  61. import java.util.Iterator;
  62. import java.util.List;
  63. import java.util.Map;
  64. import java.util.regex.Matcher;
  65. import java.util.regex.Pattern;
  66. /**
  67. * @author Alvaro del Castillo
  68. * @author Jorge Ferrer
  69. */
  70. public class MediaWikiImporter implements WikiImporter {
  // Content argument passed to addPage when processImages has to create the
  // shared images page.
  71. public static final String SHARED_IMAGES_CONTENT = "See attachments";
  // Title of the wiki page that processImages attaches every imported image to.
  72. public static final String SHARED_IMAGES_TITLE = "SharedImages";
  73. public void importPages(
  74. long userId, WikiNode node, File[] files,
  75. Map<String, String[]> options)
  76. throws PortalException {
  77. if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
  78. throw new PortalException("The pages file is mandatory");
  79. }
  80. File pagesFile = files[0];
  81. File usersFile = files[1];
  82. File imagesFile = files[2];
  83. try {
  84. Document doc = SAXReaderUtil.read(pagesFile);
  85. Map<String, String> usersMap = readUsersFile(usersFile);
  86. Element root = doc.getRootElement();
  87. List<String> specialNamespaces = readSpecialNamespaces(root);
  88. processSpecialPages(userId, node, root, specialNamespaces);
  89. processRegularPages(
  90. userId, node, root, specialNamespaces, usersMap, imagesFile,
  91. options);
  92. processImages(userId, node, imagesFile);
  93. moveFrontPage(userId, node, options);
  94. }
  95. catch (DocumentException de) {
  96. throw new ImportFilesException("Invalid XML file provided");
  97. }
  98. catch (IOException de) {
  99. throw new ImportFilesException("Error reading the files provided");
  100. }
  101. catch (PortalException e) {
  102. throw e;
  103. }
  104. catch (Exception e) {
  105. throw new PortalException(e);
  106. }
  107. }
  108. protected long getUserId(
  109. long userId, WikiNode node, String author,
  110. Map<String, String> usersMap)
  111. throws PortalException, SystemException {
  112. User user = null;
  113. String emailAddress = usersMap.get(author);
  114. try {
  115. if (Validator.isNull(emailAddress)) {
  116. user = UserLocalServiceUtil.getUserByScreenName(
  117. node.getCompanyId(), author.toLowerCase());
  118. }
  119. else {
  120. user = UserLocalServiceUtil.getUserByEmailAddress(
  121. node.getCompanyId(), emailAddress);
  122. }
  123. }
  124. catch (NoSuchUserException nsue) {
  125. user = UserLocalServiceUtil.getUserById(userId);
  126. }
  127. return user.getUserId();
  128. }
  129. protected void importPage(
  130. long userId, String author, WikiNode node, String title,
  131. String content, String summary, Map<String, String> usersMap)
  132. throws PortalException {
  133. try {
  134. long authorUserId = getUserId(userId, node, author, usersMap);
  135. String parentTitle = readParentTitle(content);
  136. String redirectTitle = readRedirectTitle(content);
  137. ServiceContext serviceContext = new ServiceContext();
  138. serviceContext.setAddCommunityPermissions(true);
  139. serviceContext.setAddGuestPermissions(true);
  140. serviceContext.setAssetTagNames(
  141. readAssetTagNames(userId, node, content));
  142. if (Validator.isNull(redirectTitle)) {
  143. content = _translator.translate(content);
  144. }
  145. else {
  146. content =
  147. StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
  148. StringPool.DOUBLE_CLOSE_BRACKET;
  149. }
  150. WikiPage page = null;
  151. try {
  152. page = WikiPageLocalServiceUtil.getPage(
  153. node.getNodeId(), title);
  154. }
  155. catch (NoSuchPageException nspe) {
  156. page = WikiPageLocalServiceUtil.addPage(
  157. authorUserId, node.getNodeId(), title,
  158. WikiPageConstants.NEW, null, true, serviceContext);
  159. }
  160. WikiPageLocalServiceUtil.updatePage(
  161. authorUserId, node.getNodeId(), title, page.getVersion(),
  162. content, summary, true, "creole", parentTitle, redirectTitle,
  163. serviceContext);
  164. }
  165. catch (Exception e) {
  166. throw new PortalException("Error importing page " + title, e);
  167. }
  168. }
  169. protected boolean isSpecialMediaWikiPage(
  170. String title, List<String> specialNamespaces) {
  171. for (String namespace: specialNamespaces) {
  172. if (title.startsWith(namespace + StringPool.COLON)) {
  173. return true;
  174. }
  175. }
  176. return false;
  177. }
  178. protected boolean isValidImage(String[] paths, byte[] bytes) {
  179. if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
  180. return false;
  181. }
  182. if ((paths.length > 1) &&
  183. (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
  184. return false;
  185. }
  186. String fileName = paths[paths.length - 1];
  187. try {
  188. DLLocalServiceUtil.validate(fileName, true, bytes);
  189. }
  190. catch (PortalException pe) {
  191. return false;
  192. }
  193. catch (SystemException se) {
  194. return false;
  195. }
  196. return true;
  197. }
  198. protected void moveFrontPage(
  199. long userId, WikiNode node, Map<String, String[]> options) {
  200. String frontPageTitle = MapUtil.getString(
  201. options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
  202. if (Validator.isNotNull(frontPageTitle)) {
  203. frontPageTitle = normalizeTitle(frontPageTitle);
  204. try {
  205. if (WikiPageLocalServiceUtil.getPagesCount(
  206. node.getNodeId(), frontPageTitle, true) > 0) {
  207. ServiceContext serviceContext = new ServiceContext();
  208. serviceContext.setAddCommunityPermissions(true);
  209. serviceContext.setAddGuestPermissions(true);
  210. WikiPageLocalServiceUtil.movePage(
  211. userId, node.getNodeId(), frontPageTitle,
  212. WikiPageConstants.FRONT_PAGE, false, serviceContext);
  213. }
  214. }
  215. catch (Exception e) {
  216. if (_log.isWarnEnabled()) {
  217. StringBundler sb = new StringBundler(4);
  218. sb.append("Could not move ");
  219. sb.append(WikiPageConstants.FRONT_PAGE);
  220. sb.append(" to the title provided: ");
  221. sb.append(frontPageTitle);
  222. _log.warn(sb.toString(), e);
  223. }
  224. }
  225. }
  226. }
  227. protected String normalize(String categoryName, int length) {
  228. categoryName = AssetUtil.toWord(categoryName.trim());
  229. return StringUtil.shorten(categoryName, length);
  230. }
  231. protected String normalizeDescription(String description) {
  232. description = description.replaceAll(
  233. _categoriesPattern.pattern(), StringPool.BLANK);
  234. return normalize(description, 300);
  235. }
  236. protected String normalizeTitle(String title) {
  237. title = title.replaceAll(
  238. PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
  239. return StringUtil.shorten(title, 75);
  240. }
  241. protected void processImages(long userId, WikiNode node, File imagesFile)
  242. throws Exception {
  243. if ((imagesFile == null) || (!imagesFile.exists())) {
  244. return;
  245. }
  246. ProgressTracker progressTracker =
  247. ProgressTrackerThreadLocal.getProgressTracker();
  248. int count = 0;
  249. ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
  250. List<String> entries = zipReader.getEntries();
  251. int total = entries.size();
  252. if (total > 0) {
  253. try {
  254. WikiPageLocalServiceUtil.getPage(
  255. node.getNodeId(), SHARED_IMAGES_TITLE);
  256. }
  257. catch (NoSuchPageException nspe) {
  258. ServiceContext serviceContext = new ServiceContext();
  259. serviceContext.setAddCommunityPermissions(true);
  260. serviceContext.setAddGuestPermissions(true);
  261. WikiPageLocalServiceUtil.addPage(
  262. userId, node.getNodeId(), SHARED_IMAGES_TITLE,
  263. SHARED_IMAGES_CONTENT, null, true, serviceContext);
  264. }
  265. }
  266. List<ObjectValuePair<String, byte[]>> attachments =
  267. new ArrayList<ObjectValuePair<String, byte[]>>();
  268. int percentage = 50;
  269. for (int i = 0; i < entries.size(); i++) {
  270. String entry = entries.get(i);
  271. String key = entry;
  272. byte[] value = zipReader.getEntryAsByteArray(entry);
  273. String[] paths = StringUtil.split(key, StringPool.SLASH);
  274. if (!isValidImage(paths, value)) {
  275. if (_log.isInfoEnabled()) {
  276. _log.info("Ignoring " + key);
  277. }
  278. continue;
  279. }
  280. String fileName = paths[paths.length - 1].toLowerCase();
  281. attachments.add(
  282. new ObjectValuePair<String, byte[]>(fileName, value));
  283. count++;
  284. if ((i % 5) == 0) {
  285. WikiPageLocalServiceUtil.addPageAttachments(
  286. userId, node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
  287. attachments.clear();
  288. percentage = Math.min(50 + (i * 50) / total, 99);
  289. progressTracker.updateProgress(percentage);
  290. }
  291. }
  292. if (!attachments.isEmpty()) {
  293. WikiPageLocalServiceUtil.addPageAttachments(
  294. userId, node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
  295. }
  296. zipReader.close();
  297. if (_log.isInfoEnabled()) {
  298. _log.info("Imported " + count + " images into " + node.getName());
  299. }
  300. }
  301. protected void processRegularPages(
  302. long userId, WikiNode node, Element root,
  303. List<String> specialNamespaces, Map<String, String> usersMap,
  304. File imagesFile, Map<String, String[]> options) {
  305. boolean importLatestVersion = MapUtil.getBoolean(
  306. options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
  307. ProgressTracker progressTracker =
  308. ProgressTrackerThreadLocal.getProgressTracker();
  309. int count = 0;
  310. List<Element> pages = root.elements("page");
  311. int total = pages.size();
  312. Iterator<Element> itr = root.elements("page").iterator();
  313. int percentage = 10;
  314. int maxPercentage = 50;
  315. if ((imagesFile == null) || (!imagesFile.exists())) {
  316. maxPercentage = 99;
  317. }
  318. int percentageRange = maxPercentage - percentage;
  319. for (int i = 0; itr.hasNext(); i++) {
  320. Element pageEl = itr.next();
  321. String title = pageEl.elementText("title");
  322. title = normalizeTitle(title);
  323. percentage = Math.min(
  324. 10 + (i * percentageRange) / total, maxPercentage);
  325. progressTracker.updateProgress(percentage);
  326. if (isSpecialMediaWikiPage(title, specialNamespaces)) {
  327. continue;
  328. }
  329. List<Element> revisionEls = pageEl.elements("revision");
  330. if (importLatestVersion) {
  331. Element lastRevisionEl = revisionEls.get(
  332. revisionEls.size() - 1);
  333. revisionEls = new ArrayList<Element>();
  334. revisionEls.add(lastRevisionEl);
  335. }
  336. for (Element curRevisionEl : revisionEls) {
  337. String author = curRevisionEl.element(
  338. "contributor").elementText("username");
  339. String content = curRevisionEl.elementText("text");
  340. String summary = curRevisionEl.elementText("comment");
  341. try {
  342. importPage(
  343. userId, author, node, title, content, summary,
  344. usersMap);
  345. }
  346. catch (Exception e) {
  347. if (_log.isWarnEnabled()) {
  348. StringBundler sb = new StringBundler(3);
  349. sb.append("Page with title ");
  350. sb.append(title);
  351. sb.append(" could not be imported");
  352. _log.warn(sb.toString(), e);
  353. }
  354. }
  355. }
  356. count++;
  357. }
  358. if (_log.isInfoEnabled()) {
  359. _log.info("Imported " + count + " pages into " + node.getName());
  360. }
  361. }
  362. protected void processSpecialPages(
  363. long userId, WikiNode node, Element root,
  364. List<String> specialNamespaces)
  365. throws PortalException {
  366. ProgressTracker progressTracker =
  367. ProgressTrackerThreadLocal.getProgressTracker();
  368. List<Element> pages = root.elements("page");
  369. int total = pages.size();
  370. Iterator<Element> itr = pages.iterator();
  371. for (int i = 0; itr.hasNext(); i++) {
  372. Element page = itr.next();
  373. String title = page.elementText("title");
  374. if (!title.startsWith("Category:")) {
  375. if (isSpecialMediaWikiPage(title, specialNamespaces)) {
  376. root.remove(page);
  377. }
  378. continue;
  379. }
  380. String categoryName = title.substring("Category:".length());
  381. categoryName = normalize(categoryName, 75);
  382. String description = page.element("revision").elementText("text");
  383. description = normalizeDescription(description);
  384. try {
  385. AssetTag assetTag = null;
  386. try {
  387. assetTag = AssetTagLocalServiceUtil.getTag(
  388. node.getCompanyId(), categoryName);
  389. }
  390. catch (NoSuchTagException nste) {
  391. ServiceContext serviceContext = new ServiceContext();
  392. serviceContext.setAddCommunityPermissions(true);
  393. serviceContext.setAddGuestPermissions(true);
  394. serviceContext.setScopeGroupId(node.getGroupId());
  395. assetTag = AssetTagLocalServiceUtil.addTag(
  396. userId, categoryName, null, serviceContext);
  397. }
  398. if (Validator.isNotNull(description)) {
  399. AssetTagPropertyLocalServiceUtil.addTagProperty(
  400. userId, assetTag.getTagId(), "description",
  401. description);
  402. }
  403. }
  404. catch (SystemException se) {
  405. _log.error(se, se);
  406. }
  407. if ((i % 5) == 0) {
  408. progressTracker.updateProgress((i * 10) / total);
  409. }
  410. }
  411. }
  412. protected String[] readAssetTagNames(
  413. long userId, WikiNode node, String content)
  414. throws PortalException, SystemException {
  415. Matcher matcher = _categoriesPattern.matcher(content);
  416. List<String> assetTagNames = new ArrayList<String>();
  417. while (matcher.find()) {
  418. String categoryName = matcher.group(1);
  419. categoryName = normalize(categoryName, 75);
  420. AssetTag assetTag = null;
  421. try {
  422. assetTag = AssetTagLocalServiceUtil.getTag(
  423. node.getGroupId(), categoryName);
  424. }
  425. catch (NoSuchTagException nste) {
  426. ServiceContext serviceContext = new ServiceContext();
  427. serviceContext.setAddCommunityPermissions(true);
  428. serviceContext.setAddGuestPermissions(true);
  429. serviceContext.setScopeGroupId(node.getGroupId());
  430. assetTag = AssetTagLocalServiceUtil.addTag(
  431. userId, categoryName, null, serviceContext);
  432. }
  433. assetTagNames.add(assetTag.getName());
  434. }
  435. if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
  436. assetTagNames.add(_WORK_IN_PROGRESS_TAG);
  437. }
  438. return assetTagNames.toArray(new String[assetTagNames.size()]);
  439. }
  440. protected String readParentTitle(String content) {
  441. Matcher matcher = _parentPattern.matcher(content);
  442. String redirectTitle = StringPool.BLANK;
  443. if (matcher.find()) {
  444. redirectTitle = matcher.group(1);
  445. redirectTitle = normalizeTitle(redirectTitle);
  446. redirectTitle += " (disambiguation)";
  447. }
  448. return redirectTitle;
  449. }
  450. protected String readRedirectTitle(String content) {
  451. Matcher matcher = _redirectPattern.matcher(content);
  452. String redirectTitle = StringPool.BLANK;
  453. if (matcher.find()) {
  454. redirectTitle = matcher.group(1);
  455. redirectTitle = normalizeTitle(redirectTitle);
  456. }
  457. return redirectTitle;
  458. }
  459. protected List<String> readSpecialNamespaces(Element root)
  460. throws ImportFilesException {
  461. List<String> namespaces = new ArrayList<String>();
  462. Element siteinfoEl = root.element("siteinfo");
  463. if (siteinfoEl == null) {
  464. throw new ImportFilesException("Invalid pages XML file");
  465. }
  466. Iterator<Element> itr = siteinfoEl.element(
  467. "namespaces").elements("namespace").iterator();
  468. while (itr.hasNext()) {
  469. Element namespace = itr.next();
  470. if (!namespace.attribute("key").getData().equals("0")) {
  471. namespaces.add(namespace.getText());
  472. }
  473. }
  474. return namespaces;
  475. }
  476. protected Map<String, String> readUsersFile(File usersFile)
  477. throws IOException {
  478. if ((usersFile == null) || (!usersFile.exists())) {
  479. return Collections.emptyMap();
  480. }
  481. Map<String, String> usersMap = new HashMap<String, String>();
  482. UnsyncBufferedReader unsyncBufferedReader =
  483. new UnsyncBufferedReader(new FileReader(usersFile));
  484. String line = unsyncBufferedReader.readLine();
  485. while (line != null) {
  486. String[] array = StringUtil.split(line);
  487. if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
  488. (Validator.isNotNull(array[1]))) {
  489. usersMap.put(array[0], array[1]);
  490. }
  491. else {
  492. if (_log.isInfoEnabled()) {
  493. _log.info(
  494. "Ignoring line " + line +
  495. " because it does not contain exactly 2 columns");
  496. }
  497. }
  498. line = unsyncBufferedReader.readLine();
  499. }
  500. return usersMap;
  501. }
  // Directory names checked by isValidImage: an archive entry whose first or
  // second path segment equals one of these is not imported.
  502. private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
  503. "thumb", "temp", "archive"
  504. };
  // Marker searched for in the page content by readAssetTagNames ...
  505. private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
  // ... and the tag name added to the page when that marker is present.
  506. private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
  507. private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
  // Matches a [[Category:name]] or [[category:name]] link together with any
  // newlines that follow it; group 1 is the category name.
  508. private static Pattern _categoriesPattern = Pattern.compile(
  509. "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
  // Matches {{OtherTopics|name}}; group 1 is the template argument, which
  // readParentTitle turns into the parent page title.
  510. private static Pattern _parentPattern = Pattern.compile(
  511. "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
  // Matches #REDIRECT [[target]]; group 1 is the redirect target.
  512. private static Pattern _redirectPattern = Pattern.compile(
  513. "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
  // Translator applied by importPage to the content of pages that are not
  // redirects.
  514. private MediaWikiToCreoleTranslator _translator =
  515. new MediaWikiToCreoleTranslator();
  516. }