PageRenderTime 66ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/okapi/tikal/src/main/java/net/sf/okapi/applications/tikal/Main.java

https://code.google.com/
Java | 1809 lines | 1591 code | 110 blank | 108 comment | 349 complexity | 6468c4c8ca695fc2aa6b075d323d3e2f MD5 | raw file
Possible License(s): LGPL-2.1, LGPL-3.0
  1. /*===========================================================================
  2. Copyright (C) 2009-2011 by the Okapi Framework contributors
  3. -----------------------------------------------------------------------------
  4. This library is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at
  7. your option) any later version.
  8. This library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
  11. General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with this library; if not, write to the Free Software Foundation,
  14. Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  15. See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
  16. ===========================================================================*/
  17. package net.sf.okapi.applications.tikal;
  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.io.PrintStream;
  21. import java.net.MalformedURLException;
  22. import java.net.URI;
  23. import java.net.URISyntaxException;
  24. import java.net.URL;
  25. import java.nio.charset.Charset;
  26. import java.security.InvalidParameterException;
  27. import java.text.NumberFormat;
  28. import java.util.ArrayList;
  29. import java.util.Hashtable;
  30. import java.util.Iterator;
  31. import java.util.List;
  32. import java.util.Locale;
  33. import java.util.logging.Handler;
  34. import java.util.logging.Level;
  35. import java.util.logging.Logger;
  36. import java.util.regex.Matcher;
  37. import java.util.regex.Pattern;
  38. import net.sf.okapi.common.FileUtil;
  39. import net.sf.okapi.common.IParameters;
  40. import net.sf.okapi.common.Util;
  41. import net.sf.okapi.common.exceptions.OkapiIOException;
  42. import net.sf.okapi.common.filters.DefaultFilters;
  43. import net.sf.okapi.common.filters.FilterConfiguration;
  44. import net.sf.okapi.common.filters.FilterConfigurationMapper;
  45. import net.sf.okapi.common.filters.IFilterConfigurationEditor;
  46. import net.sf.okapi.common.filters.IFilterConfigurationListEditor;
  47. import net.sf.okapi.common.filterwriter.XLIFFWriter;
  48. import net.sf.okapi.common.LocaleId;
  49. import net.sf.okapi.common.pipeline.IPipelineStep;
  50. import net.sf.okapi.common.pipelinedriver.BatchItemContext;
  51. import net.sf.okapi.common.pipelinedriver.PipelineDriver;
  52. import net.sf.okapi.common.plugins.PluginsManager;
  53. import net.sf.okapi.common.resource.RawDocument;
  54. import net.sf.okapi.common.resource.TextFragment;
  55. import net.sf.okapi.common.resource.TextFragment.TagType;
  56. import net.sf.okapi.common.query.IQuery;
  57. import net.sf.okapi.lib.translation.ITMQuery;
  58. import net.sf.okapi.common.query.QueryResult;
  59. import net.sf.okapi.steps.common.FilterEventsToRawDocumentStep;
  60. import net.sf.okapi.steps.common.FilterEventsWriterStep;
  61. import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
  62. import net.sf.okapi.steps.formatconversion.FormatConversionStep;
  63. import net.sf.okapi.steps.formatconversion.Parameters;
  64. import net.sf.okapi.steps.formatconversion.TableFilterWriterParameters;
  65. import net.sf.okapi.steps.leveraging.LeveragingStep;
  66. import net.sf.okapi.steps.moses.ExtractionStep;
  67. import net.sf.okapi.steps.moses.MergingParameters;
  68. import net.sf.okapi.steps.moses.MergingStep;
  69. import net.sf.okapi.steps.segmentation.SegmentationStep;
  70. import net.sf.okapi.connectors.apertium.ApertiumMTConnector;
  71. import net.sf.okapi.connectors.globalsight.GlobalSightTMConnector;
  72. import net.sf.okapi.connectors.google.GoogleMTv2Connector;
  73. import net.sf.okapi.connectors.microsoft.MicrosoftMTConnector;
  74. import net.sf.okapi.connectors.mymemory.MyMemoryTMConnector;
  75. import net.sf.okapi.connectors.opentran.OpenTranTMConnector;
  76. import net.sf.okapi.connectors.pensieve.PensieveTMConnector;
  77. import net.sf.okapi.connectors.tda.TDASearchConnector;
  78. import net.sf.okapi.connectors.translatetoolkit.TranslateToolkitTMConnector;
  79. import net.sf.okapi.filters.mosestext.FilterWriterParameters;
  80. public class Main {
  81. protected final static int CMD_EXTRACT = 0;
  82. protected final static int CMD_MERGE = 1;
  83. protected final static int CMD_EDITCONFIG = 2;
  84. protected final static int CMD_QUERYTRANS = 3;
  85. protected final static int CMD_CONV2PO = 4;
  86. protected final static int CMD_CONV2TMX = 5;
  87. protected final static int CMD_CONV2TABLE = 6;
  88. protected final static int CMD_CONV2PEN = 7;
  89. protected final static int CMD_TRANSLATE = 8;
  90. protected final static int CMD_EXTRACTTOMOSES = 9;
  91. protected final static int CMD_LEVERAGEMOSES = 10;
  92. protected final static int CMD_SEGMENTATION = 11;
  93. protected final static int CMD_SHOWCONFIGS = 12;
  94. protected final static int CMD_ADDTRANS = 13;
  95. private static final String DEFAULT_SEGRULES = "-";
  96. private static final String MSG_ONLYWITHUICOMP = "UI-based commands are available only in the distributions with UI components.";
  97. private static PrintStream ps;
  98. protected ArrayList<String> inputs;
  99. protected String skeleton;
  100. protected String output;
  101. protected String specifiedConfigId;
  102. protected String specifiedConfigIdPath;
  103. protected String configId;
  104. protected String inputEncoding;
  105. protected String outputEncoding;
  106. protected LocaleId srcLoc;
  107. protected LocaleId trgLoc;
  108. protected int command = -1;
  109. protected String query;
  110. protected String addTransTrans;
  111. protected int addTransRating = 4;
  112. protected boolean useGoogleV2;
  113. protected String googleV2Params;
  114. protected boolean useOpenTran;
  115. protected boolean useTransToolkit;
  116. protected String transToolkitParams;
  117. protected boolean useGlobalSight;
  118. protected String globalSightParams;
  119. protected boolean useTDA;
  120. protected String tdaParams;
  121. protected boolean useMyMemory;
  122. protected String myMemoryParams;
  123. protected boolean useApertium;
  124. protected String apertiumParams;
  125. protected boolean usePensieve;
  126. protected String pensieveData;
  127. protected boolean useMicrosoft;
  128. protected String microsoftParams;
  129. // protected boolean useProMT;
  130. // protected String proMTParams;
  131. protected boolean genericOutput = false;
  132. protected String tableConvFormat;
  133. protected String tableConvCodes;
  134. protected int convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_TARGETOREMPTY;
  135. protected boolean convSkipEntriesWithoutText = true;
  136. protected boolean convOverwrite = false;
  137. protected String segRules;
  138. protected boolean showTraceHint = true;
  139. protected String tmOptions;
  140. protected boolean levOptFillTarget = true;
  141. protected String levOptTMXPath;
  142. protected boolean extOptCopy = true; // Copy source in empty target by default
  143. protected boolean extOptAltTrans = true; // Output alt-trans by default
  144. protected boolean mosesCopyToTarget = false;
  145. protected boolean mosesOverwriteTarget = false;
  146. protected boolean moses2Outputs = false;
  147. protected boolean mosesUseGModeInAltTrans = true;
  148. protected String mosesFromPath;
  149. protected String mosesToPath;
  150. protected String skeletonDir;
  151. protected String outputDir;
  152. private FilterConfigurationMapper fcMapper;
  153. private Hashtable<String, String> extensionsMap;
  154. private Hashtable<String, String> filtersMap;
  155. /**
  156. * Try the guess the encoding of the console.
  157. * @return the guessed name of the console's encoding.
  158. */
  159. private static String getConsoleEncodingName () {
  160. String osName = System.getProperty("os.name");
  161. if ( osName.startsWith("Mac OS")) {
  162. return "UTF-8"; // Apparently the default for bash on Mac
  163. }
  164. if ( osName.startsWith("Windows") ) {
  165. //TODO: Get DOS code-pages per locale
  166. return "cp850"; // Not perfect, but covers many languages
  167. }
  168. // Default: Assumes unique encoding overall
  169. return Charset.defaultCharset().name();
  170. }
  171. public static void main (String[] originalArgs) {
  172. Main prog = new Main();
  173. boolean showTrace = false;
  174. try {
  175. // Create an encoding-aware output for the console
  176. // System.out uses the default system encoding that
  177. // may not be the right one (e.g. windows-1252 vs cp850)
  178. ps = new PrintStream(System.out, true, getConsoleEncodingName());
  179. // Disable root console handler
  180. Handler[] handlers = Logger.getLogger("").getHandlers();
  181. for ( Handler handler : handlers ) {
  182. Logger.getLogger("").removeHandler(handler);
  183. }
  184. // Create our own handler
  185. LogHandler logHandler = new LogHandler(ps);
  186. logHandler.setLevel(Level.INFO);
  187. Logger.getLogger("").addHandler(logHandler); //$NON-NLS-1$
  188. // Remove all empty arguments
  189. // This is to work around the "$1" issue in bash
  190. ArrayList<String> args = new ArrayList<String>();
  191. for ( String tmp : originalArgs ) {
  192. if ( tmp.length() > 0 ) args.add(tmp);
  193. }
  194. prog.printBanner();
  195. if ( args.size() == 0 ) {
  196. prog.printUsage();
  197. return;
  198. }
  199. if ( args.contains("-?") ) {
  200. prog.printUsage();
  201. return; // Overrides all arguments
  202. }
  203. if ( args.contains("-h") || args.contains("--help") || args.contains("-help") ) {
  204. prog.showHelp();
  205. return; // Overrides all arguments
  206. }
  207. if ( args.contains("-i") || args.contains("--info") || args.contains("-info") ) {
  208. prog.showInfo();
  209. return; // Overrides all arguments
  210. }
  211. if ( args.contains("-trace") ) {
  212. // Check early so the option does not get 'eaten' by a bad syntax
  213. showTrace = true;
  214. }
  215. for ( int i=0; i<args.size(); i++ ) {
  216. String arg = args.get(i);
  217. if ( arg.equals("-fc") ) {
  218. prog.specifiedConfigId = prog.getArgument(args, ++i);
  219. }
  220. else if ( arg.equals("-sl") ) {
  221. prog.srcLoc = new LocaleId(prog.getArgument(args, ++i), true);
  222. }
  223. else if ( arg.equals("-tl") ) {
  224. prog.trgLoc = new LocaleId(prog.getArgument(args, ++i), true);
  225. }
  226. else if ( arg.equals("-ie") ) {
  227. prog.inputEncoding = prog.getArgument(args, ++i);
  228. }
  229. else if ( arg.equals("-oe") ) {
  230. prog.outputEncoding = prog.getArgument(args, ++i);
  231. }
  232. else if ( arg.equals("-od") ) {
  233. prog.outputDir = prog.getArgument(args, ++i);
  234. }
  235. else if ( arg.equals("-sd") ) {
  236. prog.skeletonDir = prog.getArgument(args, ++i);
  237. }
  238. else if ( arg.equals("-x") ) {
  239. prog.command = CMD_EXTRACT;
  240. }
  241. else if ( arg.equals("-xm") ) {
  242. prog.command = CMD_EXTRACTTOMOSES;
  243. }
  244. else if ( arg.equals("-2") ) {
  245. prog.moses2Outputs = true;
  246. }
  247. else if ( arg.equals("-t") ) {
  248. prog.command = CMD_TRANSLATE;
  249. }
  250. else if ( arg.equals("-m") ) {
  251. prog.command = CMD_MERGE;
  252. }
  253. else if ( arg.equals("-lm") ) {
  254. prog.command = CMD_LEVERAGEMOSES;
  255. }
  256. else if ( arg.equals("-totrg") ) {
  257. prog.mosesCopyToTarget = true;
  258. prog.mosesOverwriteTarget = false;
  259. }
  260. else if ( arg.equals("-overtrg") ) {
  261. prog.mosesCopyToTarget = true;
  262. prog.mosesOverwriteTarget = true;
  263. }
  264. else if ( arg.equals("-bpt") ) {
  265. prog.mosesUseGModeInAltTrans = false;
  266. }
  267. else if ( arg.equals("-over") ) {
  268. prog.convOverwrite = true;
  269. }
  270. else if ( arg.equals("-from")) {
  271. prog.mosesFromPath = prog.getArgument(args, ++i);
  272. }
  273. else if ( arg.equals("-to") ) {
  274. prog.mosesToPath = prog.getArgument(args, ++i);
  275. }
  276. else if ( arg.equals("-2po") ) {
  277. prog.command = CMD_CONV2PO;
  278. }
  279. else if ( arg.equals("-2tmx") ) {
  280. prog.command = CMD_CONV2TMX;
  281. }
  282. else if ( arg.equals("-2tbl") ) {
  283. prog.command = CMD_CONV2TABLE;
  284. }
  285. else if ( arg.equals("-csv") ) {
  286. prog.tableConvFormat = "csv";
  287. }
  288. else if ( arg.equals("-tab") ) {
  289. prog.tableConvFormat = "tab";
  290. }
  291. else if ( arg.equals("-xliff") ) {
  292. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFF;
  293. }
  294. else if ( arg.equals("-xliffgx") ) {
  295. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFFGX;
  296. }
  297. else if ( arg.equals("-tmx") ) {
  298. prog.tableConvCodes = TableFilterWriterParameters.INLINE_TMX;
  299. }
  300. else if ( arg.equals("-all") ) {
  301. prog.convSkipEntriesWithoutText = false;
  302. }
  303. else if ( arg.equals("-nofill") ) {
  304. prog.levOptFillTarget = false;
  305. }
  306. else if ( arg.equals("-nocopy") ) {
  307. prog.extOptCopy = false;
  308. }
  309. else if ( arg.equals("-noalttrans") ) {
  310. prog.extOptAltTrans = false;
  311. }
  312. else if ( arg.equals("-maketmx") ) {
  313. prog.levOptTMXPath = "pretrans.tmx";
  314. if ( args.size() > i+1 ) {
  315. if ( !args.get(i+1).startsWith("-") ) {
  316. prog.levOptTMXPath = args.get(++i);
  317. }
  318. }
  319. }
  320. else if ( arg.equals("-trgsource") ) {
  321. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCESOURCE;
  322. }
  323. else if ( arg.equals("-trgempty") ) {
  324. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCEEMPTY;
  325. }
  326. else if ( arg.equals("-imp") ) {
  327. prog.command = CMD_CONV2PEN;
  328. prog.pensieveData = prog.getArgument(args, ++i);
  329. }
  330. else if ( arg.equals("-exp") ) {
  331. prog.command = CMD_CONV2TMX;
  332. prog.specifiedConfigId = "okf_pensieve";
  333. }
  334. else if ( arg.equals("-e") ) {
  335. prog.command = CMD_EDITCONFIG;
  336. if ( args.size() > i+1 ) {
  337. if ( !args.get(i+1).startsWith("-") ) {
  338. prog.specifiedConfigId = args.get(++i);
  339. }
  340. }
  341. }
  342. else if ( arg.equals("-generic") ) {
  343. prog.genericOutput = true;
  344. prog.tableConvCodes = TableFilterWriterParameters.INLINE_GENERIC;
  345. }
  346. else if ( arg.equals("-q") ) {
  347. prog.command = CMD_QUERYTRANS;
  348. prog.query = prog.getArgument(args, ++i);
  349. }
  350. else if ( arg.equals("-a") ) {
  351. prog.command = CMD_ADDTRANS;
  352. prog.query = prog.getArgument(args, ++i);
  353. prog.addTransTrans = prog.getArgument(args, ++i);
  354. if ( args.size() > i+1 ) {
  355. if ( !args.get(i+1).startsWith("-") ) {
  356. // Optional rating
  357. try {
  358. prog.addTransRating = Integer.parseInt(args.get(++i));
  359. }
  360. catch ( NumberFormatException e ) {
  361. throw new RuntimeException(String.format("Invalid rating option: '%s'.", args.get(i)));
  362. }
  363. if (( prog.addTransRating < -10 ) || ( prog.addTransRating > 10 )) {
  364. throw new RuntimeException("Rating must be between -10 and 10.");
  365. }
  366. }
  367. }
  368. }
  369. else if ( arg.equals("-opt") ) {
  370. prog.tmOptions = prog.getArgument(args, ++i);
  371. }
  372. else if ( arg.equals("-gg") || arg.equals("-google") ) {
  373. prog.useGoogleV2 = true;
  374. if ( args.size() > i+1 ) {
  375. if ( !args.get(i+1).startsWith("-") ) {
  376. prog.googleV2Params = args.get(++i);
  377. }
  378. }
  379. }
  380. else if ( arg.equals("-opentran") ) {
  381. prog.useOpenTran = true;
  382. }
  383. else if ( arg.equals("-tt") ) {
  384. prog.useTransToolkit = true;
  385. prog.transToolkitParams = "amagama.locamotion.org:80";
  386. if ( args.size() > i+1 ) {
  387. if ( !args.get(i+1).startsWith("-") ) {
  388. prog.transToolkitParams = args.get(++i);
  389. }
  390. }
  391. }
  392. else if ( arg.equals("-gs") ) {
  393. prog.useGlobalSight = true;
  394. prog.globalSightParams = prog.getArgument(args, ++i);
  395. }
  396. else if ( arg.equals("-tda") ) {
  397. prog.useTDA = true;
  398. prog.tdaParams = prog.getArgument(args, ++i);
  399. }
  400. else if ( arg.equals("-ms") ) {
  401. prog.useMicrosoft = true;
  402. if ( args.size() > i+1 ) {
  403. if ( !args.get(i+1).startsWith("-") ) {
  404. prog.microsoftParams = args.get(++i);
  405. }
  406. }
  407. }
  408. // else if ( arg.equals("-promt") ) {
  409. // prog.useProMT = true;
  410. // if ( args.size() > i+1 ) {
  411. // if ( !args.get(i+1).startsWith("-") ) {
  412. // prog.proMTParams = args.get(++i);
  413. // }
  414. // }
  415. // }
  416. else if ( arg.equals("-apertium") ) {
  417. prog.useApertium = true;
  418. if ( args.size() > i+1 ) {
  419. if ( !args.get(i+1).startsWith("-") ) {
  420. prog.apertiumParams = args.get(++i);
  421. }
  422. }
  423. }
  424. else if ( arg.equals("-mm") ) {
  425. prog.useMyMemory = true;
  426. // Key is optional (left for backward compatibility)
  427. if ( args.size() > i+1 ) {
  428. if ( !args.get(i+1).startsWith("-") ) {
  429. prog.myMemoryParams = prog.getArgument(args, ++i);
  430. }
  431. }
  432. }
  433. else if ( arg.equals("-pen") ) {
  434. prog.usePensieve = true;
  435. prog.pensieveData = "http://localhost:8080";
  436. if ( args.size() > i+1 ) {
  437. if ( !args.get(i+1).startsWith("-") ) {
  438. prog.pensieveData = args.get(++i);
  439. }
  440. }
  441. }
  442. else if ( arg.endsWith("-listconf") || arg.equals("-lfc") ) {
  443. prog.command = CMD_SHOWCONFIGS;
  444. }
  445. else if ( arg.equals("-s") ) {
  446. prog.command = CMD_SEGMENTATION;
  447. prog.segRules = DEFAULT_SEGRULES;
  448. }
  449. else if ( arg.equals("-seg") ) {
  450. prog.segRules = DEFAULT_SEGRULES; // Default
  451. if ( args.size() > i+1 ) {
  452. if ( !args.get(i+1).startsWith("-") ) {
  453. prog.segRules = args.get(++i);
  454. }
  455. }
  456. }
  457. else if ( arg.equals("-trace") ) {
  458. // Trace aAlready set. this is just to avoid
  459. // seeing -trace as invalid parameter
  460. }
  461. //=== Input file or error
  462. else if ( !arg.startsWith("-") ) {
  463. prog.inputs.add(args.get(i));
  464. }
  465. else {
  466. prog.showTraceHint = false; // Using trace is not helpful to the user for this error
  467. throw new InvalidParameterException(
  468. String.format("Invalid command-line argument '%s'.", args.get(i)));
  469. }
  470. }
  471. // Forgive having the extension .fprm from configuration ID if there is one
  472. if ( prog.specifiedConfigId != null ) {
  473. String cfgPath = Util.getDirectoryName(prog.specifiedConfigId);
  474. if ( !cfgPath.isEmpty() ) {
  475. prog.specifiedConfigIdPath = cfgPath;
  476. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, true);
  477. }
  478. if ( prog.specifiedConfigId.endsWith(FilterConfigurationMapper.CONFIGFILE_EXT) ) {
  479. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, false);
  480. }
  481. }
  482. // Check inputs and command
  483. if ( prog.command == -1 ) {
  484. ps.println("No command specified. Please use one of the command described below:");
  485. prog.printUsage();
  486. return;
  487. }
  488. if ( prog.command == CMD_EDITCONFIG ) {
  489. if ( prog.specifiedConfigId == null ) {
  490. prog.editAllConfigurations();
  491. }
  492. else {
  493. prog.editConfiguration();
  494. }
  495. return;
  496. }
  497. if ( prog.command == CMD_SHOWCONFIGS ) {
  498. prog.showAllConfigurations();
  499. return;
  500. }
  501. if ( prog.command == CMD_QUERYTRANS ) {
  502. prog.processQuery();
  503. return;
  504. }
  505. if ( prog.command == CMD_ADDTRANS ) {
  506. prog.processAddTranslation();
  507. return;
  508. }
  509. if ( prog.inputs.size() == 0 ) {
  510. throw new RuntimeException("No input document specified.");
  511. }
  512. // Process all input files
  513. for ( int i=0; i<prog.inputs.size(); i++ ) {
  514. if ( i > 0 ) {
  515. ps.println("------------------------------------------------------------"); //$NON-NLS-1$
  516. }
  517. prog.process(prog.inputs.get(i));
  518. }
  519. }
  520. catch ( Throwable e ) {
  521. if ( showTrace ) e.printStackTrace();
  522. else {
  523. ps.println("ERROR: "+e.getMessage());
  524. Throwable e2 = e.getCause();
  525. if ( e2 != null ) ps.println(e2.getMessage());
  526. if ( prog.showTraceHint ) ps.println("You can use the -trace option for more details.");
  527. }
  528. System.exit(1); // Error
  529. }
  530. }
  /**
   * Creates a new Main object with an empty list of input paths.
   */
  public Main () {
    this.inputs = new ArrayList<String>();
  }
  534. protected String getArgument (ArrayList<String> args, int index) {
  535. if ( index >= args.size() ) {
  536. showTraceHint = false; // Using trace is not helpful to the user for this error
  537. throw new RuntimeException(String.format(
  538. "Missing parameter after '%s'", args.get(index-1)));
  539. }
  540. return args.get(index);
  541. }
  542. private void initialize () {
  543. // Create the mapper and load it with all parameters editor info
  544. // Do not load the filter configurations yet (time consuming)
  545. fcMapper = new FilterConfigurationMapper();
  546. DefaultFilters.setMappings(fcMapper, false, false);
  547. // Instead create a map with extensions -> filter
  548. extensionsMap = new Hashtable<String, String>();
  549. filtersMap = new Hashtable<String, String>();
  550. extensionsMap.put(".docx", "okf_openxml");
  551. extensionsMap.put(".pptx", "okf_openxml");
  552. extensionsMap.put(".xlsx", "okf_openxml");
  553. filtersMap.put("okf_openxml", "net.sf.okapi.filters.openxml.OpenXMLFilter");
  554. extensionsMap.put(".odt", "okf_openoffice");
  555. extensionsMap.put(".swx", "okf_openoffice");
  556. extensionsMap.put(".ods", "okf_openoffice");
  557. extensionsMap.put(".swc", "okf_openoffice");
  558. extensionsMap.put(".odp", "okf_openoffice");
  559. extensionsMap.put(".sxi", "okf_openoffice");
  560. extensionsMap.put(".odg", "okf_openoffice");
  561. extensionsMap.put(".sxd", "okf_openoffice");
  562. filtersMap.put("okf_openoffice", "net.sf.okapi.filters.openoffice.OpenOfficeFilter");
  563. extensionsMap.put(".htm", "okf_html");
  564. extensionsMap.put(".html", "okf_html");
  565. filtersMap.put("okf_html", "net.sf.okapi.filters.html.HtmlFilter");
  566. extensionsMap.put(".xlf", "okf_xliff");
  567. extensionsMap.put(".xlif", "okf_xliff");
  568. extensionsMap.put(".xliff", "okf_xliff");
  569. filtersMap.put("okf_xliff", "net.sf.okapi.filters.xliff.XLIFFFilter");
  570. extensionsMap.put(".tmx", "okf_tmx");
  571. filtersMap.put("okf_tmx", "net.sf.okapi.filters.tmx.TmxFilter");
  572. extensionsMap.put(".properties", "okf_properties");
  573. extensionsMap.put(".lang", "okf_properties-skypeLang");
  574. filtersMap.put("okf_properties", "net.sf.okapi.filters.properties.PropertiesFilter");
  575. extensionsMap.put(".po", "okf_po");
  576. filtersMap.put("okf_po", "net.sf.okapi.filters.po.POFilter");
  577. extensionsMap.put(".xml", "okf_xml");
  578. extensionsMap.put(".resx", "okf_xml-resx");
  579. filtersMap.put("okf_xml", "net.sf.okapi.filters.xml.XMLFilter");
  580. extensionsMap.put(".srt", "okf_regex-srt");
  581. filtersMap.put("okf_regex", "net.sf.okapi.filters.regex.RegexFilter");
  582. extensionsMap.put(".dtd", "okf_dtd");
  583. extensionsMap.put(".ent", "okf_dtd");
  584. filtersMap.put("okf_dtd", "net.sf.okapi.filters.dtd.DTDFilter");
  585. extensionsMap.put(".ts", "okf_ts");
  586. filtersMap.put("okf_ts", "net.sf.okapi.filters.ts.TsFilter");
  587. extensionsMap.put(".txt", "okf_plaintext");
  588. filtersMap.put("okf_plaintext", "net.sf.okapi.filters.plaintext.PlainTextFilter");
  589. extensionsMap.put(".csv", "okf_table_csv");
  590. filtersMap.put("okf_table", "net.sf.okapi.filters.table.TableFilter");
  591. extensionsMap.put(".ttx", "okf_ttx");
  592. filtersMap.put("okf_ttx", "net.sf.okapi.filters.ttx.TTXFilter");
  593. extensionsMap.put(".json", "okf_json");
  594. filtersMap.put("okf_json", "net.sf.okapi.filters.json.JSONFilter");
  595. filtersMap.put("okf_phpcontent", "net.sf.okapi.filters.php.PHPContentFilter");
  596. extensionsMap.put(".pentm", "okf_pensieve");
  597. filtersMap.put("okf_pensieve", "net.sf.okapi.filters.pensieve.PensieveFilter");
  598. filtersMap.put("okf_vignette", "net.sf.okapi.filters.vignette.VignetteFilter");
  599. extensionsMap.put(".yml", "okf_railsyaml");
  600. filtersMap.put("okf_railsyaml", "net.sf.okapi.filters.railsyaml.RailsYamlFilter");
  601. extensionsMap.put(".idml", "okf_idml");
  602. filtersMap.put("okf_idml", "net.sf.okapi.filters.idml.IDMLFilter");
  603. extensionsMap.put(".mif", "okf_mif");
  604. filtersMap.put("okf_mif", "net.sf.okapi.filters.mif.MIFFilter");
  605. extensionsMap.put(".txp", "okf_transifex");
  606. filtersMap.put("okf_transifex", "net.sf.okapi.filters.transifex.TransifexFilter");
  607. extensionsMap.put(".zip", "okf_archive");
  608. filtersMap.put("okf_archive", "net.sf.okapi.filters.archive.ArchiveFilter");
  609. extensionsMap.put(".txml", "okf_txml");
  610. filtersMap.put("okf_txml", "net.sf.okapi.filters.txml.TXMLFilter");
  611. filtersMap.put("okf_versifiedtxt", "net.sf.okapi.filters.versifiedtxt.VersifiedTextFilter");
  612. filtersMap.put("okf_xmlstream", "net.sf.okapi.filters.xmlstream.XmlStreamFilter");
  613. filtersMap.put("okf_mosestext", "net.sf.okapi.filters.mosestext.MosesTextFilter");
  614. if ( specifiedConfigIdPath != null ) {
  615. fcMapper.setCustomConfigurationsDirectory(specifiedConfigIdPath);
  616. }
  617. }
  618. private String getConfigurationId (String ext) {
  619. // Get the configuration for the extension
  620. String id = extensionsMap.get(ext);
  621. if ( id == null ) {
  622. throw new RuntimeException(String.format(
  623. "Could not guess the configuration for the extension '%s'", ext));
  624. }
  625. return id;
  626. }
  627. private void editAllConfigurations () {
  628. initialize();
  629. guessMissingLocales(null);
  630. // Add all the pre-defined configurations
  631. DefaultFilters.setMappings(fcMapper, false, true);
  632. loadFromPluginsAndUpdate();
  633. // Add the custom configurations
  634. fcMapper.updateCustomConfigurations();
  635. // Edit
  636. try {
  637. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  638. IFilterConfigurationListEditor editor =
  639. (IFilterConfigurationListEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  640. // Call the editor
  641. editor.editConfigurations(fcMapper);
  642. }
  643. catch ( InstantiationException e ) {
  644. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  645. }
  646. catch ( IllegalAccessException e ) {
  647. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  648. }
  649. catch ( ClassNotFoundException e ) {
  650. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  651. }
  652. }
  653. private void editConfiguration () {
  654. initialize();
  655. guessMissingLocales(null);
  656. if ( specifiedConfigId == null ) {
  657. throw new RuntimeException("You must specified the configuration to edit.");
  658. }
  659. configId = specifiedConfigId;
  660. if ( !prepareFilter(configId) ) return; // Next input
  661. try {
  662. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  663. IFilterConfigurationEditor editor =
  664. (IFilterConfigurationEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  665. // Call the editor
  666. editor.editConfiguration(configId, fcMapper);
  667. }
  668. catch ( InstantiationException e ) {
  669. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  670. }
  671. catch ( IllegalAccessException e ) {
  672. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  673. }
  674. catch ( ClassNotFoundException e ) {
  675. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  676. }
  677. }
  678. private void showAllConfigurations () {
  679. initialize();
  680. DefaultFilters.setMappings(fcMapper, true, true);
  681. loadFromPluginsAndUpdate();
  682. // Add the custom configurations
  683. fcMapper.updateCustomConfigurations();
  684. ps.println("List of all filter configurations available:");
  685. Iterator<FilterConfiguration> iter = fcMapper.getAllConfigurations();
  686. FilterConfiguration config;
  687. while ( iter.hasNext() ) {
  688. config = iter.next();
  689. ps.println(String.format(" - %s = %s",
  690. config.configId, config.description));
  691. }
  692. }
  693. private boolean prepareFilter (String configId) {
  694. boolean pluginsDone = false;
  695. while ( true ) {
  696. // Is it a default configuration?
  697. if ( filtersMap.containsKey(configId) ) {
  698. // Configuration ID is a default one:
  699. // Add its filter to the configuration mapper
  700. fcMapper.addConfigurations(filtersMap.get(configId));
  701. // Always add okf_html because it's used as sub-filter by several filters
  702. //TODO: Find a better way to handle sub-filter cases
  703. fcMapper.addConfigurations(filtersMap.get("okf_html"));
  704. return true;
  705. }
  706. // Else: Try to find the filter for that configuration
  707. for ( String tmp : filtersMap.keySet() ) {
  708. if ( configId.startsWith(tmp) ) {
  709. fcMapper.addConfigurations(filtersMap.get(tmp));
  710. // If the given configuration is not one of the pre-defined
  711. if ( fcMapper.getConfiguration(configId) == null ) {
  712. // Assume it is a custom one
  713. fcMapper.addCustomConfiguration(configId);
  714. }
  715. return true;
  716. }
  717. }
  718. // No success yet?
  719. if ( pluginsDone ) break;
  720. // Try to load the plug-ins if it was not done yet
  721. loadFromPluginsAndUpdate();
  722. pluginsDone = true;
  723. }
  724. // Could not guess
  725. ps.println(String.format(
  726. "ERROR: Could not guess the filter for the configuration '%s'", configId));
  727. return false;
  728. }
  729. private void loadFromPluginsAndUpdate () {
  730. // Discover and add plug-ins
  731. PluginsManager mgt = new PluginsManager();
  732. mgt.discover(new File(getAppRootDirectory()+File.separator+"dropins"), true);
  733. fcMapper.addFromPlugins(mgt);
  734. // Now update the filtersMap with new configurations
  735. Iterator<FilterConfiguration> iter = fcMapper.getAllConfigurations();
  736. while ( iter.hasNext() ) {
  737. FilterConfiguration cfg = iter.next();
  738. if ( !filtersMap.containsKey(cfg.configId) ) {
  739. filtersMap.put(cfg.configId, cfg.filterClass);
  740. }
  741. }
  742. }
  743. private void guessMissingLocales (String inputPath) {
  744. // If both locales are already set: just use those
  745. if (( srcLoc != null ) && ( trgLoc != null )) return;
  746. // Try to see if we can get one or both from the input file
  747. if ( inputPath != null ) {
  748. List<String> guessed = FileUtil.guessLanguages(inputPath);
  749. if ( guessed.size() > 0 ) {
  750. if ( srcLoc == null ) {
  751. srcLoc = LocaleId.fromString(guessed.get(0));
  752. }
  753. if ( guessed.size() > 1 ) {
  754. if ( trgLoc == null ) {
  755. trgLoc = LocaleId.fromString(guessed.get(1));
  756. }
  757. }
  758. }
  759. }
  760. // Make sure we do have a source
  761. if ( srcLoc == null ) {
  762. srcLoc = new LocaleId("en", false);
  763. }
  764. // Make sure we do have a target
  765. if ( trgLoc == null ) {
  766. trgLoc = new LocaleId(Locale.getDefault());
  767. if ( trgLoc.sameLanguageAs(srcLoc) ) {
  768. trgLoc = new LocaleId("fr", false);
  769. }
  770. }
  771. }
  772. private void guessMissingParameters (String inputOfConfig) {
  773. if ( specifiedConfigId == null ) {
  774. String ext = Util.getExtension(inputOfConfig);
  775. if ( Util.isEmpty(ext) ) {
  776. throw new RuntimeException(String.format(
  777. "The input file '%s' has no extension to guess the filter from.", inputOfConfig));
  778. }
  779. configId = getConfigurationId(ext.toLowerCase());
  780. }
  781. else {
  782. configId = specifiedConfigId;
  783. }
  784. if ( outputEncoding == null ) {
  785. if ( inputEncoding != null ) outputEncoding = inputEncoding;
  786. else outputEncoding = Charset.defaultCharset().name();
  787. }
  788. if ( inputEncoding == null ) {
  789. inputEncoding = Charset.defaultCharset().name();
  790. }
  791. }
  792. String pathChangeFolder (String newFolder,
  793. String oldPath)
  794. {
  795. String result;
  796. if ( newFolder == null ) {
  797. result = oldPath;
  798. }
  799. else {
  800. File file = new File(newFolder, Util.getFilename(oldPath, true));
  801. result = file.toString();
  802. }
  803. return result;
  804. }
  805. String pathInsertOutBeforeExt(String oldPath) {
  806. String ext = Util.getExtension(oldPath);
  807. int n = oldPath.lastIndexOf('.');
  808. return oldPath.substring(0, n) + ".out" + ext; //$NON-NLS-1$
  809. }
  810. private void guessMergingArguments (String input) {
  811. String ext = Util.getExtension(input);
  812. if ( !ext.equals(".xlf") ) {
  813. throw new RuntimeException(String.format(
  814. "The input file '%s' does not have the expected .xlf extension.", input));
  815. }
  816. int n = input.lastIndexOf('.');
  817. skeleton = input.substring(0, n);
  818. if ( outputDir == null ) {
  819. output = pathInsertOutBeforeExt(skeleton);
  820. }
  821. else {
  822. output = pathChangeFolder(outputDir, skeleton);
  823. }
  824. skeleton = pathChangeFolder(skeletonDir, skeleton);
  825. }
  826. private void guessMergingMosesArguments (String input) {
  827. // Main input is the original file, not the Moses file
  828. // The Moses file is specified with -from or null
  829. if ( Util.isEmpty(mosesFromPath) ) {
  830. // We guess the Moses filename:
  831. mosesFromPath = input + "."+trgLoc.toString();
  832. }
  833. if ( !Util.isEmpty(mosesToPath) ) {
  834. output = mosesToPath;
  835. }
  836. else {
  837. output = pathInsertOutBeforeExt(input);
  838. }
  839. }
  840. protected void process (String input) throws URISyntaxException {
  841. initialize();
  842. RawDocument rd;
  843. File file;
  844. switch ( command ) {
  845. case CMD_TRANSLATE:
  846. ps.println("Translation");
  847. guessMissingParameters(input);
  848. if ( !prepareFilter(configId) ) return; // Next input
  849. guessMissingLocales(input);
  850. file = new File(input);
  851. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  852. rd.setFilterConfigId(configId);
  853. translateFile(rd);
  854. break;
  855. case CMD_SEGMENTATION:
  856. ps.println("Segmentation");
  857. guessMissingParameters(input);
  858. if ( !prepareFilter(configId) ) return; // Next input
  859. guessMissingLocales(input);
  860. file = new File(input);
  861. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  862. rd.setFilterConfigId(configId);
  863. segmentFile(rd);
  864. break;
  865. case CMD_EXTRACT:
  866. ps.println("Extraction");
  867. guessMissingParameters(input);
  868. if ( !prepareFilter(configId) ) return; // Next input
  869. guessMissingLocales(input);
  870. file = new File(input);
  871. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  872. rd.setFilterConfigId(configId);
  873. extractFile(rd);
  874. break;
  875. case CMD_EXTRACTTOMOSES:
  876. ps.println("Extraction to Moses InlineText");
  877. guessMissingParameters(input);
  878. if ( !prepareFilter(configId) ) return; // Next input
  879. guessMissingLocales(input);
  880. file = new File(input);
  881. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  882. rd.setFilterConfigId(configId);
  883. extractFileToMoses(rd);
  884. break;
  885. case CMD_MERGE:
  886. ps.println("Merging");
  887. guessMergingArguments(input);
  888. guessMissingParameters(skeleton);
  889. if ( !prepareFilter(configId) ) return; // Next input
  890. guessMissingLocales(input);
  891. XLIFFMergingStep stepMrg = new XLIFFMergingStep(fcMapper);
  892. file = new File(skeleton);
  893. RawDocument skelRawDoc = new RawDocument(file.toURI(), inputEncoding,
  894. srcLoc, trgLoc);
  895. skelRawDoc.setFilterConfigId(configId);
  896. stepMrg.setXliffPath(input);
  897. stepMrg.setOutputPath(output);
  898. stepMrg.setOutputEncoding(outputEncoding);
  899. ps.println("Source language: "+srcLoc);
  900. ps.println("Target language: "+trgLoc);
  901. ps.println("Default input encoding: "+inputEncoding);
  902. ps.println("Output encoding: "+outputEncoding);
  903. ps.println("Filter configuration: "+configId);
  904. ps.println("XLIFF: "+input);
  905. ps.println(String.format("Output: %s", (output==null) ? "<auto-defined>" : output));
  906. stepMrg.handleRawDocument(skelRawDoc);
  907. break;
  908. case CMD_LEVERAGEMOSES:
  909. ps.println("Merging Moses InlineText");
  910. guessMissingLocales(input);
  911. guessMergingMosesArguments(input);
  912. guessMissingParameters(input);
  913. if ( !prepareFilter(configId) ) return; // Next input
  914. file = new File(input);
  915. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc, configId);
  916. leverageFileWithMoses(rd);
  917. break;
  918. case CMD_CONV2PO:
  919. case CMD_CONV2TMX:
  920. case CMD_CONV2PEN:
  921. case CMD_CONV2TABLE:
  922. if ( command == CMD_CONV2PO ) {
  923. ps.println("Conversion to PO");
  924. }
  925. else if ( command == CMD_CONV2TMX ) {
  926. ps.println("Conversion to TMX");
  927. }
  928. else if ( command == CMD_CONV2TABLE ) {
  929. ps.println("Conversion to Table");
  930. }
  931. else {
  932. ps.println("Importing to Pensieve TM");
  933. }
  934. guessMissingParameters(input);
  935. if ( !prepareFilter(configId) ) return; // Next input
  936. guessMissingLocales(input);
  937. file = new File(input);
  938. String output = input;
  939. if ( command == CMD_CONV2PO ) {
  940. output += ".po";
  941. }
  942. else if ( command == CMD_CONV2TMX ) {
  943. output += ".tmx";
  944. }
  945. else if ( command == CMD_CONV2TABLE) {
  946. output += ".txt";
  947. }
  948. else { // Pensieve
  949. output = checkPensieveDirExtension();
  950. }
  951. URI outputURI = new File(output).toURI();
  952. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  953. rd.setFilterConfigId(configId);
  954. ps.println("Source language: "+srcLoc);
  955. ps.println("Target language: "+trgLoc);
  956. ps.println("Default input encoding: "+inputEncoding);
  957. ps.println("Filter configuration: "+configId);
  958. ps.println("Output: "+output);
  959. convertFile(rd, outputURI);
  960. break;
  961. }
  962. ps.println("Done");
  963. }
  964. private void printBanner () {
  965. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  966. ps.println("Okapi Tikal - Localization Toolset");
  967. // The version will show as 'null' until the code is build as a JAR.
  968. ps.println(String.format("Version: %s", getClass().getPackage().getImplementationVersion()));
  969. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  970. }
  971. private void showInfo () {
  972. Runtime rt = Runtime.getRuntime();
  973. rt.runFinalization();
  974. rt.gc();
  975. ps.println("Java version: " + System.getProperty("java.version")); //$NON-NLS-1$
  976. ps.println(String.format("Platform: %s, %s, %s",
  977. System.getProperty("os.name"), //$NON-NLS-1$
  978. System.getProperty("os.arch"), //$NON-NLS-1$
  979. System.getProperty("os.version"))); //$NON-NLS-1$
  980. NumberFormat nf = NumberFormat.getInstance();
  981. ps.println(String.format("Java VM memory: free=%s KB, total=%s KB", //$NON-NLS-1$
  982. nf.format(rt.freeMemory()/1024),
  983. nf.format(rt.totalMemory()/1024)));
  984. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  985. }
  986. private String getAppRootDirectory () {
  987. try {
  988. URL url = getClass().getProtectionDomain().getCodeSource().getLocation();
  989. String path = new File(url.toURI()).getCanonicalPath();
  990. return Util.getDirectoryName(Util.getDirectoryName(path));
  991. }
  992. catch ( IOException e ) {
  993. throw new OkapiIOException(e);
  994. }
  995. catch ( URISyntaxException e ) {
  996. throw new OkapiIOException("Bad URI syntax.", e);
  997. }
  998. }
  999. private void showHelp () throws MalformedURLException {
  1000. Util.openWikiTopic("Tikal");
  1001. }
  1002. private void printUsage () {
  1003. ps.println("Shows this screen: -?");
  1004. ps.println("Shows version and other information: -i or --info");
  1005. ps.println("Opens the user guide page: -h or --help");
  1006. ps.println("Lists all available filter configurations: -lfc or --listconf");
  1007. ps.println("Edits or view filter configurations (UI-dependent command):");
  1008. ps.println(" -e [[-fc] configId]");
  1009. ps.println("Extracts a file to XLIFF (and optionally segment and pre-translate):");
  1010. ps.println(" -x inputFile [inputFile2...] [-fc configId] [-ie encoding] [-sl srcLang]");
  1011. ps.println(" [-tl trgLang] [-seg [srxFile]] [-tt [hostname[:port]]|-mm [key]");
  1012. ps.println(" |-pen tmDirectory|-gs configFile|-apertium [configFile]");
  1013. ps.println(" |-ms configFile|-tda configFile|-gg configFile]");
  1014. ps.println(" [-maketmx [tmxFile]] [-opt threshold]");
  1015. ps.println(" [-od outputDirectory] [-nocopy] [-noalttrans]");
  1016. ps.println("Merges an XLIFF document back to its original format:");
  1017. ps.println(" -m xliffFile [xliffFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1018. ps.println(" [-sd sourceDirectory] [-od outputDirectory]");
  1019. ps.println(" [-sl srcLang] [-tl trgLang]");
  1020. ps.println("Translates a file:");
  1021. ps.println(" -t inputFile [inputFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1022. ps.println(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]] [-tt [hostname[:port]]");
  1023. ps.println(" |-mm [key]|-pen tmDirectory|-gs configFile|-apertium [configFile]");
  1024. ps.println(" |-ms configFile|-tda configFile|-gg configFile]");
  1025. ps.println(" [-maketmx [tmxFile]] [-opt threshold]");
  1026. ps.println("Extracts a file to Moses InlineText:");
  1027. ps.println(" -xm inputFile [-fc configId] [-ie encoding] [-seg [srxFile]]");
  1028. ps.println(" [-sl srcLang] [-tl trgLang] [-2] [-to srcOutputFile]");
  1029. ps.println("Leverages a file with Moses InlineText:");
  1030. ps.println(" -lm inputFile [-fc configId] [-ie encoding] [-oe encoding] [-sl srcLang]");
  1031. ps.println(" [-tl trgLang] [-seg [srxFile]] [-totrg|-overtrg] [-bpt]");
  1032. ps.println(" [-from mosesFile] [-to outputFile]");
  1033. ps.println("Segments a file:");
  1034. ps.println(" -s inputFile [-fc configId] [-ie encoding]");
  1035. ps.println(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]]");
  1036. ps.println("Queries translation resources:");
  1037. ps.println(" -q \"source text\" [-sl srcLang] [-tl trgLang] [-opentran]");
  1038. ps.println(" [-tt [hostname[:port]]] [-mm [key]] [-pen tmDirectory] [-gs configFile]");
  1039. ps.println(" [-apertium [configFile]] [-ms configFile] [-tda configFile]");
  1040. ps.println(" [-gg configFile] [-opt threshold[:maxhits]]");
  1041. ps.println("Adds translation to a resources:");
  1042. ps.println(" -a \"source text\" \"target text\" [rating] [-sl srcLang] [-tl trgLang]");
  1043. ps.println(" -ms configFile");
  1044. ps.println("Converts to PO format:");
  1045. ps.println(" -2po inputFile [inputFile2...] [-fc configId] [-ie encoding] [-all]");
  1046. ps.println(" [-sl srcLang] [-tl trgLang] [-generic] [-trgsource|-trgempty]");
  1047. ps.println("Converts to TMX format:");
  1048. ps.println(" -2tmx inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1049. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-all]");
  1050. ps.println("Converts to table format:");
  1051. ps.println(" -2tbl inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1052. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty]");
  1053. ps.println(" [-csv|-tab] [-xliff|-xliffgx|-tmx|-generic] [-all]");
  1054. ps.println("Imports to Pensieve TM:");
  1055. ps.println(" -imp tmDirectory inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1056. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-all] [-over]");
  1057. ps.println("Exports Pensieve TM as TMX:");
  1058. ps.println(" -exp tmDirectory1 [tmDirectory2...] [-sl srcLang] [-tl trgLang]");
  1059. ps.println(" [-trgsource|-trgempty] [-all]");
  1060. }
  1061. private void displayQuery (IQuery conn,
  1062. boolean isTM)
  1063. {
  1064. int count;
  1065. if ( conn.getClass().getName().endsWith("PensieveTMConnector")
  1066. || conn.getClass().getName().endsWith("GoogleMTConnector")
  1067. || conn.getClass().getName().endsWith("GoogleMTv2Connector")
  1068. || conn.getClass().getName().endsWith("MyMemoryTMConnector")
  1069. || conn.getClass().getName().endsWith("MicrosoftMTConnector")
  1070. // || conn.getClass().getName().endsWith("ProMTConnector")
  1071. || conn.getClass().getName().endsWith("GlobalSightTMConnector") ) {
  1072. count = conn.query(parseToTextFragment(query));
  1073. }
  1074. else { // Raw text otherwise
  1075. count = conn.query(query);
  1076. }
  1077. ps.println(String.format("\n= From %s (%s->%s)", conn.getName(),
  1078. conn.getSourceLanguage(), conn.getTargetLanguage()));
  1079. if ( isTM ) {
  1080. ITMQuery tmConn = (ITMQuery)conn;
  1081. ps.println(String.format(" Threshold=%d, Maximum hits=%d",
  1082. tmConn.getThreshold(), tmConn.getMaximumHits()));
  1083. }
  1084. if ( count > 0 ) {
  1085. QueryResult qr;
  1086. while ( conn.hasNext() ) {
  1087. qr = conn.next();
  1088. ps.println(String.format("score: %d, origin: '%s'%s",
  1089. qr.getCombinedScore(),
  1090. (qr.origin==null ? "" : qr.origin),
  1091. (qr.fromMT() ? " (from MT)" : "")));
  1092. ps.println(String.format(" Source: \"%s\"", qr.source.toText()));
  1093. ps.println(String.format(" Target: \"%s\"", qr.target.toText()));
  1094. }
  1095. }
  1096. else {
  1097. ps.println(String.format(" Source: \"%s\"", query));
  1098. ps.println(" <No translation has been found>");
  1099. }
  1100. }
  1101. private void processAddTranslation () {
  1102. guessMissingLocales(null);
  1103. if ( Util.isEmpty(query) ) {
  1104. throw new RuntimeException(String.format("Cannot add empty source text."));
  1105. }
  1106. if ( Util.isEmpty(addTransTrans) ) {
  1107. throw new RuntimeException(String.format("Cannot add empty target text."));
  1108. }
  1109. if ( useMicrosoft ) {
  1110. MicrosoftMTConnector conn = new MicrosoftMTConnector();
  1111. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1112. conn.setLanguages(srcLoc, trgLoc);
  1113. conn.open();
  1114. int res = conn.addTranslation(parseToTextFragment(query), parseToTextFragment(addTransTrans), addTransRating);
  1115. if ( res == 200 ) {
  1116. ps.println("Done");
  1117. }
  1118. else {
  1119. ps.println(String.format("Error code %d.", res));
  1120. }
  1121. conn.close();
  1122. }
  1123. else {
  1124. throw new RuntimeException(String.format("No valid connector specified to add a translation."));
  1125. }
  1126. }
  1127. private void processQuery () {
  1128. guessMissingLocales(null);
  1129. if ( !useGoogleV2 && !useOpenTran && !useTransToolkit && !useMyMemory
  1130. && !usePensieve && !useGlobalSight && !useApertium && !useMicrosoft && !useTDA ) {
  1131. useOpenTran = true; // Default if none is specified
  1132. }
  1133. // Query options
  1134. int[] opt = parseTMOptions();
  1135. int threshold = opt[0];
  1136. int maxhits = opt[1];
  1137. IQuery conn;
  1138. if ( useGoogleV2 ) {
  1139. conn = new GoogleMTv2Connector();
  1140. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1141. conn.setLanguages(srcLoc, trgLoc);
  1142. conn.open();
  1143. displayQuery(conn, false);
  1144. conn.close();
  1145. }
  1146. if ( usePensieve ) {
  1147. conn = new PensieveTMConnector();
  1148. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1149. conn.setLanguages(srcLoc, trgLoc);
  1150. setTMOptionsIfPossible(conn, threshold, maxhits);
  1151. conn.open();
  1152. displayQuery(conn, true);
  1153. conn.close();
  1154. }
  1155. if ( useTransToolkit ) {
  1156. conn = new TranslateToolkitTMConnector();
  1157. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1158. conn.setLanguages(srcLoc, trgLoc);
  1159. setTMOptionsIfPossible(conn, threshold, maxhits);
  1160. conn.open();
  1161. displayQuery(conn, true);
  1162. conn.close();
  1163. }
  1164. if ( useGlobalSight ) {
  1165. conn = new GlobalSightTMConnector();
  1166. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1167. conn.setLanguages(srcLoc, trgLoc);
  1168. setTMOptionsIfPossible(conn, threshold, maxhits);
  1169. conn.open();
  1170. displayQuery(conn, true);
  1171. conn.close();
  1172. }
  1173. if ( useTDA ) {
  1174. conn = new TDASearchConnector();
  1175. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1176. conn.setLanguages(srcLoc, trgLoc);
  1177. setTMOptionsIfPossible(conn, threshold, maxhits);
  1178. conn.open();
  1179. displayQuery(conn, true);
  1180. conn.close();
  1181. }
  1182. if ( useMicrosoft ) {
  1183. conn = new MicrosoftMTConnector();
  1184. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1185. conn.setLanguages(srcLoc, trgLoc);
  1186. setTMOptionsIfPossible(conn, threshold, maxhits);
  1187. conn.open();
  1188. displayQuery(conn, true);
  1189. conn.close();
  1190. }
  1191. // if ( useProMT ) {
  1192. // conn = new ProMTConnector();
  1193. // conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1194. // conn.setLanguages(srcLoc, trgLoc);
  1195. // conn.open();
  1196. // displayQuery(conn, false);
  1197. // conn.close();
  1198. // }
  1199. if ( useMyMemory ) {
  1200. conn = new MyMemoryTMConnector();
  1201. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1202. conn.setLanguages(srcLoc, trgLoc);
  1203. setTMOptionsIfPossible(conn, threshold, maxhits);
  1204. conn.open();
  1205. displayQuery(conn, true);
  1206. conn.close();
  1207. }
  1208. if ( useApertium ) {
  1209. conn = new ApertiumMTConnector();
  1210. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1211. conn.setLanguages(srcLoc, trgLoc);
  1212. conn.open();
  1213. displayQuery(conn, false);
  1214. conn.close();
  1215. }
  1216. if ( useOpenTran ) {
  1217. conn = new OpenTranTMConnector();
  1218. conn.setLanguages(srcLoc, trgLoc);
  1219. setTMOptionsIfPossible(conn, threshold, maxhits);
  1220. conn.open();
  1221. displayQuery(conn, true);
  1222. conn.close();
  1223. }
  1224. }
  1225. private int[] parseTMOptions () {
  1226. int[] opt = new int[2];
  1227. opt[0] = -1;
  1228. opt[1] = -1;
  1229. if ( !Util.isEmpty(tmOptions) ) {
  1230. try {
  1231. // Expected format: "threshold[:maxhits]"
  1232. int n = tmOptions.indexOf(':');
  1233. if ( n == -1 ) { // Threshold only
  1234. opt[0] = Integer.parseInt(tmOptions);
  1235. }
  1236. else {
  1237. opt[0] = Integer.parseInt(tmOptions.substring(0, n));
  1238. opt[1] = Integer.parseInt(tmOptions.substring(n+1));
  1239. if ( opt[1] < 0 ) {
  1240. throw new RuntimeException(String.format("Invalid TM options: '%s' Maximum hits must be more than 0.", tmOptions));
  1241. }
  1242. }
  1243. if (( opt[0] < 0 ) || ( opt[0] > 100 )) {
  1244. throw new RuntimeException(String.format("Invalid TM options: '%s' Thresold must be between 0 and 100.", tmOptions));
  1245. }
  1246. }
  1247. catch ( NumberFormatException e ) {
  1248. throw new RuntimeException(String.format("Invalid TM options: '%s'", tmOptions));
  1249. }
  1250. }
  1251. return opt;
  1252. }
  1253. private void setTMOptionsIfPossible (IQuery conn,
  1254. int threshold,
  1255. int maxhits)
  1256. {
  1257. ITMQuery tmConn = (ITMQuery)conn;
  1258. if ( threshold > -1 ) tmConn.setThreshold(threshold);
  1259. if ( maxhits > -1 ) tmConn.setMaximumHits(maxhits);
  1260. }
  1261. private void convertFile (RawDocument rd, URI outputURI) {
  1262. // Create the driver
  1263. PipelineDriver driver = new PipelineDriver();
  1264. driver.setFilterConfigurationMapper(fcMapper);
  1265. driver.setRootDirectories(System.getProperty("user.dir"),
  1266. Util.getDirectoryName(rd.getInputURI().getPath()));
  1267. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1268. driver.addStep(rd2feStep);
  1269. FormatConversionStep fcStep = new FormatConversionStep();
  1270. net.sf.okapi.steps.formatconversion.Parameters params = fcStep.getParameters();
  1271. if ( command == CMD_CONV2PO ) {
  1272. params.setOutputFormat(Parameters.FORMAT_PO);
  1273. params.setOutputPath("output.po");
  1274. }
  1275. else if ( command == CMD_CONV2TMX ) {
  1276. params.setOutputFormat(Parameters.FORMAT_TMX);
  1277. params.setOutputPath("output.tmx");
  1278. }
  1279. else if ( command == CMD_CONV2TABLE ) {
  1280. params.setOutputFormat(Parameters.FORMAT_TABLE);
  1281. TableFilterWriterParameters opt = new TableFilterWriterParameters();
  1282. opt.fromArguments(tableConvFormat, tableConvCodes);
  1283. params.setFormatOptions(opt.toString());
  1284. params.setOutputPath("output.txt");
  1285. }
  1286. else if ( command == CMD_CONV2PEN ) {
  1287. params.setOutputFormat(Parameters.FORMAT_PENSIEVE);
  1288. params.setOutputPath(checkPensieveDirExtension());
  1289. }
  1290. params.setSingleOutput(command==CMD_CONV2PEN);
  1291. // These options may or may not be used depending on the output format
  1292. params.setUseGenericCodes(genericOutput);
  1293. params.setTargetStyle(convTargetStyle);
  1294. params.setSkipEntriesWithoutText(convSkipEntriesWithoutText);
  1295. params.setOverwriteSameSource(convOverwrite);
  1296. driver.addStep(fcStep);
  1297. driver.addBatchItem(rd, outputURI, outputEncoding);
  1298. driver.processBatch();
  1299. }
  1300. private IPipelineStep addSegmentationStep () {
  1301. if ( segRules.equals(DEFAULT_SEGRULES) ) { // Defaults
  1302. segRules = getAppRootDirectory();
  1303. segRules += File.separator + "config" + File.separator + "defaultSegmentation.srx";
  1304. }
  1305. else {
  1306. if ( Util.isEmpty(Util.getExtension(segRules)) ) {
  1307. segRules += ".srx";
  1308. }
  1309. }
  1310. SegmentationStep segStep = new SegmentationStep();
  1311. net.sf.okapi.steps.segmentation.Parameters segParams
  1312. = (net.sf.okapi.steps.segmentation.Parameters)segStep.getParameters();
  1313. segParams.segmentSource = true;
  1314. segParams.segmentTarget = true;
  1315. File f = new File(segRules);
  1316. segParams.setSourceSrxPath(f.getAbsolutePath());
  1317. segParams.setTargetSrxPath(f.getAbsolutePath());
  1318. ps.println("Segmentation: " + f.getAbsolutePath());
  1319. return segStep;
  1320. }
  1321. private IPipelineStep addLeveragingStep () {
  1322. LeveragingStep levStep = new LeveragingStep();
  1323. net.sf.okapi.steps.leveraging.Parameters levParams
  1324. = (net.sf.okapi.steps.leveraging.Parameters)levStep.getParameters();
  1325. if ( usePensieve ) {
  1326. levParams.setResourceClassName(PensieveTMConnector.class.getName());
  1327. }
  1328. else if ( useTransToolkit ) {
  1329. levParams.setResourceClassName(TranslateToolkitTMConnector.class.getName());
  1330. }
  1331. else if ( useMyMemory ) {
  1332. levParams.setResourceClassName(MyMemoryTMConnector.class.getName());
  1333. }
  1334. else if ( useGoogleV2 ) {
  1335. levParams.setResourceClassName(GoogleMTv2Connector.class.getName());
  1336. }
  1337. else if ( useGlobalSight ) {
  1338. levParams.setResourceClassName(GlobalSightTMConnector.class.getName());
  1339. }
  1340. else if ( useTDA ) {
  1341. levParams.setResourceClassName(TDASearchConnector.class.getName());
  1342. }
  1343. else if ( useMicrosoft ) {
  1344. levParams.setResourceClassName(MicrosoftMTConnector.class.getName());
  1345. }
  1346. // else if ( useProMT ) {
  1347. // levParams.setResourceClassName(ProMTConnector.class.getName());
  1348. // }
  1349. else if ( useApertium ) {
  1350. levParams.setResourceClassName(ApertiumMTConnector.class.getName());
  1351. }
  1352. IParameters p = prepareConnectorParameters(levParams.getResourceClassName());
  1353. if ( p != null ) levParams.setResourceParameters(p.toString());
  1354. levParams.setFillTarget(levOptFillTarget);
  1355. // Query options
  1356. int[] opt = parseTMOptions();
  1357. if ( opt[0] > -1 ) levParams.setThreshold(opt[0]);
  1358. if ( levOptTMXPath != null ) {
  1359. levParams.setMakeTMX(true);
  1360. levParams.setTMXPath(levOptTMXPath);
  1361. }
  1362. return levStep;
  1363. }
  1364. private void extractFile (RawDocument rd) throws URISyntaxException {
  1365. // Create the driver
  1366. PipelineDriver driver = new PipelineDriver();
  1367. driver.setFilterConfigurationMapper(fcMapper);
  1368. driver.setRootDirectories(System.getProperty("user.dir"),
  1369. Util.getDirectoryName(rd.getInputURI().getPath()));
  1370. // Raw document to filter events step
  1371. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1372. driver.addStep(rd2feStep);
  1373. // Add segmentation step if requested
  1374. if ( segRules != null ) {
  1375. driver.addStep(addSegmentationStep());
  1376. }
  1377. // Add leveraging step if requested
  1378. if ( useGoogleV2 || useTransToolkit || useMyMemory || usePensieve
  1379. || useGlobalSight || useApertium || useMicrosoft || useTDA ) {
  1380. driver.addStep(addLeveragingStep());
  1381. }
  1382. // Filter events to raw document final step (using the XLIFF writer)
  1383. FilterEventsWriterStep fewStep = new FilterEventsWriterStep();
  1384. XLIFFWriter writer = new XLIFFWriter();
  1385. writer.setCopySource(extOptCopy);
  1386. writer.setIncludeAltTrans(extOptAltTrans);
  1387. fewStep.setFilterWriter(writer);
  1388. fewStep.setDocumentRoots(System.getProperty("user.dir"));
  1389. driver.addStep(fewStep);
  1390. // Create the raw document and set the output
  1391. String tmp = rd.getInputURI().getPath();
  1392. // If the input is a directory, it ends with a separator, then we remove it
  1393. if ( tmp.endsWith("/") || tmp.endsWith("\\") ) {
  1394. tmp = tmp.substring(0, tmp.length()-1);
  1395. }
  1396. tmp += ".xlf";
  1397. tmp = pathChangeFolder(outputDir, tmp);
  1398. driver.addBatchItem(rd, new File(tmp).toURI(), outputEncoding);
  1399. ps.println("Source language: "+srcLoc);
  1400. ps.println("Target language: "+trgLoc);
  1401. ps.println("Default input encoding: "+inputEncoding);
  1402. ps.println("Filter configuration: "+configId);
  1403. ps.println("Output: "+tmp);
  1404. // Process
  1405. driver.processBatch();
  1406. }
  1407. private void segmentFile (RawDocument rd) throws URISyntaxException {
  1408. // Create the driver
  1409. PipelineDriver driver = new PipelineDriver();
  1410. driver.setFilterConfigurationMapper(fcMapper);
  1411. driver.setRootDirectories(System.getProperty("user.dir"),
  1412. Util.getDirectoryName(rd.getInputURI().getPath()));
  1413. // Raw document to filter events step
  1414. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1415. driver.addStep(rd2feStep);
  1416. driver.addStep(addSegmentationStep());
  1417. // Filter events to raw document final step
  1418. FilterEventsToRawDocumentStep ferdStep = new FilterEventsToRawDocumentStep();
  1419. driver.addStep(ferdStep);
  1420. // Create the raw document and set the output
  1421. String tmp = rd.getInputURI().getPath();
  1422. output = pathInsertOutBeforeExt(tmp);
  1423. ps.println("Source language: "+srcLoc);
  1424. ps.println("Target language: "+trgLoc);
  1425. ps.println("Default input encoding: "+inputEncoding);
  1426. ps.println("Output encoding: "+outputEncoding);
  1427. ps.println("Filter configuration: "+configId);
  1428. ps.println("Output: "+output);
  1429. driver.addBatchItem(rd, new File(output).toURI(), outputEncoding);
  1430. // Process
  1431. driver.processBatch();
  1432. }
  1433. private void leverageFileWithMoses (RawDocument rd) {
  1434. // Create the driver
  1435. PipelineDriver driver = new PipelineDriver();
  1436. driver.setFilterConfigurationMapper(fcMapper);
  1437. driver.setRootDirectories(System.getProperty("user.dir"),
  1438. Util.getDirectoryName(rd.getInputURI().getPath()));
  1439. driver.addStep(new RawDocumentToFilterEventsStep());
  1440. // Add segmentation step if requested
  1441. if ( segRules != null ) {
  1442. driver.addStep(addSegmentationStep());
  1443. }
  1444. MergingStep mrgStep = new MergingStep();
  1445. MergingParameters params = (MergingParameters)mrgStep.getParameters();
  1446. params.setCopyToTarget(mosesCopyToTarget);
  1447. params.setOverwriteExistingTarget(mosesOverwriteTarget);
  1448. params.setForceAltTransOutput(true);
  1449. params.setUseGModeInAltTrans(mosesUseGModeInAltTrans);
  1450. driver.addStep(mrgStep);
  1451. driver.addStep(new FilterEventsToRawDocumentStep());
  1452. // Two parallel inputs: 1=the original file, 2=the Moses translated file
  1453. RawDocument rdMoses = new RawDocument(new File(mosesFromPath).toURI(), "UTF-8", trgLoc);
  1454. driver.addBatchItem(new BatchItemContext(rd, new File(output).toURI(), outputEncoding, rdMoses));
  1455. // Execute
  1456. driver.processBatch();
  1457. }
  1458. private void extractFileToMoses (RawDocument rd) throws URISyntaxException {
  1459. // Create the driver
  1460. PipelineDriver driver = new PipelineDriver();
  1461. driver.setFilterConfigurationMapper(fcMapper);
  1462. driver.setRootDirectories(System.getProperty("user.dir"),
  1463. Util.getDirectoryName(rd.getInputURI().getPath()));
  1464. // Raw document to filter events step
  1465. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1466. driver.addStep(rd2feStep);
  1467. // Add segmentation step if requested
  1468. if ( segRules != null ) {
  1469. driver.addStep(addSegmentationStep());
  1470. }
  1471. // Filter events to raw document final step (using the XLIFF writer)
  1472. ExtractionStep extStep = new ExtractionStep();
  1473. if ( moses2Outputs ) {
  1474. FilterWriterParameters p = (FilterWriterParameters)extStep.getParameters();
  1475. p.setSourceAndTarget(true);
  1476. }
  1477. driver.addStep(extStep);
  1478. // Create the raw document and set the output
  1479. if ( Util.isEmpty(mosesToPath) ) {
  1480. mosesToPath = rd.getInputURI().getPath();
  1481. }
  1482. if ( !mosesToPath.endsWith("."+srcLoc.toString()) ) {
  1483. mosesToPath = mosesToPath + ("."+srcLoc.toString());
  1484. }
  1485. driver.addBatchItem(rd, new File(mosesToPath).toURI(), "UTF-8");
  1486. ps.println("Source language: "+srcLoc);
  1487. if ( moses2Outputs ) {
  1488. ps.println("Target language: "+trgLoc);
  1489. }
  1490. ps.println("Default input encoding: "+inputEncoding);
  1491. ps.println("Filter configuration: "+configId);
  1492. // Process
  1493. driver.processBatch();
  1494. }
  1495. private void translateFile (RawDocument rd) throws URISyntaxException {
  1496. // Create the driver
  1497. PipelineDriver driver = new PipelineDriver();
  1498. driver.setFilterConfigurationMapper(fcMapper);
  1499. driver.setRootDirectories(System.getProperty("user.dir"),
  1500. Util.getDirectoryName(rd.getInputURI().getPath()));
  1501. // Raw document to filter events step
  1502. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1503. driver.addStep(rd2feStep);
  1504. // Add segmentation step if requested
  1505. if ( segRules != null ) {
  1506. driver.addStep(addSegmentationStep());
  1507. }
  1508. // Add leveraging step
  1509. if ( useGoogleV2 || useTransToolkit || useMyMemory || usePensieve
  1510. || useGlobalSight || useApertium || useMicrosoft || useTDA ) {
  1511. driver.addStep(addLeveragingStep());
  1512. }
  1513. else { // Or indicate that we won't translate
  1514. ps.println("No valid translation resource has been specified: The text will not be modified.");
  1515. }
  1516. // Filter events to raw document final step
  1517. FilterEventsToRawDocumentStep ferdStep = new FilterEventsToRawDocumentStep();
  1518. driver.addStep(ferdStep);
  1519. // Create the raw document and set the output
  1520. String tmp = rd.getInputURI().getPath();
  1521. output = pathInsertOutBeforeExt(tmp);
  1522. ps.println("Source language: "+srcLoc);
  1523. ps.println("Target language: "+trgLoc);
  1524. ps.println("Default input encoding: "+inputEncoding);
  1525. ps.println("Output encoding: "+outputEncoding);
  1526. ps.println("Filter configuration: "+configId);
  1527. ps.println("Output: "+output);
  1528. driver.addBatchItem(rd, new File(output).toURI(), outputEncoding);
  1529. // Process
  1530. driver.processBatch();
  1531. }
  1532. private String checkPensieveDirExtension () {
  1533. String ext = Util.getExtension(pensieveData);
  1534. if ( Util.isEmpty(ext) ) pensieveData += ".pentm";
  1535. return pensieveData;
  1536. }
  1537. private IParameters prepareConnectorParameters (String connectorClassName) {
  1538. if ( connectorClassName.equals(PensieveTMConnector.class.getName()) ) {
  1539. net.sf.okapi.connectors.pensieve.Parameters params
  1540. = new net.sf.okapi.connectors.pensieve.Parameters();
  1541. if ( pensieveData.startsWith("http:") ) {
  1542. params.setHost(pensieveData);
  1543. params.setUseServer(true);
  1544. }
  1545. else {
  1546. params.setDbDirectory(checkPensieveDirExtension());
  1547. }
  1548. return params;
  1549. }
  1550. if ( connectorClassName.equals(TranslateToolkitTMConnector.class.getName()) ) {
  1551. net.sf.okapi.connectors.translatetoolkit.Parameters params
  1552. = new net.sf.okapi.connectors.translatetoolkit.Parameters();
  1553. // Parse the parameters hostname:port
  1554. int n = transToolkitParams.lastIndexOf(':');
  1555. if ( n == -1 ) {
  1556. params.setHost(transToolkitParams);
  1557. }
  1558. else {
  1559. params.setPort(Integer.valueOf(transToolkitParams.substring(n+1)));
  1560. params.setHost(transToolkitParams.substring(0, n));
  1561. }
  1562. return params;
  1563. }
  1564. if ( connectorClassName.equals(MyMemoryTMConnector.class.getName()) ) {
  1565. net.sf.okapi.connectors.mymemory.Parameters params
  1566. = new net.sf.okapi.connectors.mymemory.Parameters();
  1567. params.setKey(myMemoryParams);
  1568. return params;
  1569. }
  1570. if ( connectorClassName.equals(GlobalSightTMConnector.class.getName()) ) {
  1571. net.sf.okapi.connectors.globalsight.Parameters params
  1572. = new net.sf.okapi.connectors.globalsight.Parameters();
  1573. URI paramURI = (new File(globalSightParams).toURI());
  1574. params.load(paramURI, false);
  1575. return params;
  1576. }
  1577. if ( connectorClassName.equals(TDASearchConnector.class.getName()) ) {
  1578. net.sf.okapi.connectors.tda.Parameters params
  1579. = new net.sf.okapi.connectors.tda.Parameters();
  1580. URI paramURI = (new File(tdaParams).toURI());
  1581. params.load(paramURI, false);
  1582. return params;
  1583. }
  1584. if ( connectorClassName.equals(MicrosoftMTConnector.class.getName()) ) {
  1585. net.sf.okapi.connectors.microsoft.Parameters params
  1586. = new net.sf.okapi.connectors.microsoft.Parameters();
  1587. // Use the specified parameters if available, otherwise use the default
  1588. if ( microsoftParams != null ) {
  1589. URI paramURI = (new File(microsoftParams).toURI());
  1590. params.load(paramURI, false);
  1591. }
  1592. return params;
  1593. }
  1594. if ( connectorClassName.equals(GoogleMTv2Connector.class.getName()) ) {
  1595. net.sf.okapi.connectors.google.GoogleMTv2Parameters params
  1596. = new net.sf.okapi.connectors.google.GoogleMTv2Parameters();
  1597. // Use the specified parameters if available, otherwise use the default
  1598. if ( googleV2Params != null ) {
  1599. URI paramURI = (new File(googleV2Params).toURI());
  1600. params.load(paramURI, false);
  1601. }
  1602. return params;
  1603. }
  1604. // if ( connectorClassName.equals(ProMTConnector.class.getName()) ) {
  1605. // net.sf.okapi.connectors.promt.Parameters params
  1606. // = new net.sf.okapi.connectors.promt.Parameters();
  1607. // // Use the specified parameters if available, otherwise use the default
  1608. // if ( proMTParams != null ) {
  1609. // URI paramURI = (new File(proMTParams).toURI());
  1610. // params.load(paramURI, false);
  1611. // }
  1612. // return params;
  1613. // }
  1614. if ( connectorClassName.equals(ApertiumMTConnector.class.getName()) ) {
  1615. net.sf.okapi.connectors.apertium.Parameters params
  1616. = new net.sf.okapi.connectors.apertium.Parameters();
  1617. if ( apertiumParams != null ) {
  1618. URI paramURI = (new File(apertiumParams).toURI());
  1619. params.load(paramURI, false);
  1620. } // Use default otherwise
  1621. return params;
  1622. }
  1623. // Other connector: no parameters
  1624. return null;
  1625. }
  1626. /**
  1627. * Converts the plain text string into a TextFragment, using HTML-like patterns as inline codes.
  1628. * @param text the plain text to convert to TextFragment
  1629. * @return a new TextFragment (with possibly inline codes).
  1630. */
  1631. public TextFragment parseToTextFragment (String text) {
  1632. // Parses any thing within <...> into opening codes
  1633. // Parses any thing within </...> into closing codes
  1634. // Parses any thing within <.../> into placeholder codes
  1635. Pattern patternOpening = Pattern.compile("\\<(\\w+)[ ]*[^\\>/]*\\>");
  1636. Pattern patternClosing = Pattern.compile("\\</(\\w+)[ ]*[^\\>]*\\>");
  1637. Pattern patternPlaceholder = Pattern.compile("\\<(\\w+)[ ]*[^\\>]*/\\>");
  1638. TextFragment tf = new TextFragment();
  1639. tf.setCodedText(text);
  1640. int n;
  1641. int start = 0;
  1642. int diff = 0;
  1643. Matcher m = patternOpening.matcher(text);
  1644. while ( m.find(start) ) {
  1645. n = m.start();
  1646. diff += tf.changeToCode(n+diff, (n+diff)+m.group().length(),
  1647. TagType.OPENING, m.group(1));
  1648. start = (n+m.group().length());