PageRenderTime 49ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/okapi/tikal/src/main/java/net/sf/okapi/applications/tikal/Main.java

http://okapi.googlecode.com/
Java | 1835 lines | 1617 code | 110 blank | 108 comment | 351 complexity | 3fbecaf30c361a70f8547f1c46ff3b02 MD5 | raw file
Possible License(s): LGPL-2.1, LGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. /*===========================================================================
  2. Copyright (C) 2009-2011 by the Okapi Framework contributors
  3. -----------------------------------------------------------------------------
  4. This library is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at
  7. your option) any later version.
  8. This library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
  11. General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with this library; if not, write to the Free Software Foundation,
  14. Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  15. See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
  16. ===========================================================================*/
  17. package net.sf.okapi.applications.tikal;
  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.io.PrintStream;
  21. import java.net.MalformedURLException;
  22. import java.net.URI;
  23. import java.net.URISyntaxException;
  24. import java.net.URL;
  25. import java.nio.charset.Charset;
  26. import java.security.InvalidParameterException;
  27. import java.text.NumberFormat;
  28. import java.util.ArrayList;
  29. import java.util.Hashtable;
  30. import java.util.Iterator;
  31. import java.util.List;
  32. import java.util.Locale;
  33. import java.util.logging.Handler;
  34. import java.util.logging.Level;
  35. import java.util.logging.Logger;
  36. import java.util.regex.Matcher;
  37. import java.util.regex.Pattern;
  38. import net.sf.okapi.common.FileUtil;
  39. import net.sf.okapi.common.IParameters;
  40. import net.sf.okapi.common.Util;
  41. import net.sf.okapi.common.exceptions.OkapiIOException;
  42. import net.sf.okapi.common.filters.DefaultFilters;
  43. import net.sf.okapi.common.filters.FilterConfiguration;
  44. import net.sf.okapi.common.filters.FilterConfigurationMapper;
  45. import net.sf.okapi.common.filters.IFilterConfigurationEditor;
  46. import net.sf.okapi.common.filters.IFilterConfigurationListEditor;
  47. import net.sf.okapi.common.filterwriter.XLIFFWriter;
  48. import net.sf.okapi.common.LocaleId;
  49. import net.sf.okapi.common.pipeline.IPipelineStep;
  50. import net.sf.okapi.common.pipelinedriver.BatchItemContext;
  51. import net.sf.okapi.common.pipelinedriver.PipelineDriver;
  52. import net.sf.okapi.common.plugins.PluginsManager;
  53. import net.sf.okapi.common.resource.RawDocument;
  54. import net.sf.okapi.common.resource.TextFragment;
  55. import net.sf.okapi.common.resource.TextFragment.TagType;
  56. import net.sf.okapi.common.query.IQuery;
  57. import net.sf.okapi.lib.translation.ITMQuery;
  58. import net.sf.okapi.common.query.QueryResult;
  59. import net.sf.okapi.steps.common.FilterEventsToRawDocumentStep;
  60. import net.sf.okapi.steps.common.FilterEventsWriterStep;
  61. import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
  62. import net.sf.okapi.steps.formatconversion.FormatConversionStep;
  63. import net.sf.okapi.steps.formatconversion.Parameters;
  64. import net.sf.okapi.steps.formatconversion.TableFilterWriterParameters;
  65. import net.sf.okapi.steps.leveraging.LeveragingStep;
  66. import net.sf.okapi.steps.moses.ExtractionStep;
  67. import net.sf.okapi.steps.moses.MergingParameters;
  68. import net.sf.okapi.steps.moses.MergingStep;
  69. import net.sf.okapi.steps.segmentation.SegmentationStep;
  70. import net.sf.okapi.connectors.apertium.ApertiumMTConnector;
  71. import net.sf.okapi.connectors.globalsight.GlobalSightTMConnector;
  72. import net.sf.okapi.connectors.google.GoogleMTv2Connector;
  73. import net.sf.okapi.connectors.microsoft.MicrosoftMTConnector;
  74. import net.sf.okapi.connectors.mymemory.MyMemoryTMConnector;
  75. import net.sf.okapi.connectors.opentran.OpenTranTMConnector;
  76. import net.sf.okapi.connectors.pensieve.PensieveTMConnector;
  77. import net.sf.okapi.connectors.tda.TDASearchConnector;
  78. import net.sf.okapi.connectors.translatetoolkit.TranslateToolkitTMConnector;
  79. import net.sf.okapi.filters.mosestext.FilterWriterParameters;
  80. public class Main {
  81. protected final static int CMD_EXTRACT = 0;
  82. protected final static int CMD_MERGE = 1;
  83. protected final static int CMD_EDITCONFIG = 2;
  84. protected final static int CMD_QUERYTRANS = 3;
  85. protected final static int CMD_CONV2PO = 4;
  86. protected final static int CMD_CONV2TMX = 5;
  87. protected final static int CMD_CONV2TABLE = 6;
  88. protected final static int CMD_CONV2PEN = 7;
  89. protected final static int CMD_TRANSLATE = 8;
  90. protected final static int CMD_EXTRACTTOMOSES = 9;
  91. protected final static int CMD_LEVERAGEMOSES = 10;
  92. protected final static int CMD_SEGMENTATION = 11;
  93. protected final static int CMD_SHOWCONFIGS = 12;
  94. protected final static int CMD_ADDTRANS = 13;
  95. private static final String DEFAULT_SEGRULES = "-";
  96. private static final String MSG_ONLYWITHUICOMP = "UI-based commands are available only in the distributions with UI components.";
  97. private static PrintStream ps;
  98. protected ArrayList<String> inputs;
  99. protected String skeleton;
  100. protected String output;
  101. protected String specifiedConfigId;
  102. protected String specifiedConfigIdPath;
  103. protected String configId;
  104. protected String inputEncoding;
  105. protected String outputEncoding;
  106. protected LocaleId srcLoc;
  107. protected LocaleId trgLoc;
  108. protected int command = -1;
  109. protected String query;
  110. protected String addTransTrans;
  111. protected int addTransRating = 4;
  112. protected boolean useGoogleV2;
  113. protected String googleV2Params;
  114. protected boolean useOpenTran;
  115. protected boolean useTransToolkit;
  116. protected String transToolkitParams;
  117. protected boolean useGlobalSight;
  118. protected String globalSightParams;
  119. protected boolean useTDA;
  120. protected String tdaParams;
  121. protected boolean useMyMemory;
  122. protected String myMemoryParams;
  123. protected boolean useApertium;
  124. protected String apertiumParams;
  125. protected boolean usePensieve;
  126. protected String pensieveData;
  127. protected boolean useMicrosoft;
  128. protected String microsoftParams;
  129. // protected boolean useProMT;
  130. // protected String proMTParams;
  131. protected boolean genericOutput = false;
  132. protected String tableConvFormat;
  133. protected String tableConvCodes;
  134. protected int convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_TARGETOREMPTY;
  135. protected boolean convSkipEntriesWithoutText = true;
  136. protected boolean convOverwrite = false;
  137. protected String segRules;
  138. protected boolean showTraceHint = true;
  139. protected String tmOptions;
  140. protected boolean levOptFillTarget = true;
  141. protected String levOptTMXPath;
  142. protected boolean extOptCopy = true; // Copy source in empty target by default
  143. protected boolean extOptAltTrans = true; // Output alt-trans by default
  144. protected boolean mosesCopyToTarget = false;
  145. protected boolean mosesOverwriteTarget = false;
  146. protected boolean moses2Outputs = false;
  147. protected boolean mosesUseGModeInAltTrans = true;
  148. protected String mosesFromPath;
  149. protected String mosesToPath;
  150. protected String skeletonDir;
  151. protected String outputDir;
  152. private FilterConfigurationMapper fcMapper;
  153. private Hashtable<String, String> extensionsMap;
  154. private Hashtable<String, String> filtersMap;
  155. /**
  156. * Try the guess the encoding of the console.
  157. * @return the guessed name of the console's encoding.
  158. */
  159. private static String getConsoleEncodingName () {
  160. String osName = System.getProperty("os.name");
  161. if ( osName.startsWith("Mac OS")) {
  162. return "UTF-8"; // Apparently the default for bash on Mac
  163. }
  164. if ( osName.startsWith("Windows") ) {
  165. //TODO: Get DOS code-pages per locale
  166. return "cp850"; // Not perfect, but covers many languages
  167. }
  168. // Default: Assumes unique encoding overall
  169. return Charset.defaultCharset().name();
  170. }
  171. public static void main (String[] originalArgs) {
  172. Main prog = new Main();
  173. boolean showTrace = false;
  174. try {
  175. // Create an encoding-aware output for the console
  176. // System.out uses the default system encoding that
  177. // may not be the right one (e.g. windows-1252 vs cp850)
  178. ps = new PrintStream(System.out, true, getConsoleEncodingName());
  179. // Disable root console handler
  180. Handler[] handlers = Logger.getLogger("").getHandlers();
  181. for ( Handler handler : handlers ) {
  182. Logger.getLogger("").removeHandler(handler);
  183. }
  184. // Create our own handler
  185. LogHandler logHandler = new LogHandler(ps);
  186. logHandler.setLevel(Level.INFO);
  187. Logger.getLogger("").addHandler(logHandler); //$NON-NLS-1$
  188. // Remove all empty arguments
  189. // This is to work around the "$1" issue in bash
  190. ArrayList<String> args = new ArrayList<String>();
  191. for ( String tmp : originalArgs ) {
  192. if ( tmp.length() > 0 ) args.add(tmp);
  193. }
  194. prog.printBanner();
  195. if ( args.size() == 0 ) {
  196. prog.printUsage();
  197. return;
  198. }
  199. if ( args.contains("-?") ) {
  200. prog.printUsage();
  201. return; // Overrides all arguments
  202. }
  203. if ( args.contains("-h") || args.contains("--help") || args.contains("-help") ) {
  204. prog.showHelp();
  205. return; // Overrides all arguments
  206. }
  207. if ( args.contains("-i") || args.contains("--info") || args.contains("-info") ) {
  208. prog.showInfo();
  209. return; // Overrides all arguments
  210. }
  211. if ( args.contains("-trace") ) {
  212. // Check early so the option does not get 'eaten' by a bad syntax
  213. showTrace = true;
  214. }
  215. for ( int i=0; i<args.size(); i++ ) {
  216. String arg = args.get(i);
  217. if ( arg.equals("-fc") ) {
  218. prog.specifiedConfigId = prog.getArgument(args, ++i);
  219. }
  220. else if ( arg.equals("-sl") ) {
  221. prog.srcLoc = new LocaleId(prog.getArgument(args, ++i), true);
  222. }
  223. else if ( arg.equals("-tl") ) {
  224. prog.trgLoc = new LocaleId(prog.getArgument(args, ++i), true);
  225. }
  226. else if ( arg.equals("-ie") ) {
  227. prog.inputEncoding = prog.getArgument(args, ++i);
  228. }
  229. else if ( arg.equals("-oe") ) {
  230. prog.outputEncoding = prog.getArgument(args, ++i);
  231. }
  232. else if ( arg.equals("-od") ) {
  233. prog.outputDir = prog.getArgument(args, ++i);
  234. }
  235. else if ( arg.equals("-sd") ) {
  236. prog.skeletonDir = prog.getArgument(args, ++i);
  237. }
  238. else if ( arg.equals("-x") ) {
  239. prog.command = CMD_EXTRACT;
  240. }
  241. else if ( arg.equals("-xm") ) {
  242. prog.command = CMD_EXTRACTTOMOSES;
  243. }
  244. else if ( arg.equals("-2") ) {
  245. prog.moses2Outputs = true;
  246. }
  247. else if ( arg.equals("-t") ) {
  248. prog.command = CMD_TRANSLATE;
  249. }
  250. else if ( arg.equals("-m") ) {
  251. prog.command = CMD_MERGE;
  252. }
  253. else if ( arg.equals("-lm") ) {
  254. prog.command = CMD_LEVERAGEMOSES;
  255. }
  256. else if ( arg.equals("-totrg") ) {
  257. prog.mosesCopyToTarget = true;
  258. prog.mosesOverwriteTarget = false;
  259. }
  260. else if ( arg.equals("-overtrg") ) {
  261. prog.mosesCopyToTarget = true;
  262. prog.mosesOverwriteTarget = true;
  263. }
  264. else if ( arg.equals("-bpt") ) {
  265. prog.mosesUseGModeInAltTrans = false;
  266. }
  267. else if ( arg.equals("-over") ) {
  268. prog.convOverwrite = true;
  269. }
  270. else if ( arg.equals("-from")) {
  271. prog.mosesFromPath = prog.getArgument(args, ++i);
  272. }
  273. else if ( arg.equals("-to") ) {
  274. prog.mosesToPath = prog.getArgument(args, ++i);
  275. }
  276. else if ( arg.equals("-2po") ) {
  277. prog.command = CMD_CONV2PO;
  278. }
  279. else if ( arg.equals("-2tmx") ) {
  280. prog.command = CMD_CONV2TMX;
  281. }
  282. else if ( arg.equals("-2tbl") ) {
  283. prog.command = CMD_CONV2TABLE;
  284. }
  285. else if ( arg.equals("-csv") ) {
  286. prog.tableConvFormat = "csv";
  287. }
  288. else if ( arg.equals("-tab") ) {
  289. prog.tableConvFormat = "tab";
  290. }
  291. else if ( arg.equals("-xliff") ) {
  292. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFF;
  293. }
  294. else if ( arg.equals("-xliffgx") ) {
  295. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFFGX;
  296. }
  297. else if ( arg.equals("-tmx") ) {
  298. prog.tableConvCodes = TableFilterWriterParameters.INLINE_TMX;
  299. }
  300. else if ( arg.equals("-all") ) {
  301. prog.convSkipEntriesWithoutText = false;
  302. }
  303. else if ( arg.equals("-nofill") ) {
  304. prog.levOptFillTarget = false;
  305. }
  306. else if ( arg.equals("-nocopy") ) {
  307. prog.extOptCopy = false;
  308. }
  309. else if ( arg.equals("-noalttrans") ) {
  310. prog.extOptAltTrans = false;
  311. }
  312. else if ( arg.equals("-maketmx") ) {
  313. prog.levOptTMXPath = "pretrans.tmx";
  314. if ( args.size() > i+1 ) {
  315. if ( !args.get(i+1).startsWith("-") ) {
  316. prog.levOptTMXPath = args.get(++i);
  317. }
  318. }
  319. }
  320. else if ( arg.equals("-trgsource") ) {
  321. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCESOURCE;
  322. }
  323. else if ( arg.equals("-trgempty") ) {
  324. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCEEMPTY;
  325. }
  326. else if ( arg.equals("-imp") ) {
  327. prog.command = CMD_CONV2PEN;
  328. prog.pensieveData = prog.getArgument(args, ++i);
  329. }
  330. else if ( arg.equals("-exp") ) {
  331. prog.command = CMD_CONV2TMX;
  332. prog.specifiedConfigId = "okf_pensieve";
  333. }
  334. else if ( arg.equals("-e") ) {
  335. prog.command = CMD_EDITCONFIG;
  336. if ( args.size() > i+1 ) {
  337. if ( !args.get(i+1).startsWith("-") ) {
  338. prog.specifiedConfigId = args.get(++i);
  339. }
  340. }
  341. }
  342. else if ( arg.equals("-generic") ) {
  343. prog.genericOutput = true;
  344. prog.tableConvCodes = TableFilterWriterParameters.INLINE_GENERIC;
  345. }
  346. else if ( arg.equals("-q") ) {
  347. prog.command = CMD_QUERYTRANS;
  348. prog.query = prog.getArgument(args, ++i);
  349. }
  350. else if ( arg.equals("-a") ) {
  351. prog.command = CMD_ADDTRANS;
  352. prog.query = prog.getArgument(args, ++i);
  353. prog.addTransTrans = prog.getArgument(args, ++i);
  354. if ( args.size() > i+1 ) {
  355. if ( !args.get(i+1).startsWith("-") ) {
  356. // Optional rating
  357. try {
  358. prog.addTransRating = Integer.parseInt(args.get(++i));
  359. }
  360. catch ( NumberFormatException e ) {
  361. throw new RuntimeException(String.format("Invalid rating option: '%s'.", args.get(i)));
  362. }
  363. if (( prog.addTransRating < -10 ) || ( prog.addTransRating > 10 )) {
  364. throw new RuntimeException("Rating must be between -10 and 10.");
  365. }
  366. }
  367. }
  368. }
  369. else if ( arg.equals("-opt") ) {
  370. prog.tmOptions = prog.getArgument(args, ++i);
  371. }
  372. else if ( arg.equals("-gg") || arg.equals("-google") ) {
  373. prog.useGoogleV2 = true;
  374. if ( args.size() > i+1 ) {
  375. if ( !args.get(i+1).startsWith("-") ) {
  376. prog.googleV2Params = args.get(++i);
  377. }
  378. }
  379. }
  380. else if ( arg.equals("-opentran") ) {
  381. prog.useOpenTran = true;
  382. }
  383. else if ( arg.equals("-tt") ) {
  384. prog.useTransToolkit = true;
  385. prog.transToolkitParams = "amagama.locamotion.org:80";
  386. if ( args.size() > i+1 ) {
  387. if ( !args.get(i+1).startsWith("-") ) {
  388. prog.transToolkitParams = args.get(++i);
  389. }
  390. }
  391. }
  392. else if ( arg.equals("-gs") ) {
  393. prog.useGlobalSight = true;
  394. prog.globalSightParams = prog.getArgument(args, ++i);
  395. }
  396. else if ( arg.equals("-tda") ) {
  397. prog.useTDA = true;
  398. prog.tdaParams = prog.getArgument(args, ++i);
  399. }
  400. else if ( arg.equals("-ms") ) {
  401. prog.useMicrosoft = true;
  402. if ( args.size() > i+1 ) {
  403. if ( !args.get(i+1).startsWith("-") ) {
  404. prog.microsoftParams = args.get(++i);
  405. }
  406. }
  407. }
  408. // else if ( arg.equals("-promt") ) {
  409. // prog.useProMT = true;
  410. // if ( args.size() > i+1 ) {
  411. // if ( !args.get(i+1).startsWith("-") ) {
  412. // prog.proMTParams = args.get(++i);
  413. // }
  414. // }
  415. // }
  416. else if ( arg.equals("-apertium") ) {
  417. prog.useApertium = true;
  418. if ( args.size() > i+1 ) {
  419. if ( !args.get(i+1).startsWith("-") ) {
  420. prog.apertiumParams = args.get(++i);
  421. }
  422. }
  423. }
  424. else if ( arg.equals("-mm") ) {
  425. prog.useMyMemory = true;
  426. // Key is optional (left for backward compatibility)
  427. if ( args.size() > i+1 ) {
  428. if ( !args.get(i+1).startsWith("-") ) {
  429. prog.myMemoryParams = prog.getArgument(args, ++i);
  430. }
  431. }
  432. }
  433. else if ( arg.equals("-pen") ) {
  434. prog.usePensieve = true;
  435. prog.pensieveData = "http://localhost:8080";
  436. if ( args.size() > i+1 ) {
  437. if ( !args.get(i+1).startsWith("-") ) {
  438. prog.pensieveData = args.get(++i);
  439. }
  440. }
  441. }
  442. else if ( arg.endsWith("-listconf") || arg.equals("-lfc") ) {
  443. prog.command = CMD_SHOWCONFIGS;
  444. }
  445. else if ( arg.equals("-s") ) {
  446. prog.command = CMD_SEGMENTATION;
  447. prog.segRules = DEFAULT_SEGRULES;
  448. }
  449. else if ( arg.equals("-seg") ) {
  450. prog.segRules = DEFAULT_SEGRULES; // Default
  451. if ( args.size() > i+1 ) {
  452. if ( !args.get(i+1).startsWith("-") ) {
  453. prog.segRules = args.get(++i);
  454. }
  455. }
  456. }
  457. else if ( arg.equals("-trace") ) {
  458. // Trace aAlready set. this is just to avoid
  459. // seeing -trace as invalid parameter
  460. }
  461. //=== Input file or error
  462. else if ( !arg.startsWith("-") ) {
  463. prog.inputs.add(args.get(i));
  464. }
  465. else {
  466. prog.showTraceHint = false; // Using trace is not helpful to the user for this error
  467. throw new InvalidParameterException(
  468. String.format("Invalid command-line argument '%s'.", args.get(i)));
  469. }
  470. }
  471. // Forgive having the extension .fprm from configuration ID if there is one
  472. if ( prog.specifiedConfigId != null ) {
  473. String cfgPath = Util.getDirectoryName(prog.specifiedConfigId);
  474. if ( !cfgPath.isEmpty() ) {
  475. prog.specifiedConfigIdPath = cfgPath;
  476. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, true);
  477. }
  478. if ( prog.specifiedConfigId.endsWith(FilterConfigurationMapper.CONFIGFILE_EXT) ) {
  479. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, false);
  480. }
  481. }
  482. // Check inputs and command
  483. if ( prog.command == -1 ) {
  484. ps.println("No command specified. Please use one of the command described below:");
  485. prog.printUsage();
  486. return;
  487. }
  488. if ( prog.command == CMD_EDITCONFIG ) {
  489. if ( prog.specifiedConfigId == null ) {
  490. prog.editAllConfigurations();
  491. }
  492. else {
  493. prog.editConfiguration();
  494. }
  495. return;
  496. }
  497. if ( prog.command == CMD_SHOWCONFIGS ) {
  498. prog.showAllConfigurations();
  499. return;
  500. }
  501. if ( prog.command == CMD_QUERYTRANS ) {
  502. prog.processQuery();
  503. return;
  504. }
  505. if ( prog.command == CMD_ADDTRANS ) {
  506. prog.processAddTranslation();
  507. return;
  508. }
  509. if ( prog.inputs.size() == 0 ) {
  510. throw new RuntimeException("No input document specified.");
  511. }
  512. // Process all input files
  513. for ( int i=0; i<prog.inputs.size(); i++ ) {
  514. if ( i > 0 ) {
  515. ps.println("------------------------------------------------------------"); //$NON-NLS-1$
  516. }
  517. prog.process(prog.inputs.get(i));
  518. }
  519. }
  520. catch ( Throwable e ) {
  521. if ( showTrace ) e.printStackTrace();
  522. else {
  523. ps.println("ERROR: "+e.getMessage());
  524. Throwable e2 = e.getCause();
  525. if ( e2 != null ) ps.println(e2.getMessage());
  526. if ( prog.showTraceHint ) ps.println("You can use the -trace option for more details.");
  527. }
  528. System.exit(1); // Error
  529. }
  530. }
  /**
   * Creates a new program object with an empty list of input documents.
   */
  public Main () {
    this.inputs = new ArrayList<String>();
  }
  534. protected String getArgument (ArrayList<String> args, int index) {
  535. if ( index >= args.size() ) {
  536. showTraceHint = false; // Using trace is not helpful to the user for this error
  537. throw new RuntimeException(String.format(
  538. "Missing parameter after '%s'", args.get(index-1)));
  539. }
  540. return args.get(index);
  541. }
  542. private void initialize () {
  543. // Create the mapper and load it with all parameters editor info
  544. // Do not load the filter configurations yet (time consuming)
  545. fcMapper = new FilterConfigurationMapper();
  546. DefaultFilters.setMappings(fcMapper, false, false);
  547. // Instead create a map with extensions -> filter
  548. extensionsMap = new Hashtable<String, String>();
  549. filtersMap = new Hashtable<String, String>();
  550. extensionsMap.put(".docx", "okf_openxml");
  551. extensionsMap.put(".pptx", "okf_openxml");
  552. extensionsMap.put(".xlsx", "okf_openxml");
  553. filtersMap.put("okf_openxml", "net.sf.okapi.filters.openxml.OpenXMLFilter");
  554. extensionsMap.put(".odt", "okf_openoffice");
  555. extensionsMap.put(".swx", "okf_openoffice");
  556. extensionsMap.put(".ods", "okf_openoffice");
  557. extensionsMap.put(".swc", "okf_openoffice");
  558. extensionsMap.put(".odp", "okf_openoffice");
  559. extensionsMap.put(".sxi", "okf_openoffice");
  560. extensionsMap.put(".odg", "okf_openoffice");
  561. extensionsMap.put(".sxd", "okf_openoffice");
  562. filtersMap.put("okf_openoffice", "net.sf.okapi.filters.openoffice.OpenOfficeFilter");
  563. extensionsMap.put(".htm", "okf_html");
  564. extensionsMap.put(".html", "okf_html");
  565. filtersMap.put("okf_html", "net.sf.okapi.filters.html.HtmlFilter");
  566. extensionsMap.put(".xlf", "okf_xliff");
  567. extensionsMap.put(".xlif", "okf_xliff");
  568. extensionsMap.put(".xliff", "okf_xliff");
  569. filtersMap.put("okf_xliff", "net.sf.okapi.filters.xliff.XLIFFFilter");
  570. extensionsMap.put(".tmx", "okf_tmx");
  571. filtersMap.put("okf_tmx", "net.sf.okapi.filters.tmx.TmxFilter");
  572. extensionsMap.put(".properties", "okf_properties");
  573. extensionsMap.put(".lang", "okf_properties-skypeLang");
  574. filtersMap.put("okf_properties", "net.sf.okapi.filters.properties.PropertiesFilter");
  575. extensionsMap.put(".po", "okf_po");
  576. filtersMap.put("okf_po", "net.sf.okapi.filters.po.POFilter");
  577. extensionsMap.put(".xml", "okf_xml");
  578. extensionsMap.put(".resx", "okf_xml-resx");
  579. filtersMap.put("okf_xml", "net.sf.okapi.filters.xml.XMLFilter");
  580. extensionsMap.put(".srt", "okf_regex-srt");
  581. filtersMap.put("okf_regex", "net.sf.okapi.filters.regex.RegexFilter");
  582. extensionsMap.put(".dtd", "okf_dtd");
  583. extensionsMap.put(".ent", "okf_dtd");
  584. filtersMap.put("okf_dtd", "net.sf.okapi.filters.dtd.DTDFilter");
  585. extensionsMap.put(".ts", "okf_ts");
  586. filtersMap.put("okf_ts", "net.sf.okapi.filters.ts.TsFilter");
  587. extensionsMap.put(".txt", "okf_plaintext");
  588. filtersMap.put("okf_plaintext", "net.sf.okapi.filters.plaintext.PlainTextFilter");
  589. extensionsMap.put(".csv", "okf_table_csv");
  590. filtersMap.put("okf_table", "net.sf.okapi.filters.table.TableFilter");
  591. extensionsMap.put(".ttx", "okf_ttx");
  592. filtersMap.put("okf_ttx", "net.sf.okapi.filters.ttx.TTXFilter");
  593. extensionsMap.put(".json", "okf_json");
  594. filtersMap.put("okf_json", "net.sf.okapi.filters.json.JSONFilter");
  595. filtersMap.put("okf_phpcontent", "net.sf.okapi.filters.php.PHPContentFilter");
  596. extensionsMap.put(".pentm", "okf_pensieve");
  597. filtersMap.put("okf_pensieve", "net.sf.okapi.filters.pensieve.PensieveFilter");
  598. filtersMap.put("okf_vignette", "net.sf.okapi.filters.vignette.VignetteFilter");
  599. extensionsMap.put(".yml", "okf_railsyaml");
  600. filtersMap.put("okf_railsyaml", "net.sf.okapi.filters.railsyaml.RailsYamlFilter");
  601. extensionsMap.put(".idml", "okf_idml");
  602. filtersMap.put("okf_idml", "net.sf.okapi.filters.idml.IDMLFilter");
  603. extensionsMap.put(".mif", "okf_mif");
  604. filtersMap.put("okf_mif", "net.sf.okapi.filters.mif.MIFFilter");
  605. extensionsMap.put(".txp", "okf_transifex");
  606. filtersMap.put("okf_transifex", "net.sf.okapi.filters.transifex.TransifexFilter");
  607. extensionsMap.put(".zip", "okf_archive");
  608. filtersMap.put("okf_archive", "net.sf.okapi.filters.archive.ArchiveFilter");
  609. extensionsMap.put(".txml", "okf_txml");
  610. filtersMap.put("okf_txml", "net.sf.okapi.filters.txml.TXMLFilter");
  611. filtersMap.put("okf_versifiedtxt", "net.sf.okapi.filters.versifiedtxt.VersifiedTextFilter");
  612. filtersMap.put("okf_xmlstream", "net.sf.okapi.filters.xmlstream.XmlStreamFilter");
  613. filtersMap.put("okf_mosestext", "net.sf.okapi.filters.mosestext.MosesTextFilter");
  614. if (specifiedConfigIdPath != null)
  615. fcMapper.setCustomConfigurationsDirectory(specifiedConfigIdPath);
  616. }
  617. private String getConfigurationId (String ext) {
  618. // Get the configuration for the extension
  619. String id = extensionsMap.get(ext);
  620. if ( id == null ) {
  621. throw new RuntimeException(String.format(
  622. "Could not guess the configuration for the extension '%s'", ext));
  623. }
  624. return id;
  625. }
  626. private void editAllConfigurations () {
  627. initialize();
  628. guessMissingLocales(null);
  629. // Add all the pre-defined configurations
  630. DefaultFilters.setMappings(fcMapper, false, true);
  631. loadFromPluginsAndUpdate();
  632. // Add the custom configurations
  633. fcMapper.updateCustomConfigurations();
  634. // Edit
  635. try {
  636. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  637. IFilterConfigurationListEditor editor =
  638. (IFilterConfigurationListEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  639. // Call the editor
  640. editor.editConfigurations(fcMapper);
  641. }
  642. catch ( InstantiationException e ) {
  643. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  644. }
  645. catch ( IllegalAccessException e ) {
  646. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  647. }
  648. catch ( ClassNotFoundException e ) {
  649. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  650. }
  651. }
  652. private void editConfiguration () {
  653. initialize();
  654. guessMissingLocales(null);
  655. if ( specifiedConfigId == null ) {
  656. throw new RuntimeException("You must specified the configuration to edit.");
  657. }
  658. configId = specifiedConfigId;
  659. if ( !prepareFilter(configId) ) return; // Next input
  660. try {
  661. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  662. IFilterConfigurationEditor editor =
  663. (IFilterConfigurationEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  664. // Call the editor
  665. editor.editConfiguration(configId, fcMapper);
  666. }
  667. catch ( InstantiationException e ) {
  668. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  669. }
  670. catch ( IllegalAccessException e ) {
  671. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  672. }
  673. catch ( ClassNotFoundException e ) {
  674. throw new RuntimeException(MSG_ONLYWITHUICOMP);
  675. }
  676. }
  677. private void showAllConfigurations () {
  678. initialize();
  679. DefaultFilters.setMappings(fcMapper, true, true);
  680. loadFromPluginsAndUpdate();
  681. // Add the custom configurations
  682. fcMapper.updateCustomConfigurations();
  683. ps.println("List of all filter configurations available:");
  684. Iterator<FilterConfiguration> iter = fcMapper.getAllConfigurations();
  685. FilterConfiguration config;
  686. while ( iter.hasNext() ) {
  687. config = iter.next();
  688. ps.println(String.format(" - %s = %s",
  689. config.configId, config.description));
  690. }
  691. }
  692. private boolean prepareFilter (String configId) {
  693. boolean pluginsDone = false;
  694. while ( true ) {
  695. // Is it a default configuration?
  696. if ( filtersMap.containsKey(configId) ) {
  697. // Configuration ID is a default one:
  698. // Add its filter to the configuration mapper
  699. fcMapper.addConfigurations(filtersMap.get(configId));
  700. // Hard code case: okf_vignette requires okgf_html to be also loaded
  701. //TODO: Find a better way to handle sub-filter cases
  702. if ( configId.startsWith("okf_vignette") || configId.startsWith("okf_xmlstream") ) {
  703. fcMapper.addConfigurations(filtersMap.get("okf_html"));
  704. }
  705. return true;
  706. }
  707. // Else: Try to find the filter for that configuration
  708. for ( String tmp : filtersMap.keySet() ) {
  709. if ( configId.startsWith(tmp) ) {
  710. fcMapper.addConfigurations(filtersMap.get(tmp));
  711. // If the given configuration is not one of the pre-defined
  712. if ( fcMapper.getConfiguration(configId) == null ) {
  713. // Assume it is a custom one
  714. fcMapper.addCustomConfiguration(configId);
  715. }
  716. return true;
  717. }
  718. }
  719. // No success yet?
  720. if ( pluginsDone ) break;
  721. // Try to load the plug-ins if it was not done yet
  722. loadFromPluginsAndUpdate();
  723. pluginsDone = true;
  724. }
  725. // Could not guess
  726. ps.println(String.format(
  727. "ERROR: Could not guess the filter for the configuration '%s'", configId));
  728. return false;
  729. }
  730. private void loadFromPluginsAndUpdate () {
  731. // Discover and add plug-ins
  732. PluginsManager mgt = new PluginsManager();
  733. mgt.discover(new File(getAppRootDirectory()+File.separator+"dropins"), true);
  734. fcMapper.addFromPlugins(mgt);
  735. // Now update the filtersMap with new configurations
  736. Iterator<FilterConfiguration> iter = fcMapper.getAllConfigurations();
  737. while ( iter.hasNext() ) {
  738. FilterConfiguration cfg = iter.next();
  739. if ( !filtersMap.containsKey(cfg.configId) ) {
  740. filtersMap.put(cfg.configId, cfg.filterClass);
  741. }
  742. }
  743. }
  744. private void guessMissingLocales (String inputPath) {
  745. // If both locales are already set: just use those
  746. if (( srcLoc != null ) && ( trgLoc != null )) return;
  747. // Try to see if we can get one or both from the input file
  748. if ( inputPath != null ) {
  749. List<String> guessed = FileUtil.guessLanguages(inputPath);
  750. if ( guessed.size() > 0 ) {
  751. if ( srcLoc == null ) {
  752. srcLoc = LocaleId.fromString(guessed.get(0));
  753. }
  754. if ( guessed.size() > 1 ) {
  755. if ( trgLoc == null ) {
  756. trgLoc = LocaleId.fromString(guessed.get(1));
  757. }
  758. }
  759. }
  760. }
  761. // Make sure we do have a source
  762. if ( srcLoc == null ) {
  763. srcLoc = new LocaleId("en", false);
  764. }
  765. // Make sure we do have a target
  766. if ( trgLoc == null ) {
  767. trgLoc = new LocaleId(Locale.getDefault());
  768. if ( trgLoc.sameLanguageAs(srcLoc) ) {
  769. trgLoc = new LocaleId("fr", false);
  770. }
  771. }
  772. }
  773. private void guessMissingParameters (String inputOfConfig) {
  774. if ( specifiedConfigId == null ) {
  775. String ext = Util.getExtension(inputOfConfig);
  776. if ( Util.isEmpty(ext) ) {
  777. throw new RuntimeException(String.format(
  778. "The input file '%s' has no extension to guess the filter from.", inputOfConfig));
  779. }
  780. configId = getConfigurationId(ext.toLowerCase());
  781. }
  782. else {
  783. configId = specifiedConfigId;
  784. }
  785. if ( outputEncoding == null ) {
  786. if ( inputEncoding != null ) outputEncoding = inputEncoding;
  787. else outputEncoding = Charset.defaultCharset().name();
  788. }
  789. if ( inputEncoding == null ) {
  790. inputEncoding = Charset.defaultCharset().name();
  791. }
  792. }
  793. String pathChangeFolder (String newFolder,
  794. String oldPath)
  795. {
  796. String result;
  797. if ( newFolder == null ) {
  798. result = oldPath;
  799. }
  800. else {
  801. File file = new File(newFolder, Util.getFilename(oldPath, true));
  802. result = file.toString();
  803. }
  804. return result;
  805. }
  806. String pathInsertOutBeforeExt(String oldPath) {
  807. String ext = Util.getExtension(oldPath);
  808. int n = oldPath.lastIndexOf('.');
  809. return oldPath.substring(0, n) + ".out" + ext; //$NON-NLS-1$
  810. }
  811. private void guessMergingArguments (String input) {
  812. String ext = Util.getExtension(input);
  813. if ( !ext.equals(".xlf") ) {
  814. throw new RuntimeException(String.format(
  815. "The input file '%s' does not have the expected .xlf extension.", input));
  816. }
  817. int n = input.lastIndexOf('.');
  818. skeleton = input.substring(0, n);
  819. if ( outputDir == null ) {
  820. output = pathInsertOutBeforeExt(skeleton);
  821. }
  822. else {
  823. output = pathChangeFolder(outputDir, skeleton);
  824. }
  825. skeleton = pathChangeFolder(skeletonDir, skeleton);
  826. }
  827. private void guessMergingMosesArguments (String input) {
  828. // Main input is the original file, not the Moses file
  829. // The Moses file is specified with -from or null
  830. if ( Util.isEmpty(mosesFromPath) ) {
  831. // We guess the Moses filename:
  832. mosesFromPath = input + "."+trgLoc.toString();
  833. }
  834. if ( !Util.isEmpty(mosesToPath) ) {
  835. output = mosesToPath;
  836. }
  837. else {
  838. output = pathInsertOutBeforeExt(input);
  839. }
  840. }
  841. protected void process (String input) throws URISyntaxException {
  842. initialize();
  843. RawDocument rd;
  844. File file;
  845. switch ( command ) {
  846. case CMD_TRANSLATE:
  847. ps.println("Translation");
  848. guessMissingParameters(input);
  849. if ( !prepareFilter(configId) ) return; // Next input
  850. guessMissingLocales(input);
  851. file = new File(input);
  852. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  853. rd.setFilterConfigId(configId);
  854. translateFile(rd);
  855. break;
  856. case CMD_SEGMENTATION:
  857. ps.println("Segmentation");
  858. guessMissingParameters(input);
  859. if ( !prepareFilter(configId) ) return; // Next input
  860. guessMissingLocales(input);
  861. file = new File(input);
  862. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  863. rd.setFilterConfigId(configId);
  864. segmentFile(rd);
  865. break;
  866. case CMD_EXTRACT:
  867. ps.println("Extraction");
  868. guessMissingParameters(input);
  869. if ( !prepareFilter(configId) ) return; // Next input
  870. guessMissingLocales(input);
  871. file = new File(input);
  872. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  873. rd.setFilterConfigId(configId);
  874. extractFile(rd);
  875. break;
  876. case CMD_EXTRACTTOMOSES:
  877. ps.println("Extraction to Moses InlineText");
  878. guessMissingParameters(input);
  879. if ( !prepareFilter(configId) ) return; // Next input
  880. guessMissingLocales(input);
  881. file = new File(input);
  882. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  883. rd.setFilterConfigId(configId);
  884. extractFileToMoses(rd);
  885. break;
  886. case CMD_MERGE:
  887. ps.println("Merging");
  888. guessMergingArguments(input);
  889. guessMissingParameters(skeleton);
  890. if ( !prepareFilter(configId) ) return; // Next input
  891. guessMissingLocales(input);
  892. XLIFFMergingStep stepMrg = new XLIFFMergingStep(fcMapper);
  893. file = new File(skeleton);
  894. RawDocument skelRawDoc = new RawDocument(file.toURI(), inputEncoding,
  895. srcLoc, trgLoc);
  896. skelRawDoc.setFilterConfigId(configId);
  897. stepMrg.setXliffPath(input);
  898. stepMrg.setOutputPath(output);
  899. stepMrg.setOutputEncoding(outputEncoding);
  900. ps.println("Source language: "+srcLoc);
  901. ps.println("Target language: "+trgLoc);
  902. ps.println("Default input encoding: "+inputEncoding);
  903. ps.println("Output encoding: "+outputEncoding);
  904. ps.println("Filter configuration: "+configId);
  905. ps.println("XLIFF: "+input);
  906. ps.println(String.format("Output: %s", (output==null) ? "<auto-defined>" : output));
  907. stepMrg.handleRawDocument(skelRawDoc);
  908. break;
  909. case CMD_LEVERAGEMOSES:
  910. ps.println("Merging Moses InlineText");
  911. guessMissingLocales(input);
  912. guessMergingMosesArguments(input);
  913. guessMissingParameters(input);
  914. if ( !prepareFilter(configId) ) return; // Next input
  915. file = new File(input);
  916. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc, configId);
  917. leverageFileWithMoses(rd);
  918. break;
  919. case CMD_CONV2PO:
  920. case CMD_CONV2TMX:
  921. case CMD_CONV2PEN:
  922. case CMD_CONV2TABLE:
  923. if ( command == CMD_CONV2PO ) {
  924. ps.println("Conversion to PO");
  925. }
  926. else if ( command == CMD_CONV2TMX ) {
  927. ps.println("Conversion to TMX");
  928. }
  929. else if ( command == CMD_CONV2TABLE ) {
  930. ps.println("Conversion to Table");
  931. }
  932. else {
  933. ps.println("Importing to Pensieve TM");
  934. }
  935. guessMissingParameters(input);
  936. if ( !prepareFilter(configId) ) return; // Next input
  937. guessMissingLocales(input);
  938. file = new File(input);
  939. String output = input;
  940. if ( command == CMD_CONV2PO ) {
  941. output += ".po";
  942. }
  943. else if ( command == CMD_CONV2TMX ) {
  944. output += ".tmx";
  945. }
  946. else if ( command == CMD_CONV2TABLE) {
  947. output += ".txt";
  948. }
  949. else { // Pensieve
  950. output = checkPensieveDirExtension();
  951. }
  952. URI outputURI = new File(output).toURI();
  953. rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);
  954. rd.setFilterConfigId(configId);
  955. ps.println("Source language: "+srcLoc);
  956. ps.println("Target language: "+trgLoc);
  957. ps.println("Default input encoding: "+inputEncoding);
  958. ps.println("Filter configuration: "+configId);
  959. ps.println("Output: "+output);
  960. convertFile(rd, outputURI);
  961. break;
  962. }
  963. ps.println("Done");
  964. }
  965. private void printBanner () {
  966. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  967. ps.println("Okapi Tikal - Localization Toolset");
  968. // The version will show as 'null' until the code is build as a JAR.
  969. ps.println(String.format("Version: %s", getClass().getPackage().getImplementationVersion()));
  970. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  971. }
  972. private void showInfo () {
  973. Runtime rt = Runtime.getRuntime();
  974. rt.runFinalization();
  975. rt.gc();
  976. ps.println("Java version: " + System.getProperty("java.version")); //$NON-NLS-1$
  977. ps.println(String.format("Platform: %s, %s, %s",
  978. System.getProperty("os.name"), //$NON-NLS-1$
  979. System.getProperty("os.arch"), //$NON-NLS-1$
  980. System.getProperty("os.version"))); //$NON-NLS-1$
  981. NumberFormat nf = NumberFormat.getInstance();
  982. ps.println(String.format("Java VM memory: free=%s KB, total=%s KB", //$NON-NLS-1$
  983. nf.format(rt.freeMemory()/1024),
  984. nf.format(rt.totalMemory()/1024)));
  985. ps.println("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  986. }
  987. private String getAppRootDirectory () {
  988. try {
  989. URL url = getClass().getProtectionDomain().getCodeSource().getLocation();
  990. String path = new File(url.toURI()).getCanonicalPath();
  991. return Util.getDirectoryName(Util.getDirectoryName(path));
  992. }
  993. catch ( IOException e ) {
  994. throw new OkapiIOException(e);
  995. }
  996. catch ( URISyntaxException e ) {
  997. throw new OkapiIOException("Bad URI syntax.", e);
  998. }
  999. }
  1000. private void showHelp () throws MalformedURLException {
  1001. Util.openWikiTopic("Tikal");
  1002. }
  1003. private void printUsage () {
  1004. ps.println("Shows this screen: -?");
  1005. ps.println("Shows version and other information: -i or --info");
  1006. ps.println("Opens the user guide page: -h or --help");
  1007. ps.println("Lists all available filter configurations: -lfc or --listconf");
  1008. ps.println("Edits or view filter configurations (UI-dependent command):");
  1009. ps.println(" -e [[-fc] configId]");
  1010. ps.println("Extracts a file to XLIFF (and optionally segment and pre-translate):");
  1011. ps.println(" -x inputFile [inputFile2...] [-fc configId] [-ie encoding] [-sl srcLang]");
  1012. ps.println(" [-tl trgLang] [-seg [srxFile]] [-tt [hostname[:port]]|-mm [key]");
  1013. ps.println(" |-pen tmDirectory|-gs configFile|-apertium [configFile]");
  1014. ps.println(" |-ms configFile|-tda configFile|-gg configFile]");
  1015. ps.println(" [-maketmx [tmxFile]] [-opt threshold]");
  1016. ps.println(" [-od outputDirectory] [-nocopy] [-noalttrans]");
  1017. ps.println("Merges an XLIFF document back to its original format:");
  1018. ps.println(" -m xliffFile [xliffFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1019. ps.println(" [-sd sourceDirectory] [-od outputDirectory]");
  1020. ps.println(" [-sl srcLang] [-tl trgLang]");
  1021. ps.println("Translates a file:");
  1022. ps.println(" -t inputFile [inputFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1023. ps.println(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]] [-tt [hostname[:port]]");
  1024. ps.println(" |-mm [key]|-pen tmDirectory|-gs configFile|-apertium [configFile]");
  1025. ps.println(" |-ms configFile|-tda configFile|-gg configFile]");
  1026. ps.println(" [-maketmx [tmxFile]] [-opt threshold]");
  1027. ps.println("Extracts a file to Moses InlineText:");
  1028. ps.println(" -xm inputFile [-fc configId] [-ie encoding] [-seg [srxFile]]");
  1029. ps.println(" [-sl srcLang] [-tl trgLang] [-2] [-to srcOutputFile]");
  1030. ps.println("Leverages a file with Moses InlineText:");
  1031. ps.println(" -lm inputFile [-fc configId] [-ie encoding] [-oe encoding] [-sl srcLang]");
  1032. ps.println(" [-tl trgLang] [-seg [srxFile]] [-totrg|-overtrg] [-bpt]");
  1033. ps.println(" [-from mosesFile] [-to outputFile]");
  1034. ps.println("Segments a file:");
  1035. ps.println(" -s inputFile [-fc configId] [-ie encoding]");
  1036. ps.println(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]]");
  1037. ps.println("Queries translation resources:");
  1038. ps.println(" -q \"source text\" [-sl srcLang] [-tl trgLang] [-opentran]");
  1039. ps.println(" [-tt [hostname[:port]]] [-mm [key]] [-pen tmDirectory] [-gs configFile]");
  1040. ps.println(" [-apertium [configFile]] [-ms configFile] [-tda configFile]");
  1041. ps.println(" [-gg configFile] [-opt threshold[:maxhits]]");
  1042. ps.println("Adds translation to a resources:");
  1043. ps.println(" -a \"source text\" \"target text\" [rating] [-sl srcLang] [-tl trgLang]");
  1044. ps.println(" -ms configFile");
  1045. ps.println("Converts to PO format:");
  1046. ps.println(" -2po inputFile [inputFile2...] [-fc configId] [-ie encoding] [-all]");
  1047. ps.println(" [-sl srcLang] [-tl trgLang] [-generic] [-trgsource|-trgempty]");
  1048. ps.println("Converts to TMX format:");
  1049. ps.println(" -2tmx inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1050. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-all]");
  1051. ps.println("Converts to table format:");
  1052. ps.println(" -2tbl inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1053. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty]");
  1054. ps.println(" [-csv|-tab] [-xliff|-xliffgx|-tmx|-generic] [-all]");
  1055. ps.println("Imports to Pensieve TM:");
  1056. ps.println(" -imp tmDirectory inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1057. ps.println(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-all] [-over]");
  1058. ps.println("Exports Pensieve TM as TMX:");
  1059. ps.println(" -exp tmDirectory1 [tmDirectory2...] [-sl srcLang] [-tl trgLang]");
  1060. ps.println(" [-trgsource|-trgempty] [-all]");
  1061. }
  1062. private void displayQuery (IQuery conn,
  1063. boolean isTM)
  1064. {
  1065. int count;
  1066. if ( conn.getClass().getName().endsWith("PensieveTMConnector")
  1067. || conn.getClass().getName().endsWith("GoogleMTConnector")
  1068. || conn.getClass().getName().endsWith("GoogleMTv2Connector")
  1069. || conn.getClass().getName().endsWith("MyMemoryTMConnector")
  1070. || conn.getClass().getName().endsWith("MicrosoftMTConnector")
  1071. // || conn.getClass().getName().endsWith("ProMTConnector")
  1072. || conn.getClass().getName().endsWith("GlobalSightTMConnector") ) {
  1073. count = conn.query(parseToTextFragment(query));
  1074. }
  1075. else { // Raw text otherwise
  1076. count = conn.query(query);
  1077. }
  1078. ps.println(String.format("\n= From %s (%s->%s)", conn.getName(),
  1079. conn.getSourceLanguage(), conn.getTargetLanguage()));
  1080. if ( isTM ) {
  1081. ITMQuery tmConn = (ITMQuery)conn;
  1082. ps.println(String.format(" Threshold=%d, Maximum hits=%d",
  1083. tmConn.getThreshold(), tmConn.getMaximumHits()));
  1084. }
  1085. if ( count > 0 ) {
  1086. QueryResult qr;
  1087. while ( conn.hasNext() ) {
  1088. qr = conn.next();
  1089. ps.println(String.format("score: %d, origin: '%s'%s",
  1090. qr.getCombinedScore(),
  1091. (qr.origin==null ? "" : qr.origin),
  1092. (qr.fromMT() ? " (from MT)" : "")));
  1093. ps.println(String.format(" Source: \"%s\"", qr.source.toText()));
  1094. ps.println(String.format(" Target: \"%s\"", qr.target.toText()));
  1095. }
  1096. }
  1097. else {
  1098. ps.println(String.format(" Source: \"%s\"", query));
  1099. ps.println(" <No translation has been found>");
  1100. }
  1101. }
  1102. private void processAddTranslation () {
  1103. guessMissingLocales(null);
  1104. if ( Util.isEmpty(query) ) {
  1105. throw new RuntimeException(String.format("Cannot add empty source text."));
  1106. }
  1107. if ( Util.isEmpty(addTransTrans) ) {
  1108. throw new RuntimeException(String.format("Cannot add empty target text."));
  1109. }
  1110. if ( useMicrosoft ) {
  1111. MicrosoftMTConnector conn = new MicrosoftMTConnector();
  1112. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1113. conn.setLanguages(srcLoc, trgLoc);
  1114. conn.open();
  1115. int res = conn.addTranslation(parseToTextFragment(query), parseToTextFragment(addTransTrans), addTransRating);
  1116. if ( res == 200 ) {
  1117. ps.println("Done");
  1118. }
  1119. else {
  1120. ps.println(String.format("Error code %d.", res));
  1121. }
  1122. conn.close();
  1123. }
  1124. else {
  1125. throw new RuntimeException(String.format("No valid connector specified to add a translation."));
  1126. }
  1127. }
  1128. private void processQuery () {
  1129. guessMissingLocales(null);
  1130. if ( !useGoogleV2 && !useOpenTran && !useTransToolkit && !useMyMemory
  1131. && !usePensieve && !useGlobalSight && !useApertium && !useMicrosoft && !useTDA ) {
  1132. useOpenTran = true; // Default if none is specified
  1133. }
  1134. // Query options
  1135. int[] opt = parseTMOptions();
  1136. int threshold = opt[0];
  1137. int maxhits = opt[1];
  1138. IQuery conn;
  1139. if ( useGoogleV2 ) {
  1140. conn = new GoogleMTv2Connector();
  1141. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1142. conn.setLanguages(srcLoc, trgLoc);
  1143. conn.open();
  1144. displayQuery(conn, false);
  1145. conn.close();
  1146. }
  1147. if ( usePensieve ) {
  1148. conn = new PensieveTMConnector();
  1149. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1150. conn.setLanguages(srcLoc, trgLoc);
  1151. setTMOptionsIfPossible(conn, threshold, maxhits);
  1152. conn.open();
  1153. displayQuery(conn, true);
  1154. conn.close();
  1155. }
  1156. if ( useTransToolkit ) {
  1157. conn = new TranslateToolkitTMConnector();
  1158. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1159. conn.setLanguages(srcLoc, trgLoc);
  1160. setTMOptionsIfPossible(conn, threshold, maxhits);
  1161. conn.open();
  1162. displayQuery(conn, true);
  1163. conn.close();
  1164. }
  1165. if ( useGlobalSight ) {
  1166. conn = new GlobalSightTMConnector();
  1167. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1168. conn.setLanguages(srcLoc, trgLoc);
  1169. setTMOptionsIfPossible(conn, threshold, maxhits);
  1170. conn.open();
  1171. displayQuery(conn, true);
  1172. conn.close();
  1173. }
  1174. if ( useTDA ) {
  1175. conn = new TDASearchConnector();
  1176. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1177. conn.setLanguages(srcLoc, trgLoc);
  1178. setTMOptionsIfPossible(conn, threshold, maxhits);
  1179. conn.open();
  1180. displayQuery(conn, true);
  1181. conn.close();
  1182. }
  1183. if ( useMicrosoft ) {
  1184. conn = new MicrosoftMTConnector();
  1185. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1186. conn.setLanguages(srcLoc, trgLoc);
  1187. setTMOptionsIfPossible(conn, threshold, maxhits);
  1188. conn.open();
  1189. displayQuery(conn, true);
  1190. conn.close();
  1191. }
  1192. // if ( useProMT ) {
  1193. // conn = new ProMTConnector();
  1194. // conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1195. // conn.setLanguages(srcLoc, trgLoc);
  1196. // conn.open();
  1197. // displayQuery(conn, false);
  1198. // conn.close();
  1199. // }
  1200. if ( useMyMemory ) {
  1201. conn = new MyMemoryTMConnector();
  1202. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1203. conn.setLanguages(srcLoc, trgLoc);
  1204. setTMOptionsIfPossible(conn, threshold, maxhits);
  1205. conn.open();
  1206. displayQuery(conn, true);
  1207. conn.close();
  1208. }
  1209. if ( useApertium ) {
  1210. conn = new ApertiumMTConnector();
  1211. conn.setParameters(prepareConnectorParameters(conn.getClass().getName()));
  1212. conn.setLanguages(srcLoc, trgLoc);
  1213. conn.open();
  1214. displayQuery(conn, false);
  1215. conn.close();
  1216. }
  1217. if ( useOpenTran ) {
  1218. conn = new OpenTranTMConnector();
  1219. conn.setLanguages(srcLoc, trgLoc);
  1220. setTMOptionsIfPossible(conn, threshold, maxhits);
  1221. conn.open();
  1222. displayQuery(conn, true);
  1223. conn.close();
  1224. }
  1225. }
  1226. private int[] parseTMOptions () {
  1227. int[] opt = new int[2];
  1228. opt[0] = -1;
  1229. opt[1] = -1;
  1230. if ( !Util.isEmpty(tmOptions) ) {
  1231. try {
  1232. // Expected format: "threshold[:maxhits]"
  1233. int n = tmOptions.indexOf(':');
  1234. if ( n == -1 ) { // Threshold only
  1235. opt[0] = Integer.parseInt(tmOptions);
  1236. }
  1237. else {
  1238. opt[0] = Integer.parseInt(tmOptions.substring(0, n));
  1239. opt[1] = Integer.parseInt(tmOptions.substring(n+1));
  1240. if ( opt[1] < 0 ) {
  1241. throw new RuntimeException(String.format("Invalid TM options: '%s' Maximum hits must be more than 0.", tmOptions));
  1242. }
  1243. }
  1244. if (( opt[0] < 0 ) || ( opt[0] > 100 )) {
  1245. throw new RuntimeException(String.format("Invalid TM options: '%s' Thresold must be between 0 and 100.", tmOptions));
  1246. }
  1247. }
  1248. catch ( NumberFormatException e ) {
  1249. throw new RuntimeException(String.format("Invalid TM options: '%s'", tmOptions));
  1250. }
  1251. }
  1252. return opt;
  1253. }
  1254. private void setTMOptionsIfPossible (IQuery conn,
  1255. int threshold,
  1256. int maxhits)
  1257. {
  1258. ITMQuery tmConn = (ITMQuery)conn;
  1259. if ( threshold > -1 ) tmConn.setThreshold(threshold);
  1260. if ( maxhits > -1 ) tmConn.setMaximumHits(maxhits);
  1261. }
  1262. private void convertFile (RawDocument rd, URI outputURI) {
  1263. // Create the driver
  1264. PipelineDriver driver = new PipelineDriver();
  1265. driver.setFilterConfigurationMapper(fcMapper);
  1266. driver.setRootDirectories(System.getProperty("user.dir"),
  1267. Util.getDirectoryName(rd.getInputURI().getPath()));
  1268. RawDocumentToFilterEventsStep rd2feStep = new RawDocumentToFilterEventsStep();
  1269. driver.addStep(rd2feStep);
  1270. FormatConversionStep fcStep = new FormatConversionStep();
  1271. net.sf.okapi.steps.formatconversion.Parameters params = fcStep.getParameters();
  1272. if ( command == CMD_CONV2PO ) {
  1273. params.setOutputFormat(Parameters.FORMAT_PO);
  1274. params.setOutputPath("output.po");
  1275. }
  1276. else if ( command == CMD_CONV2TMX ) {
  1277. params.setOutputFormat(Parameters.FORMAT_TMX);
  1278. params.setOutputPath("output.tmx");
  1279. }
  1280. else if ( command == CMD_CONV

Large files are truncated, but you can click here to view the full file