PageRenderTime 82ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/okapi/tikal/src/main/java/net/sf/okapi/applications/tikal/Main.java

https://bitbucket.org/Xavier_Richez/okapi
Java | 2160 lines | 1922 code | 139 blank | 99 comment | 426 complexity | 99d0f27073442978384a11e19f15dff6 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*===========================================================================
  2. Copyright (C) 2009-2018 by the Okapi Framework contributors
  3. -----------------------------------------------------------------------------
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ===========================================================================*/
  14. package net.sf.okapi.applications.tikal;
  15. import java.io.DataOutputStream;
  16. import java.io.File;
  17. import java.io.IOException;
  18. import java.io.PrintStream;
  19. import java.lang.ProcessBuilder.Redirect;
  20. import java.net.MalformedURLException;
  21. import java.net.URI;
  22. import java.net.URISyntaxException;
  23. import java.net.URLDecoder;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.StandardCharsets;
  26. import java.security.InvalidParameterException;
  27. import java.text.NumberFormat;
  28. import java.util.ArrayList;
  29. import java.util.Arrays;
  30. import java.util.Collections;
  31. import java.util.Hashtable;
  32. import java.util.Iterator;
  33. import java.util.List;
  34. import java.util.Locale;
  35. import java.util.regex.Matcher;
  36. import java.util.regex.Pattern;
  37. import org.slf4j.Logger;
  38. import org.slf4j.LoggerFactory;
  39. import net.sf.okapi.applications.tikal.logger.ILogHandler;
  40. import net.sf.okapi.applications.tikal.logger.LogHandlerFactory;
  41. import net.sf.okapi.common.ClassUtil;
  42. import net.sf.okapi.common.ExecutionContext;
  43. import net.sf.okapi.common.FileUtil;
  44. import net.sf.okapi.common.IParameters;
  45. import net.sf.okapi.common.ListUtil;
  46. import net.sf.okapi.common.LocaleId;
  47. import net.sf.okapi.common.UserConfiguration;
  48. import net.sf.okapi.common.Util;
  49. import net.sf.okapi.common.exceptions.OkapiException;
  50. import net.sf.okapi.common.exceptions.OkapiIOException;
  51. import net.sf.okapi.common.filters.DefaultFilters;
  52. import net.sf.okapi.common.filters.FilterConfiguration;
  53. import net.sf.okapi.common.filters.FilterConfigurationMapper;
  54. import net.sf.okapi.common.filters.IFilterConfigurationEditor;
  55. import net.sf.okapi.common.filters.IFilterConfigurationListEditor;
  56. import net.sf.okapi.common.filterwriter.XLIFFWriter;
  57. import net.sf.okapi.common.filterwriter.XLIFFWriterParameters;
  58. import net.sf.okapi.common.pipeline.IPipelineStep;
  59. import net.sf.okapi.common.pipelinedriver.BatchItemContext;
  60. import net.sf.okapi.common.pipelinedriver.IPipelineDriver;
  61. import net.sf.okapi.common.pipelinedriver.PipelineDriver;
  62. import net.sf.okapi.common.plugins.PluginsManager;
  63. import net.sf.okapi.common.query.IQuery;
  64. import net.sf.okapi.common.query.QueryResult;
  65. import net.sf.okapi.common.resource.RawDocument;
  66. import net.sf.okapi.common.resource.TextFragment;
  67. import net.sf.okapi.common.resource.TextFragment.TagType;
  68. import net.sf.okapi.connectors.apertium.ApertiumMTConnector;
  69. import net.sf.okapi.connectors.bifile.BilingualFileConnector;
  70. import net.sf.okapi.connectors.globalsight.GlobalSightTMConnector;
  71. import net.sf.okapi.connectors.google.GoogleMTv2Connector;
  72. import net.sf.okapi.connectors.lingo24.Lingo24Connector;
  73. import net.sf.okapi.connectors.microsoft.MicrosoftMTConnector;
  74. import net.sf.okapi.connectors.mmt.MMTConnector;
  75. import net.sf.okapi.connectors.mymemory.MyMemoryTMConnector;
  76. import net.sf.okapi.connectors.pensieve.PensieveTMConnector;
  77. import net.sf.okapi.connectors.tda.TDASearchConnector;
  78. import net.sf.okapi.connectors.translatetoolkit.TranslateToolkitTMConnector;
  79. import net.sf.okapi.filters.mosestext.FilterWriterParameters;
  80. import net.sf.okapi.lib.tkit.jarswitcher.VersionManager;
  81. import net.sf.okapi.lib.tkit.step.OriginalDocumentXliffMergerStep;
  82. import net.sf.okapi.lib.translation.ITMQuery;
  83. import net.sf.okapi.steps.common.FilterEventsToRawDocumentStep;
  84. import net.sf.okapi.steps.common.FilterEventsWriterStep;
  85. import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
  86. import net.sf.okapi.steps.common.RawDocumentWriterStep;
  87. import net.sf.okapi.steps.formatconversion.FormatConversionStep;
  88. import net.sf.okapi.steps.formatconversion.Parameters;
  89. import net.sf.okapi.steps.formatconversion.TableFilterWriterParameters;
  90. import net.sf.okapi.steps.leveraging.LeveragingStep;
  91. import net.sf.okapi.steps.moses.ExtractionStep;
  92. import net.sf.okapi.steps.moses.MergingParameters;
  93. import net.sf.okapi.steps.moses.MergingStep;
  94. import net.sf.okapi.steps.scopingreport.ScopingReportStep;
  95. import net.sf.okapi.steps.segmentation.SegmentationStep;
  96. import net.sf.okapi.steps.wordcount.CharacterCountStep;
  97. import net.sf.okapi.steps.wordcount.WordCountStep;
  98. public class Main {
  // Logger used for all console/log output of this class.
  99. private static final Logger LOGGER = LoggerFactory.getLogger(Main.class);
  // Command identifiers. The command-line parsing in main() stores one of these
  // values in the 'command' field (which stays at -1 when no command was given).
  100. protected final static int CMD_EXTRACT = 0;
  101. protected final static int CMD_MERGE = 1;
  102. protected final static int CMD_EDITCONFIG = 2;
  103. protected final static int CMD_QUERYTRANS = 3;
  104. protected final static int CMD_CONV2PO = 4;
  105. protected final static int CMD_CONV2TMX = 5;
  106. protected final static int CMD_CONV2TABLE = 6;
  107. protected final static int CMD_CONV2PEN = 7;
  108. protected final static int CMD_TRANSLATE = 8;
  109. protected final static int CMD_EXTRACTTOMOSES = 9;
  110. protected final static int CMD_LEVERAGEMOSES = 10;
  111. protected final static int CMD_SEGMENTATION = 11;
  112. protected final static int CMD_SHOWCONFIGS = 12;
  113. // protected final static int CMD_ADDTRANS = 13; // No longer supported
  114. protected final static int CMD_REPORT = 14;
  // Value given to 'segRules' when -s is used, or when -seg is used without a value.
  115. private static final String DEFAULT_SEGRULES = "-";
  // Message of the exception thrown when the UI-based configuration editor cannot be instantiated.
  116. private static final String MSG_ONLYWITHUICOMP = "UI-based commands are available only in the distributions with UI components.";
  // Console log handler; main() initializes it only when -logger is not specified.
  117. private static ILogHandler logHandler;
  // Command-line arguments that do not start with '-' (treated as input documents by main()).
  118. protected ArrayList<String> inputs;
  119. protected String skeleton;
  120. protected String output;
  // Configuration ID given with -fc (or -e), and its directory (from -pd or from the path part of the ID).
  121. protected String specifiedConfigId;
  122. protected String specifiedConfigIdPath;
  123. protected String configId;
  // Values of -ie, -oe, -sl and -tl.
  124. protected String inputEncoding;
  125. protected String outputEncoding;
  126. protected LocaleId srcLoc;
  127. protected LocaleId trgLoc;
  // One of the CMD_* values, or -1 while no command has been selected.
  128. protected int command = -1;
  // Text given with -q.
  129. protected String query;
  // Translation resources selected on the command line (-gg/-google, -tt, -gs, -tda,
  // -mm, -apertium, -pen, -ms, -bi, -lingo24, -mmt) and their parameters.
  130. protected boolean useGoogleV2;
  131. protected String googleV2Params;
  132. protected boolean useTransToolkit;
  133. protected String transToolkitParams;
  134. protected boolean useGlobalSight;
  135. protected String globalSightParams;
  136. protected boolean useTDA;
  137. protected String tdaParams;
  138. protected boolean useMyMemory;
  139. protected String myMemoryParams;
  140. protected boolean useApertium;
  141. protected String apertiumParams;
  142. protected boolean usePensieve;
  143. protected String pensieveData;
  144. protected boolean useMicrosoft;
  145. protected String microsoftParams;
  146. protected boolean useBifile;
  147. protected boolean useLingo24;
  148. protected String lingo24Params;
  149. protected boolean useMMT;
  150. protected String mmtUrl;
  151. protected String mmtContext;
  152. protected String bifileData;
  // Conversion options (-generic, -csv/-tab, -xliff/-xliffgx/-tmx, -trgsource/-trgempty,
  // -approved, -all, -over).
  153. protected boolean genericOutput = false;
  154. protected String tableConvFormat;
  155. protected String tableConvCodes;
  156. protected int convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_TARGETOREMPTY;
  157. protected boolean convApprovedEntriesOnly = false;
  158. protected boolean convSkipEntriesWithoutText = true;
  159. protected boolean convOverwrite = false;
  // Segmentation rules set by -s / -seg.
  160. protected String segRules;
  // When false, the "use -trace" hint is not shown with an error (cleared for command-line syntax errors).
  161. protected boolean showTraceHint = true;
  // Value of -opt.
  162. protected String tmOptions;
  // Leveraging options: -nofill clears levOptFillTarget, -maketmx sets levOptTMXPath.
  163. protected boolean levOptFillTarget = true;
  164. protected String levOptTMXPath;
  165. protected boolean extOptCopy = true; // Copy source in empty target by default
  166. protected boolean extOptAltTrans = true; // Output alt-trans by default
  167. protected boolean extOptCodeAttrs = false; // Disable extended code attributes by default
  // Moses-related options (-totrg, -overtrg, -2, -bpt, -from, -to).
  168. protected boolean mosesCopyToTarget = false;
  169. protected boolean mosesOverwriteTarget = false;
  170. protected boolean moses2Outputs = false;
  171. protected boolean mosesUseGModeInAltTrans = true;
  // When true (default), main() exits with code 1 on the first input that fails; -continue clears it.
  172. protected boolean abortOnFailure = true;
  173. protected String mosesFromPath;
  174. protected String mosesFromPathParam;
  175. protected String mosesToPathParam;
  // Values of -sd, -od and -rd (the root defaults to the current working directory).
  176. protected String skeletonDir;
  177. protected String outputDir;
  178. protected String rootDir = System.getProperty("user.dir");
  // Execution context created by the constructor (application name "Tikal", no-prompt mode on).
  179. protected ExecutionContext context;
  // NOTE(review): not used in the visible part of the file; presumably related to the -av/-v/-vp/-vc options below.
  180. protected VersionManager versionManager;
  // Jar version selection for merge: -av, -v, -vp and -vc.
  181. protected boolean autoJarVersion = false;
  182. protected String jarVersion = null;
  183. protected String jarPath = null;
  184. protected String configPath = null;
  // Filter configuration mapper and extension-to-configuration map, both created by initialize().
  185. private FilterConfigurationMapper fcMapper;
  186. private Hashtable<String, String> extensionsMap;
  187. private boolean useExternalClassLoader = false; // For now we call a previous Tikal version packaged as a single jar
  // Unmodifiable list of option names; NOTE(review): its use is outside the visible part of the file.
  188. private static final List<String> MERGE_PARAMS = Collections.unmodifiableList(Arrays.asList (
  189. "-fc", "-ie", "-oe", "-sl", "-tl", "-sd", "-od"
  190. ));
  191. /**
  192. * Try the guess the encoding of the console.
  193. * @return the guessed name of the console's encoding.
  194. */
  195. private static String getConsoleEncodingName () {
  196. String osName = System.getProperty("os.name");
  197. String enc = null;
  198. if ( osName.contains("OS X")) {
  199. enc = "UTF-8"; // Apparently the default for bash on Mac
  200. }
  201. else if ( osName.startsWith("Windows") ) {
  202. enc = "cp850"; // Not perfect, but covers many languages
  203. }
  204. else {
  205. // Default: Assumes unique encoding overall
  206. enc = Charset.defaultCharset().name();
  207. }
  208. // Now check if we have a user setting
  209. UserConfiguration uc = new UserConfiguration();
  210. uc.load("Tikal");
  211. return uc.getProperty("displayEncoding", enc);
  212. }
  213. public static void main (String[] originalArgs) {
  214. StringBuilder sb = new StringBuilder();
  215. for (String st : originalArgs) {
  216. sb.append(st);
  217. sb.append(" ");
  218. }
  219. LOGGER.debug(sb.toString());
  220. Main prog = new Main();
  221. boolean useLogger = false;
  222. boolean showTrace = false;
  223. try {
  224. // Remove all empty arguments
  225. // This is to work around the "$1" issue in bash
  226. List<String> args = new ArrayList<String>();
  227. for ( String tmp : originalArgs ) {
  228. if ( tmp.length() > 0 ) args.add(tmp);
  229. }
  230. // Check early so the option does not get 'eaten' by a bad syntax
  231. if ( args.contains("-trace") )
  232. showTrace = true;
  233. if ( args.contains("-logger") )
  234. useLogger = true;
  235. // Create an encoding-aware output for the console
  236. // System.out uses the default system encoding that
  237. // may not be the right one (e.g. windows-1252 vs cp850)
  238. if ( !useLogger ) {
  239. PrintStream ps = new PrintStream(System.out, true, getConsoleEncodingName());
  240. logHandler = LogHandlerFactory.getLogHandler();
  241. logHandler.initialize(ps);
  242. if ( showTrace ) logHandler.setLogLevel(ILogHandler.LogLevel.TRACE);
  243. }
  244. prog.printBanner();
  245. if ( args.size() == 0 ) {
  246. prog.printUsage();
  247. return;
  248. }
  249. if ( args.contains("-?") ) {
  250. prog.printUsage();
  251. return; // Overrides all arguments
  252. }
  253. if ( args.contains("-h") || args.contains("--help") || args.contains("-help") ) {
  254. prog.showHelp();
  255. return; // Overrides all arguments
  256. }
  257. if ( args.contains("-i") || args.contains("--info") || args.contains("-info") ) {
  258. prog.showInfo();
  259. return; // Overrides all arguments
  260. }
  261. // Set the default resource for the default engine.
  262. prog.transToolkitParams = "https://amagama-live.translatehouse.org/api/v1/";
  263. for ( int i=0; i<args.size(); i++ ) {
  264. String arg = args.get(i);
  265. if ( arg.equals("-fc") ) {
  266. prog.specifiedConfigId = prog.getArgument(args, ++i);
  267. }
  268. else if ( arg.equals("-sl") ) {
  269. prog.srcLoc = LocaleId.fromString(prog.getArgument(args, ++i));
  270. }
  271. else if ( arg.equals("-tl") ) {
  272. prog.trgLoc = LocaleId.fromString(prog.getArgument(args, ++i));
  273. }
  274. else if ( arg.equals("-ie") ) {
  275. prog.inputEncoding = prog.getArgument(args, ++i);
  276. }
  277. else if ( arg.equals("-oe") ) {
  278. prog.outputEncoding = prog.getArgument(args, ++i);
  279. }
  280. else if ( arg.equals("-od") ) {
  281. prog.outputDir = prog.getArgument(args, ++i);
  282. }
  283. else if ( arg.equals("-sd") ) {
  284. prog.skeletonDir = prog.getArgument(args, ++i);
  285. }
  286. else if ( arg.equals("-rd") ) {
  287. prog.rootDir = prog.getArgument(args, ++i);
  288. }
  289. else if ( arg.equals("-pd") ) {
  290. // This value will be overridden if -fc is
  291. // also specified
  292. prog.specifiedConfigIdPath = prog.getArgument(args, ++i);
  293. }
  294. else if ( arg.equals("-x") ) { // Default extraction
  295. prog.command = CMD_EXTRACT;
  296. }
  297. else if ( arg.equals("-x1") ) { // non-new-skeleton based extraction
  298. prog.command = CMD_EXTRACT; // Keep the flag for backward compatibility
  299. }
  300. else if ( arg.equals("-xm") ) {
  301. prog.command = CMD_EXTRACTTOMOSES;
  302. }
  303. else if ( arg.equals("-av") ) { // Jar version automatically selected for merge
  304. prog.autoJarVersion = true;
  305. }
  306. else if ( arg.equals("-v") ) { // Specific jar version for merge
  307. if ( args.size() > i+1 ) {
  308. if ( !args.get(i+1).startsWith("-") ) {
  309. prog.jarVersion = args.get(++i);
  310. }
  311. }
  312. }
  313. else if ( arg.equals("-vp") ) { // Specific jar for merge
  314. if ( args.size() > i+1 ) {
  315. if ( !args.get(i+1).startsWith("-") ) {
  316. prog.jarPath = args.get(++i);
  317. // Check trailing slash for a dir path
  318. if (!prog.jarPath.endsWith(".jar")) {
  319. // dir paths not supported now, only jars
  320. prog.jarPath = Util.ensureSeparator(prog.jarPath, false);
  321. }
  322. }
  323. }
  324. }
  325. else if ( arg.equals("-vc") ) { // Path to the versions configuration file
  326. if ( args.size() > i+1 ) {
  327. if ( !args.get(i+1).startsWith("-") ) {
  328. prog.configPath = args.get(++i);
  329. }
  330. }
  331. }
  332. else if ( arg.equals("-2") ) {
  333. prog.moses2Outputs = true;
  334. }
  335. else if ( arg.equals("-t") ) {
  336. prog.command = CMD_TRANSLATE;
  337. }
  338. else if ( arg.equals("-m") ) { // Default merge
  339. prog.command = CMD_MERGE;
  340. }
  341. else if ( arg.equals("-m1") ) { // non-new-skeleton based merge
  342. prog.command = CMD_MERGE; // Flag kept for backward compatibility
  343. }
  344. else if ( arg.equals("-lm") ) {
  345. prog.command = CMD_LEVERAGEMOSES;
  346. }
  347. else if ( arg.equals("-totrg") ) {
  348. prog.mosesCopyToTarget = true;
  349. prog.mosesOverwriteTarget = false;
  350. }
  351. else if ( arg.equals("-overtrg") ) {
  352. prog.mosesCopyToTarget = true;
  353. prog.mosesOverwriteTarget = true;
  354. }
  355. else if ( arg.equals("-bpt") ) {
  356. prog.mosesUseGModeInAltTrans = false;
  357. }
  358. else if ( arg.equals("-over") ) {
  359. prog.convOverwrite = true;
  360. }
  361. else if ( arg.equals("-from")) {
  362. prog.mosesFromPathParam = prog.getArgument(args, ++i);
  363. }
  364. else if ( arg.equals("-to") ) {
  365. prog.mosesToPathParam = prog.getArgument(args, ++i);
  366. }
  367. else if ( arg.equals("-2po") ) {
  368. prog.command = CMD_CONV2PO;
  369. }
  370. else if ( arg.equals("-2tmx") ) {
  371. prog.command = CMD_CONV2TMX;
  372. }
  373. else if ( arg.equals("-2tbl") ) {
  374. prog.command = CMD_CONV2TABLE;
  375. }
  376. else if ( arg.equals("-csv") ) {
  377. prog.tableConvFormat = "csv";
  378. }
  379. else if ( arg.equals("-tab") ) {
  380. prog.tableConvFormat = "tab";
  381. }
  382. else if ( arg.equals("-xliff") ) {
  383. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFF;
  384. }
  385. else if ( arg.equals("-xliffgx") ) {
  386. prog.tableConvCodes = TableFilterWriterParameters.INLINE_XLIFFGX;
  387. }
  388. else if ( arg.equals("-tmx") ) {
  389. prog.tableConvCodes = TableFilterWriterParameters.INLINE_TMX;
  390. }
  391. else if ( arg.equals("-all") ) {
  392. prog.convSkipEntriesWithoutText = false;
  393. }
  394. else if ( arg.equals("-approved") ) {
  395. prog.convApprovedEntriesOnly = true;
  396. }
  397. else if ( arg.equals("-nofill") ) {
  398. prog.levOptFillTarget = false;
  399. }
  400. else if ( arg.equals("-nocopy") ) {
  401. prog.extOptCopy = false;
  402. }
  403. else if ( arg.equals("-noalttrans") ) {
  404. prog.extOptAltTrans = false;
  405. }
  406. else if ( arg.equals("-codeattrs") ) {
  407. prog.extOptCodeAttrs = true;
  408. }
  409. else if ( arg.equals("-maketmx") ) {
  410. prog.levOptTMXPath = "pretrans.tmx";
  411. if ( args.size() > i+1 ) {
  412. if ( !args.get(i+1).startsWith("-") ) {
  413. prog.levOptTMXPath = args.get(++i);
  414. }
  415. }
  416. }
  417. else if ( arg.equals("-trgsource") ) {
  418. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCESOURCE;
  419. }
  420. else if ( arg.equals("-trgempty") ) {
  421. prog.convTargetStyle = net.sf.okapi.steps.formatconversion.Parameters.TRG_FORCEEMPTY;
  422. }
  423. else if ( arg.equals("-imp") ) {
  424. prog.command = CMD_CONV2PEN;
  425. prog.pensieveData = prog.getArgument(args, ++i);
  426. }
  427. else if ( arg.equals("-exp") ) {
  428. prog.command = CMD_CONV2TMX;
  429. prog.specifiedConfigId = "okf_pensieve";
  430. }
  431. else if ( arg.equals("-e") ) {
  432. prog.command = CMD_EDITCONFIG;
  433. if ( args.size() > i+1 ) {
  434. if ( !args.get(i+1).startsWith("-") ) {
  435. prog.specifiedConfigId = args.get(++i);
  436. }
  437. }
  438. }
  439. else if ( arg.equals("-generic") ) {
  440. prog.genericOutput = true;
  441. prog.tableConvCodes = TableFilterWriterParameters.INLINE_GENERIC;
  442. }
  443. else if ( arg.equals("-q") ) {
  444. prog.command = CMD_QUERYTRANS;
  445. prog.query = prog.getArgument(args, ++i);
  446. }
  447. else if ( arg.equals("-opt") ) {
  448. prog.tmOptions = prog.getArgument(args, ++i);
  449. }
  450. else if ( arg.equals("-gg") || arg.equals("-google") ) {
  451. prog.useGoogleV2 = true;
  452. if ( args.size() > i+1 ) {
  453. if ( !args.get(i+1).startsWith("-") ) {
  454. prog.googleV2Params = args.get(++i);
  455. }
  456. }
  457. }
  458. else if ( arg.equals("-tt") ) {
  459. prog.useTransToolkit = true;
  460. if ( args.size() > i+1 ) {
  461. if ( !args.get(i+1).startsWith("-") ) {
  462. prog.transToolkitParams = args.get(++i);
  463. }
  464. }
  465. }
  466. else if ( arg.equals("-gs") ) {
  467. prog.useGlobalSight = true;
  468. prog.globalSightParams = prog.getArgument(args, ++i);
  469. }
  470. else if ( arg.equals("-tda") ) {
  471. prog.useTDA = true;
  472. prog.tdaParams = prog.getArgument(args, ++i);
  473. }
  474. else if ( arg.equals("-ms") ) {
  475. prog.useMicrosoft = true;
  476. if ( args.size() > i+1 ) {
  477. if ( !args.get(i+1).startsWith("-") ) {
  478. prog.microsoftParams = args.get(++i);
  479. }
  480. }
  481. }
  482. else if ( arg.equals("-apertium") ) {
  483. prog.useApertium = true;
  484. if ( args.size() > i+1 ) {
  485. if ( !args.get(i+1).startsWith("-") ) {
  486. prog.apertiumParams = args.get(++i);
  487. }
  488. }
  489. }
  490. else if ( arg.equals("-mm") ) {
  491. prog.useMyMemory = true;
  492. // Key is optional (left for backward compatibility)
  493. if ( args.size() > i+1 ) {
  494. if ( !args.get(i+1).startsWith("-") ) {
  495. prog.myMemoryParams = prog.getArgument(args, ++i);
  496. }
  497. }
  498. }
  499. else if ( arg.equals("-pen") ) {
  500. prog.usePensieve = true;
  501. prog.pensieveData = "http://localhost:8080";
  502. if ( args.size() > i+1 ) {
  503. if ( !args.get(i+1).startsWith("-") ) {
  504. prog.pensieveData = args.get(++i);
  505. }
  506. }
  507. }
  508. else if ( arg.equals("-bi")) {
  509. prog.useBifile = true;
  510. if ( args.size() > i+1 ) {
  511. if ( !args.get(i+1).startsWith("-") ) {
  512. prog.bifileData = args.get(++i);
  513. }
  514. }
  515. }
  516. else if ( arg.equals("-lingo24") ) {
  517. prog.useLingo24 = true;
  518. if ( args.size() > i+1 ) {
  519. if ( !args.get(i+1).startsWith("-") ) {
  520. prog.lingo24Params = args.get(++i);
  521. }
  522. }
  523. }
  524. else if ( arg.equals("-mmt") ) {
  525. prog.useMMT = true;
  526. // The URL is a mandatory parameter
  527. prog.mmtUrl = prog.getArgument(args, ++i);
  528. // The context is optional
  529. if ( args.size() > i+1 ) {
  530. if ( !args.get(i+1).startsWith("-") ) {
  531. prog.mmtContext = args.get(++i);
  532. }
  533. }
  534. }
  535. else if ( arg.endsWith("-listconf") || arg.equals("-lfc") ) {
  536. prog.command = CMD_SHOWCONFIGS;
  537. }
  538. else if ( arg.equals("-s") ) {
  539. prog.command = CMD_SEGMENTATION;
  540. prog.segRules = DEFAULT_SEGRULES;
  541. }
  542. else if ( arg.equals("-seg") ) {
  543. prog.segRules = DEFAULT_SEGRULES; // Default
  544. if ( args.size() > i+1 ) {
  545. if ( !args.get(i+1).startsWith("-") ) {
  546. prog.segRules = args.get(++i);
  547. }
  548. }
  549. }
  550. else if ( arg.equals("-trace") || arg.equals("-logger") ) {
  551. // Already set. This is just to avoid warnings about invalid parameters
  552. }
  553. else if ( arg.equals("-continue") ) {
  554. prog.abortOnFailure = false;
  555. }
  556. else if ( arg.equals("-safe") ) {
  557. prog.context.setIsNoPrompt(false);
  558. }
  559. else if ( arg.equals("-sr")) {
  560. prog.command = CMD_REPORT;
  561. }
  562. //=== Input file or error
  563. else if ( !arg.startsWith("-") ) {
  564. prog.inputs.add(args.get(i));
  565. }
  566. else {
  567. prog.showTraceHint = false; // Using trace is not helpful to the user for this error
  568. throw new InvalidParameterException(
  569. String.format("Invalid command-line argument '%s'.", args.get(i)));
  570. }
  571. }
  572. // Forgive having the extension .fprm from configuration ID if there is one
  573. if ( prog.specifiedConfigId != null ) {
  574. String cfgPath = Util.getDirectoryName(prog.specifiedConfigId);
  575. if ( !cfgPath.isEmpty() ) {
  576. prog.specifiedConfigIdPath = cfgPath;
  577. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, true);
  578. }
  579. if ( prog.specifiedConfigId.endsWith(FilterConfigurationMapper.CONFIGFILE_EXT) ) {
  580. prog.specifiedConfigId = Util.getFilename(prog.specifiedConfigId, false);
  581. }
  582. }
  583. // Check inputs and command
  584. if ( prog.command == -1 ) {
  585. LOGGER.warn("No command specified. Please use one of the command described below:");
  586. prog.printUsage();
  587. return;
  588. }
  589. if ( prog.command == CMD_EDITCONFIG ) {
  590. if ( prog.specifiedConfigId == null ) {
  591. prog.editAllConfigurations();
  592. }
  593. else {
  594. prog.editConfiguration();
  595. }
  596. return;
  597. }
  598. if ( prog.command == CMD_SHOWCONFIGS ) {
  599. prog.showAllConfigurations();
  600. return;
  601. }
  602. if ( prog.command == CMD_QUERYTRANS ) {
  603. prog.processQuery();
  604. return;
  605. }
  606. if ( prog.command == CMD_REPORT ) {
  607. prog.printScopingReport();
  608. return;
  609. }
  610. if ( prog.inputs.size() == 0 ) {
  611. throw new OkapiException("No input document specified.");
  612. }
  613. // Process all input files
  614. Timer timer = new Timer();
  615. int errorCount = 0;
  616. for ( int i=0; i<prog.inputs.size(); i++ ) {
  617. if ( i > 0 ) {
  618. displayDivider();
  619. }
  620. try {
  621. prog.process(prog.inputs.get(i), args);
  622. }
  623. catch ( Throwable e ) {
  624. displayError(e, showTrace, prog.showTraceHint);
  625. if ( prog.abortOnFailure ) {
  626. System.exit(1);
  627. }
  628. else {
  629. errorCount++;
  630. }
  631. }
  632. }
  633. if ( prog.inputs.size() > 1 ) {
  634. displayDivider();
  635. displaySummary(prog.inputs.size(), errorCount, timer);
  636. }
  637. }
  638. catch ( Throwable e ) {
  639. displayError(e, showTrace, prog.showTraceHint);
  640. System.exit(1); // Error
  641. }
  642. }
  643. private static void displayDivider() {
  644. LOGGER.info("------------------------------------------------------------"); //$NON-NLS-1$
  645. }
  646. private static void displaySummary (int fileCount,
  647. int errorCount,
  648. Timer t)
  649. {
  650. LOGGER.info("Files: " + fileCount + ", Errors: " + errorCount+ ", Time: " + t);
  651. }
  652. private static void displayError (Throwable e,
  653. boolean showTrace,
  654. boolean showTraceHint)
  655. {
  656. if ( showTrace ) e.printStackTrace();
  657. else {
  658. LOGGER.error(e.getMessage());
  659. Throwable e2 = e.getCause();
  660. if ( e2 != null ) LOGGER.error(e2.getMessage());
  661. if ( showTraceHint ) LOGGER.info("You can use the -trace option for more details.");
  662. }
  663. }
  664. public Main () {
  665. inputs = new ArrayList<String>();
  666. context = new ExecutionContext();
  667. context.setApplicationName("Tikal");
  668. context.setIsNoPrompt(true);
  669. }
  670. protected String getArgument (List<String> args,
  671. int index)
  672. {
  673. if ( index >= args.size() ) {
  674. showTraceHint = false; // Using trace is not helpful to the user for this error
  675. throw new OkapiException(String.format(
  676. "Missing parameter after '%s'", args.get(index-1)));
  677. }
  678. return args.get(index);
  679. }
  680. private void initialize () {
  681. // Create the mapper and load it with all parameters editor info
  682. fcMapper = new FilterConfigurationMapper();
  683. DefaultFilters.setMappings(fcMapper, false, true);
  684. // Instead create a map with extensions -> filter
  685. extensionsMap = new Hashtable<String, String>();
  686. extensionsMap.put(".docx", "okf_openxml");
  687. extensionsMap.put(".pptx", "okf_openxml");
  688. extensionsMap.put(".xlsx", "okf_openxml");
  689. extensionsMap.put(".odt", "okf_openoffice");
  690. extensionsMap.put(".swx", "okf_openoffice");
  691. extensionsMap.put(".ods", "okf_openoffice");
  692. extensionsMap.put(".swc", "okf_openoffice");
  693. extensionsMap.put(".odp", "okf_openoffice");
  694. extensionsMap.put(".sxi", "okf_openoffice");
  695. extensionsMap.put(".odg", "okf_openoffice");
  696. extensionsMap.put(".sxd", "okf_openoffice");
  697. extensionsMap.put(".htm", "okf_html");
  698. extensionsMap.put(".html", "okf_html");
  699. extensionsMap.put(".xlf", "okf_xliff");
  700. extensionsMap.put(".xlif", "okf_xliff");
  701. extensionsMap.put(".xliff", "okf_xliff");
  702. extensionsMap.put(".tmx", "okf_tmx");
  703. extensionsMap.put(".properties", "okf_properties");
  704. extensionsMap.put(".lang", "okf_properties-skypeLang");
  705. extensionsMap.put(".po", "okf_po");
  706. extensionsMap.put(".xml", "okf_xml");
  707. extensionsMap.put(".resx", "okf_xml-resx");
  708. extensionsMap.put(".srt", "okf_regex-srt");
  709. extensionsMap.put(".dtd", "okf_dtd");
  710. extensionsMap.put(".ent", "okf_dtd");
  711. extensionsMap.put(".ts", "okf_ts");
  712. extensionsMap.put(".txt", "okf_plaintext");
  713. extensionsMap.put(".csv", "okf_table_csv");
  714. extensionsMap.put(".ttx", "okf_ttx");
  715. extensionsMap.put(".json", "okf_json");
  716. extensionsMap.put(".pentm", "okf_pensieve");
  717. extensionsMap.put(".yml", "okf_yaml");
  718. extensionsMap.put(".idml", "okf_idml");
  719. extensionsMap.put(".mif", "okf_mif");
  720. extensionsMap.put(".txp", "okf_transifex");
  721. extensionsMap.put(".rtf", "okf_tradosrtf");
  722. extensionsMap.put(".zip", "okf_archive");
  723. extensionsMap.put(".txml", "okf_txml");
  724. extensionsMap.put(".md", "okf_markdown");
  725. if ( specifiedConfigIdPath != null ) {
  726. fcMapper.setCustomConfigurationsDirectory(specifiedConfigIdPath);
  727. }
  728. loadFromPluginsAndUpdate();
  729. }
  730. private String getConfigurationId (String ext) {
  731. // Get the configuration for the extension
  732. String id = extensionsMap.get(ext);
  733. if ( id == null ) {
  734. throw new OkapiException(String.format(
  735. "Could not guess the configuration for the extension '%s'", ext));
  736. }
  737. return id;
  738. }
  739. private void editAllConfigurations () {
  740. initialize();
  741. guessMissingLocales(null);
  742. // Add the custom configurations
  743. fcMapper.updateCustomConfigurations();
  744. // Edit
  745. try {
  746. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  747. IFilterConfigurationListEditor editor =
  748. (IFilterConfigurationListEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  749. // Call the editor
  750. editor.editConfigurations(fcMapper);
  751. }
  752. catch ( InstantiationException e ) {
  753. throw new OkapiException(MSG_ONLYWITHUICOMP);
  754. }
  755. catch ( IllegalAccessException e ) {
  756. throw new OkapiException(MSG_ONLYWITHUICOMP);
  757. }
  758. catch ( ClassNotFoundException e ) {
  759. throw new OkapiException(MSG_ONLYWITHUICOMP);
  760. }
  761. }
  762. private void editConfiguration () {
  763. initialize();
  764. guessMissingLocales(null);
  765. if ( specifiedConfigId == null ) {
  766. throw new OkapiException("You must specified the configuration to edit.");
  767. }
  768. configId = specifiedConfigId;
  769. if ( !prepareFilter(configId) ) return; // Next input
  770. try {
  771. // Invoke the editor using dynamic instantiation so we can compile non-UI distributions
  772. IFilterConfigurationEditor editor =
  773. (IFilterConfigurationEditor)Class.forName("net.sf.okapi.common.ui.filters.FilterConfigurationEditor").newInstance();
  774. // Call the editor
  775. editor.editConfiguration(configId, fcMapper);
  776. }
  777. catch ( InstantiationException e ) {
  778. throw new OkapiException(MSG_ONLYWITHUICOMP);
  779. }
  780. catch ( IllegalAccessException e ) {
  781. throw new OkapiException(MSG_ONLYWITHUICOMP);
  782. }
  783. catch ( ClassNotFoundException e ) {
  784. throw new OkapiException(MSG_ONLYWITHUICOMP);
  785. }
  786. }
  787. private void showAllConfigurations () {
  788. initialize();
  789. // Add the custom configurations
  790. fcMapper.updateCustomConfigurations();
  791. LOGGER.info("List of all filter configurations available:");
  792. Iterator<FilterConfiguration> iter = fcMapper.getAllConfigurations();
  793. FilterConfiguration config;
  794. while ( iter.hasNext() ) {
  795. config = iter.next();
  796. LOGGER.info(" - {} = {}", config.configId, config.description);
  797. }
  798. }
  799. private boolean prepareFilter (String configId) {
  800. // Is it a default configuration?
  801. if (fcMapper.getConfiguration(configId) != null) {
  802. return true;
  803. }
  804. // Else: Try to find the filter for that configuration
  805. Iterator<FilterConfiguration> configs = fcMapper.getAllConfigurations();
  806. while (configs.hasNext()) {
  807. FilterConfiguration fc = configs.next();
  808. if (configId.startsWith(fc.configId)) {
  809. // If the given configuration is not one of the pre-defined
  810. if ( fcMapper.getConfiguration(configId) == null ) {
  811. // Assume it is a custom one
  812. fcMapper.addCustomConfiguration(configId);
  813. }
  814. return true;
  815. }
  816. }
  817. // Could not guess
  818. LOGGER.error("Could not guess the filter for the configuration '{}'", configId);
  819. return false;
  820. }
  821. private void loadFromPluginsAndUpdate () {
  822. // Discover and add plug-ins
  823. PluginsManager mgt = new PluginsManager();
  824. mgt.discover(new File(getAppRootDirectory()+File.separator+"dropins"), true);
  825. fcMapper.addFromPlugins(mgt);
  826. }
  827. private void guessMissingLocales (String inputPath) {
  828. // If both locales are already set: just use those
  829. if (( srcLoc != null ) && ( trgLoc != null )) return;
  830. // Try to see if we can get one or both from the input file
  831. if ( inputPath != null ) {
  832. List<String> guessed = FileUtil.guessLanguages(inputPath);
  833. if ( guessed.size() > 0 ) {
  834. if ( srcLoc == null ) {
  835. srcLoc = LocaleId.fromString(guessed.get(0));
  836. }
  837. if ( guessed.size() > 1 ) {
  838. if ( trgLoc == null ) {
  839. trgLoc = LocaleId.fromString(guessed.get(1));
  840. }
  841. }
  842. }
  843. }
  844. // Make sure we do have a source
  845. if ( srcLoc == null ) {
  846. srcLoc = LocaleId.fromString("en");
  847. }
  848. // Make sure we do have a target
  849. if ( trgLoc == null ) {
  850. trgLoc = new LocaleId(Locale.getDefault());
  851. if ( trgLoc.sameLanguageAs(srcLoc) ) {
  852. trgLoc = LocaleId.fromString("fr");
  853. }
  854. }
  855. }
  856. private void guessMissingParameters (String inputOfConfig) {
  857. if ( specifiedConfigId == null ) {
  858. String ext = Util.getExtension(inputOfConfig);
  859. if ( Util.isEmpty(ext) ) {
  860. throw new OkapiException(String.format(
  861. "The input file '%s' has no extension to guess the filter from.", inputOfConfig));
  862. }
  863. configId = getConfigurationId(ext.toLowerCase());
  864. }
  865. else {
  866. configId = specifiedConfigId;
  867. }
  868. guessMissingEncodings();
  869. }
  870. private void guessMissingEncodings () {
  871. if ( outputEncoding == null ) {
  872. if ( inputEncoding != null ) outputEncoding = inputEncoding;
  873. else outputEncoding = Charset.defaultCharset().name();
  874. }
  875. if ( inputEncoding == null ) {
  876. inputEncoding = Charset.defaultCharset().name();
  877. }
  878. }
  879. String pathChangeFolder (String newFolder,
  880. String oldPath)
  881. {
  882. String result;
  883. if ( newFolder == null ) {
  884. result = oldPath;
  885. }
  886. else {
  887. File file = new File(newFolder, Util.getFilename(oldPath, true));
  888. result = file.toString();
  889. }
  890. return result;
  891. }
  892. String pathInsertOutBeforeExt(String oldPath) {
  893. String ext = Util.getExtension(oldPath);
  894. int n = oldPath.lastIndexOf('.');
  895. if (n == -1) {
  896. // Filename with no extension
  897. return oldPath + ".out";
  898. }
  899. return oldPath.substring(0, n) + ".out" + ext; //$NON-NLS-1$
  900. }
  901. private void guessMergingArguments (String input) {
  902. String ext = Util.getExtension(input);
  903. if ( !ext.equals(".xlf") ) {
  904. throw new OkapiException(String.format(
  905. "The input file '%s' does not have the expected .xlf extension.", input));
  906. }
  907. int n = input.lastIndexOf('.');
  908. skeleton = input.substring(0, n);
  909. if ( outputDir == null ) {
  910. output = pathInsertOutBeforeExt(skeleton);
  911. }
  912. else {
  913. output = pathChangeFolder(outputDir, skeleton);
  914. }
  915. skeleton = pathChangeFolder(skeletonDir, skeleton);
  916. }
  917. private void guessMergingMosesArguments (String input) {
  918. // Main input is the original file, not the Moses file
  919. // The Moses file is specified with -from or null
  920. if ( !Util.isEmpty(mosesFromPathParam) ) {
  921. mosesFromPath = mosesFromPathParam;
  922. }
  923. else {
  924. // We guess the Moses filename:
  925. mosesFromPath = input + "."+trgLoc.toString();
  926. }
  927. // Output path
  928. if ( !Util.isEmpty(mosesToPathParam) ) {
  929. output = mosesToPathParam;
  930. }
  931. else {
  932. output = pathInsertOutBeforeExt(input);
  933. }
  934. }
  935. protected void process (String input, List<String> args) throws URISyntaxException {
  936. Timer timer = new Timer();
  937. initialize();
  938. // Add the custom configurations
  939. fcMapper.updateCustomConfigurations();
  940. File file;
  941. switch ( command ) {
  942. case CMD_TRANSLATE:
  943. LOGGER.info("Translation");
  944. guessMissingParameters(input);
  945. if ( !prepareFilter(configId) ) return; // Next input
  946. guessMissingLocales(input);
  947. file = new File(input);
  948. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);) {
  949. rd.setFilterConfigId(configId);
  950. translateFile(rd);
  951. }
  952. break;
  953. case CMD_SEGMENTATION:
  954. LOGGER.info("Segmentation");
  955. guessMissingParameters(input);
  956. if ( !prepareFilter(configId) ) return; // Next input
  957. guessMissingLocales(input);
  958. file = new File(input);
  959. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);) {
  960. rd.setFilterConfigId(configId);
  961. segmentFile(rd);
  962. }
  963. break;
  964. case CMD_EXTRACT:
  965. LOGGER.info("Extraction");
  966. guessMissingParameters(input);
  967. if ( !prepareFilter(configId) ) return; // Next input
  968. guessMissingLocales(input);
  969. file = new File(input);
  970. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);) {
  971. rd.setFilterConfigId(configId);
  972. extractFile(rd);
  973. }
  974. break;
  975. case CMD_EXTRACTTOMOSES:
  976. LOGGER.info("Extraction to Moses InlineText");
  977. guessMissingParameters(input);
  978. if ( !prepareFilter(configId) ) return; // Next input
  979. guessMissingLocales(input);
  980. file = new File(input);
  981. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);) {
  982. rd.setFilterConfigId(configId);
  983. extractFileToMoses(rd);
  984. }
  985. break;
  986. case CMD_MERGE:
  987. LOGGER.info("Merging");
  988. guessMergingArguments(input);
  989. guessMissingLocales(input);
  990. guessMissingParameters(skeleton);
  991. if ( !prepareFilter(configId) ) return; // Next input
  992. // Merge with a different version of filters or other external classes
  993. if (jarPath != null || jarVersion != null || autoJarVersion) {
  994. versionManager = new VersionManager();
  995. if ( configPath != null ) {
  996. LOGGER.info("Path to the versions configuration file to use: {}", configPath);
  997. versionManager.load(FileUtil.fileToUrl(new File(configPath)));
  998. }
  999. if ( jarPath != null ) {
  1000. LOGGER.info("Path to the library version to use: {}", jarPath);
  1001. versionManager.loadVersion(FileUtil.fileToUrl(new File(jarPath)));
  1002. }
  1003. else if ( jarVersion != null ) {
  1004. LOGGER.info("Library version to use: {}", jarVersion);
  1005. versionManager.loadVersion(jarVersion);
  1006. }
  1007. if ( autoJarVersion ) {
  1008. LOGGER.info("The library version to use will be auto-detected");
  1009. versionManager.loadVersion(new File(input));
  1010. }
  1011. String appLibPath = versionManager.getPath();
  1012. // Prepare child process CL parameters
  1013. List<String> params = new ArrayList<String>();
  1014. params.add("java");
  1015. // XXX External class loader implementation is not finished, don't remove the commented out block
  1016. if (useExternalClassLoader) {
  1017. params.add("-cp");
  1018. params.add(System.getProperty("java.class.path"));
  1019. params.add("-Djava.system.class.loader=net.sf.okapi.lib.tkit.jarswitcher.VMClassLoader");
  1020. params.add(this.getClass().getName());
  1021. }
  1022. else {
  1023. params.add("-jar");
  1024. // Convert to system-specific path
  1025. appLibPath = new File(versionManager.getPath()).getAbsolutePath();
  1026. params.add(appLibPath);
  1027. }
  1028. // Collect params for the recursive main() as its args
  1029. params.add("-m");
  1030. params.add("-trace");
  1031. for ( int i=0; i<args.size(); i++ ) {
  1032. String arg = args.get(i);
  1033. if (MERGE_PARAMS.contains(arg)) {
  1034. // All merge params are duple
  1035. params.add(arg);
  1036. params.add(getArgument(args, ++i));
  1037. }
  1038. }
  1039. params.add(input); // One input at this iteration of process()
  1040. LOGGER.info("Starting child process:\n" + ListUtil.listAsString(params, " "));
  1041. ProcessBuilder pb = new ProcessBuilder(params);
  1042. pb.redirectOutput(Redirect.INHERIT);
  1043. pb.redirectError(Redirect.INHERIT);
  1044. try {
  1045. Process p = pb.start();
  1046. if (useExternalClassLoader) {
  1047. DataOutputStream dos = new DataOutputStream(p.getOutputStream());
  1048. dos.writeUTF(appLibPath);
  1049. dos.writeUTF(ClassUtil.getQualifiedClassName(this)); // appRootName
  1050. dos.writeUTF(ClassUtil.getPath(this.getClass()));
  1051. dos.writeUTF(ClassUtil.getClassFilePath(this.getClass()));
  1052. dos.flush(); // Pass data to VMClassLoader
  1053. }
  1054. } catch (IOException e) {
  1055. throw new OkapiIOException(e);
  1056. }
  1057. break;
  1058. }
  1059. LOGGER.info("Source language: {}", srcLoc);
  1060. LOGGER.info("Target language: {}", trgLoc);
  1061. LOGGER.info("Default input encoding: {}", inputEncoding);
  1062. LOGGER.info("Output encoding: {}", outputEncoding);
  1063. LOGGER.info("Filter configuration: {}", configId);
  1064. LOGGER.info("XLIFF: {}", input);
  1065. LOGGER.info("Output: {}", (output==null) ? "<auto-defined>" : output);
  1066. // original document aka "skeleton"
  1067. RawDocument originalDoc = new RawDocument(Util.toURI(skeleton), inputEncoding, srcLoc, trgLoc);
  1068. originalDoc.setFilterConfigId(configId);
  1069. IPipelineDriver driver = new PipelineDriver();
  1070. driver.setFilterConfigurationMapper(fcMapper);
  1071. BatchItemContext bic = new BatchItemContext(
  1072. // xliff RawDocument
  1073. new RawDocument(Util.toURI(input), StandardCharsets.UTF_8.name(), srcLoc, trgLoc),
  1074. Util.toURI(output),
  1075. outputEncoding,
  1076. originalDoc);
  1077. driver.addBatchItem(bic);
  1078. driver.addStep(new OriginalDocumentXliffMergerStep());
  1079. driver.addStep(new RawDocumentWriterStep());
  1080. driver.processBatch();
  1081. driver.destroy();
  1082. break;
  1083. case CMD_LEVERAGEMOSES:
  1084. LOGGER.info("Merging Moses InlineText");
  1085. guessMissingLocales(input);
  1086. guessMergingMosesArguments(input);
  1087. guessMissingParameters(input);
  1088. if ( !prepareFilter(configId) ) return; // Next input
  1089. file = new File(input);
  1090. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc, configId);) {
  1091. leverageFileWithMoses(rd);
  1092. }
  1093. break;
  1094. case CMD_CONV2PO:
  1095. case CMD_CONV2TMX:
  1096. case CMD_CONV2PEN:
  1097. case CMD_CONV2TABLE:
  1098. if ( command == CMD_CONV2PO ) {
  1099. LOGGER.info("Conversion to PO");
  1100. }
  1101. else if ( command == CMD_CONV2TMX ) {
  1102. LOGGER.info("Conversion to TMX");
  1103. }
  1104. else if ( command == CMD_CONV2TABLE ) {
  1105. LOGGER.info("Conversion to Table");
  1106. }
  1107. else {
  1108. LOGGER.info("Importing to Pensieve TM");
  1109. }
  1110. guessMissingParameters(input);
  1111. if ( !prepareFilter(configId) ) return; // Next input
  1112. guessMissingLocales(input);
  1113. file = new File(input);
  1114. String output = input;
  1115. if ( command == CMD_CONV2PO ) {
  1116. output += ".po";
  1117. }
  1118. else if ( command == CMD_CONV2TMX ) {
  1119. output += ".tmx";
  1120. }
  1121. else if ( command == CMD_CONV2TABLE) {
  1122. output += ".txt";
  1123. }
  1124. else { // Pensieve
  1125. output = checkPensieveDirExtension();
  1126. }
  1127. URI outputURI = new File(output).toURI();
  1128. try (RawDocument rd = new RawDocument(file.toURI(), inputEncoding, srcLoc, trgLoc);) {
  1129. rd.setFilterConfigId(configId);
  1130. LOGGER.info("Source language: {}", srcLoc);
  1131. LOGGER.info("Target language: {}", trgLoc);
  1132. LOGGER.info("Default input encoding: {}", inputEncoding);
  1133. LOGGER.info("Filter configuration: {}", configId);
  1134. LOGGER.info("Output: {}", output);
  1135. convertFile(rd, outputURI);
  1136. }
  1137. break;
  1138. }
  1139. LOGGER.info("Done in " + timer);
  1140. }
  1141. private void printBanner () {
  1142. if (getClass().getPackage() == null) return;
  1143. LOGGER.info("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  1144. LOGGER.info("Okapi Tikal - Localization Toolset");
  1145. // The version will show as 'null' until the code is build as a JAR.
  1146. LOGGER.info("Version: {}", getClass().getPackage().getImplementationVersion());
  1147. LOGGER.info("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  1148. }
  1149. private void showInfo () {
  1150. Runtime rt = Runtime.getRuntime();
  1151. rt.runFinalization();
  1152. rt.gc();
  1153. LOGGER.info("Java version: {}", System.getProperty("java.version")); //$NON-NLS-1$
  1154. LOGGER.info("Platform: {}, {}, {}",
  1155. System.getProperty("os.name"), //$NON-NLS-1$
  1156. System.getProperty("os.arch"), //$NON-NLS-1$
  1157. System.getProperty("os.version")); //$NON-NLS-1$
  1158. NumberFormat nf = NumberFormat.getInstance();
  1159. LOGGER.info("Java VM memory: free={} KB, total={} KB", //$NON-NLS-1$
  1160. nf.format(rt.freeMemory()/1024),
  1161. nf.format(rt.totalMemory()/1024));
  1162. LOGGER.info("Tikal display encoding: {}", getConsoleEncodingName());
  1163. LOGGER.info("-------------------------------------------------------------------------------"); //$NON-NLS-1$
  1164. }
  1165. private String getAppRootDirectory () {
  1166. try {
  1167. // Old code
  1168. // URL url = getClass().getProtectionDomain().getCodeSource().getLocation();
  1169. // String path = new File(url.toURI()).getCanonicalPath();
  1170. // return Util.getDirectoryName(Util.getDirectoryName(path));
  1171. // Get the location of the main class source
  1172. LOGGER.debug("1 " + getClass());
  1173. LOGGER.debug("2 " + getClass().getProtectionDomain());
  1174. LOGGER.debug("3 " + getClass().getProtectionDomain().getCodeSource());
  1175. LOGGER.debug("4 " + getClass().getProtectionDomain().getCodeSource().getLocation());
  1176. LOGGER.debug("5 " + getClass().getProtectionDomain().getCodeSource().getLocation().getFile());
  1177. File file = new File(getClass().getProtectionDomain().getCodeSource().getLocation().getFile());
  1178. String appRootFolder = URLDecoder.decode(file.getAbsolutePath(),"utf-8"); //$NON-NLS-1$
  1179. // Remove the JAR file if running an installed version
  1180. boolean fromJar = appRootFolder.endsWith(".jar");
  1181. if ( fromJar ) appRootFolder = Util.getDirectoryName(appRootFolder);
  1182. // Remove the application folder in all cases
  1183. return Util.getDirectoryName(appRootFolder);
  1184. }
  1185. catch ( IOException e ) {
  1186. throw new OkapiIOException(e);
  1187. }
  1188. }
  1189. private void showHelp () throws MalformedURLException {
  1190. Util.openWikiTopic("Tikal");
  1191. }
  1192. private void printUsage () {
  1193. LOGGER.info("Shows this screen: -?");
  1194. LOGGER.info("Shows version and other information: -i or --info");
  1195. LOGGER.info("Opens the user guide page: -h or --help");
  1196. LOGGER.info("Lists all available filter configurations: -lfc or --listconf");
  1197. LOGGER.info("Outputs all messages to the current logger instead of the console: -logger");
  1198. LOGGER.info("Outputs debug messages when in console mode (no effect on logger): -trace");
  1199. LOGGER.info("Does not abort batch processing in case of individual errors: -continue");
  1200. LOGGER.info("Edits or view filter configurations (UI-dependent command):");
  1201. LOGGER.info(" -e [[-fc] configId] [-pd configDirectory]");
  1202. LOGGER.info("Extracts a file to XLIFF (and optionally segment and pre-translate):");
  1203. LOGGER.info(" -x[1] inputFile [inputFile2...] [-fc configId] [-ie encoding] [-sl srcLang]");
  1204. LOGGER.info(" [-tl trgLang] [-seg [srxFile]] [-tt [url]|-mm [key]");
  1205. LOGGER.info(" |-pen tmDirectory|-gs configFile|-apertium [configFile]|-mmt url [context]");
  1206. LOGGER.info(" |-ms configFile|-tda configFile|-gg configFile|-bi bilingFile|-lingo24 configFile]");
  1207. LOGGER.info(" [-maketmx [tmxFile]] [-opt threshold] [-od outputDirectory]");
  1208. LOGGER.info(" [-rd rootDirectory] [-nocopy] [-noalttrans] [-pd configDirectory]");
  1209. LOGGER.info(" -x and -x1: use original file for merge");
  1210. LOGGER.info("Merges an XLIFF document back to its original format:");
  1211. LOGGER.info(" -m[1] xliffFile [xliffFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1212. LOGGER.info(" [-sd sourceDirectory] [-od outputDirectory] [-pd configDirectory]");
  1213. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-av|-v <libVersion>|-vp <jarPath>]|-vc <configPath>");
  1214. LOGGER.info(" -m and -m1: use original file for merge");
  1215. LOGGER.info("Translates a file:");
  1216. LOGGER.info(" -t inputFile [inputFile2...] [-fc configId] [-ie encoding] [-oe encoding]");
  1217. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]] [-tt [url]");
  1218. LOGGER.info(" |-mm [key]|-pen tmDirectory|-gs configFile|-apertium [configFile]|-mmt url [context]");
  1219. LOGGER.info(" |-ms configFile|-tda configFile|-gg configFile|-bi bilingFile|-lingo24 [configFile]");
  1220. LOGGER.info(" [-maketmx [tmxFile]] [-opt threshold] [-pd configDirectory]");
  1221. LOGGER.info(" [-rd rootDirectory]");
  1222. LOGGER.info("Extracts a file to Moses InlineText:");
  1223. LOGGER.info(" -xm inputFile [-fc configId] [-ie encoding] [-seg [srxFile]] [-2]");
  1224. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-rd rootDirectory]");
  1225. LOGGER.info(" [-to srcOutputFile] (single input only)");
  1226. LOGGER.info("Leverages a file with Moses InlineText:");
  1227. LOGGER.info(" -lm inputFile [-fc configId] [-ie encoding] [-oe encoding] [-sl srcLang]");
  1228. LOGGER.info(" [-tl trgLang] [-seg [srxFile]] [-totrg|-overtrg] [-bpt]");
  1229. LOGGER.info(" [-rd rootDirectory] [-noalttrans]");
  1230. LOGGER.info(" [-from mosesFile] [-to outputFile] (single input only)");
  1231. LOGGER.info("Segments a file:");
  1232. LOGGER.info(" -s inputFile [-fc configId] [-ie encoding] [-rd rootDirectory]");
  1233. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-seg [srxFile]] [-pd configDirectory]");
  1234. LOGGER.info("Queries translation resources:");
  1235. LOGGER.info(" -q \"source text\" [-sl srcLang] [-tl trgLang]");
  1236. LOGGER.info(" [-tt [url]] [-mm [key]] [-pen tmDirectory] [-gs configFile] [-mmt url [context]]");
  1237. LOGGER.info(" [-apertium [configFile]] [-ms configFile] [-tda configFile] [-lingo24 [configFile]]");
  1238. LOGGER.info(" [-gg configFile] [-bi bilingFile] [-lingo24 configFile] [-opt threshold[:maxhits]]");
  1239. //LOGGER.info("Adds translation to a resources:");
  1240. //LOGGER.info(" -a \"source text\" \"target text\" [rating] [-sl srcLang] [-tl trgLang]");
  1241. //LOGGER.info(" -ms configFile");
  1242. LOGGER.info("Converts to PO format:");
  1243. LOGGER.info(" -2po inputFile [inputFile2...] [-fc configId] [-ie encoding] [-all]");
  1244. LOGGER.info(" [-generic] [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty]");
  1245. LOGGER.info(" [-rd rootDirectory] [-pd configDirectory] [-approved]");
  1246. LOGGER.info("Converts to TMX format:");
  1247. LOGGER.info(" -2tmx inputFile [inputFile2...] [-fc configId] [-ie encoding] [-all]");
  1248. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-rd rootDirectory]");
  1249. LOGGER.info(" [-pd configDirectory] [-approved]");
  1250. LOGGER.info("Converts to table format:");
  1251. LOGGER.info(" -2tbl inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1252. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-csv|-tab]");
  1253. LOGGER.info(" [-xliff|-xliffgx|-tmx|-generic] [-all] [-rd rootDirectory]");
  1254. LOGGER.info(" [-pd configDirectory] [-approved]");
  1255. LOGGER.info("Imports to Pensieve TM:");
  1256. LOGGER.info(" -imp tmDirectory inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1257. LOGGER.info(" [-sl srcLang] [-tl trgLang] [-trgsource|-trgempty] [-all] [-over]");
  1258. LOGGER.info(" [-rd rootDirectory] [-pd configDirectory] [-approved]");
  1259. LOGGER.info("Exports Pensieve TM as TMX:");
  1260. LOGGER.info(" -exp tmDirectory1 [tmDirectory2...] [-sl srcLang] [-tl trgLang]");
  1261. LOGGER.info(" [-trgsource|-trgempty] [-all]");
  1262. LOGGER.info("Prints a Scoping Report:");
  1263. LOGGER.info(" -sr inputFile [inputFile2...] [-fc configId] [-ie encoding]");
  1264. LOGGER.info(" [-sl srcL…

Large files are truncated, but you can click here to view the full file