PageRenderTime 58ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/tmispell-voikko-0.7.1/src/options.cc

#
C++ | 451 lines | 319 code | 66 blank | 66 comment | 118 complexity | 395969169d9e8e25eadc63f29ba46983 MD5 | raw file
Possible License(s): GPL-2.0
  1. /* Copyright (C) Pauli Virtanen
  2. *
  3. * This program is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU General Public License
  5. * as published by the Free Software Foundation; either version 2
  6. * of the License, or (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this program; if not, write to the Free Software
  15. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. *********************************************************************************/
  17. /**
  18. * @file options.cc
  19. * @author Pauli Virtanen
  20. *
  21. * Parse command line options and assume sensible defaults.
  22. */
  23. #include <cstdlib>
  24. #include <cstring>
  25. #include <string>
  26. #include <iostream>
  27. #include <sstream>
  28. #include <vector>
  29. #include "i18n.hh"
  30. #include "common.hh"
  31. #include "regexp.hh"
  32. #include "config.hh"
  33. #include "options.hh"
  34. #include "tmerror.hh"
  35. using std::string;
  36. using std::vector;
  37. using std::pair;
  38. using std::istringstream;
  /**
   * Return the user's home directory.
   *
   * The value is taken from the HOME environment variable; an empty
   * string is returned when HOME is not set.
   */
  string get_home_dir()
  {
      char const* const home = getenv("HOME");
      return home ? string(home) : string();
  }
  48. /**
  49. * Deduce the filter needed by the given file by the file extension.
  50. */
  51. Options::FilterType Options::guess_file_filter(string const& filename)
  52. {
  53. static RegExp::Flag const rf = RegExp::EXTENDED | RegExp::ICASE
  54. | RegExp::NOSUB;
  55. static RegExp roff_re("\\.(ms|mm|me|man)$", rf);
  56. static RegExp tex_re("\\.(tex)$", rf);
  57. static RegExp html_re("\\.(htm|html|sgml)$", rf);
  58. if (roff_re.match(filename)) {
  59. return nroff;
  60. } else if (tex_re.match(filename)) {
  61. return tex;
  62. } else if (html_re.match(filename)) {
  63. return sgml;
  64. } else {
  65. return plain;
  66. }
  67. }
  68. /**
  69. * Print usage information
  70. */
  71. void Options::print_usage()
  72. {
  73. std::cout << ssprintf(
  74. _("Usage: %s [options] [file]...\n"
  75. "Options: [FMNLVlfsaAtnhgbxBCPmSdpwWTv]\n"
  76. "\n"
  77. " -F <file> Use given file as the configuration file.\n"
  78. "\n"
  79. "The following flags are same for ispell:\n"
  80. " -v[v] Print version number and exit.\n"
  81. " -M One-line mini menu at the bottom of the screen.\n"
  82. " -N No mini menu at the bottom of the screen.\n"
  83. " -L <num> Number of context lines.\n"
  84. " -V Use \"cat -v\" style for characters not in the 7-bit ANSI\n"
  85. " character set.\n"
  86. " -l Only output a list of misspelled words.\n"
  87. " -f <file> Specify the output file.\n"
  88. " -s Issue SIGTSTP at every end of line.\n"
  89. " -a Read commands.\n"
  90. " -A Read commands and enable a command to include a file.\n"
  91. " -e[e1234] Expand affixes.\n"
  92. " -c Compress affixes.\n"
  93. " -D Dump affix tables.\n"
  94. " -t The input is in TeX format.\n"
  95. " -n The input is in [nt]roff format.\n"
  96. " -h The input is in sgml format.\n"
  97. " -b Create backup files.\n"
  98. " -x Do not create backup files.\n"
  99. " -B Do not allow run-together words.\n"
  100. " -C Allow run-together words.\n"
  101. " -P Do not generate extra root/affix combinations.\n"
  102. " -m Allow root/affix combinations that are not in dictionary.\n"
  103. " -S Sort the list of guesses by probable correctness.\n"
  104. " -d <dict> Specify an alternate dictionary file.\n"
  105. " -p <file> Specify an alternate personal dictionary.\n"
  106. " -w <chars> Specify additional characters that can be part of a word.\n"
  107. " -W <len> Consider words shorter than this always correct.\n"
  108. " -T <fmt> Assume a given formatter type for all files.\n"
  109. " -r <cset> Specify the character set of the input.\n"),
  110. PACKAGE) << std::endl;
  111. }
  112. /**
  113. * Print version information to cout.
  114. */
  115. static void print_version()
  116. {
  117. // This should not be localized: some programs look for this.
  118. std::cout << "@(#) International Ispell Version 3.1.20 compatible "
  119. << PACKAGE_STRING
  120. << std::endl;
  121. }
  122. /**
  123. * Form the ispell argv.
  124. * The array is newly allocated, but its contents are constant, and
  125. * should not be modified.
  126. */
  127. typedef char const* argv_t;
  128. char const** Options::get_ispell_argv(string const& prog_name) const
  129. {
  130. char const** argv = new argv_t[ispell_args_.size() + 2];
  131. int i = 0;
  132. argv[i++] = prog_name.c_str();
  133. vector<string>::const_iterator it;
  134. for (it = ispell_args_.begin(); it != ispell_args_.end(); ++it)
  135. {
  136. argv[i++] = it->c_str();
  137. }
  138. argv[i] = 0;
  139. return argv;
  140. }
  141. /**
  142. * Extract the main dictionary's identifier from the hash file name as given to
  143. * ispell. This implementation returns the part of the file name between the
  144. * last '/' and '.'.
  145. *
  146. * Example: /usr/lib/ispell/americanmed+.hash => americanmed+
  147. */
  148. static string extract_dictionary_identifier(string const& hash_file_name)
  149. {
  150. RegExp re1 = RegExp("([^/]*)\\.hash$", RegExp::ICASE|RegExp::EXTENDED);
  151. RegExp re2 = RegExp("([^/]*)$", RegExp::ICASE|RegExp::EXTENDED);
  152. if (re1.match(hash_file_name)) {
  153. return re1.sub(hash_file_name, 1);
  154. } else if (re2.match(hash_file_name)) {
  155. return re2.sub(hash_file_name, 1);
  156. } else {
  157. return hash_file_name;
  158. }
  159. }
  /**
   * Walks over the program argument list, recognizing options and
   * extracting their arguments.
   *
   * An option's argument is either concatenated directly to the option
   * or found in the following argv entry: -d<argument> or -d <argument>.
   * When is_option() finds the argument in the following entry it records
   * that fact, so that next_option() and push_to() treat both entries as
   * one unit.
   */
  class OptionParser
  {
  public:
      /** Start at argv[1]; argv[0] is never visited. */
      OptionParser(int argc, char* const* argv)
          : argc_(argc), argv_(argv), i_(1), argument_in_next_(false)
      {}

      /**
       * Step to the next unvisited entry, also skipping the separate
       * argument entry of the current option when there is one.
       */
      void next_option()
      {
          i_ += argument_in_next_ ? 2 : 1;
          argument_in_next_ = false;
      }

      /** True while the current position is inside the argument list. */
      bool has_next_option() const { return i_ < argc_; }

      bool is_option(char const* opt=0, char const** arg = 0);

      /**
       * Append the current entry to args, followed by its separate
       * argument entry when there is one.
       */
      void push_to(vector<string>& args) const
      {
          char* const* const current = argv_ + i_;
          args.push_back(current[0]);
          if (argument_in_next_)
              args.push_back(current[1]);
      }

      /** The current argv entry, exactly as given. */
      char const* opt() const { return argv_[i_]; }

  private:
      int argc_;
      char* const* argv_;
      int i_;                  // Index of the current argv entry
      bool argument_in_next_;  // Current option's argument is argv_[i_ + 1]
  };
  193. bool OptionParser::is_option(char const* opt, char const** arg_p)
  194. {
  195. char const* s = argv_[i_];
  196. if (opt == 0) {
  197. if (s[0] == '-')
  198. return true;
  199. else
  200. return false;
  201. }
  202. int opt_len = strlen(opt);
  203. if (strncmp(s, opt, opt_len) != 0) return false;
  204. char const* arg = (s + opt_len);
  205. if (arg_p == 0) {
  206. // If the option should not have an argument, then
  207. // the whole argv[i] must be the option.
  208. return *arg == '\0';
  209. }
  210. if (*arg == '\0') {
  211. // The argument is in the next argv position
  212. argument_in_next_ = true;
  213. if (i_ + 1 >= argc_) {
  214. throw Error(_("Missing argument for option %s"), opt);
  215. }
  216. *arg_p = argv_[i_ + 1];
  217. } else {
  218. argument_in_next_ = false;
  219. *arg_p = arg;
  220. }
  221. return true;
  222. }
  223. /**
  224. * Parse the given command line arguments, and assume sensible defaults.
  225. *
  226. * Some options understood by the real ispell are ignored, because they
  227. * are irrelevant to the spell checking engine used by this program.
  228. */
  229. Options::Options(int argc, char* const* argv)
  230. : mode_(normal), // Normal operation mode
  231. backups_(true), // Backup files
  232. pipe_include_command_(), // Pipe mode include command disabled
  233. sigstop_at_eol_(false), // No SIGTSTOP needed
  234. dictionary_("default"), // Use default dictionary
  235. personal_dictionary_(), // Default determined later
  236. spellchecker_entry_(0), // No active spell checker by default
  237. extra_word_characters_(), // No extra word characters by default
  238. legal_word_length_(0), // All strings of word characters are words
  239. default_filter_(plain), // Use plain text filter
  240. files_(), // No files to check
  241. ansi7_(false), // No 7 bit ANSI
  242. mini_menu_(true), // Mini menu enabled
  243. context_lines_(-1), // Number of context lines by screen size
  244. output_file_(), // Output to stdout
  245. config_file_(CONFIG_FILE), // Default configuration file
  246. user_encoding_(),
  247. ispell_args_()
  248. {
  249. FilterType next_filter = plain;
  250. FilterType default_filter = plain;
  251. bool default_filter_set = false;
  252. bool next_filter_set = false;
  253. for (OptionParser p(argc, argv);
  254. p.has_next_option();
  255. p.next_option()) {
  256. const char* arg;
  257. if (p.is_option("-F", &arg)) { // Configuration file
  258. config_file_ = arg;
  259. continue; // This argument will not be passed to ispell
  260. } else if (p.is_option("-v") ||
  261. p.is_option("--version")) { // Print version
  262. print_version();
  263. mode_ = quit;
  264. return;
  265. } else if (p.is_option("-vv")) { // Print extra information
  266. mode_ = ispell;
  267. } else if (p.is_option("--help")) { // Print usage
  268. print_usage();
  269. mode_ = quit;
  270. return;
  271. } else if (p.is_option("-M")) { // One-line mini menu at bottom
  272. mini_menu_ = true;
  273. } else if (p.is_option("-N")) { // Suppress the mini menu
  274. mini_menu_ = false;
  275. } else if (p.is_option("-L", &arg)) {// Number of context lines
  276. istringstream str(arg);
  277. str >> context_lines_;
  278. } else if (p.is_option("-V")) {// Chars not in 7-bit ansi
  279. // displayed as by cat -v
  280. ansi7_ = true;
  281. } else if (p.is_option("-l")) { // List misspelled words
  282. mode_ = list;
  283. } else if (p.is_option("-f", &arg)) { // The output file
  284. output_file_ = arg;
  285. } else if (p.is_option("-s")) { // SIGTSTP at end of line
  286. sigstop_at_eol_ = true;
  287. } else if (p.is_option("-a")) { // pipe controlled mode
  288. mode_ = pipe;
  289. pipe_include_command_ = string();
  290. } else if (p.is_option("-A")) { // pipe controlled
  291. // include-enabled mode
  292. mode_ = pipe;
  293. pipe_include_command_ = "&Include_File&";
  294. if (getenv("INCLUDE_STRING")) {
  295. pipe_include_command_=getenv("INCLUDE_STRING");
  296. }
  297. } else if (p.is_option("-e") || // Affix expansion mode
  298. p.is_option("-ee") || // Affix expansion mode
  299. p.is_option("-e1") || // Affix expansion mode
  300. p.is_option("-e2") || // Affix expansion mode
  301. p.is_option("-e3") || // Affix expansion mode
  302. p.is_option("-e4") || // Affix expansion mode
  303. p.is_option("-c") || // Affix compression mode
  304. p.is_option("-D")) { // Dump affix tables
  305. mode_ = ispell;
  306. } else if (p.is_option("-t")) { // Input in TeX format
  307. next_filter = tex;
  308. next_filter_set = true;
  309. } else if (p.is_option("-n")) {// Input in [nt]roff format
  310. next_filter = nroff;
  311. next_filter_set = true;
  312. } else if (p.is_option("-h")) { // Input in SGML format
  313. next_filter = sgml;
  314. next_filter_set = true;
  315. } else if (p.is_option("-b")) { // Create .bak backup files
  316. backups_ = true;
  317. } else if (p.is_option("-x")) { // Do not create backup files
  318. backups_ = false;
  319. } else if (p.is_option("-B") || // Disallow run-together words
  320. p.is_option("-C") || // Allow run-together words
  321. p.is_option("-P") || // No extra affix combinations
  322. p.is_option("-m") || // Extra affix combinations
  323. p.is_option("-S")) { // Sort guesses by correctness
  324. // (ignored)
  325. } else if (p.is_option("-d", &arg)) { // Dictionary to use
  326. dictionary_ = arg;
  327. dictionary_identifier_ =
  328. extract_dictionary_identifier(dictionary_);
  329. } else if (p.is_option("-p", &arg)) { // Personal dict. to use
  330. personal_dictionary_ = arg;
  331. } else if (p.is_option("-w", &arg)) { // Additional word chars
  332. extra_word_characters_.assign(arg,
  333. arg + strlen(arg));
  334. } else if (p.is_option("-W", &arg)) { // Min. length of words
  335. // to check.
  336. istringstream str(arg);
  337. str >> legal_word_length_;
  338. } else if (p.is_option("-T", &arg)) { // Assume input format
  339. // for all files
  340. if (strcmp(arg, "plain") == 0)
  341. default_filter = plain;
  342. else if (strcmp(arg, "latin1") == 0) {
  343. default_filter = plain;
  344. user_encoding_ = "latin1";
  345. }
  346. else if (strcmp(arg, "utf8") == 0) {
  347. default_filter = plain;
  348. user_encoding_ = "utf8";
  349. }
  350. else if (strcmp(arg, "nroff") == 0)
  351. default_filter = nroff;
  352. else if (strcmp(arg, "tex") == 0)
  353. default_filter = tex;
  354. else if (strcmp(arg, "sgml") == 0 ||
  355. strcmp(arg, "html") == 0)
  356. default_filter = sgml;
  357. next_filter = default_filter;
  358. default_filter_set = true;
  359. } else if (p.is_option("-r", &arg)) { // Set input charset
  360. user_encoding_ = arg;
  361. } else if (p.is_option()) { // An unknown option
  362. print_usage();
  363. throw Error(_("Unknown option %s"), p.opt());
  364. } else { // A file
  365. if (!next_filter_set)
  366. next_filter = guess_file_filter(p.opt());
  367. files_.push_back(
  368. pair<string, FilterType>(p.opt(),
  369. next_filter));
  370. next_filter = default_filter;
  371. next_filter_set = default_filter_set;
  372. }
  373. p.push_to(ispell_args_);
  374. }
  375. // Set the personal dictionary according to dictionary,
  376. // if the user hasn't set it.
  377. if (personal_dictionary_.empty()) {
  378. personal_dictionary_ =
  379. get_home_dir() + "/.ispell_" + dictionary_identifier_;
  380. }
  381. if (default_filter_set)
  382. default_filter_ = default_filter;
  383. else if (next_filter_set)
  384. default_filter_ = next_filter;
  385. }