PageRenderTime 72ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 1ms

/crates/core/args.rs

https://github.com/BurntSushi/ripgrep
Rust | 1854 lines | 1284 code | 140 blank | 430 comment | 245 complexity | 6711208dec69baf102a25eb43b9d65b6 MD5 | raw file
Possible License(s): MIT, Unlicense
  1. use std::cmp;
  2. use std::env;
  3. use std::ffi::{OsStr, OsString};
  4. use std::fs;
  5. use std::io::{self, Write};
  6. use std::path::{Path, PathBuf};
  7. use std::process;
  8. use std::sync::Arc;
  9. use std::time::SystemTime;
  10. use clap;
  11. use grep::cli;
  12. use grep::matcher::LineTerminator;
  13. #[cfg(feature = "pcre2")]
  14. use grep::pcre2::{
  15. RegexMatcher as PCRE2RegexMatcher,
  16. RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
  17. };
  18. use grep::printer::{
  19. default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder,
  20. Stats, Summary, SummaryBuilder, SummaryKind, JSON,
  21. };
  22. use grep::regex::{
  23. RegexMatcher as RustRegexMatcher,
  24. RegexMatcherBuilder as RustRegexMatcherBuilder,
  25. };
  26. use grep::searcher::{
  27. BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder,
  28. };
  29. use ignore::overrides::{Override, OverrideBuilder};
  30. use ignore::types::{FileTypeDef, Types, TypesBuilder};
  31. use ignore::{Walk, WalkBuilder, WalkParallel};
  32. use log;
  33. use num_cpus;
  34. use regex;
  35. use termcolor::{BufferWriter, ColorChoice, WriteColor};
  36. use crate::app;
  37. use crate::config;
  38. use crate::logger::Logger;
  39. use crate::messages::{set_ignore_messages, set_messages};
  40. use crate::path_printer::{PathPrinter, PathPrinterBuilder};
  41. use crate::search::{
  42. PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
  43. };
  44. use crate::subject::SubjectBuilder;
  45. use crate::Result;
  /// The high-level action ripgrep carries out, as selected by the command
  /// line configuration.
  #[derive(Clone, Copy, Debug, Eq, PartialEq)]
  pub enum Command {
      /// Run a search on exactly one thread.
      Search,
      /// Run a search that may be spread over many threads.
      SearchParallel,
      /// The arguments ask for a search, but ripgrep already knows that no
      /// match can ever be produced (e.g., no patterns were given, or
      /// --max-count=0 was passed).
      SearchNever,
      /// List the files that would be searched without searching them, using
      /// exactly one thread.
      Files,
      /// List the files that would be searched without searching them, with
      /// directory traversal possibly spread over many threads.
      FilesParallel,
      /// Print every configured file type definition: the defaults plus any
      /// added on the command line.
      Types,
      /// Print the version of PCRE2 in use.
      PCRE2Version,
  }

  impl Command {
      /// Reports whether running this command means actually executing a
      /// search.
      ///
      /// The match is intentionally exhaustive so that adding a variant forces
      /// this method to be revisited.
      fn is_search(&self) -> bool {
          match *self {
              Command::Search | Command::SearchParallel => true,
              Command::SearchNever
              | Command::Files
              | Command::FilesParallel
              | Command::Types
              | Command::PCRE2Version => false,
          }
      }
  }
  82. /// The primary configuration object used throughout ripgrep. It provides a
  83. /// high-level convenient interface to the provided command line arguments.
  84. ///
  85. /// An `Args` object is cheap to clone and can be used from multiple threads
  86. /// simultaneously.
  87. #[derive(Clone, Debug)]
  88. pub struct Args(Arc<ArgsImp>);
  89. #[derive(Clone, Debug)]
  90. struct ArgsImp {
  91. /// Mid-to-low level routines for extracting CLI arguments.
  92. matches: ArgMatches,
  93. /// The patterns provided at the command line and/or via the -f/--file
  94. /// flag. This may be empty.
  95. patterns: Vec<String>,
  96. /// A matcher built from the patterns.
  97. ///
  98. /// It's important that this is only built once, since building this goes
  99. /// through regex compilation and various types of analyses. That is, if
  100. /// you need many of theses (one per thread, for example), it is better to
  101. /// build it once and then clone it.
  102. matcher: PatternMatcher,
  103. /// The paths provided at the command line. This is guaranteed to be
  104. /// non-empty. (If no paths are provided, then a default path is created.)
  105. paths: Vec<PathBuf>,
  106. /// Returns true if and only if `paths` had to be populated with a single
  107. /// default path.
  108. using_default_path: bool,
  109. }
  110. impl Args {
  111. /// Parse the command line arguments for this process.
  112. ///
  113. /// If a CLI usage error occurred, then exit the process and print a usage
  114. /// or error message. Similarly, if the user requested the version of
  115. /// ripgrep, then print the version and exit.
  116. ///
  117. /// Also, initialize a global logger.
  118. pub fn parse() -> Result<Args> {
  119. // We parse the args given on CLI. This does not include args from
  120. // the config. We use the CLI args as an initial configuration while
  121. // trying to parse config files. If a config file exists and has
  122. // arguments, then we re-parse argv, otherwise we just use the matches
  123. // we have here.
  124. let early_matches = ArgMatches::new(clap_matches(env::args_os())?);
  125. set_messages(!early_matches.is_present("no-messages"));
  126. set_ignore_messages(!early_matches.is_present("no-ignore-messages"));
  127. if let Err(err) = Logger::init() {
  128. return Err(format!("failed to initialize logger: {}", err).into());
  129. }
  130. if early_matches.is_present("trace") {
  131. log::set_max_level(log::LevelFilter::Trace);
  132. } else if early_matches.is_present("debug") {
  133. log::set_max_level(log::LevelFilter::Debug);
  134. } else {
  135. log::set_max_level(log::LevelFilter::Warn);
  136. }
  137. let matches = early_matches.reconfigure()?;
  138. // The logging level may have changed if we brought in additional
  139. // arguments from a configuration file, so recheck it and set the log
  140. // level as appropriate.
  141. if matches.is_present("trace") {
  142. log::set_max_level(log::LevelFilter::Trace);
  143. } else if matches.is_present("debug") {
  144. log::set_max_level(log::LevelFilter::Debug);
  145. } else {
  146. log::set_max_level(log::LevelFilter::Warn);
  147. }
  148. set_messages(!matches.is_present("no-messages"));
  149. set_ignore_messages(!matches.is_present("no-ignore-messages"));
  150. matches.to_args()
  151. }
  152. /// Return direct access to command line arguments.
  153. fn matches(&self) -> &ArgMatches {
  154. &self.0.matches
  155. }
  156. /// Return the patterns found in the command line arguments. This includes
  157. /// patterns read via the -f/--file flags.
  158. fn patterns(&self) -> &[String] {
  159. &self.0.patterns
  160. }
  161. /// Return the matcher builder from the patterns.
  162. fn matcher(&self) -> &PatternMatcher {
  163. &self.0.matcher
  164. }
  165. /// Return the paths found in the command line arguments. This is
  166. /// guaranteed to be non-empty. In the case where no explicit arguments are
  167. /// provided, a single default path is provided automatically.
  168. fn paths(&self) -> &[PathBuf] {
  169. &self.0.paths
  170. }
  171. /// Returns true if and only if `paths` had to be populated with a default
  172. /// path, which occurs only when no paths were given as command line
  173. /// arguments.
  174. fn using_default_path(&self) -> bool {
  175. self.0.using_default_path
  176. }
  177. /// Return the printer that should be used for formatting the output of
  178. /// search results.
  179. ///
  180. /// The returned printer will write results to the given writer.
  181. fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
  182. match self.matches().output_kind() {
  183. OutputKind::Standard => {
  184. let separator_search = self.command()? == Command::Search;
  185. self.matches()
  186. .printer_standard(self.paths(), wtr, separator_search)
  187. .map(Printer::Standard)
  188. }
  189. OutputKind::Summary => self
  190. .matches()
  191. .printer_summary(self.paths(), wtr)
  192. .map(Printer::Summary),
  193. OutputKind::JSON => {
  194. self.matches().printer_json(wtr).map(Printer::JSON)
  195. }
  196. }
  197. }
  198. }
  199. /// High level public routines for building data structures used by ripgrep
  200. /// from command line arguments.
  201. impl Args {
  202. /// Create a new buffer writer for multi-threaded printing with color
  203. /// support.
  204. pub fn buffer_writer(&self) -> Result<BufferWriter> {
  205. let mut wtr = BufferWriter::stdout(self.matches().color_choice());
  206. wtr.separator(self.matches().file_separator()?);
  207. Ok(wtr)
  208. }
  209. /// Return the high-level command that ripgrep should run.
  210. pub fn command(&self) -> Result<Command> {
  211. let is_one_search = self.matches().is_one_search(self.paths());
  212. let threads = self.matches().threads()?;
  213. let one_thread = is_one_search || threads == 1;
  214. Ok(if self.matches().is_present("pcre2-version") {
  215. Command::PCRE2Version
  216. } else if self.matches().is_present("type-list") {
  217. Command::Types
  218. } else if self.matches().is_present("files") {
  219. if one_thread {
  220. Command::Files
  221. } else {
  222. Command::FilesParallel
  223. }
  224. } else if self.matches().can_never_match(self.patterns()) {
  225. Command::SearchNever
  226. } else if one_thread {
  227. Command::Search
  228. } else {
  229. Command::SearchParallel
  230. })
  231. }
  232. /// Builder a path printer that can be used for printing just file paths,
  233. /// with optional color support.
  234. ///
  235. /// The printer will print paths to the given writer.
  236. pub fn path_printer<W: WriteColor>(
  237. &self,
  238. wtr: W,
  239. ) -> Result<PathPrinter<W>> {
  240. let mut builder = PathPrinterBuilder::new();
  241. builder
  242. .color_specs(self.matches().color_specs()?)
  243. .separator(self.matches().path_separator()?)
  244. .terminator(self.matches().path_terminator().unwrap_or(b'\n'));
  245. Ok(builder.build(wtr))
  246. }
  247. /// Returns true if and only if ripgrep should be "quiet."
  248. pub fn quiet(&self) -> bool {
  249. self.matches().is_present("quiet")
  250. }
  251. /// Returns true if and only if the search should quit after finding the
  252. /// first match.
  253. pub fn quit_after_match(&self) -> Result<bool> {
  254. Ok(self.matches().is_present("quiet") && self.stats()?.is_none())
  255. }
  256. /// Build a worker for executing searches.
  257. ///
  258. /// Search results are written to the given writer.
  259. pub fn search_worker<W: WriteColor>(
  260. &self,
  261. wtr: W,
  262. ) -> Result<SearchWorker<W>> {
  263. let matches = self.matches();
  264. let matcher = self.matcher().clone();
  265. let printer = self.printer(wtr)?;
  266. let searcher = matches.searcher(self.paths())?;
  267. let mut builder = SearchWorkerBuilder::new();
  268. builder
  269. .json_stats(matches.is_present("json"))
  270. .preprocessor(matches.preprocessor())
  271. .preprocessor_globs(matches.preprocessor_globs()?)
  272. .search_zip(matches.is_present("search-zip"))
  273. .binary_detection_implicit(matches.binary_detection_implicit())
  274. .binary_detection_explicit(matches.binary_detection_explicit());
  275. Ok(builder.build(matcher, searcher, printer))
  276. }
  277. /// Returns a zero value for tracking statistics if and only if it has been
  278. /// requested.
  279. ///
  280. /// When this returns a `Stats` value, then it is guaranteed that the
  281. /// search worker will be configured to track statistics as well.
  282. pub fn stats(&self) -> Result<Option<Stats>> {
  283. Ok(if self.command()?.is_search() && self.matches().stats() {
  284. Some(Stats::new())
  285. } else {
  286. None
  287. })
  288. }
  289. /// Return a builder for constructing subjects. A subject represents a
  290. /// single unit of something to search. Typically, this corresponds to a
  291. /// file or a stream such as stdin.
  292. pub fn subject_builder(&self) -> SubjectBuilder {
  293. let mut builder = SubjectBuilder::new();
  294. builder.strip_dot_prefix(self.using_default_path());
  295. builder
  296. }
  297. /// Execute the given function with a writer to stdout that enables color
  298. /// support based on the command line configuration.
  299. pub fn stdout(&self) -> cli::StandardStream {
  300. let color = self.matches().color_choice();
  301. if self.matches().is_present("line-buffered") {
  302. cli::stdout_buffered_line(color)
  303. } else if self.matches().is_present("block-buffered") {
  304. cli::stdout_buffered_block(color)
  305. } else {
  306. cli::stdout(color)
  307. }
  308. }
  309. /// Return the type definitions compiled into ripgrep.
  310. ///
  311. /// If there was a problem reading and parsing the type definitions, then
  312. /// this returns an error.
  313. pub fn type_defs(&self) -> Result<Vec<FileTypeDef>> {
  314. Ok(self.matches().types()?.definitions().to_vec())
  315. }
  316. /// Return a walker that never uses additional threads.
  317. pub fn walker(&self) -> Result<Walk> {
  318. Ok(self.matches().walker_builder(self.paths())?.build())
  319. }
  320. /// Return a walker that never uses additional threads.
  321. pub fn walker_parallel(&self) -> Result<WalkParallel> {
  322. Ok(self.matches().walker_builder(self.paths())?.build_parallel())
  323. }
  324. }
  325. /// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to
  326. /// the parsed arguments.
  327. #[derive(Clone, Debug)]
  328. struct ArgMatches(clap::ArgMatches<'static>);
  /// The format of ripgrep's output. In general, each variant corresponds to
  /// the printer used to show search results.
  #[derive(Clone, Copy, Debug, Eq, PartialEq)]
  enum OutputKind {
      /// The classic grep-like or ack-like format.
      Standard,
      /// Matching files, possibly with the number of matches in each file.
      Summary,
      /// Match information emitted in the JSON Lines format.
      JSON,
  }
  340. /// The sort criteria, if present.
  341. #[derive(Clone, Copy, Debug, Eq, PartialEq)]
  342. struct SortBy {
  343. /// Whether to reverse the sort criteria (i.e., descending order).
  344. reverse: bool,
  345. /// The actual sorting criteria.
  346. kind: SortByKind,
  347. }
  /// The property that results are sorted on.
  #[derive(Clone, Copy, Debug, Eq, PartialEq)]
  enum SortByKind {
      /// Do not sort at all.
      None,
      /// Sort on the path.
      Path,
      /// Sort on the last modified time.
      LastModified,
      /// Sort on the last accessed time.
      LastAccessed,
      /// Sort on the creation time.
      Created,
  }
  361. impl SortBy {
  362. fn asc(kind: SortByKind) -> SortBy {
  363. SortBy { reverse: false, kind }
  364. }
  365. fn desc(kind: SortByKind) -> SortBy {
  366. SortBy { reverse: true, kind }
  367. }
  368. fn none() -> SortBy {
  369. SortBy::asc(SortByKind::None)
  370. }
  371. /// Try to check that the sorting criteria selected is actually supported.
  372. /// If it isn't, then an error is returned.
  373. fn check(&self) -> Result<()> {
  374. match self.kind {
  375. SortByKind::None | SortByKind::Path => {}
  376. SortByKind::LastModified => {
  377. env::current_exe()?.metadata()?.modified()?;
  378. }
  379. SortByKind::LastAccessed => {
  380. env::current_exe()?.metadata()?.accessed()?;
  381. }
  382. SortByKind::Created => {
  383. env::current_exe()?.metadata()?.created()?;
  384. }
  385. }
  386. Ok(())
  387. }
  388. fn configure_walk_builder(self, builder: &mut WalkBuilder) {
  389. // This isn't entirely optimal. In particular, we will wind up issuing
  390. // a stat for many files redundantly. Aside from having potentially
  391. // inconsistent results with respect to sorting, this is also slow.
  392. // We could fix this here at the expense of memory by caching stat
  393. // calls. A better fix would be to find a way to push this down into
  394. // directory traversal itself, but that's a somewhat nasty change.
  395. match self.kind {
  396. SortByKind::None => {}
  397. SortByKind::Path => {
  398. if self.reverse {
  399. builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
  400. } else {
  401. builder.sort_by_file_name(|a, b| a.cmp(b));
  402. }
  403. }
  404. SortByKind::LastModified => {
  405. builder.sort_by_file_path(move |a, b| {
  406. sort_by_metadata_time(a, b, self.reverse, |md| {
  407. md.modified()
  408. })
  409. });
  410. }
  411. SortByKind::LastAccessed => {
  412. builder.sort_by_file_path(move |a, b| {
  413. sort_by_metadata_time(a, b, self.reverse, |md| {
  414. md.accessed()
  415. })
  416. });
  417. }
  418. SortByKind::Created => {
  419. builder.sort_by_file_path(move |a, b| {
  420. sort_by_metadata_time(a, b, self.reverse, |md| {
  421. md.created()
  422. })
  423. });
  424. }
  425. }
  426. }
  427. }
  428. impl SortByKind {
  429. fn new(kind: &str) -> SortByKind {
  430. match kind {
  431. "none" => SortByKind::None,
  432. "path" => SortByKind::Path,
  433. "modified" => SortByKind::LastModified,
  434. "accessed" => SortByKind::LastAccessed,
  435. "created" => SortByKind::Created,
  436. _ => SortByKind::None,
  437. }
  438. }
  439. }
  440. /// Encoding mode the searcher will use.
  441. #[derive(Clone, Debug)]
  442. enum EncodingMode {
  443. /// Use an explicit encoding forcefully, but let BOM sniffing override it.
  444. Some(Encoding),
  445. /// Use only BOM sniffing to auto-detect an encoding.
  446. Auto,
  447. /// Use no explicit encoding and disable all BOM sniffing. This will
  448. /// always result in searching the raw bytes, regardless of their
  449. /// true encoding.
  450. Disabled,
  451. }
  452. impl EncodingMode {
  453. /// Checks if an explicit encoding has been set. Returns false for
  454. /// automatic BOM sniffing and no sniffing.
  455. ///
  456. /// This is only used to determine whether PCRE2 needs to have its own
  457. /// UTF-8 checking enabled. If we have an explicit encoding set, then
  458. /// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
  459. /// Otherwise, we have no such guarantee, and must enable PCRE2' UTF-8
  460. /// check.
  461. #[cfg(feature = "pcre2")]
  462. fn has_explicit_encoding(&self) -> bool {
  463. match self {
  464. EncodingMode::Some(_) => true,
  465. _ => false,
  466. }
  467. }
  468. }
  469. impl ArgMatches {
  470. /// Create an ArgMatches from clap's parse result.
  471. fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
  472. ArgMatches(clap_matches)
  473. }
  474. /// Run clap and return the matches using a config file if present. If clap
  475. /// determines a problem with the user provided arguments (or if --help or
  476. /// --version are given), then an error/usage/version will be printed and
  477. /// the process will exit.
  478. ///
  479. /// If there are no additional arguments from the environment (e.g., a
  480. /// config file), then the given matches are returned as is.
  481. fn reconfigure(self) -> Result<ArgMatches> {
  482. // If the end user says no config, then respect it.
  483. if self.is_present("no-config") {
  484. log::debug!(
  485. "not reading config files because --no-config is present"
  486. );
  487. return Ok(self);
  488. }
  489. // If the user wants ripgrep to use a config file, then parse args
  490. // from that first.
  491. let mut args = config::args();
  492. if args.is_empty() {
  493. return Ok(self);
  494. }
  495. let mut cliargs = env::args_os();
  496. if let Some(bin) = cliargs.next() {
  497. args.insert(0, bin);
  498. }
  499. args.extend(cliargs);
  500. log::debug!("final argv: {:?}", args);
  501. Ok(ArgMatches(clap_matches(args)?))
  502. }
  503. /// Convert the result of parsing CLI arguments into ripgrep's higher level
  504. /// configuration structure.
  505. fn to_args(self) -> Result<Args> {
  506. // We compute these once since they could be large.
  507. let patterns = self.patterns()?;
  508. let matcher = self.matcher(&patterns)?;
  509. let mut paths = self.paths();
  510. let using_default_path = if paths.is_empty() {
  511. paths.push(self.path_default());
  512. true
  513. } else {
  514. false
  515. };
  516. Ok(Args(Arc::new(ArgsImp {
  517. matches: self,
  518. patterns,
  519. matcher,
  520. paths,
  521. using_default_path,
  522. })))
  523. }
  524. }
  525. /// High level routines for converting command line arguments into various
  526. /// data structures used by ripgrep.
  527. ///
  528. /// Methods are sorted alphabetically.
  529. impl ArgMatches {
  530. /// Return the matcher that should be used for searching.
  531. ///
  532. /// If there was a problem building the matcher (e.g., a syntax error),
  533. /// then this returns an error.
  534. fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> {
  535. if self.is_present("pcre2") {
  536. self.matcher_engine("pcre2", patterns)
  537. } else if self.is_present("auto-hybrid-regex") {
  538. self.matcher_engine("auto", patterns)
  539. } else {
  540. let engine = self.value_of_lossy("engine").unwrap();
  541. self.matcher_engine(&engine, patterns)
  542. }
  543. }
  544. /// Return the matcher that should be used for searching using engine
  545. /// as the engine for the patterns.
  546. ///
  547. /// If there was a problem building the matcher (e.g., a syntax error),
  548. /// then this returns an error.
  549. fn matcher_engine(
  550. &self,
  551. engine: &str,
  552. patterns: &[String],
  553. ) -> Result<PatternMatcher> {
  554. match engine {
  555. "default" => {
  556. let matcher = match self.matcher_rust(patterns) {
  557. Ok(matcher) => matcher,
  558. Err(err) => {
  559. return Err(From::from(suggest(err.to_string())));
  560. }
  561. };
  562. Ok(PatternMatcher::RustRegex(matcher))
  563. }
  564. #[cfg(feature = "pcre2")]
  565. "pcre2" => {
  566. let matcher = self.matcher_pcre2(patterns)?;
  567. Ok(PatternMatcher::PCRE2(matcher))
  568. }
  569. #[cfg(not(feature = "pcre2"))]
  570. "pcre2" => Err(From::from(
  571. "PCRE2 is not available in this build of ripgrep",
  572. )),
  573. "auto" => {
  574. let rust_err = match self.matcher_rust(patterns) {
  575. Ok(matcher) => {
  576. return Ok(PatternMatcher::RustRegex(matcher));
  577. }
  578. Err(err) => err,
  579. };
  580. log::debug!(
  581. "error building Rust regex in hybrid mode:\n{}",
  582. rust_err,
  583. );
  584. let pcre_err = match self.matcher_engine("pcre2", patterns) {
  585. Ok(matcher) => return Ok(matcher),
  586. Err(err) => err,
  587. };
  588. Err(From::from(format!(
  589. "regex could not be compiled with either the default \
  590. regex engine or with PCRE2.\n\n\
  591. default regex engine error:\n{}\n{}\n{}\n\n\
  592. PCRE2 regex engine error:\n{}",
  593. "~".repeat(79),
  594. rust_err,
  595. "~".repeat(79),
  596. pcre_err,
  597. )))
  598. }
  599. _ => Err(From::from(format!(
  600. "unrecognized regex engine '{}'",
  601. engine
  602. ))),
  603. }
  604. }
  605. /// Build a matcher using Rust's regex engine.
  606. ///
  607. /// If there was a problem building the matcher (such as a regex syntax
  608. /// error), then an error is returned.
  609. fn matcher_rust(&self, patterns: &[String]) -> Result<RustRegexMatcher> {
  610. let mut builder = RustRegexMatcherBuilder::new();
  611. builder
  612. .case_smart(self.case_smart())
  613. .case_insensitive(self.case_insensitive())
  614. .multi_line(true)
  615. .unicode(self.unicode())
  616. .octal(false)
  617. .word(self.is_present("word-regexp"));
  618. if self.is_present("multiline") {
  619. builder.dot_matches_new_line(self.is_present("multiline-dotall"));
  620. if self.is_present("crlf") {
  621. builder.crlf(true).line_terminator(None);
  622. }
  623. } else {
  624. builder.line_terminator(Some(b'\n')).dot_matches_new_line(false);
  625. if self.is_present("crlf") {
  626. builder.crlf(true);
  627. }
  628. // We don't need to set this in multiline mode since mulitline
  629. // matchers don't use optimizations related to line terminators.
  630. // Moreover, a mulitline regex used with --null-data should
  631. // be allowed to match NUL bytes explicitly, which this would
  632. // otherwise forbid.
  633. if self.is_present("null-data") {
  634. builder.line_terminator(Some(b'\x00'));
  635. }
  636. }
  637. if let Some(limit) = self.regex_size_limit()? {
  638. builder.size_limit(limit);
  639. }
  640. if let Some(limit) = self.dfa_size_limit()? {
  641. builder.dfa_size_limit(limit);
  642. }
  643. let res = if self.is_present("fixed-strings") {
  644. builder.build_literals(patterns)
  645. } else {
  646. builder.build(&patterns.join("|"))
  647. };
  648. match res {
  649. Ok(m) => Ok(m),
  650. Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
  651. }
  652. }
  /// Build a matcher backed by PCRE2.
  ///
  /// The patterns are joined with `|` into a single regex.
  ///
  /// If there was a problem building the matcher (such as a regex syntax
  /// error), then an error is returned.
  #[cfg(feature = "pcre2")]
  fn matcher_pcre2(&self, patterns: &[String]) -> Result<PCRE2RegexMatcher> {
      let mut builder = PCRE2RegexMatcherBuilder::new();
      builder
          .case_smart(self.case_smart())
          .caseless(self.case_insensitive())
          .multi_line(true)
          .word(self.is_present("word-regexp"));

      // For whatever reason, the JIT craps out during regex compilation with
      // a "no more memory" error on 32 bit systems. So don't use it there.
      if cfg!(target_pointer_width = "64") {
          // The PCRE2 docs say that 32KB is the default, and that 1MB
          // should be big enough for anything. But let's crank it to
          // 10MB.
          let jit_stack_size = 10 * (1 << 20);
          builder
              .jit_if_available(true)
              .max_jit_stack_size(Some(jit_stack_size));
      }

      if self.unicode() {
          builder.utf(true).ucp(true);
          if self.encoding()?.has_explicit_encoding() {
              // SAFETY: If an encoding was specified, then we're guaranteed
              // to get valid UTF-8, so we can disable PCRE2's UTF checking.
              // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
              unsafe {
                  builder.disable_utf_check();
              }
          }
      }
      if self.is_present("multiline") {
          builder.dotall(self.is_present("multiline-dotall"));
      }
      if self.is_present("crlf") {
          builder.crlf(true);
      }

      let pattern = patterns.join("|");
      Ok(builder.build(&pattern)?)
  }
  694. /// Build a JSON printer that writes results to the given writer.
  695. fn printer_json<W: io::Write>(&self, wtr: W) -> Result<JSON<W>> {
  696. let mut builder = JSONBuilder::new();
  697. builder
  698. .pretty(false)
  699. .max_matches(self.max_count()?)
  700. .always_begin_end(false);
  701. Ok(builder.build(wtr))
  702. }
  703. /// Build a Standard printer that writes results to the given writer.
  704. ///
  705. /// The given paths are used to configure aspects of the printer.
  706. ///
  707. /// If `separator_search` is true, then the returned printer will assume
  708. /// the responsibility of printing a separator between each set of
  709. /// search results, when appropriate (e.g., when contexts are enabled).
  710. /// When it's set to false, the caller is responsible for handling
  711. /// separators.
  712. ///
  713. /// In practice, we want the printer to handle it in the single threaded
  714. /// case but not in the multi-threaded case.
  715. fn printer_standard<W: WriteColor>(
  716. &self,
  717. paths: &[PathBuf],
  718. wtr: W,
  719. separator_search: bool,
  720. ) -> Result<Standard<W>> {
  721. let mut builder = StandardBuilder::new();
  722. builder
  723. .color_specs(self.color_specs()?)
  724. .stats(self.stats())
  725. .heading(self.heading())
  726. .path(self.with_filename(paths))
  727. .only_matching(self.is_present("only-matching"))
  728. .per_match(self.is_present("vimgrep"))
  729. .replacement(self.replacement())
  730. .max_columns(self.max_columns()?)
  731. .max_columns_preview(self.max_columns_preview())
  732. .max_matches(self.max_count()?)
  733. .column(self.column())
  734. .byte_offset(self.is_present("byte-offset"))
  735. .trim_ascii(self.is_present("trim"))
  736. .separator_search(None)
  737. .separator_context(self.context_separator())
  738. .separator_field_match(b":".to_vec())
  739. .separator_field_context(b"-".to_vec())
  740. .separator_path(self.path_separator()?)
  741. .path_terminator(self.path_terminator());
  742. if separator_search {
  743. builder.separator_search(self.file_separator()?);
  744. }
  745. Ok(builder.build(wtr))
  746. }
  747. /// Build a Summary printer that writes results to the given writer.
  748. ///
  749. /// The given paths are used to configure aspects of the printer.
  750. ///
  751. /// This panics if the output format is not `OutputKind::Summary`.
  752. fn printer_summary<W: WriteColor>(
  753. &self,
  754. paths: &[PathBuf],
  755. wtr: W,
  756. ) -> Result<Summary<W>> {
  757. let mut builder = SummaryBuilder::new();
  758. builder
  759. .kind(self.summary_kind().expect("summary format"))
  760. .color_specs(self.color_specs()?)
  761. .stats(self.stats())
  762. .path(self.with_filename(paths))
  763. .max_matches(self.max_count()?)
  764. .exclude_zero(!self.is_present("include-zero"))
  765. .separator_field(b":".to_vec())
  766. .separator_path(self.path_separator()?)
  767. .path_terminator(self.path_terminator());
  768. Ok(builder.build(wtr))
  769. }
  770. /// Build a searcher from the command line parameters.
  771. fn searcher(&self, paths: &[PathBuf]) -> Result<Searcher> {
  772. let (ctx_before, ctx_after) = self.contexts()?;
  773. let line_term = if self.is_present("crlf") {
  774. LineTerminator::crlf()
  775. } else if self.is_present("null-data") {
  776. LineTerminator::byte(b'\x00')
  777. } else {
  778. LineTerminator::byte(b'\n')
  779. };
  780. let mut builder = SearcherBuilder::new();
  781. builder
  782. .line_terminator(line_term)
  783. .invert_match(self.is_present("invert-match"))
  784. .line_number(self.line_number(paths))
  785. .multi_line(self.is_present("multiline"))
  786. .before_context(ctx_before)
  787. .after_context(ctx_after)
  788. .passthru(self.is_present("passthru"))
  789. .memory_map(self.mmap_choice(paths));
  790. match self.encoding()? {
  791. EncodingMode::Some(enc) => {
  792. builder.encoding(Some(enc));
  793. }
  794. EncodingMode::Auto => {} // default for the searcher
  795. EncodingMode::Disabled => {
  796. builder.bom_sniffing(false);
  797. }
  798. }
  799. Ok(builder.build())
  800. }
  801. /// Return a builder for recursively traversing a directory while
  802. /// respecting ignore rules.
  803. ///
  804. /// If there was a problem parsing the CLI arguments necessary for
  805. /// constructing the builder, then this returns an error.
  806. fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> {
  807. let mut builder = WalkBuilder::new(&paths[0]);
  808. for path in &paths[1..] {
  809. builder.add(path);
  810. }
  811. if !self.no_ignore_files() {
  812. for path in self.ignore_paths() {
  813. if let Some(err) = builder.add_ignore(path) {
  814. ignore_message!("{}", err);
  815. }
  816. }
  817. }
  818. builder
  819. .max_depth(self.usize_of("max-depth")?)
  820. .follow_links(self.is_present("follow"))
  821. .max_filesize(self.max_file_size()?)
  822. .threads(self.threads()?)
  823. .same_file_system(self.is_present("one-file-system"))
  824. .skip_stdout(!self.is_present("files"))
  825. .overrides(self.overrides()?)
  826. .types(self.types()?)
  827. .hidden(!self.hidden())
  828. .parents(!self.no_ignore_parent())
  829. .ignore(!self.no_ignore_dot())
  830. .git_global(!self.no_ignore_vcs() && !self.no_ignore_global())
  831. .git_ignore(!self.no_ignore_vcs())
  832. .git_exclude(!self.no_ignore_vcs() && !self.no_ignore_exclude())
  833. .require_git(!self.is_present("no-require-git"))
  834. .ignore_case_insensitive(self.ignore_file_case_insensitive());
  835. if !self.no_ignore() {
  836. builder.add_custom_ignore_filename(".rgignore");
  837. }
  838. let sortby = self.sort_by()?;
  839. sortby.check()?;
  840. sortby.configure_walk_builder(&mut builder);
  841. Ok(builder)
  842. }
  843. }
  844. /// Mid level routines for converting command line arguments into various types
  845. /// of data structures.
  846. ///
  847. /// Methods are sorted alphabetically.
  848. impl ArgMatches {
  849. /// Returns the form of binary detection to perform on files that are
  850. /// implicitly searched via recursive directory traversal.
  851. fn binary_detection_implicit(&self) -> BinaryDetection {
  852. let none = self.is_present("text") || self.is_present("null-data");
  853. let convert =
  854. self.is_present("binary") || self.unrestricted_count() >= 3;
  855. if none {
  856. BinaryDetection::none()
  857. } else if convert {
  858. BinaryDetection::convert(b'\x00')
  859. } else {
  860. BinaryDetection::quit(b'\x00')
  861. }
  862. }
  863. /// Returns the form of binary detection to perform on files that are
  864. /// explicitly searched via the user invoking ripgrep on a particular
  865. /// file or files or stdin.
  866. ///
  867. /// In general, this should never be BinaryDetection::quit, since that acts
  868. /// as a filter (but quitting immediately once a NUL byte is seen), and we
  869. /// should never filter out files that the user wants to explicitly search.
  870. fn binary_detection_explicit(&self) -> BinaryDetection {
  871. let none = self.is_present("text") || self.is_present("null-data");
  872. if none {
  873. BinaryDetection::none()
  874. } else {
  875. BinaryDetection::convert(b'\x00')
  876. }
  877. }
  878. /// Returns true if the command line configuration implies that a match
  879. /// can never be shown.
  880. fn can_never_match(&self, patterns: &[String]) -> bool {
  881. patterns.is_empty() || self.max_count().ok() == Some(Some(0))
  882. }
  883. /// Returns true if and only if case should be ignore.
  884. ///
  885. /// If --case-sensitive is present, then case is never ignored, even if
  886. /// --ignore-case is present.
  887. fn case_insensitive(&self) -> bool {
  888. self.is_present("ignore-case") && !self.is_present("case-sensitive")
  889. }
  890. /// Returns true if and only if smart case has been enabled.
  891. ///
  892. /// If either --ignore-case of --case-sensitive are present, then smart
  893. /// case is disabled.
  894. fn case_smart(&self) -> bool {
  895. self.is_present("smart-case")
  896. && !self.is_present("ignore-case")
  897. && !self.is_present("case-sensitive")
  898. }
  899. /// Returns the user's color choice based on command line parameters and
  900. /// environment.
  901. fn color_choice(&self) -> ColorChoice {
  902. let preference = match self.value_of_lossy("color") {
  903. None => "auto".to_string(),
  904. Some(v) => v,
  905. };
  906. if preference == "always" {
  907. ColorChoice::Always
  908. } else if preference == "ansi" {
  909. ColorChoice::AlwaysAnsi
  910. } else if preference == "auto" {
  911. if cli::is_tty_stdout() || self.is_present("pretty") {
  912. ColorChoice::Auto
  913. } else {
  914. ColorChoice::Never
  915. }
  916. } else {
  917. ColorChoice::Never
  918. }
  919. }
  920. /// Returns the color specifications given by the user on the CLI.
  921. ///
  922. /// If the was a problem parsing any of the provided specs, then an error
  923. /// is returned.
  924. fn color_specs(&self) -> Result<ColorSpecs> {
  925. // Start with a default set of color specs.
  926. let mut specs = default_color_specs();
  927. for spec_str in self.values_of_lossy_vec("colors") {
  928. specs.push(spec_str.parse()?);
  929. }
  930. Ok(ColorSpecs::new(&specs))
  931. }
  932. /// Returns true if and only if column numbers should be shown.
  933. fn column(&self) -> bool {
  934. if self.is_present("no-column") {
  935. return false;
  936. }
  937. self.is_present("column") || self.is_present("vimgrep")
  938. }
  939. /// Returns the before and after contexts from the command line.
  940. ///
  941. /// If a context setting was absent, then `0` is returned.
  942. ///
  943. /// If there was a problem parsing the values from the user as an integer,
  944. /// then an error is returned.
  945. fn contexts(&self) -> Result<(usize, usize)> {
  946. let after = self.usize_of("after-context")?.unwrap_or(0);
  947. let before = self.usize_of("before-context")?.unwrap_or(0);
  948. let both = self.usize_of("context")?.unwrap_or(0);
  949. Ok(if both > 0 { (both, both) } else { (before, after) })
  950. }
  951. /// Returns the unescaped context separator in UTF-8 bytes.
  952. ///
  953. /// If one was not provided, the default `--` is returned.
  954. /// If --no-context-separator is passed, None is returned.
  955. fn context_separator(&self) -> Option<Vec<u8>> {
  956. let nosep = self.is_present("no-context-separator");
  957. let sep = self.value_of_os("context-separator");
  958. match (nosep, sep) {
  959. (true, _) => None,
  960. (false, None) => Some(b"--".to_vec()),
  961. (false, Some(sep)) => Some(cli::unescape_os(&sep)),
  962. }
  963. }
  964. /// Returns whether the -c/--count or the --count-matches flags were
  965. /// passed from the command line.
  966. ///
  967. /// If --count-matches and --invert-match were passed in, behave
  968. /// as if --count and --invert-match were passed in (i.e. rg will
  969. /// count inverted matches as per existing behavior).
  970. fn counts(&self) -> (bool, bool) {
  971. let count = self.is_present("count");
  972. let count_matches = self.is_present("count-matches");
  973. let invert_matches = self.is_present("invert-match");
  974. let only_matching = self.is_present("only-matching");
  975. if count_matches && invert_matches {
  976. // Treat `-v --count-matches` as `-v -c`.
  977. (true, false)
  978. } else if count && only_matching {
  979. // Treat `-c --only-matching` as `--count-matches`.
  980. (false, true)
  981. } else {
  982. (count, count_matches)
  983. }
  984. }
  985. /// Parse the dfa-size-limit argument option into a byte count.
  986. fn dfa_size_limit(&self) -> Result<Option<usize>> {
  987. let r = self.parse_human_readable_size("dfa-size-limit")?;
  988. u64_to_usize("dfa-size-limit", r)
  989. }
  990. /// Returns the encoding mode to use.
  991. ///
  992. /// This only returns an encoding if one is explicitly specified. Otherwise
  993. /// if set to automatic, the Searcher will do BOM sniffing for UTF-16
  994. /// and transcode seamlessly. If disabled, no BOM sniffing nor transcoding
  995. /// will occur.
  996. fn encoding(&self) -> Result<EncodingMode> {
  997. if self.is_present("no-encoding") {
  998. return Ok(EncodingMode::Auto);
  999. }
  1000. let label = match self.value_of_lossy("encoding") {
  1001. None if self.pcre2_unicode() => "utf-8".to_string(),
  1002. None => return Ok(EncodingMode::Auto),
  1003. Some(label) => label,
  1004. };
  1005. if label == "auto" {
  1006. return Ok(EncodingMode::Auto);
  1007. } else if label == "none" {
  1008. return Ok(EncodingMode::Disabled);
  1009. }
  1010. Ok(EncodingMode::Some(Encoding::new(&label)?))
  1011. }
  1012. /// Return the file separator to use based on the CLI configuration.
  1013. fn file_separator(&self) -> Result<Option<Vec<u8>>> {
  1014. // File separators are only used for the standard grep-line format.
  1015. if self.output_kind() != OutputKind::Standard {
  1016. return Ok(None);
  1017. }
  1018. let (ctx_before, ctx_after) = self.contexts()?;
  1019. Ok(if self.heading() {
  1020. Some(b"".to_vec())
  1021. } else if ctx_before > 0 || ctx_after > 0 {
  1022. self.context_separator()
  1023. } else {
  1024. None
  1025. })
  1026. }
  1027. /// Returns true if and only if matches should be grouped with file name
  1028. /// headings.
  1029. fn heading(&self) -> bool {
  1030. if self.is_present("no-heading") || self.is_present("vimgrep") {
  1031. false
  1032. } else {
  1033. cli::is_tty_stdout()
  1034. || self.is_present("heading")
  1035. || self.is_present("pretty")
  1036. }
  1037. }
  1038. /// Returns true if and only if hidden files/directories should be
  1039. /// searched.
  1040. fn hidden(&self) -> bool {
  1041. self.is_present("hidden") || self.unrestricted_count() >= 2
  1042. }
  1043. /// Returns true if ignore files should be processed case insensitively.
  1044. fn ignore_file_case_insensitive(&self) -> bool {
  1045. self.is_present("ignore-file-case-insensitive")
  1046. }
  1047. /// Return all of the ignore file paths given on the command line.
  1048. fn ignore_paths(&self) -> Vec<PathBuf> {
  1049. let paths = match self.values_of_os("ignore-file") {
  1050. None => return vec![],
  1051. Some(paths) => paths,
  1052. };
  1053. paths.map(|p| Path::new(p).to_path_buf()).collect()
  1054. }
  1055. /// Returns true if and only if ripgrep is invoked in a way where it knows
  1056. /// it search exactly one thing.
  1057. fn is_one_search(&self, paths: &[PathBuf]) -> bool {
  1058. if paths.len() != 1 {
  1059. return false;
  1060. }
  1061. self.is_only_stdin(paths) || paths[0].is_file()
  1062. }
  1063. /// Returns true if and only if we're only searching a single thing and
  1064. /// that thing is stdin.
  1065. fn is_only_stdin(&self, paths: &[PathBuf]) -> bool {
  1066. paths == [Path::new("-")]
  1067. }
  1068. /// Returns true if and only if we should show line numbers.
  1069. fn line_number(&self, paths: &[PathBuf]) -> bool {
  1070. if self.output_kind() == OutputKind::Summary {
  1071. return false;
  1072. }
  1073. if self.is_present("no-line-number") {
  1074. return false;
  1075. }
  1076. if self.output_kind() == OutputKind::JSON {
  1077. return true;
  1078. }
  1079. // A few things can imply counting line numbers. In particular, we
  1080. // generally want to show line numbers by default when printing to a
  1081. // tty for human consumption, except for one interesting case: when
  1082. // we're only searching stdin. This makes pipelines work as expected.
  1083. (cli::is_tty_stdout() && !self.is_only_stdin(paths))
  1084. || self.is_present("line-number")
  1085. || self.is_present("column")
  1086. || self.is_present("pretty")
  1087. || self.is_present("vimgrep")
  1088. }
  1089. /// The maximum number of columns allowed on each line.
  1090. ///
  1091. /// If `0` is provided, then this returns `None`.
  1092. fn max_columns(&self) -> Result<Option<u64>> {
  1093. Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64))
  1094. }
  1095. /// Returns true if and only if a preview should be shown for lines that
  1096. /// exceed the maximum column limit.
  1097. fn max_columns_preview(&self) -> bool {
  1098. self.is_present("max-columns-preview")
  1099. }
  1100. /// The maximum number of matches permitted.
  1101. fn max_count(&self) -> Result<Option<u64>> {
  1102. Ok(self.usize_of("max-count")?.map(|n| n as u64))
  1103. }
  1104. /// Parses the max-filesize argument option into a byte count.
  1105. fn max_file_size(&self) -> Result<Option<u64>> {
  1106. self.parse_human_readable_size("max-filesize")
  1107. }
  1108. /// Returns whether we should attempt to use memory maps or not.
  1109. fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice {
  1110. // SAFETY: Memory maps are difficult to impossible to encapsulate
  1111. // safely in a portable way that doesn't simultaneously negate some of
  1112. // the benfits of using memory maps. For ripgrep's use, we never mutate
  1113. // a memory map and generally never store the contents of memory map
  1114. // in a data structure that depends on immutability. Generally
  1115. // speaking, the worst thing that can happen is a SIGBUS (if the
  1116. // underlying file is truncated while reading it), which will cause
  1117. // ripgrep to abort. This reasoning should be treated as suspect.
  1118. let maybe = unsafe { MmapChoice::auto() };
  1119. let never = MmapChoice::never();
  1120. if self.is_present("no-mmap") {
  1121. never
  1122. } else if self.is_present("mmap") {
  1123. maybe
  1124. } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) {
  1125. // If we're only searching a few paths and all of them are
  1126. // files, then memory maps are probably faster.
  1127. maybe
  1128. } else {
  1129. never
  1130. }
  1131. }
  1132. /// Returns true if ignore files should be ignored.
  1133. fn no_ignore(&self) -> bool {
  1134. self.is_present("no-ignore") || self.unrestricted_count() >= 1
  1135. }
  1136. /// Returns true if .ignore files should be ignored.
  1137. fn no_ignore_dot(&self) -> bool {
  1138. self.is_present("no-ignore-dot") || self.no_ignore()
  1139. }
  1140. /// Returns true if local exclude (ignore) files should be ignored.
  1141. fn no_ignore_exclude(&self) -> bool {
  1142. self.is_present("no-ignore-exclude") || self.no_ignore()
  1143. }
  1144. /// Returns true if explicitly given ignore files should be ignored.
  1145. fn no_ignore_files(&self) -> bool {
  1146. // We don't look at no-ignore here because --no-ignore is explicitly
  1147. // documented to not override --ignore-file. We could change this, but
  1148. // it would be a fairly severe breaking change.
  1149. self.is_present("no-ignore-files")
  1150. }
  1151. /// Returns true if global ignore files should be ignored.
  1152. fn no_ignore_global(&self) -> bool {
  1153. self.is_present("no-ignore-global") || self.no_ignore()
  1154. }
  1155. /// Returns true if parent ignore files should be ignored.
  1156. fn no_ignore_parent(&self) -> bool {
  1157. self.is_present("no-ignore-parent") || self.no_ignore()
  1158. }
  1159. /// Returns true if VCS ignore files should be ignored.
  1160. fn no_ignore_vcs(&self) -> bool {
  1161. self.is_present("no-ignore-vcs") || self.no_ignore()
  1162. }
  1163. /// Determine the type of output we should produce.
  1164. fn output_kind(&self) -> OutputKind {
  1165. if self.is_present("quiet") {
  1166. // While we don't technically print results (or aggregate results)
  1167. // in quiet mode, we still support the --stats flag, and those
  1168. // stats are computed by the Summary printer for now.
  1169. return OutputKind::Summary;
  1170. } else if self.is_present("json") {
  1171. return OutputKind::JSON;
  1172. }
  1173. let (count, count_matches) = self.counts();
  1174. let summary = count
  1175. || count_matches
  1176. || self.is_present("files-with-matches")
  1177. || self.is_present("files-without-match");
  1178. if summary {
  1179. OutputKind::Summary
  1180. } else {
  1181. OutputKind::Standard
  1182. }
  1183. }
  1184. /// Builds the set of glob overrides from the command line flags.
  1185. fn overrides(&self) -> Result<Override> {
  1186. let globs = self.values_of_lossy_vec("glob");
  1187. let iglobs = self.values_of_lossy_vec("iglob");
  1188. if globs.is_empty() && iglobs.is_empty() {
  1189. return Ok(Override::empty());
  1190. }
  1191. let mut builder = OverrideBuilder::new(current_dir()?);
  1192. // Make all globs case insensitive with --glob-case-insensitive.
  1193. if self.is_present("glob-case-insensitive") {
  1194. builder.case_insensitive(true).unwrap();
  1195. }
  1196. for glob in globs {
  1197. builder.add(&glob)?;
  1198. }
  1199. // This only enables case insensitivity for subsequent globs.
  1200. builder.case_insensitive(true).unwrap();
  1201. for glob in iglobs {
  1202. builder.add(&glob)?;
  1203. }
  1204. Ok(builder.build()?)
  1205. }
  1206. /// Return all file paths that ripgrep should search.
  1207. ///
  1208. /// If no paths were given, then this returns an empty list.
  1209. fn paths(&self) -> Vec<PathBuf> {
  1210. let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
  1211. None => vec![],
  1212. Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(),
  1213. };
  1214. // If --file, --files or --regexp is given, then the first path is
  1215. // always in `pattern`.
  1216. if self.is_present("file")
  1217. || self.is_present("files")
  1218. || self.is_present("regexp")
  1219. {
  1220. if let Some(path) = self.value_of_os("pattern") {
  1221. paths.insert(0, Path::new(path).to_path_buf());
  1222. }
  1223. }
  1224. paths
  1225. }
  1226. /// Return the default path that ripgrep should search. This should only
  1227. /// be used when ripgrep is not otherwise given at least one file path
  1228. /// as a positional argument.
  1229. fn path_default(&self) -> PathBuf {
  1230. let file_is_stdin = self
  1231. .values_of_os("file")
  1232. .map_or(false, |mut files| files.any(|f| f == "-"));
  1233. let search_cwd = !cli::is_readable_stdin()
  1234. || (self.is_present("file") && file_is_stdin)
  1235. || self.is_present("files")
  1236. || self.is_present("type-list")
  1237. || self.is_present("pcre2-version");
  1238. if search_cwd {
  1239. Path::new("./").to_path_buf()
  1240. } else {
  1241. Path::new("-").to_path_buf()
  1242. }
  1243. }
  1244. /// Returns the unescaped path separator as a single byte, if one exists.
  1245. ///
  1246. /// If the provided path separator is more than a single byte, then an
  1247. /// error is returned.
  1248. fn path_separator(&self) -> Result<Option<u8>> {
  1249. let sep = match self.value_of_os("path-separator") {
  1250. None => return Ok(None),
  1251. Some(sep) => cli::unescape_os(&sep),
  1252. };
  1253. if sep.is_empty() {
  1254. Ok(None)
  1255. } else if sep.len() > 1 {
  1256. Err(From::from(format!(
  1257. "A path separator must be exactly one byte, but \
  1258. the given separator is {} bytes: {}\n\
  1259. In some shells on Windows '/' is automatically \
  1260. expanded. Use '//' instead.",
  1261. sep.len(),
  1262. cli::escape(&sep),
  1263. )))
  1264. } else {
  1265. Ok(Some(sep[0]))
  1266. }
  1267. }
  1268. /// Returns the byte that should be used to terminate paths.
  1269. ///
  1270. /// Typically, this is only set to `\x00` when the --null flag is provided,
  1271. /// and `None` otherwise.
  1272. fn path_terminator(&self) -> Option<u8> {
  1273. if self.is_present("null") {
  1274. Some(b'\x00')
  1275. } else {
  1276. None
  1277. }
  1278. }
  1279. /// Get a sequence of all available patterns from the command line.
  1280. /// This includes reading the -e/--regexp and -f/--file flags.
  1281. ///
  1282. /// Note that if -F/--fixed-strings is set, then all patterns will be
  1283. /// escaped. If -x/--line-regexp is set, then all patterns are surrounded
  1284. /// by `^...$`. Other things, such as --word-regexp, are handled by the
  1285. /// regex matcher itself.
  1286. ///
  1287. /// If any pattern is invalid UTF-8, then an error is returned.
  1288. fn patterns(&self) -> Result<Vec<String>> {
  1289. if self.is_present("files") || self.is_present("type-list") {
  1290. return Ok(vec![]);
  1291. }
  1292. let mut pats = vec![];
  1293. match self.values_of_os("regexp") {
  1294. None => {
  1295. if self.values_of_os("file").is_none() {
  1296. if let Some(os_pat) = self.value_of_os("pattern") {
  1297. pats.push(self.pattern_from_os_str(os_pat)?);
  1298. }
  1299. }
  1300. }
  1301. Some(os_pats) => {
  1302. for os_pat in os_pats {
  1303. pats.push(self.pattern_from_os_str(os_pat)?);
  1304. }
  1305. }
  1306. }
  1307. if let Some(paths) = self.values_of_os("file") {
  1308. for path in paths {
  1309. if path == "-" {
  1310. pats.extend(
  1311. cli::patterns_from_stdin()?
  1312. .into_iter()
  1313. .map(|p| self.pattern_from_string(p)),
  1314. );
  1315. } else {
  1316. pats.extend(
  1317. cli::patterns_from_path(path)?
  1318. .into_iter()
  1319. .map(|p| self.pattern_from_string(p)),
  1320. );
  1321. }
  1322. }
  1323. }
  1324. Ok(pats)
  1325. }
  1326. /// Returns a pattern that is guaranteed to produce an empty regular
  1327. /// expression that is valid in any position.
  1328. fn pattern_empty(&self) -> String {
  1329. // This would normally just be an empty string, which works on its
  1330. // own, but if the patterns are joined in a set of alternations, then
  1331. // you wind up with `foo|`, which is currently invalid in Rust's regex
  1332. // engine.
  1333. "(?:z{0})*".to_string()
  1334. }
  1335. /// Converts an OsStr pattern to a String pattern. The pattern is escaped
  1336. /// if -F/--fixed-strings is set.
  1337. ///
  1338. /// If the pattern is not valid UTF-8, then an error is returned.
  1339. fn pattern_from_os_str(&self, pat: &OsStr) -> Result<String> {
  1340. let s = cli::pattern_from_os(pat)?;
  1341. Ok(self.pattern_from_str(s))
  1342. }
  1343. /// Converts a &str pattern to a String pattern. The pattern is escaped
  1344. /// if -F/--fixed-strings is set.
  1345. fn pattern_from_str(&self, pat: &str) -> String {
  1346. self.pattern_from_string(pat.to_string())
  1347. }
  1348. /// Applies additional processing on the given pattern if necessary
  1349. /// (such as escaping meta characters or turning it into a line regex).
  1350. fn pattern_from_string(&self, pat: String) -> String {
  1351. let pat = self.pattern_line(self.pattern_literal(pat));
  1352. if pat.is_empty() {
  1353. self.pattern_empty()
  1354. } else {
  1355. pat
  1356. }
  1357. }
  1358. /// Returns the given pattern as a line pattern if the -x/--line-regexp
  1359. /// flag is set. Otherwise, the pattern is returned unchanged.
  1360. fn pattern_line(&self, pat: String) -> String {
  1361. if self.is_present("line-regexp") {
  1362. format!(r"^(?:{})$", pat)
  1363. } else {
  1364. pat
  1365. }
  1366. }
  1367. /// Returns the given pattern as a literal pattern if the
  1368. /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
  1369. /// unchanged.
  1370. fn pattern_literal(&self, pat: String) -> String {
  1371. if self.is_present("fixed-strings") {
  1372. regex::escape(&pat)
  1373. } else {
  1374. pat
  1375. }
  1376. }
  1377. /// Returns the preprocessor command if one was specified.
  1378. fn preprocessor(&self) -> Option<PathBuf> {
  1379. let path = match self.value_of_os("pre") {
  1380. None => return None,
  1381. Some(path) => path,
  1382. };
  1383. if path.is_empty() {
  1384. return None;
  1385. }
  1386. Some(Path::new(path).to_path_buf())
  1387. }
  1388. /// Builds the set of globs for filtering files to apply to the --pre
  1389. /// flag. If no --pre-globs are available, then this always returns an
  1390. /// empty set of globs.
  1391. fn preprocessor_globs(&self) -> Result<Override> {
  1392. let globs = self.values_of_lossy_vec("pre-glob");
  1393. if globs.is_empty() {
  1394. return Ok(Override::empty());
  1395. }
  1396. let mut builder = OverrideBuilder::new(current_dir()?);
  1397. for glob in globs {
  1398. builder.add(&glob)?;
  1399. }
  1400. Ok(builder.build()?)
  1401. }
  1402. /// Parse the regex-size-limit argument option into a byte count.
  1403. fn regex_size_limit(&self) -> Result<Option<usize>> {
  1404. let r = self.parse_human_readable_size("regex-size-limit")?;
  1405. u64_to_usize("regex-size-limit", r)
  1406. }
  1407. /// Returns the replacement string as UTF-8 bytes if it exists.
  1408. fn replacement(&self) -> Option<Vec<u8>> {
  1409. self.value_of_lossy("replace").map(|s| s.into_bytes())
  1410. }
  1411. /// Returns the sorting criteria based on command line parameters.
  1412. fn sort_by(&self) -> Result<SortBy> {
  1413. // For backcompat, continue supporting deprecated --sort-files flag.
  1414. if self.is_present("sort-files") {
  1415. return Ok(SortBy::asc(SortByKind::Path));
  1416. }
  1417. let sortby = match self.value_of_lossy("sort") {
  1418. None => match self.value_of_lossy("sortr") {
  1419. None => return Ok(SortBy::none()),
  1420. Some(choice) => SortBy::desc(SortByKind::new(&choice)),
  1421. },
  1422. Some(choice) => SortBy::asc(SortByKind::new(&choice)),
  1423. };
  1424. Ok(sortby)
  1425. }
  1426. /// Returns true if and only if aggregate statistics for a search should
  1427. /// be tracked.
  1428. ///
  1429. /// Generally, this is only enabled when explicitly requested by in the
  1430. /// command line arguments via the --stats flag, but this can also be
  1431. /// enabled implicity via the output format, e.g., for JSON Lines.
  1432. fn stats(&self) -> bool {
  1433. self.output_kind() == OutputKind::JSON || self.is_present("stats")
  1434. }
  1435. /// When the output format is `Summary`, this returns the type of summary
  1436. /// output to show.
  1437. ///
  1438. /// This returns `None` if the output format is not `Summary`.
  1439. fn summary_kind(&self) -> Option<SummaryKind> {
  1440. let (count, count_matches) = self.counts();
  1441. if self.is_present("quiet") {
  1442. Some(SummaryKind::Quiet)
  1443. } else if count_matches {
  1444. Some(SummaryKind::CountMatches)
  1445. } else if count {
  1446. Some(SummaryKind::Count)
  1447. } else if self.is_present("files-with-matches") {
  1448. Some(SummaryKind::PathWithMatch)
  1449. } else if self.is_present("files-without-match") {
  1450. Some(SummaryKind::PathWithoutMatch)
  1451. } else {
  1452. None
  1453. }
  1454. }
  1455. /// Return the number of threads that should be used for parallelism.
  1456. fn threads(&self) -> Result<usize> {
  1457. if self.sort_by()?.kind != SortByKind::None {
  1458. return Ok(1);
  1459. }
  1460. let threads = self.usize_of("threads")?.unwrap_or(0);
  1461. Ok(if threads == 0 { cmp::min(12, num_cpus::get()) } else { threads })
  1462. }
  1463. /// Builds a file type matcher from the command line flags.
  1464. fn types(&self) -> Result<Types> {
  1465. let mut builder = TypesBuilder::new();
  1466. builder.add_defaults();
  1467. for ty in self.values_of_lossy_vec("type-clear") {
  1468. builder.clear(&ty);
  1469. }
  1470. for def in self.values_of_lossy_vec("type-add") {
  1471. builder.add_def(&def)?;
  1472. }
  1473. for ty in self.values_of_lossy_vec("type") {
  1474. builder.select(&ty);
  1475. }
  1476. for ty in self.values_of_lossy_vec("type-not") {
  1477. builder.negate(&ty);
  1478. }
  1479. builder.build().map_err(From::from)
  1480. }
  1481. /// Returns the number of times the `unrestricted` flag is provided.
  1482. fn unrestricted_count(&self) -> u64 {
  1483. self.occurrences_of("unrestricted")
  1484. }
  1485. /// Returns true if and only if Unicode mode should be enabled.
  1486. fn unicode(&self) -> bool {
  1487. // Unicode mode is enabled by default, so only disable it when
  1488. // --no-unicode is given explicitly.
  1489. !(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
  1490. }
  1491. /// Returns true if and only if PCRE2 is enabled and its Unicode mode is
  1492. /// enabled.
  1493. fn pcre2_unicode(&self) -> bool {
  1494. self.is_present("pcre2") && self.unicode()
  1495. }
  1496. /// Returns true if and only if file names containing each match should
  1497. /// be emitted.
  1498. fn with_filename(&self, paths: &[PathBuf]) -> bool {
  1499. if self.is_present("no-filename") {
  1500. false
  1501. } else {
  1502. let path_stdin = Path::new("-");
  1503. self.is_present("with-filename")
  1504. || self.is_present("vimgrep")
  1505. || paths.len() > 1
  1506. || paths
  1507. .get(0)
  1508. .map_or(false, |p| p != path_stdin && p.is_dir())
  1509. }
  1510. }
  1511. }
  1512. /// Lower level generic helper methods for teasing values out of clap.
  1513. impl ArgMatches {
  1514. /// Like values_of_lossy, but returns an empty vec if the flag is not
  1515. /// present.
  1516. fn values_of_lossy_vec(&self, name: &str) -> Vec<String> {
  1517. self.values_of_lossy(name).unwrap_or_else(Vec::new)
  1518. }
  1519. /// Safely reads an arg value with the given name, and if it's present,
  1520. /// tries to parse it as a usize value.
  1521. ///
  1522. /// If the number is zero, then it is considered absent and `None` is
  1523. /// returned.
  1524. fn usize_of_nonzero(&self, name: &str) -> Result<Option<usize>> {
  1525. let n = match self.usize_of(name)? {
  1526. None => return Ok(None),
  1527. Some(n) => n,
  1528. };
  1529. Ok(if n == 0 { None } else { Some(n) })
  1530. }
  1531. /// Safely reads an arg value with the given name, and if it's present,
  1532. /// tries to parse it as a usize value.
  1533. fn usize_of(&self, name: &str) -> Result<Option<usize>> {
  1534. match self.value_of_lossy(name) {
  1535. None => Ok(None),
  1536. Some(v) => v.parse().map(Some).map_err(From::from),
  1537. }
  1538. }
  1539. /// Parses an argument of the form `[0-9]+(KMG)?`.
  1540. ///
  1541. /// If the aforementioned format is not recognized, then this returns an
  1542. /// error.
  1543. fn parse_human_readable_size(
  1544. &self,
  1545. arg_name: &str,
  1546. ) -> Result<Option<u64>> {
  1547. let size = match self.value_of_lossy(arg_name) {
  1548. None => return Ok(None),
  1549. Some(size) => size,
  1550. };
  1551. Ok(Some(cli::parse_human_readable_size(&size)?))
  1552. }
  1553. }
  1554. /// The following methods mostly dispatch to the underlying clap methods
  1555. /// directly. Methods that would otherwise get a single value will fetch all
  1556. /// values and return the last one. (Clap returns the first one.) We only
  1557. /// define the ones we need.
  1558. impl ArgMatches {
  1559. fn is_present(&self, name: &str) -> bool {
  1560. self.0.is_present(name)
  1561. }
  1562. fn occurrences_of(&self, name: &str) -> u64 {
  1563. self.0.occurrences_of(name)
  1564. }
  1565. fn value_of_lossy(&self, name: &str) -> Option<String> {
  1566. self.0.value_of_lossy(name).map(|s| s.into_owned())
  1567. }
  1568. fn values_of_lossy(&self, name: &str) -> Option<Vec<String>> {
  1569. self.0.values_of_lossy(name)
  1570. }
  1571. fn value_of_os(&self, name: &str) -> Option<&OsStr> {
  1572. self.0.value_of_os(name)
  1573. }
  1574. fn values_of_os(&self, name: &str) -> Option<clap::OsValues> {
  1575. self.0.values_of_os(name)
  1576. }
  1577. }
  1578. /// Inspect an error resulting from building a Rust regex matcher, and if it's
  1579. /// believed to correspond to a syntax error that another engine could handle,
  1580. /// then add a message to suggest the use of the engine flag.
  1581. fn suggest(msg: String) -> String {
  1582. if let Some(pcre_msg) = suggest_pcre2(&msg) {
  1583. return pcre_msg;
  1584. }
  1585. msg
  1586. }
  /// Inspects an error message resulting from building a Rust regex
  /// matcher, and if it mentions backreferences or look-around, returns the
  /// message extended with a suggestion to use -P/--pcre2.
  ///
  /// When the `pcre2` feature is not compiled in, this always returns
  /// `None`.
  fn suggest_pcre2(msg: &str) -> Option<String> {
      #[cfg(feature = "pcre2")]
      fn hint(msg: &str) -> Option<String> {
          let relevant =
              msg.contains("backreferences") || msg.contains("look-around");
          if !relevant {
              return None;
          }
          Some(format!(
              "{}
Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences
and look-around.",
              msg
          ))
      }

      #[cfg(not(feature = "pcre2"))]
      fn hint(_: &str) -> Option<String> {
          None
      }

      hint(msg)
  }
  /// Inspects an error message, and if it contains both "the literal" and
  /// "not allowed", returns the message extended with a suggestion to
  /// enable multiline mode via --multiline/-U. Otherwise the message is
  /// returned unchanged.
  fn suggest_multiline(msg: String) -> String {
      let relevant = msg.contains("the literal") && msg.contains("not allowed");
      if !relevant {
          return msg;
      }
      format!(
          "{}
Consider enabling multiline mode with the --multiline flag (or -U for short).
When multiline mode is enabled, new line characters can be matched.",
          msg
      )
  }
  1622. /// Convert the result of parsing a human readable file size to a `usize`,
  1623. /// failing if the type does not fit.
  1624. fn u64_to_usize(arg_name: &str, value: Option<u64>) -> Result<Option<usize>> {
  1625. use std::usize;
  1626. let value = match value {
  1627. None => return Ok(None),
  1628. Some(value) => value,
  1629. };
  1630. if value <= usize::MAX as u64 {
  1631. Ok(Some(value as usize))
  1632. } else {
  1633. Err(From::from(format!("number too large for {}", arg_name)))
  1634. }
  1635. }
  /// Compare two paths by a system time taken from their file metadata.
  ///
  /// `get_time` selects which timestamp to read from a path's metadata. The
  /// result is the ordering of `p1`'s time relative to `p2`'s, flipped when
  /// `reverse` is set.
  ///
  /// If the metadata cannot be read or the time cannot be extracted for
  /// either path, the two paths compare as equal. `p2` is not examined when
  /// `p1` has already failed.
  fn sort_by_metadata_time<G>(
      p1: &Path,
      p2: &Path,
      reverse: bool,
      get_time: G,
  ) -> cmp::Ordering
  where
      G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
  {
      // Any failure along the way collapses to `None`.
      let time_of = |path: &Path| {
          path.metadata().and_then(|md| get_time(&md)).ok()
      };
      // The inner lookup runs only when the first one succeeded.
      let forward =
          time_of(p1).and_then(|t1| time_of(p2).map(|t2| t1.cmp(&t2)));
      match forward {
          None => cmp::Ordering::Equal,
          Some(order) if reverse => order.reverse(),
          Some(order) => order,
      }
  }
  1664. /// Returns a clap matches object if the given arguments parse successfully.
  1665. ///
  1666. /// Otherwise, if an error occurred, then it is returned unless the error
  1667. /// corresponds to a `--help` or `--version` request. In which case, the
  1668. /// corresponding output is printed and the current process is exited
  1669. /// successfully.
  1670. fn clap_matches<I, T>(args: I) -> Result<clap::ArgMatches<'static>>
  1671. where
  1672. I: IntoIterator<Item = T>,
  1673. T: Into<OsString> + Clone,
  1674. {
  1675. let err = match app::app().get_matches_from_safe(args) {
  1676. Ok(matches) => return Ok(matches),
  1677. Err(err) => err,
  1678. };
  1679. if err.use_stderr() {
  1680. return Err(err.into());
  1681. }
  1682. // Explicitly ignore any error returned by write!. The most likely error
  1683. // at this point is a broken pipe error, in which case, we want to ignore
  1684. // it and exit quietly.
  1685. //
  1686. // (This is the point of this helper function. clap's functionality for
  1687. // doing this will panic on a broken pipe error.)
  1688. let _ = write!(io::stdout(), "{}", err);
  1689. process::exit(0);
  1690. }
  1691. /// Attempts to discover the current working directory. This mostly just defers
  1692. /// to the standard library, however, such things will fail if ripgrep is in
  1693. /// a directory that no longer exists. We attempt some fallback mechanisms,
  1694. /// such as querying the PWD environment variable, but otherwise return an
  1695. /// error.
  1696. fn current_dir() -> Result<PathBuf> {
  1697. let err = match env::current_dir() {
  1698. Err(err) => err,
  1699. Ok(cwd) => return Ok(cwd),
  1700. };
  1701. if let Some(cwd) = env::var_os("PWD") {
  1702. if !cwd.is_empty() {
  1703. return Ok(PathBuf::from(cwd));
  1704. }
  1705. }
  1706. Err(format!(
  1707. "failed to get current working directory: {} \
  1708. --- did your CWD get deleted?",
  1709. err,
  1710. )
  1711. .into())
  1712. }