PageRenderTime 66ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/tests/binary.rs

https://github.com/BurntSushi/ripgrep
Rust | 306 lines | 183 code | 35 blank | 88 comment | 0 complexity | 9bb1c9cc61b84dbed23ac52dc12b1a3d MD5 | raw file
Possible License(s): MIT, Unlicense
  1. use crate::util::{Dir, TestCommand};
  2. // This file contains a smattering of tests specifically for checking ripgrep's
  3. // handling of binary files. There's quite a bit of discussion on this in this
  4. // bug report: https://github.com/BurntSushi/ripgrep/issues/306
  5. // Our haystack is the first 500 lines of Gutenberg's copy of "A Study in
  6. // Scarlet," with a NUL byte at line 237: `abcdef\x00`.
  7. //
  8. // The position and size of the haystack are, unfortunately, significant. In
  9. // particular, the NUL byte is specifically inserted at some point *after* the
  10. // first 8192 bytes, which corresponds to the initial capacity of the buffer
  11. // that ripgrep uses to read files. (grep for DEFAULT_BUFFER_CAPACITY.) The
  12. // position of the NUL byte ensures that we can execute some search on the
  13. // initial buffer contents without ever detecting any binary data. Moreover,
  14. // when using a memory map for searching, only the first 8192 bytes are
  15. // scanned for a NUL byte, so no binary bytes are detected at all when using
  16. // a memory map (unless our query matches line 237).
  17. //
  18. // One last note: in the tests below, we use --no-mmap heavily because binary
  19. // detection with memory maps is a bit different. Namely, NUL bytes are only
  20. // searched for in the first few KB of the file and in a match. Normally, NUL
  21. // bytes are searched for everywhere.
  22. //
  23. // TODO: Add tests for binary file detection when using memory maps.
  24. const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt");
  25. // This tests that ripgrep prints a warning message if it finds and prints a
  26. // match in a binary file before detecting that it is a binary file. The point
  27. // here is to notify that user that the search of the file is only partially
  28. // complete.
  29. //
  30. // This applies to files that are *implicitly* searched via a recursive
  31. // directory traversal. In particular, this results in a WARNING message being
  32. // printed. We make our file "implicit" by doing a recursive search with a glob
  33. // that matches our file.
  34. rgtest!(after_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
  35. dir.create_bytes("hay", HAY);
  36. cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook", "-g", "hay"]);
  37. let expected = "\
  38. hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  39. WARNING: stopped searching binary file hay after match (found \"\\u{0}\" byte around offset 9741)
  40. ";
  41. eqnice!(expected, cmd.stdout());
  42. });
  43. // Like after_match1_implicit, except we provide a file to search
  44. // explicitly. This results in identical behavior, but a different message.
  45. rgtest!(after_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
  46. dir.create_bytes("hay", HAY);
  47. cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook", "hay"]);
  48. let expected = "\
  49. 1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  50. Binary file matches (found \"\\u{0}\" byte around offset 9741)
  51. ";
  52. eqnice!(expected, cmd.stdout());
  53. });
  54. // Like after_match1_explicit, except we feed our content on stdin.
  55. rgtest!(after_match1_stdin, |_: Dir, mut cmd: TestCommand| {
  56. cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook"]);
  57. let expected = "\
  58. 1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  59. Binary file matches (found \"\\u{0}\" byte around offset 9741)
  60. ";
  61. eqnice!(expected, cmd.pipe(HAY));
  62. });
  63. // Like after_match1_implicit, but provides the --binary flag, which
  64. // disables binary filtering. Thus, this matches the behavior of ripgrep as
  65. // if the file were given explicitly.
  66. rgtest!(after_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
  67. dir.create_bytes("hay", HAY);
  68. cmd.args(&[
  69. "--no-mmap",
  70. "-n",
  71. "--binary",
  72. "Project Gutenberg EBook",
  73. "-g",
  74. "hay",
  75. ]);
  76. let expected = "\
  77. hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  78. Binary file hay matches (found \"\\u{0}\" byte around offset 9741)
  79. ";
  80. eqnice!(expected, cmd.stdout());
  81. });
  82. // Like after_match1_implicit, but enables -a/--text, so no binary
  83. // detection should be performed.
  84. rgtest!(after_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
  85. dir.create_bytes("hay", HAY);
  86. cmd.args(&[
  87. "--no-mmap",
  88. "-n",
  89. "--text",
  90. "Project Gutenberg EBook",
  91. "-g",
  92. "hay",
  93. ]);
  94. let expected = "\
  95. hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  96. ";
  97. eqnice!(expected, cmd.stdout());
  98. });
  99. // Like after_match1_implicit_text, but enables -a/--text, so no binary
  100. // detection should be performed.
  101. rgtest!(after_match1_explicit_text, |dir: Dir, mut cmd: TestCommand| {
  102. dir.create_bytes("hay", HAY);
  103. cmd.args(&["--no-mmap", "-n", "--text", "Project Gutenberg EBook", "hay"]);
  104. let expected = "\
  105. 1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  106. ";
  107. eqnice!(expected, cmd.stdout());
  108. });
  109. // Like after_match1_implicit, except this asks ripgrep to print all matching
  110. // files.
  111. //
  112. // This is an interesting corner case that one might consider a bug, however,
  113. // it's unlikely to be fixed. Namely, ripgrep probably shouldn't print `hay`
  114. // as a matching file since it is in fact a binary file, and thus should be
  115. // filtered out by default. However, the --files-with-matches flag will print
  116. // out the path of a matching file as soon as a match is seen and then stop
  117. // searching completely. Therefore, the NUL byte is never actually detected.
  118. //
  119. // The only way to fix this would be to kill ripgrep's performance in this case
  120. // and continue searching the entire file for a NUL byte. (Similarly if the
  121. // --quiet flag is set. See the next test.)
  122. rgtest!(after_match1_implicit_path, |dir: Dir, mut cmd: TestCommand| {
  123. dir.create_bytes("hay", HAY);
  124. cmd.args(&["--no-mmap", "-l", "Project Gutenberg EBook", "-g", "hay"]);
  125. eqnice!("hay\n", cmd.stdout());
  126. });
  127. // Like after_match1_implicit_path, except this indicates that a match was
  128. // found with no other output. (This is the same bug described above, but
  129. // manifest as an exit code with no output.)
  130. rgtest!(after_match1_implicit_quiet, |dir: Dir, mut cmd: TestCommand| {
  131. dir.create_bytes("hay", HAY);
  132. cmd.args(&["--no-mmap", "-q", "Project Gutenberg EBook", "-g", "hay"]);
  133. eqnice!("", cmd.stdout());
  134. });
  135. // This sets up the same test as after_match1_implicit_path, but instead of
  136. // just printing the matching files, this includes the full count of matches.
  137. // In this case, we need to search the entire file, so ripgrep correctly
  138. // detects the binary data and suppresses output.
  139. rgtest!(after_match1_implicit_count, |dir: Dir, mut cmd: TestCommand| {
  140. dir.create_bytes("hay", HAY);
  141. cmd.args(&["--no-mmap", "-c", "Project Gutenberg EBook", "-g", "hay"]);
  142. cmd.assert_err();
  143. });
  144. // Like after_match1_implicit_count, except the --binary flag is provided,
  145. // which makes ripgrep disable binary data filtering even for implicit files.
  146. rgtest!(
  147. after_match1_implicit_count_binary,
  148. |dir: Dir, mut cmd: TestCommand| {
  149. dir.create_bytes("hay", HAY);
  150. cmd.args(&[
  151. "--no-mmap",
  152. "-c",
  153. "--binary",
  154. "Project Gutenberg EBook",
  155. "-g",
  156. "hay",
  157. ]);
  158. eqnice!("hay:1\n", cmd.stdout());
  159. }
  160. );
  161. // Like after_match1_implicit_count, except the file path is provided
  162. // explicitly, so binary filtering is disabled and a count is correctly
  163. // reported.
  164. rgtest!(after_match1_explicit_count, |dir: Dir, mut cmd: TestCommand| {
  165. dir.create_bytes("hay", HAY);
  166. cmd.args(&["--no-mmap", "-c", "Project Gutenberg EBook", "hay"]);
  167. eqnice!("1\n", cmd.stdout());
  168. });
  169. // This tests that a match way before the NUL byte is shown, but a match after
  170. // the NUL byte is not.
  171. rgtest!(after_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
  172. dir.create_bytes("hay", HAY);
  173. cmd.args(&[
  174. "--no-mmap",
  175. "-n",
  176. "Project Gutenberg EBook|a medical student",
  177. "-g",
  178. "hay",
  179. ]);
  180. let expected = "\
  181. hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  182. WARNING: stopped searching binary file hay after match (found \"\\u{0}\" byte around offset 9741)
  183. ";
  184. eqnice!(expected, cmd.stdout());
  185. });
  186. // Like after_match2_implicit, but enables -a/--text, so no binary
  187. // detection should be performed.
  188. rgtest!(after_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
  189. dir.create_bytes("hay", HAY);
  190. cmd.args(&[
  191. "--no-mmap",
  192. "-n",
  193. "--text",
  194. "Project Gutenberg EBook|a medical student",
  195. "-g",
  196. "hay",
  197. ]);
  198. let expected = "\
  199. hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
  200. hay:236:\"And yet you say he is not a medical student?\"
  201. ";
  202. eqnice!(expected, cmd.stdout());
  203. });
  204. // This tests that ripgrep *silently* quits before finding a match that occurs
  205. // after a NUL byte.
  206. rgtest!(before_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
  207. dir.create_bytes("hay", HAY);
  208. cmd.args(&["--no-mmap", "-n", "Heaven", "-g", "hay"]);
  209. cmd.assert_err();
  210. });
  211. // This tests that ripgrep *does not* silently quit before finding a match that
  212. // occurs after a NUL byte when a file is explicitly searched.
  213. rgtest!(before_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
  214. dir.create_bytes("hay", HAY);
  215. cmd.args(&["--no-mmap", "-n", "Heaven", "hay"]);
  216. let expected = "\
  217. Binary file matches (found \"\\u{0}\" byte around offset 9741)
  218. ";
  219. eqnice!(expected, cmd.stdout());
  220. });
  221. // Like before_match1_implicit, but enables the --binary flag, which
  222. // disables binary filtering. Thus, this matches the behavior of ripgrep as if
  223. // the file were given explicitly.
  224. rgtest!(before_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
  225. dir.create_bytes("hay", HAY);
  226. cmd.args(&["--no-mmap", "-n", "--binary", "Heaven", "-g", "hay"]);
  227. let expected = "\
  228. Binary file hay matches (found \"\\u{0}\" byte around offset 9741)
  229. ";
  230. eqnice!(expected, cmd.stdout());
  231. });
  232. // Like before_match1_implicit, but enables -a/--text, so no binary
  233. // detection should be performed.
  234. rgtest!(before_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
  235. dir.create_bytes("hay", HAY);
  236. cmd.args(&["--no-mmap", "-n", "--text", "Heaven", "-g", "hay"]);
  237. let expected = "\
  238. hay:238:\"No. Heaven knows what the objects of his studies are. But here we
  239. ";
  240. eqnice!(expected, cmd.stdout());
  241. });
  242. // This tests that ripgrep *silently* quits before finding a match that occurs
  243. // before a NUL byte, but within the same buffer as the NUL byte.
  244. rgtest!(before_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
  245. dir.create_bytes("hay", HAY);
  246. cmd.args(&["--no-mmap", "-n", "a medical student", "-g", "hay"]);
  247. cmd.assert_err();
  248. });
  249. // This tests that ripgrep *does not* silently quit before finding a match that
  250. // occurs before a NUL byte, but within the same buffer as the NUL byte. Even
  251. // though the match occurs before the NUL byte, ripgrep still doesn't print it
  252. // because it has already scanned ahead to detect the NUL byte. (This matches
  253. // the behavior of GNU grep.)
  254. rgtest!(before_match2_explicit, |dir: Dir, mut cmd: TestCommand| {
  255. dir.create_bytes("hay", HAY);
  256. cmd.args(&["--no-mmap", "-n", "a medical student", "hay"]);
  257. let expected = "\
  258. Binary file matches (found \"\\u{0}\" byte around offset 9741)
  259. ";
  260. eqnice!(expected, cmd.stdout());
  261. });
  262. // Like before_match1_implicit, but enables -a/--text, so no binary
  263. // detection should be performed.
  264. rgtest!(before_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
  265. dir.create_bytes("hay", HAY);
  266. cmd.args(&["--no-mmap", "-n", "--text", "a medical student", "-g", "hay"]);
  267. let expected = "\
  268. hay:236:\"And yet you say he is not a medical student?\"
  269. ";
  270. eqnice!(expected, cmd.stdout());
  271. });