PageRenderTime 28ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/src/app/szl.cc

http://szl.googlecode.com/
C++ | 317 lines | 239 code | 41 blank | 37 comment | 50 complexity | 5099467f0f22853e0aba2c3c5e0232ad MD5 | raw file
Possible License(s): BSD-3-Clause, Unlicense
  1. // Copyright 2010 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // ------------------------------------------------------------------------
  15. // We need PRIu64, which is only defined if we explicitly ask for it.
  16. #define __STDC_FORMAT_MACROS
  17. #include <inttypes.h>
  18. #include <stdio.h>
  19. #include <algorithm>
  20. #include <string>
  21. #include <vector>
  22. #include <set>
  23. #include <list>
  24. #include <utility>
  25. #include <sys/time.h>
  26. #include <unistd.h>
  27. #include <time.h>
  28. #include <regex.h>
  29. #include <string.h>
  30. #include "config.h"
  31. #include "public/hash_set.h"
  32. #include "public/hash_map.h"
  33. #include "public/porting.h"
  34. #include "public/commandlineflags.h"
  35. #include "public/logging.h"
  36. #include "public/recordio.h"
  37. #include "utilities/strutils.h"
  38. #include "fmt/fmt.h"
  39. #include "public/szltype.h"
  40. #include "public/szlvalue.h"
  41. #include "public/sawzall.h"
  42. #include "public/emitterinterface.h"
  43. #include "public/szlemitter.h"
  44. #include "public/szltabentry.h"
  45. #include "app/szlemitterfactory.h"
  46. #include "app/printemitter.h"
  47. #include "app/szlutils.h"
  48. DEFINE_bool(V, false, "print version");
  49. // Special flag: if --program is set, next arg is .szl file.
  50. // If not, the first non-option argument is the .szl file.
  51. DEFINE_string(program, "", "sawzall source file. If the file is not found in "
  52. "the current directory, look for it in "
  53. "--szl_includepath");
  54. // szl flags
  55. DEFINE_bool(execute, true, "execute program");
  56. DEFINE_bool(skip_files, false, "skip processing of input files");
  57. DEFINE_bool(print_source, false, "print program source");
  58. DEFINE_bool(print_raw_source, false, "print raw program source");
  59. DEFINE_bool(always_print_raw_source, false, "print raw program source");
  60. DECLARE_bool(print_rewritten_source);
  61. DEFINE_bool(print_code, false, "print generated code");
  62. DEFINE_bool(trace_files, false, "trace input files");
  63. DEFINE_bool(trace_input, false, "trace input records");
  64. DEFINE_bool(use_recordio, false, "use record I/O to read input files");
  65. DEFINE_bool(ignore_undefs, false,
  66. "silently ignore undefined variables/statements");
  67. DEFINE_bool(info, false, "print Sawzall version information");
  68. DEFINE_int64(begin_record, 0, "first record to process");
  69. DEFINE_int64(end_record, -1, "first record not to process (-1 => end of file)");
  70. DEFINE_int64(num_records, -1, "number of input records to process (-1 => all)");
  71. DEFINE_string(e, "", "program snippet on command line");
  72. DEFINE_string(explain, explain_default,
  73. "print definition of a predeclared identifier");
  74. DEFINE_bool(print_html, false, "print html documentation");
  75. DEFINE_bool(print_histogram, false, "print byte code histogram for each process");
  76. DEFINE_bool(print_tables, false, "print output tables");
  77. DEFINE_bool(print_input_proto_name, false,
  78. "print the name of the protocol buffer associated with \"input\"");
  79. DEFINE_string(print_referenced_tuple_field_names, "",
  80. "print the names of the referenced fields in the specified tuple; "
  81. "use \"<input>\" to specify the input proto tuple and \"<all>\" "
  82. "to specify all named tuples");
  83. DEFINE_bool(profile, false, "print function use profile for each process");
  84. DEFINE_bool(native, true,
  85. "generate native code instead of interpreted byte code");
  86. DEFINE_string(gen_elf, "",
  87. "generate ELF file representing generated native code");
  88. DEFINE_string(table_output, "", "comma-separated list of table names or * to "
  89. "display the aggregated output for.");
  90. #ifdef OS_LINUX
  91. DEFINE_int32(memory_limit, 0,
  92. "memory limit in MB (0 is size of RAM, -1 is unlimited); memory "
  93. "manager will reclaim memory to try to stay below this limit");
  94. #endif
  95. static void TraceBinaryInput(uint64 record_number, const void* input, size_t size) {
  96. Fmt::print("%4"PRIu64". input = bytes({", record_number);
  97. for (int i = 0; i < size; i++) {
  98. if (i > 0)
  99. Fmt::print(", ");
  100. Fmt::print("%02x", static_cast<const char*>(input)[i]);
  101. }
  102. Fmt::print("}); # size = %d bytes\n", size);
  103. }
  104. static void ApplyToRecords(sawzall::Process* process, const char* file_name,
  105. uint64 begin, uint64 end) {
  106. // TODO: support sequence file input
  107. assert(false);
  108. sawzall::RecordReader* reader = sawzall::RecordReader::Open(file_name);
  109. if (reader != NULL) {
  110. uint64 record_number = 0;
  111. char* record_ptr;
  112. size_t record_size;
  113. while (record_number < end && reader->Read(&record_ptr, &record_size)) {
  114. if (begin <= record_number) {
  115. if (FLAGS_trace_input)
  116. TraceBinaryInput(record_number, record_ptr, record_size);
  117. string key = StringPrintf("%"PRIu64, record_number);
  118. process->RunOrDie(record_ptr, record_size, key.data(), key.size());
  119. }
  120. record_number++;
  121. }
  122. if (!reader->error_message().empty())
  123. fprintf(stderr, "error reading file: %s: %s\n",
  124. file_name,
  125. reader->error_message().c_str());
  126. delete reader;
  127. } else {
  128. fprintf(stderr, "can't open file: ");
  129. perror(file_name);
  130. }
  131. }
  132. static bool Execute(const char* program, const char* cmd,
  133. int argc, char* argv[], uint64 begin, uint64 end) {
  134. sawzall::Executable exe(program, cmd, ExecMode());
  135. if (FLAGS_always_print_raw_source)
  136. Fmt::print("%s\n", exe.RawSource());
  137. // do not execute if there were compilation errors
  138. if (!exe.is_executable())
  139. return false;
  140. // debugging output
  141. if (FLAGS_print_raw_source && !FLAGS_always_print_raw_source)
  142. Fmt::print("%s\n", exe.RawSource());
  143. if (FLAGS_print_rewritten_source)
  144. exe.PrintSource(); // see DoCompile for the pre-rewrite source printing
  145. if (FLAGS_print_code)
  146. exe.PrintCode();
  147. if (FLAGS_print_tables)
  148. exe.PrintTables();
  149. if (FLAGS_native && !FLAGS_gen_elf.empty()) {
  150. if (!exe.GenerateELF(FLAGS_gen_elf.c_str(), NULL, NULL, NULL)) {
  151. fprintf(stderr, "could not write elf file %s\n", FLAGS_gen_elf.c_str());
  152. return false;
  153. }
  154. }
  155. if (FLAGS_print_input_proto_name)
  156. exe.PrintInputProtoName();
  157. if (!FLAGS_print_referenced_tuple_field_names.empty())
  158. exe.PrintReferencedTupleFieldNames(FLAGS_print_referenced_tuple_field_names,
  159. true);
  160. // execute the program
  161. if (FLAGS_execute) {
  162. sawzall::Process process(&exe, NULL);
  163. #ifdef OS_LINUX
  164. process.set_memory_limit(FLAGS_memory_limit);
  165. #endif
  166. // set up print output buffer
  167. Fmt::State fmt;
  168. char buf[1024];
  169. Fmt::fmtfdinit(&fmt, 1, buf, sizeof buf);
  170. // register backend emitters for tables
  171. SzlEmitterFactory emitter_factory(&fmt, TableOutput(&process));
  172. process.set_emitter_factory(&emitter_factory);
  173. sawzall::RegisterEmitters(&process);
  174. process.InitializeOrDie();
  175. // run for each input line, if any
  176. if (argc > 0) {
  177. // we have an input file
  178. // => run the Sawzall program for all lines in each file
  179. for (int i = 0; i < argc; i++) {
  180. const char* file_name = argv[i];
  181. if (FLAGS_skip_files) {
  182. printf("%d. skipping %s\n", i, file_name);
  183. } else {
  184. if (FLAGS_trace_files)
  185. printf("%d. processing %s\n", i, file_name);
  186. if (FLAGS_use_recordio)
  187. ApplyToRecords(&process, file_name, begin, end);
  188. else
  189. ApplyToLines(&process, file_name, begin, end);
  190. }
  191. }
  192. } else {
  193. // we have no input file
  194. // => run the Sawzall program once
  195. process.RunOrDie("", 0, "", 0);
  196. }
  197. // cleanup
  198. process.Epilog(true);
  199. }
  200. return true;
  201. }
  202. int main(int argc, char* argv[]) {
  203. Fmt::quoteinstall(); // For TraceStringInput
  204. // save the current directory so we can restore it
  205. char pre_init_directory[PATH_MAX + 1];
  206. CHECK(getcwd(pre_init_directory, sizeof(pre_init_directory)) != NULL);
  207. ProcessCommandLineArguments(argc, argv);
  208. InitializeAllModules();
  209. if (FLAGS_V)
  210. fprintf(stderr, "Szl version %d.%d.%d\n",
  211. GOOGLE_SZL_VERSION/1000000, GOOGLE_SZL_VERSION/1000%1000);
  212. // check if the directory changed; if so, complain and restore it
  213. char post_init_directory[PATH_MAX + 1];
  214. CHECK(getcwd(post_init_directory, sizeof(post_init_directory)) != NULL) <<
  215. "getcwd() failed - unable to get current directory";
  216. if (strcmp(pre_init_directory, post_init_directory) != 0) {
  217. LOG(ERROR) << "Current directory was changed to \"" << post_init_directory
  218. << "\" and will be restored to \"" << pre_init_directory << "\"";
  219. CHECK_EQ(chdir(pre_init_directory), 0) <<
  220. "chdir() failed - unable to restore current directory";
  221. }
  222. // Set the job start time.
  223. timeval tv;
  224. gettimeofday(&tv, NULL);
  225. setenv("SZL_START_TIME",
  226. StringPrintf("%lld",
  227. static_cast<int64>(tv.tv_sec * 1e6 + tv.tv_usec)).c_str(),
  228. 0); // Do not override SZL_START_TIME if it's already present.
  229. sawzall::RegisterStandardTableTypes();
  230. // process some command line flags
  231. if (FLAGS_info)
  232. printf("szl using %s\n", sawzall::Version());
  233. if (FLAGS_explain != explain_default) {
  234. Explain();
  235. return 0;
  236. }
  237. if (FLAGS_print_html)
  238. sawzall::PrintHtmlDocumentation();
  239. // determine file interval
  240. uint64 begin = FLAGS_begin_record;
  241. uint64 end = FLAGS_end_record;
  242. if (FLAGS_num_records != -1) {
  243. if (FLAGS_end_record != -1) {
  244. // cannot set both flags at the same time
  245. Fmt::fprint(
  246. 2, "cannot use --end_record and --num_records at the same time\n");
  247. return 1;
  248. }
  249. end = begin + FLAGS_num_records;
  250. }
  251. // process extra argument or --e arg or --program arg as szl program
  252. --argc;
  253. ++argv; // step over argv[0]; our args start at argv[1]
  254. const char* program = FLAGS_program.c_str();
  255. const char* ecommand = FLAGS_e.c_str();
  256. if (strlen(ecommand) > 0) {
  257. if (strlen(program) > 0) {
  258. Fmt::fprint(2, "cannot use --e and --program at the same time\n");
  259. return 1;
  260. }
  261. program = "<commandline>";
  262. } else {
  263. ecommand = NULL;
  264. if (strlen(program) == 0) {
  265. if (argc < 1)
  266. return 0; // nothing to run
  267. program = *argv;
  268. --argc;
  269. ++argv;
  270. }
  271. }
  272. bool success = Execute(program, ecommand, argc, argv, begin, end);
  273. if (success)
  274. return 0;
  275. return 1;
  276. }