PageRenderTime 59ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/strace-4.6/cde.c

https://github.com/gidden/CDE
C | 3782 lines | 2204 code | 613 blank | 965 comment | 525 complexity | 124eb0ee2f55ded549539ccbc1129166 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*
  2. CDE: Code, Data, and Environment packaging for Linux
  3. http://www.stanford.edu/~pgbovine/cde.html
  4. Philip Guo
  5. CDE is currently licensed under GPL v3:
  6. Copyright (c) 2010-2011 Philip Guo <pg@cs.stanford.edu>
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 3 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. */
  16. /* Linux system call calling conventions:
  17. According to this page:
  18. http://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-on-x86-64
  19. ... and the source code for systrace: http://www.citi.umich.edu/u/provos/systrace/
  20. 32-bit x86:
  21. syscall number: %eax
  22. first 6 syscall parameters: %ebx, %ecx, %edx, %esi, %edi, %ebp
  23. 64-bit x86-64:
  24. syscall number: %rax
  25. first 6 syscall parameters (for a 64-bit target process): %rdi, %rsi, %rdx, %r10, %r8 and %r9 (the kernel syscall interface uses %r10 where the user-level function ABI uses %rcx)
  26. first 6 syscall parameters (for a 32-bit target process): %rbx, %rcx, %rdx, %rsi, %rdi, %rbp
  27. (note how these are similar to the 32-bit syscall parameter registers)
  28. */
  29. #include "cde.h"
  30. #include "okapi.h"
  31. #include <dirent.h>
  32. // for CDE_begin_socket_bind_or_connect
  33. #include <sys/socket.h>
  34. #include <sys/un.h>
  35. #include <time.h>
  36. #include <sys/utsname.h> // for uname
  37. // TODO: eliminate this hack if it results in a compile-time error
  38. #include "config.h" // to get I386 / X86_64 definitions
  39. #if defined (I386)
  40. __asm__(".symver shmctl,shmctl@GLIBC_2.0"); // hack to eliminate glibc 2.2 dependency
  41. #endif
// Global mode flags and package locations.

// 1 if we are executing code in a CDE package,
// 0 for tracing regular execution
// (several functions in this file assert on this flag and switch
// behavior based on it, e.g. create_abspath_within_cderoot)
char CDE_exec_mode;
// when non-zero, extra diagnostics are printed to stdout
// (e.g. the "IGNORED ..." and "redirect ... => ..." lines)
char CDE_verbose_mode = 0; // -v option
// only valid if !CDE_exec_mode
char* CDE_PACKAGE_DIR = NULL;
// prefix that prepend_cderoot() puts in front of absolute paths
char* CDE_ROOT_DIR = NULL;
char CDE_block_net_access = 0; // -n option
// only relevant if CDE_exec_mode = 1
// when non-zero, create_abspath_within_cderoot() mirrors not-yet-cached
// files from cde_remote_root_dir into cde_pseudo_root_dir
char CDE_exec_streaming_mode = 0; // -s option
#if defined(X86_64)
// current_personality == 1 means that a 64-bit cde-exec is actually tracking a
// 32-bit target process at the moment:
// (current_personality is defined outside this file section; this macro
// only exists on X86_64 builds, so guard every use with #if defined(X86_64))
#define IS_32BIT_EMU (current_personality == 1)
#endif
  57. // Super-simple trie implementation for doing fast string matching:
  58. // adapted from my earlier IncPy project
  59. typedef struct _trie {
  60. struct _trie* children[128]; // we support ASCII characters from 0 to 127
  61. int elt_is_present; // 1 if there is an element present here
  62. } Trie;
  63. static Trie* TrieNew(void) {
  64. // VERY important to blank out the contents with a calloc()
  65. return (Trie*)calloc(1, sizeof(Trie));
  66. }
  67. /* currently unused ... but could be useful in the future
  68. static void TrieDelete(Trie* t) {
  69. // free all your children before freeing yourself
  70. unsigned char i;
  71. for (i = 0; i < 128; i++) {
  72. if (t->children[i]) {
  73. TrieDelete(t->children[i]);
  74. }
  75. }
  76. free(t);
  77. }
  78. */
  79. static void TrieInsert(Trie* t, char* ascii_string) {
  80. while (*ascii_string != '\0') {
  81. unsigned char idx = (unsigned char)*ascii_string;
  82. assert(idx < 128); // we don't support extended ASCII characters
  83. if (!t->children[idx]) {
  84. t->children[idx] = TrieNew();
  85. }
  86. t = t->children[idx];
  87. ascii_string++;
  88. }
  89. t->elt_is_present = 1;
  90. }
  91. static int TrieContains(Trie* t, char* ascii_string) {
  92. while (*ascii_string != '\0') {
  93. unsigned char idx = (unsigned char)*ascii_string;
  94. t = t->children[idx];
  95. if (!t) {
  96. return 0; // early termination, no match!
  97. }
  98. ascii_string++;
  99. }
  100. return t->elt_is_present;
  101. }
// File-level option state, forward declarations and path tables.

// 1 if we should use the dynamic linker from within the package
// (much more portable, but might be less robust since the dynamic linker
// must be invoked explicitly, which leads to some weird-ass bugs)
// 0 if we should attempt to use the native dynamic linker from target machine
// (not portable at all since the target machine's dynamic linker must
// match the libc version WITHIN the package, but potentially more
// robust if the target and source machines are identically-configured)
char CDE_use_linker_from_package = 1; // ON by default, -l option to turn OFF
// only 1 if we are running cde-exec from OUTSIDE of a cde-root/ directory
// (when set, ignore_path() falls back to checking whether a file exists
// inside the package, and extract_sandboxed_pwd() tolerates outside pwds)
char cde_exec_from_outside_cderoot = 0;
// if non-NULL, copy_file_into_cde_root() appends one absolute path per line
FILE* CDE_copied_files_logfile = NULL;
// ignore_path() asserts that this is non-zero before consulting the tables below
static char cde_options_initialized = 0; // set to 1 after CDE_init_options() done
// forward declarations for helpers defined later in this file
static void begin_setup_shmat(struct tcb* tcp);
static void* find_free_addr(int pid, int exec, unsigned long size);
static char* strcpy_from_child(struct tcb* tcp, long addr);
static char* strcpy_from_child_or_null(struct tcb* tcp, long addr);
static int ignore_path(char* filename, struct tcb* tcp);
// NOTE(review): presumably the size of the tcp->localshm / tcp->childshm
// segment set up by begin_setup_shmat() -- confirm against that function
#define SHARED_PAGE_SIZE (MAXPATHLEN * 4)
static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp);
static void memcpy_to_child(int pid, char* dst_child, char* src, int size);
// the true pwd of the cde executable AT THE START of execution
char cde_starting_pwd[MAXPATHLEN];
// these arrays are initialized in CDE_init_options()
// yeah, statically-sized arrays are dumb but easy to implement :)
// ignore_path() matches them against absolute paths as follows:
//   *_exact_paths  - strcmp() equality
//   *_prefix_paths - strncmp() over the length of the table entry
//   *_substr_paths - strstr() containment
// each *_ind variable is the number of entries in use in its table
static char* ignore_exact_paths[100];
static char* ignore_prefix_paths[100];
static char* ignore_substr_paths[100];
int ignore_exact_paths_ind = 0;
int ignore_prefix_paths_ind = 0;
int ignore_substr_paths_ind = 0;
// these override their ignore path counterparts
// (ignore_path() consults them first and returns 0 on a match)
static char* redirect_exact_paths[100];
static char* redirect_prefix_paths[100];
static char* redirect_substr_paths[100];
int redirect_exact_paths_ind = 0;
int redirect_prefix_paths_ind = 0;
int redirect_substr_paths_ind = 0;
static char* ignore_envvars[100]; // each element should be an environment variable to ignore
int ignore_envvars_ind = 0;
// NOTE(review): presumably the per-process ignore rules that tcp->p_ignores
// points into (struct PI is declared elsewhere) -- confirm
struct PI process_ignores[50];
int process_ignores_ind = 0;
// the absolute path to the cde-root/ directory, since that will be
// where our fake filesystem starts. e.g., if cde_starting_pwd is
// /home/bob/cde-package/cde-root/home/alice/cool-experiment
// then cde_pseudo_root_dir is:
// /home/bob/cde-package/cde-root
//
// only relevant when we're executing in CDE_exec_mode
char cde_pseudo_root_dir[MAXPATHLEN];
// the path to where the root directory is mounted on the remote machine
// (only relevant for "cde-exec -s")
char* cde_remote_root_dir = NULL;
// file paths that should be accessed in cde-package/cde-root/
// rather than on the remote machine (only relevant for "cde-exec -s")
static Trie* cached_files_trie = NULL;
FILE* cached_files_fp = NULL; // save cached_files_trie on-disk as "locally-cached-files.txt"
// to shut up gcc warnings without going thru #include hell
extern ssize_t getline(char **lineptr, size_t *n, FILE *stream);
extern char* find_ELF_program_interpreter(char * file_name); // from ../readelf-mini/libreadelf-mini.a
extern void path_pop(struct path* p);
// forward declarations for setup routines defined later in this file
static void CDE_init_options(void);
static void CDE_create_convenience_scripts(char** argv, int optind);
static void CDE_create_toplevel_symlink_dirs(void);
static void CDE_create_path_symlink_dirs(void);
static void CDE_load_environment_vars(void);
  167. // returns a component within real_pwd that represents the part within
  168. // cde_pseudo_root_dir
  169. // the return value should NOT be mutated; otherwise we might be screwed!
  170. //
  171. // (tcp argument is optional and used to pass into ignore_path)
  172. static char* extract_sandboxed_pwd(char* real_pwd, struct tcb* tcp) {
  173. assert(CDE_exec_mode);
  174. // spoof getcwd by only taking the part BELOW cde-root/
  175. // e.g., if real_pwd is:
  176. // /home/bob/cde-package/cde-root/home/alice/cool-experiment
  177. // then return:
  178. // /home/alice/cool-experiment
  179. // as cwd
  180. int cde_pseudo_root_dir_len = strlen(cde_pseudo_root_dir);
  181. char real_pwd_is_within_cde_pseudo_root_dir =
  182. ((strlen(real_pwd) >= cde_pseudo_root_dir_len) &&
  183. (strncmp(real_pwd, cde_pseudo_root_dir, cde_pseudo_root_dir_len) == 0));
  184. // if real_pwd is within a strange directory like '/tmp' that should
  185. // be ignored, AND if it resides OUTSIDE of cde_pseudo_root_dir, then
  186. // simply return itself
  187. //
  188. // e.g., if real_pwd is '/tmp', then return itself,
  189. // but if real_pwd is '/tmp/cde-package/cde-root/home/pgbovine' and
  190. // cde_pseudo_root_dir is '/tmp/cde-package/cde-root/', then
  191. // treat it like any normal path (extract '/home/pgbovine')
  192. if (ignore_path(real_pwd, tcp) && !real_pwd_is_within_cde_pseudo_root_dir) {
  193. return real_pwd;
  194. }
  195. // sanity check, make sure real_pwd is within/ cde_pseudo_root_dir,
  196. // if we're not ignoring it
  197. if (!real_pwd_is_within_cde_pseudo_root_dir) {
  198. // if we're in this mode, then we're okay!!! don't return an error!
  199. if (cde_exec_from_outside_cderoot) {
  200. return real_pwd;
  201. }
  202. else {
  203. fprintf(stderr,
  204. "Fatal error: '%s' is outside of cde-root/ and NOT being ignored.\n",
  205. real_pwd);
  206. exit(1);
  207. }
  208. }
  209. // regular action: truncate path up to and including 'cde-root/'
  210. char* sandboxed_pwd = (real_pwd + cde_pseudo_root_dir_len);
  211. // special case for '/' directory:
  212. if (strlen(sandboxed_pwd) == 0) {
  213. return (char*)"/";
  214. }
  215. else {
  216. return sandboxed_pwd;
  217. }
  218. }
  219. // prepend CDE_ROOT_DIR to the given path string, assumes that the string
  220. // starts with '/' (i.e., it's an absolute path)
  221. // (mallocs a new string)
  222. char* prepend_cderoot(char* path) {
  223. assert(IS_ABSPATH(path));
  224. return format("%s%s", CDE_ROOT_DIR, path);
  225. }
  226. // WARNING: this function behaves differently depending on value of CDE_exec_mode
  227. char* create_abspath_within_cderoot(char* path) {
  228. assert(IS_ABSPATH(path)); // Pre-req: path must be an absolute path!
  229. if (CDE_exec_mode) {
  230. // if we're making a cde-exec run, then simply re-route it
  231. // inside of cde_pseudo_root_dir
  232. /* SUPER WEIRD special case: Sometimes 'path' will ALREADY BE within
  233. cde_pseudo_root_dir, so in those cases, do NOT redirect it again.
  234. Instead, simply strdup the original path (and maybe issue a warning).
  235. This can happen if, say, the target program reads /proc/self/maps
  236. or /proc/<pid>/maps and extracts the final field in a line, which
  237. represents the filename of a file that's been mmapped into the
  238. process's address space. If we're running in cde-exec mode, then
  239. the filename extracted from the maps 'pseudo-file' is actually an
  240. absolute path WITHIN cde-root/. e.g.,:
  241. 00754000-00755000 rw-p 00165000 08:01 85299 /home/pgbovine/cde-package/cde-root/bin/foo
  242. If we try to blindly redirect this path within cde-root/ again,
  243. we'll get something nonsensical like:
  244. /home/pgbovine/cde-package/cde-root/home/pgbovine/cde-package/cde-root/bin/foo
  245. To prevent such atrocities, we just do a simple check to see if a
  246. path is already within cde-root/, and if so, then don't redirect it.
  247. */
  248. if(strncmp(path, cde_pseudo_root_dir, strlen(cde_pseudo_root_dir)) == 0) {
  249. // TODO: maybe print a warning to stderr or a log file?
  250. //fprintf(stderr, "CDE WARNING: refusing to redirect path that's within cde-root/: '%s'", path);
  251. return strdup(path);
  252. }
  253. else {
  254. if (CDE_exec_streaming_mode) {
  255. // copy file into local cde-root/ 'cache' (if necessary)
  256. // we REALLY rely on cached_files_trie for performance to avoid
  257. // unnecessary filesystem accesses
  258. if (TrieContains(cached_files_trie, path)) {
  259. // cache hit! fall-through
  260. }
  261. else {
  262. printf("Accessing remote file: '%s'\n", path);
  263. // copy from remote -> local
  264. create_mirror_file(path, cde_remote_root_dir, cde_pseudo_root_dir);
  265. // VERY IMPORTANT: add ALL paths to cached_files_trie, even
  266. // for nonexistent files, so that we can avoid trying to access
  267. // those nonexistent files on the remote machine in future
  268. // executions. Remember, ANY filesystem access we can avoid
  269. // will lead to speed-ups.
  270. TrieInsert(cached_files_trie, path);
  271. if (cached_files_fp) {
  272. fprintf(cached_files_fp, "%s\n", path);
  273. }
  274. }
  275. }
  276. // normal behavior - redirect into cde-root/
  277. return format("%s%s", cde_pseudo_root_dir, path);
  278. }
  279. }
  280. else {
  281. // if we're making an ORIGINAL (tracing) run, then simply prepend
  282. // CDE_ROOT_DIR to path and canonicalize it
  283. char* path_within_cde_root = prepend_cderoot(path);
  284. // really really tricky ;) if the child process has changed
  285. // directories, then we can't rely on path_within_cde_root to
  286. // exist. instead, we must create an ABSOLUTE path based on
  287. // cde_starting_pwd, which is the directory where cde-exec was first launched!
  288. char* ret = canonicalize_path(path_within_cde_root, cde_starting_pwd);
  289. free(path_within_cde_root);
  290. assert(IS_ABSPATH(ret));
  291. return ret;
  292. }
  293. }
  294. // original_abspath must be an absolute path
  295. // create all the corresponding 'mirror' directories within
  296. // cde-package/cde-root/, MAKING SURE TO CREATE DIRECTORY SYMLINKS
  297. // when necessary (sort of emulate "mkdir -p" functionality)
  298. // if pop_one is non-zero, then pop last element before doing "mkdir -p"
  299. static void make_mirror_dirs_in_cde_package(char* original_abspath, int pop_one) {
  300. create_mirror_dirs(original_abspath, (char*)"", CDE_ROOT_DIR, pop_one);
  301. }
  302. // does simple string comparisons on ABSOLUTE PATHS.
  303. // (tcp argument is optional and used for tcp->p_ignores)
  304. static int ignore_path(char* filename, struct tcb* tcp) {
  305. assert(cde_options_initialized);
  306. // sometimes you will get a BOGUS empty filename ... in that case,
  307. // simply ignore it (this might hide some true errors, though!!!)
  308. if (filename[0] == '\0') {
  309. return 1;
  310. }
  311. assert(IS_ABSPATH(filename));
  312. int i;
  313. // process-specific ignores take precedence over global ignores
  314. // remember, tcp is optional
  315. if (tcp && tcp->p_ignores) {
  316. if (strcmp(filename, tcp->p_ignores->process_name) == 0) {
  317. if (CDE_verbose_mode) {
  318. printf("IGNORED '%s' (process=%s)\n", filename, tcp->p_ignores->process_name);
  319. }
  320. return 1;
  321. }
  322. for (i = 0; i < tcp->p_ignores->process_ignore_prefix_paths_ind; i++) {
  323. char* p = tcp->p_ignores->process_ignore_prefix_paths[i];
  324. if (strncmp(filename, p, strlen(p)) == 0) {
  325. if (CDE_verbose_mode) {
  326. printf("IGNORED '%s' [%s] (process=%s)\n", filename, p, tcp->p_ignores->process_name);
  327. }
  328. return 1;
  329. }
  330. }
  331. }
  332. // redirect paths override ignore paths
  333. for (i = 0; i < redirect_exact_paths_ind; i++) {
  334. if (strcmp(filename, redirect_exact_paths[i]) == 0) {
  335. return 0;
  336. }
  337. }
  338. for (i = 0; i < redirect_prefix_paths_ind; i++) {
  339. char* p = redirect_prefix_paths[i];
  340. if (strncmp(filename, p, strlen(p)) == 0) {
  341. return 0;
  342. }
  343. }
  344. for (i = 0; i < redirect_substr_paths_ind; i++) {
  345. if (strstr(filename, redirect_substr_paths[i])) {
  346. return 0;
  347. }
  348. }
  349. for (i = 0; i < ignore_exact_paths_ind; i++) {
  350. if (strcmp(filename, ignore_exact_paths[i]) == 0) {
  351. return 1;
  352. }
  353. }
  354. for (i = 0; i < ignore_prefix_paths_ind; i++) {
  355. char* p = ignore_prefix_paths[i];
  356. if (strncmp(filename, p, strlen(p)) == 0) {
  357. return 1;
  358. }
  359. }
  360. for (i = 0; i < ignore_substr_paths_ind; i++) {
  361. if (strstr(filename, ignore_substr_paths[i])) {
  362. return 1;
  363. }
  364. }
  365. if (cde_exec_from_outside_cderoot) {
  366. // if we're running cde-exec from OUTSIDE of cde-root/, then adopt a
  367. // 'Union FS' like policy where if a version of the file exists
  368. // within cde-package/cde-root/, then use it (return 0 to NOT
  369. // ignore), otherwise try using the version in the real system
  370. // directory (return 1 to ignore)
  371. struct stat tmp_statbuf;
  372. char* redirected_filename = create_abspath_within_cderoot(filename);
  373. if (stat(redirected_filename, &tmp_statbuf) == 0) {
  374. free(redirected_filename);
  375. return 0;
  376. }
  377. else {
  378. free(redirected_filename);
  379. return 1;
  380. }
  381. }
  382. else {
  383. // do NOT ignore by default. if you want to ignore everything except
  384. // for what's explicitly specified by 'redirect' directives, then
  385. // use an option like "ignore_prefix=/" (to ignore everything) and
  386. // then add redirect_prefix= and redirect_exact= directives accordingly
  387. return 0;
  388. }
  389. }
  390. // copies a file into its respective location within cde-root/,
  391. // creating all necessary intermediate sub-directories and symlinks
  392. //
  393. // if filename is a symlink, then copy both it AND its target into cde-root
  394. static void copy_file_into_cde_root(char* filename, char* child_current_pwd) {
  395. assert(filename);
  396. assert(!CDE_exec_mode);
  397. // resolve absolute path relative to child_current_pwd and
  398. // get rid of '..', '.', and other weird symbols
  399. char* filename_abspath = canonicalize_path(filename, child_current_pwd);
  400. // don't copy filename that we're ignoring (remember to use ABSOLUTE PATH)
  401. if (ignore_path(filename_abspath, NULL)) {
  402. free(filename_abspath);
  403. return;
  404. }
  405. if (CDE_copied_files_logfile) {
  406. fprintf(CDE_copied_files_logfile, "%s\n", filename_abspath);
  407. }
  408. create_mirror_file(filename_abspath, (char*)"", CDE_ROOT_DIR);
  409. free(filename_abspath);
  410. }
// character-class helpers, declared by hand here
// (NOTE(review): presumably to avoid pulling in <ctype.h> -- confirm)
extern int isascii(int c);
extern int isprint(int c);
extern int isspace(int c);
// true if c is a tab or a printable ASCII character
// (c is evaluated more than once, so don't pass an expression with side effects)
#define STRING_ISGRAPHIC(c) ( ((c) == '\t' || (isascii (c) && isprint (c))) )
  415. // modify a single argument to the given system call
  416. // to a path within cde-root/, if applicable
  417. //
  418. // arg_num == 1 mean modify first register arg
  419. // arg_num == 2 mean modify second register arg
  420. static void modify_syscall_single_arg(struct tcb* tcp, int arg_num, char* filename) {
  421. assert(CDE_exec_mode);
  422. assert(filename);
  423. char* redirected_filename =
  424. redirect_filename_into_cderoot(filename, tcp->current_dir, tcp);
  425. if (!redirected_filename) {
  426. return;
  427. }
  428. if (!tcp->childshm) {
  429. begin_setup_shmat(tcp);
  430. // no more need for filename, so don't leak it
  431. free(redirected_filename);
  432. return; // MUST punt early here!!!
  433. }
  434. // redirect all requests for absolute paths to version within cde-root/
  435. // if those files exist!
  436. strcpy(tcp->localshm, redirected_filename); // hopefully this doesn't overflow :0
  437. //printf(" redirect %s\n", tcp->localshm);
  438. //static char tmp[MAXPATHLEN];
  439. //EXITIF(umovestr(tcp, (long)tcp->childshm, sizeof tmp, tmp) < 0);
  440. //printf(" %s\n", tmp);
  441. struct user_regs_struct cur_regs;
  442. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  443. if (arg_num == 1) {
  444. #if defined (I386)
  445. cur_regs.ebx = (long)tcp->childshm;
  446. #elif defined(X86_64)
  447. if (IS_32BIT_EMU) {
  448. cur_regs.rbx = (long)tcp->childshm;
  449. }
  450. else {
  451. cur_regs.rdi = (long)tcp->childshm;
  452. }
  453. #else
  454. #error "Unknown architecture (not I386 or X86_64)"
  455. #endif
  456. }
  457. else {
  458. assert(arg_num == 2);
  459. #if defined (I386)
  460. cur_regs.ecx = (long)tcp->childshm;
  461. #elif defined(X86_64)
  462. if (IS_32BIT_EMU) {
  463. cur_regs.rcx = (long)tcp->childshm;
  464. }
  465. else {
  466. cur_regs.rsi = (long)tcp->childshm;
  467. }
  468. #else
  469. #error "Unknown architecture (not I386 or X86_64)"
  470. #endif
  471. }
  472. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  473. free(redirected_filename);
  474. }
  475. // copy and paste from modify_syscall_first_arg ;)
  476. static void modify_syscall_two_args(struct tcb* tcp) {
  477. assert(CDE_exec_mode);
  478. if (!tcp->childshm) {
  479. begin_setup_shmat(tcp);
  480. return; // MUST punt early here!!!
  481. }
  482. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  483. char* redirected_filename1 =
  484. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  485. free(filename1);
  486. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[1]);
  487. char* redirected_filename2 =
  488. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  489. free(filename2);
  490. // gotta do both, yuck
  491. if (redirected_filename1 && redirected_filename2) {
  492. strcpy(tcp->localshm, redirected_filename1);
  493. int len1 = strlen(redirected_filename1);
  494. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  495. strcpy(redirect_file2_begin, redirected_filename2);
  496. struct user_regs_struct cur_regs;
  497. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  498. #if defined (I386)
  499. cur_regs.ebx = (long)tcp->childshm;
  500. cur_regs.ecx = (long)(((char*)tcp->childshm) + len1 + 1);
  501. #elif defined(X86_64)
  502. if (IS_32BIT_EMU) {
  503. cur_regs.rbx = (long)tcp->childshm;
  504. cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
  505. }
  506. else {
  507. cur_regs.rdi = (long)tcp->childshm;
  508. cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
  509. }
  510. #else
  511. #error "Unknown architecture (not I386 or X86_64)"
  512. #endif
  513. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  514. //static char tmp[MAXPATHLEN];
  515. //EXITIF(umovestr(tcp, (long)cur_regs.ebx, sizeof tmp, tmp) < 0);
  516. //printf(" ebx: %s\n", tmp);
  517. //EXITIF(umovestr(tcp, (long)cur_regs.ecx, sizeof tmp, tmp) < 0);
  518. //printf(" ecx: %s\n", tmp);
  519. }
  520. else if (redirected_filename1) {
  521. strcpy(tcp->localshm, redirected_filename1);
  522. struct user_regs_struct cur_regs;
  523. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  524. #if defined (I386)
  525. cur_regs.ebx = (long)tcp->childshm; // only set EBX
  526. #elif defined(X86_64)
  527. if (IS_32BIT_EMU) {
  528. cur_regs.rbx = (long)tcp->childshm;
  529. }
  530. else {
  531. cur_regs.rdi = (long)tcp->childshm;
  532. }
  533. #else
  534. #error "Unknown architecture (not I386 or X86_64)"
  535. #endif
  536. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  537. }
  538. else if (redirected_filename2) {
  539. strcpy(tcp->localshm, redirected_filename2);
  540. struct user_regs_struct cur_regs;
  541. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  542. #if defined (I386)
  543. cur_regs.ecx = (long)tcp->childshm; // only set ECX
  544. #elif defined(X86_64)
  545. if (IS_32BIT_EMU) {
  546. cur_regs.rcx = (long)tcp->childshm;
  547. }
  548. else {
  549. cur_regs.rsi = (long)tcp->childshm;
  550. }
  551. #else
  552. #error "Unknown architecture (not I386 or X86_64)"
  553. #endif
  554. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  555. }
  556. if (redirected_filename1) free(redirected_filename1);
  557. if (redirected_filename2) free(redirected_filename2);
  558. }
  559. // modify the second and fourth args to redirect into cde-root/
  560. // really nasty copy-and-paste from modify_syscall_two_args above
  561. static void modify_syscall_second_and_fourth_args(struct tcb* tcp) {
  562. assert(CDE_exec_mode);
  563. if (!tcp->childshm) {
  564. begin_setup_shmat(tcp);
  565. return; // MUST punt early here!!!
  566. }
  567. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[1]);
  568. char* redirected_filename1 =
  569. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  570. free(filename1);
  571. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[3]);
  572. char* redirected_filename2 =
  573. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  574. free(filename2);
  575. // gotta do both, yuck
  576. if (redirected_filename1 && redirected_filename2) {
  577. strcpy(tcp->localshm, redirected_filename1);
  578. int len1 = strlen(redirected_filename1);
  579. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  580. strcpy(redirect_file2_begin, redirected_filename2);
  581. struct user_regs_struct cur_regs;
  582. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  583. #if defined (I386)
  584. cur_regs.ecx = (long)tcp->childshm;
  585. cur_regs.esi = (long)(((char*)tcp->childshm) + len1 + 1);
  586. #elif defined(X86_64)
  587. if (IS_32BIT_EMU) {
  588. cur_regs.rcx = (long)tcp->childshm;
  589. cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
  590. }
  591. else {
  592. cur_regs.rsi = (long)tcp->childshm;
  593. cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
  594. }
  595. #else
  596. #error "Unknown architecture (not I386 or X86_64)"
  597. #endif
  598. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  599. }
  600. else if (redirected_filename1) {
  601. strcpy(tcp->localshm, redirected_filename1);
  602. struct user_regs_struct cur_regs;
  603. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  604. #if defined (I386)
  605. cur_regs.ecx = (long)tcp->childshm;
  606. #elif defined(X86_64)
  607. if (IS_32BIT_EMU) {
  608. cur_regs.rcx = (long)tcp->childshm;
  609. }
  610. else {
  611. cur_regs.rsi = (long)tcp->childshm;
  612. }
  613. #else
  614. #error "Unknown architecture (not I386 or X86_64)"
  615. #endif
  616. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  617. }
  618. else if (redirected_filename2) {
  619. strcpy(tcp->localshm, redirected_filename2);
  620. struct user_regs_struct cur_regs;
  621. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  622. #if defined (I386)
  623. cur_regs.esi = (long)tcp->childshm; // only set ECX
  624. #elif defined(X86_64)
  625. if (IS_32BIT_EMU) {
  626. cur_regs.rsi = (long)tcp->childshm;
  627. }
  628. else {
  629. cur_regs.rcx = (long)tcp->childshm;
  630. }
  631. #else
  632. #error "Unknown architecture (not I386 or X86_64)"
  633. #endif
  634. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  635. }
  636. if (redirected_filename1) free(redirected_filename1);
  637. if (redirected_filename2) free(redirected_filename2);
  638. }
  639. // modify the first and third args to redirect into cde-root/
  640. // really nasty copy-and-paste from modify_syscall_two_args above
  641. static void modify_syscall_first_and_third_args(struct tcb* tcp) {
  642. assert(CDE_exec_mode);
  643. if (!tcp->childshm) {
  644. begin_setup_shmat(tcp);
  645. return; // MUST punt early here!!!
  646. }
  647. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  648. char* redirected_filename1 =
  649. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  650. free(filename1);
  651. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[2]);
  652. char* redirected_filename2 =
  653. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  654. free(filename2);
  655. // gotta do both, yuck
  656. if (redirected_filename1 && redirected_filename2) {
  657. strcpy(tcp->localshm, redirected_filename1);
  658. int len1 = strlen(redirected_filename1);
  659. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  660. strcpy(redirect_file2_begin, redirected_filename2);
  661. struct user_regs_struct cur_regs;
  662. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  663. #if defined (I386)
  664. cur_regs.ebx = (long)tcp->childshm;
  665. cur_regs.edx = (long)(((char*)tcp->childshm) + len1 + 1);
  666. #elif defined(X86_64)
  667. if (IS_32BIT_EMU) {
  668. cur_regs.rbx = (long)tcp->childshm;
  669. cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
  670. }
  671. else {
  672. cur_regs.rdi = (long)tcp->childshm;
  673. cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
  674. }
  675. #else
  676. #error "Unknown architecture (not I386 or X86_64)"
  677. #endif
  678. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  679. }
  680. else if (redirected_filename1) {
  681. strcpy(tcp->localshm, redirected_filename1);
  682. struct user_regs_struct cur_regs;
  683. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  684. #if defined (I386)
  685. cur_regs.ebx = (long)tcp->childshm;
  686. #elif defined(X86_64)
  687. if (IS_32BIT_EMU) {
  688. cur_regs.rbx = (long)tcp->childshm;
  689. }
  690. else {
  691. cur_regs.rdi = (long)tcp->childshm;
  692. }
  693. #else
  694. #error "Unknown architecture (not I386 or X86_64)"
  695. #endif
  696. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  697. }
  698. else if (redirected_filename2) {
  699. strcpy(tcp->localshm, redirected_filename2);
  700. struct user_regs_struct cur_regs;
  701. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  702. #if defined (I386)
  703. cur_regs.edx = (long)tcp->childshm; // only set ECX
  704. #elif defined(X86_64)
  705. if (IS_32BIT_EMU) {
  706. cur_regs.rdx = (long)tcp->childshm;
  707. }
  708. else {
  709. cur_regs.rdx = (long)tcp->childshm;
  710. }
  711. #else
  712. #error "Unknown architecture (not I386 or X86_64)"
  713. #endif
  714. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  715. }
  716. if (redirected_filename1) free(redirected_filename1);
  717. if (redirected_filename2) free(redirected_filename2);
  718. }
  719. // create a malloc'ed filename that contains a version within cde-root/
  720. // return NULL if the filename should NOT be redirected
  721. // WARNING: behavior differs based on CDE_exec_mode!
  722. //
  723. // (tcp argument is optional and used to pass into ignore_path)
  724. static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp) {
  725. /* sometimes this is called with a null arg ... investigate further
  726. before making this hack permanent, though
  727. if (!filename) {
  728. return NULL;
  729. }
  730. */
  731. assert(filename);
  732. assert(child_current_pwd);
  733. char* filename_abspath = NULL;
  734. if (CDE_exec_mode) {
  735. // canonicalize_path has the desirable side effect of preventing
  736. // 'malicious' paths from going below the pseudo-root '/' ... e.g.,
  737. // if filename is '/home/pgbovine/../../../../'
  738. // then filename_abspath is simply '/'
  739. //
  740. // we resolve relative paths w.r.t.
  741. // extract_sandboxed_pwd(child_current_pwd), so that programs
  742. // can't use relative paths like '../../../' to get out of sandbox
  743. //
  744. // this is why it's VERY IMPORTANT to canonicalize before creating a
  745. // path into CDE_ROOT_DIR, so that absolute paths can't 'escape'
  746. // the sandbox
  747. filename_abspath =
  748. canonicalize_path(filename, extract_sandboxed_pwd(child_current_pwd, tcp));
  749. }
  750. else {
  751. filename_abspath = canonicalize_path(filename, child_current_pwd);
  752. }
  753. assert(filename_abspath);
  754. // don't redirect paths that we're ignoring (remember to use ABSOLUTE PATH)
  755. if (ignore_path(filename_abspath, tcp)) {
  756. free(filename_abspath);
  757. return NULL;
  758. }
  759. // WARNING: behavior of create_abspath_within_cderoot
  760. // differs based on CDE_exec_mode!
  761. char* ret = create_abspath_within_cderoot(filename_abspath);
  762. if (CDE_verbose_mode) {
  763. printf("redirect '%s' => '%s'\n", filename, ret);
  764. }
  765. free(filename_abspath);
  766. return ret;
  767. }
  768. /* standard functionality for syscalls that take a filename as first argument
  769. cde (package creation) mode:
  770. - if abspath(filename) is outside pwd, then copy it into cde-root/
  771. cde-exec mode:
  772. - if abspath(filename) is outside pwd, then redirect it into cde-root/
  773. sys_open(filename, flags, mode)
  774. sys_creat(filename, mode)
  775. sys_chmod(filename, ...)
  776. sys_chown(filename, ...)
  777. sys_chown16(filename, ...)
  778. sys_lchown(filename, ...)
  779. sys_lchown16(filename, ...)
  780. sys_stat(filename, ...)
  781. sys_stat64(filename, ...)
  782. sys_lstat(filename, ...)
  783. sys_lstat64(filename, ...)
  784. sys_truncate(path, length)
  785. sys_truncate64(path, length)
  786. sys_access(filename, mode)
  787. sys_utime(filename, ...)
  788. sys_readlink(path, ...)
  789. */
  790. void CDE_begin_standard_fileop(struct tcb* tcp, const char* syscall_name) {
  791. //char* filename = strcpy_from_child(tcp, tcp->u_arg[0]);
  792. /* Patch by Edward Wang
  793. "Attached is a patch to fix a small bug that happens when a syscall
  794. is called without any arguments (tcp->u_arg[0] is "0"). This
  795. happened to me a few times when I was trying to package a portable
  796. version of VLC media player."
  797. */
  798. char* filename = strcpy_from_child_or_null(tcp, tcp->u_arg[0]);
  799. if (filename == NULL)
  800. return;
  801. if (CDE_verbose_mode) {
  802. printf("[%d] BEGIN %s '%s'\n", tcp->pid, syscall_name, filename);
  803. }
  804. if (CDE_exec_mode) {
  805. if (filename) {
  806. modify_syscall_single_arg(tcp, 1, filename);
  807. }
  808. }
  809. else {
  810. // pre-emptively copy the given file into cde-root/, silencing warnings for
  811. // non-existent files.
  812. // (Note that filename can sometimes be a JUNKY STRING due to weird race
  813. // conditions when strace is tracing complex multi-process applications)
  814. if (filename) {
  815. copy_file_into_cde_root(filename, tcp->current_dir);
  816. }
  817. }
  818. free(filename);
  819. }
  820. /* standard functionality for *at syscalls that take a dirfd as first
  821. argument, followed by a filepath
  822. e.g., see documentation for http://linux.die.net/man/2/openat
  823. example syscalls:
  824. openat,faccessat,fstatat64,fchownat,fchmodat,futimesat,mknodat
  825. if filepath is an absolute path, or if filepath is a relative path but
  826. dirfd is AT_FDCWD, then:
  827. cde (package creation) mode:
  828. - if abspath(filepath) is outside pwd, then copy it into cde-root/
  829. exec mode:
  830. - if abspath(filepath) is outside pwd, then redirect it into cde-root/
  831. issue a warning if filepath is a relative path but dirfd is NOT AT_FDCWD
  832. */
  833. void CDE_begin_at_fileop(struct tcb* tcp, const char* syscall_name) {
  834. char* filename = strcpy_from_child(tcp, tcp->u_arg[1]);
  835. if (CDE_verbose_mode) {
  836. printf("[%d] BEGIN %s '%s' (dirfd=%u)\n", tcp->pid, syscall_name, filename, (unsigned int)tcp->u_arg[0]);
  837. }
  838. if (!IS_ABSPATH(filename) && tcp->u_arg[0] != AT_FDCWD) {
  839. fprintf(stderr,
  840. "CDE WARNING (unsupported operation): %s '%s' is a relative path and dirfd != AT_FDCWD\n",
  841. syscall_name, filename);
  842. goto done; // punt early!
  843. }
  844. if (CDE_exec_mode) {
  845. modify_syscall_single_arg(tcp, 2, filename);
  846. }
  847. else {
  848. // pre-emptively copy the given file into cde-root/, silencing warnings for
  849. // non-existent files.
  850. // (Note that filename can sometimes be a JUNKY STRING due to weird race
  851. // conditions when strace is tracing complex multi-process applications)
  852. copy_file_into_cde_root(filename, tcp->current_dir);
  853. }
  854. done:
  855. free(filename);
  856. }
  857. // input_buffer_arg_index is the index of the input filename argument
  858. // output_buffer_arg_index is the index of the argument where the output
  859. // buffer is being held (we clobber this in some special cases)
  860. static void CDE_end_readlink_internal(struct tcb* tcp, int input_buffer_arg_index, int output_buffer_arg_index) {
  861. char* filename = strcpy_from_child(tcp, tcp->u_arg[input_buffer_arg_index]);
  862. if (CDE_exec_mode) {
  863. if (tcp->u_rval >= 0) {
  864. // super hack! if the program is trying to access the special
  865. // /proc/self/exe file, return perceived_program_fullpath if
  866. // available, or else cde-exec will ERRONEOUSLY return the path
  867. // to the dynamic linker (e.g., ld-linux.so.2).
  868. //
  869. // programs like 'java' rely on the value of /proc/self/exe
  870. // being the true path to the executable, in order to dynamically
  871. // load libraries based on paths relative to that full path!
  872. char is_proc_self_exe = (strcmp(filename, "/proc/self/exe") == 0);
  873. // another super hack! programs like Google Earth
  874. // ('googleearth-bin') access /proc/self/exe as /proc/<pid>/exe
  875. // where <pid> is ITS OWN PID! be sure to handle that case properly
  876. // (but don't worry about handling cases where <pid> is the PID of
  877. // another process).
  878. //
  879. // (again, these programs use the real path of /proc/<pid>/exe as
  880. // a basis for dynamically loading libraries, so we must properly
  881. // 'fake' this value)
  882. char* self_pid_name = format("/proc/%d/exe", tcp->pid);
  883. char is_proc_self_pid_exe = (strcmp(filename, self_pid_name) == 0);
  884. free(self_pid_name);
  885. if ((is_proc_self_exe || is_proc_self_pid_exe) &&
  886. tcp->perceived_program_fullpath) {
  887. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  888. tcp->perceived_program_fullpath,
  889. strlen(tcp->perceived_program_fullpath) + 1);
  890. // VERY SUBTLE - set %eax (the syscall return value) to the length
  891. // of the FAKED STRING, since readlink is supposed to return the
  892. // length of the returned path (some programs like Python rely
  893. // on that length to allocated memory)
  894. struct user_regs_struct cur_regs;
  895. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  896. #if defined (I386)
  897. cur_regs.eax = (long)strlen(tcp->perceived_program_fullpath);
  898. #elif defined(X86_64)
  899. cur_regs.rax = (long)strlen(tcp->perceived_program_fullpath);
  900. #else
  901. #error "Unknown architecture (not I386 or X86_64)"
  902. #endif
  903. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  904. }
  905. // if the program tries to read /proc/self/cwd, then treat it like
  906. // a CDE_end_getcwd call, returning a fake cwd:
  907. //
  908. // (note that we don't handle /proc/<pid>/cwd yet)
  909. else if (strcmp(filename, "/proc/self/cwd") == 0) {
  910. // copied from CDE_end_getcwd
  911. char* sandboxed_pwd = extract_sandboxed_pwd(tcp->current_dir, tcp);
  912. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  913. sandboxed_pwd, strlen(sandboxed_pwd) + 1);
  914. // VERY SUBTLE - set %eax (the syscall return value) to the length
  915. // of the FAKED STRING, since readlink is supposed to return the
  916. // length of the returned path (some programs like Python rely
  917. // on that length to allocated memory)
  918. struct user_regs_struct cur_regs;
  919. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  920. #if defined (I386)
  921. cur_regs.eax = (long)strlen(sandboxed_pwd);
  922. #elif defined(X86_64)
  923. cur_regs.rax = (long)strlen(sandboxed_pwd);
  924. #else
  925. #error "Unknown architecture (not I386 or X86_64)"
  926. #endif
  927. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  928. }
  929. else {
  930. // inspect the return value (stored in readlink_target) and if
  931. // it's a relative path that starts with './' and contains a '//'
  932. // marker, then it MIGHT actually be a "munged" version of an
  933. // absolute path symlink that was turned into a relative path
  934. // when the original file was copied (okapi-ed) into the package.
  935. // e.g., a symlink to an absolute path like /lib/libc.so.6 might
  936. // be munged into some monstrous relative path like:
  937. //
  938. // ./../../../../..//lib/libc.so.6
  939. //
  940. // so that it can reference the version of /lib/libc.so.6 from
  941. // WITHIN THE PACKAGE rather than the native one on the target
  942. // machine. However, when the target program does a readlink(),
  943. // it expects to the syscall to return '/lib/libc.so.6', so we
  944. // must properly "un-munge" these sorts of symlinks.
  945. //
  946. // (Note that we don't have this problem with symlinks to
  947. // relative paths.)
  948. // first get the length of the return value string ...
  949. struct user_regs_struct cur_regs;
  950. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  951. #if defined (I386)
  952. int ret_length = cur_regs.eax;
  953. #elif defined(X86_64)
  954. int ret_length = cur_regs.rax;
  955. #else
  956. #error "Unknown architecture (not I386 or X86_64)"
  957. #endif
  958. char readlink_target[MAXPATHLEN];
  959. if (umoven(tcp, tcp->u_arg[output_buffer_arg_index], ret_length, readlink_target) == 0) {
  960. // remember to cap off the end ...
  961. readlink_target[ret_length] = '\0';
  962. // now readlink_target is the string that's "returned" by this
  963. // readlink syscall
  964. // is there a leading './' marker?
  965. if (strncmp(readlink_target, "./", 2) == 0) {
  966. // now check for a distinctive '//' marker, indicative of munged paths.
  967. // However, this simple check can still result in false positives!!!
  968. char* suffix = strstr(readlink_target, "//");
  969. if (suffix) {
  970. assert(suffix[0] == '/');
  971. suffix++; // skip one of the slashes
  972. assert(IS_ABSPATH(suffix));
  973. // as a final sanity check, see if this file actually exists
  974. // within cde_pseudo_root_dir, to prevent false positives
  975. char* actual_path = format("%s%s", cde_pseudo_root_dir, suffix);
  976. struct stat st;
  977. if (lstat(actual_path, &st) == 0) {
  978. // clobber the syscall's return value with 'suffix'
  979. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  980. suffix, strlen(suffix) + 1);
  981. // VERY SUBTLE - set %eax (the syscall return value) to the length
  982. // of the FAKED STRING, since readlink is supposed to return the
  983. // length of the returned path (some programs like Python rely
  984. // on that length to allocated memory)
  985. #if defined (I386)
  986. cur_regs.eax = (long)strlen(suffix);
  987. #elif defined(X86_64)
  988. cur_regs.rax = (long)strlen(suffix);
  989. #else
  990. #error "Unknown architecture (not I386 or X86_64)"
  991. #endif
  992. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  993. }
  994. free(actual_path);
  995. }
  996. }
  997. }
  998. }
  999. }
  1000. }
  1001. free(filename);
  1002. }
  1003. void CDE_end_readlink(struct tcb* tcp) {
  1004. // output buffer is second argument (index 1)
  1005. CDE_end_readlink_internal(tcp, 0, 1);
  1006. }
  1007. void CDE_end_readlinkat(struct tcb* tcp) {
  1008. // output buffer is third argument (index 2)
  1009. CDE_end_readlink_internal(tcp, 1, 2);
  1010. }
  1011. void CDE_begin_execve(struct tcb* tcp) {
  1012. // null all these out up-top, then deallocate them in 'done'
  1013. char* exe_filename = NULL;
  1014. char* redirected_path = NULL;
  1015. char* exe_filename_abspath = NULL;
  1016. char* script_command = NULL;
  1017. char* ld_linux_filename = NULL;
  1018. char* ld_linux_fullpath = NULL;
  1019. exe_filename = strcpy_from_child(tcp, tcp->u_arg[0]);
  1020. // only attempt to do the ld-linux.so.2 trick if exe_filename
  1021. // is a valid executable file ... otherwise don't do
  1022. // anything and simply let the execve fail just like it's supposed to
  1023. struct stat filename_stat;
  1024. // NULL out p_ignores since you might have inherited it from your parent after
  1025. // forking, but when you exec, you're probably now executing a different program
  1026. tcp->p_ignores = NULL;
  1027. if (CDE_verbose_mode) {
  1028. printf("[%d] CDE_begin_execve '%s'\n", tcp->pid, exe_filename);
  1029. }
  1030. if (CDE_exec_mode) {
  1031. // if we're purposely ignoring a path to an executable (e.g.,
  1032. // ignoring "/bin/bash" to prevent crashes on certain Ubuntu
  1033. // machines), then DO NOT use the ld-linux trick and simply
  1034. // execve the file normally
  1035. //
  1036. // (note that this check doesn't pick up the case when a textual script
  1037. // is being executed (e.g., with "#!/bin/bash" as its shebang line),
  1038. // since exe_filename is the script's name and NOT "/bin/bash".
  1039. // We will need to handle this case LATER in the function.)
  1040. char* opened_filename_abspath =
  1041. canonicalize_path(exe_filename, extract_sandboxed_pwd(tcp->current_dir, tcp));
  1042. if (ignore_path(opened_filename_abspath, tcp)) {
  1043. free(opened_filename_abspath);
  1044. goto done;
  1045. }
  1046. // check for presence in process_ignores, and if found, set
  1047. // tcp->p_ignores and punt
  1048. int i;
  1049. for (i = 0; i < process_ignores_ind; i++) {
  1050. if (strcmp(opened_filename_abspath, process_ignores[i].process_name) == 0) {
  1051. //printf("IGNORED '%s'\n", opened_filename_abspath);
  1052. tcp->p_ignores = &process_ignores[i];
  1053. free(opened_filename_abspath);
  1054. goto done; // TOTALLY PUNT!!!
  1055. }
  1056. }
  1057. free(opened_filename_abspath);
  1058. redirected_path = redirect_filename_into_cderoot(exe_filename, tcp->current_dir, tcp);
  1059. }
  1060. char* path_to_executable = NULL;
  1061. if (redirected_path) {
  1062. // TODO: we don't check whether it's a real executable file :/
  1063. if (stat(redirected_path, &filename_stat) != 0) {
  1064. goto done;
  1065. }
  1066. path_to_executable = redirected_path;
  1067. }
  1068. else {
  1069. // just check the file itself (REMEMBER TO GET ITS ABSOLUTE PATH!)
  1070. exe_filename_abspath = canonicalize_path(exe_filename, tcp->current_dir);
  1071. // TODO: we don't check whether it's a real executable file :/
  1072. if (stat(exe_filename_abspath, &filename_stat) != 0) {
  1073. goto done;
  1074. }
  1075. path_to_executable = exe_filename_abspath;
  1076. }
  1077. assert(path_to_executable);
  1078. // WARNING: ld-linux.so.2 only works on dynamically-linked binary
  1079. // executable files; it will fail if you invoke it on:
  1080. // - a textual script file
  1081. // - a statically-linked binary
  1082. //
  1083. // for a textual script file, we must invoke ld-linux.so.2 on the
  1084. // target of the shebang #! (which can itself take arguments)
  1085. //
  1086. // e.g., #! /bin/sh
  1087. // e.g., #! /usr/bin/env python
  1088. char is_textual_script = 0;
  1089. char is_elf_binary = 0;
  1090. FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
  1091. assert(f);
  1092. char header[5];
  1093. memset(header, 0, sizeof(header));
  1094. fgets(header, 5, f); // 5 means 4 bytes + 1 null terminating byte
  1095. if (strcmp(header, "\177ELF") == 0) {
  1096. is_elf_binary = 1;
  1097. }
  1098. fclose(f);
  1099. if (is_elf_binary) {
  1100. // look for whether it's a statically-linked binary ...
  1101. // if so, then there is NO need to call ld-linux.so.2 on it;
  1102. // we can just execute it directly (in fact, ld-linux.so.2
  1103. // will fail on static binaries!)
  1104. // mallocs a new string if successful
  1105. // (this string is most likely "/lib/ld-linux.so.2")
  1106. ld_linux_filename = find_ELF_program_interpreter(path_to_executable);
  1107. if (!ld_linux_filename) {
  1108. // if the program interpreter isn't found, then it's a static
  1109. // binary, so let the execve call proceed normally
  1110. if (CDE_exec_mode) {
  1111. // redirect the executable's path to within $CDE_ROOT_DIR:
  1112. modify_syscall_single_arg(tcp, 1, exe_filename);
  1113. }
  1114. else {
  1115. copy_file_into_cde_root(exe_filename, tcp->current_dir);
  1116. }
  1117. // remember to EXIT EARLY!
  1118. goto done;
  1119. }
  1120. assert(IS_ABSPATH(ld_linux_filename));
  1121. }
  1122. else {
  1123. // find out whether it's a script file (starting with #! line)
  1124. FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
  1125. size_t len = 0;
  1126. ssize_t read;
  1127. char* tmp = NULL; // getline() mallocs for us
  1128. read = getline(&tmp, &len, f);
  1129. if (read > 2) {
  1130. assert(tmp[read-1] == '\n'); // strip of trailing newline
  1131. tmp[read-1] = '\0'; // strip of trailing newline
  1132. if (tmp[0] == '#' && tmp[1] == '!') {
  1133. is_textual_script = 1;
  1134. script_command = strdup(&tmp[2]);
  1135. }
  1136. }
  1137. free(tmp);
  1138. /* Patch from Yang Chen
  1139. "I am packaging our tool using it. I found there is a possible
  1140. bug in cde.c where opened files were not closed. In a long run,
  1141. it could cause fopen fail. I noticed it because our toolchain has
  1142. a lot of invocations on shell scripts and hence hit this
  1143. problem.""
  1144. */
  1145. fclose(f);
  1146. if (!script_command) {
  1147. fprintf(stderr, "Fatal error: '%s' seems to be a script without a #! line.\n(cde can only execute scripts that start with a proper #! line)\n",
  1148. path_to_executable);
  1149. exit(1);
  1150. }
  1151. // now find the program interpreter for the script_command
  1152. // executable, be sure to grab the FIRST TOKEN since that's
  1153. // the actual executable name ...
  1154. // TODO: this will fail if the executable's path has a space in it
  1155. //
  1156. // mallocs a new string if successful
  1157. // (this string is most likely "/lib/ld-linux.so.2")
  1158. // libc is so dumb ... strtok() alters its argument in an un-kosher way
  1159. tmp = strdup(script_command);
  1160. char* p = strtok(tmp, " ");
  1161. // to have find_ELF_program_interpreter succeed, we might need to
  1162. // redirect the path inside CDE_ROOT_DIR:
  1163. char* script_command_filename = NULL;
  1164. if (CDE_exec_mode) {
  1165. // this path should look like the name in the #! line, just
  1166. // canonicalized to be an absolute path
  1167. char* script_command_abspath =
  1168. canonicalize_path(p, extract_sandboxed_pwd(tcp->current_dir, tcp));
  1169. if (ignore_path(script_command_abspath, tcp)) {
  1170. free(script_command_abspath);
  1171. free(tmp);
  1172. goto done; // PUNT!
  1173. }
  1174. // check for presence in process_ignores, and if found, set
  1175. // tcp->p_ignores and punt
  1176. int i;
  1177. for (i = 0; i < process_ignores_ind; i++) {
  1178. if (strcmp(script_command_abspath, process_ignores[i].process_name) == 0) {
  1179. //printf("IGNORED (script) '%s'\n", script_command_abspath);
  1180. tcp->p_ignores = &process_ignores[i];
  1181. free(script_command_abspath);
  1182. free(tmp);
  1183. goto done; // TOTALLY PUNT!!!
  1184. }
  1185. }
  1186. free(script_command_abspath);
  1187. script_command_filename = redirect_filename_into_cderoot(p, tcp->current_dir, tcp);
  1188. }
  1189. if (!script_command_filename) {
  1190. script_command_filename = strdup(p);
  1191. }
  1192. ld_linux_filename = find_ELF_program_interpreter(script_command_filename);
  1193. free(script_command_filename);
  1194. free(tmp);
  1195. if (!ld_linux_filename) {
  1196. // if the program interpreter isn't found, then it's a static
  1197. // binary, so let the execve call proceed unmodified
  1198. // TODO: is this the right thing to do here? I think we might
  1199. // need to do something better here (think harder about this case!)
  1200. if (CDE_exec_mode) {
  1201. // redirect the executable's path to within cde-root/:
  1202. modify_syscall_single_arg(tcp, 1, exe_filename);
  1203. }
  1204. goto done;
  1205. }
  1206. assert(IS_ABSPATH(ld_linux_filename));
  1207. }
  1208. assert(!(is_elf_binary && is_textual_script));
  1209. if (CDE_exec_mode) {
  1210. // set up shared memory segment if we haven't done so yet
  1211. if (!tcp->childshm) {
  1212. begin_setup_shmat(tcp);
  1213. goto done; // MUST punt early here!!!
  1214. }
  1215. ld_linux_fullpath = create_abspath_within_cderoot(ld_linux_filename);
  1216. /* we're gonna do some craziness here to redirect the OS to call
  1217. cde-root/lib/ld-linux.so.2 rather than the real program, since
  1218. ld-linux.so.2 is closely-tied with the version of libc in
  1219. cde-root/. */
  1220. if (is_textual_script) {
  1221. /* we're running a script with a shebang (#!), so
  1222. let's set up the shared memory segment (tcp->localshm) like so:
  1223. if (CDE_use_linker_from_package) {
  1224. base --> tcp->localshm : "cde-root/lib/ld-linux.so.2" (ld_linux_fullpath)
  1225. script_command token 0 : "/usr/bin/env"
  1226. script_command token 1 : "python"
  1227. ... (for as many t…

Large files files are truncated, but you can click here to view the full file