/strace-4.6/cde.c

https://github.com/gidden/CDE · C · 3782 lines · 2204 code · 613 blank · 965 comment · 525 complexity · 124eb0ee2f55ded549539ccbc1129166 MD5 · raw file

  1. /*
  2. CDE: Code, Data, and Environment packaging for Linux
  3. http://www.stanford.edu/~pgbovine/cde.html
  4. Philip Guo
  5. CDE is currently licensed under GPL v3:
  6. Copyright (c) 2010-2011 Philip Guo <pg@cs.stanford.edu>
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 3 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. */
  16. /* Linux system call calling conventions:
  17. According to this page:
  18. http://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-on-x86-64
  19. ... and the source code for systrace: http://www.citi.umich.edu/u/provos/systrace/
  20. 32-bit x86:
  21. syscall number: %eax
  22. first 6 syscall parameters: %ebx, %ecx, %edx, %esi, %edi, %ebp
  23. 64-bit x86-64:
  24. syscall number: %rax
  25. first 6 syscall parameters (for a 64-bit target process): %rdi, %rsi, %rdx, %r10, %r8 and %r9 (the syscall ABI passes the 4th argument in %r10, not %rcx as ordinary function calls do)
  26. first 6 syscall parameters (for a 32-bit target process): %rbx, %rcx, %rdx, %rsi, %rdi, %rbp
  27. (note how these are similar to the 32-bit syscall parameter registers)
  28. */
  29. #include "cde.h"
  30. #include "okapi.h"
  31. #include <dirent.h>
  32. // for CDE_begin_socket_bind_or_connect
  33. #include <sys/socket.h>
  34. #include <sys/un.h>
  35. #include <time.h>
  36. #include <sys/utsname.h> // for uname
  37. // TODO: eliminate this hack if it results in a compile-time error
  38. #include "config.h" // to get I386 / X86_64 definitions
  39. #if defined (I386)
  40. __asm__(".symver shmctl,shmctl@GLIBC_2.0"); // hack to eliminate glibc 2.2 dependency
  41. #endif
  42. // 1 if we are executing code in a CDE package,
  43. // 0 for tracing regular execution
  44. char CDE_exec_mode;
  45. char CDE_verbose_mode = 0; // -v option
  46. // only valid if !CDE_exec_mode
  47. char* CDE_PACKAGE_DIR = NULL;
  48. char* CDE_ROOT_DIR = NULL;
  49. char CDE_block_net_access = 0; // -n option
  50. // only relevant if CDE_exec_mode = 1
  51. char CDE_exec_streaming_mode = 0; // -s option
  52. #if defined(X86_64)
  53. // current_personality == 1 means that a 64-bit cde-exec is actually tracking a
  54. // 32-bit target process at the moment:
  55. #define IS_32BIT_EMU (current_personality == 1)
  56. #endif
  // Super-simple trie for fast exact-match lookups of 7-bit ASCII strings
  // (adapted from my earlier IncPy project).
  typedef struct _trie {
    struct _trie* children[128]; // one slot per ASCII character, 0 to 127
    int elt_is_present; // 1 if an inserted string ends at this node
  } Trie;

  // Allocates one empty node (no children, no element present).
  // Returns whatever calloc() returns, i.e. NULL on allocation failure.
  static Trie* TrieNew(void) {
    // VERY important to blank out the contents with a calloc()
    return (Trie*)calloc(1, sizeof(Trie));
  }

  /* currently unused ... but could be useful in the future
  static void TrieDelete(Trie* t) {
    // free all your children before freeing yourself
    unsigned char i;
    for (i = 0; i < 128; i++) {
      if (t->children[i]) {
        TrieDelete(t->children[i]);
      }
    }
    free(t);
  }
  */

  // Inserts ascii_string into the trie rooted at t, creating nodes on demand.
  // Every byte must be below 128; this is enforced with an assert.
  static void TrieInsert(Trie* t, char* ascii_string) {
    while (*ascii_string != '\0') {
      unsigned char idx = (unsigned char)*ascii_string;
      assert(idx < 128); // we don't support extended ASCII characters
      if (!t->children[idx]) {
        t->children[idx] = TrieNew();
      }
      t = t->children[idx];
      ascii_string++;
    }

    t->elt_is_present = 1;
  }

  // Returns 1 if ascii_string was previously inserted into the trie rooted
  // at t, and 0 otherwise.
  //
  // A NULL trie contains nothing. A string holding any byte >= 128 can never
  // have been inserted (TrieInsert rejects it), so it is reported as absent
  // instead of being used as an index past the end of children[].
  static int TrieContains(Trie* t, char* ascii_string) {
    if (!t) {
      return 0;
    }

    for (; *ascii_string != '\0'; ascii_string++) {
      unsigned char idx = (unsigned char)*ascii_string;
      if (idx >= 128) {
        return 0; // outside the table: cannot be a member
      }

      t = t->children[idx];
      if (!t) {
        return 0; // early termination, no match!
      }
    }

    return t->elt_is_present;
  }
  102. // 1 if we should use the dynamic linker from within the package
  103. // (much more portable, but might be less robust since the dynamic linker
  104. // must be invoked explicitly, which leads to some weird-ass bugs)
  105. // 0 if we should attempt to use the native dynamic linker from target machine
  106. // (not portable at all since the target machine's dynamic linker must
  107. // match the libc version WITHIN the package, but potentially more
  108. // robust if the target and source machines are identically-configured)
  109. char CDE_use_linker_from_package = 1; // ON by default, -l option to turn OFF
  110. // only 1 if we are running cde-exec from OUTSIDE of a cde-root/ directory
  111. char cde_exec_from_outside_cderoot = 0;
  112. FILE* CDE_copied_files_logfile = NULL;
  113. static char cde_options_initialized = 0; // set to 1 after CDE_init_options() done
  114. static void begin_setup_shmat(struct tcb* tcp);
  115. static void* find_free_addr(int pid, int exec, unsigned long size);
  116. static char* strcpy_from_child(struct tcb* tcp, long addr);
  117. static char* strcpy_from_child_or_null(struct tcb* tcp, long addr);
  118. static int ignore_path(char* filename, struct tcb* tcp);
  119. #define SHARED_PAGE_SIZE (MAXPATHLEN * 4)
  120. static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp);
  121. static void memcpy_to_child(int pid, char* dst_child, char* src, int size);
  122. // the true pwd of the cde executable AT THE START of execution
  123. char cde_starting_pwd[MAXPATHLEN];
  124. // these arrays are initialized in CDE_init_options()
  125. // yeah, statically-sized arrays are dumb but easy to implement :)
  126. static char* ignore_exact_paths[100];
  127. static char* ignore_prefix_paths[100];
  128. static char* ignore_substr_paths[100];
  129. int ignore_exact_paths_ind = 0;
  130. int ignore_prefix_paths_ind = 0;
  131. int ignore_substr_paths_ind = 0;
  132. // these override their ignore path counterparts
  133. static char* redirect_exact_paths[100];
  134. static char* redirect_prefix_paths[100];
  135. static char* redirect_substr_paths[100];
  136. int redirect_exact_paths_ind = 0;
  137. int redirect_prefix_paths_ind = 0;
  138. int redirect_substr_paths_ind = 0;
  139. static char* ignore_envvars[100]; // each element should be an environment variable to ignore
  140. int ignore_envvars_ind = 0;
  141. struct PI process_ignores[50];
  142. int process_ignores_ind = 0;
  143. // the absolute path to the cde-root/ directory, since that will be
  144. // where our fake filesystem starts. e.g., if cde_starting_pwd is
  145. // /home/bob/cde-package/cde-root/home/alice/cool-experiment
  146. // then cde_pseudo_root_dir is:
  147. // /home/bob/cde-package/cde-root
  148. //
  149. // only relevant when we're executing in CDE_exec_mode
  150. char cde_pseudo_root_dir[MAXPATHLEN];
  151. // the path to where the root directory is mounted on the remote machine
  152. // (only relevant for "cde-exec -s")
  153. char* cde_remote_root_dir = NULL;
  154. // file paths that should be accessed in cde-package/cde-root/
  155. // rather than on the remote machine (only relevant for "cde-exec -s")
  156. static Trie* cached_files_trie = NULL;
  157. FILE* cached_files_fp = NULL; // save cached_files_trie on-disk as "locally-cached-files.txt"
  158. // to shut up gcc warnings without going thru #include hell
  159. extern ssize_t getline(char **lineptr, size_t *n, FILE *stream);
  160. extern char* find_ELF_program_interpreter(char * file_name); // from ../readelf-mini/libreadelf-mini.a
  161. extern void path_pop(struct path* p);
  162. static void CDE_init_options(void);
  163. static void CDE_create_convenience_scripts(char** argv, int optind);
  164. static void CDE_create_toplevel_symlink_dirs(void);
  165. static void CDE_create_path_symlink_dirs(void);
  166. static void CDE_load_environment_vars(void);
  167. // returns a component within real_pwd that represents the part within
  168. // cde_pseudo_root_dir
  169. // the return value should NOT be mutated; otherwise we might be screwed!
  170. //
  171. // (tcp argument is optional and used to pass into ignore_path)
  172. static char* extract_sandboxed_pwd(char* real_pwd, struct tcb* tcp) {
  173. assert(CDE_exec_mode);
  174. // spoof getcwd by only taking the part BELOW cde-root/
  175. // e.g., if real_pwd is:
  176. // /home/bob/cde-package/cde-root/home/alice/cool-experiment
  177. // then return:
  178. // /home/alice/cool-experiment
  179. // as cwd
  180. int cde_pseudo_root_dir_len = strlen(cde_pseudo_root_dir);
  181. char real_pwd_is_within_cde_pseudo_root_dir =
  182. ((strlen(real_pwd) >= cde_pseudo_root_dir_len) &&
  183. (strncmp(real_pwd, cde_pseudo_root_dir, cde_pseudo_root_dir_len) == 0));
  184. // if real_pwd is within a strange directory like '/tmp' that should
  185. // be ignored, AND if it resides OUTSIDE of cde_pseudo_root_dir, then
  186. // simply return itself
  187. //
  188. // e.g., if real_pwd is '/tmp', then return itself,
  189. // but if real_pwd is '/tmp/cde-package/cde-root/home/pgbovine' and
  190. // cde_pseudo_root_dir is '/tmp/cde-package/cde-root/', then
  191. // treat it like any normal path (extract '/home/pgbovine')
  192. if (ignore_path(real_pwd, tcp) && !real_pwd_is_within_cde_pseudo_root_dir) {
  193. return real_pwd;
  194. }
  195. // sanity check, make sure real_pwd is within/ cde_pseudo_root_dir,
  196. // if we're not ignoring it
  197. if (!real_pwd_is_within_cde_pseudo_root_dir) {
  198. // if we're in this mode, then we're okay!!! don't return an error!
  199. if (cde_exec_from_outside_cderoot) {
  200. return real_pwd;
  201. }
  202. else {
  203. fprintf(stderr,
  204. "Fatal error: '%s' is outside of cde-root/ and NOT being ignored.\n",
  205. real_pwd);
  206. exit(1);
  207. }
  208. }
  209. // regular action: truncate path up to and including 'cde-root/'
  210. char* sandboxed_pwd = (real_pwd + cde_pseudo_root_dir_len);
  211. // special case for '/' directory:
  212. if (strlen(sandboxed_pwd) == 0) {
  213. return (char*)"/";
  214. }
  215. else {
  216. return sandboxed_pwd;
  217. }
  218. }
  219. // prepend CDE_ROOT_DIR to the given path string, assumes that the string
  220. // starts with '/' (i.e., it's an absolute path)
  221. // (mallocs a new string)
  222. char* prepend_cderoot(char* path) {
  223. assert(IS_ABSPATH(path));
  224. return format("%s%s", CDE_ROOT_DIR, path);
  225. }
  226. // WARNING: this function behaves differently depending on value of CDE_exec_mode
  227. char* create_abspath_within_cderoot(char* path) {
  228. assert(IS_ABSPATH(path)); // Pre-req: path must be an absolute path!
  229. if (CDE_exec_mode) {
  230. // if we're making a cde-exec run, then simply re-route it
  231. // inside of cde_pseudo_root_dir
  232. /* SUPER WEIRD special case: Sometimes 'path' will ALREADY BE within
  233. cde_pseudo_root_dir, so in those cases, do NOT redirect it again.
  234. Instead, simply strdup the original path (and maybe issue a warning).
  235. This can happen if, say, the target program reads /proc/self/maps
  236. or /proc/<pid>/maps and extracts the final field in a line, which
  237. represents the filename of a file that's been mmapped into the
  238. process's address space. If we're running in cde-exec mode, then
  239. the filename extracted from the maps 'pseudo-file' is actually an
  240. absolute path WITHIN cde-root/. e.g.,:
  241. 00754000-00755000 rw-p 00165000 08:01 85299 /home/pgbovine/cde-package/cde-root/bin/foo
  242. If we try to blindly redirect this path within cde-root/ again,
  243. we'll get something nonsensical like:
  244. /home/pgbovine/cde-package/cde-root/home/pgbovine/cde-package/cde-root/bin/foo
  245. To prevent such atrocities, we just do a simple check to see if a
  246. path is already within cde-root/, and if so, then don't redirect it.
  247. */
  248. if(strncmp(path, cde_pseudo_root_dir, strlen(cde_pseudo_root_dir)) == 0) {
  249. // TODO: maybe print a warning to stderr or a log file?
  250. //fprintf(stderr, "CDE WARNING: refusing to redirect path that's within cde-root/: '%s'", path);
  251. return strdup(path);
  252. }
  253. else {
  254. if (CDE_exec_streaming_mode) {
  255. // copy file into local cde-root/ 'cache' (if necessary)
  256. // we REALLY rely on cached_files_trie for performance to avoid
  257. // unnecessary filesystem accesses
  258. if (TrieContains(cached_files_trie, path)) {
  259. // cache hit! fall-through
  260. }
  261. else {
  262. printf("Accessing remote file: '%s'\n", path);
  263. // copy from remote -> local
  264. create_mirror_file(path, cde_remote_root_dir, cde_pseudo_root_dir);
  265. // VERY IMPORTANT: add ALL paths to cached_files_trie, even
  266. // for nonexistent files, so that we can avoid trying to access
  267. // those nonexistent files on the remote machine in future
  268. // executions. Remember, ANY filesystem access we can avoid
  269. // will lead to speed-ups.
  270. TrieInsert(cached_files_trie, path);
  271. if (cached_files_fp) {
  272. fprintf(cached_files_fp, "%s\n", path);
  273. }
  274. }
  275. }
  276. // normal behavior - redirect into cde-root/
  277. return format("%s%s", cde_pseudo_root_dir, path);
  278. }
  279. }
  280. else {
  281. // if we're making an ORIGINAL (tracing) run, then simply prepend
  282. // CDE_ROOT_DIR to path and canonicalize it
  283. char* path_within_cde_root = prepend_cderoot(path);
  284. // really really tricky ;) if the child process has changed
  285. // directories, then we can't rely on path_within_cde_root to
  286. // exist. instead, we must create an ABSOLUTE path based on
  287. // cde_starting_pwd, which is the directory where cde-exec was first launched!
  288. char* ret = canonicalize_path(path_within_cde_root, cde_starting_pwd);
  289. free(path_within_cde_root);
  290. assert(IS_ABSPATH(ret));
  291. return ret;
  292. }
  293. }
  294. // original_abspath must be an absolute path
  295. // create all the corresponding 'mirror' directories within
  296. // cde-package/cde-root/, MAKING SURE TO CREATE DIRECTORY SYMLINKS
  297. // when necessary (sort of emulate "mkdir -p" functionality)
  298. // if pop_one is non-zero, then pop last element before doing "mkdir -p"
  299. static void make_mirror_dirs_in_cde_package(char* original_abspath, int pop_one) {
  300. create_mirror_dirs(original_abspath, (char*)"", CDE_ROOT_DIR, pop_one);
  301. }
  302. // does simple string comparisons on ABSOLUTE PATHS.
  303. // (tcp argument is optional and used for tcp->p_ignores)
  304. static int ignore_path(char* filename, struct tcb* tcp) {
  305. assert(cde_options_initialized);
  306. // sometimes you will get a BOGUS empty filename ... in that case,
  307. // simply ignore it (this might hide some true errors, though!!!)
  308. if (filename[0] == '\0') {
  309. return 1;
  310. }
  311. assert(IS_ABSPATH(filename));
  312. int i;
  313. // process-specific ignores take precedence over global ignores
  314. // remember, tcp is optional
  315. if (tcp && tcp->p_ignores) {
  316. if (strcmp(filename, tcp->p_ignores->process_name) == 0) {
  317. if (CDE_verbose_mode) {
  318. printf("IGNORED '%s' (process=%s)\n", filename, tcp->p_ignores->process_name);
  319. }
  320. return 1;
  321. }
  322. for (i = 0; i < tcp->p_ignores->process_ignore_prefix_paths_ind; i++) {
  323. char* p = tcp->p_ignores->process_ignore_prefix_paths[i];
  324. if (strncmp(filename, p, strlen(p)) == 0) {
  325. if (CDE_verbose_mode) {
  326. printf("IGNORED '%s' [%s] (process=%s)\n", filename, p, tcp->p_ignores->process_name);
  327. }
  328. return 1;
  329. }
  330. }
  331. }
  332. // redirect paths override ignore paths
  333. for (i = 0; i < redirect_exact_paths_ind; i++) {
  334. if (strcmp(filename, redirect_exact_paths[i]) == 0) {
  335. return 0;
  336. }
  337. }
  338. for (i = 0; i < redirect_prefix_paths_ind; i++) {
  339. char* p = redirect_prefix_paths[i];
  340. if (strncmp(filename, p, strlen(p)) == 0) {
  341. return 0;
  342. }
  343. }
  344. for (i = 0; i < redirect_substr_paths_ind; i++) {
  345. if (strstr(filename, redirect_substr_paths[i])) {
  346. return 0;
  347. }
  348. }
  349. for (i = 0; i < ignore_exact_paths_ind; i++) {
  350. if (strcmp(filename, ignore_exact_paths[i]) == 0) {
  351. return 1;
  352. }
  353. }
  354. for (i = 0; i < ignore_prefix_paths_ind; i++) {
  355. char* p = ignore_prefix_paths[i];
  356. if (strncmp(filename, p, strlen(p)) == 0) {
  357. return 1;
  358. }
  359. }
  360. for (i = 0; i < ignore_substr_paths_ind; i++) {
  361. if (strstr(filename, ignore_substr_paths[i])) {
  362. return 1;
  363. }
  364. }
  365. if (cde_exec_from_outside_cderoot) {
  366. // if we're running cde-exec from OUTSIDE of cde-root/, then adopt a
  367. // 'Union FS' like policy where if a version of the file exists
  368. // within cde-package/cde-root/, then use it (return 0 to NOT
  369. // ignore), otherwise try using the version in the real system
  370. // directory (return 1 to ignore)
  371. struct stat tmp_statbuf;
  372. char* redirected_filename = create_abspath_within_cderoot(filename);
  373. if (stat(redirected_filename, &tmp_statbuf) == 0) {
  374. free(redirected_filename);
  375. return 0;
  376. }
  377. else {
  378. free(redirected_filename);
  379. return 1;
  380. }
  381. }
  382. else {
  383. // do NOT ignore by default. if you want to ignore everything except
  384. // for what's explicitly specified by 'redirect' directives, then
  385. // use an option like "ignore_prefix=/" (to ignore everything) and
  386. // then add redirect_prefix= and redirect_exact= directives accordingly
  387. return 0;
  388. }
  389. }
  390. // copies a file into its respective location within cde-root/,
  391. // creating all necessary intermediate sub-directories and symlinks
  392. //
  393. // if filename is a symlink, then copy both it AND its target into cde-root
  394. static void copy_file_into_cde_root(char* filename, char* child_current_pwd) {
  395. assert(filename);
  396. assert(!CDE_exec_mode);
  397. // resolve absolute path relative to child_current_pwd and
  398. // get rid of '..', '.', and other weird symbols
  399. char* filename_abspath = canonicalize_path(filename, child_current_pwd);
  400. // don't copy filename that we're ignoring (remember to use ABSOLUTE PATH)
  401. if (ignore_path(filename_abspath, NULL)) {
  402. free(filename_abspath);
  403. return;
  404. }
  405. if (CDE_copied_files_logfile) {
  406. fprintf(CDE_copied_files_logfile, "%s\n", filename_abspath);
  407. }
  408. create_mirror_file(filename_abspath, (char*)"", CDE_ROOT_DIR);
  409. free(filename_abspath);
  410. }
  411. extern int isascii(int c);
  412. extern int isprint(int c);
  413. extern int isspace(int c);
  414. #define STRING_ISGRAPHIC(c) ( ((c) == '\t' || (isascii (c) && isprint (c))) )
  415. // modify a single argument to the given system call
  416. // to a path within cde-root/, if applicable
  417. //
  418. // arg_num == 1 mean modify first register arg
  419. // arg_num == 2 mean modify second register arg
  420. static void modify_syscall_single_arg(struct tcb* tcp, int arg_num, char* filename) {
  421. assert(CDE_exec_mode);
  422. assert(filename);
  423. char* redirected_filename =
  424. redirect_filename_into_cderoot(filename, tcp->current_dir, tcp);
  425. if (!redirected_filename) {
  426. return;
  427. }
  428. if (!tcp->childshm) {
  429. begin_setup_shmat(tcp);
  430. // no more need for filename, so don't leak it
  431. free(redirected_filename);
  432. return; // MUST punt early here!!!
  433. }
  434. // redirect all requests for absolute paths to version within cde-root/
  435. // if those files exist!
  436. strcpy(tcp->localshm, redirected_filename); // hopefully this doesn't overflow :0
  437. //printf(" redirect %s\n", tcp->localshm);
  438. //static char tmp[MAXPATHLEN];
  439. //EXITIF(umovestr(tcp, (long)tcp->childshm, sizeof tmp, tmp) < 0);
  440. //printf(" %s\n", tmp);
  441. struct user_regs_struct cur_regs;
  442. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  443. if (arg_num == 1) {
  444. #if defined (I386)
  445. cur_regs.ebx = (long)tcp->childshm;
  446. #elif defined(X86_64)
  447. if (IS_32BIT_EMU) {
  448. cur_regs.rbx = (long)tcp->childshm;
  449. }
  450. else {
  451. cur_regs.rdi = (long)tcp->childshm;
  452. }
  453. #else
  454. #error "Unknown architecture (not I386 or X86_64)"
  455. #endif
  456. }
  457. else {
  458. assert(arg_num == 2);
  459. #if defined (I386)
  460. cur_regs.ecx = (long)tcp->childshm;
  461. #elif defined(X86_64)
  462. if (IS_32BIT_EMU) {
  463. cur_regs.rcx = (long)tcp->childshm;
  464. }
  465. else {
  466. cur_regs.rsi = (long)tcp->childshm;
  467. }
  468. #else
  469. #error "Unknown architecture (not I386 or X86_64)"
  470. #endif
  471. }
  472. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  473. free(redirected_filename);
  474. }
  475. // copy and paste from modify_syscall_first_arg ;)
  476. static void modify_syscall_two_args(struct tcb* tcp) {
  477. assert(CDE_exec_mode);
  478. if (!tcp->childshm) {
  479. begin_setup_shmat(tcp);
  480. return; // MUST punt early here!!!
  481. }
  482. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  483. char* redirected_filename1 =
  484. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  485. free(filename1);
  486. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[1]);
  487. char* redirected_filename2 =
  488. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  489. free(filename2);
  490. // gotta do both, yuck
  491. if (redirected_filename1 && redirected_filename2) {
  492. strcpy(tcp->localshm, redirected_filename1);
  493. int len1 = strlen(redirected_filename1);
  494. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  495. strcpy(redirect_file2_begin, redirected_filename2);
  496. struct user_regs_struct cur_regs;
  497. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  498. #if defined (I386)
  499. cur_regs.ebx = (long)tcp->childshm;
  500. cur_regs.ecx = (long)(((char*)tcp->childshm) + len1 + 1);
  501. #elif defined(X86_64)
  502. if (IS_32BIT_EMU) {
  503. cur_regs.rbx = (long)tcp->childshm;
  504. cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
  505. }
  506. else {
  507. cur_regs.rdi = (long)tcp->childshm;
  508. cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
  509. }
  510. #else
  511. #error "Unknown architecture (not I386 or X86_64)"
  512. #endif
  513. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  514. //static char tmp[MAXPATHLEN];
  515. //EXITIF(umovestr(tcp, (long)cur_regs.ebx, sizeof tmp, tmp) < 0);
  516. //printf(" ebx: %s\n", tmp);
  517. //EXITIF(umovestr(tcp, (long)cur_regs.ecx, sizeof tmp, tmp) < 0);
  518. //printf(" ecx: %s\n", tmp);
  519. }
  520. else if (redirected_filename1) {
  521. strcpy(tcp->localshm, redirected_filename1);
  522. struct user_regs_struct cur_regs;
  523. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  524. #if defined (I386)
  525. cur_regs.ebx = (long)tcp->childshm; // only set EBX
  526. #elif defined(X86_64)
  527. if (IS_32BIT_EMU) {
  528. cur_regs.rbx = (long)tcp->childshm;
  529. }
  530. else {
  531. cur_regs.rdi = (long)tcp->childshm;
  532. }
  533. #else
  534. #error "Unknown architecture (not I386 or X86_64)"
  535. #endif
  536. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  537. }
  538. else if (redirected_filename2) {
  539. strcpy(tcp->localshm, redirected_filename2);
  540. struct user_regs_struct cur_regs;
  541. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  542. #if defined (I386)
  543. cur_regs.ecx = (long)tcp->childshm; // only set ECX
  544. #elif defined(X86_64)
  545. if (IS_32BIT_EMU) {
  546. cur_regs.rcx = (long)tcp->childshm;
  547. }
  548. else {
  549. cur_regs.rsi = (long)tcp->childshm;
  550. }
  551. #else
  552. #error "Unknown architecture (not I386 or X86_64)"
  553. #endif
  554. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  555. }
  556. if (redirected_filename1) free(redirected_filename1);
  557. if (redirected_filename2) free(redirected_filename2);
  558. }
  559. // modify the second and fourth args to redirect into cde-root/
  560. // really nasty copy-and-paste from modify_syscall_two_args above
  561. static void modify_syscall_second_and_fourth_args(struct tcb* tcp) {
  562. assert(CDE_exec_mode);
  563. if (!tcp->childshm) {
  564. begin_setup_shmat(tcp);
  565. return; // MUST punt early here!!!
  566. }
  567. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[1]);
  568. char* redirected_filename1 =
  569. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  570. free(filename1);
  571. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[3]);
  572. char* redirected_filename2 =
  573. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  574. free(filename2);
  575. // gotta do both, yuck
  576. if (redirected_filename1 && redirected_filename2) {
  577. strcpy(tcp->localshm, redirected_filename1);
  578. int len1 = strlen(redirected_filename1);
  579. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  580. strcpy(redirect_file2_begin, redirected_filename2);
  581. struct user_regs_struct cur_regs;
  582. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  583. #if defined (I386)
  584. cur_regs.ecx = (long)tcp->childshm;
  585. cur_regs.esi = (long)(((char*)tcp->childshm) + len1 + 1);
  586. #elif defined(X86_64)
  587. if (IS_32BIT_EMU) {
  588. cur_regs.rcx = (long)tcp->childshm;
  589. cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
  590. }
  591. else {
  592. cur_regs.rsi = (long)tcp->childshm;
  593. cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
  594. }
  595. #else
  596. #error "Unknown architecture (not I386 or X86_64)"
  597. #endif
  598. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  599. }
  600. else if (redirected_filename1) {
  601. strcpy(tcp->localshm, redirected_filename1);
  602. struct user_regs_struct cur_regs;
  603. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  604. #if defined (I386)
  605. cur_regs.ecx = (long)tcp->childshm;
  606. #elif defined(X86_64)
  607. if (IS_32BIT_EMU) {
  608. cur_regs.rcx = (long)tcp->childshm;
  609. }
  610. else {
  611. cur_regs.rsi = (long)tcp->childshm;
  612. }
  613. #else
  614. #error "Unknown architecture (not I386 or X86_64)"
  615. #endif
  616. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  617. }
  618. else if (redirected_filename2) {
  619. strcpy(tcp->localshm, redirected_filename2);
  620. struct user_regs_struct cur_regs;
  621. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  622. #if defined (I386)
  623. cur_regs.esi = (long)tcp->childshm; // only set ECX
  624. #elif defined(X86_64)
  625. if (IS_32BIT_EMU) {
  626. cur_regs.rsi = (long)tcp->childshm;
  627. }
  628. else {
  629. cur_regs.rcx = (long)tcp->childshm;
  630. }
  631. #else
  632. #error "Unknown architecture (not I386 or X86_64)"
  633. #endif
  634. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  635. }
  636. if (redirected_filename1) free(redirected_filename1);
  637. if (redirected_filename2) free(redirected_filename2);
  638. }
  639. // modify the first and third args to redirect into cde-root/
  640. // really nasty copy-and-paste from modify_syscall_two_args above
  641. static void modify_syscall_first_and_third_args(struct tcb* tcp) {
  642. assert(CDE_exec_mode);
  643. if (!tcp->childshm) {
  644. begin_setup_shmat(tcp);
  645. return; // MUST punt early here!!!
  646. }
  647. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  648. char* redirected_filename1 =
  649. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  650. free(filename1);
  651. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[2]);
  652. char* redirected_filename2 =
  653. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  654. free(filename2);
  655. // gotta do both, yuck
  656. if (redirected_filename1 && redirected_filename2) {
  657. strcpy(tcp->localshm, redirected_filename1);
  658. int len1 = strlen(redirected_filename1);
  659. char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
  660. strcpy(redirect_file2_begin, redirected_filename2);
  661. struct user_regs_struct cur_regs;
  662. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  663. #if defined (I386)
  664. cur_regs.ebx = (long)tcp->childshm;
  665. cur_regs.edx = (long)(((char*)tcp->childshm) + len1 + 1);
  666. #elif defined(X86_64)
  667. if (IS_32BIT_EMU) {
  668. cur_regs.rbx = (long)tcp->childshm;
  669. cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
  670. }
  671. else {
  672. cur_regs.rdi = (long)tcp->childshm;
  673. cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
  674. }
  675. #else
  676. #error "Unknown architecture (not I386 or X86_64)"
  677. #endif
  678. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  679. }
  680. else if (redirected_filename1) {
  681. strcpy(tcp->localshm, redirected_filename1);
  682. struct user_regs_struct cur_regs;
  683. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  684. #if defined (I386)
  685. cur_regs.ebx = (long)tcp->childshm;
  686. #elif defined(X86_64)
  687. if (IS_32BIT_EMU) {
  688. cur_regs.rbx = (long)tcp->childshm;
  689. }
  690. else {
  691. cur_regs.rdi = (long)tcp->childshm;
  692. }
  693. #else
  694. #error "Unknown architecture (not I386 or X86_64)"
  695. #endif
  696. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  697. }
  698. else if (redirected_filename2) {
  699. strcpy(tcp->localshm, redirected_filename2);
  700. struct user_regs_struct cur_regs;
  701. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  702. #if defined (I386)
  703. cur_regs.edx = (long)tcp->childshm; // only set ECX
  704. #elif defined(X86_64)
  705. if (IS_32BIT_EMU) {
  706. cur_regs.rdx = (long)tcp->childshm;
  707. }
  708. else {
  709. cur_regs.rdx = (long)tcp->childshm;
  710. }
  711. #else
  712. #error "Unknown architecture (not I386 or X86_64)"
  713. #endif
  714. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  715. }
  716. if (redirected_filename1) free(redirected_filename1);
  717. if (redirected_filename2) free(redirected_filename2);
  718. }
  719. // create a malloc'ed filename that contains a version within cde-root/
  720. // return NULL if the filename should NOT be redirected
  721. // WARNING: behavior differs based on CDE_exec_mode!
  722. //
  723. // (tcp argument is optional and used to pass into ignore_path)
  724. static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp) {
  725. /* sometimes this is called with a null arg ... investigate further
  726. before making this hack permanent, though
  727. if (!filename) {
  728. return NULL;
  729. }
  730. */
  731. assert(filename);
  732. assert(child_current_pwd);
  733. char* filename_abspath = NULL;
  734. if (CDE_exec_mode) {
  735. // canonicalize_path has the desirable side effect of preventing
  736. // 'malicious' paths from going below the pseudo-root '/' ... e.g.,
  737. // if filename is '/home/pgbovine/../../../../'
  738. // then filename_abspath is simply '/'
  739. //
  740. // we resolve relative paths w.r.t.
  741. // extract_sandboxed_pwd(child_current_pwd), so that programs
  742. // can't use relative paths like '../../../' to get out of sandbox
  743. //
  744. // this is why it's VERY IMPORTANT to canonicalize before creating a
  745. // path into CDE_ROOT_DIR, so that absolute paths can't 'escape'
  746. // the sandbox
  747. filename_abspath =
  748. canonicalize_path(filename, extract_sandboxed_pwd(child_current_pwd, tcp));
  749. }
  750. else {
  751. filename_abspath = canonicalize_path(filename, child_current_pwd);
  752. }
  753. assert(filename_abspath);
  754. // don't redirect paths that we're ignoring (remember to use ABSOLUTE PATH)
  755. if (ignore_path(filename_abspath, tcp)) {
  756. free(filename_abspath);
  757. return NULL;
  758. }
  759. // WARNING: behavior of create_abspath_within_cderoot
  760. // differs based on CDE_exec_mode!
  761. char* ret = create_abspath_within_cderoot(filename_abspath);
  762. if (CDE_verbose_mode) {
  763. printf("redirect '%s' => '%s'\n", filename, ret);
  764. }
  765. free(filename_abspath);
  766. return ret;
  767. }
  768. /* standard functionality for syscalls that take a filename as first argument
  769. cde (package creation) mode:
  770. - if abspath(filename) is outside pwd, then copy it into cde-root/
  771. cde-exec mode:
  772. - if abspath(filename) is outside pwd, then redirect it into cde-root/
  773. sys_open(filename, flags, mode)
  774. sys_creat(filename, mode)
  775. sys_chmod(filename, ...)
  776. sys_chown(filename, ...)
  777. sys_chown16(filename, ...)
  778. sys_lchown(filename, ...)
  779. sys_lchown16(filename, ...)
  780. sys_stat(filename, ...)
  781. sys_stat64(filename, ...)
  782. sys_lstat(filename, ...)
  783. sys_lstat64(filename, ...)
  784. sys_truncate(path, length)
  785. sys_truncate64(path, length)
  786. sys_access(filename, mode)
  787. sys_utime(filename, ...)
  788. sys_readlink(path, ...)
  789. */
  790. void CDE_begin_standard_fileop(struct tcb* tcp, const char* syscall_name) {
  791. //char* filename = strcpy_from_child(tcp, tcp->u_arg[0]);
  792. /* Patch by Edward Wang
  793. "Attached is a patch to fix a small bug that happens when a syscall
  794. is called without any arguments (tcp->u_arg[0] is "0"). This
  795. happened to me a few times when I was trying to package a portable
  796. version of VLC media player."
  797. */
  798. char* filename = strcpy_from_child_or_null(tcp, tcp->u_arg[0]);
  799. if (filename == NULL)
  800. return;
  801. if (CDE_verbose_mode) {
  802. printf("[%d] BEGIN %s '%s'\n", tcp->pid, syscall_name, filename);
  803. }
  804. if (CDE_exec_mode) {
  805. if (filename) {
  806. modify_syscall_single_arg(tcp, 1, filename);
  807. }
  808. }
  809. else {
  810. // pre-emptively copy the given file into cde-root/, silencing warnings for
  811. // non-existent files.
  812. // (Note that filename can sometimes be a JUNKY STRING due to weird race
  813. // conditions when strace is tracing complex multi-process applications)
  814. if (filename) {
  815. copy_file_into_cde_root(filename, tcp->current_dir);
  816. }
  817. }
  818. free(filename);
  819. }
  820. /* standard functionality for *at syscalls that take a dirfd as first
  821. argument, followed by a filepath
  822. e.g., see documentation for http://linux.die.net/man/2/openat
  823. example syscalls:
  824. openat,faccessat,fstatat64,fchownat,fchmodat,futimesat,mknodat
  825. if filepath is an absolute path, or if filepath is a relative path but
  826. dirfd is AT_FDCWD, then:
  827. cde (package creation) mode:
  828. - if abspath(filepath) is outside pwd, then copy it into cde-root/
  829. exec mode:
  830. - if abspath(filepath) is outside pwd, then redirect it into cde-root/
  831. issue a warning if filepath is a relative path but dirfd is NOT AT_FDCWD
  832. */
  833. void CDE_begin_at_fileop(struct tcb* tcp, const char* syscall_name) {
  834. char* filename = strcpy_from_child(tcp, tcp->u_arg[1]);
  835. if (CDE_verbose_mode) {
  836. printf("[%d] BEGIN %s '%s' (dirfd=%u)\n", tcp->pid, syscall_name, filename, (unsigned int)tcp->u_arg[0]);
  837. }
  838. if (!IS_ABSPATH(filename) && tcp->u_arg[0] != AT_FDCWD) {
  839. fprintf(stderr,
  840. "CDE WARNING (unsupported operation): %s '%s' is a relative path and dirfd != AT_FDCWD\n",
  841. syscall_name, filename);
  842. goto done; // punt early!
  843. }
  844. if (CDE_exec_mode) {
  845. modify_syscall_single_arg(tcp, 2, filename);
  846. }
  847. else {
  848. // pre-emptively copy the given file into cde-root/, silencing warnings for
  849. // non-existent files.
  850. // (Note that filename can sometimes be a JUNKY STRING due to weird race
  851. // conditions when strace is tracing complex multi-process applications)
  852. copy_file_into_cde_root(filename, tcp->current_dir);
  853. }
  854. done:
  855. free(filename);
  856. }
  857. // input_buffer_arg_index is the index of the input filename argument
  858. // output_buffer_arg_index is the index of the argument where the output
  859. // buffer is being held (we clobber this in some special cases)
  860. static void CDE_end_readlink_internal(struct tcb* tcp, int input_buffer_arg_index, int output_buffer_arg_index) {
  861. char* filename = strcpy_from_child(tcp, tcp->u_arg[input_buffer_arg_index]);
  862. if (CDE_exec_mode) {
  863. if (tcp->u_rval >= 0) {
  864. // super hack! if the program is trying to access the special
  865. // /proc/self/exe file, return perceived_program_fullpath if
  866. // available, or else cde-exec will ERRONEOUSLY return the path
  867. // to the dynamic linker (e.g., ld-linux.so.2).
  868. //
  869. // programs like 'java' rely on the value of /proc/self/exe
  870. // being the true path to the executable, in order to dynamically
  871. // load libraries based on paths relative to that full path!
  872. char is_proc_self_exe = (strcmp(filename, "/proc/self/exe") == 0);
  873. // another super hack! programs like Google Earth
  874. // ('googleearth-bin') access /proc/self/exe as /proc/<pid>/exe
  875. // where <pid> is ITS OWN PID! be sure to handle that case properly
  876. // (but don't worry about handling cases where <pid> is the PID of
  877. // another process).
  878. //
  879. // (again, these programs use the real path of /proc/<pid>/exe as
  880. // a basis for dynamically loading libraries, so we must properly
  881. // 'fake' this value)
  882. char* self_pid_name = format("/proc/%d/exe", tcp->pid);
  883. char is_proc_self_pid_exe = (strcmp(filename, self_pid_name) == 0);
  884. free(self_pid_name);
  885. if ((is_proc_self_exe || is_proc_self_pid_exe) &&
  886. tcp->perceived_program_fullpath) {
  887. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  888. tcp->perceived_program_fullpath,
  889. strlen(tcp->perceived_program_fullpath) + 1);
  890. // VERY SUBTLE - set %eax (the syscall return value) to the length
  891. // of the FAKED STRING, since readlink is supposed to return the
  892. // length of the returned path (some programs like Python rely
  893. // on that length to allocated memory)
  894. struct user_regs_struct cur_regs;
  895. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  896. #if defined (I386)
  897. cur_regs.eax = (long)strlen(tcp->perceived_program_fullpath);
  898. #elif defined(X86_64)
  899. cur_regs.rax = (long)strlen(tcp->perceived_program_fullpath);
  900. #else
  901. #error "Unknown architecture (not I386 or X86_64)"
  902. #endif
  903. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  904. }
  905. // if the program tries to read /proc/self/cwd, then treat it like
  906. // a CDE_end_getcwd call, returning a fake cwd:
  907. //
  908. // (note that we don't handle /proc/<pid>/cwd yet)
  909. else if (strcmp(filename, "/proc/self/cwd") == 0) {
  910. // copied from CDE_end_getcwd
  911. char* sandboxed_pwd = extract_sandboxed_pwd(tcp->current_dir, tcp);
  912. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  913. sandboxed_pwd, strlen(sandboxed_pwd) + 1);
  914. // VERY SUBTLE - set %eax (the syscall return value) to the length
  915. // of the FAKED STRING, since readlink is supposed to return the
  916. // length of the returned path (some programs like Python rely
  917. // on that length to allocated memory)
  918. struct user_regs_struct cur_regs;
  919. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  920. #if defined (I386)
  921. cur_regs.eax = (long)strlen(sandboxed_pwd);
  922. #elif defined(X86_64)
  923. cur_regs.rax = (long)strlen(sandboxed_pwd);
  924. #else
  925. #error "Unknown architecture (not I386 or X86_64)"
  926. #endif
  927. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  928. }
  929. else {
  930. // inspect the return value (stored in readlink_target) and if
  931. // it's a relative path that starts with './' and contains a '//'
  932. // marker, then it MIGHT actually be a "munged" version of an
  933. // absolute path symlink that was turned into a relative path
  934. // when the original file was copied (okapi-ed) into the package.
  935. // e.g., a symlink to an absolute path like /lib/libc.so.6 might
  936. // be munged into some monstrous relative path like:
  937. //
  938. // ./../../../../..//lib/libc.so.6
  939. //
  940. // so that it can reference the version of /lib/libc.so.6 from
  941. // WITHIN THE PACKAGE rather than the native one on the target
  942. // machine. However, when the target program does a readlink(),
  943. // it expects to the syscall to return '/lib/libc.so.6', so we
  944. // must properly "un-munge" these sorts of symlinks.
  945. //
  946. // (Note that we don't have this problem with symlinks to
  947. // relative paths.)
  948. // first get the length of the return value string ...
  949. struct user_regs_struct cur_regs;
  950. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  951. #if defined (I386)
  952. int ret_length = cur_regs.eax;
  953. #elif defined(X86_64)
  954. int ret_length = cur_regs.rax;
  955. #else
  956. #error "Unknown architecture (not I386 or X86_64)"
  957. #endif
  958. char readlink_target[MAXPATHLEN];
  959. if (umoven(tcp, tcp->u_arg[output_buffer_arg_index], ret_length, readlink_target) == 0) {
  960. // remember to cap off the end ...
  961. readlink_target[ret_length] = '\0';
  962. // now readlink_target is the string that's "returned" by this
  963. // readlink syscall
  964. // is there a leading './' marker?
  965. if (strncmp(readlink_target, "./", 2) == 0) {
  966. // now check for a distinctive '//' marker, indicative of munged paths.
  967. // However, this simple check can still result in false positives!!!
  968. char* suffix = strstr(readlink_target, "//");
  969. if (suffix) {
  970. assert(suffix[0] == '/');
  971. suffix++; // skip one of the slashes
  972. assert(IS_ABSPATH(suffix));
  973. // as a final sanity check, see if this file actually exists
  974. // within cde_pseudo_root_dir, to prevent false positives
  975. char* actual_path = format("%s%s", cde_pseudo_root_dir, suffix);
  976. struct stat st;
  977. if (lstat(actual_path, &st) == 0) {
  978. // clobber the syscall's return value with 'suffix'
  979. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
  980. suffix, strlen(suffix) + 1);
  981. // VERY SUBTLE - set %eax (the syscall return value) to the length
  982. // of the FAKED STRING, since readlink is supposed to return the
  983. // length of the returned path (some programs like Python rely
  984. // on that length to allocated memory)
  985. #if defined (I386)
  986. cur_regs.eax = (long)strlen(suffix);
  987. #elif defined(X86_64)
  988. cur_regs.rax = (long)strlen(suffix);
  989. #else
  990. #error "Unknown architecture (not I386 or X86_64)"
  991. #endif
  992. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  993. }
  994. free(actual_path);
  995. }
  996. }
  997. }
  998. }
  999. }
  1000. }
  1001. free(filename);
  1002. }
  1003. void CDE_end_readlink(struct tcb* tcp) {
  1004. // output buffer is second argument (index 1)
  1005. CDE_end_readlink_internal(tcp, 0, 1);
  1006. }
  1007. void CDE_end_readlinkat(struct tcb* tcp) {
  1008. // output buffer is third argument (index 2)
  1009. CDE_end_readlink_internal(tcp, 1, 2);
  1010. }
  1011. void CDE_begin_execve(struct tcb* tcp) {
  1012. // null all these out up-top, then deallocate them in 'done'
  1013. char* exe_filename = NULL;
  1014. char* redirected_path = NULL;
  1015. char* exe_filename_abspath = NULL;
  1016. char* script_command = NULL;
  1017. char* ld_linux_filename = NULL;
  1018. char* ld_linux_fullpath = NULL;
  1019. exe_filename = strcpy_from_child(tcp, tcp->u_arg[0]);
  1020. // only attempt to do the ld-linux.so.2 trick if exe_filename
  1021. // is a valid executable file ... otherwise don't do
  1022. // anything and simply let the execve fail just like it's supposed to
  1023. struct stat filename_stat;
  1024. // NULL out p_ignores since you might have inherited it from your parent after
  1025. // forking, but when you exec, you're probably now executing a different program
  1026. tcp->p_ignores = NULL;
  1027. if (CDE_verbose_mode) {
  1028. printf("[%d] CDE_begin_execve '%s'\n", tcp->pid, exe_filename);
  1029. }
  1030. if (CDE_exec_mode) {
  1031. // if we're purposely ignoring a path to an executable (e.g.,
  1032. // ignoring "/bin/bash" to prevent crashes on certain Ubuntu
  1033. // machines), then DO NOT use the ld-linux trick and simply
  1034. // execve the file normally
  1035. //
  1036. // (note that this check doesn't pick up the case when a textual script
  1037. // is being executed (e.g., with "#!/bin/bash" as its shebang line),
  1038. // since exe_filename is the script's name and NOT "/bin/bash".
  1039. // We will need to handle this case LATER in the function.)
  1040. char* opened_filename_abspath =
  1041. canonicalize_path(exe_filename, extract_sandboxed_pwd(tcp->current_dir, tcp));
  1042. if (ignore_path(opened_filename_abspath, tcp)) {
  1043. free(opened_filename_abspath);
  1044. goto done;
  1045. }
  1046. // check for presence in process_ignores, and if found, set
  1047. // tcp->p_ignores and punt
  1048. int i;
  1049. for (i = 0; i < process_ignores_ind; i++) {
  1050. if (strcmp(opened_filename_abspath, process_ignores[i].process_name) == 0) {
  1051. //printf("IGNORED '%s'\n", opened_filename_abspath);
  1052. tcp->p_ignores = &process_ignores[i];
  1053. free(opened_filename_abspath);
  1054. goto done; // TOTALLY PUNT!!!
  1055. }
  1056. }
  1057. free(opened_filename_abspath);
  1058. redirected_path = redirect_filename_into_cderoot(exe_filename, tcp->current_dir, tcp);
  1059. }
  1060. char* path_to_executable = NULL;
  1061. if (redirected_path) {
  1062. // TODO: we don't check whether it's a real executable file :/
  1063. if (stat(redirected_path, &filename_stat) != 0) {
  1064. goto done;
  1065. }
  1066. path_to_executable = redirected_path;
  1067. }
  1068. else {
  1069. // just check the file itself (REMEMBER TO GET ITS ABSOLUTE PATH!)
  1070. exe_filename_abspath = canonicalize_path(exe_filename, tcp->current_dir);
  1071. // TODO: we don't check whether it's a real executable file :/
  1072. if (stat(exe_filename_abspath, &filename_stat) != 0) {
  1073. goto done;
  1074. }
  1075. path_to_executable = exe_filename_abspath;
  1076. }
  1077. assert(path_to_executable);
  1078. // WARNING: ld-linux.so.2 only works on dynamically-linked binary
  1079. // executable files; it will fail if you invoke it on:
  1080. // - a textual script file
  1081. // - a statically-linked binary
  1082. //
  1083. // for a textual script file, we must invoke ld-linux.so.2 on the
  1084. // target of the shebang #! (which can itself take arguments)
  1085. //
  1086. // e.g., #! /bin/sh
  1087. // e.g., #! /usr/bin/env python
  1088. char is_textual_script = 0;
  1089. char is_elf_binary = 0;
  1090. FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
  1091. assert(f);
  1092. char header[5];
  1093. memset(header, 0, sizeof(header));
  1094. fgets(header, 5, f); // 5 means 4 bytes + 1 null terminating byte
  1095. if (strcmp(header, "\177ELF") == 0) {
  1096. is_elf_binary = 1;
  1097. }
  1098. fclose(f);
  1099. if (is_elf_binary) {
  1100. // look for whether it's a statically-linked binary ...
  1101. // if so, then there is NO need to call ld-linux.so.2 on it;
  1102. // we can just execute it directly (in fact, ld-linux.so.2
  1103. // will fail on static binaries!)
  1104. // mallocs a new string if successful
  1105. // (this string is most likely "/lib/ld-linux.so.2")
  1106. ld_linux_filename = find_ELF_program_interpreter(path_to_executable);
  1107. if (!ld_linux_filename) {
  1108. // if the program interpreter isn't found, then it's a static
  1109. // binary, so let the execve call proceed normally
  1110. if (CDE_exec_mode) {
  1111. // redirect the executable's path to within $CDE_ROOT_DIR:
  1112. modify_syscall_single_arg(tcp, 1, exe_filename);
  1113. }
  1114. else {
  1115. copy_file_into_cde_root(exe_filename, tcp->current_dir);
  1116. }
  1117. // remember to EXIT EARLY!
  1118. goto done;
  1119. }
  1120. assert(IS_ABSPATH(ld_linux_filename));
  1121. }
  1122. else {
  1123. // find out whether it's a script file (starting with #! line)
  1124. FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
  1125. size_t len = 0;
  1126. ssize_t read;
  1127. char* tmp = NULL; // getline() mallocs for us
  1128. read = getline(&tmp, &len, f);
  1129. if (read > 2) {
  1130. assert(tmp[read-1] == '\n'); // strip of trailing newline
  1131. tmp[read-1] = '\0'; // strip of trailing newline
  1132. if (tmp[0] == '#' && tmp[1] == '!') {
  1133. is_textual_script = 1;
  1134. script_command = strdup(&tmp[2]);
  1135. }
  1136. }
  1137. free(tmp);
  1138. /* Patch from Yang Chen
  1139. "I am packaging our tool using it. I found there is a possible
  1140. bug in cde.c where opened files were not closed. In a long run,
  1141. it could cause fopen fail. I noticed it because our toolchain has
  1142. a lot of invocations on shell scripts and hence hit this
  1143. problem.""
  1144. */
  1145. fclose(f);
  1146. if (!script_command) {
  1147. fprintf(stderr, "Fatal error: '%s' seems to be a script without a #! line.\n(cde can only execute scripts that start with a proper #! line)\n",
  1148. path_to_executable);
  1149. exit(1);
  1150. }
  1151. // now find the program interpreter for the script_command
  1152. // executable, be sure to grab the FIRST TOKEN since that's
  1153. // the actual executable name ...
  1154. // TODO: this will fail if the executable's path has a space in it
  1155. //
  1156. // mallocs a new string if successful
  1157. // (this string is most likely "/lib/ld-linux.so.2")
  1158. // libc is so dumb ... strtok() alters its argument in an un-kosher way
  1159. tmp = strdup(script_command);
  1160. char* p = strtok(tmp, " ");
  1161. // to have find_ELF_program_interpreter succeed, we might need to
  1162. // redirect the path inside CDE_ROOT_DIR:
  1163. char* script_command_filename = NULL;
  1164. if (CDE_exec_mode) {
  1165. // this path should look like the name in the #! line, just
  1166. // canonicalized to be an absolute path
  1167. char* script_command_abspath =
  1168. canonicalize_path(p, extract_sandboxed_pwd(tcp->current_dir, tcp));
  1169. if (ignore_path(script_command_abspath, tcp)) {
  1170. free(script_command_abspath);
  1171. free(tmp);
  1172. goto done; // PUNT!
  1173. }
  1174. // check for presence in process_ignores, and if found, set
  1175. // tcp->p_ignores and punt
  1176. int i;
  1177. for (i = 0; i < process_ignores_ind; i++) {
  1178. if (strcmp(script_command_abspath, process_ignores[i].process_name) == 0) {
  1179. //printf("IGNORED (script) '%s'\n", script_command_abspath);
  1180. tcp->p_ignores = &process_ignores[i];
  1181. free(script_command_abspath);
  1182. free(tmp);
  1183. goto done; // TOTALLY PUNT!!!
  1184. }
  1185. }
  1186. free(script_command_abspath);
  1187. script_command_filename = redirect_filename_into_cderoot(p, tcp->current_dir, tcp);
  1188. }
  1189. if (!script_command_filename) {
  1190. script_command_filename = strdup(p);
  1191. }
  1192. ld_linux_filename = find_ELF_program_interpreter(script_command_filename);
  1193. free(script_command_filename);
  1194. free(tmp);
  1195. if (!ld_linux_filename) {
  1196. // if the program interpreter isn't found, then it's a static
  1197. // binary, so let the execve call proceed unmodified
  1198. // TODO: is this the right thing to do here? I think we might
  1199. // need to do something better here (think harder about this case!)
  1200. if (CDE_exec_mode) {
  1201. // redirect the executable's path to within cde-root/:
  1202. modify_syscall_single_arg(tcp, 1, exe_filename);
  1203. }
  1204. goto done;
  1205. }
  1206. assert(IS_ABSPATH(ld_linux_filename));
  1207. }
  1208. assert(!(is_elf_binary && is_textual_script));
  1209. if (CDE_exec_mode) {
  1210. // set up shared memory segment if we haven't done so yet
  1211. if (!tcp->childshm) {
  1212. begin_setup_shmat(tcp);
  1213. goto done; // MUST punt early here!!!
  1214. }
  1215. ld_linux_fullpath = create_abspath_within_cderoot(ld_linux_filename);
  1216. /* we're gonna do some craziness here to redirect the OS to call
  1217. cde-root/lib/ld-linux.so.2 rather than the real program, since
  1218. ld-linux.so.2 is closely-tied with the version of libc in
  1219. cde-root/. */
  1220. if (is_textual_script) {
  1221. /* we're running a script with a shebang (#!), so
  1222. let's set up the shared memory segment (tcp->localshm) like so:
  1223. if (CDE_use_linker_from_package) {
  1224. base --> tcp->localshm : "cde-root/lib/ld-linux.so.2" (ld_linux_fullpath)
  1225. script_command token 0 : "/usr/bin/env"
  1226. script_command token 1 : "python"
  1227. ... (for as many tokens as available) ...
  1228. new_argv --> argv pointers : point to tcp->childshm ("cde-root/lib/ld-linux.so.2")
  1229. argv pointers : point to script_command token 0
  1230. argv pointers : point to script_command token 1
  1231. ... (for as many tokens as available) ...
  1232. argv pointers : point to tcp->u_arg[0] (original program name)
  1233. argv pointers : point to child program's argv[1]
  1234. argv pointers : point to child program's argv[2]
  1235. argv pointers : point to child program's argv[3]
  1236. argv pointers : [...]
  1237. argv pointers : NULL
  1238. }
  1239. else {
  1240. base --> script_command token 0 REDIRECTED into cde-root:
  1241. e.g., "/home/pgbovine/cde-package/cde-root/usr/bin/env"
  1242. script_command token 1 : "python"
  1243. ... (for as many tokens as available) ...
  1244. new_argv --> argv pointers : point to script_command token 0
  1245. argv pointers : point to script_command token 1
  1246. ... (for as many tokens as available) ...
  1247. argv pointers : point to tcp->u_arg[0] (original program name)
  1248. argv pointers : point to child program's argv[1]
  1249. argv pointers : point to child program's argv[2]
  1250. argv pointers : point to child program's argv[3]
  1251. argv pointers : [...]
  1252. argv pointers : NULL
  1253. }
  1254. Note that we only need to do this if we're in CDE_exec_mode */
  1255. //printf("script_command='%s', path_to_executable='%s'\n", script_command, path_to_executable);
  1256. char* base = (char*)tcp->localshm;
  1257. int ld_linux_offset = 0;
  1258. if (CDE_use_linker_from_package) {
  1259. strcpy(base, ld_linux_fullpath);
  1260. ld_linux_offset = strlen(ld_linux_fullpath) + 1;
  1261. }
  1262. char* cur_loc = (char*)(base + ld_linux_offset);
  1263. char* script_command_token_starts[200]; // stores starting locations of each token
  1264. int script_command_num_tokens = 0;
  1265. // set this ONCE on the first token
  1266. tcp->perceived_program_fullpath = NULL;
  1267. // tokenize script_command into tokens, and insert them into argv
  1268. // TODO: this will fail if the shebang line contains file paths
  1269. // with spaces, quotes, or other weird characters!
  1270. char* p;
  1271. for (p = strtok(script_command, " "); p; p = strtok(NULL, " ")) {
  1272. //printf(" token = %s\n", p);
  1273. // set to the first token!
  1274. if (!tcp->perceived_program_fullpath) {
  1275. tcp->perceived_program_fullpath = strdup(p);
  1276. // kludgy special-case handling for !CDE_use_linker_from_package mode
  1277. //
  1278. // set the first script_command token to a string that's
  1279. // redirected INSIDE of cde-root ...
  1280. if (!CDE_use_linker_from_package) {
  1281. char* program_full_path_in_cderoot =
  1282. redirect_filename_into_cderoot(tcp->perceived_program_fullpath, tcp->current_dir, tcp);
  1283. strcpy(cur_loc, program_full_path_in_cderoot);
  1284. script_command_token_starts[script_command_num_tokens] = cur_loc;
  1285. cur_loc += (strlen(program_full_path_in_cderoot) + 1);
  1286. script_command_num_tokens++;
  1287. free(program_full_path_in_cderoot);
  1288. continue;
  1289. }
  1290. }
  1291. strcpy(cur_loc, p);
  1292. script_command_token_starts[script_command_num_tokens] = cur_loc;
  1293. cur_loc += (strlen(p) + 1);
  1294. script_command_num_tokens++;
  1295. }
  1296. // We need to use raw numeric arithmetic to get the proper offsets, since
  1297. // we need to properly handle tracing of 32-bit target programs using a
  1298. // 64-bit cde-exec. personality_wordsize[current_personality] gives the
  1299. // word size for the target process (e.g., 4 bytes for a 32-bit and 8 bytes
  1300. // for a 64-bit target process).
  1301. unsigned long new_argv_raw = (unsigned long)(cur_loc);
  1302. // really subtle, these addresses should be in the CHILD's address space, not the parent's
  1303. // points to ld_linux_fullpath
  1304. char** new_argv_0 = (char**)new_argv_raw;
  1305. *new_argv_0 = (char*)tcp->childshm;
  1306. if (CDE_verbose_mode) {
  1307. char* tmp = strcpy_from_child(tcp, (long)*new_argv_0);
  1308. printf(" new_argv[0]='%s'\n", tmp);
  1309. if (tmp) free(tmp);
  1310. }
  1311. // points to all the tokens of script_command
  1312. int i;
  1313. for (i = 0; i < script_command_num_tokens; i++) {
  1314. // ugly subtle indexing differences between modes :/
  1315. if (CDE_use_linker_from_package) {
  1316. char** new_argv_i_plus_1 = (char**)(new_argv_raw + ((i+1) * personality_wordsize[current_personality]));
  1317. *new_argv_i_plus_1 = (char*)tcp->childshm + (script_command_token_starts[i] - base);
  1318. if (CDE_verbose_mode) {
  1319. char* tmp = strcpy_from_child(tcp, (long)*new_argv_i_plus_1);
  1320. printf(" new_argv[%d]='%s'\n", i+1, tmp);
  1321. if (tmp) free(tmp);
  1322. }
  1323. }
  1324. else {
  1325. char** new_argv_i = (char**)(new_argv_raw + (i * personality_wordsize[current_personality]));
  1326. *new_argv_i = (char*)tcp->childshm + (script_command_token_starts[i] - base);
  1327. if (CDE_verbose_mode) {
  1328. char* tmp = strcpy_from_child(tcp, (long)*new_argv_i);
  1329. printf(" new_argv[%d]='%s'\n", i, tmp);
  1330. if (tmp) free(tmp);
  1331. }
  1332. }
  1333. }
  1334. // ugly subtle indexing differences between modes :/
  1335. int first_nontoken_index;
  1336. if (CDE_use_linker_from_package) {
  1337. first_nontoken_index = script_command_num_tokens + 1;
  1338. }
  1339. else {
  1340. first_nontoken_index = script_command_num_tokens;
  1341. }
  1342. // now populate the original program name from tcp->u_arg[0]
  1343. char** new_argv_f = (char**)(new_argv_raw + (first_nontoken_index * personality_wordsize[current_personality]));
  1344. *new_argv_f = (char*)tcp->u_arg[0];
  1345. if (CDE_verbose_mode) {
  1346. char* tmp = strcpy_from_child(tcp, (long)*new_argv_f);
  1347. printf(" new_argv[%d]='%s'\n", first_nontoken_index, tmp);
  1348. if (tmp) free(tmp);
  1349. }
  1350. // now populate argv[first_nontoken_index:] directly from child's original space
  1351. // (original arguments)
  1352. unsigned long child_argv_raw = (unsigned long)tcp->u_arg[1]; // in child's address space
  1353. char* cur_arg = NULL;
  1354. i = 1; // start at argv[1]
  1355. while (1) {
  1356. // read a word from child_argv_raw ...
  1357. EXITIF(umoven(tcp,
  1358. (long)(child_argv_raw + (i * personality_wordsize[current_personality])),
  1359. personality_wordsize[current_personality],
  1360. (void*)&cur_arg) < 0);
  1361. // Now set new_argv_raw[i+first_nontoken_index] = cur_arg, except the tricky part is that
  1362. // new_argv_raw might actually be for a 32-bit target process, so if
  1363. // we're on a 64-bit machine, we can't just use char* pointer arithmetic.
  1364. // We must use raw numeric arithmetic to get the proper offsets.
  1365. char** new_argv_i_plus_f = (char**)(new_argv_raw + ((i+first_nontoken_index) * personality_wordsize[current_personality]));
  1366. *new_argv_i_plus_f = cur_arg;
  1367. // null-terminated exit condition
  1368. if (cur_arg == NULL) {
  1369. break;
  1370. }
  1371. if (CDE_verbose_mode) {
  1372. char* tmp = strcpy_from_child(tcp, (long)cur_arg);
  1373. printf(" new_argv[%d]='%s'\n", i+first_nontoken_index, tmp);
  1374. if (tmp) free(tmp);
  1375. }
  1376. i++;
  1377. }
  1378. // now set ebx to the new program name and ecx to the new argv array
  1379. // to alter the arguments of the execv system call :0
  1380. struct user_regs_struct cur_regs;
  1381. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  1382. #if defined (I386)
  1383. cur_regs.ebx = (long)tcp->childshm; // location of base
  1384. cur_regs.ecx = ((long)tcp->childshm) + ((char*)new_argv_raw - base); // location of new_argv
  1385. #elif defined(X86_64)
  1386. if (IS_32BIT_EMU) {
  1387. cur_regs.rbx = (long)tcp->childshm;
  1388. cur_regs.rcx = ((long)tcp->childshm) + ((char*)new_argv_raw - base);
  1389. }
  1390. else {
  1391. cur_regs.rdi = (long)tcp->childshm;
  1392. cur_regs.rsi = ((long)tcp->childshm) + ((char*)new_argv_raw - base);
  1393. }
  1394. #else
  1395. #error "Unknown architecture (not I386 or X86_64)"
  1396. #endif
  1397. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  1398. }
  1399. else {
  1400. /* we're running a dynamically-linked binary executable, go
  1401. let's set up the shared memory segment (tcp->localshm) like so:
  1402. base --> tcp->localshm : "cde-root/lib/ld-linux.so.2" (ld_linux_fullpath)
  1403. real_program_path_base --> : path to target program's binary
  1404. new_argv --> argv pointers : point to tcp->childshm ("cde-root/lib/ld-linux.so.2")
  1405. argv pointers : point to tcp->childshm + strlen(ld_linux_fullpath),
  1406. which is real_program_path_base in the CHILD's address space
  1407. argv pointers : point to child program's argv[1]
  1408. argv pointers : point to child program's argv[2]
  1409. argv pointers : point to child program's argv[3]
  1410. argv pointers : [...]
  1411. argv pointers : NULL
  1412. Note that we only need to do this if we're in CDE_exec_mode
  1413. and CDE_use_linker_from_package is on */
  1414. char* base = (char*)tcp->localshm;
  1415. strcpy(base, ld_linux_fullpath);
  1416. int offset1 = strlen(ld_linux_fullpath) + 1;
  1417. tcp->perceived_program_fullpath = strcpy_from_child(tcp, tcp->u_arg[0]);
  1418. /* NOTE: we now only do this hack for 'java', since that seems to
  1419. be the only known program that requires it (to the best of my
  1420. knowledge). i don't want to implement this hack for all
  1421. programs since there are programs like 'ccache' and 'ccrypt'
  1422. that NEED to use the original names for the program files
  1423. (which are themselves symlinks) rather than the names resulting
  1424. from following all symlinks.
  1425. ok, this is super super super gross, but what we need to do is
  1426. to set tcp->perceived_program_fullpath to the full path to the
  1427. actual file of the target program's binary, making sure to first
  1428. follow ALL symlinks. otherwise programs like 'java' will fail
  1429. since they rely on the absolute path.
  1430. e.g., try invoking 'java' explicitly with the dynamic linker,
  1431. and it will fail since /usr/bin/java is a symlink and not the
  1432. path to the true binary. 'java' actually inspects the path to
  1433. the binary in order to dynamically generate paths to libraries
  1434. that it needs to load at start-up time ... gross! e.g.,:
  1435. $ /lib/ld-linux.so.2 /usr/bin/java
  1436. /usr/bin/java: error while loading shared libraries: libjli.so: cannot open shared object file: No such file or directory
  1437. This fails because it cannot find libjli.so on a search path
  1438. based on /usr/bin/java.
  1439. Since cde-exec starts up a target program by explicitly
  1440. invoking the dynamic linker, it will face the same failure ...
  1441. unless we pass in the absolute path to the REAL binary (not a
  1442. symlink) to the dynamic linker. we will do so by:
  1443. 1.) Getting the original path (tcp->u_arg[0])
  1444. 2.) Creating a version inside of cde-root/
  1445. 3.) Calling realpath() on that path in order to follow
  1446. and resolve all symlinks
  1447. 4.) Calling extract_sandboxed_pwd() in order to get
  1448. the version of that path back *outside* of cde-root/
  1449. */
  1450. if (strcmp(basename(tcp->perceived_program_fullpath), "java") == 0) {
  1451. // create a path WITHIN cde-root, so that we can call realpath on it.
  1452. // (otherwise this path might not exist natively on the target machine!)
  1453. char* program_full_path_in_cderoot =
  1454. redirect_filename_into_cderoot(tcp->perceived_program_fullpath, tcp->current_dir, tcp);
  1455. if (program_full_path_in_cderoot) {
  1456. // realpath follows ALL symbolic links and returns the path to the TRUE binary file :)
  1457. char* program_realpath_in_cde_root = realpath_strdup(program_full_path_in_cderoot);
  1458. if (program_realpath_in_cde_root) {
  1459. // extract_sandboxed_pwd (perhaps badly named for this scenario)
  1460. // extracts the part of program_realpath_in_cde_root that comes AFTER cde-root/
  1461. // (note that extract_sandboxed_pwd does NOT malloc a new string)
  1462. char* tmp_old = tcp->perceived_program_fullpath;
  1463. tcp->perceived_program_fullpath = strdup(extract_sandboxed_pwd(program_realpath_in_cde_root, tcp));
  1464. free(tmp_old);
  1465. free(program_realpath_in_cde_root);
  1466. }
  1467. free(program_full_path_in_cderoot);
  1468. }
  1469. }
  1470. char* real_program_path_base = (char*)(base + offset1);
  1471. strcpy(real_program_path_base, tcp->perceived_program_fullpath);
  1472. int offset2 = strlen(tcp->perceived_program_fullpath) + 1;
  1473. // We need to use raw numeric arithmetic to get the proper offsets, since
  1474. // we need to properly handle tracing of 32-bit target programs using a
  1475. // 64-bit cde-exec. personality_wordsize[current_personality] gives the
  1476. // word size for the target process (e.g., 4 bytes for a 32-bit and 8 bytes
  1477. // for a 64-bit target process).
  1478. unsigned long new_argv_raw = (unsigned long)(base + offset1 + offset2);
  1479. // really subtle, these addresses should be in the CHILD's address space, not the parent's:
  1480. // points to ld_linux_fullpath
  1481. char** new_argv_0 = (char**)new_argv_raw;
  1482. *new_argv_0 = (char*)tcp->childshm;
  1483. if (CDE_verbose_mode) {
  1484. char* tmp = strcpy_from_child(tcp, (long)*new_argv_0);
  1485. printf(" new_argv[0]='%s'\n", tmp);
  1486. if (tmp) free(tmp);
  1487. }
  1488. char** new_argv_1 = (char**)(new_argv_raw + personality_wordsize[current_personality]);
  1489. // points to the full path to the target program (real_program_path_base)
  1490. *new_argv_1 = (char*)tcp->childshm + offset1;
  1491. if (CDE_verbose_mode) {
  1492. char* tmp = strcpy_from_child(tcp, (long)*new_argv_1);
  1493. printf(" new_argv[1]='%s'\n", tmp);
  1494. if (tmp) free(tmp);
  1495. }
  1496. // now populate argv[1:] directly from child's original space (the original arguments)
  1497. unsigned long child_argv_raw = (unsigned long)tcp->u_arg[1]; // in child's address space
  1498. char* cur_arg = NULL;
  1499. int i = 1; // start at argv[1], since we're ignoring argv[0]
  1500. while (1) {
  1501. // read a word from child_argv_raw ...
  1502. EXITIF(umoven(tcp,
  1503. (long)(child_argv_raw + (i * personality_wordsize[current_personality])),
  1504. personality_wordsize[current_personality],
  1505. (void*)&cur_arg) < 0);
  1506. // Now set new_argv_raw[i+1] = cur_arg, except the tricky part is that
  1507. // new_argv_raw might actually be for a 32-bit target process, so if
  1508. // we're on a 64-bit machine, we can't just use char* pointer arithmetic.
  1509. // We must use raw numeric arithmetic to get the proper offsets.
  1510. char** new_argv_i_plus_1 = (char**)(new_argv_raw + ((i+1) * personality_wordsize[current_personality]));
  1511. *new_argv_i_plus_1 = cur_arg;
  1512. // null-terminated exit condition
  1513. if (cur_arg == NULL) {
  1514. break;
  1515. }
  1516. if (CDE_verbose_mode) {
  1517. char* tmp = strcpy_from_child(tcp, (long)cur_arg);
  1518. printf(" new_argv[%d]='%s'\n", i+1, tmp);
  1519. if (tmp) free(tmp);
  1520. }
  1521. i++;
  1522. }
  1523. if (CDE_use_linker_from_package) {
  1524. // now set ebx to the new program name and ecx to the new argv array
  1525. // to alter the arguments of the execv system call :0
  1526. struct user_regs_struct cur_regs;
  1527. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  1528. #if defined (I386)
  1529. cur_regs.ebx = (long)tcp->childshm; // location of base
  1530. cur_regs.ecx = ((long)tcp->childshm) + offset1 + offset2; // location of new_argv
  1531. #elif defined(X86_64)
  1532. if (IS_32BIT_EMU) {
  1533. cur_regs.rbx = (long)tcp->childshm;
  1534. cur_regs.rcx = ((long)tcp->childshm) + offset1 + offset2;
  1535. }
  1536. else {
  1537. cur_regs.rdi = (long)tcp->childshm;
  1538. cur_regs.rsi = ((long)tcp->childshm) + offset1 + offset2;
  1539. }
  1540. #else
  1541. #error "Unknown architecture (not I386 or X86_64)"
  1542. #endif
  1543. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  1544. }
  1545. else {
  1546. // simply redirect the executable's path to within cde-root/:
  1547. modify_syscall_single_arg(tcp, 1, exe_filename);
  1548. }
  1549. }
  1550. // if tcp->perceived_program_fullpath has been set, then it might be
  1551. // a RELATIVE PATH (e.g., ./googleearth-bin), so we need to make it
  1552. // into an ABSOLUTE PATH within cde-root/, but to only grab the
  1553. // component that comes after cde-root/, since that's what the
  1554. // program PERCEIVES its full path to be
  1555. if (tcp->perceived_program_fullpath) {
  1556. char* redirected_path =
  1557. redirect_filename_into_cderoot(tcp->perceived_program_fullpath,
  1558. tcp->current_dir, tcp);
  1559. // redirected_path could be NULL (e.g., if it's in cde.ignore),
  1560. // in which case just do nothing
  1561. if (redirected_path) {
  1562. char* old_perceived_program_fullpath = tcp->perceived_program_fullpath;
  1563. // extract_sandboxed_pwd (perhaps badly named for this scenario)
  1564. // extracts the part of redirected_path that comes AFTER cde-root/
  1565. // (note that extract_sandboxed_pwd does NOT malloc a new string)
  1566. tcp->perceived_program_fullpath =
  1567. strdup(extract_sandboxed_pwd(redirected_path, tcp));
  1568. free(old_perceived_program_fullpath);
  1569. }
  1570. }
  1571. }
  1572. else {
  1573. copy_file_into_cde_root(exe_filename, tcp->current_dir);
  1574. if (ld_linux_filename) {
  1575. // copy ld-linux.so.2 (or whatever the program interpreter is) into cde-root
  1576. copy_file_into_cde_root(ld_linux_filename, tcp->current_dir);
  1577. }
  1578. // very subtle! if we're executing a textual script with a #!, we
  1579. // need to grab the name of the executable from the #! string into
  1580. // cde-root, since strace doesn't normally pick it up as a dependency
  1581. if (is_textual_script) {
  1582. //printf("script_command='%s', path_to_executable='%s'\n", script_command, path_to_executable);
  1583. char* p;
  1584. for (p = strtok(script_command, " "); p; p = strtok(NULL, " ")) {
  1585. struct stat p_stat;
  1586. if (stat(p, &p_stat) == 0) {
  1587. copy_file_into_cde_root(p, tcp->current_dir);
  1588. }
  1589. break;
  1590. }
  1591. }
  1592. }
  1593. done:
  1594. // make sure ALL of these vars are initially set to NULL when declared:
  1595. if (exe_filename) {
  1596. free(exe_filename);
  1597. }
  1598. if (redirected_path) {
  1599. free(redirected_path);
  1600. }
  1601. if (exe_filename_abspath) {
  1602. free(exe_filename_abspath);
  1603. }
  1604. if (script_command) {
  1605. free(script_command);
  1606. }
  1607. if (ld_linux_filename) {
  1608. free(ld_linux_filename);
  1609. }
  1610. if (ld_linux_fullpath) {
  1611. free(ld_linux_fullpath);
  1612. }
  1613. }
  1614. void CDE_end_execve(struct tcb* tcp) {
  1615. if (CDE_verbose_mode) {
  1616. printf("[%d] CDE_end_execve\n", tcp->pid);
  1617. }
  1618. if (CDE_exec_mode) {
  1619. // WOW, what a gross hack! execve detaches all shared memory
  1620. // segments, so childshm is no longer valid. we must clear it so
  1621. // that begin_setup_shmat() will be called again
  1622. tcp->childshm = NULL;
  1623. }
  1624. }
  1625. void CDE_begin_file_unlink(struct tcb* tcp) {
  1626. char* filename = strcpy_from_child(tcp, tcp->u_arg[0]);
  1627. if (CDE_verbose_mode) {
  1628. printf("[%d] BEGIN unlink '%s'\n", tcp->pid, filename);
  1629. }
  1630. if (CDE_exec_mode) {
  1631. modify_syscall_single_arg(tcp, 1, filename);
  1632. }
  1633. else {
  1634. char* redirected_path = redirect_filename_into_cderoot(filename, tcp->current_dir, tcp);
  1635. if (redirected_path) {
  1636. unlink(redirected_path);
  1637. free(redirected_path);
  1638. }
  1639. }
  1640. }
  1641. // copy-and-paste from CDE_begin_file_unlink,
  1642. // except adjusting for unlinkat signature:
  1643. // int unlinkat(int dirfd, const char *pathname, int flags);
  1644. void CDE_begin_file_unlinkat(struct tcb* tcp) {
  1645. char* filename = strcpy_from_child(tcp, tcp->u_arg[1]);
  1646. if (CDE_verbose_mode) {
  1647. printf("[%d] BEGIN unlinkat '%s'\n", tcp->pid, filename);
  1648. }
  1649. if (!IS_ABSPATH(filename) && tcp->u_arg[0] != AT_FDCWD) {
  1650. fprintf(stderr, "CDE WARNING: unlinkat '%s' is a relative path and dirfd != AT_FDCWD\n", filename);
  1651. return; // punt early!
  1652. }
  1653. if (CDE_exec_mode) {
  1654. modify_syscall_single_arg(tcp, 2, filename);
  1655. }
  1656. else {
  1657. char* redirected_path = redirect_filename_into_cderoot(filename, tcp->current_dir, tcp);
  1658. if (redirected_path) {
  1659. unlink(redirected_path);
  1660. free(redirected_path);
  1661. }
  1662. }
  1663. }
  1664. void CDE_begin_file_link(struct tcb* tcp) {
  1665. if (CDE_verbose_mode) {
  1666. printf("[%d] BEGIN link\n", tcp->pid);
  1667. }
  1668. if (CDE_exec_mode) {
  1669. modify_syscall_two_args(tcp);
  1670. }
  1671. else {
  1672. // just try to do the link operation within the CDE package
  1673. // TODO: is this too early since the original link hasn't been done yet?
  1674. // (I don't think so ...)
  1675. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  1676. char* redirected_filename1 =
  1677. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  1678. // first copy the origin file into cde-root/ before trying to link it
  1679. copy_file_into_cde_root(filename1, tcp->current_dir);
  1680. char* filename2 = strcpy_from_child(tcp, tcp->u_arg[1]);
  1681. char* redirected_filename2 =
  1682. redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
  1683. link(redirected_filename1, redirected_filename2);
  1684. free(filename1);
  1685. free(filename2);
  1686. free(redirected_filename1);
  1687. free(redirected_filename2);
  1688. }
  1689. }
  1690. // copy-and-paste from file_link functions above,
  1691. // except adjusting for linkat signature:
  1692. // linkat(int olddirfd, char* oldpath, int newdirfd, char* newpath, int flags);
  1693. void CDE_begin_file_linkat(struct tcb* tcp) {
  1694. char* oldpath = strcpy_from_child(tcp, tcp->u_arg[1]);
  1695. char* newpath = strcpy_from_child(tcp, tcp->u_arg[3]);
  1696. if (CDE_verbose_mode) {
  1697. printf("[%d] BEGIN linkat(%s, %s)\n", tcp->pid, oldpath, newpath);
  1698. }
  1699. if (!IS_ABSPATH(oldpath) && tcp->u_arg[0] != AT_FDCWD) {
  1700. fprintf(stderr,
  1701. "CDE WARNING: linkat '%s' is a relative path and dirfd != AT_FDCWD\n",
  1702. oldpath);
  1703. goto done; // punt early!
  1704. }
  1705. if (!IS_ABSPATH(newpath) && tcp->u_arg[2] != AT_FDCWD) {
  1706. fprintf(stderr,
  1707. "CDE WARNING: linkat '%s' is a relative path and dirfd != AT_FDCWD\n",
  1708. newpath);
  1709. goto done; // punt early!
  1710. }
  1711. if (CDE_exec_mode) {
  1712. modify_syscall_second_and_fourth_args(tcp);
  1713. }
  1714. else {
  1715. // just try to do the link operation within the CDE package
  1716. // TODO: is this too early since the original link hasn't been done yet?
  1717. // (I don't think so ...)
  1718. //
  1719. char* redirected_oldpath = redirect_filename_into_cderoot(oldpath, tcp->current_dir, tcp);
  1720. // first copy the origin file into cde-root/ before trying to link it
  1721. copy_file_into_cde_root(oldpath, tcp->current_dir);
  1722. char* redirected_newpath = redirect_filename_into_cderoot(newpath, tcp->current_dir, tcp);
  1723. link(redirected_oldpath, redirected_newpath);
  1724. free(redirected_oldpath);
  1725. free(redirected_newpath);
  1726. }
  1727. done:
  1728. free(oldpath);
  1729. free(newpath);
  1730. }
  1731. void CDE_begin_file_symlink(struct tcb* tcp) {
  1732. if (CDE_verbose_mode) {
  1733. printf("[%d] BEGIN symlink\n", tcp->pid);
  1734. }
  1735. if (CDE_exec_mode) {
  1736. modify_syscall_two_args(tcp);
  1737. }
  1738. else {
  1739. // TODO: what about properly munging symlinks to absolute paths inside of
  1740. // the CDE package? e.g., if you symlink to '/lib/libc.so.6', perhaps that
  1741. // path should be munged to '../../lib/libc.so.6' within the CDE package???
  1742. char* oldname = strcpy_from_child(tcp, tcp->u_arg[0]);
  1743. char* newname = strcpy_from_child(tcp, tcp->u_arg[1]);
  1744. char* newname_redirected = redirect_filename_into_cderoot(newname, tcp->current_dir, tcp);
  1745. symlink(oldname, newname_redirected);
  1746. free(oldname);
  1747. free(newname);
  1748. free(newname_redirected);
  1749. }
  1750. }
  1751. // copy-and-paste from above,
  1752. // except adjusting for symlinkat signature:
  1753. // symlinkat(char* oldpath, int newdirfd, char* newpath);
  1754. void CDE_begin_file_symlinkat(struct tcb* tcp) {
  1755. if (CDE_verbose_mode) {
  1756. printf("[%d] BEGIN symlinkat\n", tcp->pid);
  1757. }
  1758. char* newpath = strcpy_from_child(tcp, tcp->u_arg[2]);
  1759. if (!IS_ABSPATH(newpath) && tcp->u_arg[1] != AT_FDCWD) {
  1760. fprintf(stderr, "CDE WARNING: symlinkat '%s' is a relative path and dirfd != AT_FDCWD\n", newpath);
  1761. free(newpath);
  1762. return; // punt early!
  1763. }
  1764. if (CDE_exec_mode) {
  1765. modify_syscall_first_and_third_args(tcp);
  1766. }
  1767. else {
  1768. char* oldname = strcpy_from_child(tcp, tcp->u_arg[0]);
  1769. char* newpath_redirected = redirect_filename_into_cderoot(newpath, tcp->current_dir, tcp);
  1770. symlink(oldname, newpath_redirected);
  1771. free(oldname);
  1772. free(newpath_redirected);
  1773. }
  1774. free(newpath);
  1775. }
  1776. void CDE_begin_file_rename(struct tcb* tcp) {
  1777. if (CDE_verbose_mode) {
  1778. printf("[%d] BEGIN rename\n", tcp->pid);
  1779. }
  1780. if (CDE_exec_mode) {
  1781. modify_syscall_two_args(tcp);
  1782. }
  1783. }
  1784. void CDE_end_file_rename(struct tcb* tcp) {
  1785. if (CDE_verbose_mode) {
  1786. printf("[%d] END rename\n", tcp->pid);
  1787. }
  1788. if (CDE_exec_mode) {
  1789. // empty
  1790. }
  1791. else {
  1792. if (tcp->u_rval == 0) {
  1793. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
  1794. char* redirected_filename1 =
  1795. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  1796. free(filename1);
  1797. // remove original file from cde-root/
  1798. if (redirected_filename1) {
  1799. unlink(redirected_filename1);
  1800. free(redirected_filename1);
  1801. }
  1802. // copy the destination file into cde-root/
  1803. char* dst_filename = strcpy_from_child(tcp, tcp->u_arg[1]);
  1804. copy_file_into_cde_root(dst_filename, tcp->current_dir);
  1805. free(dst_filename);
  1806. }
  1807. }
  1808. }
  1809. // copy-and-paste from file_rename functions above,
  1810. // except adjusting for linkat signature:
  1811. // renameat(int olddirfd, char* oldpath, int newdirfd, char* newpath);
  1812. void CDE_begin_file_renameat(struct tcb* tcp) {
  1813. if (CDE_verbose_mode) {
  1814. printf("[%d] BEGIN renameat\n", tcp->pid);
  1815. }
  1816. char* oldpath = strcpy_from_child(tcp, tcp->u_arg[1]);
  1817. char* newpath = strcpy_from_child(tcp, tcp->u_arg[3]);
  1818. if (!IS_ABSPATH(oldpath) && tcp->u_arg[0] != AT_FDCWD) {
  1819. fprintf(stderr,
  1820. "CDE WARNING: renameat '%s' is a relative path and dirfd != AT_FDCWD\n",
  1821. oldpath);
  1822. goto done; // punt early!
  1823. }
  1824. if (!IS_ABSPATH(newpath) && tcp->u_arg[2] != AT_FDCWD) {
  1825. fprintf(stderr,
  1826. "CDE WARNING: renameat '%s' is a relative path and dirfd != AT_FDCWD\n",
  1827. newpath);
  1828. goto done; // punt early!
  1829. }
  1830. if (CDE_exec_mode) {
  1831. modify_syscall_second_and_fourth_args(tcp);
  1832. }
  1833. done:
  1834. free(oldpath);
  1835. free(newpath);
  1836. }
  1837. void CDE_end_file_renameat(struct tcb* tcp) {
  1838. if (CDE_verbose_mode) {
  1839. printf("[%d] END renameat\n", tcp->pid);
  1840. }
  1841. if (CDE_exec_mode) {
  1842. // empty
  1843. }
  1844. else {
  1845. if (tcp->u_rval == 0) {
  1846. char* filename1 = strcpy_from_child(tcp, tcp->u_arg[1]);
  1847. char* redirected_filename1 =
  1848. redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
  1849. free(filename1);
  1850. // remove original file from cde-root/
  1851. if (redirected_filename1) {
  1852. unlink(redirected_filename1);
  1853. free(redirected_filename1);
  1854. }
  1855. // copy the destination file into cde-root/
  1856. char* dst_filename = strcpy_from_child(tcp, tcp->u_arg[3]);
  1857. copy_file_into_cde_root(dst_filename, tcp->current_dir);
  1858. free(dst_filename);
  1859. }
  1860. }
  1861. }
  // Syscall-entry hook for chdir: delegates to the shared single-path
  // handler, tagging the call as "chdir".
  void CDE_begin_chdir(struct tcb* tcp)
  {
    CDE_begin_standard_fileop(tcp, "chdir");
  }
  // CDE_end_fchdir is defined further down; declare it so that
  // CDE_end_chdir can forward to it.
  void CDE_end_fchdir(struct tcb* tcp);

  // Syscall-exit hook for chdir: identical to the fchdir case, which
  // refreshes tcp->current_dir.
  void CDE_end_chdir(struct tcb* tcp)
  {
    CDE_end_fchdir(tcp);
  }
  1869. void CDE_end_fchdir(struct tcb* tcp) {
  1870. // only do this on success
  1871. if (tcp->u_rval == 0) {
  1872. // update current_dir
  1873. // A reliable way to get the current directory is using /proc/<pid>/cwd
  1874. char* cwd_symlink_name = format("/proc/%d/cwd", tcp->pid);
  1875. tcp->current_dir[0] = '\0';
  1876. int len = readlink(cwd_symlink_name, tcp->current_dir, MAXPATHLEN);
  1877. assert(tcp->current_dir[0] != '\0');
  1878. assert(len >= 0);
  1879. tcp->current_dir[len] = '\0'; // wow, readlink doesn't put the cap on the end!!!
  1880. free(cwd_symlink_name);
  1881. // now copy into cde-root/ if necessary
  1882. if (!CDE_exec_mode) {
  1883. char* redirected_path =
  1884. redirect_filename_into_cderoot(tcp->current_dir, tcp->current_dir, tcp);
  1885. if (redirected_path) {
  1886. make_mirror_dirs_in_cde_package(tcp->current_dir, 0);
  1887. free(redirected_path);
  1888. }
  1889. }
  1890. }
  1891. }
  // Syscall-entry hook for mkdir: delegates to the shared single-path
  // handler, tagging the call as "mkdir".
  void CDE_begin_mkdir(struct tcb* tcp)
  {
    CDE_begin_standard_fileop(tcp, "mkdir");
  }
  1895. void CDE_end_mkdir(struct tcb* tcp, int input_buffer_arg_index) {
  1896. if (CDE_verbose_mode) {
  1897. printf("[%d] END mkdir*\n", tcp->pid);
  1898. }
  1899. if (CDE_exec_mode) {
  1900. // empty
  1901. }
  1902. else {
  1903. // mkdir either when the call succeeds or only fails because the
  1904. // directory already exists
  1905. if ((tcp->u_rval == 0) || (tcp->u_rval == EEXIST)) {
  1906. // sometimes mkdir is called with a BOGUS argument, so silently skip those cases
  1907. char* dirname_arg = strcpy_from_child(tcp, tcp->u_arg[input_buffer_arg_index]);
  1908. char* dirname_abspath = canonicalize_path(dirname_arg, tcp->current_dir);
  1909. make_mirror_dirs_in_cde_package(dirname_abspath, 0);
  1910. free(dirname_abspath);
  1911. free(dirname_arg);
  1912. }
  1913. }
  1914. }
  // Syscall-entry hook for mkdirat; counterpart of CDE_begin_mkdir for the
  // *at variant:
  //   int mkdirat(int dirfd, const char *pathname, mode_t mode);
  // Delegates to the shared *at handler, tagging the call as "mkdirat".
  void CDE_begin_mkdirat(struct tcb* tcp)
  {
    CDE_begin_at_fileop(tcp, "mkdirat");
  }
  // Syscall-exit hook for mkdirat: reuses the mkdir exit logic, with the
  // path taken from syscall argument 1 (argument 0 is dirfd).
  void CDE_end_mkdirat(struct tcb* tcp)
  {
    CDE_end_mkdir(tcp, 1);
  }
  // Syscall-entry hook for rmdir: delegates to the shared single-path
  // handler, tagging the call as "rmdir".
  void CDE_begin_rmdir(struct tcb* tcp)
  {
    CDE_begin_standard_fileop(tcp, "rmdir");
  }
  1927. void CDE_end_rmdir(struct tcb* tcp, int input_buffer_arg_index) {
  1928. if (CDE_verbose_mode) {
  1929. printf("[%d] END rmdir*\n", tcp->pid);
  1930. }
  1931. if (CDE_exec_mode) {
  1932. // empty
  1933. }
  1934. else {
  1935. if (tcp->u_rval == 0) {
  1936. char* dirname_arg = strcpy_from_child(tcp, tcp->u_arg[input_buffer_arg_index]);
  1937. char* redirected_path =
  1938. redirect_filename_into_cderoot(dirname_arg, tcp->current_dir, tcp);
  1939. if (redirected_path) {
  1940. rmdir(redirected_path);
  1941. free(redirected_path);
  1942. }
  1943. free(dirname_arg);
  1944. }
  1945. }
  1946. }
  // Syscall-entry hook for the directory-removing flavor of unlinkat:
  // delegates to the shared *at handler, tagging the call as
  // "unlinkat_rmdir".
  void CDE_begin_unlinkat_rmdir(struct tcb* tcp)
  {
    CDE_begin_at_fileop(tcp, "unlinkat_rmdir");
  }
  // Syscall-exit hook for the directory-removing flavor of unlinkat:
  // reuses the rmdir exit logic, with the path taken from syscall
  // argument 1 (argument 0 is dirfd).
  void CDE_end_unlinkat_rmdir(struct tcb* tcp)
  {
    CDE_end_rmdir(tcp, 1);
  }
  // from Goanna
  #define FILEBACK 8 /* It is OK to use a file backed region. */

  // Scan /proc/<pid>/maps for the first private mapping that is at least
  // `size` bytes long and carries every permission requested in `prot`
  // (PROT_READ / PROT_WRITE / PROT_EXEC).  Unless FILEBACK is set in `prot`,
  // file-backed mappings (non-zero device major/minor) are skipped.
  //
  // Returns the start address of that mapping (an address in the address
  // space of process `pid`), or NULL if the maps file cannot be opened or
  // no mapping qualifies.
  //
  // TODO: this is probably very Linux-specific ;)
  static void* find_free_addr(int pid, int prot, unsigned long size) {
    FILE *f;
    // "/proc/" + at most 11 characters of %d + "/maps" + NUL fits easily
    char filename[32];
    char s[80];

    sprintf(filename, "/proc/%d/maps", pid);
    f = fopen(filename, "r");
    if (!f) {
      fprintf(stderr, "Can not find a free address in pid %d: %s\n.", pid, strerror(errno));
      // previously fell through and handed a NULL stream to fgets()
      return NULL;
    }

    while (fgets(s, sizeof(s), f) != NULL) {
      unsigned long cstart, cend;
      unsigned int major, minor; // %x stores into unsigned int
      char r, w, x, p;

      // Skip anything that is not a complete maps entry.  This also covers
      // the leftover chunks of lines longer than the buffer, which would
      // otherwise be evaluated with uninitialized fields.
      if (sscanf(s, "%lx-%lx %c%c%c%c %*x %x:%x",
                 &cstart, &cend, &r, &w, &x, &p, &major, &minor) != 8) {
        continue;
      }

      if (cend - cstart < size) {
        continue;
      }
      if (!(prot & FILEBACK) && (major || minor)) {
        continue;
      }
      if (p != 'p') {
        continue;
      }
      if ((prot & PROT_READ) && (r != 'r')) {
        continue;
      }
      if ((prot & PROT_EXEC) && (x != 'x')) {
        continue;
      }
      if ((prot & PROT_WRITE) && (w != 'w')) {
        continue;
      }

      fclose(f);
      return (void *)cstart;
    }

    fclose(f);
    return NULL;
  }
  1994. void alloc_tcb_CDE_fields(struct tcb* tcp) {
  1995. tcp->localshm = NULL;
  1996. tcp->childshm = NULL;
  1997. tcp->setting_up_shm = 0;
  1998. if (CDE_exec_mode) {
  1999. key_t key;
  2000. // randomly probe for a valid shm key
  2001. do {
  2002. errno = 0;
  2003. key = rand();
  2004. tcp->shmid = shmget(key, SHARED_PAGE_SIZE, IPC_CREAT|IPC_EXCL|0600);
  2005. } while (tcp->shmid == -1 && errno == EEXIST);
  2006. tcp->localshm = (char*)shmat(tcp->shmid, NULL, 0);
  2007. if ((long)tcp->localshm == -1) {
  2008. perror("shmat");
  2009. exit(1);
  2010. }
  2011. if (shmctl(tcp->shmid, IPC_RMID, NULL) == -1) {
  2012. perror("shmctl(IPC_RMID)");
  2013. exit(1);
  2014. }
  2015. assert(tcp->localshm);
  2016. }
  2017. tcp->current_dir = NULL;
  2018. tcp->p_ignores = NULL;
  2019. }
  2020. void free_tcb_CDE_fields(struct tcb* tcp) {
  2021. if (tcp->localshm) {
  2022. shmdt(tcp->localshm);
  2023. }
  2024. // need to null out elts in case table entries are recycled
  2025. tcp->localshm = NULL;
  2026. tcp->childshm = NULL;
  2027. tcp->setting_up_shm = 0;
  2028. tcp->p_ignores = NULL;
  2029. if (tcp->current_dir) {
  2030. free(tcp->current_dir);
  2031. tcp->current_dir = NULL;
  2032. }
  2033. }
  2034. // inject a system call in the child process to tell it to attach our
  2035. // shared memory segment, so that it can read modified paths from there
  2036. //
  2037. // Setup a shared memory region within child process,
  2038. // then repeat current system call
  2039. //
  2040. // WARNING: this code is very tricky and gross!
  2041. static void begin_setup_shmat(struct tcb* tcp) {
  2042. assert(tcp->localshm);
  2043. assert(!tcp->childshm); // avoid duplicate calls
  2044. // stash away original registers so that we can restore them later
  2045. struct user_regs_struct cur_regs;
  2046. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  2047. memcpy(&tcp->saved_regs, &cur_regs, sizeof(cur_regs));
  2048. #if defined (I386)
  2049. // The return value of shmat (attached address) is actually stored in
  2050. // the child's address space
  2051. tcp->savedaddr = find_free_addr(tcp->pid, PROT_READ|PROT_WRITE, sizeof(int));
  2052. // store *tcp->savedaddr data (in child's address space) so that we can restore it later:
  2053. tcp->savedword = ptrace(PTRACE_PEEKDATA, tcp->pid, tcp->savedaddr, 0);
  2054. EXITIF(errno); // PTRACE_PEEKDATA reports error in errno
  2055. // To make the target process execute a shmat() on 32-bit x86, we need to make
  2056. // it execute the special __NR_ipc syscall with SHMAT as a param:
  2057. /* The shmat call is implemented as a godawful sys_ipc. */
  2058. cur_regs.orig_eax = __NR_ipc;
  2059. /* The parameters are passed in ebx, ecx, edx, esi, edi, and ebp */
  2060. cur_regs.ebx = SHMAT;
  2061. /* The kernel names the rest of these, first, second, third, ptr,
  2062. * and fifth. Only first, second and ptr are used as inputs. Third
  2063. * is a pointer to the output (unsigned long).
  2064. */
  2065. cur_regs.ecx = tcp->shmid;
  2066. cur_regs.edx = 0; /* shmat flags */
  2067. cur_regs.esi = (long)tcp->savedaddr; /* Pointer to the return value in the
  2068. child's address space. */
  2069. cur_regs.edi = (long)NULL; /* We don't use shmat's shmaddr */
  2070. cur_regs.ebp = 0; /* The "fifth" argument is unused. */
  2071. #elif defined(X86_64)
  2072. if (IS_32BIT_EMU) {
  2073. // If we're on a 64-bit machine but tracing a 32-bit target process, then we
  2074. // need to make the 32-bit __NR_ipc SHMAT syscall as though we're on a 32-bit
  2075. // machine (see code above), except that we use registers like 'rbx' rather
  2076. // than 'ebx'. This was VERY SUBTLE AND TRICKY to finally get right!
  2077. // this code is almost exactly copy-and-paste from the I386 section above,
  2078. // except that the register names are the x86-64 versions of the 32-bit regs
  2079. tcp->savedaddr = find_free_addr(tcp->pid, PROT_READ|PROT_WRITE, sizeof(int));
  2080. tcp->savedword = ptrace(PTRACE_PEEKDATA, tcp->pid, tcp->savedaddr, 0);
  2081. EXITIF(errno);
  2082. cur_regs.orig_rax = 117; // 117 is the numerical value of the __NR_ipc macro (not available on 64-bit hosts!)
  2083. cur_regs.rbx = 21; // 21 is the numerical value of the SHMAT macro (not available on 64-bit hosts!)
  2084. cur_regs.rcx = tcp->shmid;
  2085. cur_regs.rdx = 0;
  2086. cur_regs.rsi = (long)tcp->savedaddr;
  2087. cur_regs.rdi = (long)NULL;
  2088. cur_regs.rbp = 0;
  2089. }
  2090. else {
  2091. // If the target process is 64-bit, then life is good, because
  2092. // there is a direct shmat syscall in x86-64!!!
  2093. cur_regs.orig_rax = __NR_shmat;
  2094. cur_regs.rdi = tcp->shmid;
  2095. cur_regs.rsi = 0;
  2096. cur_regs.rdx = 0;
  2097. }
  2098. #else
  2099. #error "Unknown architecture (not I386 or X86_64)"
  2100. #endif
  2101. EXITIF(ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  2102. tcp->setting_up_shm = 1; // very importante!!!
  2103. }
  2104. void finish_setup_shmat(struct tcb* tcp) {
  2105. struct user_regs_struct cur_regs;
  2106. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  2107. #if defined (I386)
  2108. // setup had better been a success!
  2109. assert(cur_regs.orig_eax == __NR_ipc);
  2110. assert(cur_regs.eax == 0);
  2111. // the pointer to the shared memory segment allocated by shmat() is actually
  2112. // located in *tcp->savedaddr (in the child's address space)
  2113. errno = 0;
  2114. tcp->childshm = (void*)ptrace(PTRACE_PEEKDATA, tcp->pid, tcp->savedaddr, 0);
  2115. EXITIF(errno); // PTRACE_PEEKDATA reports error in errno
  2116. // restore original data in child's address space
  2117. EXITIF(ptrace(PTRACE_POKEDATA, tcp->pid, tcp->savedaddr, tcp->savedword));
  2118. tcp->saved_regs.eax = tcp->saved_regs.orig_eax;
  2119. // back up IP so that we can re-execute previous instruction
  2120. // TODO: is the use of 2 specific to 32-bit machines?
  2121. tcp->saved_regs.eip = tcp->saved_regs.eip - 2;
  2122. #elif defined(X86_64)
  2123. if (IS_32BIT_EMU) {
  2124. // If we're on a 64-bit machine but tracing a 32-bit target process, then we
  2125. // need to handle the return value of the 32-bit __NR_ipc SHMAT syscall as
  2126. // though we're on a 32-bit machine (see code above). This was VERY SUBTLE
  2127. // AND TRICKY to finally get right!
  2128. // setup had better been a success!
  2129. assert(cur_regs.orig_rax == 117 /*__NR_ipc*/);
  2130. assert(cur_regs.rax == 0);
  2131. // the pointer to the shared memory segment allocated by shmat() is actually
  2132. // located in *tcp->savedaddr (in the child's address space)
  2133. errno = 0;
  2134. // this is SUPER IMPORTANT ... only keep the 32 least significant bits
  2135. // (mask with 0xffffffff) before storing the pointer in tcp->childshm,
  2136. // since 32-bit processes only have 32-bit addresses, not 64-bit addresses :0
  2137. tcp->childshm = (void*)(ptrace(PTRACE_PEEKDATA, tcp->pid, tcp->savedaddr, 0) & 0xffffffff);
  2138. EXITIF(errno);
  2139. // restore original data in child's address space
  2140. EXITIF(ptrace(PTRACE_POKEDATA, tcp->pid, tcp->savedaddr, tcp->savedword));
  2141. }
  2142. else {
  2143. // If the target process is 64-bit, then life is good, because
  2144. // there is a direct shmat syscall in x86-64!!!
  2145. assert(cur_regs.orig_rax == __NR_shmat);
  2146. // the return value of the direct shmat syscall is in %rax
  2147. tcp->childshm = (void*)cur_regs.rax;
  2148. }
  2149. // the code below is identical regardless of whether the target process is
  2150. // 32-bit or 64-bit (on a 64-bit host)
  2151. tcp->saved_regs.rax = tcp->saved_regs.orig_rax;
  2152. // back up IP so that we can re-execute previous instruction
  2153. // ... wow, apparently the -2 offset works for 64-bit as well :)
  2154. tcp->saved_regs.rip = tcp->saved_regs.rip - 2;
  2155. #else
  2156. #error "Unknown architecture (not I386 or X86_64)"
  2157. #endif
  2158. EXITIF(ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&tcp->saved_regs) < 0);
  2159. assert(tcp->childshm);
  2160. tcp->setting_up_shm = 0; // very importante!!!
  2161. }
  2162. // copy src into dst, redirecting it into cde-root/ if necessary
  2163. // based on cde_starting_pwd
  2164. //
  2165. // dst should be big enough to hold a full path
  2166. void strcpy_redirected_cderoot(char* dst, char* src) {
  2167. assert(CDE_exec_mode);
  2168. // use cde_starting_pwd (TODO: is that correct?)
  2169. char* redirected_src = redirect_filename_into_cderoot(src, cde_starting_pwd, NULL);
  2170. if (redirected_src) {
  2171. strcpy(dst, redirected_src);
  2172. free(redirected_src);
  2173. }
  2174. else {
  2175. strcpy(dst, src);
  2176. }
  2177. }
  // Read a string located at addr in the child's address space into a
  // freshly malloc'ed buffer. Returns NULL if umovestr() reports failure
  // (or if strdup() cannot allocate); the caller owns and must free() the
  // returned string.
  static char* strcpy_from_child_or_null(struct tcb* tcp, long addr) {
    char path[MAXPATHLEN];
    if (umovestr(tcp, addr, sizeof path, path) < 0) {
      return NULL;
    }
    // NOTE(review): umovestr() appears to stop after `len` bytes without
    // writing a terminator when the child's string is at least that long,
    // so force termination before handing the buffer to strdup().
    path[sizeof path - 1] = '\0';
    return strdup(path);
  }
  // Same as strcpy_from_child_or_null(), except that a NULL result is
  // treated as fatal via EXITIF. The caller must free() the returned string.
  static char* strcpy_from_child(struct tcb* tcp, long addr) {
    char* copied = strcpy_from_child_or_null(tcp, addr);
    EXITIF(copied == NULL);
    return copied;
  }
  // adapted from the Goanna project by Spillane et al.
  //
  // Copy `size` bytes from src (in our address space) to dst_child (a
  // pointer in the child's address space) using PTRACE_POKEDATA.
  //
  // ptrace transfers one `long`-sized word per request, so the copy proceeds
  // in sizeof(long) steps. (The previous version read a whole long but only
  // advanced by sizeof(int), which on 64-bit hosts read past the end of src
  // and clobbered bytes past the end of the destination in the child.)
  //
  // Any ptrace failure is fatal (EXITIF).
  static void memcpy_to_child(int pid, char* dst_child, char* src, int size) {
    const int word_size = (int)sizeof(long);
    long word;

    // whole words: memcpy avoids unaligned / aliasing-violating loads from src
    while (size >= word_size) {
      memcpy(&word, src, sizeof word);
      EXITIF(ptrace(PTRACE_POKEDATA, pid, dst_child, word) < 0);
      size -= word_size;
      dst_child += word_size;
      src += word_size;
    }

    /* Cleanup the last little bit. */
    if (size > 0) {
      // fetch the child's current word so that the bytes beyond the end of
      // our data are written back unchanged
      errno = 0;
      word = ptrace(PTRACE_PEEKDATA, pid, dst_child, 0);
      EXITIF(errno);

      // overlay only the remaining `size` bytes (lowest addresses first),
      // never touching src beyond its end
      memcpy(&word, src, (size_t)size);
      EXITIF(ptrace(PTRACE_POKEDATA, pid, dst_child, word) < 0);
    }
  }
  2222. // TODO: do we still need to keep track of tcp->child_current_pwd
  2223. // if we can just directly access it using /proc/<pid>/cwd ???
  2224. void CDE_end_getcwd(struct tcb* tcp) {
  2225. if (!syserror(tcp)) {
  2226. if (CDE_exec_mode) {
  2227. char* sandboxed_pwd = extract_sandboxed_pwd(tcp->current_dir, tcp);
  2228. memcpy_to_child(tcp->pid, (char*)tcp->u_arg[0],
  2229. sandboxed_pwd, strlen(sandboxed_pwd) + 1);
  2230. // for debugging
  2231. //char* tmp = strcpy_from_child(tcp, tcp->u_arg[0]);
  2232. //printf("[%d] CDE_end_getcwd spoofed: %s\n", tcp->pid, tmp);
  2233. //free(tmp);
  2234. }
  2235. else {
  2236. char* tmp = strcpy_from_child(tcp, tcp->u_arg[0]);
  2237. strcpy(tcp->current_dir, tmp);
  2238. free(tmp);
  2239. //printf("[%d] CDE_end_getcwd: %s\n", tcp->pid, tcp->current_dir);
  2240. }
  2241. }
  2242. }
  2243. // path_envvar is $PATH. Iterate through all entries and if any of them
  2244. // are symlinks, then create their corresponding entries in cde-root/.
  2245. // This takes care of cases where, say, /bin is actually a symlink to
  2246. // another directory like /KNOPPIX/bin. We need to create a symlink
  2247. // 'bin' in cde-root/ and point it to ./KNOPPIX/bin
  2248. //
  2249. // DO THIS AT THE VERY BEGINNING OF EXECUTION!
  2250. static void CDE_create_path_symlink_dirs() {
  2251. char *p;
  2252. int m, n;
  2253. struct stat st;
  2254. char tmp_buf[MAXPATHLEN];
  2255. for (p = getenv("PATH"); p && *p; p += m) {
  2256. if (strchr(p, ':')) {
  2257. n = strchr(p, ':') - p;
  2258. m = n + 1;
  2259. }
  2260. else {
  2261. m = n = strlen(p);
  2262. }
  2263. strncpy(tmp_buf, p, n);
  2264. tmp_buf[n] = '\0';
  2265. // this will NOT follow the symlink ...
  2266. if (lstat(tmp_buf, &st) == 0) {
  2267. char is_symlink = S_ISLNK(st.st_mode);
  2268. if (is_symlink) {
  2269. char* tmp = strdup(tmp_buf);
  2270. copy_file_into_cde_root(tmp, cde_starting_pwd);
  2271. free(tmp);
  2272. }
  2273. }
  2274. }
  2275. // also, this is hacky, but also check /usr/lib to see
  2276. // whether it's a symlink. ld-linux.so.2 will likely try to look
  2277. // for libraries in those places, but they're not in any convenient
  2278. // environment variable
  2279. //
  2280. // note that the other 2 directories that ld-linux.so.2 usually
  2281. // tries to look for libs in, /bin and /lib, will be taken care of by
  2282. // CDE_create_toplevel_symlink_dirs()
  2283. strcpy(tmp_buf, "/usr/lib");
  2284. // this will NOT follow the symlink ...
  2285. if (lstat(tmp_buf, &st) == 0) {
  2286. char is_symlink = S_ISLNK(st.st_mode);
  2287. if (is_symlink) {
  2288. char* tmp = strdup(tmp_buf);
  2289. copy_file_into_cde_root(tmp, cde_starting_pwd);
  2290. free(tmp);
  2291. }
  2292. }
  2293. }
  2294. // scan through all files at top-level root directory ('/') and find if
  2295. // any of them are symlinks to DIRECTORIES. if so, then copy the symlinks
  2296. // and their targets into CDE_ROOT_DIR, so that we can faithfully mirror the
  2297. // original filesystem (at least w.r.t. toplevel symlinks).
  2298. //
  2299. // this is necessary to ensure proper functioning
  2300. // on filesystems that have symlinks at the top level. e.g., on Knoppix
  2301. // 2006-06-01 LiveCD, here is the top-level filesystem structure:
  2302. /*
  2303. /
  2304. UNIONFS/
  2305. bin
  2306. boot
  2307. etc
  2308. ...
  2309. ramdisk/
  2310. home/
  2311. bin --> /UNIONFS/bin (symlink!)
  2312. boot --> /UNIONFS/boot (symlink!)
  2313. home --> /ramdisk/home (symlink)
  2314. etc --> /UNIONFS/etc (symlink!)
  2315. ...
  2316. usr --> /UNIONFS/usr
  2317. */
  2318. static void CDE_create_toplevel_symlink_dirs() {
  2319. DIR* dp = opendir("/");
  2320. assert(dp);
  2321. struct dirent *ep;
  2322. while ((ep = readdir(dp))) {
  2323. char* toplevel_abspath = format("/%s", ep->d_name); // make into abspath
  2324. struct stat st;
  2325. if (lstat(toplevel_abspath, &st) == 0) {
  2326. char is_symlink = S_ISLNK(st.st_mode);
  2327. if (is_symlink) {
  2328. struct stat real_st;
  2329. // only do this for top-level symlinks to DIRECTORIES
  2330. if ((stat(toplevel_abspath, &real_st) == 0) &&
  2331. S_ISDIR(real_st.st_mode)) {
  2332. copy_file_into_cde_root(toplevel_abspath, cde_starting_pwd);
  2333. }
  2334. }
  2335. }
  2336. free(toplevel_abspath);
  2337. }
  2338. closedir(dp);
  2339. }
  2340. void CDE_init_tcb_dir_fields(struct tcb* tcp) {
  2341. // malloc new entries, and then decide whether to inherit from parent
  2342. // process entry or directly initialize
  2343. assert(!tcp->current_dir);
  2344. tcp->current_dir = malloc(MAXPATHLEN); // big boy!
  2345. // if parent exists, then its fields MUST be legit, so grab them
  2346. if (tcp->parent) {
  2347. assert(tcp->parent->current_dir);
  2348. strcpy(tcp->current_dir, tcp->parent->current_dir);
  2349. //printf("inherited %s [%d]\n", tcp->current_dir, tcp->pid);
  2350. // inherit from parent since you're executing the same program after
  2351. // forking (at least until you do an exec)
  2352. tcp->p_ignores = tcp->parent->p_ignores;
  2353. }
  2354. else {
  2355. // otherwise create fresh fields derived from master (cde) process
  2356. getcwd(tcp->current_dir, MAXPATHLEN);
  2357. //printf("fresh %s [%d]\n", tcp->current_dir, tcp->pid);
  2358. }
  2359. // it's possible that tcp->perceived_program_fullpath has already been
  2360. // set, and if so, don't mess with it. only inherit from parent if it
  2361. // hasn't been set yet (TODO: I don't fully understand the rationale
  2362. // for this, but it seems to work in practice so far)
  2363. if (!tcp->perceived_program_fullpath && tcp->parent) {
  2364. // aliased, so don't mutate or free
  2365. tcp->perceived_program_fullpath = tcp->parent->perceived_program_fullpath;
  2366. }
  2367. }
  2368. // find the absolute path to the cde-root/ directory, since that
  2369. // will be where our fake filesystem starts. e.g., if our real pwd is:
  2370. // /home/bob/cde-package/cde-root/home/alice/cool-experiment
  2371. // then the pseudo_root_dir is:
  2372. // /home/bob/cde-package/cde-root
  2373. //
  2374. // if we're running cde-exec from outside of a cde-root/ directory,
  2375. // then try to find the cde-root/ corresponding to the location of the
  2376. // cde-exec executable
  2377. void CDE_init_pseudo_root_dir() {
  2378. assert(CDE_exec_mode);
  2379. struct path* p = new_path_from_abspath(cde_starting_pwd);
  2380. assert(p->depth > 0);
  2381. int i;
  2382. int found_index = -1;
  2383. for (i = 1; i <= p->depth; i++) {
  2384. char* component = get_path_component(p, i);
  2385. if (strcmp(component, CDE_ROOT_NAME) == 0) {
  2386. // flag an error if there is more than one cde-root directory, since
  2387. // we don't support NESTED cde packages o.O
  2388. if (found_index >= 0) {
  2389. fprintf(stderr, "Error: More than one cde-root/ directory found in pwd:\n '%s'\n",
  2390. cde_starting_pwd);
  2391. exit(1);
  2392. }
  2393. found_index = i;
  2394. // keep searching in case there are duplicates, in which case the
  2395. // above assertion will fail
  2396. }
  2397. }
  2398. if (found_index < 0) {
  2399. // if we can't find 'cde-root' in cde_starting_pwd, then we must
  2400. // be executing cde-exec from OUTSIDE of a repository, so set
  2401. // cde_pseudo_root_dir to:
  2402. // dirname(readlink("/proc/self/exe")) + "/cde-root"
  2403. char proc_self_exe[MAXPATHLEN];
  2404. proc_self_exe[0] = '\0';
  2405. int len = readlink("/proc/self/exe",
  2406. proc_self_exe, sizeof proc_self_exe);
  2407. assert(proc_self_exe[0] != '\0');
  2408. assert(len >= 0);
  2409. proc_self_exe[len] = '\0'; // wow, readlink doesn't put cap on the end!
  2410. char* toplevel_cde_root_path =
  2411. format("%s/cde-root", dirname(proc_self_exe));
  2412. strcpy(cde_pseudo_root_dir, toplevel_cde_root_path);
  2413. free(toplevel_cde_root_path);
  2414. cde_exec_from_outside_cderoot = 1;
  2415. }
  2416. else {
  2417. // normal case --- we're currently within a cde-root/ directory, so
  2418. // set that as cde_pseudo_root_dir
  2419. char* tmp = path2str(p, found_index);
  2420. strcpy(cde_pseudo_root_dir, tmp);
  2421. free(tmp);
  2422. }
  2423. delete_path(p);
  2424. }
  2425. // pgbovine - do all CDE initialization here after command-line options
  2426. // have been processed (argv[optind] is the name of the target program)
  2427. void CDE_init(char** argv, int optind) {
  2428. // pgbovine - initialize this before doing anything else!
  2429. getcwd(cde_starting_pwd, sizeof cde_starting_pwd);
  2430. // suppress (most) okapi warnings to prevent terminal noise
  2431. extern char OKAPI_VERBOSE;
  2432. OKAPI_VERBOSE = 0;
  2433. // pgbovine - allow most promiscuous permissions for new files/directories
  2434. umask(0000);
  2435. if (CDE_exec_mode) {
  2436. // must do this before running CDE_init_options()
  2437. CDE_init_pseudo_root_dir();
  2438. if (CDE_exec_streaming_mode) {
  2439. char* tmp = strdup(cde_pseudo_root_dir);
  2440. tmp[strlen(tmp) - strlen(CDE_ROOT_NAME)] = '\0';
  2441. cde_remote_root_dir = format("%scde-remote-root", tmp);
  2442. free(tmp);
  2443. struct stat remote_root_stat;
  2444. if ((stat(cde_remote_root_dir, &remote_root_stat) != 0) ||
  2445. (!S_ISDIR(remote_root_stat.st_mode))) {
  2446. fprintf(stderr, "Fatal error: Running in -s mode but '%s' directory does not exist\n",
  2447. cde_remote_root_dir);
  2448. exit(1);
  2449. }
  2450. // initialize trie
  2451. cached_files_trie = TrieNew();
  2452. char* p = format("%s/../locally-cached-files.txt", cde_pseudo_root_dir);
  2453. cached_files_fp = fopen(p, "r");
  2454. if (cached_files_fp) {
  2455. char* line = NULL;
  2456. size_t len = 0;
  2457. ssize_t read;
  2458. while ((read = getline(&line, &len, cached_files_fp)) != -1) {
  2459. assert(line[read-1] == '\n');
  2460. line[read-1] = '\0'; // strip of trailing newline
  2461. if (line[0] != '\0') {
  2462. // pre-seed cached_files_trie:
  2463. TrieInsert(cached_files_trie, line);
  2464. }
  2465. }
  2466. fclose(cached_files_fp);
  2467. }
  2468. // always open in append mode so that we can be ready to add more
  2469. // entries on subsequent runs ...
  2470. cached_files_fp = fopen(p, "a");
  2471. free(p);
  2472. }
  2473. }
  2474. else {
  2475. if (!CDE_PACKAGE_DIR) { // if it hasn't been set by the '-o' option, set to a default
  2476. CDE_PACKAGE_DIR = (char*)"cde-package";
  2477. }
  2478. // make this an absolute path!
  2479. CDE_PACKAGE_DIR = canonicalize_path(CDE_PACKAGE_DIR, cde_starting_pwd);
  2480. CDE_ROOT_DIR = format("%s/%s", CDE_PACKAGE_DIR, CDE_ROOT_NAME);
  2481. assert(IS_ABSPATH(CDE_ROOT_DIR));
  2482. mkdir(CDE_PACKAGE_DIR, 0777);
  2483. mkdir(CDE_ROOT_DIR, 0777);
  2484. // if we can't even create CDE_ROOT_DIR, then abort with a failure
  2485. struct stat cde_rootdir_stat;
  2486. if (stat(CDE_ROOT_DIR, &cde_rootdir_stat)) {
  2487. fprintf(stderr, "Error: Cannot create CDE root directory at \"%s\"\n", CDE_ROOT_DIR);
  2488. exit(1);
  2489. }
  2490. // collect uname information in CDE_PACKAGE_DIR/cde.uname
  2491. struct utsname uname_info;
  2492. if (uname(&uname_info) >= 0) {
  2493. char* fn = format("%s/cde.uname", CDE_PACKAGE_DIR);
  2494. FILE* uname_f = fopen(fn, "w");
  2495. free(fn);
  2496. if (uname_f) {
  2497. fprintf(uname_f, "uname: '%s' '%s' '%s' '%s'\n",
  2498. uname_info.sysname,
  2499. uname_info.release,
  2500. uname_info.version,
  2501. uname_info.machine);
  2502. fclose(uname_f);
  2503. }
  2504. }
  2505. // if cde.options doesn't yet exist, create it in pwd and seed it
  2506. // with default values that are useful to ignore in practice
  2507. //
  2508. // do this BEFORE CDE_init_options() so that we pick up those
  2509. // ignored values
  2510. struct stat cde_options_stat;
  2511. if (stat("cde.options", &cde_options_stat)) {
  2512. FILE* f = fopen("cde.options", "w");
  2513. fputs(CDE_OPTIONS_VERSION_NUM, f);
  2514. fputs(" (do not alter this first line!)\n", f);
  2515. // /dev, /proc, and /sys are special system directories with fake files
  2516. //
  2517. // some sub-directories within /var contains 'volatile' temp files
  2518. // that change when system is running normally
  2519. //
  2520. // (Note that it's a bit too much to simply ignore all of /var,
  2521. // since files in dirs like /var/lib might be required - e.g., see
  2522. // gnome-sudoku example)
  2523. //
  2524. // $HOME/.Xauthority is used for X11 authentication via ssh, so we need to
  2525. // use the REAL version and not the one in cde-root/
  2526. //
  2527. // ignore "/tmp" and "/tmp/*" since programs often put lots of
  2528. // session-specific stuff into /tmp so DO NOT track files within
  2529. // there, or else you will risk severely 'overfitting' and ruining
  2530. // portability across machines. it's safe to assume that all Linux
  2531. // distros have a /tmp directory that anybody can write into
  2532. fputs("\n# These directories often contain pseudo-files that shouldn't be tracked\n", f);
  2533. fputs("ignore_prefix=/dev/\n", f);
  2534. fputs("ignore_exact=/dev\n", f);
  2535. fputs("ignore_prefix=/proc/\n", f);
  2536. fputs("ignore_exact=/proc\n", f);
  2537. fputs("ignore_prefix=/sys/\n", f);
  2538. fputs("ignore_exact=/sys\n", f);
  2539. fputs("ignore_prefix=/var/cache/\n", f);
  2540. fputs("ignore_prefix=/var/lock/\n", f);
  2541. fputs("ignore_prefix=/var/log/\n", f);
  2542. fputs("ignore_prefix=/var/run/\n", f);
  2543. fputs("ignore_prefix=/var/tmp/\n", f);
  2544. fputs("ignore_prefix=/tmp/\n", f);
  2545. fputs("ignore_exact=/tmp\n", f);
  2546. fputs("\n# un-comment the entries below if you think they might help your app:\n", f);
  2547. fputs("#ignore_exact=/etc/ld.so.cache\n", f);
  2548. fputs("#ignore_exact=/etc/ld.so.preload\n", f);
  2549. fputs("#ignore_exact=/etc/ld.so.nohwcap\n", f);
  2550. fputs("\n# Ignore .Xauthority to allow X Windows programs to work\n", f);
  2551. fputs("ignore_substr=.Xauthority\n", f);
  2552. // we gotta ignore /etc/resolv.conf or else Google Earth can't
  2553. // access the network when on another machine, so it won't work
  2554. // (and I think other network-facing apps might not work either!)
  2555. fputs("\n# Ignore so that networking can work properly\n", f);
  2556. fputs("ignore_exact=/etc/resolv.conf\n", f);
  2557. fputs("# These files might be useful to ignore along with /etc/resolv.conf\n", f);
  2558. fputs("# (un-comment if you want to try them)\n", f);
  2559. fputs("#ignore_exact=/etc/host.conf\n", f);
  2560. fputs("#ignore_exact=/etc/hosts\n", f);
  2561. fputs("#ignore_exact=/etc/nsswitch.conf\n", f);
  2562. fputs("#ignore_exact=/etc/gai.conf\n", f);
  2563. // ewencp also suggests looking into ignoring these other
  2564. // networking-related files:
  2565. /* Hmm, good point. There's probably lots -- if you're trying to
  2566. run a server, /etc/hostname, /etc/hosts.allow and
  2567. /etc/hosts.deny could all be problematic. /etc/hosts could be
  2568. a problem for client or server, although its unusual to have
  2569. much in there. One way it could definitely be a problem is if
  2570. the hostname is in /etc/hosts and you want to use it as a
  2571. server, e.g. I run on my machine (ahoy) the server and client,
  2572. which appears in /etc/hosts, and then when cde-exec runs it
  2573. ends up returning 127.0.0.1. But for all of these, I actually
  2574. don't know when the file gets read, so I'm not certain any of
  2575. them are really a problem. */
  2576. fputs("\n# Access the target machine's password files:\n", f);
  2577. fputs("# (some programs like texmacs need these lines to be commented-out,\n", f);
  2578. fputs("# since they try to use home directory paths within the passwd file,\n", f);
  2579. fputs("# and those paths might not exist within the package.)\n", f);
  2580. fputs("ignore_prefix=/etc/passwd\n", f);
  2581. fputs("ignore_prefix=/etc/shadow\n", f);
  2582. fputs("\n# These environment vars might lead to 'overfitting' and hinder portability\n", f);
  2583. fputs("ignore_environment_var=DBUS_SESSION_BUS_ADDRESS\n", f);
  2584. fputs("ignore_environment_var=ORBIT_SOCKETDIR\n", f);
  2585. fputs("ignore_environment_var=SESSION_MANAGER\n", f);
  2586. fputs("ignore_environment_var=XAUTHORITY\n", f);
  2587. fputs("ignore_environment_var=DISPLAY\n", f);
  2588. fclose(f);
  2589. }
  2590. }
  2591. // do this AFTER creating cde.options
  2592. CDE_init_options();
  2593. if (CDE_exec_mode) {
  2594. CDE_load_environment_vars();
  2595. }
  2596. else {
  2597. // pgbovine - copy 'cde' executable to CDE_PACKAGE_DIR and rename
  2598. // it 'cde-exec', so that it can be included in the executable
  2599. //
  2600. // use /proc/self/exe since argv[0] might be simply 'cde'
  2601. // (if the cde binary is in $PATH and we're invoking it only by its name)
  2602. char* fn = format("%s/cde-exec", CDE_PACKAGE_DIR);
  2603. copy_file((char*)"/proc/self/exe", fn, 0777);
  2604. free(fn);
  2605. CDE_create_convenience_scripts(argv, optind);
  2606. // make a cde.log file that contains commands to reproduce original
  2607. // run within cde-package
  2608. struct stat tmp;
  2609. FILE* log_f;
  2610. char* log_filename = format("%s/cde.log", CDE_PACKAGE_DIR);
  2611. if (stat(log_filename, &tmp)) {
  2612. log_f = fopen(log_filename, "w");
  2613. fprintf(log_f, "cd '" CDE_ROOT_NAME "%s'", cde_starting_pwd);
  2614. fputc('\n', log_f);
  2615. }
  2616. else {
  2617. log_f = fopen(log_filename, "a");
  2618. }
  2619. free(log_filename);
  2620. fprintf(log_f, "'./%s.cde'", basename(argv[optind]));
  2621. int i;
  2622. for (i = optind + 1; argv[i] != NULL; i++) {
  2623. fprintf(log_f, " '%s'", argv[i]); // add quotes for accuracy
  2624. }
  2625. fputc('\n', log_f);
  2626. fclose(log_f);
  2627. CDE_create_path_symlink_dirs();
  2628. CDE_create_toplevel_symlink_dirs();
  2629. // copy /proc/self/environ to capture the FULL set of environment vars
  2630. char* fullenviron_fn = format("%s/cde.full-environment", CDE_PACKAGE_DIR);
  2631. copy_file((char*)"/proc/self/environ", fullenviron_fn, 0666);
  2632. free(fullenviron_fn);
  2633. }
  2634. }
  2635. // create a '.cde' version of the target program inside the corresponding
  2636. // location of cde_starting_pwd within CDE_ROOT_DIR, which is a
  2637. // shell script that invokes it using cde-exec
  2638. //
  2639. // also, if target_program_fullpath is only a program name
  2640. // (without any '/' chars in it, then also create a convenience script
  2641. // at the top level of the package)
  2642. //
  2643. // argv[optind] is the target program's name
  2644. static void CDE_create_convenience_scripts(char** argv, int optind) {
  2645. assert(!CDE_exec_mode);
  2646. char* target_program_fullpath = argv[optind];
  2647. // only take the basename to construct cde_script_name,
  2648. // since target_program_fullpath could be a relative path like '../python'
  2649. char* cde_script_name = format("%s.cde", basename(target_program_fullpath));
  2650. char* progname_redirected =
  2651. redirect_filename_into_cderoot(cde_script_name, cde_starting_pwd, NULL);
  2652. if (progname_redirected) {
  2653. // make sure directory exists :)
  2654. make_mirror_dirs_in_cde_package(cde_starting_pwd, 0);
  2655. // this is sort of tricky. we need to insert in a bunch of ../ so
  2656. // that we can find cde-exec, which is right in the cde-package directory
  2657. struct path* p = new_path_from_abspath(cde_starting_pwd);
  2658. char dot_dots[MAXPATHLEN];
  2659. assert(p->depth > 0);
  2660. strcpy(dot_dots, "..");
  2661. int i;
  2662. for (i = 1; i <= p->depth; i++) {
  2663. strcat(dot_dots, "/..");
  2664. }
  2665. delete_path(p);
  2666. FILE* f = fopen(progname_redirected, "w");
  2667. fprintf(f, "#!/bin/sh\n");
  2668. fprintf(f, "%s/cde-exec", dot_dots);
  2669. // include original command-line options
  2670. for (i = 1; i < optind; i++) {
  2671. fprintf(f, " '%s'", argv[i]);
  2672. }
  2673. // double quotes seem to work well for making $@ more accurate
  2674. fprintf(f, " '%s' \"$@\"\n", target_program_fullpath);
  2675. fclose(f);
  2676. chmod(progname_redirected, 0777); // now make the script executable
  2677. free(progname_redirected);
  2678. }
  2679. if (!strchr(target_program_fullpath, '/')) {
  2680. char* toplevel_script_name = format("%s/%s", CDE_PACKAGE_DIR, cde_script_name);
  2681. FILE* f = fopen(toplevel_script_name, "w");
  2682. // Thanks to probono@puredarwin.org for the following more robust
  2683. // start-up script idea, which creates a program that can be
  2684. // double-clicked and run from anywhere.
  2685. fprintf(f, "#!/bin/sh\n");
  2686. fprintf(f, "HERE=\"$(dirname \"$(readlink -f \"${0}\")\")\"\n");
  2687. fprintf(f, "cd \"$HERE/cde-root\" && ../cde-exec");
  2688. // include original command-line options
  2689. int i;
  2690. for (i = 1; i < optind; i++) {
  2691. fprintf(f, " '%s'", argv[i]);
  2692. }
  2693. // double quotes seem to work well for make $@ more accurate
  2694. fprintf(f, " '%s' \"$@\"\n", target_program_fullpath);
  2695. fclose(f);
  2696. chmod(toplevel_script_name, 0777); // now make the script executable
  2697. free(toplevel_script_name);
  2698. }
  2699. free(cde_script_name);
  2700. }
  2701. static void _add_to_array_internal(char** my_array, int* p_len, char* p, char* array_name) {
  2702. assert(my_array[*p_len] == NULL);
  2703. my_array[*p_len] = strdup(p);
  2704. if (CDE_verbose_mode) {
  2705. printf("%s[%d] = '%s'\n", array_name, *p_len, my_array[*p_len]);
  2706. }
  2707. (*p_len)++;
  2708. if (*p_len >= 100) {
  2709. fprintf(stderr, "Fatal error: more than 100 entries in %s\n", array_name);
  2710. exit(1);
  2711. }
  2712. }
  2713. void CDE_add_ignore_exact_path(char* p) {
  2714. _add_to_array_internal(ignore_exact_paths, &ignore_exact_paths_ind, p, (char*)"ignore_exact_paths");
  2715. }
  2716. void CDE_add_ignore_prefix_path(char* p) {
  2717. _add_to_array_internal(ignore_prefix_paths, &ignore_prefix_paths_ind, p, (char*)"ignore_prefix_paths");
  2718. }
  2719. void CDE_add_ignore_substr_path(char* p) {
  2720. _add_to_array_internal(ignore_substr_paths, &ignore_substr_paths_ind, p, (char*)"ignore_substr_paths");
  2721. }
  2722. void CDE_add_redirect_exact_path(char* p) {
  2723. _add_to_array_internal(redirect_exact_paths, &redirect_exact_paths_ind, p, (char*)"redirect_exact_paths");
  2724. }
  2725. void CDE_add_redirect_prefix_path(char* p) {
  2726. _add_to_array_internal(redirect_prefix_paths, &redirect_prefix_paths_ind, p, (char*)"redirect_prefix_paths");
  2727. }
  2728. void CDE_add_redirect_substr_path(char* p) {
  2729. _add_to_array_internal(redirect_substr_paths, &redirect_substr_paths_ind, p, (char*)"redirect_substr_paths");
  2730. }
  2731. void CDE_add_ignore_envvar(char* p) {
  2732. _add_to_array_internal(ignore_envvars, &ignore_envvars_ind, p, (char*)"ignore_envvars");
  2733. }
  2734. // call this at the VERY BEGINNING of execution, so that ignore paths can be
  2735. // specified on the command line (e.g., using the '-i' and '-p' options)
  2736. void CDE_clear_options_arrays() {
  2737. memset(ignore_exact_paths, 0, sizeof(ignore_exact_paths));
  2738. memset(ignore_prefix_paths, 0, sizeof(ignore_prefix_paths));
  2739. memset(ignore_substr_paths, 0, sizeof(ignore_substr_paths));
  2740. memset(redirect_exact_paths, 0, sizeof(redirect_exact_paths));
  2741. memset(redirect_prefix_paths, 0, sizeof(redirect_prefix_paths));
  2742. memset(redirect_substr_paths, 0, sizeof(redirect_substr_paths));
  2743. memset(ignore_envvars, 0, sizeof(ignore_envvars));
  2744. memset(process_ignores, 0, sizeof(process_ignores));
  2745. ignore_exact_paths_ind = 0;
  2746. ignore_prefix_paths_ind = 0;
  2747. ignore_substr_paths_ind = 0;
  2748. redirect_exact_paths_ind = 0;
  2749. redirect_prefix_paths_ind = 0;
  2750. redirect_substr_paths_ind = 0;
  2751. ignore_envvars_ind = 0;
  2752. process_ignores_ind = 0;
  2753. }
  2754. // initialize arrays based on the cde.options file, which has the grammar:
  2755. //
  2756. // ignore_exact=<exact path to ignore>
  2757. // ignore_prefix=<path prefix to ignore>
  2758. // ignore_substr=<path substring to ignore>
  2759. // redirect_exact=<exact path to allow>
  2760. // redirect_prefix=<path prefix to allow>
  2761. // redirect_substr=<path substring to allow>
  2762. // ignore_environment_var=<environment variable to ignore>
  2763. //
  2764. // On 2011-06-22, added support for process-specific ignores, with the following syntax:
  2765. // ignore_process=<exact path to ignore>
  2766. // {
  2767. // process_ignore_prefix=<path prefix to ignore for the given process>
  2768. // }
  2769. static void CDE_init_options() {
  2770. // Pre-req: CDE_clear_options_arrays() has already been called!
  2771. char in_braces = false;
  2772. FILE* f = NULL;
  2773. if (CDE_exec_mode) {
  2774. // look for a cde.options file in the package
  2775. // you must run this AFTER running CDE_init_pseudo_root_dir()
  2776. assert(*cde_pseudo_root_dir);
  2777. char* options_file = format("%s/../cde.options", cde_pseudo_root_dir);
  2778. f = fopen(options_file, "r");
  2779. if (!f) {
  2780. fprintf(stderr, "Fatal error: missing cde.options file\n");
  2781. fprintf(stderr, "(trying to locate file at %s)\n", options_file);
  2782. exit(1);
  2783. }
  2784. free(options_file);
  2785. }
  2786. else {
  2787. // look for a cde.options file in pwd
  2788. f = fopen("cde.options", "r");
  2789. // if found, copy it into the package
  2790. if (f) {
  2791. char* fn = format("%s/cde.options", CDE_PACKAGE_DIR);
  2792. copy_file((char*)"cde.options", fn, 0666);
  2793. free(fn);
  2794. }
  2795. else {
  2796. fprintf(stderr, "Fatal error: missing cde.options file\n");
  2797. exit(1);
  2798. }
  2799. }
  2800. char is_first_line = 1;
  2801. char* line = NULL;
  2802. size_t len = 0;
  2803. ssize_t read;
  2804. while ((read = getline(&line, &len, f)) != -1) {
  2805. assert(line[read-1] == '\n');
  2806. line[read-1] = '\0'; // strip of trailing newline
  2807. // strip off leading and trailing spaces
  2808. while (*line && isspace(*line)) {
  2809. line++;
  2810. }
  2811. int last = strlen(line) - 1;
  2812. while (last >= 0 && isspace(line[last])) {
  2813. line[last] = '\0';
  2814. last--;
  2815. }
  2816. // make sure there's an appropriate version number on first line
  2817. if (is_first_line) {
  2818. if (strncmp(line, CDE_OPTIONS_VERSION_NUM, strlen(CDE_OPTIONS_VERSION_NUM)) != 0) {
  2819. fprintf(stderr, "Error: cde.options file incompatible with this version of cde ('%s')\n",
  2820. CDE_OPTIONS_VERSION_NUM);
  2821. exit(1);
  2822. }
  2823. is_first_line = 0;
  2824. continue;
  2825. }
  2826. // ignore blank or comment lines
  2827. if (line[0] == '\0' || line[0] == '#') {
  2828. continue;
  2829. }
  2830. // for process_ignore_prefix directives
  2831. if (line[0] == '{') {
  2832. assert(process_ignores_ind > 0); // ignore_process must've come first!
  2833. in_braces = 1;
  2834. continue;
  2835. }
  2836. else if (line[0] == '}') {
  2837. in_braces = 0;
  2838. continue;
  2839. }
  2840. char* p;
  2841. char is_first_token = 1;
  2842. char set_id = -1;
  2843. for (p = strtok(line, "="); p; p = strtok(NULL, "=")) {
  2844. if (is_first_token) {
  2845. if (strcmp(p, "ignore_exact") == 0) {
  2846. set_id = 1;
  2847. }
  2848. else if (strcmp(p, "ignore_prefix") == 0) {
  2849. set_id = 2;
  2850. }
  2851. else if (strcmp(p, "ignore_environment_var") == 0) {
  2852. set_id = 3;
  2853. }
  2854. else if (strcmp(p, "redirect_exact") == 0) {
  2855. set_id = 4;
  2856. }
  2857. else if (strcmp(p, "redirect_prefix") == 0) {
  2858. set_id = 5;
  2859. }
  2860. else if (strcmp(p, "ignore_substr") == 0) {
  2861. set_id = 6;
  2862. }
  2863. else if (strcmp(p, "redirect_substr") == 0) {
  2864. set_id = 7;
  2865. }
  2866. else if (strcmp(p, "ignore_process") == 0) {
  2867. set_id = 8;
  2868. }
  2869. else if (strcmp(p, "process_ignore_prefix") == 0) {
  2870. if (!in_braces) {
  2871. fprintf(stderr, "Fatal error in cde.options: 'process_ignore_prefix' must be enclosed in { } after an 'ignore_process' directive\n");
  2872. exit(1);
  2873. }
  2874. set_id = 9;
  2875. }
  2876. else {
  2877. fprintf(stderr, "Fatal error in cde.options: unrecognized token '%s'\n", p);
  2878. exit(1);
  2879. }
  2880. if (in_braces && set_id != 9) {
  2881. fprintf(stderr, "Fatal error in cde.options: Only 'process_ignore_prefix' is allowed within { } after an 'ignore_process' directive\n");
  2882. exit(1);
  2883. }
  2884. is_first_token = 0;
  2885. }
  2886. else {
  2887. struct PI* cur = NULL;
  2888. switch (set_id) {
  2889. case 1:
  2890. CDE_add_ignore_exact_path(p);
  2891. break;
  2892. case 2:
  2893. CDE_add_ignore_prefix_path(p);
  2894. break;
  2895. case 3:
  2896. CDE_add_ignore_envvar(p);
  2897. break;
  2898. case 4:
  2899. CDE_add_redirect_exact_path(p);
  2900. break;
  2901. case 5:
  2902. CDE_add_redirect_prefix_path(p);
  2903. break;
  2904. case 6:
  2905. CDE_add_ignore_substr_path(p);
  2906. break;
  2907. case 7:
  2908. CDE_add_redirect_substr_path(p);
  2909. break;
  2910. case 8:
  2911. assert(process_ignores[process_ignores_ind].process_name == NULL);
  2912. process_ignores[process_ignores_ind].process_name = strdup(p);
  2913. process_ignores[process_ignores_ind].process_ignore_prefix_paths_ind = 0;
  2914. // debug printf
  2915. //fprintf(stderr, "process_ignores[%d] = '%s'\n",
  2916. // process_ignores_ind, process_ignores[process_ignores_ind].process_name);
  2917. process_ignores_ind++;
  2918. if (process_ignores_ind >= 50) {
  2919. fprintf(stderr, "Fatal error in cde.options: more than 50 'ignore_process' entries\n");
  2920. exit(1);
  2921. }
  2922. break;
  2923. case 9:
  2924. assert(process_ignores_ind > 0);
  2925. // attach to the LATEST element in process_ignores
  2926. cur = &process_ignores[process_ignores_ind-1];
  2927. assert(cur->process_name);
  2928. cur->process_ignore_prefix_paths[cur->process_ignore_prefix_paths_ind] = strdup(p);
  2929. // debug printf
  2930. //fprintf(stderr, "process_ignores[%s][%d] = '%s'\n",
  2931. // cur->process_name,
  2932. // cur->process_ignore_prefix_paths_ind,
  2933. // cur->process_ignore_prefix_paths[cur->process_ignore_prefix_paths_ind]);
  2934. cur->process_ignore_prefix_paths_ind++;
  2935. if (cur->process_ignore_prefix_paths_ind >= 20) {
  2936. fprintf(stderr, "Fatal error in cde.options: more than 20 'process_ignore_prefix' entries\n");
  2937. exit(1);
  2938. }
  2939. break;
  2940. default:
  2941. assert(0);
  2942. }
  2943. break;
  2944. }
  2945. }
  2946. }
  2947. fclose(f);
  2948. cde_options_initialized = 1;
  2949. }
  2950. static void CDE_load_environment_vars() {
  2951. static char cde_full_environment_abspath[MAXPATHLEN];
  2952. strcpy(cde_full_environment_abspath, cde_pseudo_root_dir);
  2953. strcat(cde_full_environment_abspath, "/../cde.full-environment");
  2954. struct stat env_file_stat;
  2955. if (stat(cde_full_environment_abspath, &env_file_stat)) {
  2956. perror(cde_full_environment_abspath);
  2957. exit(1);
  2958. }
  2959. int full_environment_fd = open(cde_full_environment_abspath, O_RDONLY);
  2960. void* environ_start =
  2961. (char*)mmap(0, env_file_stat.st_size, PROT_READ, MAP_PRIVATE, full_environment_fd, 0);
  2962. char* environ_str = (char*)environ_start;
  2963. while (*environ_str) {
  2964. int environ_strlen = strlen(environ_str);
  2965. // format: "name=value"
  2966. // note that 'value' might itself contain '=' characters,
  2967. // so only split on the FIRST '='
  2968. char* cur = strdup(environ_str); // strtok needs to mutate
  2969. char* name = NULL;
  2970. char* val = NULL;
  2971. int count = 0;
  2972. char* p;
  2973. int start_index_of_value = 0;
  2974. // strtok is so dumb!!! need to munch through the entire string
  2975. // before it restores the string to its original value
  2976. for (p = strtok(cur, "="); p; p = strtok(NULL, "=")) {
  2977. if (count == 0) {
  2978. name = strdup(p);
  2979. }
  2980. else if (count == 1) {
  2981. start_index_of_value = (p - cur);
  2982. }
  2983. count++;
  2984. }
  2985. if (start_index_of_value) {
  2986. val = strdup(environ_str + start_index_of_value);
  2987. }
  2988. // make sure we're not ignoring this environment var:
  2989. int i;
  2990. int ignore_me = 0;
  2991. for (i = 0; i < ignore_envvars_ind; i++) {
  2992. if (strcmp(name, ignore_envvars[i]) == 0) {
  2993. ignore_me = 1;
  2994. break;
  2995. }
  2996. }
  2997. // ignore an invalid variable with an empty name or a name
  2998. // that's simply a newline character (some files have a trailing
  2999. // newline, which strtok picks up, ugh):
  3000. if (!name || (strcmp(name, "\n") == 0)) {
  3001. ignore_me = 1;
  3002. }
  3003. if (!ignore_me) {
  3004. // subtle ... if val is NULL, then we should call setenv() with
  3005. // an empty string as val, NOT a NULL, since calling it with a
  3006. // NULL parameter will cause it to DELETE the environment
  3007. // variable, not set it to ""
  3008. if (val) {
  3009. setenv(name, val, 1);
  3010. }
  3011. else {
  3012. setenv(name, "", 1);
  3013. }
  3014. }
  3015. else {
  3016. if (CDE_verbose_mode) {
  3017. printf("ignored envvar '%s' => '%s'\n", name, val);
  3018. }
  3019. }
  3020. if (name) free(name);
  3021. if (val) free(val);
  3022. free(cur);
  3023. // every string in cde_full_environment_abspath is
  3024. // null-terminated, so this advances to the next string
  3025. environ_str += (environ_strlen + 1);
  3026. }
  3027. munmap(environ_start, env_file_stat.st_size);
  3028. close(full_environment_fd);
  3029. }
  3030. // if we're running in CDE_exec_mode, redirect path argument for bind()
  3031. // and connect() into cde-root sandbox
  3032. void CDE_begin_socket_bind_or_connect(struct tcb *tcp) {
  3033. if (CDE_verbose_mode) {
  3034. printf("[%d] BEGIN socket bind/connect\n", tcp->pid);
  3035. }
  3036. // only do this redirection in CDE_exec_mode
  3037. if (!CDE_exec_mode) {
  3038. return;
  3039. }
  3040. // code adapted from printsock in strace-4.5.20/net.c
  3041. long addr = tcp->u_arg[1];
  3042. int addrlen = tcp->u_arg[2];
  3043. union {
  3044. char pad[128];
  3045. struct sockaddr sa;
  3046. struct sockaddr_un sau;
  3047. } addrbuf;
  3048. if (addr == 0) {
  3049. return;
  3050. }
  3051. if (addrlen < 2 || addrlen > sizeof(addrbuf)) {
  3052. addrlen = sizeof(addrbuf);
  3053. }
  3054. memset(&addrbuf, 0, sizeof(addrbuf));
  3055. if (umoven(tcp, addr, addrlen, addrbuf.pad) < 0) {
  3056. return;
  3057. }
  3058. addrbuf.pad[sizeof(addrbuf.pad) - 1] = '\0';
  3059. /* AF_FILE is also a synonym for AF_UNIX */
  3060. if (addrbuf.sa.sa_family == AF_UNIX) {
  3061. if (addrlen > 2 && addrbuf.sau.sun_path[0]) {
  3062. //tprintf("path=");
  3063. // addr + sizeof(addrbuf.sau.sun_family) is the location of the real path
  3064. char* original_path = strcpy_from_child(tcp, addr + sizeof(addrbuf.sau.sun_family));
  3065. if (original_path) {
  3066. //printf("original_path='%s'\n", original_path);
  3067. char* redirected_path =
  3068. redirect_filename_into_cderoot(original_path, tcp->current_dir, tcp);
  3069. // could be null if path is being ignored by cde.options
  3070. if (redirected_path) {
  3071. //printf("redirected_path: '%s'\n", redirected_path);
  3072. unsigned long new_pathlen = strlen(redirected_path);
  3073. // alter the socket address field to point to redirected path
  3074. memcpy_to_child(tcp->pid, (char*)(addr + sizeof(addrbuf.sau.sun_family)),
  3075. redirected_path, new_pathlen + 1);
  3076. free(redirected_path);
  3077. // remember the 2 extra bytes for the sun_family field!
  3078. unsigned long new_totallen = new_pathlen + sizeof(addrbuf.sau.sun_family);
  3079. struct user_regs_struct cur_regs;
  3080. EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
  3081. #if defined (I386)
  3082. // on i386, things are tricky tricky!
  3083. // the kernel uses socketcall() as a common entry
  3084. // point for all socket-related system calls
  3085. // http://www.kernel.org/doc/man-pages/online/pages/man2/socketcall.2.html
  3086. //
  3087. // the ecx register contains a pointer to an array of 3 pointers
  3088. // (of size 'unsigned long'), which represents the 3 arguments
  3089. // to the bind/connect syscall. they are:
  3090. // arg[0] - socket number
  3091. // arg[1] - pointer to socket address structure
  3092. // arg[2] - length of socket address structure
  3093. // we need to alter the length field to new_totallen,
  3094. // which is VERY IMPORTANT or else the path that the
  3095. // kernel sees will be truncated!!!
  3096. // we want to override arg[2], which is located at:
  3097. // cur_regs.ecx + 2*sizeof(unsigned long)
  3098. memcpy_to_child(tcp->pid, (char*)(cur_regs.ecx + 2*sizeof(unsigned long)),
  3099. (char*)(&new_totallen), sizeof(unsigned long));
  3100. #elif defined(X86_64)
  3101. // on x86-64, things are much simpler. the length field is
  3102. // stored in %rdx (the third argument), so simply override
  3103. // that register with new_totallen
  3104. cur_regs.rdx = (long)new_totallen;
  3105. ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
  3106. #else
  3107. #error "Unknown architecture (not I386 or X86_64)"
  3108. #endif
  3109. }
  3110. free(original_path);
  3111. }
  3112. }
  3113. }
  3114. else {
  3115. if (CDE_block_net_access) {
  3116. // blank out the sockaddr argument if you want to block network access
  3117. //
  3118. // I think that blocking 'bind' prevents setting up sockets to accept
  3119. // incoming connections, and blocking 'connect' prevents outgoing
  3120. // connections.
  3121. struct sockaddr s;
  3122. memset(&s, 0, sizeof(s));
  3123. memcpy_to_child(tcp->pid, (char*)addr, (char*)&s, sizeof(s));
  3124. }
  3125. }
  3126. }