/strace-4.6/cde.c
https://github.com/gidden/CDE · C · 3782 lines · 2204 code · 613 blank · 965 comment · 525 complexity · 124eb0ee2f55ded549539ccbc1129166 MD5 · raw file
Large files are truncated click here to view the full file
- /*
- CDE: Code, Data, and Environment packaging for Linux
- http://www.stanford.edu/~pgbovine/cde.html
- Philip Guo
- CDE is currently licensed under GPL v3:
- Copyright (c) 2010-2011 Philip Guo <pg@cs.stanford.edu>
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- */
- /* Linux system call calling conventions:
- According to this page:
- http://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-on-x86-64
- ... and the source code for systrace: http://www.citi.umich.edu/u/provos/systrace/
- 32-bit x86:
- syscall number: %eax
- first 6 syscall parameters: %ebx, %ecx, %edx, %esi, %edi, %ebp
- 64-bit x86-64:
- syscall number: %rax
- first 6 syscall parameters (for a 64-bit target process): %rdi, %rsi, %rdx, %r10, %r8 and %r9 (the kernel syscall interface uses %r10 where the user-level calling convention uses %rcx)
- first 6 syscall parameters (for a 32-bit target process): %rbx, %rcx, %rdx, %rsi, %rdi, %rbp
- (note how these are similar to the 32-bit syscall parameter registers)
- */
- #include "cde.h"
- #include "okapi.h"
- #include <dirent.h>
- // for CDE_begin_socket_bind_or_connect
- #include <sys/socket.h>
- #include <sys/un.h>
- #include <time.h>
- #include <sys/utsname.h> // for uname
- // TODO: eliminate this hack if it results in a compile-time error
- #include "config.h" // to get I386 / X86_64 definitions
- #if defined (I386)
- __asm__(".symver shmctl,shmctl@GLIBC_2.0"); // hack to eliminate glibc 2.2 dependency
- #endif
// Global mode flags.
//
// CDE_exec_mode: 1 while running code out of a CDE package (cde-exec),
//                0 while tracing a regular execution to build a package
char CDE_exec_mode;

// set by the -v option
char CDE_verbose_mode = 0;

// the two directory globals below are only valid if !CDE_exec_mode
char* CDE_PACKAGE_DIR = NULL;
char* CDE_ROOT_DIR = NULL;

// set by the -n option
char CDE_block_net_access = 0;

// set by the -s option; only relevant if CDE_exec_mode = 1
char CDE_exec_streaming_mode = 0;

#if defined(X86_64)
// current_personality == 1 means that a 64-bit cde-exec is actually
// tracking a 32-bit target process at the moment:
#define IS_32BIT_EMU (current_personality == 1)
#endif
// Super-simple trie implementation for doing fast string matching:
// adapted from my earlier IncPy project
//
// Keys are NUL-terminated strings of 7-bit ASCII characters; each node
// has one child slot per character code in [0, 127].
typedef struct _trie {
  struct _trie* children[128]; // we support ASCII characters from 0 to 127
  int elt_is_present; // 1 if there is an element present here
} Trie;

// Allocate one empty node: every child slot is NULL and no element is
// present.  Returns whatever calloc() returns (NULL on failure).
static Trie* TrieNew(void) {
  // VERY important to blank out the contents with a calloc()
  return (Trie*)calloc(1, sizeof(Trie));
}

/* currently unused ... but could be useful in the future
static void TrieDelete(Trie* t) {
  // free all your children before freeing yourself
  unsigned char i;
  for (i = 0; i < 128; i++) {
    if (t->children[i]) {
      TrieDelete(t->children[i]);
    }
  }
  free(t);
}
*/

// Insert ascii_string into the trie rooted at t, creating intermediate
// nodes as needed.  Asserts that every character is 7-bit ASCII.
static void TrieInsert(Trie* t, char* ascii_string) {
  while (*ascii_string != '\0') {
    unsigned char idx = (unsigned char)*ascii_string;
    assert(idx < 128); // we don't support extended ASCII characters
    if (!t->children[idx]) {
      t->children[idx] = TrieNew();
    }
    t = t->children[idx];
    ascii_string++;
  }

  t->elt_is_present = 1;
}

// Returns 1 if ascii_string was previously inserted into the trie rooted
// at t, 0 otherwise.
//
// A NULL root contains nothing.  A string holding a byte outside of
// [0, 127] can never have been inserted (TrieInsert asserts against it),
// so it is reported as absent rather than being used to index past the
// end of children[].
static int TrieContains(Trie* t, char* ascii_string) {
  if (!t) {
    return 0;
  }

  while (*ascii_string != '\0') {
    unsigned char idx = (unsigned char)*ascii_string;
    if (idx >= 128) {
      return 0; // non-ASCII byte: cannot be a key, and would index out of bounds
    }
    t = t->children[idx];
    if (!t) {
      return 0; // early termination, no match!
    }
    ascii_string++;
  }

  return t->elt_is_present;
}
- // 1 if we should use the dynamic linker from within the package
- // (much more portable, but might be less robust since the dynamic linker
- // must be invoked explicitly, which leads to some weird-ass bugs)
- // 0 if we should attempt to use the native dynamic linker from target machine
- // (not portable at all since the target machine's dynamic linker must
- // match the libc version WITHIN the package, but potentially more
- // robust if the target and source machines are identically-configured)
- char CDE_use_linker_from_package = 1; // ON by default, -l option to turn OFF
- // only 1 if we are running cde-exec from OUTSIDE of a cde-root/ directory
- char cde_exec_from_outside_cderoot = 0;
- FILE* CDE_copied_files_logfile = NULL;
- static char cde_options_initialized = 0; // set to 1 after CDE_init_options() done
- static void begin_setup_shmat(struct tcb* tcp);
- static void* find_free_addr(int pid, int exec, unsigned long size);
- static char* strcpy_from_child(struct tcb* tcp, long addr);
- static char* strcpy_from_child_or_null(struct tcb* tcp, long addr);
- static int ignore_path(char* filename, struct tcb* tcp);
- #define SHARED_PAGE_SIZE (MAXPATHLEN * 4)
- static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp);
- static void memcpy_to_child(int pid, char* dst_child, char* src, int size);
- // the true pwd of the cde executable AT THE START of execution
- char cde_starting_pwd[MAXPATHLEN];
- // these arrays are initialized in CDE_init_options()
- // yeah, statically-sized arrays are dumb but easy to implement :)
- static char* ignore_exact_paths[100];
- static char* ignore_prefix_paths[100];
- static char* ignore_substr_paths[100];
- int ignore_exact_paths_ind = 0;
- int ignore_prefix_paths_ind = 0;
- int ignore_substr_paths_ind = 0;
- // these override their ignore path counterparts
- static char* redirect_exact_paths[100];
- static char* redirect_prefix_paths[100];
- static char* redirect_substr_paths[100];
- int redirect_exact_paths_ind = 0;
- int redirect_prefix_paths_ind = 0;
- int redirect_substr_paths_ind = 0;
- static char* ignore_envvars[100]; // each element should be an environment variable to ignore
- int ignore_envvars_ind = 0;
- struct PI process_ignores[50];
- int process_ignores_ind = 0;
- // the absolute path to the cde-root/ directory, since that will be
- // where our fake filesystem starts. e.g., if cde_starting_pwd is
- // /home/bob/cde-package/cde-root/home/alice/cool-experiment
- // then cde_pseudo_root_dir is:
- // /home/bob/cde-package/cde-root
- //
- // only relevant when we're executing in CDE_exec_mode
- char cde_pseudo_root_dir[MAXPATHLEN];
- // the path to where the root directory is mounted on the remote machine
- // (only relevant for "cde-exec -s")
- char* cde_remote_root_dir = NULL;
- // file paths that should be accessed in cde-package/cde-root/
- // rather than on the remote machine (only relevant for "cde-exec -s")
- static Trie* cached_files_trie = NULL;
- FILE* cached_files_fp = NULL; // save cached_files_trie on-disk as "locally-cached-files.txt"
- // to shut up gcc warnings without going thru #include hell
- extern ssize_t getline(char **lineptr, size_t *n, FILE *stream);
- extern char* find_ELF_program_interpreter(char * file_name); // from ../readelf-mini/libreadelf-mini.a
- extern void path_pop(struct path* p);
- static void CDE_init_options(void);
- static void CDE_create_convenience_scripts(char** argv, int optind);
- static void CDE_create_toplevel_symlink_dirs(void);
- static void CDE_create_path_symlink_dirs(void);
- static void CDE_load_environment_vars(void);
- // returns a component within real_pwd that represents the part within
- // cde_pseudo_root_dir
- // the return value should NOT be mutated; otherwise we might be screwed!
- //
- // (tcp argument is optional and used to pass into ignore_path)
- static char* extract_sandboxed_pwd(char* real_pwd, struct tcb* tcp) {
- assert(CDE_exec_mode);
- // spoof getcwd by only taking the part BELOW cde-root/
- // e.g., if real_pwd is:
- // /home/bob/cde-package/cde-root/home/alice/cool-experiment
- // then return:
- // /home/alice/cool-experiment
- // as cwd
- int cde_pseudo_root_dir_len = strlen(cde_pseudo_root_dir);
- char real_pwd_is_within_cde_pseudo_root_dir =
- ((strlen(real_pwd) >= cde_pseudo_root_dir_len) &&
- (strncmp(real_pwd, cde_pseudo_root_dir, cde_pseudo_root_dir_len) == 0));
- // if real_pwd is within a strange directory like '/tmp' that should
- // be ignored, AND if it resides OUTSIDE of cde_pseudo_root_dir, then
- // simply return itself
- //
- // e.g., if real_pwd is '/tmp', then return itself,
- // but if real_pwd is '/tmp/cde-package/cde-root/home/pgbovine' and
- // cde_pseudo_root_dir is '/tmp/cde-package/cde-root/', then
- // treat it like any normal path (extract '/home/pgbovine')
- if (ignore_path(real_pwd, tcp) && !real_pwd_is_within_cde_pseudo_root_dir) {
- return real_pwd;
- }
- // sanity check, make sure real_pwd is within/ cde_pseudo_root_dir,
- // if we're not ignoring it
- if (!real_pwd_is_within_cde_pseudo_root_dir) {
- // if we're in this mode, then we're okay!!! don't return an error!
- if (cde_exec_from_outside_cderoot) {
- return real_pwd;
- }
- else {
- fprintf(stderr,
- "Fatal error: '%s' is outside of cde-root/ and NOT being ignored.\n",
- real_pwd);
- exit(1);
- }
- }
- // regular action: truncate path up to and including 'cde-root/'
- char* sandboxed_pwd = (real_pwd + cde_pseudo_root_dir_len);
- // special case for '/' directory:
- if (strlen(sandboxed_pwd) == 0) {
- return (char*)"/";
- }
- else {
- return sandboxed_pwd;
- }
- }
- // prepend CDE_ROOT_DIR to the given path string, assumes that the string
- // starts with '/' (i.e., it's an absolute path)
- // (mallocs a new string)
- char* prepend_cderoot(char* path) {
- assert(IS_ABSPATH(path));
- return format("%s%s", CDE_ROOT_DIR, path);
- }
- // WARNING: this function behaves differently depending on value of CDE_exec_mode
- char* create_abspath_within_cderoot(char* path) {
- assert(IS_ABSPATH(path)); // Pre-req: path must be an absolute path!
- if (CDE_exec_mode) {
- // if we're making a cde-exec run, then simply re-route it
- // inside of cde_pseudo_root_dir
- /* SUPER WEIRD special case: Sometimes 'path' will ALREADY BE within
- cde_pseudo_root_dir, so in those cases, do NOT redirect it again.
- Instead, simply strdup the original path (and maybe issue a warning).
- This can happen if, say, the target program reads /proc/self/maps
- or /proc/<pid>/maps and extracts the final field in a line, which
- represents the filename of a file that's been mmapped into the
- process's address space. If we're running in cde-exec mode, then
- the filename extracted from the maps 'pseudo-file' is actually an
- absolute path WITHIN cde-root/. e.g.,:
- 00754000-00755000 rw-p 00165000 08:01 85299 /home/pgbovine/cde-package/cde-root/bin/foo
- If we try to blindly redirect this path within cde-root/ again,
- we'll get something nonsensical like:
- /home/pgbovine/cde-package/cde-root/home/pgbovine/cde-package/cde-root/bin/foo
- To prevent such atrocities, we just do a simple check to see if a
- path is already within cde-root/, and if so, then don't redirect it.
-
- */
- if(strncmp(path, cde_pseudo_root_dir, strlen(cde_pseudo_root_dir)) == 0) {
- // TODO: maybe print a warning to stderr or a log file?
- //fprintf(stderr, "CDE WARNING: refusing to redirect path that's within cde-root/: '%s'", path);
- return strdup(path);
- }
- else {
- if (CDE_exec_streaming_mode) {
- // copy file into local cde-root/ 'cache' (if necessary)
- // we REALLY rely on cached_files_trie for performance to avoid
- // unnecessary filesystem accesses
- if (TrieContains(cached_files_trie, path)) {
- // cache hit! fall-through
- }
- else {
- printf("Accessing remote file: '%s'\n", path);
- // copy from remote -> local
- create_mirror_file(path, cde_remote_root_dir, cde_pseudo_root_dir);
- // VERY IMPORTANT: add ALL paths to cached_files_trie, even
- // for nonexistent files, so that we can avoid trying to access
- // those nonexistent files on the remote machine in future
- // executions. Remember, ANY filesystem access we can avoid
- // will lead to speed-ups.
- TrieInsert(cached_files_trie, path);
- if (cached_files_fp) {
- fprintf(cached_files_fp, "%s\n", path);
- }
- }
- }
- // normal behavior - redirect into cde-root/
- return format("%s%s", cde_pseudo_root_dir, path);
- }
- }
- else {
- // if we're making an ORIGINAL (tracing) run, then simply prepend
- // CDE_ROOT_DIR to path and canonicalize it
- char* path_within_cde_root = prepend_cderoot(path);
- // really really tricky ;) if the child process has changed
- // directories, then we can't rely on path_within_cde_root to
- // exist. instead, we must create an ABSOLUTE path based on
- // cde_starting_pwd, which is the directory where cde-exec was first launched!
- char* ret = canonicalize_path(path_within_cde_root, cde_starting_pwd);
- free(path_within_cde_root);
- assert(IS_ABSPATH(ret));
- return ret;
- }
- }
- // original_abspath must be an absolute path
- // create all the corresponding 'mirror' directories within
- // cde-package/cde-root/, MAKING SURE TO CREATE DIRECTORY SYMLINKS
- // when necessary (sort of emulate "mkdir -p" functionality)
- // if pop_one is non-zero, then pop last element before doing "mkdir -p"
- static void make_mirror_dirs_in_cde_package(char* original_abspath, int pop_one) {
- create_mirror_dirs(original_abspath, (char*)"", CDE_ROOT_DIR, pop_one);
- }
- // does simple string comparisons on ABSOLUTE PATHS.
- // (tcp argument is optional and used for tcp->p_ignores)
- static int ignore_path(char* filename, struct tcb* tcp) {
- assert(cde_options_initialized);
- // sometimes you will get a BOGUS empty filename ... in that case,
- // simply ignore it (this might hide some true errors, though!!!)
- if (filename[0] == '\0') {
- return 1;
- }
- assert(IS_ABSPATH(filename));
- int i;
- // process-specific ignores take precedence over global ignores
- // remember, tcp is optional
- if (tcp && tcp->p_ignores) {
- if (strcmp(filename, tcp->p_ignores->process_name) == 0) {
- if (CDE_verbose_mode) {
- printf("IGNORED '%s' (process=%s)\n", filename, tcp->p_ignores->process_name);
- }
- return 1;
- }
- for (i = 0; i < tcp->p_ignores->process_ignore_prefix_paths_ind; i++) {
- char* p = tcp->p_ignores->process_ignore_prefix_paths[i];
- if (strncmp(filename, p, strlen(p)) == 0) {
- if (CDE_verbose_mode) {
- printf("IGNORED '%s' [%s] (process=%s)\n", filename, p, tcp->p_ignores->process_name);
- }
- return 1;
- }
- }
- }
- // redirect paths override ignore paths
- for (i = 0; i < redirect_exact_paths_ind; i++) {
- if (strcmp(filename, redirect_exact_paths[i]) == 0) {
- return 0;
- }
- }
- for (i = 0; i < redirect_prefix_paths_ind; i++) {
- char* p = redirect_prefix_paths[i];
- if (strncmp(filename, p, strlen(p)) == 0) {
- return 0;
- }
- }
- for (i = 0; i < redirect_substr_paths_ind; i++) {
- if (strstr(filename, redirect_substr_paths[i])) {
- return 0;
- }
- }
- for (i = 0; i < ignore_exact_paths_ind; i++) {
- if (strcmp(filename, ignore_exact_paths[i]) == 0) {
- return 1;
- }
- }
- for (i = 0; i < ignore_prefix_paths_ind; i++) {
- char* p = ignore_prefix_paths[i];
- if (strncmp(filename, p, strlen(p)) == 0) {
- return 1;
- }
- }
- for (i = 0; i < ignore_substr_paths_ind; i++) {
- if (strstr(filename, ignore_substr_paths[i])) {
- return 1;
- }
- }
- if (cde_exec_from_outside_cderoot) {
- // if we're running cde-exec from OUTSIDE of cde-root/, then adopt a
- // 'Union FS' like policy where if a version of the file exists
- // within cde-package/cde-root/, then use it (return 0 to NOT
- // ignore), otherwise try using the version in the real system
- // directory (return 1 to ignore)
- struct stat tmp_statbuf;
- char* redirected_filename = create_abspath_within_cderoot(filename);
- if (stat(redirected_filename, &tmp_statbuf) == 0) {
- free(redirected_filename);
- return 0;
- }
- else {
- free(redirected_filename);
- return 1;
- }
- }
- else {
- // do NOT ignore by default. if you want to ignore everything except
- // for what's explicitly specified by 'redirect' directives, then
- // use an option like "ignore_prefix=/" (to ignore everything) and
- // then add redirect_prefix= and redirect_exact= directives accordingly
- return 0;
- }
- }
- // copies a file into its respective location within cde-root/,
- // creating all necessary intermediate sub-directories and symlinks
- //
- // if filename is a symlink, then copy both it AND its target into cde-root
- static void copy_file_into_cde_root(char* filename, char* child_current_pwd) {
- assert(filename);
- assert(!CDE_exec_mode);
- // resolve absolute path relative to child_current_pwd and
- // get rid of '..', '.', and other weird symbols
- char* filename_abspath = canonicalize_path(filename, child_current_pwd);
- // don't copy filename that we're ignoring (remember to use ABSOLUTE PATH)
- if (ignore_path(filename_abspath, NULL)) {
- free(filename_abspath);
- return;
- }
- if (CDE_copied_files_logfile) {
- fprintf(CDE_copied_files_logfile, "%s\n", filename_abspath);
- }
- create_mirror_file(filename_abspath, (char*)"", CDE_ROOT_DIR);
- free(filename_abspath);
- }
// character-class predicates, declared by hand rather than via a header
extern int isascii(int c);
extern int isprint(int c);
extern int isspace(int c);

// non-zero if c is a tab or a printable ASCII character
#define STRING_ISGRAPHIC(c) ((c) == '\t' || (isascii(c) && isprint(c)))
- // modify a single argument to the given system call
- // to a path within cde-root/, if applicable
- //
- // arg_num == 1 mean modify first register arg
- // arg_num == 2 mean modify second register arg
- static void modify_syscall_single_arg(struct tcb* tcp, int arg_num, char* filename) {
- assert(CDE_exec_mode);
- assert(filename);
- char* redirected_filename =
- redirect_filename_into_cderoot(filename, tcp->current_dir, tcp);
- if (!redirected_filename) {
- return;
- }
- if (!tcp->childshm) {
- begin_setup_shmat(tcp);
- // no more need for filename, so don't leak it
- free(redirected_filename);
- return; // MUST punt early here!!!
- }
- // redirect all requests for absolute paths to version within cde-root/
- // if those files exist!
- strcpy(tcp->localshm, redirected_filename); // hopefully this doesn't overflow :0
- //printf(" redirect %s\n", tcp->localshm);
- //static char tmp[MAXPATHLEN];
- //EXITIF(umovestr(tcp, (long)tcp->childshm, sizeof tmp, tmp) < 0);
- //printf(" %s\n", tmp);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- if (arg_num == 1) {
- #if defined (I386)
- cur_regs.ebx = (long)tcp->childshm;
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rbx = (long)tcp->childshm;
- }
- else {
- cur_regs.rdi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- }
- else {
- assert(arg_num == 2);
- #if defined (I386)
- cur_regs.ecx = (long)tcp->childshm;
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rcx = (long)tcp->childshm;
- }
- else {
- cur_regs.rsi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- }
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- free(redirected_filename);
- }
- // copy and paste from modify_syscall_first_arg ;)
- static void modify_syscall_two_args(struct tcb* tcp) {
- assert(CDE_exec_mode);
- if (!tcp->childshm) {
- begin_setup_shmat(tcp);
- return; // MUST punt early here!!!
- }
- char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
- char* redirected_filename1 =
- redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
- free(filename1);
- char* filename2 = strcpy_from_child(tcp, tcp->u_arg[1]);
- char* redirected_filename2 =
- redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
- free(filename2);
- // gotta do both, yuck
- if (redirected_filename1 && redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename1);
- int len1 = strlen(redirected_filename1);
- char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
- strcpy(redirect_file2_begin, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ebx = (long)tcp->childshm;
- cur_regs.ecx = (long)(((char*)tcp->childshm) + len1 + 1);
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rbx = (long)tcp->childshm;
- cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- else {
- cur_regs.rdi = (long)tcp->childshm;
- cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- //static char tmp[MAXPATHLEN];
- //EXITIF(umovestr(tcp, (long)cur_regs.ebx, sizeof tmp, tmp) < 0);
- //printf(" ebx: %s\n", tmp);
- //EXITIF(umovestr(tcp, (long)cur_regs.ecx, sizeof tmp, tmp) < 0);
- //printf(" ecx: %s\n", tmp);
- }
- else if (redirected_filename1) {
- strcpy(tcp->localshm, redirected_filename1);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ebx = (long)tcp->childshm; // only set EBX
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rbx = (long)tcp->childshm;
- }
- else {
- cur_regs.rdi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else if (redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ecx = (long)tcp->childshm; // only set ECX
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rcx = (long)tcp->childshm;
- }
- else {
- cur_regs.rsi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- if (redirected_filename1) free(redirected_filename1);
- if (redirected_filename2) free(redirected_filename2);
- }
- // modify the second and fourth args to redirect into cde-root/
- // really nasty copy-and-paste from modify_syscall_two_args above
- static void modify_syscall_second_and_fourth_args(struct tcb* tcp) {
- assert(CDE_exec_mode);
- if (!tcp->childshm) {
- begin_setup_shmat(tcp);
- return; // MUST punt early here!!!
- }
- char* filename1 = strcpy_from_child(tcp, tcp->u_arg[1]);
- char* redirected_filename1 =
- redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
- free(filename1);
- char* filename2 = strcpy_from_child(tcp, tcp->u_arg[3]);
- char* redirected_filename2 =
- redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
- free(filename2);
- // gotta do both, yuck
- if (redirected_filename1 && redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename1);
- int len1 = strlen(redirected_filename1);
- char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
- strcpy(redirect_file2_begin, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ecx = (long)tcp->childshm;
- cur_regs.esi = (long)(((char*)tcp->childshm) + len1 + 1);
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rcx = (long)tcp->childshm;
- cur_regs.rsi = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- else {
- cur_regs.rsi = (long)tcp->childshm;
- cur_regs.rcx = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else if (redirected_filename1) {
- strcpy(tcp->localshm, redirected_filename1);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ecx = (long)tcp->childshm;
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rcx = (long)tcp->childshm;
- }
- else {
- cur_regs.rsi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else if (redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.esi = (long)tcp->childshm; // only set ECX
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rsi = (long)tcp->childshm;
- }
- else {
- cur_regs.rcx = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- if (redirected_filename1) free(redirected_filename1);
- if (redirected_filename2) free(redirected_filename2);
- }
- // modify the first and third args to redirect into cde-root/
- // really nasty copy-and-paste from modify_syscall_two_args above
- static void modify_syscall_first_and_third_args(struct tcb* tcp) {
- assert(CDE_exec_mode);
- if (!tcp->childshm) {
- begin_setup_shmat(tcp);
- return; // MUST punt early here!!!
- }
- char* filename1 = strcpy_from_child(tcp, tcp->u_arg[0]);
- char* redirected_filename1 =
- redirect_filename_into_cderoot(filename1, tcp->current_dir, tcp);
- free(filename1);
- char* filename2 = strcpy_from_child(tcp, tcp->u_arg[2]);
- char* redirected_filename2 =
- redirect_filename_into_cderoot(filename2, tcp->current_dir, tcp);
- free(filename2);
- // gotta do both, yuck
- if (redirected_filename1 && redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename1);
- int len1 = strlen(redirected_filename1);
- char* redirect_file2_begin = ((char*)tcp->localshm) + len1 + 1;
- strcpy(redirect_file2_begin, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ebx = (long)tcp->childshm;
- cur_regs.edx = (long)(((char*)tcp->childshm) + len1 + 1);
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rbx = (long)tcp->childshm;
- cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- else {
- cur_regs.rdi = (long)tcp->childshm;
- cur_regs.rdx = (long)(((char*)tcp->childshm) + len1 + 1);
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else if (redirected_filename1) {
- strcpy(tcp->localshm, redirected_filename1);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.ebx = (long)tcp->childshm;
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rbx = (long)tcp->childshm;
- }
- else {
- cur_regs.rdi = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else if (redirected_filename2) {
- strcpy(tcp->localshm, redirected_filename2);
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.edx = (long)tcp->childshm; // only set ECX
- #elif defined(X86_64)
- if (IS_32BIT_EMU) {
- cur_regs.rdx = (long)tcp->childshm;
- }
- else {
- cur_regs.rdx = (long)tcp->childshm;
- }
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- if (redirected_filename1) free(redirected_filename1);
- if (redirected_filename2) free(redirected_filename2);
- }
- // create a malloc'ed filename that contains a version within cde-root/
- // return NULL if the filename should NOT be redirected
- // WARNING: behavior differs based on CDE_exec_mode!
- //
- // (tcp argument is optional and used to pass into ignore_path)
- static char* redirect_filename_into_cderoot(char* filename, char* child_current_pwd, struct tcb* tcp) {
- /* sometimes this is called with a null arg ... investigate further
- before making this hack permanent, though
- if (!filename) {
- return NULL;
- }
- */
- assert(filename);
- assert(child_current_pwd);
- char* filename_abspath = NULL;
- if (CDE_exec_mode) {
- // canonicalize_path has the desirable side effect of preventing
- // 'malicious' paths from going below the pseudo-root '/' ... e.g.,
- // if filename is '/home/pgbovine/../../../../'
- // then filename_abspath is simply '/'
- //
- // we resolve relative paths w.r.t.
- // extract_sandboxed_pwd(child_current_pwd), so that programs
- // can't use relative paths like '../../../' to get out of sandbox
- //
- // this is why it's VERY IMPORTANT to canonicalize before creating a
- // path into CDE_ROOT_DIR, so that absolute paths can't 'escape'
- // the sandbox
- filename_abspath =
- canonicalize_path(filename, extract_sandboxed_pwd(child_current_pwd, tcp));
- }
- else {
- filename_abspath = canonicalize_path(filename, child_current_pwd);
- }
- assert(filename_abspath);
- // don't redirect paths that we're ignoring (remember to use ABSOLUTE PATH)
- if (ignore_path(filename_abspath, tcp)) {
- free(filename_abspath);
- return NULL;
- }
- // WARNING: behavior of create_abspath_within_cderoot
- // differs based on CDE_exec_mode!
- char* ret = create_abspath_within_cderoot(filename_abspath);
- if (CDE_verbose_mode) {
- printf("redirect '%s' => '%s'\n", filename, ret);
- }
- free(filename_abspath);
- return ret;
- }
- /* standard functionality for syscalls that take a filename as first argument
- cde (package creation) mode:
- - if abspath(filename) is outside pwd, then copy it into cde-root/
- cde-exec mode:
- - if abspath(filename) is outside pwd, then redirect it into cde-root/
- sys_open(filename, flags, mode)
- sys_creat(filename, mode)
- sys_chmod(filename, ...)
- sys_chown(filename, ...)
- sys_chown16(filename, ...)
- sys_lchown(filename, ...)
- sys_lchown16(filename, ...)
- sys_stat(filename, ...)
- sys_stat64(filename, ...)
- sys_lstat(filename, ...)
- sys_lstat64(filename, ...)
- sys_truncate(path, length)
- sys_truncate64(path, length)
- sys_access(filename, mode)
- sys_utime(filename, ...)
- sys_readlink(path, ...)
- */
- void CDE_begin_standard_fileop(struct tcb* tcp, const char* syscall_name) {
- //char* filename = strcpy_from_child(tcp, tcp->u_arg[0]);
- /* Patch by Edward Wang
- "Attached is a patch to fix a small bug that happens when a syscall
- is called without any arguments (tcp->u_arg[0] is "0"). This
- happened to me a few times when I was trying to package a portable
- version of VLC media player."
- */
- char* filename = strcpy_from_child_or_null(tcp, tcp->u_arg[0]);
- if (filename == NULL)
- return;
- if (CDE_verbose_mode) {
- printf("[%d] BEGIN %s '%s'\n", tcp->pid, syscall_name, filename);
- }
- if (CDE_exec_mode) {
- if (filename) {
- modify_syscall_single_arg(tcp, 1, filename);
- }
- }
- else {
- // pre-emptively copy the given file into cde-root/, silencing warnings for
- // non-existent files.
- // (Note that filename can sometimes be a JUNKY STRING due to weird race
- // conditions when strace is tracing complex multi-process applications)
- if (filename) {
- copy_file_into_cde_root(filename, tcp->current_dir);
- }
- }
- free(filename);
- }
- /* standard functionality for *at syscalls that take a dirfd as first
- argument, followed by a filepath
- e.g., see documentation for http://linux.die.net/man/2/openat
- example syscalls:
- openat,faccessat,fstatat64,fchownat,fchmodat,futimesat,mknodat
- if filepath is an absolute path, or if filepath is a relative path but
- dirfd is AT_FDCWD, then:
- cde (package creation) mode:
- - if abspath(filepath) is outside pwd, then copy it into cde-root/
- exec mode:
- - if abspath(filepath) is outside pwd, then redirect it into cde-root/
- issue a warning if filepath is a relative path but dirfd is NOT AT_FDCWD
- */
- void CDE_begin_at_fileop(struct tcb* tcp, const char* syscall_name) {
- char* filename = strcpy_from_child(tcp, tcp->u_arg[1]);
- if (CDE_verbose_mode) {
- printf("[%d] BEGIN %s '%s' (dirfd=%u)\n", tcp->pid, syscall_name, filename, (unsigned int)tcp->u_arg[0]);
- }
- if (!IS_ABSPATH(filename) && tcp->u_arg[0] != AT_FDCWD) {
- fprintf(stderr,
- "CDE WARNING (unsupported operation): %s '%s' is a relative path and dirfd != AT_FDCWD\n",
- syscall_name, filename);
- goto done; // punt early!
- }
- if (CDE_exec_mode) {
- modify_syscall_single_arg(tcp, 2, filename);
- }
- else {
- // pre-emptively copy the given file into cde-root/, silencing warnings for
- // non-existent files.
- // (Note that filename can sometimes be a JUNKY STRING due to weird race
- // conditions when strace is tracing complex multi-process applications)
- copy_file_into_cde_root(filename, tcp->current_dir);
- }
- done:
- free(filename);
- }
- // input_buffer_arg_index is the index of the input filename argument
- // output_buffer_arg_index is the index of the argument where the output
- // buffer is being held (we clobber this in some special cases)
- static void CDE_end_readlink_internal(struct tcb* tcp, int input_buffer_arg_index, int output_buffer_arg_index) {
- char* filename = strcpy_from_child(tcp, tcp->u_arg[input_buffer_arg_index]);
- if (CDE_exec_mode) {
- if (tcp->u_rval >= 0) {
- // super hack! if the program is trying to access the special
- // /proc/self/exe file, return perceived_program_fullpath if
- // available, or else cde-exec will ERRONEOUSLY return the path
- // to the dynamic linker (e.g., ld-linux.so.2).
- //
- // programs like 'java' rely on the value of /proc/self/exe
- // being the true path to the executable, in order to dynamically
- // load libraries based on paths relative to that full path!
- char is_proc_self_exe = (strcmp(filename, "/proc/self/exe") == 0);
- // another super hack! programs like Google Earth
- // ('googleearth-bin') access /proc/self/exe as /proc/<pid>/exe
- // where <pid> is ITS OWN PID! be sure to handle that case properly
- // (but don't worry about handling cases where <pid> is the PID of
- // another process).
- //
- // (again, these programs use the real path of /proc/<pid>/exe as
- // a basis for dynamically loading libraries, so we must properly
- // 'fake' this value)
- char* self_pid_name = format("/proc/%d/exe", tcp->pid);
- char is_proc_self_pid_exe = (strcmp(filename, self_pid_name) == 0);
- free(self_pid_name);
- if ((is_proc_self_exe || is_proc_self_pid_exe) &&
- tcp->perceived_program_fullpath) {
- memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
- tcp->perceived_program_fullpath,
- strlen(tcp->perceived_program_fullpath) + 1);
- // VERY SUBTLE - set %eax (the syscall return value) to the length
- // of the FAKED STRING, since readlink is supposed to return the
- // length of the returned path (some programs like Python rely
- // on that length to allocated memory)
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.eax = (long)strlen(tcp->perceived_program_fullpath);
- #elif defined(X86_64)
- cur_regs.rax = (long)strlen(tcp->perceived_program_fullpath);
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- // if the program tries to read /proc/self/cwd, then treat it like
- // a CDE_end_getcwd call, returning a fake cwd:
- //
- // (note that we don't handle /proc/<pid>/cwd yet)
- else if (strcmp(filename, "/proc/self/cwd") == 0) {
- // copied from CDE_end_getcwd
- char* sandboxed_pwd = extract_sandboxed_pwd(tcp->current_dir, tcp);
- memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
- sandboxed_pwd, strlen(sandboxed_pwd) + 1);
- // VERY SUBTLE - set %eax (the syscall return value) to the length
- // of the FAKED STRING, since readlink is supposed to return the
- // length of the returned path (some programs like Python rely
- // on that length to allocated memory)
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- cur_regs.eax = (long)strlen(sandboxed_pwd);
- #elif defined(X86_64)
- cur_regs.rax = (long)strlen(sandboxed_pwd);
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- else {
- // inspect the return value (stored in readlink_target) and if
- // it's a relative path that starts with './' and contains a '//'
- // marker, then it MIGHT actually be a "munged" version of an
- // absolute path symlink that was turned into a relative path
- // when the original file was copied (okapi-ed) into the package.
- // e.g., a symlink to an absolute path like /lib/libc.so.6 might
- // be munged into some monstrous relative path like:
- //
- // ./../../../../..//lib/libc.so.6
- //
- // so that it can reference the version of /lib/libc.so.6 from
- // WITHIN THE PACKAGE rather than the native one on the target
- // machine. However, when the target program does a readlink(),
- // it expects to the syscall to return '/lib/libc.so.6', so we
- // must properly "un-munge" these sorts of symlinks.
- //
- // (Note that we don't have this problem with symlinks to
- // relative paths.)
- // first get the length of the return value string ...
- struct user_regs_struct cur_regs;
- EXITIF(ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long)&cur_regs) < 0);
- #if defined (I386)
- int ret_length = cur_regs.eax;
- #elif defined(X86_64)
- int ret_length = cur_regs.rax;
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- char readlink_target[MAXPATHLEN];
- if (umoven(tcp, tcp->u_arg[output_buffer_arg_index], ret_length, readlink_target) == 0) {
- // remember to cap off the end ...
- readlink_target[ret_length] = '\0';
- // now readlink_target is the string that's "returned" by this
- // readlink syscall
- // is there a leading './' marker?
- if (strncmp(readlink_target, "./", 2) == 0) {
- // now check for a distinctive '//' marker, indicative of munged paths.
- // However, this simple check can still result in false positives!!!
- char* suffix = strstr(readlink_target, "//");
- if (suffix) {
- assert(suffix[0] == '/');
- suffix++; // skip one of the slashes
- assert(IS_ABSPATH(suffix));
- // as a final sanity check, see if this file actually exists
- // within cde_pseudo_root_dir, to prevent false positives
- char* actual_path = format("%s%s", cde_pseudo_root_dir, suffix);
- struct stat st;
- if (lstat(actual_path, &st) == 0) {
- // clobber the syscall's return value with 'suffix'
- memcpy_to_child(tcp->pid, (char*)tcp->u_arg[output_buffer_arg_index],
- suffix, strlen(suffix) + 1);
- // VERY SUBTLE - set %eax (the syscall return value) to the length
- // of the FAKED STRING, since readlink is supposed to return the
- // length of the returned path (some programs like Python rely
- // on that length to allocated memory)
- #if defined (I386)
- cur_regs.eax = (long)strlen(suffix);
- #elif defined(X86_64)
- cur_regs.rax = (long)strlen(suffix);
- #else
- #error "Unknown architecture (not I386 or X86_64)"
- #endif
- ptrace(PTRACE_SETREGS, tcp->pid, NULL, (long)&cur_regs);
- }
- free(actual_path);
- }
- }
- }
- }
- }
- }
- free(filename);
- }
void CDE_end_readlink(struct tcb* tcp) {
  // readlink(path, buf, ...): the input path is the first argument
  // (index 0) and the output buffer is the second argument (index 1)
  const int path_arg_index = 0;
  const int buf_arg_index = 1;
  CDE_end_readlink_internal(tcp, path_arg_index, buf_arg_index);
}
void CDE_end_readlinkat(struct tcb* tcp) {
  // readlinkat(dirfd, path, buf, ...): the input path is the second
  // argument (index 1) and the output buffer is the third argument (index 2)
  const int path_arg_index = 1;
  const int buf_arg_index = 2;
  CDE_end_readlink_internal(tcp, path_arg_index, buf_arg_index);
}
- void CDE_begin_execve(struct tcb* tcp) {
- // null all these out up-top, then deallocate them in 'done'
- char* exe_filename = NULL;
- char* redirected_path = NULL;
- char* exe_filename_abspath = NULL;
- char* script_command = NULL;
- char* ld_linux_filename = NULL;
- char* ld_linux_fullpath = NULL;
- exe_filename = strcpy_from_child(tcp, tcp->u_arg[0]);
- // only attempt to do the ld-linux.so.2 trick if exe_filename
- // is a valid executable file ... otherwise don't do
- // anything and simply let the execve fail just like it's supposed to
- struct stat filename_stat;
- // NULL out p_ignores since you might have inherited it from your parent after
- // forking, but when you exec, you're probably now executing a different program
- tcp->p_ignores = NULL;
- if (CDE_verbose_mode) {
- printf("[%d] CDE_begin_execve '%s'\n", tcp->pid, exe_filename);
- }
- if (CDE_exec_mode) {
- // if we're purposely ignoring a path to an executable (e.g.,
- // ignoring "/bin/bash" to prevent crashes on certain Ubuntu
- // machines), then DO NOT use the ld-linux trick and simply
- // execve the file normally
- //
- // (note that this check doesn't pick up the case when a textual script
- // is being executed (e.g., with "#!/bin/bash" as its shebang line),
- // since exe_filename is the script's name and NOT "/bin/bash".
- // We will need to handle this case LATER in the function.)
- char* opened_filename_abspath =
- canonicalize_path(exe_filename, extract_sandboxed_pwd(tcp->current_dir, tcp));
- if (ignore_path(opened_filename_abspath, tcp)) {
- free(opened_filename_abspath);
- goto done;
- }
- // check for presence in process_ignores, and if found, set
- // tcp->p_ignores and punt
- int i;
- for (i = 0; i < process_ignores_ind; i++) {
- if (strcmp(opened_filename_abspath, process_ignores[i].process_name) == 0) {
- //printf("IGNORED '%s'\n", opened_filename_abspath);
- tcp->p_ignores = &process_ignores[i];
- free(opened_filename_abspath);
- goto done; // TOTALLY PUNT!!!
- }
- }
- free(opened_filename_abspath);
- redirected_path = redirect_filename_into_cderoot(exe_filename, tcp->current_dir, tcp);
- }
- char* path_to_executable = NULL;
- if (redirected_path) {
- // TODO: we don't check whether it's a real executable file :/
- if (stat(redirected_path, &filename_stat) != 0) {
- goto done;
- }
- path_to_executable = redirected_path;
- }
- else {
- // just check the file itself (REMEMBER TO GET ITS ABSOLUTE PATH!)
- exe_filename_abspath = canonicalize_path(exe_filename, tcp->current_dir);
- // TODO: we don't check whether it's a real executable file :/
- if (stat(exe_filename_abspath, &filename_stat) != 0) {
- goto done;
- }
- path_to_executable = exe_filename_abspath;
- }
- assert(path_to_executable);
- // WARNING: ld-linux.so.2 only works on dynamically-linked binary
- // executable files; it will fail if you invoke it on:
- // - a textual script file
- // - a statically-linked binary
- //
- // for a textual script file, we must invoke ld-linux.so.2 on the
- // target of the shebang #! (which can itself take arguments)
- //
- // e.g., #! /bin/sh
- // e.g., #! /usr/bin/env python
- char is_textual_script = 0;
- char is_elf_binary = 0;
- FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
- assert(f);
- char header[5];
- memset(header, 0, sizeof(header));
- fgets(header, 5, f); // 5 means 4 bytes + 1 null terminating byte
- if (strcmp(header, "\177ELF") == 0) {
- is_elf_binary = 1;
- }
- fclose(f);
- if (is_elf_binary) {
- // look for whether it's a statically-linked binary ...
- // if so, then there is NO need to call ld-linux.so.2 on it;
- // we can just execute it directly (in fact, ld-linux.so.2
- // will fail on static binaries!)
- // mallocs a new string if successful
- // (this string is most likely "/lib/ld-linux.so.2")
- ld_linux_filename = find_ELF_program_interpreter(path_to_executable);
- if (!ld_linux_filename) {
- // if the program interpreter isn't found, then it's a static
- // binary, so let the execve call proceed normally
- if (CDE_exec_mode) {
- // redirect the executable's path to within $CDE_ROOT_DIR:
- modify_syscall_single_arg(tcp, 1, exe_filename);
- }
- else {
- copy_file_into_cde_root(exe_filename, tcp->current_dir);
- }
- // remember to EXIT EARLY!
- goto done;
- }
- assert(IS_ABSPATH(ld_linux_filename));
- }
- else {
- // find out whether it's a script file (starting with #! line)
- FILE* f = fopen(path_to_executable, "rb"); // open in binary mode
- size_t len = 0;
- ssize_t read;
- char* tmp = NULL; // getline() mallocs for us
- read = getline(&tmp, &len, f);
- if (read > 2) {
- assert(tmp[read-1] == '\n'); // strip of trailing newline
- tmp[read-1] = '\0'; // strip of trailing newline
- if (tmp[0] == '#' && tmp[1] == '!') {
- is_textual_script = 1;
- script_command = strdup(&tmp[2]);
- }
- }
- free(tmp);
- /* Patch from Yang Chen
- "I am packaging our tool using it. I found there is a possible
- bug in cde.c where opened files were not closed. In a long run,
- it could cause fopen fail. I noticed it because our toolchain has
- a lot of invocations on shell scripts and hence hit this
- problem.""
- */
- fclose(f);
- if (!script_command) {
- fprintf(stderr, "Fatal error: '%s' seems to be a script without a #! line.\n(cde can only execute scripts that start with a proper #! line)\n",
- path_to_executable);
- exit(1);
- }
- // now find the program interpreter for the script_command
- // executable, be sure to grab the FIRST TOKEN since that's
- // the actual executable name ...
- // TODO: this will fail if the executable's path has a space in it
- //
- // mallocs a new string if successful
- // (this string is most likely "/lib/ld-linux.so.2")
- // libc is so dumb ... strtok() alters its argument in an un-kosher way
- tmp = strdup(script_command);
- char* p = strtok(tmp, " ");
- // to have find_ELF_program_interpreter succeed, we might need to
- // redirect the path inside CDE_ROOT_DIR:
- char* script_command_filename = NULL;
- if (CDE_exec_mode) {
- // this path should look like the name in the #! line, just
- // canonicalized to be an absolute path
- char* script_command_abspath =
- canonicalize_path(p, extract_sandboxed_pwd(tcp->current_dir, tcp));
- if (ignore_path(script_command_abspath, tcp)) {
- free(script_command_abspath);
- free(tmp);
- goto done; // PUNT!
- }
- // check for presence in process_ignores, and if found, set
- // tcp->p_ignores and punt
- int i;
- for (i = 0; i < process_ignores_ind; i++) {
- if (strcmp(script_command_abspath, process_ignores[i].process_name) == 0) {
- //printf("IGNORED (script) '%s'\n", script_command_abspath);
- tcp->p_ignores = &process_ignores[i];
- free(script_command_abspath);
- free(tmp);
- goto done; // TOTALLY PUNT!!!
- }
- }
- free(script_command_abspath);
- script_command_filename = redirect_filename_into_cderoot(p, tcp->current_dir, tcp);
- }
- if (!script_command_filename) {
- script_command_filename = strdup(p);
- }
- ld_linux_filename = find_ELF_program_interpreter(script_command_filename);
- free(script_command_filename);
- free(tmp);
- if (!ld_linux_filename) {
- // if the program interpreter isn't found, then it's a static
- // binary, so let the execve call proceed unmodified
- // TODO: is this the right thing to do here? I think we might
- // need to do something better here (think harder about this case!)
- if (CDE_exec_mode) {
- // redirect the executable's path to within cde-root/:
- modify_syscall_single_arg(tcp, 1, exe_filename);
- }
- goto done;
- }
- assert(IS_ABSPATH(ld_linux_filename));
- }
- assert(!(is_elf_binary && is_textual_script));
- if (CDE_exec_mode) {
- // set up shared memory segment if we haven't done so yet
- if (!tcp->childshm) {
- begin_setup_shmat(tcp);
- goto done; // MUST punt early here!!!
- }
- ld_linux_fullpath = create_abspath_within_cderoot(ld_linux_filename);
- /* we're gonna do some craziness here to redirect the OS to call
- cde-root/lib/ld-linux.so.2 rather than the real program, since
- ld-linux.so.2 is closely-tied with the version of libc in
- cde-root/. */
- if (is_textual_script) {
- /* we're running a script with a shebang (#!), so
- let's set up the shared memory segment (tcp->localshm) like so:
- if (CDE_use_linker_from_package) {
- base --> tcp->localshm : "cde-root/lib/ld-linux.so.2" (ld_linux_fullpath)
- script_command token 0 : "/usr/bin/env"
- script_command token 1 : "python"
- ... (for as many t…