/cc/fileworld.cpp

https://github.com/lazyboy/lazypersonal · C++ · 216 lines · 165 code · 23 blank · 28 comment · 52 complexity · 96c741b3c201f2afcb849b44c3ff7267 MD5 · raw file

  1. // Sample solution for http://www dot ferozeh.com slash Interviews slash misc slash imo.aspx
  2. // try2: Read each file at most two times, at most two files open at a time.
  3. // W = max_number_of_words_in_file, W0 = num_words_in_first_file, O(W0 lg W0 + lg W0 * Sum|W|)
  4. // lazyboybd -= at =- gmail -= dot =- com
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <vector>
using namespace std;
  12. #define rep(i, n) for (i = 0; i < (n); ++i)
  13. #define is_char(x) (((x)>='a'&&(x)<='z')||((x)>='A'&&(x)<='Z'))
  14. char* file_input[] = {
  15. "I am a bat, man how do You do testing far",
  16. "I am a cat walk man, How do you test it",
  17. "I am a very smashing man how do you do foo bar"//,
  18. //""
  19. };
  20. char* get_word(char** p, int* len) {
  21. while(**p && !is_char(**p)) ++(*p);
  22. if (!**p) return NULL;
  23. char* ret = *p;
  24. *len = 0;
  25. while(**p && is_char(**p)) ++(*p), ++(*len);
  26. // we're modifying input
  27. //**p = 0;
  28. return ret;
  29. }
  30. bool strnieq(char* a, char* b, int alen, int blen) {
  31. if (alen == blen) {
  32. while(alen--) {
  33. if (tolower(*a) != tolower(*b)) return false;
  34. a++, b++;
  35. }
  36. return true;
  37. }
  38. return false;
  39. }
  40. void print_word(char*p, int len) {
  41. while(len--) putchar(*p++);
  42. }
  43. // join "foo", "bar", "baz" to "foo-bar-baz", also mallocs.
  44. char* word_up(char* a, int alen, char* b, int blen, char* c, int clen) {
  45. int len = alen + blen + clen + 3;
  46. char *ret = (char*) malloc(sizeof(char) * len);
  47. while(alen--) *ret++ = *a++; *ret++ = '-';
  48. while(blen--) *ret++ = *b++; *ret++ = '-';
  49. while(clen--) *ret++ = *c++; *ret++ = 0;
  50. return ret - len;
  51. }
  52. typedef pair<char*, int> entry;
  53. bool ptr_cmp(const entry& a, const entry& b) {
  54. return stricmp(a.first, b.first) < 0;
  55. }
  56. bool not_found(const entry& a) {
  57. // A is moving
  58. if (a.second == false) {
  59. free(a.first);
  60. }
  61. return !a.second;
  62. }
  63. vector<entry>::iterator is_in_list(vector<entry>& v, const entry& e, char* w, bool& ret) {
  64. vector<entry>::iterator eit = lower_bound(v.begin(), v.end(), e, ptr_cmp);
  65. ret = eit != v.end() && !stricmp(eit->first, w);
  66. return eit;
  67. }
  68. bool find_and_set_word_in_vector(vector<entry>& v, char* wj0, int lenj0, char* wj1, int lenj1, char* wj2, int lenj2) {
  69. char* t = word_up(wj0, lenj0, wj1, lenj1, wj2, lenj2);
  70. entry tentry = make_pair(t, false);
  71. bool is_found;
  72. vector<entry>::iterator e = is_in_list(v, tentry, t, is_found);
  73. if (is_found) {
  74. e->second = true; // set it to found.
  75. }
  76. return is_found;
  77. }
  78. int try2(int n) {
  79. int i, j, k;
  80. vector<entry> master_list;
  81. // open file0
  82. char* p0 = file_input[0];
  83. int len0, len1, len2;
  84. char* w00 = get_word(&p0, &len0);
  85. char* w01 = get_word(&p0, &len1);
  86. char* w02 = get_word(&p0, &len2);
  87. while (w02 != NULL) {
  88. char* t = word_up(w00, len0, w01, len1, w02, len2);
  89. master_list.push_back(make_pair(t, false));
  90. // advance reading word in file0
  91. w00 = w01, w01 = w02; len0 = len1, len1 = len2;
  92. w02 = get_word(&p0, &len2);
  93. }
  94. sort(master_list.begin(), master_list.end(), ptr_cmp);
  95. //rep(i, master_list.size()) puts(master_list[i].first);
  96. rep(j, n) if (j > 0) {
  97. rep(i, master_list.size()) master_list[i].second = false;
  98. // open and read file j
  99. char* pj = file_input[j];
  100. int lenj0, lenj1, lenj2;
  101. char* wj0 = get_word(&pj, &lenj0);
  102. char* wj1 = get_word(&pj, &lenj1);
  103. char* wj2 = get_word(&pj, &lenj2);
  104. while (wj2 != NULL) {
  105. find_and_set_word_in_vector(master_list, wj0, lenj0, wj1, lenj1, wj2, lenj2);
  106. // advance reading word in filej
  107. wj0 = wj1, wj1 = wj2; lenj0 = lenj1, lenj1 = lenj2;
  108. wj2 = get_word(&pj, &lenj2);
  109. }
  110. // close file j
  111. // Filter out not found entries.
  112. vector<entry>::iterator last = remove_if(master_list.begin(), master_list.end(), not_found);
  113. master_list.resize(last - master_list.begin());
  114. //printf("After pass %d\n", j);
  115. //rep(i, master_list.size()) printf("%d: %s\n", i, master_list[i].first);
  116. }
  117. // now read the files again to remove stuffs.
  118. rep(j, n) {
  119. // open filej (except file0, which is still open)
  120. char* pj = file_input[j];
  121. int lenj0, lenj1, lenj2;
  122. char* wj0 = get_word(&pj, &lenj0);
  123. char* wj1 = get_word(&pj, &lenj1);
  124. char* wj2 = get_word(&pj, &lenj2);
  125. bool print0 = true, print1 = true, print2 = true;
  126. printf("File %d\nOriginal: %s\nOutpooot: ", j, file_input[j]);
  127. bool fir = true;
  128. while (wj2 != NULL) {
  129. bool found = find_and_set_word_in_vector(master_list, wj0, lenj0, wj1, lenj1, wj2, lenj2);
  130. if (!found) {
  131. // maybe print wj0
  132. if (print0) { if (fir) fir = 0; else printf(" "); print_word(wj0, lenj0); }
  133. print0 = print1; print1 = print2; print2 = true;
  134. }
  135. else {
  136. // we found match, set next two words to be not printed as well.
  137. print0 = false; print1 = false; print2 = true;
  138. }
  139. // advance reading word in filej
  140. wj0 = wj1, wj1 = wj2; lenj0 = lenj1, lenj1 = lenj2;
  141. wj2 = get_word(&pj, &lenj2);
  142. }
  143. if (wj0 != NULL && print0) { if (fir) fir = 0; else printf(" "); print_word(wj0, lenj0); }
  144. if (wj1 != NULL && print1) { if (fir) fir = 0; else printf(" "); print_word(wj1, lenj1); }
  145. printf("\n");
  146. }
  147. // close file0
  148. // clean up stuffs.
  149. rep(i, master_list.size()) free(master_list[i].first); master_list.clear();
  150. return 0;
  151. }
  152. int try1_naive(int n) {
  153. int i, j, k;
  154. char* p0 = file_input[0];
  155. int len0, len1, len2;
  156. char* w00 = get_word(&p0, &len0);
  157. char* w01 = get_word(&p0, &len1);
  158. char* w02 = get_word(&p0, &len2);
  159. char* w[55][3];
  160. int len[55][3];
  161. while(w02 != NULL) {
  162. rep(j, n) if (j > 0) {
  163. char f = false;
  164. char *pj = file_input[j];
  165. w[j][0] = get_word(&pj, &len[j][0]), w[j][1] = get_word(&pj, &len[j][1]), w[j][2] = get_word(&pj, &len[j][2]);
  166. while(w[j][2] != NULL) {
  167. if (strnieq(w00, w[j][0], len0, len[j][0]) && strnieq(w01, w[j][1], len1, len[j][1]) && strnieq(w02, w[j][2], len2, len[j][2])) {
  168. f = true; break;
  169. }
  170. w[j][0] = w[j][1], w[j][1] = w[j][2], len[j][0] = len[j][1], len[j][1] = len[j][2];
  171. w[j][2] = get_word(&pj, &len[j][2]);
  172. }
  173. if (!f) break;
  174. }
  175. if (j >= n) {
  176. // found w00, w01, w02
  177. print_word(w00, len0), putchar(' ');
  178. print_word(w01, len1), putchar(' ');
  179. print_word(w02, len2); putchar('\n');
  180. }
  181. // advance reading word in file0
  182. w00 = w01, w01 = w02; len0 = len1, len1 = len2;
  183. w02 = get_word(&p0, &len2);
  184. }
  185. return 0;
  186. }
  187. int main() {
  188. int n = sizeof(file_input)/sizeof(file_input[0]);
  189. printf("Num files: %d\n", n);
  190. //try1_naive(n);
  191. try2(n);
  192. return 0;
  193. }