PageRenderTime 28ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/usr/src/cmd/sort/common/streams.c

https://github.com/richlowe/illumos-gate
C | 592 lines | 394 code | 95 blank | 103 comment | 85 complexity | a1ded989ef82146d02e037839b92e674 MD5 | raw file
  1. /*
  2. * CDDL HEADER START
  3. *
  4. * The contents of this file are subject to the terms of the
  5. * Common Development and Distribution License (the "License").
  6. * You may not use this file except in compliance with the License.
  7. *
  8. * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  9. * or http://www.opensolaris.org/os/licensing.
  10. * See the License for the specific language governing permissions
  11. * and limitations under the License.
  12. *
  13. * When distributing Covered Code, include this CDDL HEADER in each
  14. * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15. * If applicable, add the following below this CDDL HEADER, with the
  16. * fields enclosed by brackets "[]" replaced with your own identifying
  17. * information: Portions Copyright [yyyy] [name of copyright owner]
  18. *
  19. * CDDL HEADER END
  20. */
  21. /*
  22. * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  23. * Use is subject to license terms.
  24. */
  25. #pragma ident "%Z%%M% %I% %E% SMI"
#include "streams.h"

#include <stdlib.h>
  27. static const stream_ops_t invalid_ops = {
  28. NULL,
  29. NULL,
  30. NULL,
  31. NULL,
  32. NULL,
  33. NULL,
  34. NULL,
  35. NULL,
  36. NULL,
  37. NULL
  38. };
  39. stream_t *
  40. stream_new(int src)
  41. {
  42. stream_t *str = safe_realloc(NULL, sizeof (stream_t));
  43. stream_clear(str);
  44. stream_set(str, src);
  45. return (str);
  46. }
  47. void
  48. stream_set(stream_t *str, flag_t flags)
  49. {
  50. if (flags & STREAM_SOURCE_MASK) {
  51. ASSERT((flags & STREAM_SOURCE_MASK) == STREAM_ARRAY ||
  52. (flags & STREAM_SOURCE_MASK) == STREAM_SINGLE ||
  53. (flags & STREAM_SOURCE_MASK) == STREAM_MMAP ||
  54. (flags & STREAM_SOURCE_MASK) == STREAM_WIDE);
  55. str->s_status &= ~STREAM_SOURCE_MASK;
  56. str->s_status |= flags & STREAM_SOURCE_MASK;
  57. switch (flags & STREAM_SOURCE_MASK) {
  58. case STREAM_NO_SOURCE:
  59. str->s_element_size = 0;
  60. str->s_ops = invalid_ops;
  61. return;
  62. case STREAM_ARRAY:
  63. /*
  64. * Array streams inherit element size.
  65. */
  66. str->s_ops = stream_array_ops;
  67. break;
  68. case STREAM_MMAP:
  69. str->s_element_size = sizeof (char);
  70. str->s_ops = stream_mmap_ops;
  71. break;
  72. case STREAM_SINGLE:
  73. str->s_element_size = sizeof (char);
  74. str->s_ops = stream_stdio_ops;
  75. break;
  76. case STREAM_WIDE:
  77. str->s_element_size = sizeof (wchar_t);
  78. str->s_ops = stream_wide_ops;
  79. break;
  80. default:
  81. die(EMSG_UNKN_STREAM, str->s_status);
  82. }
  83. }
  84. str->s_status |= (flags & ~STREAM_SOURCE_MASK);
  85. if (str->s_status & STREAM_UNIQUE)
  86. switch (str->s_status & STREAM_SOURCE_MASK) {
  87. case STREAM_SINGLE :
  88. str->s_ops.sop_put_line =
  89. stream_stdio_put_line_unique;
  90. break;
  91. case STREAM_WIDE :
  92. str->s_ops.sop_put_line =
  93. stream_wide_put_line_unique;
  94. break;
  95. default :
  96. break;
  97. }
  98. if (str->s_status & STREAM_INSTANT)
  99. switch (str->s_status & STREAM_SOURCE_MASK) {
  100. case STREAM_SINGLE :
  101. str->s_ops.sop_fetch =
  102. stream_stdio_fetch_overwrite;
  103. break;
  104. case STREAM_WIDE :
  105. str->s_ops.sop_fetch =
  106. stream_wide_fetch_overwrite;
  107. break;
  108. default :
  109. break;
  110. }
  111. }
  112. void
  113. stream_unset(stream_t *streamp, flag_t flags)
  114. {
  115. ASSERT(!(flags & STREAM_SOURCE_MASK));
  116. streamp->s_status &= ~(flags & ~STREAM_SOURCE_MASK);
  117. }
  118. int
  119. stream_is_primed(stream_t *streamp)
  120. {
  121. return (streamp->s_status & STREAM_PRIMED);
  122. }
  123. void
  124. stream_clear(stream_t *str)
  125. {
  126. (void) memset(str, 0, sizeof (stream_t));
  127. }
  128. static void
  129. stream_copy(stream_t *dest, stream_t *src)
  130. {
  131. (void) memcpy(dest, src, sizeof (stream_t));
  132. }
  133. void
  134. stream_stat_chain(stream_t *strp)
  135. {
  136. struct stat buf;
  137. stream_t *cur_strp = strp;
  138. while (cur_strp != NULL) {
  139. if (cur_strp->s_status & STREAM_NOTFILE ||
  140. cur_strp->s_status & STREAM_ARRAY) {
  141. cur_strp = cur_strp->s_next;
  142. continue;
  143. }
  144. if (stat(cur_strp->s_filename, &buf) < 0)
  145. die(EMSG_STAT, cur_strp->s_filename);
  146. cur_strp->s_dev = buf.st_dev;
  147. cur_strp->s_ino = buf.st_ino;
  148. cur_strp->s_filesize = buf.st_size;
  149. cur_strp = cur_strp->s_next;
  150. }
  151. }
  152. uint_t
  153. stream_count_chain(stream_t *str)
  154. {
  155. uint_t n = 0;
  156. while (str != NULL) {
  157. n++;
  158. str = str->s_next;
  159. }
  160. return (n);
  161. }
  162. int
  163. stream_open_for_read(sort_t *S, stream_t *str)
  164. {
  165. int fd;
  166. ASSERT(!(str->s_status & STREAM_OUTPUT));
  167. /*
  168. * STREAM_ARRAY streams are open by definition.
  169. */
  170. if ((str->s_status & STREAM_SOURCE_MASK) == STREAM_ARRAY) {
  171. stream_set(str, STREAM_ARRAY | STREAM_OPEN);
  172. return (1);
  173. }
  174. /*
  175. * Set data type according to locale for input from stdin.
  176. */
  177. if (str->s_status & STREAM_NOTFILE) {
  178. str->s_type.BF.s_fp = stdin;
  179. stream_set(str, STREAM_OPEN | (S->m_single_byte_locale ?
  180. STREAM_SINGLE : STREAM_WIDE));
  181. return (1);
  182. }
  183. ASSERT(str->s_filename);
  184. #ifndef DEBUG_DISALLOW_MMAP
  185. if (S->m_single_byte_locale &&
  186. str->s_filesize > 0 &&
  187. str->s_filesize < SSIZE_MAX) {
  188. /*
  189. * make mmap() attempt; set s_status and return if successful
  190. */
  191. fd = open(str->s_filename, O_RDONLY);
  192. if (fd < 0) {
  193. if (errno == EMFILE || errno == ENFILE)
  194. return (-1);
  195. else
  196. die(EMSG_OPEN, str->s_filename);
  197. }
  198. str->s_buffer = mmap(0, str->s_filesize, PROT_READ,
  199. MAP_SHARED, fd, 0);
  200. if (str->s_buffer != MAP_FAILED) {
  201. str->s_buffer_size = str->s_filesize;
  202. str->s_type.SF.s_fd = fd;
  203. stream_set(str, STREAM_MMAP | STREAM_OPEN);
  204. stream_unset(str, STREAM_PRIMED);
  205. return (1);
  206. }
  207. /*
  208. * Otherwise the mmap() failed due to address space exhaustion;
  209. * since we have already opened the file, we close it and drop
  210. * into the normal (STDIO) case.
  211. */
  212. (void) close(fd);
  213. str->s_buffer = NULL;
  214. }
  215. #endif /* DEBUG_DISALLOW_MMAP */
  216. if ((str->s_type.BF.s_fp = fopen(str->s_filename, "r")) == NULL) {
  217. if (errno == EMFILE || errno == ENFILE)
  218. return (-1);
  219. else
  220. die(EMSG_OPEN, str->s_filename);
  221. }
  222. str->s_type.BF.s_vbuf = safe_realloc(NULL, STDIO_VBUF_SIZE);
  223. if (setvbuf(str->s_type.BF.s_fp, str->s_type.BF.s_vbuf, _IOFBF,
  224. STDIO_VBUF_SIZE) != 0) {
  225. safe_free(str->s_type.BF.s_vbuf);
  226. str->s_type.BF.s_vbuf = NULL;
  227. }
  228. stream_set(str, STREAM_OPEN | (S->m_single_byte_locale ? STREAM_SINGLE :
  229. STREAM_WIDE));
  230. stream_unset(str, STREAM_PRIMED);
  231. return (1);
  232. }
  233. void
  234. stream_set_size(stream_t *str, size_t new_size)
  235. {
  236. /*
  237. * p_new_size is new_size rounded upwards to nearest multiple of
  238. * PAGESIZE, since mmap() is going to reserve it in any case. This
  239. * ensures that the far end of the buffer is also aligned, such that we
  240. * obtain aligned pointers if we choose to subtract from it.
  241. */
  242. size_t p_new_size = (new_size + PAGESIZE) & ~(PAGESIZE - 1);
  243. if (str->s_buffer_size == p_new_size)
  244. return;
  245. if (str->s_buffer != NULL)
  246. (void) munmap(str->s_buffer, str->s_buffer_size);
  247. if (new_size == 0) {
  248. str->s_buffer = NULL;
  249. str->s_buffer_size = 0;
  250. return;
  251. }
  252. str->s_buffer = xzmap(0, p_new_size, PROT_READ | PROT_WRITE,
  253. MAP_PRIVATE, 0);
  254. if (str->s_buffer == MAP_FAILED)
  255. die(EMSG_MMAP);
  256. str->s_buffer_size = p_new_size;
  257. }
  258. void
  259. stream_add_file_to_chain(stream_t **str_chain, char *filename)
  260. {
  261. stream_t *str;
  262. str = stream_new(STREAM_NO_SOURCE);
  263. str->s_filename = filename;
  264. str->s_type.SF.s_fd = -1;
  265. stream_push_to_chain(str_chain, str);
  266. }
  267. void
  268. stream_push_to_chain(stream_t **str_chain, stream_t *streamp)
  269. {
  270. stream_t *cur_streamp = *str_chain;
  271. if (cur_streamp == NULL) {
  272. *str_chain = streamp;
  273. streamp->s_next = NULL;
  274. return;
  275. }
  276. while (cur_streamp->s_next != NULL)
  277. cur_streamp = cur_streamp->s_next;
  278. cur_streamp->s_next = streamp;
  279. streamp->s_previous = cur_streamp;
  280. streamp->s_next = NULL;
  281. }
  282. static void
  283. stream_dump(stream_t *str_in, stream_t *str_out)
  284. {
  285. ASSERT(!(str_in->s_status & STREAM_OUTPUT));
  286. ASSERT(str_out->s_status & STREAM_OUTPUT);
  287. SOP_PUT_LINE(str_out, &str_in->s_current);
  288. while (!SOP_EOS(str_in)) {
  289. SOP_FETCH(str_in);
  290. SOP_PUT_LINE(str_out, &str_in->s_current);
  291. }
  292. }
  293. /*
  294. * stream_push_to_temporary() with flags set to ST_CACHE merely copies the
  295. * stream_t pointer onto the chain. With flags set to ST_NOCACHE, the stream is
  296. * written out to a file. Stream pointers passed to stream_push_to_temporary()
  297. * must refer to allocated objects, and not to objects created on function
  298. * stacks. Finally, if strp == NULL, stream_push_to_temporary() creates and
  299. * pushes the new stream; the output stream is left open if ST_OPEN is set.
  300. */
  301. stream_t *
  302. stream_push_to_temporary(stream_t **str_chain, stream_t *streamp, int flags)
  303. {
  304. stream_t *out_streamp;
  305. if (flags & ST_CACHE) {
  306. ASSERT(streamp->s_status & STREAM_ARRAY);
  307. stream_set(streamp, STREAM_NOT_FREEABLE | STREAM_TEMPORARY);
  308. stream_push_to_chain(str_chain, streamp);
  309. return (streamp);
  310. }
  311. out_streamp = safe_realloc(NULL, sizeof (stream_t));
  312. if (streamp != NULL) {
  313. stream_copy(out_streamp, streamp);
  314. stream_unset(out_streamp, STREAM_OPEN);
  315. ASSERT(streamp->s_element_size == sizeof (char) ||
  316. streamp->s_element_size == sizeof (wchar_t));
  317. stream_set(out_streamp,
  318. streamp->s_element_size == 1 ? STREAM_SINGLE : STREAM_WIDE);
  319. out_streamp->s_buffer = NULL;
  320. out_streamp->s_buffer_size = 0;
  321. } else {
  322. stream_clear(out_streamp);
  323. stream_set(out_streamp, flags & ST_WIDE ? STREAM_WIDE :
  324. STREAM_SINGLE);
  325. }
  326. (void) bump_file_template();
  327. out_streamp->s_filename = strdup(get_file_template());
  328. if (SOP_OPEN_FOR_WRITE(out_streamp) == -1)
  329. return (NULL);
  330. stream_set(out_streamp, STREAM_TEMPORARY);
  331. stream_push_to_chain(str_chain, out_streamp);
  332. if (streamp != NULL) {
  333. /*
  334. * We reset the input stream to the beginning, and copy it in
  335. * sequence to the output stream, freeing the raw_collate field
  336. * as we go.
  337. */
  338. if (SOP_PRIME(streamp) != PRIME_SUCCEEDED)
  339. die(EMSG_BADPRIME);
  340. stream_dump(streamp, out_streamp);
  341. }
  342. if (!(flags & ST_OPEN)) {
  343. SOP_FREE(out_streamp);
  344. (void) SOP_CLOSE(out_streamp);
  345. }
  346. /*
  347. * Now that we've written this stream to disk, we needn't protect any
  348. * in-memory consumer.
  349. */
  350. if (streamp != NULL)
  351. streamp->s_consumer = NULL;
  352. return (out_streamp);
  353. }
  354. void
  355. stream_close_all_previous(stream_t *tail_streamp)
  356. {
  357. stream_t *cur_streamp;
  358. ASSERT(tail_streamp != NULL);
  359. cur_streamp = tail_streamp->s_previous;
  360. while (cur_streamp != NULL) {
  361. (void) SOP_FREE(cur_streamp);
  362. if (SOP_IS_CLOSABLE(cur_streamp))
  363. (void) SOP_CLOSE(cur_streamp);
  364. cur_streamp = cur_streamp->s_previous;
  365. }
  366. }
  367. void
  368. stream_unlink_temporary(stream_t *streamp)
  369. {
  370. if (streamp->s_status & STREAM_TEMPORARY) {
  371. (void) SOP_FREE(streamp);
  372. if (streamp->s_ops.sop_unlink)
  373. (void) SOP_UNLINK(streamp);
  374. }
  375. }
  376. /*
  377. * stream_insert() takes input from src stream, converts to each line to
  378. * collatable form, and places a line_rec_t in dest stream, which is of type
  379. * STREAM_ARRAY.
  380. */
  381. int
  382. stream_insert(sort_t *S, stream_t *src, stream_t *dest)
  383. {
  384. ssize_t i = dest->s_type.LA.s_array_size;
  385. line_rec_t *l_series;
  386. char *l_convert = dest->s_buffer;
  387. int return_val = ST_MEM_AVAIL;
  388. int fetch_result = NEXT_LINE_COMPLETE;
  389. /*
  390. * Scan through until total bytes allowed accumulated, and return.
  391. * Use SOP_FETCH(src) so that this works for all stream types,
  392. * and so that we can repeat until eos.
  393. *
  394. * For each new line, we move back sizeof (line_rec_t) from the end of
  395. * the array buffer, and copy into the start of the array buffer. When
  396. * the pointers meet, or when we exhaust the current stream, we return.
  397. * If we have not filled the current memory allocation, we return
  398. * ST_MEM_AVAIL, else we return ST_MEM_FILLED.
  399. */
  400. ASSERT(stream_is_primed(src));
  401. ASSERT(dest->s_status & STREAM_ARRAY);
  402. /*LINTED ALIGNMENT*/
  403. l_series = (line_rec_t *)((caddr_t)dest->s_buffer
  404. + dest->s_buffer_size) - dest->s_type.LA.s_array_size;
  405. if (dest->s_type.LA.s_array_size)
  406. l_convert = l_series->l_collate.sp +
  407. l_series->l_collate_length + src->s_element_size;
  408. /*
  409. * current line has been set prior to entry
  410. */
  411. src->s_current.l_collate.sp = l_convert;
  412. src->s_current.l_collate_bufsize = (caddr_t)l_series
  413. - (caddr_t)l_convert - sizeof (line_rec_t);
  414. src->s_current.l_raw_collate.sp = NULL;
  415. if (src->s_current.l_collate_bufsize <= 0)
  416. return (ST_MEM_FILLED);
  417. src->s_consumer = dest;
  418. while (src->s_current.l_collate_bufsize > 0 &&
  419. (src->s_current.l_collate_length = S->m_coll_convert(
  420. S->m_fields_head, &src->s_current, FCV_FAIL,
  421. S->m_field_separator)) >= 0) {
  422. ASSERT((char *)l_series > l_convert);
  423. l_series--;
  424. l_convert += src->s_current.l_collate_length;
  425. if ((char *)l_series <= l_convert) {
  426. __S(stats_incr_insert_filled_downward());
  427. l_series++;
  428. return_val = ST_MEM_FILLED;
  429. break;
  430. }
  431. /*
  432. * There's no collision with the lower part of the buffer, so we
  433. * can safely begin processing the line. In the debug case, we
  434. * test for uninitialized data by copying a non-zero pattern.
  435. */
  436. #ifdef DEBUG
  437. memset(l_series, 0x1ff11ff1, sizeof (line_rec_t));
  438. #endif
  439. copy_line_rec(&src->s_current, l_series);
  440. i++;
  441. if (SOP_EOS(src) ||
  442. (fetch_result = SOP_FETCH(src)) == NEXT_LINE_INCOMPLETE)
  443. break;
  444. src->s_current.l_collate.sp = l_convert;
  445. src->s_current.l_collate_bufsize = (caddr_t)l_series
  446. - (caddr_t)l_convert - sizeof (line_rec_t);
  447. src->s_current.l_raw_collate.sp = NULL;
  448. }
  449. if (fetch_result == NEXT_LINE_INCOMPLETE) {
  450. __S(stats_incr_insert_filled_input());
  451. return_val = ST_MEM_FILLED;
  452. } else if (src->s_current.l_collate_length < 0 ||
  453. src->s_current.l_collate_bufsize <= 0) {
  454. __S(stats_incr_insert_filled_upward());
  455. return_val = ST_MEM_FILLED;
  456. }
  457. if (fetch_result != NEXT_LINE_INCOMPLETE &&
  458. src->s_current.l_collate_length < 0 &&
  459. i == 0)
  460. /*
  461. * There's no room for conversion of our only line; need to
  462. * execute with larger memory.
  463. */
  464. die(EMSG_MEMORY);
  465. /*
  466. * Set up pointer array to line records.
  467. */
  468. if (i > dest->s_type.LA.s_array_size)
  469. dest->s_type.LA.s_array = safe_realloc(dest->s_type.LA.s_array,
  470. sizeof (line_rec_t *) * i);
  471. dest->s_type.LA.s_array_size = i;
  472. i = 0;
  473. while (i < dest->s_type.LA.s_array_size) {
  474. dest->s_type.LA.s_array[i] = l_series;
  475. l_series++;
  476. i++;
  477. }
  478. /*
  479. * LINES_ARRAY streams are always open.
  480. */
  481. stream_set(dest, STREAM_OPEN);
  482. return (return_val);
  483. }
  484. /*
  485. * stream_swap_buffer() exchanges the stream's buffer with the proffered one;
  486. * s_current is not adjusted so this is safe only for STREAM_INSTANT.
  487. */
  488. void
  489. stream_swap_buffer(stream_t *str, char **buf, size_t *size)
  490. {
  491. void *tb = *buf;
  492. size_t ts = *size;
  493. *buf = str->s_buffer;
  494. *size = str->s_buffer_size;
  495. str->s_buffer = tb;
  496. str->s_buffer_size = ts;
  497. }