/usr/src/tools/diskomizer/diskomizer64mpism.c
C | 5417 lines | 4501 code | 528 blank | 388 comment | 1214 complexity | 83822d2969540770a7df01777bee7815 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
- /*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
- #pragma ident "%Z%%M% %I% %E% SMI"
- /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
- /*
- * diskomizer64mpism
- *
- * Write to and then read from disk partitions and or files.
- *
- * This is a test program.
- *
- * To do:
- *
- * The messages it prints out at the beginning are a mess.
- * The code should be better commented.
- *
- * Chris.Gerhard@uk.sun.com - SMCC CTE
- */
- #include "args.h"
- #include "diskomizer64mpism.h"
- #include "bufs.h"
- #include "buf_init.h"
- #include <netdb.h>
- #include <sys/systeminfo.h>
- #include <tnf/probe.h>
- #include <sys/times.h>
- #include <diskomizer/log.h>
- #include "findap.h"
- #include "device_control.h"
- #include "timeval.h"
- #include "list_ops.h"
- #include "bits.h"
- #include "locks.h"
- #include "shm_ops.h"
- #include "signal_catch.h"
- #include "limit.h"
- #include "time.h"
- #include "prompt.h"
- #include "errors.h"
- #include "utils.h"
- #include "shared_device_info.h"
- #include "decode_errors.h"
- #include <sys/utsname.h>
- #include <sys/statvfs.h>
- #include <dlfcn.h>
- #include <diskomizer/uadmin.h>
- #include <usage_tracking/usage_tracking.h>
- #include "disko_usage_track.h"
- #ifdef __i386
- #include <note.h>
- #endif
- #define OPEN_BRACE '{'
- #define CLOSE_BRACE '}'
- #define DEFAULT_BLOCK_SIZE 0x200 /* 512 */
- static char diskomizer_str[] = "diskomizer";
- iolen_index_t max_disk_io_len;
- static char *diffs;
- static char diffs_str[] = "diffs";
- static char write_str[] = "write";
- static char read_str[] = "read";
- static const char nil[] = "nil";
- #define NIL(A) (A == NULL ? nil : A)
- static char *random_str;
- static hrtime_t stoptime;
- static time_t (*secs_till_exit)(void);
- static struct timeval start_time;
- void *usage_tracking_handle;
- enum read_type {
- NORMAL_READ,
- RETRY_READ,
- WRITE_READ,
- READ_ONLY_RAND,
- READ_ONLY_SEQ
- };
- typedef enum read_type read_type_t;
- typedef uchar_t (*initializer_t)(int buf, int i);
- static int exit_status = EXIT_SUCCESS;
- /*
- * The minimum block size that can be used. Essentially the lowest common
- * multiple of the block sizes available.
- */
- static int min_block_size;
- struct proc_store {
- pid_t pid;
- };
- static struct proc_store *proc_store;
- /*
- * The daio_ops
- */
- static struct daio_ops *daio;
- /*
- * All the functions we have
- */
- time_t handle_read(struct aio_str *aiop, ullong_t start);
- time_t do_new_read(struct aio_str *aiop, ullong_t start, read_type_t read_type);
- time_t handle_readonly_rand(struct aio_str *aiop, ullong_t start);
- static int proc_no;
- long long convert_time(struct timeval tv);
- void update_time_stats(char off, struct times *tp, long long tyme,
- struct aio_str *aiop);
- static int pend_write_with_lock(bitmap_t map[], ullong_t off, int maplen);
- static int do_memcmp(ullong_t start, struct aio_str *aiop);
- static struct device *open_device(char *name, struct paths *, ullong_t size,
- int paths_to_use, int error_paths);
- static void check_matching_io(ullong_t start, struct aio_str *aiop);
- static int is_master(void);
- static void unwritten_block_rand(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen);
- static void unwritten_block_seq(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen);
- extern void close_and_free_paths(struct device *dev);
- extern void run_func(uchar_t *buf, size_t size);
- static struct bufhdr
- build_bufhr(struct device *dev, ullong_t start, ullong_t off);
- void newfd(struct aio_str *aiop);
- struct fds *open_path(struct device *devp, char *name, ullong_t size);
- void cancel_all_io_byfd(struct fds *fd);
- struct fds *find_path(struct fds *fdhead, char path_id);
- static int check_for_duplicate_paths(struct device *devp);
- static void do_start_cancelled_io(struct device *devices, ullong_t start);
- static int return_zero(void);
- static int return_one(void);
- /*
- * The error handling functions.
- */
- static loop_type on_error_reread(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_exit(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_stop(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_nop(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_abort(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_pause(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_retry(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_rewrite(ullong_t start, struct aio_str *aiop);
- static loop_type on_error_fail_path(ullong_t start, struct aio_str *aiop);
- /*
- * Error handling init functions.
- */
- static int init_path_stop_check(void);
- static int init_stop_check(void);
- /*
- * the "globals" that we use
- */
- struct device *devices; /* all the devices there are */
- write_buf_initializer_t init_uchar_func;
- static read_buf_initializer_t read_buffer_initializer;
- pid_t pgrp;
- /*
- * statics
- */
- static pid_t parent_pid;
- static pid_t killer_pid;
- static on_error_t *on_error_corrupt;
- static on_error_t *on_error_short;
- static on_error_t *on_write_error;
- static int Longest_device_name = 0;
- static int Longest_logical_name = 0;
- static int write_loops;
- static int usr1_exit = 0;
- struct shm_ops *shm_ops;
- /*
- * Count of the total number of io's that are currently cancelled.
- */
- static int cancelled_count = 0;
- /*
- * Start cancelled. Only gets unset from nop if there are cancelled ios
- * to restart. A rare thing.
- */
- static void (*start_cancelled_io)(struct device *, ullong_t) =
- (void (*)(struct device *, ullong_t)) nop;
- static void (*start_deferred)(struct device *dev, ullong_t) =
- (void (*)(struct device *, ullong_t)) nop;
- static int (*stop_check)(void *handle) = (int (*)(void *))return_zero;
- static int (*path_stop_check)(struct fds *fd, struct device *dev) =
- (int (*)(struct fds *, struct device *dev))return_zero;
- static char nom[] = "no memory";
- #define NOT_NULL(A) (A == NULL ? &nom[0] : A)
- #define PLURAL(A) (A == 1 ? "" : "s")
- #define LEN_BYTES2BLOCKS(A) (A->length / INDEX_TO_DIOLEN(max_disk_io_len))
- #define TRUE_OR_FALSE(A) (A ? "true" : "false")
- struct error_handlers { /* One row of on_error_table: a named on-error action. */
- char *name; /* String that describes this error handler */
- on_error_t func; /* error handling function */
- int (*setup)(void); /* init routine for the error handler */
- uint_t breaker:1; /* Is this the last error handler on the list */
- uint_t rw:2; /* Does this handler apply to read (READ_ERR), write (WRITE_ERR) or both (BOTH_ERR) */
- };
- #define READ_ERR 1
- #define WRITE_ERR (READ_ERR << 1)
- #define BOTH_ERR (READ_ERR | WRITE_ERR)
- struct error_handlers on_error_table[] = {
- {"EXIT", on_error_exit, return_one, 1, BOTH_ERR},
- {"ABORT", on_error_abort, return_one, 1, BOTH_ERR},
- {"CONTINUE", on_error_nop, return_one, 0, BOTH_ERR},
- {"NONE", on_error_nop, return_one, 0, BOTH_ERR},
- {"STOP", on_error_stop, init_stop_check, 1, BOTH_ERR},
- {"PAUSE", on_error_pause, return_one, 0, BOTH_ERR},
- {"RETRY", on_error_retry, return_one, 0, BOTH_ERR},
- {"FAIL_PATH", on_error_fail_path, init_path_stop_check, 0, BOTH_ERR},
- {"UADMIN", on_error_uadmin, uadmin_init, 1, BOTH_ERR},
- {"REREAD", on_error_reread, return_one, 0, READ_ERR},
- {"REWRITE", on_error_rewrite, return_one, 0, WRITE_ERR}
- };
- /*
- * TNF declarations.
- */
- /*
- * The DEFINE should not have explicit mentions of the daio_ZZZZ
- * elements, they should be opaque.
- */
- TNF_DEFINE_RECORD_5(aio_str_t, aio_tnf_str,
- tnf_opaque, buf, tnf_short, iolen, tnf_ulonglong, off,
- tnf_longlong, aio_res.result.daio_return,
- tnf_uint, aio_res.result.daio_errno)
- /*
- * locking functions.
- */
- static char *
- hostname(void)
- {
- static char hostname[MAXHOSTNAMELEN + 1];
- (void) sysinfo(SI_HOSTNAME, &hostname[0], MAXHOSTNAMELEN);
- return (&hostname[0]);
- }
- int
- this_proc(void)
- {
- return (proc_no);
- }
/*
 * Constant callback: always reports 1.  Used as the setup routine for
 * error handlers in on_error_table that need no initialisation.
 */
static int
return_one(void)
{
	const int one = 1;

	return (one);
}
/*
 * Constant callback: always reports 0.  Used (through a cast) as the
 * default value of the stop_check and path_stop_check hooks.
 */
static int
return_zero(void)
{
	const int zero = 0;

	return (zero);
}
/*
 * Do nothing.  Installed (through a cast) as the default for hooks such
 * as start_cancelled_io and start_deferred.
 */
void
nop(void)
{
	/* intentionally empty */
}
/*
 * Release ptr.  A NULL pointer is accepted and ignored; free() already
 * treats NULL as a no-op so no explicit test is required.
 */
static void
not_null_free(void *ptr)
{
	free(ptr);
}
/*
 * "Seconds until exit" callback that always answers LONG_MAX, i.e. the
 * largest value it can express.
 */
static time_t
inf_secs_till_exit(void)
{
	const time_t forever = LONG_MAX;

	return (forever);
}
- static time_t
- do_secs_till_exit(void)
- {
- return ((stoptime - gethrtime()) / BILLION);
- }
- off64_t
- start_offset(void)
- {
- return ((off64_t)(opts.start_offset *
- (ullong_t)INDEX_TO_DIOLEN(max_disk_io_len)));
- }
- static void
- return_aio_read_buf(struct aio_str *aiop)
- {
- if (!(aiop->count % opts.expert_release_read_buffers_after_n_uses) &&
- aiop->buf != NULL) {
- return_read_buf(aiop->buf);
- aiop->buf = NULL;
- }
- }
/*
 * Stop-check hook: ask get_shared_stop_flag() about this process
 * (this_proc()) using the supplied shared-data handle.
 */
static int
do_stop_check(void *handle)
{
	const int me = this_proc();

	return (get_shared_stop_flag(handle, me));
}
- int
- is_readonly(void)
- {
- return (opts.o_rdonly == 1);
- }
- const char *
- rw_string(void)
- {
- return (is_readonly() ? read_str : write_str);
- }
- /*
- * background. disassociate from controlling tty make session leader
- * then fork. The parent exits and the child goes on in
- * the back ground.
- */
- static void
- background()
- {
- pid_t pid;
- (void) freopen("/dev/null", "+r", stdin);
- pid = opts.use_fork1 == 0 ? fork() : fork1();
- if (pid == 0) {
- if (setsid() == (pid_t)-1)
- pperror("setsid");
- return;
- }
- if (pid < 0) {
- FORK_ERROR(opts.use_fork1 == 0 ? "" : "s");
- exit(1);
- }
- exit(0);
- }
- static struct blks *
- aio_attach(struct aio_str *aiop)
- {
- int error_count = 0;
- struct blks *blocks;
- while ((blocks = shm_ops->attach(AIO_BLOCK_HANDLE(aiop))) ==
- NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(AIO_BLOCK_HANDLE(aiop));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- return (blocks);
- }
- static void
- update_aio_time_stats(struct aio_str *aiop, struct times *ts)
- {
- if (aiop->count > 0) {
- ullong_t len = LEN_BYTES2BLOCKS(aiop->dev);
- update_time_stats((100 * MIN(aiop->dev->block, len))/
- ((aiop->dev->length/
- INDEX_TO_DIOLEN(max_disk_io_len))),
- ts,
- DAIO_GET_TIME_TAKEN(aiop->aio_res), aiop);
- }
- }
- static void
- update_aio_read_stats(struct aio_str *aiop)
- {
- update_aio_time_stats(aiop, &aiop->fd->read_times);
- }
- static void
- update_aio_write_stats(struct aio_str *aiop)
- {
- update_aio_time_stats(aiop, &aiop->fd->write_times);
- }
- ullong_t
- diskomizer_off2byteoff(ullong_t off)
- {
- return ((off + opts.start_offset) * INDEX_TO_DIOLEN(max_disk_io_len));
- }
- static ullong_t
- byteoff2diskomizer_off(ullong_t off)
- {
- return ((off/INDEX_TO_DIOLEN(max_disk_io_len)) - opts.start_offset);
- }
- /*
- * Sanity check.
- */
- #define ASSERT_OFFSET(X) \
- assert(byteoff2diskomizer_off(diskomizer_off2byteoff(X)) == X)
- static ullong_t
- aio_str2byteoff(struct aio_str *aiop)
- {
- return (diskomizer_off2byteoff(aiop->off));
- }
- static int64_t
- aio_str2lba(struct aio_str *aiop)
- {
- long long byteoff;
- int64_t lba;
- if (aiop->dev->v_part == NULL || aiop->dev->device_block_size == 0) {
- return (-1);
- }
- byteoff = aio_str2byteoff(aiop);
- lba = byteoff / (int64_t)aiop->dev->device_block_size;
- return (aiop->dev->v_part->p_start + lba);
- }
- static void
- plog_dd(int pri, struct aio_str *aiop)
- {
- if ((INDEX_TO_DIOLEN(max_disk_io_len) %
- INDEX_TO_DIOLEN(aiop->iolen)) == 0) {
- daio->plog_dd(pri, aiop->fd->fd, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(max_disk_io_len),
- aio_str2byteoff(aiop));
- }
- }
- static void
- report_device(int pri, struct aio_str *aiop)
- {
- plog(pri, "Requested File %s (%s)\n",
- aiop->fd->name, aiop->dev->logicalname);
- }
- /*
- * report block.
- *
- * Report all the information about the block that was requested to be read
- */
- static void
- report_offset(int pri, struct aio_str *aiop)
- {
- long long byteoff;
- long long lba;
- byteoff = aio_str2byteoff(aiop);
- lba = aio_str2lba(aiop);
- if (lba >= 0) {
- plog(pri,
- "Requested File offset 0t%lld (0x%llx), block size "
- "0t%d (0x%x), LBA 0t%lld (0x%llx)\n",
- byteoff, byteoff, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen), lba, lba);
- } else {
- plog(pri, "Requested File offset 0t%lld (0x%llx), block size "
- "0t%d (0x%x)\n", byteoff, byteoff,
- INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- }
- }
/*
 * Log the device first and then the offset of the i/o in aiop.
 */
static void
report_device_and_offset(int pri, struct aio_str *aiop)
{
	report_device(pri, aiop);	/* which file / logical device */
	report_offset(pri, aiop);	/* where on it */
}
- /*
- * report_error. This is the generic error reporting routine.
- * It reports all errors to stderr, giving similar information
- * and advise as to other commands that can be tried.
- */
- void
- report_error(struct aio_str *aiop, const union err_info u, err_type error)
- {
- struct timeval now_tv;
- long long disk_block;
- int pri;
- while (my_gettimeofday(&now_tv, NULL) == -1)
- pperror("gettimeofday");
- disk_block = aio_str2byteoff(aiop);
- mutex->stderr_enter();
- if (error == ERR_HUNG) {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- report_device(pri, aiop);
- if (is_readonly()) {
- plog(pri, "%s has %ld out of %ld read%s\n",
- aiop->dev->logicalname,
- aiop->fd->number_of_hung_read,
- aiop->fd->total_read,
- aiop->fd->number_of_hung_read == 1 ? "" : "s");
- } else {
- plog(pri, "%s has %ld out of %ld read%s and %ld "
- "out of %ld write%s\n",
- aiop->dev->logicalname,
- aiop->fd->number_of_hung_read,
- aiop->fd->total_read,
- aiop->fd->number_of_hung_read == 1 ? "" : "s",
- aiop->fd->number_of_hung_write,
- aiop->fd->total_write,
- aiop->fd->number_of_hung_write == 1 ? "" : "s");
- }
- plog(pri, "waiting for more than %ld second%s\n",
- u.time, PLURAL(u.time));
- if (is_readonly()) {
- plog(pri, "Last read took %lld\n",
- aiop->fd->last_read_time/BILLION);
- } else {
- plog(pri, "Last read took %lld, last write took "
- "%lld seconds\n",
- aiop->fd->last_read_time/BILLION,
- aiop->fd->last_write_time/BILLION);
- }
- plog(pri,
- "oldest i/o is a %s waiting for %ld second%s\n",
- is_read_io(aiop) ? "read" : "write",
- now_tv.tv_sec - aiop->tv.tv_sec,
- ((now_tv.tv_sec - aiop->tv.tv_sec) > 1) ? "s" : "");
- } else if (error == ERR_CORRUPT) {
- time_t request_time;
- time_t return_time;
- int read_count;
- struct blks *blocks;
- struct fds *fd;
- pri = LOG_ERR;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- report_device_and_offset(pri, aiop);
- report_error_desc(pri, aiop, u.str);
- blocks = aio_attach(aiop);
- fd = find_path(aiop->dev->fdhead,
- blocks[AIO_BLOCK_INDEX(aiop)].path_id);
- assert(fd != NULL);
- read_count = blocks[AIO_BLOCK_INDEX(aiop)].read_count;
- request_time = blocks[AIO_BLOCK_INDEX(aiop)].last_requested;
- return_time = request_time + GET_LAST_RETURN(
- blocks[AIO_BLOCK_INDEX(aiop)].last_returned_delta);
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- time_log(pri, blocks[AIO_BLOCK_INDEX(aiop)].last_requested,
- "Last %s to the requested block submitted", rw_string());
- time_log(pri, return_time,
- "Last %s to the requested block returned", rw_string());
- plog(pri, "Last %s to the requested block used path: %s\n",
- rw_string(), fd->name);
- if (!is_readonly()) {
- plog(pri, "Requested block has been read %d times "
- "since last written\n", read_count);
- }
- decode_errors(pri, aiop, read_count);
- } else if (error == ERR_DEFERRED) {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- plog(pri, "%s to device %s deferred\n",
- u.str, aiop->dev->logicalname);
- } else if (DAIO_RETURN(aiop->aio_res) < 0) {
- char *datestr;
- pri = LOG_ERR;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- (void) plog(pri, "%s %s%s error, errno %d %s\n",
- aiop->fd->name, u.str,
- aiop->retrycnt > 0 ? " retry" : "",
- DAIO_ERROR(aiop->aio_res),
- strerror(DAIO_ERROR(aiop->aio_res)));
- datestr = alloc_time_str_fmt(aiop->tv.tv_sec, "%b %e %H:%M");
- if (datestr != NULL) {
- plog(pri, "Try \"egrep '^%s.*%s' "
- "/var/adm/messages\"\n",
- datestr, hostname());
- free(datestr);
- }
- } else {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- plog(pri, "%s short %s%s, Transferred %ld (%#lx)"
- " bytes, requested %d (%#x) bytes.\n",
- aiop->fd->name, u.str,
- aiop->retrycnt > 0 ? " retry" : "",
- (long)DAIO_RETURN(aiop->aio_res),
- (long)DAIO_RETURN(aiop->aio_res),
- INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- }
- dlog(pri, "Block at byte offset 0t%lld (%#llx) block size %d (%#x)\n",
- disk_block, disk_block, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- plog_dd(pri, aiop);
- (void) fflush(stderr);
- (void) fsync(fileno(stderr));
- mutex->stderr_exit();
- }
- ulong_t
- my_lrand(void)
- {
- union {
- ulong_t l;
- uint32_t i[sizeof (ulong_t)/sizeof (uint32_t)];
- } u;
- int i;
- #ifdef __lint
- ZERO_OBJ(u);
- #endif
- for (i = 0; i < (sizeof (ulong_t)/sizeof (uint32_t)); i++)
- u.i[i] = (uint32_t)lrand48();
- return (u.l);
- }
- static void
- remove_from_all_aios(struct aio_str *aiop)
- {
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- }
- static void
- infantacide(void)
- {
- (void) killpg(pgrp, SIGTERM);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_exit(ullong_t start, struct aio_str *aiop)
- {
- union err_info err_info;
- err_info.str = "On error exit";
- DAIO_SET_RETURN(aiop->aio_res, 0);
- report_error(aiop, err_info, ERR_SYS);
- remove_from_all_aios(aiop);
- (void) sighold(SIGTERM);
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- exit_status = EXIT_FAILURE;
- exit(exit_status);
- /*NOTREACHED*/
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_stop(ullong_t start, struct aio_str *aiop)
- {
- pfprintf(stderr, "%s Set On error stop\n", aiop->fd->name);
- if (set_shared_stop_flag(aiop->dev->shared_data_handle) == -1)
- aiop->dev->need_to_stop = 1;
- aiop->dev->stop_flag = 1;
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_nop(ullong_t start, struct aio_str *aiop)
- {
- if (is_write_io(aiop)) {
- struct blks *blocks;
- /*
- * Clear the last io as the retries never suceeded
- * so we don't read this block which is now in an
- * undefined state.
- */
- blocks = aio_attach(aiop);
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io = NULL;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- }
- pfprintf(stderr, "%s On error continue\n", aiop->fd->name);
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_abort(ullong_t start, struct aio_str *aiop)
- {
- union err_info err_info;
- err_info.str = "On error abort";
- report_error(aiop, err_info, ERR_SYS);
- /* pfprintf(stderr, "On error abort\n"); */
- (void) sighold(SIGTERM);
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- abort(); /* On error abort. This one is o.k. */
- return (BREAK);
- }
- /*
- * report_hangers_fd. counts the number of I/O requests that
- * have been waiting for more than hanger_time seconds and then
- * calls report_error() with the i/o that has been waiting the
- * longest and a count of the number of i/o requests that are
- * over time. It only calls report_error() when the number of i/o
- * requests or the oldest outstanding i/o change or if the last
- * report was more than hanger_time seconds ago and there are some
- * i/o hung.
- */
- static int
- report_hangers_fd(struct fds *fd, time_t tyme, time_t hanger_time)
- {
- int total_hung_read = 0;
- int total_hung_write = 0;
- #ifdef IO_COUNT_DEBUG
- int total_read = 0;
- int total_write = 0;
- #endif
- struct aio_str *aiop;
- union err_info err_info;
- if (fd->error_path != 0 || fd->stop_flag != 0) {
- return (0);
- }
- /* fd->total_read = fd->total_write = 0; */
- err_info.time = hanger_time;
- for (aiop = fd->all_aios.head; aiop != NULL; aiop = aiop->next) {
- if (tyme - aiop->tv.tv_sec > hanger_time) {
- if (is_read_io(aiop)) {
- total_hung_read++;
- } else {
- total_hung_write++;
- }
- } else {
- break;
- }
- }
- #ifdef IO_COUNT_DEBUG
- assert(total_read == fd->total_read);
- assert(total_write == fd->total_write);
- #endif
- if (fd->oldest_io == NULL) {
- fd->oldest_io = fd->all_aios.head;
- }
- if (total_hung_read != fd->number_of_hung_read ||
- total_hung_write != fd->number_of_hung_write ||
- (fd->all_aios.head != fd->oldest_io &&
- (total_hung_read || total_hung_write))) {
- fd->number_of_hung_read = total_hung_read;
- fd->number_of_hung_write = total_hung_write;
- report_error(fd->all_aios.head, err_info, ERR_HUNG);
- fd->last_report = tyme;
- fd->oldest_io = fd->all_aios.head;
- } else if (total_hung_read + total_hung_write > 0 &&
- fd->all_aios.head != NULL && fd->last_report + hanger_time < tyme) {
- report_error(fd->all_aios.head, err_info, ERR_HUNG);
- fd->last_report = tyme;
- }
- return (total_hung_read + total_hung_write);
- }
- /*
- * Search the list of i/o that are currently outstanding and report
- * on any that have been outstanding for more than hanger_time.
- * Also display howlong the oldest i/o has been Waiting for and when
- * it was submitted.
- */
- static void
- report_hangers(struct device *dev, time_t tyme, time_t hanger_time)
- {
- int total = 0;
- struct fds *fd;
- for (fd = dev->fdhead; ; fd = fd->next) {
- total += report_hangers_fd(fd, tyme, hanger_time);
- if (dev->fdhead == fd->next)
- break;
- }
- }
- static void
- report_all_hangers(struct device *dev, time_t hanger_time)
- {
- struct timeval tv;
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- for (; dev != NULL; dev = dev->next) {
- report_hangers(dev, tv.tv_sec, hanger_time);
- }
- }
/*
 * strdup() wrapper that reports a failure through STRDUP_ERROR.
 * Returns the copy, or NULL when strdup() failed.
 */
char *
my_strdup(const char *s)
{
	char *copy;

	copy = strdup(s);
	if (copy != NULL)
		return (copy);

	STRDUP_ERROR(s);
	return (copy);
}
/*
 * calloc() wrapper that reports a failure through CALLOC_ERROR.
 * Returns the zeroed memory, or NULL when calloc() failed.
 */
void *
my_calloc(long a, long b)
{
	void *mem = calloc(a, b);

	if (mem != NULL)
		return (mem);

	CALLOC_ERROR(a, b);
	return (NULL);
}
- static void
- add_to_all_aios(struct aio_str *aiop)
- {
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- }
- static int
- init_read(struct aio_str aio[], ullong_t start)
- {
- struct aio_str *aiop;
- aiop = my_calloc(1, sizeof (struct aio_str));
- if (aiop == NULL) {
- pfprintf(stderr, "init_read, can't allocate memory\n");
- return (0);
- }
- aiop->buf = NULL;
- aiop->off = 0;
- aiop->handler = is_readonly() ? handle_readonly_rand : handle_read;
- aiop->dev = aio->dev;
- aiop->fd = aio->dev->fdhead;
- aio->dev->fdhead = aio->dev->fdhead->next;
- add_to_all_aios(aiop);
- (void) do_new_read(aiop, start,
- is_readonly() ? READ_ONLY_RAND : NORMAL_READ);
- return (1);
- }
- static int
- has_no_unwritten(struct aio_str *aiop)
- {
- return (aiop->dev->unwritten == NULL);
- }
- static void
- push_unwritten(struct aio_str *aiop)
- {
- struct offset_list *new;
- if ((new = calloc(1, sizeof (struct offset_list))) == NULL) {
- CALLOC_ERROR(1, sizeof (struct offset_list));
- aiop->dev->failed_to_push_unwritten = 1;
- } else {
- plog(LOG_DEBUG,
- "Block %#llx (0t%lld) %s pushed onto unwritten queue\n",
- aiop->off, aiop->off, aiop->dev->logicalname);
- new->offset = aiop->off;
- new->next = aiop->dev->unwritten;
- aiop->dev->unwritten = new;
- aiop->dev->choose_block = unwritten_block_seq;
- }
- }
- static int
- find_unwritten(bitmap_t *map, struct aio_str *aiop, int maplen)
- {
- struct offset_list *u, *p;
- int status = 0;
- p = NULL;
- for (u = aiop->dev->unwritten; u != NULL; u = u->next) {
- if (pend_write_with_lock(map, u->offset, maplen) == 0) {
- if (p == NULL)
- aiop->dev->unwritten = u->next;
- else
- p->next = u->next;
- aiop->off = u->offset;
- plog(LOG_DEBUG,
- "Block %#llx (0t%lld) %s locked and removed "
- "from unwritten queue\n",
- aiop->off, aiop->off, aiop->dev->logicalname);
- free(u);
- status = 1;
- break;
- }
- p = u;
- }
- return (status);
- }
- static int
- is_unwritten(struct aio_str *aiop)
- {
- struct offset_list *u;
- for (u = aiop->dev->unwritten; u != NULL; u = u->next) {
- if (u->offset == aiop->off)
- return (1);
- }
- return (0);
- }
- /*
- * randomish_block
- * return a random block to try to do io too or from. If we
- * are short of memory the block is less random to try and
- * decrease the number of attach/detach pairs that actually
- * result in system calls, and therefore reduce the number of
- * faults.
- *
- * In particular when short of memory the next io will tend to
- * be in the same block ob blks structures or the next block
- * for odd numbered processes and the previous block for even
- * numbered processes. The overall effect is still close to
- * random at the device, but individual processes thrash less.
- */
- ulong_t
- randomish_block(struct aio_str *aiop)
- {
- ulong_t t;
- if (shm_ops->is_short_of_mem()) {
- t = aiop->off + ((my_lrand() % (shm_ops->max_size() /
- sizeof (struct blks))) * this_proc() % 2 ? -1 : 1);
- } else {
- t = my_lrand();
- }
- return (t);
- }
- /*ARGSUSED2*/
- void
- rand_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- ulong_t t;
- ullong_t *next_io_blk_ptr;
- if (is_write_io(aiop)) {
- next_io_blk_ptr = &aiop->dev->next_write_blk;
- if (!(aiop->dev->next_write_blk %
- opts.expert_write_cluster_length)) {
- t = randomish_block(aiop);
- t = t - (t % opts.expert_write_cluster_length);
- } else {
- TNF_PROBE_1(cluster_write, "rand_block",
- "sunw%cte%diskomizer%blocks write cluster",
- tnf_ulonglong, next_read_blk,
- aiop->dev->next_read_blk);
- t = *next_io_blk_ptr;
- }
- } else {
- next_io_blk_ptr = &aiop->dev->next_read_blk;
- if (!(aiop->dev->next_read_blk %
- opts.expert_read_cluster_length)) {
- t = randomish_block(aiop);
- t = t - (t % opts.expert_read_cluster_length);
- } else {
- TNF_PROBE_1(cluster_read, "rand_block",
- "sunw%cte%diskomizer%blocks read cluster",
- tnf_ulonglong, next_read_blk,
- aiop->dev->next_read_blk);
- t = *next_io_blk_ptr;
- }
- }
- aiop->off = (t)%(len);
- aiop->off = find_next_free(map, aiop->off, len, maplen);
- if (next_io_blk_ptr != NULL)
- *next_io_blk_ptr = aiop->off + 1;
- assert(aiop->off < len);
- }
- static void
- unwritten_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen, choose_block_t chooser)
- {
- plog(LOG_DEBUG, "in unwritten_block %llx\n",
- aiop->dev->unwritten == NULL ?
- 0LL : aiop->dev->unwritten->offset);
- if (find_unwritten(map, aiop, maplen)) {
- struct blks *blocks;
- if (has_no_unwritten(aiop))
- aiop->dev->choose_block = chooser;
- blocks = aio_attach(aiop);
- blocks[AIO_BLOCK_INDEX(aiop)].u.was_unwritten = 1;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- } else {
- chooser(map, aiop, start, len, maplen);
- }
- }
- /*
- * find the "next" block to read for this aio_str. Increment by
- * by the number of processes, so that when initializing the disk
- * each block only gets written once.
- */
- static void
- seq_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- int does_not_have_lock;
- assert((aiop->dev->block % opts.nprocs) == this_proc() ||
- aiop->dev->block == len);
- aiop->off = aiop->dev->block;
- if (aiop->dev->block >= len) {
- does_not_have_lock = 1;
- } else while ((does_not_have_lock =
- pend_write_with_lock(map, aiop->off, maplen)) != 0) {
- /*
- * Only push blocks that this process would have to write
- * onto the unwritten queue. This only effects the last
- * block on the device.
- */
- if ((aiop->off % opts.nprocs) == this_proc()) {
- push_unwritten(aiop);
- }
- aiop->dev->block += opts.nprocs;
- aiop->off = aiop->dev->block % len;
- if (aiop->dev->block >= len) {
- aiop->dev->block = len;
- break;
- }
- }
- if (does_not_have_lock != 0) {
- if (aiop->dev->seq_passes == 0 ||
- --aiop->dev->seq_passes == 0) {
- time_now_log(LOG_NOTICE,
- "Finished sequential %ss on %s count %d",
- is_readonly() ? read_str : write_str,
- aiop->dev->logicalname,
- aiop->count);
- if (has_no_unwritten(aiop)) {
- aiop->dev->choose_block = rand_block;
- } else {
- aiop->dev->choose_block = unwritten_block_rand;
- }
- } else {
- aiop->off = aiop->dev->block = this_proc();
- time_now_log(LOG_NOTICE,
- "Starting sequential series again on %s counts %d",
- aiop->dev->logicalname, aiop->count);
- }
- aiop->dev->choose_block(map, aiop, start, len, maplen);
- } else {
- assert(!does_not_have_lock);
- ASSERT_OFFSET(aiop->dev->block);
- aiop->dev->block += opts.nprocs;
- if (aiop->dev->block >= len) {
- aiop->dev->block = len;
- }
- }
- }
- static void
- unwritten_block_seq(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- unwritten_block(map, aiop, start, len, maplen, seq_block);
- }
- static void
- unwritten_block_rand(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- unwritten_block(map, aiop, start, len, maplen, rand_block);
- }
/*
 * I leave the source as this _may_ be useful in the future.
 */
#ifdef NOT_USED_CODE
/*
 * Try to set the lock bit for block "off" in the bitmap.
 * Returns 1 when the bit was set by this call, 0 when it was already
 * set.
 */
static char
set_write(bitmap_t map[], ullong_t off, int maplen)
{
	ulong_t slot = GET_OFF(off) % maplen;
	char status;

	mutex->enter(slot);
	if ((map[slot] & GET_BIT(off)) == 0) {
		map[slot] |= GET_BIT(off);
		status = 1;
	} else {
		/* we are already locked */
		status = 0;
	}
	mutex->exit(slot);

	TNF_PROBE_3(set_write, "set_write", "sunw%cte%diskomizer",
	    tnf_opaque, off, off,
	    tnf_opaque, map, map,
	    tnf_char, status, status);
	return (status);
}
#endif
- void
- clear_write(bitmap_t map[], ullong_t off, ulong_t maplen)
- {
- ulong_t tmp = (GET_OFF(off) % maplen);
- ulong_t x;
- bitmap_t bit = ~(GET_BIT(off));
- mutex->enter(tmp);
- x = map[tmp];
- map[tmp] &= bit;
- assert(~bit != (ulong_t)0);
- if (x == map[tmp]) {
- plog(LOG_ALERT, "Ooops block %#llx (0t%lld) was not locked\n",
- diskomizer_off2byteoff(off), diskomizer_off2byteoff(off));
- TNF_PROBE_2(clear_write, "clear_write failed",
- "sunw%cte%diskomizer",
- tnf_opaque, off, off, tnf_opaque, map, map);
- } else {
- TNF_PROBE_2(clear_write, "clear_write ok",
- "sunw%cte%diskomizer",
- tnf_opaque, off, off,
- tnf_opaque, map, map);
- }
- mutex->exit(tmp);
- }
#ifdef NOT_USED_CODE
/*
 * Debug aid: print every word of the bitmap with its index, then flush
 * stdout.
 */
static void
print_bitmap(bitmap_t map[], int maplen)
{
	int word = 0;

	while (word < maplen) {
		pprintf("%#8.8X %#8.8X\n", word, map[word]);
		word++;
	}
	(void) fflush(stdout);
}
#endif
- /*
- * find_next_free finds the "next" block that is not locked starting from
- * offset.
- */
- ullong_t
- find_next_free(bitmap_t map[], ullong_t off, int len, int maplen)
- {
- ulong_t tmp = (GET_OFF(off) % maplen);
- ulong_t i = 0;
- bitmap_t bit = GET_BIT(off);
- mutex->enter(tmp);
- while ((map[tmp] & bit) != 0) {
- ulong_t newtmp;
- off = off + 1;
- off %= len;
- newtmp = (GET_OFF(off) % maplen);
- bit = GET_BIT(off);
- mutex->getnext(tmp, newtmp);
- tmp = newtmp;
- TNF_PROBE_2(find_next_free_trying, "find_next_free trying",
- "sunw%cte%diskomizer", tnf_longlong, off, off,
- tnf_opaque, map, map);
- if (!(i < (4 * len))) {
- (void) plog(LOG_ALERT,
- "Unable to find free entry in map %#lx"
- " of length %d\n",
- (ulong_t)&map[0], maplen);
- (void) fflush(stderr);
- /* print_bitmap(map, maplen); */
- mutex->exit(tmp);
- (void) sleep(1);
- mutex->enter(tmp);
- i = 0;
- }
- i++;
- }
- map[tmp] |= GET_BIT(off);
- mutex->exit(tmp);
- TNF_PROBE_2(find_next_free_found, "find_next_free found",
- "sunw%cte%diskomizer", tnf_longlong, off, off,
- tnf_opaque, map, map);
- return (off);
- }
- /*
- * Test to see if the write bit is set for this offset. The lock MUST
- * already be held
- */
- static int
- test_write(bitmap_t map[], ullong_t off, int maplen)
- {
- ulong_t tmp = GET_OFF(off) % maplen;
- return (map[tmp] & GET_BIT(off) ? 1 : 0);
- }
- /*
- * If this block is being read from or written to return true
- * Otherwise return lock it and return.
- */
- static int
- pend_write_with_lock(bitmap_t map[], ullong_t off, int maplen)
- {
- ulong_t tmp = GET_OFF(off) % maplen;
- int status;
- mutex->enter(tmp);
- plog(LOG_DEBUG, "Disk Block %lld\n", diskomizer_off2byteoff(off));
- if (map[tmp] & GET_BIT(off)) {
- status = 1;
- } else {
- map[tmp] |= GET_BIT(off);
- status = 0;
- }
- mutex->exit(tmp);
- return (status);
- }
- static uchar_t
- choose_iolen(struct aio_str *aiop)
- {
- if (is_executable(aiop->buf)) {
- return (max_disk_io_len);
- }
- return (opts.disk_io_sizes.weightings[lrand48() %
- opts.disk_io_sizes.wlen]);
- }
- static void
- init_read_buf(uchar_t *buf, ulong_t len, const uchar_t * const write_buf)
- {
- void *sig = expect_signal(SIGBUS, "memset", buf, len);
- read_buffer_initializer(buf, len, write_buf);
- cancel_expected_signal(SIGBUS, sig);
- }
- static bitmap_t *
- attach_dev_writemap(struct device *dev)
- {
- bitmap_t *map;
- int error_count = 0;
- while ((map = (bitmap_t *)
- shm_ops->attach(dev->writemap_handle)) == NULL) {
- if ((error_count++ % 10000) == 0)
- ATTACH_ERROR(dev->writemap_handle);
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- return (map);
- }
- static bitmap_t *
- attach_aio_writemap(struct aio_str *aiop)
- {
- return (attach_dev_writemap(aiop->dev));
- }
- static void
- clear_writemap(struct aio_str *aiop)
- {
- bitmap_t *map = attach_aio_writemap(aiop);
- clear_write(map, aiop->off, aiop->dev->writemap_size);
- shm_ops->detach(aiop->dev->writemap_handle);
- }
- static void
- clear_writemap_success(struct aio_str *aiop)
- {
- aiop->off = push_recent(aiop->dev->recent, aiop->off);
- if (aiop->off != -1) {
- clear_writemap(aiop);
- }
- }
- static struct blks *
- choose_new_random_read(struct aio_str *aiop, ullong_t start, ullong_t len)
- {
- struct blks *blocks;
- bitmap_t *map;
- if ((aiop->off = pop_recent(aiop->dev->recent)) != -1) {
- return (aio_attach(aiop));
- }
- map = attach_aio_writemap(aiop);
- aiop->retrycnt = 0;
- for (;;) {
- rand_block(map, aiop, start, len,
- aiop->dev->writemap_size);
- blocks = aio_attach(aiop);
- if (is_readonly()) {
- if (0x1 & (uint_t)
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io)
- break;
- } else {
- if (blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io != NULL)
- break;
- }
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- blocks = NULL;
- clear_write(map, aiop->off, aiop->dev->writemap_size);
- }
- shm_ops->detach(aiop->dev->writemap_handle);
- return (blocks);
- }
- /*
- * do_new_read: prepare and issue one asynchronous read for aiop.
- *
- * read_type selects how the request is prepared:
- *   NORMAL_READ    - choose_new_random_read() picks the block; the expected
- *                    buffer, I/O length, checksum and header are taken from
- *                    that block's last write.
- *   RETRY_READ     - the request is reissued as it stands; only retrycnt
- *                    is incremented.
- *   WRITE_READ     - the block already in aiop->off is read back; the
- *                    expected buffer and checksum come from the block's
- *                    last write and aiop->iolen is used unchanged.
- *   READ_ONLY_RAND - choose_new_random_read() picks the block; the expected
- *                    data comes from the read-only (r.o) block fields and
- *                    the I/O length is max_disk_io_len.
- *   READ_ONLY_SEQ  - the device's choose_block routine picks the block;
- *                    otherwise as READ_ONLY_RAND.
- *
- * If the device is stopping, no read is issued: the block's write-map bit
- * is cleared, the read buffer is returned and aiop is removed from its
- * fd's all_aios list.  An aread() that fails with EAGAIN is reported and
- * retried; any other failure is reported and the write-map bit cleared.
- *
- * Returns the tv_sec of the time stamp stored in aiop->tv just before the
- * read is issued.
- */
- time_t
- do_new_read(struct aio_str *aiop, ullong_t start, read_type_t read_type)
- {
- ullong_t offset;
- int fd = aiop->fd->fd;
- ullong_t len;
- struct blks *blocks = NULL;
- /*
- * if opts.sequential_passes is equal to seq_passes then we are on the
- * first pass or opts.sequential_passes was zero to start with. In
- * the second case once the disk is full aiop->dev->block will contain
- * the address of the last block anyway.
- */
- if (aiop->dev->seq_passes == opts.sequential_passes) {
- len = aiop->dev->block;
- } else {
- len = LEN_BYTES2BLOCKS(aiop->dev);
- }
- if (read_type != RETRY_READ &&
- OPTION(nloops) != 0 && aiop->dev->countdown != 0) { /* retries do not count down */
- if (--aiop->dev->countdown == 0) {
- time_now_log(LOG_INFO, "countdown on device %s is zero",
- aiop->dev->logicalname);
- }
- }
- if (read_type == NORMAL_READ) {
- struct shadow_hdr const *shadow;
- blocks = choose_new_random_read(aiop, start, len);
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- aiop->iolen = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_iolen;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- aiop->daio_id.buf = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io;
- aiop->hdr = build_bufhr(aiop->dev, start, aiop->off); /* header we expect to read back */
- shadow = get_shadow_hdr(aiop->daio_id.buf);
- aiop->daio_id.chksum = shadow->chksums[aiop->iolen];
- aiop->daio_id.buf_id = get_write_buf_id(
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- } else if (read_type == RETRY_READ) {
- aiop->retrycnt++;
- } else if (read_type == WRITE_READ) {
- struct shadow_hdr const *shadow;
- blocks = aio_attach(aiop);
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- aiop->daio_id.buf = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io;
- aiop->daio_id.buf_id = get_write_buf_id(
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- shadow = get_shadow_hdr(aiop->daio_id.buf);
- aiop->daio_id.chksum = shadow->chksums[aiop->iolen];
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- } else if (read_type == READ_ONLY_RAND) {
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- blocks = choose_new_random_read(aiop, start, len);
- aiop->daio_id.buf =
- BIT2CHARSTAR(blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io);
- aiop->daio_id.chksum =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_chksum;
- aiop->iolen = max_disk_io_len;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- } else if (read_type == READ_ONLY_SEQ) {
- bitmap_t *map;
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- map = attach_aio_writemap(aiop);
- len = LEN_BYTES2BLOCKS(aiop->dev); /* always the whole device here */
- aiop->dev->choose_block(map, aiop, start, len,
- aiop->dev->writemap_size);
- shm_ops->detach(aiop->dev->writemap_handle);
- blocks = aio_attach(aiop);
- aiop->daio_id.buf =
- BIT2CHARSTAR(blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io);
- aiop->daio_id.chksum =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_chksum;
- aiop->iolen = max_disk_io_len;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- }
- offset = aio_str2byteoff(aiop);
- while (my_gettimeofday(&aiop->tv, NULL) == -1)
- pperror("gettimeofday");
- if (blocks == NULL) /* only the RETRY_READ path gets here unattached */
- blocks = aio_attach(aiop);
- init_read_buf(aiop->buf, INDEX_TO_DIOLEN(max_disk_io_len),
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- for (;;) {
- if (aiop->dev->stop_flag ||
- stop_check(aiop->dev->shared_data_handle)) {
- if (aiop->dev->stop_flag == 0) {
- plog(LOG_NOTICE, "Stopping %s\n",
- aiop->dev->logicalname);
- aiop->dev->stop_flag = 1;
- } else if (aiop->dev->need_to_stop &&
- set_shared_stop_flag(
- aiop->dev->shared_data_handle) != -1) {
- aiop->dev->need_to_stop = 0;
- }
- clear_writemap(aiop); /* stopping: release the block and the buffer */
- return_read_buf(aiop->buf);
- aiop->buf = NULL;
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- break;
- }
- ZERO_OBJ(aiop->error.desc);
- /* Move to the beginning of the all_aios list */
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- TNF_PROBE_4(aioread, "aioread",
- "sunw%cte%diskomizer%aio read",
- tnf_long, fd, aiop->fd->fd,
- tnf_opaque, offset, offset,
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- if (daio->aread(fd, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- &aiop->aio_res, &aiop->daio_id) < 0) {
- if (errno == EAGAIN) { /* transient: report and try again */
- AIOREAD_ERROR(fd, aiop->fd->name,
- aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- SEEK_SET, &aiop->aio_res);
- continue;
- } else {
- AIOREAD_ERROR(fd, aiop->fd->name, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- SEEK_SET, &aiop->aio_res);
- clear_writemap(aiop); /* NOTE(review): this failed read still reaches total_read++ below - confirm intended */
- }
- }
- aiop->fd->total_read++;
- break;
- }
- return (aiop->tv.tv_sec);
- }
- static struct shadow_hdr const *
- set_io_len(struct aio_str *aiop)
- {
- struct shadow_hdr const *shadow_hdr = get_shadow_hdr(aiop->buf);
- if (!shadow_hdr->type.BUF_READY) {
- struct shadow_hdr *shadow;
- int j;
- shadow = (struct shadow_hdr *)shadow_hdr;
- init_buf(aiop->buf);
- for (j = 0; j <= opts.disk_io_sizes.weightings[
- opts.disk_io_sizes.wlen - 1]; j++) {
- shadow->chksums[j] =
- check_bufbody(aiop->buf,
- INDEX_TO_DIOLEN(j));
- }
- shadow->type = get_bufhdr_a(aiop->buf).type;
- shadow->type.BUF_READY = 1;
- if (opts.obscure_execute && is_executable(aiop->buf)) {
- run_func(aiop->buf,
- opts.disk_io_sizes.vals[aiop->iolen] -
- SIZEOF_BUFHDR);
- }
- aiop->iolen = choose_iolen(aiop);
- } else if (shadow_hdr->type.BUF_READ_ONLY) {
- aiop->iolen = max_disk_io_len;
- } else {
- assert(*aiop->buf == 0xAA || *aiop->buf == 0x55);
- aiop->iolen = choose_iolen(aiop);
- }
- return (shadow_hdr);
- }
- static int
- is_sequential(struct aio_str *aiop)
- {
- return (aiop->dev->choose_block == seq_block ||
- aiop->dev->choose_block == unwritten_block_seq);
- }
- static struct aio_str *
- get_deferred_io(struct device *dev)
- {
- struct device *devp;
- struct aio_str *aiop;
- for (devp = dev; devp != NULL; devp = devp->next) {
- if ((aiop = pop_from_aio_list(&devp->deferred_ios)) != NULL) {
- return (aiop);
- }
- }
- return (NULL);
- }
- static void
- deferred_starter(struct device *dev, ullong_t start)
- {
- int all_going = 1;
- struct device *devp;
- for (devp = dev; devp != NULL; devp = devp->next) {
- struct aio_str *aiop;
- check_exit_flag();
- aiop = pop_from_aio_list(&devp->deferred_ios);
- if (aiop != NULL) {
- cancelled_count--;
- aiop->handler(aiop, start);
- }
- if (is_aio_on_list(&devp->deferred_ios)) {
- all_going = 0;
- }
- }
- if (all_going == 1) {
- start_deferred = (void (*)(struct device *, ullong_t)) nop;
- }
- }
- static int
- number_of_writes(struct device *dev)
- {
- struct fds *fd = dev->fdhead;
- int count = 0;
- do {
- count += fd->total_write;
- fd = fd->next;
- } while (fd != dev->fdhead);
- return (count);
- }
- /*
- * do_new_write: prepare and issue one asynchronous write for aiop.
- * (Not static: the `static void' line was commented out.)
- *
- * An aiop with no fd is treated as a deferred I/O: it is given the
- * device's head fd and put on that fd's all_aios list.
- *
- * retry == 0 (a new write):
- *   A write buffer is obtained if aiop has none and set_io_len() chooses
- *   the I/O length.  The device's choose_block routine is then called
- *   until it yields a block that either has never been written
- *   (last_io == NULL) or has been read at least read_minimum times and
- *   was last written from a different buffer.  If the block was last
- *   written from this same buffer and another write buffer is available,
- *   the buffers are swapped and the block is used.  Any other block is
- *   released.  Once i * obscure_search_multiplier reaches len, or at once
- *   for a deferred I/O, the search is abandoned: aiop is parked on the
- *   device's deferred_ios list, its buffer returned, and the function
- *   returns without writing.
- *
- * retry != 0:
- *   The block in aiop->off is written again with the buffer already held;
- *   retry == 1 also prints the retry count.
- *
- * Unless the buffer is BUF_READ_ONLY its header is rebuilt for this write
- * and the block's sequence, path_id and hdrchksum are updated.
- *
- * If the device is stopping, no write is issued: the write-map bit is
- * cleared, the buffer returned and aiop removed from all_aios.  An
- * awrite() failing with EAGAIN is retried; any other failure clears the
- * write-map bit.
- */
- void
- do_new_write(struct aio_str *aiop, ullong_t start, int retry)
- {
- ullong_t len;
- ullong_t offset;
- long writemap_size;
- struct shadow_hdr const *shadow_hdr;
- struct blks *blocks;
- struct blks *block;
- char deferred;
- if (aiop->fd == NULL) { /* no fd: this aio was parked on the deferred list */
- aiop->fd = aiop->dev->fdhead;
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- deferred = 1;
- } else {
- deferred = 0;
- }
- len = LEN_BYTES2BLOCKS(aiop->dev);
- writemap_size = aiop->dev->writemap_size;
- if (!retry) {
- bitmap_t *map;
- int i;
- if (write_loops) {
- if (--aiop->dev->countdown == 0) {
- time_now_log(LOG_INFO,
- "countdown on device %s is zero",
- aiop->dev->logicalname);
- }
- }
- if (aiop->buf == NULL) {
- aiop->buf = get_write_buf();
- }
- shadow_hdr = set_io_len(aiop);
- map = attach_aio_writemap(aiop);
- for (i = 0; /* cstyle */; i++) {
- aiop->dev->choose_block(map, aiop, start, len,
- writemap_size);
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- if (block->r.w.last_io == NULL ||
- (block->read_count >= OPTION(read_minimum) &&
- block->r.w.last_io != aiop->buf)) {
- break;
- } else {
- if (block->r.w.last_io == aiop->buf) {
- uchar_t *buf;
- /*
- * Get the new write buf first so that
- * you definitely get a new buffer.
- */
- if ((buf = get_write_buf()) != NULL) {
- return_write_buf(aiop->buf);
- aiop->buf = buf;
- shadow_hdr = set_io_len(aiop);
- break;
- }
- }
- clear_write(map, aiop->off, writemap_size); /* block rejected: release it */
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- block = blocks = NULL;
- if (i * OPTION(obscure_search_multiplier) >=
- len || deferred) { /* give up searching and park this aio */
- if (!deferred) {
- union err_info err_info;
- err_info.str = "write";
- report_error(aiop, err_info,
- ERR_DEFERRED);
- }
- remove_from_aio_list(
- &aiop->fd->all_aios, aiop);
- aiop->fd = NULL;
- add_to_aio_list(
- &aiop->dev->deferred_ios, aiop);
- return_write_buf(aiop->buf);
- aiop->buf = NULL;
- shm_ops->detach(
- aiop->dev->writemap_handle);
- if (number_of_writes(aiop->dev) == 0) {
- start_deferred =
- deferred_starter;
- }
- return;
- }
- }
- }
- aiop->retrycnt = 0;
- shm_ops->detach(aiop->dev->writemap_handle);
- } else {
- /* if we are retrying then we already have the lock. */
- ullong_t blockno = aio_str2byteoff(aiop);
- shadow_hdr = get_shadow_hdr(aiop->buf);
- if (retry == 1) {
- pfprintf(stderr,
- "%s Block 0t%lld (%#llx) retry count %d\n",
- aiop->fd->name, blockno, blockno,
- ++aiop->retrycnt);
- }
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- }
- offset = (ullong_t)start + (INDEX_TO_DIOLEN(max_disk_io_len)*aiop->off);
- assert((ullong_t)offset >= (ullong_t)start);
- assert((ullong_t)offset <= (ullong_t)(start + aiop->dev->length -
- INDEX_TO_DIOLEN(max_disk_io_len)));
- while (my_gettimeofday(&aiop->tv, NULL) == -1)
- pperror("gettimeofday");
- if (shadow_hdr->type.BUF_READ_ONLY == 0) {
- /*
- * Set up the buffer header and store away the path_id of the
- * path we are using, and the header checksum.
- */
- unprotect_buf(aiop->buf);
- toggle_bufhdr(aiop->buf);
- set_bufhdr_all(aiop->buf, shadow_hdr->chksums[aiop->iolen],
- INDEX_TO_DIOLEN(aiop->iolen),
- aiop->fd->devid, offset, shadow_hdr->type,
- ++block->sequence,
- aiop->tv.tv_sec);
- block->path_id = aiop->fd->path_id;
- block->hdrchksum =
- set_buf_hdrchksum(aiop->buf);
- protect_buf(aiop->buf);
- } else {
- block->hdrchksum = check_bufhdr(aiop->buf,
- get_bufhdr_hdrchksum(aiop->buf));
- }
- if (get_bufhdr_hdrchksum(aiop->buf) != block->hdrchksum) {
- pfprintf(stderr, "writing bad checksum buf %#lx\n",
- (ulong_t)aiop->buf);
- }
- if (opts.obscure_execute && is_executable(aiop->buf)) {
- plog(LOG_DEBUG, "Writing executable buffer\n");
- }
- /*
- * This if is saying that this process should have initialized
- * this block, during the sequential part of the run. So last_io
- * should be set. If not then something went wrong.
- *
- * The block could also have been skipped as it was busy and put on
- * the unwritten list, so only check if the unwritten list is
- * empty.
- */
- if (!is_sequential(aiop) && block->r.w.last_io == NULL &&
- (aiop->off % opts.nprocs) == this_proc() &&
- aiop->dev->failed_to_push_unwritten == 0 &&
- blocks[AIO_BLOCK_INDEX(aiop)].u.was_unwritten == 1 &&
- !is_unwritten(aiop)) {
- ullong_t blockno = aio_str2byteoff(aiop);
- pfprintf(stderr, "Device %s\n", aiop->fd->name);
- pfprintf(stderr, "Device len %#llx\n", aiop->dev->length);
- pfprintf(stderr, "This proc %d nprocs %ld\n", this_proc(),
- opts.nprocs);
- pfprintf(stderr, "Block %#llx (0t%lld) byte off %llx error\n",
- aiop->off, aiop->off, blockno);
- pfprintf(stderr, "Last Requested %ld\n",
- block->last_requested);
- pfprintf(stderr, "Last return delta %d\n",
- block->last_returned_delta);
- pfprintf(stderr, "Read Count %d\n", block->read_count);
- pfprintf(stderr, "Last Io Len %d\n",
- INDEX_TO_DIOLEN(block->r.w.last_iolen));
- assert(block->r.w.last_io != NULL); /* always fails here: last_io was just tested NULL */
- }
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- for (;;) {
- if (aiop->dev->stop_flag ||
- stop_check(aiop->dev->shared_data_handle)) {
- if (aiop->dev->stop_flag == 0) {
- plog(LOG_NOTICE, "Stopping %s\n",
- aiop->dev->logicalname);
- aiop->dev->stop_flag = 1;
- } else if (aiop->dev->need_to_stop &&
- set_shared_stop_flag(
- aiop->dev->shared_data_handle) != -1) {
- aiop->dev->need_to_stop = 0;
- }
- clear_writemap(aiop); /* stopping: release the block and the buffer */
- return_write_buf(aiop->buf);
- aiop->buf = NULL;
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- break;
- }
- assert(*aiop->buf == 0xAA || *aiop->buf == 0x55);
- ZERO_OBJ(aiop->error.desc);
- TNF_PROBE_4(daiowrite, "aiowrite",
- "sunw%cte%diskomizer%aio write",
- tnf_long, fd, aiop->fd->fd,
- tnf_opaque, offset, offset,
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- aiop->daio_id.buf = aiop->buf;
- aiop->daio_id.buf_id = get_write_buf_id(aiop->buf);
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- (void) memcpy(&aiop->hdr, aiop->buf, sizeof (aiop->hdr)); /* keep a private copy of the header being written */
- aiop->daio_id.footer_len = 0;
- /* Move to the beginning of the all_aios list */
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- if (daio->awrite(aiop->fd->fd, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen),
- offset, &aiop->aio_res, &aiop->daio_id) == -1) {
- int serrno = errno; /* saved before the error report runs */
- AIOWRITE_ERROR(aiop->fd->fd, aiop->fd->name,
- (ulong_t)aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen),
- offset,
- SEEK_SET,
- (ulong_t)&aiop->aio_res);
- if (serrno == EAGAIN) {
- continue;
- } else {
- clear_writemap(aiop);
- }
- } else if (!deferred) { /* write queued: also try to start one deferred I/O */
- aiop->fd->total_write++;
- if (is_aio_on_list(&aiop->dev->deferred_ios)) {
- aiop = pop_from_aio_list(
- &aiop->dev->deferred_ios);
- do_new_write(aiop, start, 0);
- }
- } else {
- aiop->fd->total_write++;
- plog(LOG_NOTICE, "Started deferred io to %s\n",
- aiop->dev->logicalname);
- }
- break;
- }
- }
- /*ARGSUSED1*/
- void
- run_func(uchar_t *buf, size_t size)
- {
- uchar_t *cptr;
- #ifdef SPARC
- uint32_t *last, *ptr;
- #else
- uint32_t *ptr;
- #endif
- void (*func)(void);
- cptr = get_buf_data(buf);
- #ifdef SPARC
- /* check alignment for SPARC */
- if ((ulong_t)cptr % 4) {
- return;
- }
- #endif
- /*LINTED*/
- ptr = (uint32_t *)cptr;
- func = (void (*)(void))(ptr);
- #ifdef SPARC
- last = ptr + size / sizeof (uint32_t);
- for (; ptr < last; ptr++)
- flush((int32_t *)ptr);
- #endif
- plog(LOG_DEBUG, "Running func %#lx in buf %#lx, type %llx\n",
- func, (ulong_t)buf, get_bufhdr(buf).start);
- TNF_PROBE_1(run_func, "run_func",
- "sunw%cte%diskomizer%aio execute run",
- tnf_opaque, buf, buf);
- func();
- }
- struct fds *
- find_path(struct fds *fdhead, char path_id)
- {
- struct fds *fd;
- for (fd = fdhead->next; ; fd = fd->next) {
- if (fd->path_id == path_id)
- return (fd);
- if (fd == fdhead)
- return (NULL);
- }
- }
- static struct bufhdr
- build_bufhr(struct device *dev, ullong_t start, ullong_t off)
- {
- struct bufhdr hdr;
- struct shadow_hdr const *shadow_hdr;
- struct blks *block;
- struct blks *blocks;
- struct fds *fd;
- ushort16_t hdrchksum;
- int error_count = 0;
- ullong_t offset = (ullong_t)start +
- (ullong_t)(INDEX_TO_DIOLEN(max_disk_io_len)*off);
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- block = &blocks[DEV_BLOCK_INDEX(dev, off)];
- ZERO_OBJ(hdr);
- fd = find_path(dev->fdhead, block->path_id);
- assert(fd != NULL);
- if (block->bad_hdr) {
- (void) memcpy(&hdr, block->r.w.last_io, SIZEOF_BUFHDR);
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- return (hdr);
- }
- shadow_hdr = get_shadow_hdr(block->r.w.last_io);
- if (block->ab == 1) {
- hdr.start = hdr.end = BUF_TYPE_A;
- hdr.ab.a.chksum = shadow_hdr->chksums[block->r.w.last_iolen];
- hdr.ab.a.type = shadow_hdr->type;
- hdr.ab.a.type.sequence = block->sequence;
- hdr.ab.a.devid = fd->devid;
- hdr.ab.a.off = offset;
- hdr.ab.a.time = block->last_requested;
- hdr.ab.a.did = master_pid();
- hdr.ab.a.len = INDEX_TO_DIOLEN(block->r.w.last_iolen);
- get_serial_and_provider(hdr.ab.a.serial_and_provider,
- SIZEOF_SERIAL_AND_PROVIDER);
- } else {
- hdr.start = hdr.end = BUF_TYPE_B;
- hdr.ab.b.time = block->last_requested;
- hdr.ab.b.chksum = shadow_hdr->chksums[block->r.w.last_iolen];
- hdr.ab.b.type = shadow_hdr->type;
- hdr.ab.b.type.sequence = block->sequence;
- hdr.ab.b.devid = fd->devid;
- hdr.ab.b.off = offset;
- hdr.ab.b.did = master_pid();
- hdr.ab.b.len = INDEX_TO_DIOLEN(block->r.w.last_iolen);
- get_serial_and_provider(hdr.ab.b.serial_and_provider,
- SIZEOF_SERIAL_AND_PROVIDER);
- }
- if ((hdrchksum = set_hdrchksum(&hdr)) != block->hdrchksum) {
- pfprintf(stderr,
- "Bad rebuilt buf header is %#x should be %#x\n",
- block->hdrchksum, hdrchksum);
- }
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- return (hdr);
- }
- struct bufhdr
- build_prevbufhr(struct device *dev, ullong_t start, ullong_t off)
- {
- struct bufhdr hdr;
- struct shadow_hdr const *shadow_hdr;
- struct blks *block;
- struct blks *blocks;
- struct fds *fd;
- ushort16_t hdrchksum;
- int error_count = 0;
- ullong_t offset = (ullong_t)start +
- (ullong_t)(INDEX_TO_DIOLEN(max_disk_io_len)*off);
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- block = &blocks[DEV_BLOCK_INDEX(dev, off)];
- ZERO_OBJ(hdr);
- fd = find_path(dev->fdhead, block->path_id);
- assert(fd != NULL);
- if (block->bad_hdr) {
- (void) memcpy(&hdr, block->r.w.last_io, SIZEOF_BUFHDR);
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- return (hdr);
- }
- shadow_hdr = get_shadow_hdr(block->r.w.prev_io);
- if (block->ab != 1) {
- hdr.start = hdr.end = BUF_TYPE_A;
- hdr.ab.a.time = block->u.prev_requested;
- hdr.ab.a.chksum = shadow_hdr->chksums[block->r.w.prev_iolen];
- hdr.ab.a.type = shadow_hdr->type;
- hdr.ab.a.type.sequence = block->sequence - 1;
- hdr.ab.a.devid = fd->devid;
- hdr.ab.a.off = offset;
- get_serial_and_provider(hdr.ab.a.serial_and_provider,
- SIZEOF_SERIAL_AND_PROVIDER);
- hdr.ab.a.len = INDEX_TO_DIOLEN(block->r.w.prev_iolen);
- hdr.ab.a.did = master_pid();
- } else {
- hdr.start = hdr.end = BUF_TYPE_B;
- hdr.ab.b.chksum = shadow_hdr->chksums[block->r.w.prev_iolen];
- hdr.ab.b.type = shadow_hdr->type;
- hdr.ab.b.type.sequence = block->sequence - 1;
- hdr.ab.b.devid = fd->devid;
- hdr.ab.b.off = offset;
- hdr.ab.b.len = INDEX_TO_DIOLEN(block->r.w.prev_iolen);
- hdr.ab.b.time = block->u.prev_requested;
- hdr.ab.b.did = master_pid();
- get_serial_and_provider(hdr.ab.b.serial_and_provider,
- SIZEOF_SERIAL_AND_PROVIDER);
- }
- if ((hdrchksum = set_hdrchksum(&hdr)) != block->hdrchksum) {
- pfprintf(stderr,
- "Bad rebuilt buf header is %#x should be %#x\n",
- block->hdrchksum, hdrchksum);
- }
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- return (hdr);
- }
- static struct diff_return
- memdiff_data(FILE *err, uchar_t *goodptr, uchar_t *badptr,
- int offset, int len)
- {
- int i;
- struct diff_return dr;
- union {
- uchar_t c[sizeof (uint64_t) / sizeof (uchar_t)];
- uint32_t i[sizeof (uint64_t) / sizeof (uint32_t)];
- uint64_t l;
- } good, bad, diff;
- dr.bits = 0LL;
- dr.count = 0LL;
- for (i = 0; i < len; i += sizeof (uint64_t)) {
- (void) memcpy(&good.c[0], goodptr, sizeof (uint64_t));
- (void) memcpy(&bad.c[0], badptr, sizeof (uint64_t));
- diff.i[0] = good.i[0] ^ bad.i[0];
- diff.i[1] = good.i[1] ^ bad.i[1];
- if (!opts.expert_small_diffs || diff.l) {
- int bc = count_uint32_bits(diff.i[0]) +
- count_uint32_bits(diff.i[1]);
- dr.count += bc;
- dr.bits |= diff.l;
- #ifdef _BIG_ENDIAN
- (void) fprintf(err,
- "0x%8.8x %8.8x%8.8x %8.8x%8.8x "
- "%8.8x%8.8x %2.2d\n", i + offset,
- good.i[0], good.i[1], bad.i[0], bad.i[1],
- diff.i[0], diff.i[1], bc);
- #elif defined(_LITTLE_ENDIAN)
- (void) fprintf(err,
- "0x%8.8x "
- "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x "
- "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x "
- "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x "
- "%2.2d\n", i + offset,
- good.c[0], good.c[1], good.c[2], good.c[3],
- good.c[4], good.c[5], good.c[6], good.c[7],
- bad.c[0], bad.c[1], bad.c[2], bad.c[3],
- bad.c[4], bad.c[5], bad.c[6], bad.c[7],
- diff.c[0], diff.c[1], diff.c[2], diff.c[3],
- diff.c[4], diff.c[5], diff.c[6], diff.c[7],
- bc);
- #else
- #error "niether _BIG_ENDIAN or _LITTLE_ENDIAN defined"
- #endif
- }
- badptr += sizeof (uint64_t);
- goodptr += sizeof (uint64_t);
- }
- return (dr);
- }
- static struct diff_return
- memdiff_bufhdr(FILE *err, uchar_t *buf, uchar_t *good_hdr)
- {
- union {
- struct bufhdr hdr;
- uchar_t c[SIZEOF_BUFHDR];
- } bad;
- bad.hdr = get_bufhdr(buf);
- return (memdiff_data(err, good_hdr, &bad.c[0], 0, SIZEOF_BUFHDR));
- }
- char *
- diff_file(void)
- {
- char *wd;
- static char *diffs_file;
- if (NULL == diffs_file) {
- if (diffs[0] != '/' && (wd = getcwd(NULL, 128)) != NULL) {
- int x = strlen(diffs) + strlen(wd) + 2;
- if ((diffs_file = malloc(x)) != NULL) {
- snprintf(diffs_file, x, "%s/%s", wd, diffs);
- } else {
- diffs_file = diffs;
- }
- free(wd);
- } else {
- diffs_file = diffs;
- }
- }
- return (diffs_file);
- }
- struct diff_return
- memdiff_buf(uint64_t off, struct device *dev, uchar_t *buf, uint32_t iolen,
- struct fds *fd, const char *str, struct error *error)
- {
- static const char zero2seven[] = "0 1 2 3 4 5 6 7";
- uchar_t *badptr;
- uchar_t *goodptr;
- uchar_t *prevptr; /* pointer to the previous buffer that was written */
- FILE *err;
- time_t now;
- sigset_t nset; /* new set */
- sigset_t oset; /* old set */
- int sigprocmask_status;
- int error_count = 0;
- struct diff_return dr;
- struct diff_return dr2;
- struct blks *blocks, *block;
- union {
- struct bufhdr hdr;
- uchar_t c[SIZEOF_BUFHDR];
- } good, prev;
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- block = &blocks[DEV_BLOCK_INDEX(dev, off)];
- if (block->r.w.last_io == NULL) {
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- dr.count = -1;
- return (dr);
- }
- goodptr = get_buf_data(block->r.w.last_io);
- if (block->r.w.prev_io != NULL) {
- prevptr = get_buf_data(block->r.w.prev_io);
- } else {
- prevptr = NULL;
- }
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- badptr = get_buf_data(buf);
- if ((err = fopen(diff_file(), "a+")) == NULL) {
- err = stderr;
- FOPEN_ERROR(diff_file(), "a+");
- (void) sigemptyset(&nset);
- (void) sigaddset(&nset, SIGINT);
- (void) sigaddset(&nset, SIGTERM);
- sigprocmask_status =
- sigprocmask(SIG_BLOCK, &nset, &oset);
- mutex->stderr_enter();
- }
- now = time(NULL);
- (void) fprintf(err, "diskomizer %s\n", VERSION);
- print_bufhdr_offsets(err);
- (void) fprintf(err, "Error Instance %d\n", get_error_instance_number());
- (void) fprintf(err, "Diffs dumped %s", ctime(&now));
- (void) fprintf(err, "Diffs from %s for block 0x%llx\n",
- str, diskomizer_off2byteoff(off));
- (void) fprintf(err,
- "use \""
- "dd if=%s bs=%d iseek=%lld count=1\" to read the block\n",
- fd->longname, iolen,
- (opts.start_offset) + off);
- good.hdr = build_bufhr(dev, start_offset(), off);
- decode_header(err, &good.c[0], buf);
- (void) fprintf(err, "%10.10s %16.16s %16.16s %16.16s %s\n",
- "", "Written", "Read", "Diffs", "Bit count");
- (void) fprintf(err, "%10.10s %16.16s %16.16s %16.16s\n",
- "Offset", zero2seven, zero2seven, zero2seven);
- dr = memdiff_bufhdr(err, buf, &good.c[0]);
- dr2 = memdiff_data(err, goodptr, badptr, SIZEOF_BUFHDR,
- iolen - SIZEOF_BUFHDR);
- dr.count += dr2.count;
- dr.bits |= dr2.bits;
- (void) fprintf(err, "End of diffs for block 0x%llx\n",
- diskomizer_off2byteoff(off));
- if (prevptr != NULL && opts.display_prev_diffs) {
- prev.hdr = build_prevbufhr(dev, start_offset(), off);
- (void) fprintf(err, "Diffs from %s for previous io to block "
- "0x%llx\n", str, diskomizer_off2byteoff(off));
- (void) fprintf(err, "%10.10s %16.16s %16.16s %16.16s %s\n",
- "", "Written", "Read", "Diffs", "Bit count");
- (void) fprintf(err, "%10.10s %16.16s %16.16s %16.16s\n",
- "Offset", zero2seven, zero2seven, zero2seven);
- (void) memdiff_bufhdr(err, buf, &prev.c[0]);
- (void) memdiff_data(err, prevptr, badptr, SIZEOF_BUFHDR,
- iolen - SIZEOF_BUFHDR);
- }
- (void) fflush(err);
- if (fsync(fileno(err)) == -1) {
- FSYNC_ERROR(fileno(err), diffs);
- }
- if (err != stderr) {
- (void) fclose(err);
- error->diff_file = diff_file();
- dlog(LOG_ERR, "Diffs file dumped to %s\n", diff_file());
- } else {
- mutex->stderr_exit();
- error->diff_file = NULL;
- if (sigprocmask_status == 0)
- (void) sigprocmask(SIG_SETMASK, &oset, NULL);
- }
- return (dr);
- }
- struct diff_return
- memdiff(struct aio_str *aiop, char *str)
- {
- struct diff_return dr;
- if (!is_readonly()) {
- aiop->error.dr = memdiff_buf(aiop->off, aiop->dev, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), aiop->fd, str, &aiop->error);
- return (aiop->error.dr);
- }
- dr.count = dr.bits = 0;
- return (dr);
- }
- /*
- * Check to see if the buffer that has been read matches the previous
- * buffer that was written. This would spot if an write never got to
- * the disk.
- */
- int
- check_previous_buffer(check_t check_sum, struct aio_str *aiop)
- {
- uchar_t *previous_buf_written;
- struct blks *blocks;
- time_t prev_time;
- blocks = aio_attach(aiop);
- previous_buf_written = blocks[AIO_BLOCK_INDEX(aiop)].r.w.prev_io;
- prev_time = blocks[AIO_BLOCK_INDEX(aiop)].u.prev_requested;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- if (previous_buf_written != NULL) {
- struct bufhdr_a hdr_a;
- struct bufhdr hdr;
- hdr = get_bufhdr(previous_buf_written);
- hdr_a = conv_bufhdr(&hdr);
- if (check_sum == hdr_a.chksum) {
- char *time_str;
- time_str = alloc_time_str(prev_time);
- pfprintf(stderr, "block %llx checksum matches"
- " the previous block written at %s\n",
- aio_str2byteoff(aiop), NOT_NULL(time_str));
- not_null_free(time_str);
- return (1);
- }
- }
- return (0);
- }
- static int
- check_old_data(struct aio_str *aiop)
- {
- const char *x;
- time_t tyme;
- pid_t did;
- if (((x = get_buf_serial_and_provider(aiop->buf)) == NULL) ||
- cmp_serial_and_provider(x) != 0) {
- char y[SIZEOF_SERIAL_AND_PROVIDER];
- get_serial_and_provider(y, SIZEOF_SERIAL_AND_PROVIDER);
- if (x == NULL) {
- plog(LOG_NOTICE, "block %llx contains data that "
- "could not be recognized.\n",
- aio_str2byteoff(aiop));
- } else {
- plog(LOG_NOTICE, "block %llx contains data written "
- "by host %.*s not %.*s\n", aio_str2byteoff(aiop),
- SIZEOF_SERIAL_AND_PROVIDER, x,
- SIZEOF_SERIAL_AND_PROVIDER, y);
- }
- return (1);
- } else if ((tyme = get_buf_time(aiop->buf)) < start_time.tv_sec) {
- char *t = alloc_time_str(tyme);
- plog(LOG_NOTICE, "block %llx contains data written before "
- "this instance started. It was written at %s\n",
- aio_str2byteoff(aiop), NOT_NULL(t));
- not_null_free(t);
- return (1);
- } else if ((did = get_buf_did(aiop->buf)) != master_pid()) {
- plog(LOG_NOTICE, "block at byte offset %llx not written by "
- "this instance, but by %ld\n", aio_str2byteoff(aiop), did);
- return (1);
- }
- return (0);
- }
- int
- check_header(ullong_t start, struct aio_str *aiop)
- {
- if (!is_readonly()) {
- ushort16_t bufhdrchksum;
- ushort16_t hdrchksum;
- bufhdrchksum = get_bufhdr_hdrchksum(aiop->buf);
- hdrchksum = check_bufhdr(aiop->buf, bufhdrchksum);
- if (bufhdrchksum != hdrchksum || hdrchksum == 0) {
- plog(LOG_ERR, "block %llx bad header checksum\n",
- aio_str2byteoff(aiop));
- return (0);
- } else {
- struct bufhdr_a hdr_a;
- ullong_t off;
- struct bufhdr hdr;
- hdr = get_bufhdr(aiop->buf);
- (void) check_old_data(aiop);
- hdr_a = conv_bufhdr(&hdr);
- off = byteoff2diskomizer_off(hdr_a.off);
- if (off != aiop->off) {
- plog(LOG_ERR,
- "On disk header says device byte offset "
- "%llx (0t%lld), which calculates "
- "diskomizer block %#llx (0t%lld), I "
- "requested diskomizer block "
- "%#llx (0t%lld)\n",
- hdr_a.off, hdr_a.off, off, off,
- aiop->off, aiop->off);
- return (0);
- }
- }
- }
- return (1);
- }
- int
- do_memcmp(ullong_t start, struct aio_str *aiop)
- {
- check_t check_sum;
- uchar_t *last;
- struct blks *blocks;
- int status = 0;
- if (check_header(start, aiop) == 0)
- return (0);
- blocks = aio_attach(aiop);
- if ((last = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io) != NULL) {
- if (memcmp(get_buf_data(last), get_buf_data(aiop->buf),
- INDEX_TO_DIOLEN(aiop->iolen) - SIZEOF_BUFHDR)) {
- struct bufhdr_a hdr_a;
- struct bufhdr hdr = get_bufhdr(last);
- hdr_a = conv_bufhdr(&hdr);
- /* the memcmp failed */
- check_sum = check_aiobuf(aiop);
- dfprintf(stderr, "block %llx buf %#lx does not match "
- "what was written, what was read %#lx,"
- " written %#lx\n", aio_str2byteoff(aiop),
- (ulong_t)last, check_sum, hdr_a.chksum);
- if (check_previous_buffer(check_sum, aiop) == 0) {
- check_old_data(aiop);
- }
- } else {
- status = 1;
- }
- } else {
- status = 1;
- }
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- return (status);
- }
/*
 * print_number_of_bytes: print the given number in full (hex and
 * decimal) followed by the singular or plural label, and then, if the
 * number is at least 1024, print it again in human readable form scaled
 * by powers of 1024 and rounded to 2 decimal places.
 *
 * x		the value to print.
 * singular	label used when x is exactly 1.
 * plural	label used for every other value.
 *
 * Output goes to stdout.  The value is unsigned, so it is printed with
 * unsigned conversions; values above LLONG_MAX no longer appear negative.
 * If rounding carries the scaled value up to 1024 it is promoted to the
 * next unit (e.g. "1.00 M" rather than "1024.00 K").
 */
void
print_number_of_bytes(unsigned long long x, char *singular, char *plural)
{
	/*
	 * All the units that fit in 64 bits:
	 * kilo, mega, giga, tera, peta, exa
	 */
	static const char *all_units[] = { "K", "M", "G", "T", "P", "E" };
	const size_t nunits = sizeof (all_units) / sizeof (all_units[0]);
	const char *str = (x != 1) ? plural : singular;
	size_t used = 0;	/* number of divisions by 1024 performed */
	int y = 0;		/* fraction of the last division, in 1/1000 */

	(void) printf("\t%#llx, %llu, %s ", x, x, str);

	while (used < nunits && x / 1024 != 0) {
		/* The remainder is < 1024 so this cannot overflow. */
		y = (int)(((x % 1024) * 1000) / 1024);
		x /= 1024;
		used++;
	}

	if (used == 0) {
		(void) printf("\n");
		return;
	}

	/* Round up if necessary, then lose the least significant digit. */
	if (y % 10 >= 5) {
		y += 10;
	}
	y /= 10;
	if (y >= 100) {
		y -= 100;
		x++;
		/* Rounding reached a whole next unit; promote if one exists. */
		if (x == 1024 && used < nunits) {
			x = 1;
			used++;
		}
	}
	(void) printf("(%llu.%.2d %s)\n", x, y, all_units[used - 1]);
}
/*
 * print_number: print a count in hex and decimal followed by the
 * singular label when the count is exactly 1, the plural label otherwise.
 *
 * The value is unsigned so it is printed with %llu; values above
 * LLONG_MAX are no longer shown as negative numbers.
 */
void
print_number(unsigned long long i, char *singular, char *plural)
{
	const char *str = (i != 1) ? plural : singular;

	(void) printf("\t%#llx, %llu, %s\n", i, i, str);
}
- /*
- * given that the bufhdr for the io has a good check sum but is not
- * for this device find the correct device and offset for the io
- * and report this.
- */
- struct fds *
- check_matching_path_io(struct bufhdr_a *hdr, struct fds *fd)
- {
- struct fds *x, *fdh;
- fdh = fd;
- for (x = fd->next; /* make cstyle happy */; x = x->next) {
- if (memcmp(&hdr->devid, &fd->devid,
- sizeof (struct device_id)) == 0) {
- return (fd);
- }
- if (x == fdh) {
- return (NULL);
- } else {
- fd = x;
- }
- }
- /*NOTREACHED*/
- }
- void
- read_and_check(ullong_t start, struct device *dev, ullong_t off,
- struct aio_str *aiop)
- {
- uchar_t *buf;
- ullong_t status;
- ullong_t diskoff = diskomizer_off2byteoff(off);
- struct blks *blocks;
- struct blks *block;
- struct shadow_hdr const *shadow;
- int error_count = 0;
- buf = calloc(1, INDEX_TO_DIOLEN(max_disk_io_len));
- if (buf == NULL) {
- CALLOC_ERROR(1L, (ulong_t)INDEX_TO_DIOLEN(max_disk_io_len));
- return;
- }
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- block = &blocks[DEV_BLOCK_INDEX(dev, off)];
- aiop->daio_id.buf = block->r.w.last_io;
- aiop->daio_id.bufs =
- INDEX_TO_DIOLEN(opts.disk_io_sizes.vals[block->r.w.last_iolen]);
- aiop->daio_id.buf_id = get_write_buf_id(block->r.w.last_io);
- aiop->hdr = build_bufhr(dev, start, off);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.footer_len = 0;
- shadow = get_shadow_hdr(aiop->daio_id.buf);
- aiop->daio_id.chksum = shadow->chksums[aiop->iolen];
- status = daio->pread(dev->fdhead->fd, buf,
- opts.disk_io_sizes.vals[block->r.w.last_iolen], diskoff,
- &aiop->daio_id);
- if (status == DAIO_CORRUPT) {
- int check_sum;
- struct error error;
- ulong_t shadow_chksum = check_bufbody(block->r.w.last_io,
- opts.disk_io_sizes.vals[block->r.w.last_iolen]);
- ZERO_OBJ(error);
- check_sum = check_buf(buf,
- INDEX_TO_DIOLEN(block->r.w.last_iolen), &error);
- if (check_sum != shadow_chksum) {
- pfprintf(stderr,
- "Off %#llx (%lld) header differs "
- "on disk\n", diskoff, diskoff);
- aiop->error.dr = memdiff_buf(off, dev, buf,
- opts.disk_io_sizes.vals[block->r.w.last_iolen],
- dev->fdhead, "read and check", &aiop->error);
- } else {
- if (memcmp(get_buf_data(buf),
- get_buf_data(block->r.w.last_io),
- INDEX_TO_DIOLEN(block->r.w.last_iolen) -
- SIZEOF_BUFHDR) != 0) {
- pfprintf(stderr,
- "Off %#llx (%lld) body differs "
- "on disk\n", diskoff, diskoff);
- aiop->error.dr = memdiff_buf(off, dev, buf,
- opts.disk_io_sizes.vals[
- block->r.w.last_iolen],
- dev->fdhead, "read and check",
- &aiop->error);
- } else {
- ullong_t off = aio_str2byteoff(aiop);
- pfprintf(stderr,
- "Data at byte offset %#llx (%lld) "
- "on disk, matches the data just read "
- "from %#llx (%lld)\n", diskoff, diskoff,
- off, off);
- }
- }
- } else if (status != opts.disk_io_sizes.vals[block->r.w.last_iolen]) {
- PREAD_ERROR(dev->fdhead->fd, dev->fdhead->name, (ulong_t)buf,
- opts.disk_io_sizes.vals[block->r.w.last_iolen], diskoff);
- }
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- free(buf);
- }
- /*
- * Check the buffer contents matches an io which it internally thinks it
- * is. This error path is used when we have read a block X but the contents
- * of the block is not for block X but for block Y. So this routine gives
- * information regarding the last movements of block Y.
- *
- * This routine must be called with all the mutexs held, see
- * mutex->grab_all().
- *
- * To Do:
- * bounds checking must be done on off before using it!
- */
- void
- check_by_buffer(ullong_t start, struct device *dev, struct aio_str *aiop)
- {
- struct bufhdr_a read_hdr;
- struct bufhdr hdr;
- ullong_t off;
- struct blks *block;
- struct blks *blocks;
- ulong_t shadow_chksum;
- int error_count = 0;
- bitmap_t *map;
- hdr = get_bufhdr(aiop->buf);
- read_hdr = conv_bufhdr(&hdr);
- off = byteoff2diskomizer_off(read_hdr.off);
- if ((long long)off < 0 || off >= LEN_BYTES2BLOCKS(dev)) {
- return;
- }
- aiop->error.doff = off;
- aiop->error.dev = dev;
- if (aiop->dev == dev && off == aiop->off) {
- return;
- }
- map = attach_dev_writemap(dev);
- if (test_write(map, off, LEN_BYTES2BLOCKS(dev))) {
- /*
- * Grr. There is an IO outstanding on this block on this device,
- * I don't know whether it is a read or a write
- */
- shm_ops->detach(dev->writemap_handle);
- aiop->error.desc.UNABLE_TO_LOCK = 1;
- dfprintf(stderr,
- "Block %#llx 0t%lld is currently locked for dev %s\n",
- diskomizer_off2byteoff(off),
- diskomizer_off2byteoff(off), dev->logicalname);
- return;
- }
- shm_ops->detach(dev->writemap_handle);
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- block = &blocks[DEV_BLOCK_INDEX(dev, off)];
- if (block->r.w.last_io != NULL &&
- (shadow_chksum = check_bufbody(block->r.w.last_io,
- opts.disk_io_sizes.vals[block->r.w.last_iolen])) ==
- read_hdr.chksum &&
- memcmp(get_buf_data(aiop->buf),
- get_buf_data(block->r.w.last_io),
- opts.disk_io_sizes.vals[aiop->iolen] - SIZEOF_BUFHDR) == 0) {
- struct fds *fd;
- fd = find_path(dev->fdhead, block->path_id);
- assert(fd != NULL);
- aiop->error.desc.MATCHING_LAST = 1;
- aiop->error.last_requested = block->last_requested;
- aiop->error.delta = block->last_returned_delta;
- aiop->error.doff = off;
- aiop->error.dev = dev;
- aiop->error.path_id = block->path_id;
- dlog(LOG_ERR, "Buffer matches last write to block %#llx "
- "(0t%lld) (block %#llx 0t%lld) on dev %s path %s\n",
- off, off, diskomizer_off2byteoff(off),
- diskomizer_off2byteoff(off), dev->logicalname,
- fd->name);
- dtime_log(LOG_ERR, block->last_requested,
- "Last write to %s block %#llx (0t%lld) requested",
- dev->logicalname, diskomizer_off2byteoff(off),
- diskomizer_off2byteoff(off));
- dtime_log(LOG_ERR,
- block->last_requested +
- block->last_returned_delta,
- "Last write to %s block %#llx (0t%lld)"
- " returned ", dev->logicalname,
- diskomizer_off2byteoff(off),
- diskomizer_off2byteoff(off));
- read_and_check(start, dev, off, aiop);
- } else if (block->r.w.prev_io != NULL) {
- shadow_chksum = check_bufbody(block->r.w.prev_io,
- MIN(opts.disk_io_sizes.vals[block->r.w.last_iolen],
- opts.disk_io_sizes.vals[block->r.w.prev_iolen]));
- if (shadow_chksum == read_hdr.chksum &&
- memcmp(get_buf_data(aiop->buf),
- get_buf_data(block->r.w.prev_io),
- opts.disk_io_sizes.vals[aiop->iolen] -
- SIZEOF_BUFHDR)) {
- aiop->error.desc.MATCHING_PREV = 1;
- aiop->error.doff = off;
- aiop->error.last_requested = block->last_requested;
- dlog(LOG_ERR,
- "Buffer matches block %#llx (block %lld) dev "
- "%s prev io\n",
- (ullong_t)off, diskomizer_off2byteoff(off),
- dev->logicalname);
- dtime_log(LOG_ERR, block->u.prev_requested,
- "Prev write to %s block %#llx (0t%lld)"
- " requested %s\n",
- dev->logicalname,
- diskomizer_off2byteoff(off),
- diskomizer_off2byteoff(off));
- } else {
- dfprintf(stderr, "Buffer claiming to be from block "
- "%#llx dev %s does not match either of the "
- "last two ios.\n",
- (ullong_t)diskomizer_off2byteoff(off),
- dev->logicalname);
- }
- }
- shm_ops->detach(DEV_BLOCK_HANDLE(dev, off));
- }
- /*ARGSUSED*/
- void
- check_matching_io(ullong_t start, struct aio_str *aiop)
- {
- struct device *device;
- struct bufhdr_a hdr_a;
- struct bufhdr hdr = get_bufhdr(aiop->buf);
- hdr_a = conv_bufhdr(&hdr);
- if (hdr_a.hdrchksum != check_bufhdr(aiop->buf, hdr_a.hdrchksum)) {
- return;
- }
- mutex->grab_all();
- for (device = devices; device != NULL; device = device->next) {
- struct fds *fd;
- if ((fd = check_matching_path_io(&hdr_a,
- device->fdhead)) != NULL) {
- /*
- * Now we have the device to which this io was sent
- */
- aiop->error.desc.MATCHING_DEVICE = 1;
- aiop->error.fd = fd;
- aiop->error.doff = byteoff2diskomizer_off(hdr_a.off);
- aiop->error.last_requested = hdr_a.time;
- check_by_buffer(start, device, aiop);
- dfprintf(stderr, "Block read from %s matches block "
- "written to %s\n", aiop->fd->name,
- fd->name);
- }
- }
- mutex->drop_all();
- }
- static loop_type
- on_error_pause(ullong_t start, struct aio_str *aiop)
- {
- int isread = is_read_io(aiop);
- pfprintf(stderr, "On %s error pause %d seconds\n",
- isread ? "read" : "write", OPTION(pause_time));
- (void) sleep(opts.pause_time);
- if (!isread) {
- return (CONTINUE);
- }
- if (!do_memcmp(start, aiop)) {
- check_matching_io(start, aiop);
- memdiff(aiop, "pause");
- return (CONTINUE);
- } else {
- return (BREAK);
- }
- }
- static void
- bring_error_path_online(struct fds *fd)
- {
- struct fds *start = fd;
- do {
- if (fd->error_path == 1) {
- pfprintf(stderr,
- "Path %s brought on line\n", fd->name);
- fd->error_path = 0;
- break;
- }
- fd = fd->next;
- } while (fd != start);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_fail_path(ullong_t start, struct aio_str *aiop)
- {
- char *name = aiop->fd->name;
- pfprintf(stderr,
- "On error fail path %s failed\n", aiop->fd->name);
- if (set_shared_stop_flag(aiop->fd->shared_data_handle) == -1) {
- aiop->fd->need_to_stop = 1;
- } else {
- aiop->fd->stop_flag = 1;
- cancel_all_io_byfd(aiop->fd);
- snapshot_recent(aiop->dev->recent);
- bring_error_path_online(aiop->fd);
- }
- newfd(aiop);
- if (aiop->fd->stop_flag == 1 || aiop->fd->need_to_stop == 1) {
- /*
- * All the paths have failed, we muddle on to complete any furhter
- * error action down this failed path.
- */
- pfprintf(stderr, "On error fail path %s continuing\n", name);
- return (BREAK);
- } else {
- aiop->retrycnt = 0;
- pfprintf(stderr, "On error fail path %s retrying\n", name);
- return (RETRY);
- }
- }
- /*ARGSUSED*/
- static loop_type
- on_error_retry(ullong_t start, struct aio_str *aiop)
- {
- int isread = is_read_io(aiop);
- short max = (isread ?
- OPTION(max_read_retries) : OPTION(max_write_retries));
- if (aiop->retrycnt < max) {
- pfprintf(stderr,
- "On %s error retry %d, %d remaining %s blk %#llx\n",
- isread ? "read": "write",
- 1+aiop->retrycnt, max-(1+aiop->retrycnt),
- aiop->fd->name, aio_str2byteoff(aiop));
- return (RETRY);
- } else {
- return (CONTINUE);
- }
- }
- static loop_type
- on_error_rewrite(ullong_t start, struct aio_str *aiop)
- {
- ssize_t status;
- union err_info err_info;
- struct shadow_hdr const *shadow_hdr = get_shadow_hdr(aiop->buf);
- ullong_t offset = (ullong_t)start +
- (ullong_t)(INDEX_TO_DIOLEN(max_disk_io_len)*aiop->off);
- err_info.str = "pwrite";
- pfprintf(stderr, "%s On error rewrite\n", aiop->fd->name);
- aiop->daio_id.buf = aiop->buf;
- aiop->daio_id.buf_id = get_write_buf_id(aiop->buf);
- aiop->daio_id.chksum = shadow_hdr->chksums[aiop->iolen];
- aiop->daio_id.bufs = opts.disk_io_sizes.vals[aiop->iolen];
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- aiop->daio_id.footer_len = 0;
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- (void) memcpy(aiop->daio_id.hdr, aiop->daio_id.buf,
- aiop->daio_id.hdr_len);
- (void) my_gettimeofday(&aiop->tv, NULL);
- status = daio->pwrite(aiop->fd->fd, aiop->buf,
- opts.disk_io_sizes.vals[aiop->iolen], offset, &aiop->daio_id);
- DAIO_SET_RETURN(aiop->aio_res, status);
- DAIO_SET_ERROR(aiop->aio_res, errno);
- if (status != opts.disk_io_sizes.vals[aiop->iolen]) {
- report_error(aiop, err_info, ERR_SYS);
- aiop->dev->errors += 1;
- return (CONTINUE);
- }
- return (BREAK);
- }
- static loop_type
- on_error_reread(ullong_t start, struct aio_str *aiop)
- {
- ssize_t status;
- union err_info err_info;
- ullong_t offset = (ullong_t)start +
- (ullong_t)(INDEX_TO_DIOLEN(max_disk_io_len)*aiop->off);
- err_info.str = "pread";
- pfprintf(stderr, "%s On error re-read\n", aiop->fd->name);
- (void) my_gettimeofday(&aiop->tv, NULL);
- status = daio->pread(aiop->fd->fd, aiop->buf,
- opts.disk_io_sizes.vals[aiop->iolen], offset, &aiop->daio_id);
- DAIO_SET_RETURN(aiop->aio_res, status);
- DAIO_SET_ERROR(aiop->aio_res, errno);
- if (status == DAIO_CORRUPT) {
- if (is_readonly() || !do_memcmp(start, aiop)) {
- report_error(aiop, err_info, ERR_CORRUPT);
- aiop->dev->errors += 1;
- memdiff(aiop, err_info.str);
- return (CONTINUE);
- }
- /* There should be an assert here */
- } else if (status != opts.disk_io_sizes.vals[aiop->iolen]) {
- report_error(aiop, err_info, ERR_SYS);
- aiop->dev->errors += 1;
- return (CONTINUE);
- }
- return (BREAK);
- }
- static int
- do_path_stop_check(struct fds *fd, struct device *dev)
- {
- if (fd->error_path == 0 && fd->stop_flag == 0) {
- if (fd->need_to_stop == 1 &&
- set_shared_stop_flag(fd->shared_data_handle) != -1) {
- fd->need_to_stop = 0;
- /*
- * need to cancel all the io outstanding for this
- * path
- */
- fd->stop_flag = 1;
- cancel_all_io_byfd(fd);
- snapshot_recent(dev->recent);
- bring_error_path_online(fd);
- } else {
- if (do_stop_check(fd->shared_data_handle) == 1) {
- fd->stop_flag = 1;
- cancel_all_io_byfd(fd);
- bring_error_path_online(fd);
- } else {
- return (0);
- }
- }
- }
- return (1);
- }
- static int
- init_stop_check(void)
- {
- stop_check = do_stop_check;
- return (1);
- }
- static int
- init_path_stop_check(void)
- {
- path_stop_check = do_path_stop_check;
- return (1);
- }
- void
- newfd(struct aio_str *aiop)
- {
- struct fds *fd = aiop->fd;
- if (aiop->fd != aiop->fd->next) {
- while (aiop->fd->next != fd) {
- if (path_stop_check(aiop->fd->next, aiop->dev) == 0) {
- aiop->fd = aiop->fd->next;
- remove_from_aio_list(&fd->all_aios, aiop);
- add_to_all_aios(aiop);
- return;
- }
- aiop->fd = aiop->fd->next;
- }
- /*
- * To get here we searched them all and found none that
- * were not error paths or had been stopped. Reset the path
- * back to the original.
- */
- aiop->fd = fd;
- }
- }
- loop_type
- handle_err_generic(struct aio_str *aiop, ullong_t start,
- on_error_t *on_error_func)
- {
- struct blks *blocks = NULL;
- loop_type status = BREAK;
- blocks = aio_attach(aiop);
- aiop->dev->errors++;
- if (blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io !=
- NULL && does_check(daio->what_checker())) {
- char i;
- union err_info err_info;
- err_info.str = "aioread";
- if (on_error_func == on_error_corrupt) {
- memdiff(aiop, (aiop->retrycnt == 0) ?
- "aioread" : "aioread RETRY");
- check_matching_io(start, aiop);
- report_error(aiop, err_info, ERR_CORRUPT);
- } else {
- report_error(aiop, err_info, ERR_SYS);
- }
- for (i = 0; on_error_func[i] != NULL; i++) {
- loop_type l;
- if ((l = on_error_func[i](start, aiop)) == BREAK)
- break;
- else if (l == RETRY) {
- status = RETRY;
- break;
- }
- }
- }
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- return (status);
- }
- loop_type
- handle_write_error(struct aio_str *aiop, ullong_t start)
- {
- loop_type status = BREAK;
- int i;
- aiop->dev->errors++;
- for (i = 0; on_write_error[i] != NULL; i++) {
- loop_type l;
- if ((l = on_write_error[i](start, aiop)) == BREAK)
- break;
- else if (l == RETRY) {
- status = RETRY;
- break;
- }
- }
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- return (status);
- }
- loop_type
- handle_read_corrupt(struct aio_str *aiop, ullong_t start)
- {
- return (handle_err_generic(aiop, start, on_error_corrupt));
- }
- loop_type
- handle_read_short(struct aio_str *aiop, ullong_t start)
- {
- return (handle_err_generic(aiop, start, on_error_short));
- }
- static time_t
- handle_readonly(struct aio_str *aiop, ullong_t start, read_type_t read_type)
- {
- struct blks *blocks = NULL;
- struct timeval tv;
- hrtime_t delta = DAIO_GET_TIME_TAKEN(aiop->aio_res);
- TNF_PROBE_2(handle_read, "handle_readonly",
- "sunw%cte%diskomizer%aio readonly wait",
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- if (aiop->fd == NULL) {
- aiop->fd = aiop->dev->fdhead;
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- } else {
- if (DAIO_RETURN(aiop->aio_res) == DAIO_CORRUPT) {
- if (handle_read_corrupt(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else if (DAIO_RETURN(aiop->aio_res) !=
- opts.disk_io_sizes.vals[aiop->iolen]) {
- if (handle_read_short(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else {
- struct blks *block;
- aiop->fd->last_read_time = delta;
- if (aiop->retrycnt != 0) {
- char *now_str;
- now_str = alloc_time_str(tv.tv_sec);
- pprintf("Read retry %d of block 0x%llx "
- "on %s o.k. %s\n",
- aiop->retrycnt,
- aio_str2byteoff(aiop),
- aiop->fd->name,
- NIL(now_str));
- not_null_free(now_str);
- }
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- block->r.o.prev_io =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io;
- block->r.o.prev_chksum =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_chksum;
- block->r.o.last_io =
- (((ulong_t)aiop->daio_id.buf) & 0x1);
- block->r.o.last_chksum = aiop->daio_id.chksum;
- block->u.prev_requested = block->last_requested;
- block->last_requested = aiop->tv.tv_sec;
- block->last_returned_delta =
- tv.tv_sec - aiop->tv.tv_sec;
- }
- if (blocks == NULL) {
- blocks = aio_attach(aiop);
- }
- blocks[AIO_BLOCK_INDEX(aiop)].read_count += 1;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- clear_writemap(aiop);
- newfd(aiop);
- }
- update_aio_read_stats(aiop);
- aiop->count++;
- return_aio_read_buf(aiop);
- return (do_new_read(aiop, start, read_type));
- }
- time_t
- handle_readonly_rand(struct aio_str *aiop, ullong_t start)
- {
- return (handle_readonly(aiop, start, READ_ONLY_RAND));
- }
- time_t
- handle_readonly_seq(struct aio_str *aiop, ullong_t start)
- {
- return (handle_readonly(aiop, start, READ_ONLY_SEQ));
- }
- time_t
- handle_read(struct aio_str *aiop, ullong_t start)
- {
- struct blks *blocks = NULL;
- hrtime_t delta = DAIO_GET_TIME_TAKEN(aiop->aio_res);
- TNF_PROBE_2(handle_read, "handle_read",
- "sunw%cte%diskomizer%aio read wait",
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- aiop->fd->total_read--;
- if (DAIO_RETURN(aiop->aio_res) == DAIO_CORRUPT) {
- if (!do_memcmp(start, aiop) &&
- handle_read_corrupt(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else if (DAIO_RETURN(aiop->aio_res) !=
- opts.disk_io_sizes.vals[aiop->iolen]) {
- if (handle_read_short(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else {
- aiop->fd->last_read_time = delta;
- if (aiop->retrycnt != 0) {
- pprintf("Read retry %d of block 0x%llx on %s o.k.\n",
- aiop->retrycnt, aio_str2byteoff(aiop),
- aiop->fd->name);
- }
- if (opts.obscure_execute && is_executable(aiop->buf)) {
- run_func(aiop->buf,
- opts.disk_io_sizes.vals[aiop->iolen] -
- SIZEOF_BUFHDR);
- }
- }
- if (blocks == NULL) {
- blocks = aio_attach(aiop);
- }
- blocks[AIO_BLOCK_INDEX(aiop)].read_count += 1;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- clear_writemap(aiop);
- update_aio_read_stats(aiop);
- aiop->count++;
- return_aio_read_buf(aiop);
- newfd(aiop);
- return (do_new_read(aiop, start, NORMAL_READ));
- }
- time_t
- handle_read_then_write(struct aio_str *aiop, ullong_t start)
- {
- hrtime_t delta = DAIO_GET_TIME_TAKEN(aiop->aio_res);
- struct timeval tv;
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- update_aio_read_stats(aiop);
- if (aiop->fd == NULL) {
- /* This is the first write so no read to check */
- aiop->count++;
- aiop->buf = get_write_buf();
- aiop->fd = aiop->dev->fdhead;
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- } else {
- aiop->fd->total_read--;
- if (DAIO_RETURN(aiop->aio_res) == DAIO_CORRUPT) {
- /* handle read error */
- if (!do_memcmp(start, aiop) &&
- handle_read_corrupt(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else if (DAIO_RETURN(aiop->aio_res) !=
- opts.disk_io_sizes.vals[aiop->iolen]) {
- /* handle read error */
- if (handle_read_short(aiop, start) == RETRY) {
- return (do_new_read(aiop, start, RETRY_READ));
- }
- } else {
- if (opts.obscure_execute && is_executable(aiop->buf)) {
- run_func(aiop->buf,
- opts.disk_io_sizes.vals[aiop->iolen] -
- SIZEOF_BUFHDR);
- }
- aiop->fd->last_read_time = delta;
- }
- return_read_buf(aiop->buf);
- /*
- * Need to return the disk block to the free list
- *
- * the use of clear_writemap_success() reflects the fact that
- * to get here the write to this block must have succeeded.
- */
- clear_writemap_success(aiop);
- aiop->buf = get_write_buf();
- (void) set_io_len(aiop);
- newfd(aiop);
- aiop->count++;
- }
- aiop->handler = handle_write_then_read;
- do_new_write(aiop, start, 0);
- return (tv.tv_sec);
- }
- time_t
- handle_write_then_read(struct aio_str *aiop, ullong_t start)
- {
- struct timeval tv;
- struct blks *block;
- struct blks *blocks;
- struct bufhdr hdr;
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- assert(aiop->buf == NULL || *aiop->buf == 0xAA || *aiop->buf == 0x55);
- aiop->fd->total_write--;
- if (DAIO_RETURN(aiop->aio_res) !=
- opts.disk_io_sizes.vals[aiop->iolen]) {
- union err_info err_info;
- err_info.str = "aiowrite";
- report_error(aiop, err_info, ERR_SYS);
- if (handle_write_error(aiop, start) == RETRY) {
- do_new_write(aiop, start, 1);
- } else {
- if (is_sequential(aiop) &&
- (aiop->off % opts.nprocs) == this_proc()) {
- push_unwritten(aiop);
- }
- do_new_write(aiop, start, 0);
- }
- return (tv.tv_sec);
- }
- update_aio_write_stats(aiop);
- aiop->count++;
- aiop->fd->last_write_time = DAIO_GET_TIME_TAKEN(aiop->aio_res);
- hdr = get_bufhdr(aiop->buf);
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- block->r.w.prev_io = block->r.w.last_io;
- block->r.w.prev_iolen = block->r.w.last_iolen;
- block->u.prev_requested = block->last_requested;
- block->last_requested = aiop->tv.tv_sec;
- block->last_returned_delta = tv.tv_sec - aiop->tv.tv_sec;
- block->r.w.last_io = aiop->buf;
- block->r.w.last_iolen = aiop->iolen;
- if (hdr.start == BUF_TYPE_A)
- block->ab = 1;
- else
- block->ab = 0;
- block->read_count = 0;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- return_write_buf(aiop->buf);
- aiop->buf = get_read_buf();
- aiop->handler = handle_read_then_write;
- return (do_new_read(aiop, start, WRITE_READ));
- }
- time_t
- handle_write(struct aio_str *aiop, ullong_t start)
- {
- struct timeval tv;
- union err_info err_info;
- struct bufhdr hdr;
- struct blks *block;
- struct blks *blocks;
- err_info.str = "aiowrite";
- if (aiop->buf != NULL) {
- if (*aiop->buf != 0xAA && *aiop->buf != 0x55) {
- char tmp = *aiop->buf;
- void *sig = expect_signal(SIGSEGV,
- "Buffer not mapped writable but was updated!",
- aiop->buf, sizeof (*aiop->buf));
- *aiop->buf = 0;
- *aiop->buf = tmp;
- cancel_expected_signal(SIGSEGV, sig);
- exit(1);
- }
- if (aiop->fd != NULL) {
- aiop->fd->total_write--;
- }
- } else {
- if (aiop->fd != NULL) {
- aiop->fd->total_write--;
- plog(LOG_WARNING, "buf == NULL, off %#llx (0t%lld)\n",
- (ullong_t)aiop->off, (ullong_t)aiop->off);
- }
- }
- TNF_PROBE_2(handle_write, "handle_write",
- "sunw%cte%diskomizer%aio write wait",
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- if (DAIO_RETURN(aiop->aio_res) !=
- opts.disk_io_sizes.vals[aiop->iolen]) {
- /* retry the write */
- if (aiop->fd != NULL) {
- report_error(aiop, err_info, ERR_SYS);
- if (handle_write_error(aiop, start) == RETRY) {
- do_new_write(aiop, start, 1);
- } else {
- if (is_sequential(aiop) &&
- (aiop->off % opts.nprocs) ==
- this_proc()) {
- push_unwritten(aiop);
- }
- do_new_write(aiop, start, 0);
- }
- return (tv.tv_sec);
- } else {
- aiop->fd = aiop->dev->fdhead;
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- do_new_write(aiop, start, 0);
- return (tv.tv_sec);
- }
- }
- aiop->fd->last_write_time = DAIO_GET_TIME_TAKEN(aiop->aio_res);
- update_aio_write_stats(aiop);
- aiop->count++;
- hdr = get_bufhdr(aiop->buf);
- if (aiop->retrycnt) {
- ullong_t block = aio_str2byteoff(aiop);
- pfprintf(stderr,
- "%s Block 0t%lld (%#llx) retry %d succeeded\n",
- aiop->fd->name, (ullong_t)block,
- (ullong_t)block, ++aiop->retrycnt);
- }
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- block->r.w.prev_io = block->r.w.last_io;
- block->r.w.prev_iolen = block->r.w.last_iolen;
- block->u.prev_requested = block->last_requested;
- block->last_requested = aiop->tv.tv_sec;
- block->last_returned_delta = tv.tv_sec - aiop->tv.tv_sec;
- block->r.w.last_io = aiop->buf;
- block->r.w.last_iolen = aiop->iolen;
- if (hdr.start == BUF_TYPE_A)
- block->ab = 1;
- else
- block->ab = 0;
- block->read_count = 0;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- clear_writemap_success(aiop);
- assert(aiop->buf != NULL);
- if (aiop->count %
- opts.expert_release_write_buffers_after_n_uses == 0) {
- return_write_buf(aiop->buf);
- aiop->buf = NULL;
- }
- newfd(aiop);
- do_new_write(aiop, start, 0);
- return (tv.tv_sec);
- }
- void
- init_all_aio(struct device *devices, struct aio_str *aio,
- int count)
- {
- int i, j;
- struct device *device;
- for (j = i = 0; i < count; i++) {
- for (device = devices; device != NULL; device = device->next) {
- if (i == 0)
- device->block = this_proc();
- aio[j].dev = device;
- aio[j].fd = NULL;
- aio[j].iolen = 0;
- add_to_aio_list(&device->stopped_ios, &aio[j]);
- while (my_gettimeofday(&device->state_ttl, NULL) == -1)
- pperror("gettimeofday");
- j++;
- }
- }
- }
- void
- cancel_all_io_byfd(struct fds *fd)
- {
- struct aioqtop not_cancelled;
- struct aioqtop cancelled;
- struct aio_str *io;
- struct device *devp;
- int count = 0;
- ZERO_OBJ(not_cancelled);
- ZERO_OBJ(cancelled);
- while ((io = pop_from_aio_list(&fd->all_aios)) != NULL) {
- devp = io->dev;
- if (daio->cancel(&io->aio_res) == -1) {
- io->count = errno;
- add_to_aio_list(¬_cancelled, io);
- } else {
- count++;
- add_to_aio_list(&cancelled, io);
- }
- }
- plog(LOG_WARNING, "%d io's cancelled to path %s\n", count,
- fd->name);
- while ((io = pop_from_aio_list(&cancelled)) != NULL) {
- DAIO_SET_ERROR(io->aio_res, ECANCELED);
- DAIO_SET_RETURN(io->aio_res, -1);
- add_to_aio_list(&devp->cancelled, io);
- cancelled_count++;
- }
- if (cancelled_count) {
- start_cancelled_io = do_start_cancelled_io;
- }
- fd->all_aios = not_cancelled;
- }
- void
- cancel_all_io(void)
- {
- int i = 0;
- int errors = 0;
- int total = 0;
- int reaped = 0;
- struct aio_str *io;
- struct aio_str *first_error_io = NULL;
- struct fds *fd;
- struct aioqtop not_cancelled;
- struct device *dev;
- /*
- * If daio is NULL, then no paths can be open so nothing more to do.
- */
- if (daio == NULL)
- return;
- time_now_log(LOG_NOTICE, gettext("cancelling all aios\n"));
- ZERO_OBJ(not_cancelled);
- for (dev = devices; dev != NULL; dev = dev->next) {
- for (fd = dev->fdhead; ; fd = fd->next) {
- while ((io = pop_from_aio_list(
- &fd->all_aios)) != NULL) {
- total++;
- if (daio->cancel(&io->aio_res) == -1) {
- io->count = errno;
- add_to_aio_list(¬_cancelled, io);
- } else {
- i++;
- }
- }
- if (fd->next == dev->fdhead)
- break;
- }
- }
- /*
- * Now reap all the remaining ios, popping them off the list of
- * ios that could not be cancelled.
- */
- while ((io = (aio_str_t *)daio->wait(NULL)) != (aio_str_t *)-1 ||
- errno != EINVAL) {
- if (io != (aio_str_t *)-1 && io != (aio_str_t *)0) {
- reaped++;
- remove_from_aio_list(¬_cancelled, io);
- }
- }
- /*
- * If the list contains more than one entry there was a problem,
- * probably in the internal logic of diskomizer.
- */
- while ((io = pop_from_aio_list(¬_cancelled)) != NULL) {
- errno = io->count;
- /*
- * If we were interupted the signal might have come in while
- * we were handling an io so we could have just one io
- * that is not in the aio system. So only report errors
- * if there are more then one. If there are more than one
- * report them all.
- *
- */
- if (errors++ > 0) {
- if (first_error_io != NULL) {
- AIOCANCEL_ERROR(first_error_io);
- first_error_io = NULL;
- }
- AIOCANCEL_ERROR(io);
- } else {
- first_error_io = io;
- }
- }
- for (dev = devices; dev != NULL; dev = dev->next) {
- close_and_free_paths(dev);
- }
- time_now_log(LOG_NOTICE,
- "%d/%d aios cancelled successfully, %d reaped\n",
- i, total, reaped);
- }
- pid_t
- master_pid()
- {
- return (parent_pid);
- }
- static int
- is_master()
- {
- return (parent_pid == getpid());
- }
- static void
- register_death(pid_t pid)
- {
- int i;
- for (i = 0; i < opts.nprocs; i++) {
- if (proc_store[i].pid != pid) {
- proc_store[i].pid = 0;
- break;
- }
- }
- }
- static int
- ischildless()
- {
- int i;
- if (proc_store != NULL) {
- for (i = 0; i < opts.nprocs; i++) {
- if (proc_store[i].pid != 0)
- return (0);
- }
- }
- return (1);
- }
/*
 * haskids: the opposite of ischildless(); non-zero while proc_store
 * still holds a non-zero pid.
 */
static int
haskids()
{
	if (ischildless())
		return (0);
	return (1);
}
- static void
- mourning(pid_t pid, int stat)
- {
- union {
- char dir[PATH_MAX];
- char buf[SIG2STR_MAX];
- } u;
- if (pid == -1)
- return;
- if (!WIFEXITED(stat) && !WIFSIGNALED(stat))
- return;
- register_death(pid);
- if (WIFSIGNALED(stat)) {
- char *x = strsignal(WTERMSIG(stat));
- if (sig2str(WTERMSIG(stat), u.buf) == -1) {
- (void) strcpy(u.buf, "(Unknown)");
- }
- plog(LOG_ERR,
- "Process %ld killed by signal %d %s,%s%s%s.\n",
- (ulong_t)pid, WTERMSIG(stat), u.buf,
- x == NULL ? "" : " ", x == NULL ? "" : x,
- WCOREDUMP(stat) ? " core dumped" : "");
- }
- if (pid) {
- (void) snprintf(u.dir, sizeof (u.dir), "%s/%ld",
- opts.workingdir, (ulong_t)pid);
- plog(LOG_DEBUG, "removing %s\n", u.dir);
- if (rmdir(u.dir) == -1)
- pperror("rmdir(%s)", u.dir);
- }
- }
- static int
- all_countdowns_zero(struct device *devp)
- {
- while (devp) {
- if (devp->countdown > 0) {
- return (0);
- }
- devp = devp->next;
- }
- return (1);
- }
- static int
- stoptime_reached(void)
- {
- return (stoptime > 0 && stoptime < gethrtime());
- }
- static void
- report_exit_reason(void)
- {
- if (opts.nloops && all_countdowns_zero(devices)) {
- time_now_log(LOG_NOTICE,
- "All devices have completed %ld loops; exiting",
- opts.nloops);
- } else if (stoptime_reached()) {
- time_now_log(LOG_NOTICE, "stop time reached; exiting");
- }
- }
- void
- cleanup(void)
- {
- (void) sigignore(SIGTERM);
- (void) sigignore(SIGINT);
- new_log_transaction(stderr);
- if (is_master()) {
- int stat;
- pid_t pid;
- struct device *dev;
- infantacide();
- while (haskids() &&
- (pid = waitpid((pid_t)-1, &stat, WNOHANG)) != -1 &&
- errno != ECHILD) {
- if (pid == 0) {
- sleep(1);
- infantacide();
- } else {
- mourning(pid, stat);
- }
- }
- save_data_bufs();
- for (dev = devices; dev != NULL; dev = dev->next) {
- close_and_free_paths(dev);
- }
- if (rmdir(opts.workingdir) == -1) {
- pperror("rmdir(%s)", opts.workingdir);
- }
- shm_ops->fini();
- } else {
- report_exit_reason();
- cancel_all_io();
- }
- time_now_log(LOG_NOTICE, "exiting");
- }
- /*
- * change_dir change into our own directory.
- */
- void
- change_dir()
- {
- char dir[PATH_MAX];
- (void) snprintf(dir, sizeof (dir),
- "%s/%ld", opts.workingdir, (ulong_t)getpid());
- if (mkdir(opts.workingdir, 0755) == -1 && errno != EEXIST)
- pperror("mkdir(%s, 0755)", opts.workingdir);
- if (mkdir(dir, 0755) == -1)
- pperror("mkdir(%s, 0755)", dir);
- if (chdir(dir) == -1) {
- pperror("chdir(%s)", dir);
- (void) snprintf(dir, sizeof (dir),
- "%s.%ld", diffs_str, (ulong_t)getpid());
- diffs = strdup(dir);
- if (diffs == NULL)
- diffs = diffs_str;
- } else {
- diffs = diffs_str;
- }
- }
- /*
- * aios_outstanding_or_on_hold:
- * return 1 if there are aios outstanding.
- * return 0 if there are none.
- */
- int
- aios_queued_to_fd(struct device *dev)
- {
- struct fds *fd;
- for (fd = dev->fdhead; ; fd = fd->next) {
- if (is_aio_on_list(&fd->all_aios))
- return (1);
- if (fd->next == dev->fdhead)
- break;
- }
- return (0);
- }
/*
 * first_to_restart:
 *	Choose the device whose stopped I/O should be restarted next.
 *
 *	Returns NULL when no device in the list both has something on its
 *	stopped_ios list and is in a state other than DEV_NOT_READY.
 *	Otherwise returns the chosen device; the loop only ends with a
 *	non-NULL device once that device's state_ttl.tv_sec is not -1.
 */
struct device *
first_to_restart(struct device *devices)
{
	struct device *dev;
	struct device *first_to_start;
	struct timeval now_tv;

	do {
		while (my_gettimeofday(&now_tv, NULL) == -1)
			pperror("gettimeofday");
		/*
		 * Skip leading devices that have no stopped I/O or that
		 * are DEV_NOT_READY; the loop body is intentionally empty.
		 */
		for (first_to_start = devices;
		    first_to_start != NULL && (
		    is_aio_on_list(&first_to_start->stopped_ios) == 0 ||
		    get_dev_state(first_to_start, &now_tv) ==
		    DEV_NOT_READY);
		    first_to_start = first_to_start->next) {
			/*LINTED*/
		}
		if (first_to_start == NULL)
			break;
		/*
		 * Look through the rest of the list for a ready device
		 * with an earlier state_ttl (a tv_sec of -1 on the current
		 * candidate loses to any device whose tv_sec is not -1).
		 *
		 * NOTE(review): the stopped_ios test below is applied to
		 * first_to_start, not to dev, so a device with no stopped
		 * I/O can become the candidate -- confirm this is intended.
		 */
		for (dev = first_to_start->next; dev != NULL;
		    dev = dev->next) {
			if (get_dev_state(dev, &now_tv) == DEV_NOT_READY) {
				continue;
			}
			if (is_aio_on_list(&first_to_start->stopped_ios) !=
			    0 && dev->state_ttl.tv_sec != -1 &&
			    (first_to_start->state_ttl.tv_sec == -1 ||
			    timeval_lt(dev->state_ttl,
			    first_to_start->state_ttl))) {
				first_to_start = dev;
			}
		}
	/*
	 * While the candidate's state_ttl.tv_sec is still -1, sleep a
	 * second and look again.  NOTE(review): the "!= 2" comparison
	 * appears to exist only so that sleep(1) can sit inside the
	 * condition; sleep(1) is not expected to return 2.
	 */
	} while (first_to_start != NULL &&
	    first_to_start->state_ttl.tv_sec == -1 && sleep(1) != 2);
	return (first_to_start);
}
- struct aio_str *
- wait_to_restart(struct device *devices)
- {
- struct device *first_to_start;
- struct aio_str *aiop;
- struct timeval tv;
- first_to_start = first_to_restart(devices);
- if (first_to_start == NULL)
- return (NULL);
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- tv = timeval_timeval_sub(first_to_start->state_ttl, tv);
- if (tv.tv_sec) {
- char buf[128];
- (void) strftime(buf, 128, TIME_FORMAT,
- localtime(&first_to_start->state_ttl.tv_sec));
- if (tv.tv_sec > secs_till_exit()) {
- pprintf("All IO on hold until after our exit time.\n");
- exit(0);
- }
- pprintf("Sleeping for %ld seconds, until %s\n", tv.tv_sec, buf);
- (void) sleep(tv.tv_sec);
- check_exit_flag();
- }
- if (tv.tv_usec) {
- (void) usleep(tv.tv_usec);
- check_exit_flag();
- }
- aiop = pop_from_aio_list(&first_to_start->stopped_ios);
- if (aiop != NULL && aiop->fd != NULL)
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- return (aiop);
- }
- int
- aios_outstanding(struct device *devices)
- {
- struct device *dev;
- for (dev = devices; dev != NULL; dev = dev->next) {
- if (aios_queued_to_fd(dev)) {
- return (1);
- }
- }
- return (0);
- }
#ifdef NOT_USED
/*
 * aios_on_hold:
 *	return 1 if any device in the list has something on its
 *	stopped_ios list, 0 otherwise.  Only built when NOT_USED is
 *	defined.
 */
int
aios_on_hold(struct device *devices)
{
	struct device *cur = devices;

	while (cur != NULL) {
		if (is_aio_on_list(&cur->stopped_ios))
			return (1);
		cur = cur->next;
	}
	return (0);
}
#endif
- int
- aios_outstanding_or_on_hold(struct device *devices)
- {
- struct device *dev;
- for (dev = devices; dev != NULL; dev = dev->next) {
- if (is_aio_on_list(&dev->cancelled))
- return (1);
- if (is_aio_on_list(&dev->stopped_ios))
- return (1);
- if (aios_queued_to_fd(dev)) {
- return (1);
- }
- }
- return (0);
- }
/*
 * would_stop_before:
 *	Return true if diskomizer would exit before the absolute time
 *	"secs" is reached.  Returns 0 when secs is -1 or when
 *	secs_till_exit() reports a negative value.
 */
int
would_stop_before(time_t secs)
{
	struct timeval now;
	time_t remaining;

	if (secs == -1)
		return (0);

	remaining = secs_till_exit();
	if (remaining < 0)
		return (0);

	while (my_gettimeofday(&now, NULL) == -1)
		pperror("gettimeofday");

	return (now.tv_sec + remaining <= secs);
}
- /*ARGSUSED*/
- static int
- has_cancelled(struct device *devices)
- {
- return (cancelled_count == 0 ? 0 : 1);
- #ifdef SLOW_BUT_SURE
- while (device != NULL) {
- if (is_aio_on_list(&device->cancelled)) {
- return (1);
- }
- device = device->next;
- }
- return (0);
- #endif
- }
- struct timeval
- get_timeout(struct device *devices, int report_time)
- {
- struct device *first_to_start;
- struct timeval tv;
- time_t secs_til_xit;
- if (has_cancelled(devices)) {
- tv.tv_sec = tv.tv_usec = 0;
- return (tv);
- }
- secs_til_xit = secs_till_exit();
- if (secs_til_xit < 0) {
- secs_til_xit = report_time;
- }
- if (!all_running()) {
- first_to_start = first_to_restart(devices);
- } else {
- first_to_start = NULL;
- }
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- if (first_to_start == NULL) {
- tv.tv_sec = MIN(secs_til_xit, report_time);
- tv.tv_usec = 0;
- return (tv);
- }
- tv = timeval_timeval_sub(first_to_start->state_ttl, tv);
- if (tv.tv_sec > report_time || tv.tv_sec > secs_til_xit) {
- tv.tv_sec = MIN(secs_til_xit, report_time);
- tv.tv_usec = 0;
- } else if (tv.tv_sec < 0) {
- tv.tv_sec = tv.tv_usec = 0;
- }
- return (tv);
- }
- void
- init_all_blk_str(struct device *dev, int proc_no)
- {
- int i;
- struct timeval now_tv;
- for (i = 0; dev != NULL; dev = dev->next, i++) {
- if ((i % opts.nprocs) == proc_no) {
- /* init_block_str(dev); */
- while (my_gettimeofday(&now_tv, NULL) == -1)
- pperror("gettimeofday");
- (void) set_dev_state(dev, DEV_NOT_READY, DEV_STOPPED,
- &now_tv);
- }
- }
- }
- /*
- * Start the first cancelled io for each device.
- */
- void
- do_start_cancelled_io(struct device *devices, ullong_t start)
- {
- struct device *devp;
- for (devp = devices; devp != NULL; devp = devp->next) {
- struct aio_str *aiop;
- check_exit_flag();
- aiop = pop_from_aio_list(&devp->cancelled);
- if (aiop != NULL) {
- cancelled_count--;
- aiop->handler(aiop, start);
- }
- }
- if (!has_cancelled(devices)) {
- start_cancelled_io =
- (void (*)(struct device *devices, ullong_t start))nop;
- }
- }
- static void
- report_times(void)
- {
- struct tms tms;
- if (times(&tms) != (clock_t)-1) {
- plog(LOG_NOTICE, "User time %d seconds\n",
- tms.tms_cutime/CLK_TCK);
- plog(LOG_NOTICE, "System time %d seconds\n",
- tms.tms_cstime/CLK_TCK);
- }
- }
/*
 * do_aio:
 *	Top level of a test run.  Allocates the write structures, forks
 *	opts.nprocs worker processes and, in each worker, runs the main
 *	asynchronous I/O loop until nothing is outstanding or on hold.
 *	The parent just reaps the workers.  Every path out of this
 *	function is an exit() call; it does not return.
 *
 *	devices		list of devices to exercise
 *	start		passed through to the I/O handlers and the
 *			restart helpers
 *	report_time	passed to get_timeout() and the hanger reporting
 *			helpers
 */
void
do_aio(struct device *devices, ullong_t start, int report_time)
{
	struct aio_str *aio_writes;
	int i;
	int rdflag = 0;		/* 0: no reader yet, 1: starting, 2: all up */
	pid_t pid;
	int ndevices = how_many_devices(devices);
	dev_state dev_state;

	if (opts.wthreads + opts.wrthreads == 0) {
		plog(LOG_ERR, "WTHREADS and WRTHREADS can not both be zero\n");
		exit(1);
	}
	aio_writes = my_calloc((opts.wthreads + opts.wrthreads) * ndevices,
	    sizeof (struct aio_str));
	if (aio_writes == NULL) {
		pfprintf(stderr, "Can't allocate write structures\n");
		exit(1);
	}
	proc_store = my_calloc(opts.nprocs, sizeof (struct proc_store));
	if (proc_store == NULL) {
		pfprintf(stderr, "Can't allocate process store\n");
		exit(1);
	}
	/* Select the "time left" function and record the stop time. */
	if (opts.seconds_to_run > 0) {
		secs_till_exit = do_secs_till_exit;
		stoptime = gethrtime() + (opts.seconds_to_run * 1000 * MILLION);
	} else {
		secs_till_exit = inf_secs_till_exit;
		stoptime = -1;
	}
	(void) printf("\tPID = %ld\n", (ulong_t)getpid());
	(void) printf("\t%s\n", gettext(checker_string(daio->what_checker())));
	if (opts_fini() != 0) {
		exit(EXIT_FAILURE);
	}
	save_usage_tracking(usage_tracking_handle, opts.obscure_usage_file);
	send_usage_tracking(usage_tracking_handle);
	close_usage_tracking(usage_tracking_handle);
	init_read_bufs(devices);
	init_all_write_bufs(aio_writes, devices);
	shm_ops->complete(NULL);
	report_uadmin();
	if (opts.debug_no_action) {
		exit(EXIT_SUCCESS);
	}
	if (!is_readonly() && opts.expert_do_path_check &&
	    check_for_duplicate_paths(devices) == 0) {
		exit(EXIT_FAILURE);
	}
	/* Hold SIGTERM/SIGINT while the workers are being forked. */
	(void) sighold(SIGTERM);
	(void) sighold(SIGINT);
	proc_no = 0;
	NOTE(COMPETING_THREADS_NOW)
	/*
	 * Fork the workers.  A failed fork is retried, with a sleep in
	 * between, up to opts.max_fork_failure times.  The child leaves
	 * the loop with proc_no set to its index.
	 *
	 * NOTE(review): if opts.nprocs is 0 this loop never assigns pid,
	 * yet pid is tested below -- confirm nprocs is validated elsewhere.
	 */
	for (i = 0; i < opts.nprocs; i++) {
		int forkcount = 0;
		(void) fflush(stdout);
		(void) fflush(stderr);
		do {
			pid = opts.use_fork1 == 0 ? fork() : fork1();
			if (pid == -1) {
				FORK_ERROR(opts.use_fork1 == 0 ?
				    "" : "1");
				if (forkcount >= opts.max_fork_failure)
					break;
				forkcount++;
				(void) sleep(opts.fork_failure_wait_time);
			}
		} while (pid == -1);
		if (pid == -1)
			FORK_ERROR(opts.use_fork1 == 0 ? "" : "1");
		else if (pid == 0) {
			proc_no = i;
			break;
		}
		proc_store[i].pid = pid;
		plog(LOG_DEBUG, "fork%s %ld\n", opts.use_fork1 == 0 ? "" : "1",
		    (ulong_t)pid);
	}
	if (pid != 0) { /* We are the parent */
		int status;
		(void) sigrelse(SIGINT);
		(void) sigrelse(SIGTERM);
		/*
		 * Reap children; any non-zero exit or death by signal
		 * makes the overall exit status a failure.
		 * NOTE(review): errno is tested even when waitpid()
		 * succeeded, where its value is stale.
		 */
		while ((pid = waitpid((pid_t)-1, &status, 0)) != -1 &&
		    errno != ECHILD) {
			if (WIFEXITED(status) || WIFSIGNALED(status)) {
				mourning(pid, status);
				if (WEXITSTATUS(status) != 0 ||
				    WIFSIGNALED(status)) {
					exit_status = EXIT_FAILURE;
				}
			}
		}
		report_times();
		exit(exit_status);
	}
	/* From here on we are a worker process. */
	free(proc_store);
	daio->init((opts.wthreads + opts.rthreads + opts.wrthreads) * ndevices);
	(void) sigrelse(SIGINT);
	(void) sigrelse(SIGTERM);
	change_dir();
	/* init_all_blk_str(devices, proc_no); */
	if (usr1_exit)
		exit(0);
	init_all_aio(devices, aio_writes, opts.wthreads + opts.wrthreads);
	assert(devices->block == this_proc());
	new_log_transaction(stderr);
	/* This is the main loop for the diskomizer. */
	for (i = 0; aios_outstanding_or_on_hold(devices) != 0; i++) {
		struct aio_str *aiop;
		struct timeval timeout, now_tv;
		time_t tyme;
		int x;
		check_exit_flag();
		if (aios_outstanding(devices) != 0) {
			/* Wait for one I/O to complete, or for the timeout. */
			timeout = get_timeout(devices, report_time);
			new_log_transaction(stderr);
			aiop = (struct aio_str *)daio->wait(&timeout);
			/* Save errno before other calls can overwrite it. */
			x = errno;
			check_exit_flag();
			while (my_gettimeofday(&now_tv, NULL) == -1)
				pperror("gettimeofday");
			if ((long)aiop == -1) {
				errno = x;
				if (errno == EINVAL &&
				    aios_outstanding_or_on_hold(
				    devices) == 0) {
					AIOWAIT_ERROR(timeout);
					exit(1);
				}
				AIOWAIT_ERROR(timeout);
				continue;
			} else if ((long)aiop == 0) {
				/* the aiowait timed out */
				report_all_hangers(devices, report_time);
				restart_stopped_devices(start, devices,
				    &now_tv);
				continue;
			}
		} else {
			/*
			 * If all the io requests have been stopped then we
			 * call wait_to_restart which will return the first
			 * io to restart from all the devices and it will
			 * sleep until that io is due to be queued.  It will
			 * return NULL if the next io to start would be
			 * started after the process should have exited or
			 * if there are no stopped devices or if all the ios
			 * have been deferred.
			 */
			if ((aiop = wait_to_restart(devices)) != NULL) {
				while (my_gettimeofday(&now_tv, NULL) == -1)
					pperror("gettimeofday");
				dev_state = set_dev_state(aiop->dev,
				    DEV_STOPPED, DEV_STARTING, &now_tv);
				assert(dev_state == DEV_STOPPED ||
				    dev_state == DEV_STARTING);
			} else if ((aiop = get_deferred_io(devices)) == NULL) {
				exit(exit_status);
			}
		}
		/*
		 * Retire this aio instead of reissuing it when the device
		 * has used up its loop count or the stop time has passed.
		 */
		if (aiop == NULL ||
		    (opts.nloops != 0 && aiop->dev->countdown == 0) ||
		    stoptime_reached()) {
			/* this stops the other processes being killed */
			usr1_exit++;
			if (aiop != NULL) {
				if (aiop->fd != NULL)
					remove_from_aio_list(
					    &aiop->fd->all_aios, aiop);
				if (aiop->next != NULL)
					aiop->next->prev = aiop->prev;
			}
			continue;
		}
		/*
		 * Controls.  A running or starting device gets the aio's
		 * handler called; otherwise the aio is parked on the
		 * device's stopped_ios list, and the device is moved from
		 * DEV_STOPPING to DEV_STOPPED once nothing is queued to
		 * any of its paths.
		 */
		dev_state = get_dev_state(aiop->dev, &now_tv);
		if (dev_state == DEV_RUNNING || dev_state == DEV_STARTING) {
			tyme = aiop->handler(aiop, start);
		} else {
			if (aiop->fd != NULL) {
				remove_from_aio_list(&aiop->fd->all_aios,
				    aiop);
			}
			add_to_aio_list(&aiop->dev->stopped_ios, aiop);
			if (aios_queued_to_fd(aiop->dev) == 0) {
				(void) set_dev_state(aiop->dev,
				    DEV_STOPPING, DEV_STOPPED, &now_tv);
			}
		}
		start_cancelled_io(devices, start);
		start_deferred(devices, start);
		/*
		 * NOTE(review): tyme is only assigned in the
		 * RUNNING/STARTING branch above, so it is read
		 * uninitialised here whenever the aio was parked.
		 */
		report_hangers(aiop->dev, tyme, report_time);
		restart_stopped_devices(start, devices, &now_tv);
		/*
		 * Bring up the readers gradually: only after enough loop
		 * iterations, and only off the back of a non-read aio
		 * whose offset has reached the device's read_start_block.
		 */
		if (aiop->off >= aiop->dev->read_start_block &&
		    aiop->handler != handle_read &&
		    i > (2*(opts.rthreads + opts.wthreads))) {
			if (opts.rthreads > aiop->dev->running_rthreads) {
				if (aiop->dev->running_rthreads == 0 &&
				    rdflag == 0) {
					rdflag = 1;
					time_now_log(LOG_NOTICE,
					    "Starting first %s reader %d",
					    random_str, i);
				}
				if (init_read(aiop, start))
					aiop->dev->running_rthreads++;
			} else if (rdflag == 1) {
				time_now_log(LOG_NOTICE,
				    "All %sreaders started %d",
				    random_str, i);
				rdflag = 2;
			}
		}
	}
	exit(exit_status);
}
- /*
- * select_error_func:
- * search the handlers array for an entry whose name matches the name
- * passed in. If the name passed in is NULL then default to using the
- * first handler in the list.
- */
- static int
- select_error_func(const char *name,
- struct error_handlers *handlers,
- int nhandlers,
- on_error_t *oef,
- int rw)
- {
- int i;
- struct error_handlers *h;
- if (name == NULL) {
- h = &handlers[0];
- } else for (i = 0; i < nhandlers; i++) {
- h = &handlers[i];
- if (((h->rw & rw) != 0) && strcasecmp(h->name, name) == 0) {
- break;
- } else {
- h = NULL;
- }
- }
- if (h != NULL) {
- *oef = h->func;
- if (h->setup() != 1) {
- fprintf(stderr, "Unable to init %s\n", h->name);
- }
- return (h->breaker == 0 ? 0 : 1);
- }
- return (-1);
- }
- on_error_t *
- setup_onerror(char *prog, const char *str, int rw)
- {
- char *tmp;
- char *opaque;
- char *toogo;
- char i;
- on_error_t *oef = NULL;
- if ((toogo = strdup(str)) == NULL) {
- (void) fprintf(stderr, "strdup(%s) failed: %s\n",
- str, strerror(errno));
- return (NULL);
- }
- for (i = 0, tmp = toogo; ; i++) {
- on_error_t *noef;
- int n;
- if ((tmp = strtok_r(tmp, ",", &opaque)) == NULL) {
- break;
- }
- noef = realloc(oef, (i+2) * sizeof (on_error_t));
- if (noef == NULL) {
- free(toogo);
- free(oef);
- return (NULL);
- }
- oef = noef;
- oef[i+1] = NULL;
- if ((n = select_error_func(tmp, on_error_table,
- ARRAY_LEN(on_error_table), &oef[i], rw)) != 0) {
- if (n == -1) {
- (void) fprintf(stderr,
- "bad on error option %s in %s\n",
- NIL(tmp), str);
- }
- break;
- }
- tmp = NULL;
- }
- free(toogo);
- if (oef == NULL) {
- if ((oef = malloc(sizeof (on_error_t))) != NULL) {
- oef[0] = NULL;
- }
- }
- return (oef);
- }
- int
- how_many_devices(struct device *devices)
- {
- int i = 0;
- while (devices != NULL) {
- i++;
- devices = devices->next;
- }
- return (i);
- }
- ullong_t
- set_file_size(const char *dir)
- {
- struct statvfs buf;
- if (opts.expert_amount_to_leave_unused && opts.number_of_files &&
- statvfs(dir, &buf) != -1) {
- ullong_t count;
- count = (buf.f_bavail * buf.f_frsize) -
- opts.expert_amount_to_leave_unused;
- count = (count/opts.number_of_files);
- return (count);
- }
- return (opts.file_size);
- }
- int
- set_number_of_files(const char *dir)
- {
- struct statvfs buf;
- if (opts.expert_amount_to_leave_unused && opts.file_size &&
- statvfs(dir, &buf) != -1) {
- ullong_t count;
- longlong_t n;
- int i;
- count = (ullong_t)buf.f_bavail * (ullong_t)buf.f_frsize;
- count -= opts.expert_amount_to_leave_unused;
- n = count / opts.file_size;
- i = (int)(MIN(n, INT_MAX));
- if (opts.number_of_files)
- return (MIN(i, opts.number_of_files));
- else
- return (i);
- }
- return (opts.number_of_files);
- }
- /*
- * read the path as if it is a symbolic link and process that.
- */
- static char *
- do_link(char *path)
- {
- char buf[PATH_MAX+1];
- char *res;
- int x;
- if ((x = readlink(path, &buf[0], sizeof (buf))) > 0) {
- buf[x] = NULL;
- res = full_path(path, &buf[0]);
- } else {
- res = my_strdup(path);
- }
- if (res == NULL) {
- exit(EXIT_FAILURE);
- }
- return (res);
- }
- static struct fds *
- open_path_count(struct device *devp, char *name, ullong_t size)
- {
- struct fds *fd;
- int i = 0;
- do {
- if ((fd = open_path(devp, name, size)) != NULL)
- break;
- } while (i++ < opts.open_retries);
- return (fd);
- }
- struct device *
- open_path_group(struct paths *paths, int paths_to_use, int error_paths)
- {
- int count;
- struct other_paths *opath;
- struct device *devp;
- struct fds *fd;
- int total_paths = paths_to_use + error_paths;
- if ((devp = (struct device *)my_calloc(1,
- sizeof (struct device))) == NULL) {
- return (NULL);
- }
- for (count = 0, opath = paths->op;
- count < total_paths && opath != NULL; /* */) {
- if ((fd = open_path_count(devp,
- opath->path, 0)) != NULL) {
- fd->error_path =
- count >= paths_to_use ? 1 : 0;
- fd->path_id = count++;
- }
- opath = opath->next;
- }
- return (devp);
- }
- /*
- * Open_devices
- *
- * ARGUMENT: char *name
- * A space seperated list of devices. Devices may be grouped by
- * putting curly brackets around them to sepficy multiple paths to
- * the same device.
- */
- struct device *
- open_devices(char *name)
- {
- struct device *devp;
- struct device *newone;
- int brace_count = 0;
- int error_paths = opts.error_paths;
- int paths_to_use = opts.paths_to_use;
- struct paths *path_group = NULL;
- struct other_paths *op;
- char *tmp;
- char *toogo;
- char *opaque;
- if ((toogo = strdup(name)) == NULL) {
- (void) fprintf(stderr, "strdup(%s) failed: %s\n", name,
- strerror(errno));
- exit(1);
- }
- tmp = toogo;
- devp = NULL;
- while ((tmp = strtok_r(tmp, "\t ", &opaque)) != NULL) {
- struct stat64 sbuf;
- if (usr1_exit)
- exit(0);
- if (*tmp == OPEN_BRACE) {
- if (brace_count++ == 0) {
- error_paths = paths_to_use = 0;
- }
- if (path_group == NULL) {
- path_group = my_calloc(
- sizeof (struct paths), 1);
- if (path_group == NULL) {
- exit(EXIT_FAILURE);
- }
- }
- } else if (*tmp == CLOSE_BRACE) {
- if (--brace_count == 0) {
- if (path_group->logicalpath == NULL) {
- plog(LOG_WARNING, gettext(
- "Empty path device list "
- "found"));
- free(path_group);
- path_group = NULL;
- continue;
- }
- newone = open_device(NULL,
- path_group, opts.file_size,
- paths_to_use, error_paths);
- if (newone != NULL) {
- newone->next = devp;
- devp = newone;
- }
- free_paths(path_group);
- path_group = NULL;
- error_paths = opts.error_paths;
- paths_to_use = opts.paths_to_use;
- }
- if (brace_count < 0)
- plog(LOG_WARNING,
- "Unbalanced braces in device list\n");
- } else if (*tmp == '-') {
- /* PATH options */
- plog(LOG_WARNING, "Path options are not currently "
- "supported: \"%s\" ignored\n", tmp);
- } else if (path_group != NULL) {
- if (path_group->op == NULL) {
- op = my_calloc(
- sizeof (struct other_paths), 1);
- if (op == NULL) {
- exit(EXIT_FAILURE);
- }
- path_group->op = op;
- path_group->logicalpath = my_strdup(tmp);
- } else {
- /* lint does not like empty loops */
- for (op = path_group->op; op->next != NULL; ) {
- op = op->next;
- }
- op->next = my_calloc(
- sizeof (struct other_paths), 1);
- if (op->next == NULL) {
- exit(EXIT_FAILURE);
- }
- op = op->next;
- }
- if (brace_count > 1) {
- if (path_stop_check == do_path_stop_check) {
- error_paths++;
- }
- } else {
- paths_to_use++;
- }
- op->path = do_link(tmp);
- } else if ((opts.number_of_files ||
- opts.expert_amount_to_leave_unused) &&
- daio->stat(tmp, &sbuf) != -1 && S_ISDIR(sbuf.st_mode)) {
- int len = strlen(tmp) +
- strlen(opts.obscure_data_file_basename) + 16;
- int i;
- char *x;
- int nf = set_number_of_files(tmp);
- ullong_t size = set_file_size(tmp);
- pprintf("%s %d files of %lld bytes\n", tmp, nf,
- (ullong_t)size);
- for (i = 0; i < nf; i++) {
- /*
- * If the open succeds then we just have to
- * "leak" this memory here as it is in use
- * in the device structures.
- */
- if ((x = malloc(len)) == NULL) {
- MALLOC_ERROR(len);
- exit(1);
- }
- (void) sprintf(x, "%s/%s%d", tmp,
- opts.obscure_data_file_basename, i);
- newone = open_device(x, NULL, size,
- paths_to_use, error_paths);
- if (newone != NULL) {
- newone->next = devp;
- devp = newone;
- } else {
- free(x);
- }
- }
- } else {
- newone = open_device(tmp, NULL, opts.file_size,
- paths_to_use, error_paths);
- if (newone != NULL) {
- newone->next = devp;
- devp = newone;
- }
- }
- tmp = NULL;
- }
- if (brace_count != 0) {
- plog(LOG_WARNING, "Unbalanced braces in device list\n");
- }
- /* don't free toogo as it is being used in the devices structures. */
- /* free(toogo); */
- if (usr1_exit) {
- exit(0);
- }
- if (devp != NULL) {
- init_device_control(devp);
- }
- return (devp);
- }
- void
- print_dev(struct device *dev)
- {
- struct fds *fds;
- static const char device_str[] = "device";
- (void) printf("Logical Device: %s\n", dev->logicalname);
- USAGE_TRACKING_OPEN_KEY(device_str, NULL, dev->logicalname);
- fds = dev->fdhead;
- (void) printf("Physical device%s:\n", fds->next == fds ? "" : "s");
- USAGE_TRACKING_OPEN_KEY("paths", NULL, NULL);
- for (;;) {
- USAGE_TRACKING_STORE_KEY_VALUE("longname", fds->longname);
- USAGE_TRACKING_STORE_KEY_VALUE("created", TRUE_OR_FALSE(
- fds->created));
- (void) printf("\t%s%s%s%s%s%s\n", fds->longname,
- fds->error_path || fds->created ? " (" : "",
- fds->error_path ? "error path" : "",
- fds->error_path && fds->created ? ", " : "",
- fds->created ? "created" : "",
- fds->error_path || fds->created ? ")" : "");
- if (fds->longname != fds->shortname) {
- (void) printf("\t\t(%s)\n", fds->shortname);
- }
- if (fds->next != dev->fdhead) {
- fds = fds->next;
- } else {
- break;
- }
- }
- USAGE_TRACKING_CLOSE_KEY();
- print_number_of_bytes(dev->device_block_size,
- "Device block size", "Device block size");
- print_number_of_bytes(dev->length, "length", "length");
- print_number(LEN_BYTES2BLOCKS(dev), "block", "blocks");
- (void) fflush(stdout);
- if (write_loops) {
- print_number(dev->countdown, "write", "writes");
- } else if (opts.nloops) {
- print_number(dev->countdown, "read", "reads");
- }
- USAGE_TRACKING_STORE_KEY_VALUE_INT("length", dev->length);
- USAGE_TRACKING_STORE_KEY_VALUE_INT("blocks", LEN_BYTES2BLOCKS(dev));
- USAGE_TRACKING_CLOSE_KEY();
- }
- /*
- * close all the fds and free the data associated with all the paths
- * for a device.
- */
- void
- close_and_free_paths(struct device *dev)
- {
- struct fds *fd, *next;
- for (fd = dev->fdhead, next = fd->next; ; fd = next, next = fd->next) {
- (void) daio->close(fd->fd);
- if (opts.expert_cleanup_created_files && fd->created &&
- is_master() && exit_status == EXIT_SUCCESS &&
- get_shared_device_error(dev->shared_data_handle) == 0) {
- pprintf(gettext("Removing %s\n"), fd->longname);
- if (daio->unlink(fd->longname) == -1) {
- pperror(gettext("unlink(%s)"), fd->longname);
- }
- }
- if (fd->shortname != fd->longname) {
- free(fd->longname);
- }
- free(fd->shortname);
- free(fd);
- if (dev->fdhead == next) {
- break;
- }
- }
- dev->fdhead = NULL;
- }
- struct fds *
- open_path(struct device *devp, char *name, ullong_t size)
- {
- struct fds *fd;
- struct dk_cinfo dk_cinfo;
- struct stat64 sbuf;
- char create;
- check_exit_flag();
- if (daio->stat(name, &sbuf) == -1) {
- if (size == 0) {
- pfprintf(stderr, "stat(%s) == -1 errno = %d (%s)\n",
- name, errno, strerror(errno));
- return (NULL);
- } else {
- create = 1;
- }
- } else {
- create = 0;
- }
- if ((fd = (struct fds *)calloc(1, sizeof (struct fds))) == NULL) {
- return (NULL);
- }
- if (opts.debug_no_action == 0) {
- if ((fd->fd = daio->open(name,
- (is_readonly() ? O_RDONLY : O_RDWR)|
- (opts.o_sync ? O_SYNC : 0)|
- (opts.o_excl ? O_EXCL : 0) |
- (opts.o_ndelay ? O_NDELAY : 0) |
- (create ? O_CREAT : 0) |
- (opts.o_trunc ? O_TRUNC : 0), 0600)) == -1) {
- pperror("open(%s, %s%s%s%s%s%s)",
- name, (is_readonly() ? "O_RDONLY" : "O_RDWR"),
- (opts.o_excl ? "|O_EXCL": ""),
- (opts.o_sync ? "|O_SYNC": ""),
- (opts.o_ndelay ? "|O_NDELAY": ""),
- (opts.o_trunc ? "|O_TRUNC": ""),
- (create ? "|O_CREAT, 0600" : ""));
- free(fd);
- return (NULL);
- }
- if (daio->directio(fd->fd, opts.directio == 1 ?
- DIRECTIO_ON : DIRECTIO_OFF) == -1) {
- if (errno != ENOTTY || opts.directio == 1) {
- pperror("directio(\"%s\") failed", name);
- }
- }
- fd->created = create;
- if (create) {
- if (daio->fstat(fd->fd, &sbuf) == -1) {
- FSTAT_ERROR(fd->fd, name);
- (void) daio->close(fd->fd);
- free(fd);
- return (NULL);
- }
- if (S_ISREG(sbuf.st_mode) &&
- daio->ftruncate(fd->fd, size) == -1) {
- (void) daio->close(fd->fd);
- free(fd);
- return (NULL);
- }
- }
- }
- if (sbuf.st_mode & (S_IFCHR|S_IFBLK)) {
- fd->devid.dev = sbuf.st_rdev;
- } else {
- fd->devid.dev = sbuf.st_dev;
- }
- fd->devid.ino = sbuf.st_ino;
- fd->read_times.str = read_str;
- fd->read_times.best = 0xffffffff;
- fd->write_times.str = write_str;
- fd->write_times.best = 0xffffffff;
- fd->last_read_time = fd->last_write_time = ~0;
- if ((fd->longname = strdup(name)) == NULL) {
- (void) daio->close(fd->fd);
- free(fd);
- return (NULL);
- }
- fd->stop_flag = 0;
- fd->shared_data_handle = init_shared_device_info(opts.nprocs);
- if (fd->shared_data_handle == NULL) {
- plog(LOG_ERR, gettext("Unable to allocate shared data "
- "handle for %s\n"), name);
- }
- if (daio->ioctl(fd->fd, DKIOCINFO, &dk_cinfo) == -1) {
- fd->shortname = fd->longname;
- } else {
- fd->shortname = calloc(1,
- strlen(dk_cinfo.dki_dname) + (3 * 10));
- if (fd->shortname == NULL) {
- fd->shortname = fd->longname;
- } else {
- (void) sprintf(fd->shortname, "%s%d:%c",
- dk_cinfo.dki_dname, dk_cinfo.dki_unit,
- dk_cinfo.dki_partition + 'a');
- }
- }
- if (opts.use_long_names) {
- fd->name = fd->longname;
- } else {
- fd->name = fd->shortname;
- }
- Longest_device_name = MAX(Longest_device_name, strlen(fd->name));
- if (devp->fdhead == NULL) {
- devp->fdhead = fd;
- fd->next = fd;
- } else {
- fd->next = devp->fdhead->next;
- devp->fdhead->next = fd;
- }
- return (fd);
- }
- void *
- read_vtoc_all_paths(struct fds *fdhead)
- {
- struct fds *fd;
- void *handle = NULL;
- fd = fdhead;
- do {
- if ((handle = daio->read_vtoc(fd->fd)) != NULL) {
- break;
- }
- fd = fd->next;
- } while (fd->next != fdhead); /* do loop! */
- return (handle);
- }
- struct paths *
- do_ap(const char *inpath)
- {
- return (daio->findap(inpath, opts.dev_tree));
- }
- /*
- * Set the minimum possible block size that can be used for all the devices
- * in this test set. Typically the block sizes seen are 512 bytes, 2048 bytes
- * or 4096 bytes. It will choose the smallest common multiple of the block
- * sizes available. Typically this will just be the largest block size of
- * all the devices but if you had a 3K and 4K block sized device this will
- * return the smallest block size possible is 12k.
- */
- static void
- set_minimum_block_size(int block_size)
- {
- if (min_block_size == 0) {
- min_block_size = block_size;
- } else {
- min_block_size = min_block_size * block_size /
- gcd(min_block_size, block_size);
- }
- }
- struct device *
- open_device(char *name, struct paths *paths, ullong_t size,
- int paths_to_use, int error_paths)
- {
- struct stat64 sbuf;
- ullong_t nsize;
- void *vtoc_handle;
- struct device *devp;
- struct fds *fd;
- int total_paths = paths_to_use + error_paths;
- if (paths != NULL) {
- name = my_strdup(paths->logicalpath);
- if (name == NULL) {
- return (NULL);
- }
- devp = open_path_group(paths, paths_to_use, error_paths);
- } else if (total_paths > 1 &&
- (paths = do_ap(name)) != NULL) {
- devp = open_path_group(paths, paths_to_use, error_paths);
- free_paths(paths);
- } else {
- if ((devp = (struct device *)calloc(1,
- sizeof (struct device))) == NULL) {
- return (NULL);
- }
- if ((fd = open_path_count(devp, name, size)) != NULL) {
- fd->path_id = 0;
- fd->error_path = 0;
- }
- }
- Longest_logical_name = MAX(Longest_logical_name, strlen(name));
- if (devp->fdhead == NULL) {
- free(devp);
- return (NULL);
- } else {
- devp->logicalname = name;
- }
- if (opts.debug_no_action) {
- return (devp);
- }
- /*
- * this is a mess.
- */
- if (daio->fstat(devp->fdhead->fd, &sbuf) == -1) {
- FSTAT_ERROR(devp->fdhead->fd, devp->fdhead->name);
- close_and_free_paths(devp);
- free(devp);
- return (NULL);
- }
- devp->next = NULL;
- devp->choose_block = seq_block;
- if (!(sbuf.st_mode & S_IFCHR)) {
- plog(LOG_DEBUG, "Not a character device\n");
- nsize = (ullong_t)sbuf.st_size;
- devp->device_block_size = SIZEOF_BUF;
- } else if ((vtoc_handle = read_vtoc_all_paths(devp->fdhead)) == NULL) {
- nsize = (ullong_t)SIZEOF_BUF*(ullong_t)sbuf.st_blocks;
- devp->device_block_size = SIZEOF_BUF;
- } else {
- const struct disko_partition *part;
- devp->device_block_size = disko_vtoc_sectorsz(vtoc_handle);
- if (devp->device_block_size == 0) {
- devp->device_block_size = DEFAULT_BLOCK_SIZE;
- }
- part = disko_vtoc_this_partition(vtoc_handle);
- devp->v_part = malloc(sizeof (struct disko_partition));
- if (devp->v_part != NULL) {
- *devp->v_part = *part;
- }
- nsize = (ullong_t)devp->device_block_size * part->p_size;
- disko_vtoc_free(vtoc_handle);
- }
- set_minimum_block_size(devp->device_block_size);
- if (size == 0 || (nsize > 0 && nsize < size)) {
- size = nsize;
- }
- if (size == 0) {
- (void) fprintf(stderr, gettext("File size is zero on %s\n"),
- name);
- (void) fflush(stderr);
- close_and_free_paths(devp);
- free(devp);
- return (NULL);
- }
- devp->length = size-(opts.start_offset *
- INDEX_TO_DIOLEN(max_disk_io_len));
- devp->read_start_block = opts.expert_recent_log_size +
- ((devp->length/INDEX_TO_DIOLEN(max_disk_io_len)) *
- opts.start_reads_percentage)/100;
- if (devp->read_start_block < (opts.nprocs * (opts.rthreads +
- opts.wthreads))) {
- devp->read_start_block = (opts.nprocs * (opts.rthreads +
- opts.wthreads));
- }
- if (devp->read_start_block > LEN_BYTES2BLOCKS(devp)) {
- devp->read_start_block = LEN_BYTES2BLOCKS(devp);
- }
- if (opts.nloops) {
- devp->countdown = (opts.nloops * devp->length) /
- (opts.nprocs * INDEX_TO_DIOLEN(max_disk_io_len));
- } else {
- devp->countdown = ~(uint64_t)0;
- }
- print_dev(devp);
- print_number_of_bytes(size, "size", "size");
- if (opts.start_offset * INDEX_TO_DIOLEN(max_disk_io_len) > size) {
- (void) printf("starting offset is greater than disk size! ");
- (void) printf("%llx > %llx\n", (ullong_t)(opts.start_offset *
- INDEX_TO_DIOLEN(max_disk_io_len)), (ullong_t)size);
- close_and_free_paths(devp);
- free(devp);
- return (NULL);
- }
- if (LEN_BYTES2BLOCKS(devp) < (opts.nprocs *
- (opts.wthreads + opts.rthreads + opts.wrthreads))) {
- (void) printf("There are not enough blocks (%#llx) to support "
- "this many I/O's (%#lx) on device %s, device closed\n",
- (ullong_t)LEN_BYTES2BLOCKS(devp),
- (ulong_t)(opts.nprocs * (opts.wthreads +
- opts.rthreads + opts.wrthreads)), name);
- close_and_free_paths(devp);
- free(devp);
- return (NULL);
- }
- if (opts.expert_max_active_time == 0) {
- devp->state_ttl.tv_sec = 0;
- devp->state_ttl.tv_usec = 0;
- } else {
- while (my_gettimeofday(&devp->state_ttl, NULL) == -1)
- pperror("gettimeofday");
- devp->state_ttl = set_ttl(devp->state_ttl,
- opts.expert_max_active_time,
- opts.expert_min_active_time);
- }
- devp->shared_data_handle = init_shared_device_info(opts.nprocs);
- if (devp->shared_data_handle == NULL) {
- plog(LOG_ERR, gettext("Unable to allocate shared data "
- "handle for %s\n"), name);
- }
- devp->seq_passes = opts.sequential_passes;
- devp->recent = init_recent(opts.expert_recent_log_size);
- return (devp);
- }
- static int
- check_for_duplicate_paths(struct device *devp)
- {
- uchar_t *buf;
- int buflen = min_block_size;
- struct device *d;
- struct fds *fd;
- int status = 1;
- if ((buf = malloc(buflen)) == NULL) {
- return (0);
- }
- memset(buf, NULL, buflen);
- /*
- * first zero all the target blocks
- */
- for (d = devp; d != NULL; d = d->next) {
- fd = d->fdhead;
- do {
- check_exit_flag();
- if (daio->pwrite(fd->fd, buf, buflen,
- INDEX_TO_DIOLEN(max_disk_io_len) *
- OPTION(start_offset), NULL) != buflen) {
- PWRITE_ERROR(fd->fd, fd->name,
- (ulong_t)buf,
- buflen,
- INDEX_TO_DIOLEN(
- max_disk_io_len) *
- OPTION(start_offset));
- status = 0;
- }
- fd = fd->next;
- } while (fd != d->fdhead);
- }
- /* Now write the dev structure to the first path only */
- for (d = devp; d != NULL; d = d->next) {
- check_exit_flag();
- fd = d->fdhead;
- (void) memcpy(&buf[0], d, sizeof (struct device));
- if (daio->pwrite(fd->fd, buf, buflen,
- INDEX_TO_DIOLEN(max_disk_io_len) *
- OPTION(start_offset), NULL) != buflen) {
- PWRITE_ERROR(fd->fd, fd->name, (ulong_t)buf,
- buflen, INDEX_TO_DIOLEN(max_disk_io_len) *
- OPTION(start_offset));
- status = 0;
- }
- }
- /*
- * Now read all the blocks via each path and verify that they
- * are ok.
- */
- for (d = devp; d != NULL; d = d->next) {
- fd = d->fdhead;
- do {
- check_exit_flag();
- memset(buf, NULL, buflen);
- if (daio->pread(fd->fd, buf, buflen,
- INDEX_TO_DIOLEN(max_disk_io_len) *
- OPTION(start_offset), NULL) != buflen) {
- PREAD_ERROR(fd->fd, fd->name,
- (ulong_t)buf, buflen,
- INDEX_TO_DIOLEN(max_disk_io_len) *
- OPTION(start_offset));
- status = 0;
- } else if (memcmp(buf, d, sizeof (struct device)) !=
- 0) {
- status = 0;
- pfprintf(stderr,
- "dev %s path %s failed path check\n",
- d->logicalname, fd->name);
- }
- fd = fd->next;
- } while (fd != d->fdhead);
- }
- free(buf);
- return (status);
- }
/*
 * print_uname
 *
 * Write a one line description of the host to out: the five uname(2)
 * fields followed by the platform name and the hardware provider as
 * reported by sysinfo(2).
 *
 * All of the buffers start out as empty strings so that a failing
 * sysinfo() or uname() call results in an empty field in the output
 * rather than uninitialised stack contents being printed.  A uname()
 * failure is additionally reported through pperror().
 */
void
print_uname(FILE *out)
{
	static char uname_str[] = "uname";
	struct utsname name;
	char platform[255] = "";
	char hw_prov[255] = "";

	(void) memset(&name, 0, sizeof (name));
	(void) sysinfo(SI_PLATFORM, platform, sizeof (platform));
	(void) sysinfo(SI_HW_PROVIDER, hw_prov, sizeof (hw_prov));
	if (uname(&name) == -1) {
		pperror(uname_str);
	}
	(void) fprintf(out, "System info:\n\t%s %s %s %s %s %s %s\n",
	    name.sysname, name.nodename,
	    name.release, name.version, name.machine, platform,
	    hw_prov);
}
- void
- set_max_blocks(void)
- {
- int i;
- for (i = 0; i < opts.disk_io_sizes.wlen; i++) {
- if (opts.disk_io_sizes.vals[max_disk_io_len] <
- opts.disk_io_sizes.vals[opts.disk_io_sizes.weightings[i]]) {
- max_disk_io_len = opts.disk_io_sizes.weightings[i];
- }
- }
- }
- int
- check_block_sizes(void)
- {
- int i;
- int bs;
- int ret = 0;
- for (i = 0; i < opts.disk_io_sizes.wlen; i++) {
- bs = opts.disk_io_sizes.vals[opts.disk_io_sizes.weightings[i]];
- if ((bs % min_block_size) != 0) {
- plog(LOG_ERR, "Disk IO size 0x%x (%d) is not a "
- "multiple of the minimum block size, 0x%x (%d)\n",
- bs, bs, min_block_size, min_block_size);
- ret = 1;
- }
- }
- return (ret);
- }
- static void
- usr1(int sig, siginfo_t *info, void *v)
- {
- plog(LOG_DEBUG, "USR1 caught\n");
- usr1_exit++;
- }
- static int exit_flag;
- /*ARGSUSED*/
- static void
- set_exit_flag(int sig, siginfo_t *info, void *v)
- {
- plog(LOG_DEBUG, "Sig %d\n", sig);
- if (info == NULL) {
- /*
- * Keyboard generated SIGINT has no info pointer.
- */
- if (sig == SIGINT)
- killer_pid = master_pid();
- } else if (killer_pid == 0)
- killer_pid = info->si_pid;
- exit_flag++;
- }
- void
- check_exit_flag()
- {
- if (exit_flag) {
- (void) sighold(SIGTERM);
- exit(killer_pid == master_pid() ? exit_status : EXIT_FAILURE);
- }
- }
- static void
- print_startup_info(void)
- {
- (void) printf("Setting up to do:\n");
- print_number(LONG_BIT, "Bit mode", "Bit mode");
- print_number_of_bytes(min_block_size,
- "Common block size", "Common block size");
- (void) printf("\tRead %s mode\n", is_readonly() ? "only" : write_str);
- random_str = is_readonly() ? "random " : "";
- print_number(opts.wthreads, "write", "writes");
- print_number(opts.wrthreads, "Write - read", "Write - reads");
- print_number(opts.rthreads, "read", "reads");
- print_number_of_bytes(INDEX_TO_DIOLEN(max_disk_io_len),
- "Max block size", "Max block size");
- print_number(opts.nprocs, "proc", "procs");
- print_number(opts.nlocks, "lock", "locks");
- (void) printf("\t%d%% of disk written before reads start\n",
- opts.start_reads_percentage);
- if (!is_readonly() && opts.obscure_execute &&
- does_check(daio->what_checker())) {
- (void) printf("\tWill execute code read into buffer\n");
- }
- (void) printf("\tUsing %s as buffer allocator\n",
- shm_ops->longname(NULL));
- USAGE_TRACKING_STORE_KEY_VALUE("allocator", shm_ops->longname(NULL));
- #define UT_KVS(A) USAGE_TRACKING_STORE_KEY_VALUE_INT(#A, opts.A);
- UT_KVS(nprocs);
- UT_KVS(wthreads);
- UT_KVS(wrthreads);
- UT_KVS(rthreads);
- #undef UT_KVS
- }
- static void
- setup_signals(void)
- {
- setup_signal_catcher(SIGTERM, set_exit_flag, SA_SIGINFO);
- setup_signal_catcher(SIGHUP, set_exit_flag, SA_SIGINFO);
- setup_signal_catcher(SIGINT, set_exit_flag, SA_SIGINFO);
- setup_signal_catcher(SIGUSR1, usr1, 0);
- }
- int
- main(int argc, char **argv)
- {
- const char *path;
- srand48(getpid());
- path = set_diskomizer_path();
- if (do_args(argc, argv, pprintf, path) == 0) {
- usage(*argv);
- }
- /*
- * Usage tracking has to open after argument checking as we need
- * the values from the configuration files.
- */
- usage_tracking_handle = open_usage_tracking(
- opts.obscure_usagetracking_domain,
- opts.obscure_sendmail,
- opts.obscure_usage_email, /* from */
- "diskomizer", /* to */
- diskomizer_str, /* tool */
- VERSION);
- setup_signal((int (*)(void *, const char *, ...))pfprintf, stderr);
- set_limits();
- set_max_blocks();
- if (opts.STDOUT != NULL)
- if (freopen(opts.STDOUT, "a+", stdout) == NULL) {
- pperror("Unable to open %s for stdout\n",
- opts.STDOUT);
- exit(1);
- }
- if (opts.STDERR != NULL)
- if (freopen(opts.STDERR, "a+", stderr) == NULL) {
- pperror("Unable to open %s for stderr\n",
- opts.STDERR);
- exit(1);
- }
- popenlog("diskomizer");
- if (my_gettimeofday(&start_time, NULL) == -1) {
- plog(LOG_ERR, "Unable to get time of day\n");
- exit(EXIT_FAILURE);
- }
- set_serial_and_provider();
- if (opts.expert_write_cluster_length == 0)
- opts.expert_write_cluster_length = 1;
- if (opts.expert_read_cluster_length == 0)
- opts.expert_read_cluster_length = 1;
- if (opts.STDERR != NULL && opts.STDOUT != NULL && opts.background)
- background();
- (void) printf("\tCopyright %s Sun Microsystems, Inc."
- " All Rights Reserved\n\tUse is subject to license terms.\n\t"
- "Version %s\n", THIS_YEAR, VERSION);
- print_args(argc, argv, (void (*)(const char *, ...))printf);
- /* Check for values which mean we do nothing */
- if (opts.nprocs < 1 || opts.start_reads_percentage > 100) {
- exit(1);
- }
- if (opts.read_minimum > 0 &&
- opts.rthreads < opts.wthreads * opts.read_minimum) {
- (void) printf("WARNING: The ratio of readers to writers with "
- "read_minimum set to %d\ncould lead to thrashing "
- "or deadlock\n", opts.read_minimum);
- }
- /*
- * Set up all the functions to use.
- */
- /* First what to do on error. */
- if ((on_error_short = setup_onerror(*argv, opts.on_error_short,
- READ_ERR)) == NULL) {
- exit_status = EXIT_FAILURE;
- exit(exit_status);
- }
- if ((on_error_corrupt = setup_onerror(*argv, opts.on_error_corrupt,
- READ_ERR)) == NULL) {
- exit_status = EXIT_FAILURE;
- exit(exit_status);
- }
- if ((on_write_error = setup_onerror(*argv, opts.on_write_error,
- WRITE_ERR)) == NULL) {
- exit_status = EXIT_FAILURE;
- exit(exit_status);
- }
- /* Now the type of allocator to be used */
- if ((init_uchar_func = setup_write_buf_initializer()) == NULL ||
- (read_buffer_initializer = setup_read_buf_initializer()) == NULL) {
- usage(*argv);
- }
- /* Choose a shared memmory allocator */
- shm_ops = choose_shm_ops(opts.allocator);
- if (opts.device == NULL) {
- (void) prompt();
- }
- if (opts.device == NULL) {
- pfprintf(stderr, "No devices specified.\n");
- exit(1);
- }
- print_uname(stdout);
- USAGE_TRACKING_STORE_KEY_VALUE_INT("pid", getpid());
- print_bufhdr_offsets(stdout);
- print_serial_and_provider(stdout);
- if (opts.nlocks == 0)
- opts.nlocks = (opts.nprocs * 2) + 1; /* should be prime */
- pgrp = setpgrp();
- parent_pid = getpid();
- setup_signals();
- /* Now setup the locking primitives to use to protect the bit maps */
- init_locks();
- /* register a clean up routine. */
- (void) atexit(cleanup);
- /* get our daio */
- if ((daio = daio_choose_ops(opts.aio_routines)) == NULL) {
- char *reason = dlerror();
- (void) pfprintf(stderr,
- "Unable to load daio routines(%s): %s\n",
- opts.aio_routines,
- reason == NULL ? "Unknown" : reason);
- exit(1);
- }
- daio->init_master(opts.checker, INDEX_TO_DIOLEN(max_disk_io_len));
- if (opts.nloops && opts.rthreads == 0 && opts.wrthreads == 0) {
- write_loops = 1;
- }
- USAGE_TRACKING_OPEN_KEY("devices", NULL, NULL);
- /* now open the devices */
- if ((devices = open_devices(opts.device)) == NULL) {
- (void) pfprintf(stderr, "No devices opened\n");
- exit(1);
- }
- USAGE_TRACKING_CLOSE_KEY();
- findap_fini(); /* free up any data that was cached */
- if (check_block_sizes()) {
- exit(1);
- }
- print_startup_info();
- /* and go! */
- do_aio(devices, start_offset(), opts.report_time);
- /*NOTREACHED*/
- return (1);
- }
- long long
- convert_time(struct timeval tv)
- {
- long long tyme;
- long long mill = MILLION;
- tyme = (long long)tv.tv_sec;
- assert(tyme >= 0);
- tyme *= mill;
- assert(tyme >= 0);
- tyme += tv.tv_usec;
- assert(tyme >= 0);
- return (tyme);
- }
- int
- longest_logical_name(void)
- {
- return (Longest_logical_name);
- }
- int
- longest_device_name(void)
- {
- return (Longest_device_name);
- }
- void
- update_time_stats(char off, struct times *tp, hrtime_t hrtyme,
- struct aio_str *aiop)
- {
- if (hrtyme < 0) {
- pfprintf(stderr, "Warning time appears to go backwards\n");
- return;
- }
- if (hrtyme > tp->worst) {
- tp->worst = hrtyme;
- }
- if (hrtyme < tp->best) {
- tp->best = hrtyme;
- }
- tp->ave -= tp->last_few[tp->count % ARRAY_LEN(tp->last_few)];
- tp->last_few[tp->count++ % ARRAY_LEN(tp->last_few)] = hrtyme;
- tp->ave += hrtyme;
- if (opts.how_often_to_report &&
- (tp->count % opts.how_often_to_report) == 0) {
- plog(LOG_INFO, "%-*s (%-*s) %s times (%.*f,%.*f,%.*f) %3d%%\n",
- longest_logical_name(), aiop->dev->logicalname,
- longest_device_name(), aiop->fd->name,
- tp->str,
- opts.expert_decimal_places, (double)tp->best/ACCURACY,
- opts.expert_decimal_places,
- (double)(tp->ave/MIN(tp->count,
- ARRAY_LEN(tp->last_few)))/ACCURACY,
- opts.expert_decimal_places, (double)tp->worst/ACCURACY,
- off);
- }
- }