/usr/src/tools/diskomizer/diskomizer64mpism.c
C | 5417 lines | 4501 code | 528 blank | 388 comment | 1214 complexity | 83822d2969540770a7df01777bee7815 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
Large files are truncated, but you can click here to view the full file
- /*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
- #pragma ident "%Z%%M% %I% %E% SMI"
- /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
- /*
- * diskomizer64mpism
- *
- * Write to and then read from disk partitions and or files.
- *
- * This is a test program.
- *
- * To do:
- *
- * The messages it prints out at the beginning are a mess.
- * The code should be better commented.
- *
- * Chris.Gerhard@uk.sun.com - SMCC CTE
- */
- #include "args.h"
- #include "diskomizer64mpism.h"
- #include "bufs.h"
- #include "buf_init.h"
- #include <netdb.h>
- #include <sys/systeminfo.h>
- #include <tnf/probe.h>
- #include <sys/times.h>
- #include <diskomizer/log.h>
- #include "findap.h"
- #include "device_control.h"
- #include "timeval.h"
- #include "list_ops.h"
- #include "bits.h"
- #include "locks.h"
- #include "shm_ops.h"
- #include "signal_catch.h"
- #include "limit.h"
- #include "time.h"
- #include "prompt.h"
- #include "errors.h"
- #include "utils.h"
- #include "shared_device_info.h"
- #include "decode_errors.h"
- #include <sys/utsname.h>
- #include <sys/statvfs.h>
- #include <dlfcn.h>
- #include <diskomizer/uadmin.h>
- #include <usage_tracking/usage_tracking.h>
- #include "disko_usage_track.h"
- #ifdef __i386
- #include <note.h>
- #endif
/* Brace characters, named so they can be referred to symbolically. */
#define OPEN_BRACE '{'
#define CLOSE_BRACE '}'
#define DEFAULT_BLOCK_SIZE 0x200 /* 512 */
/* Program name string. */
static char diskomizer_str[] = "diskomizer";
/*
 * Index of the largest disk i/o length; INDEX_TO_DIOLEN(max_disk_io_len)
 * is used throughout this file as the size of one diskomizer block.
 */
iolen_index_t max_disk_io_len;
static char *diffs;
/* Fixed strings shared by the messages printed from this file. */
static char diffs_str[] = "diffs";
static char write_str[] = "write";
static char read_str[] = "read";
static const char nil[] = "nil";
/*
 * NIL. Yield the string "nil" in place of a NULL string pointer.
 * The argument is parenthesized so that an expression argument, for
 * example a conditional expression, is tested and yielded as a whole.
 * Note that the argument is evaluated twice.
 */
#define	NIL(A) ((A) == NULL ? nil : (A))
static char *random_str;
/* High resolution time that do_secs_till_exit() counts down towards. */
static hrtime_t stoptime;
/* Hook returning the number of seconds left before the program exits. */
static time_t (*secs_till_exit)(void);
static struct timeval start_time;
void *usage_tracking_handle;
/*
 * The kind of read being issued; passed to do_new_read().
 * init_read() issues READ_ONLY_RAND when the program is read only and
 * NORMAL_READ otherwise.
 */
typedef enum read_type {
	NORMAL_READ,
	RETRY_READ,
	WRITE_READ,
	READ_ONLY_RAND,
	READ_ONLY_SEQ
} read_type_t;
/* Signature of a buffer initializer: takes (buf, i) and returns a uchar_t. */
typedef uchar_t (*initializer_t)(int buf, int i);
/* Exit status of this process; on_error_exit() sets it to EXIT_FAILURE. */
static int exit_status = EXIT_SUCCESS;
/*
 * The minimum block size that can be used. Essentially the lowest common
 * multiple of the blocksizes available.
 */
static int min_block_size;
/* Per process record; currently just the process id. */
struct proc_store {
pid_t pid;
};
static struct proc_store *proc_store;
/*
 * The daio_ops
 */
static struct daio_ops *daio;
/*
 * All the functions we have
 */
/* Read handlers and the routine that submits a new read. */
time_t handle_read(struct aio_str *aiop, ullong_t start);
time_t do_new_read(struct aio_str *aiop, ullong_t start, read_type_t read_type);
time_t handle_readonly_rand(struct aio_str *aiop, ullong_t start);
/* Index of this process; returned by this_proc(). */
static int proc_no;
long long convert_time(struct timeval tv);
void update_time_stats(char off, struct times *tp, long long tyme,
struct aio_str *aiop);
static int pend_write_with_lock(bitmap_t map[], ullong_t off, int maplen);
static int do_memcmp(ullong_t start, struct aio_str *aiop);
static struct device *open_device(char *name, struct paths *, ullong_t size,
int paths_to_use, int error_paths);
static void check_matching_io(ullong_t start, struct aio_str *aiop);
static int is_master(void);
/* Block choosers that drain the unwritten queue first. */
static void unwritten_block_rand(bitmap_t *map, struct aio_str *aiop,
ullong_t start, ullong_t len, int maplen);
static void unwritten_block_seq(bitmap_t *map, struct aio_str *aiop,
ullong_t start, ullong_t len, int maplen);
extern void close_and_free_paths(struct device *dev);
extern void run_func(uchar_t *buf, size_t size);
static struct bufhdr
build_bufhr(struct device *dev, ullong_t start, ullong_t off);
/* Path (struct fds) handling. */
void newfd(struct aio_str *aiop);
struct fds *open_path(struct device *devp, char *name, ullong_t size);
void cancel_all_io_byfd(struct fds *fd);
struct fds *find_path(struct fds *fdhead, char path_id);
static int check_for_duplicate_paths(struct device *devp);
static void do_start_cancelled_io(struct device *devices, ullong_t start);
/* Constant-returning helpers used as default hooks and setup routines. */
static int return_zero(void);
static int return_one(void);
/*
 * The error handling functions.
 * Each takes the start value and the aio_str of the failed i/o and
 * returns a loop_type; see on_error_table for the names they are
 * registered under.
 */
static loop_type on_error_reread(ullong_t start, struct aio_str *aiop);
static loop_type on_error_exit(ullong_t start, struct aio_str *aiop);
static loop_type on_error_stop(ullong_t start, struct aio_str *aiop);
static loop_type on_error_nop(ullong_t start, struct aio_str *aiop);
static loop_type on_error_abort(ullong_t start, struct aio_str *aiop);
static loop_type on_error_pause(ullong_t start, struct aio_str *aiop);
static loop_type on_error_retry(ullong_t start, struct aio_str *aiop);
static loop_type on_error_rewrite(ullong_t start, struct aio_str *aiop);
static loop_type on_error_fail_path(ullong_t start, struct aio_str *aiop);
/*
 * Error handling init functions.
 * These are the "setup" entries of on_error_table for STOP and FAIL_PATH.
 */
static int init_path_stop_check(void);
static int init_stop_check(void);
/*
 * the "globals" that we use
 */
struct device *devices; /* all the devices there are */
write_buf_initializer_t init_uchar_func;
/* Called by init_read_buf() to fill a read buffer. */
static read_buf_initializer_t read_buffer_initializer;
/* Process group signalled by infantacide(). */
pid_t pgrp;
/*
 * statics
 */
static pid_t parent_pid;
static pid_t killer_pid;
static on_error_t *on_error_corrupt;
static on_error_t *on_error_short;
static on_error_t *on_write_error;
static int Longest_device_name = 0;
static int Longest_logical_name = 0;
static int write_loops;
static int usr1_exit = 0;
/* Operations used to attach and detach the shared block and map areas. */
struct shm_ops *shm_ops;
/*
 * Count of the total number of io's that are currently cancelled.
 */
static int cancelled_count = 0;
/*
 * Start cancelled. Only gets unset from nop if there are cancelled ios
 * to restart. A rare thing.
 */
static void (*start_cancelled_io)(struct device *, ullong_t) =
(void (*)(struct device *, ullong_t)) nop;
static void (*start_deferred)(struct device *dev, ullong_t) =
(void (*)(struct device *, ullong_t)) nop;
/* Stop check hooks; both default to return_zero(). */
static int (*stop_check)(void *handle) = (int (*)(void *))return_zero;
static int (*path_stop_check)(struct fds *fd, struct device *dev) =
(int (*)(struct fds *, struct device *dev))return_zero;
static char nom[] = "no memory";
/*
 * Small message helpers. Every macro argument is parenthesized so that
 * an expression argument is evaluated as a unit; arguments may be
 * evaluated more than once.
 *
 * NOT_NULL		the string "no memory" in place of a NULL pointer.
 * PLURAL		"" when the count is exactly 1, otherwise "s".
 * LEN_BYTES2BLOCKS	length of device A divided by the largest i/o size.
 * TRUE_OR_FALSE	"true" for a non zero value, otherwise "false".
 */
#define	NOT_NULL(A) ((A) == NULL ? &nom[0] : (A))
#define	PLURAL(A) ((A) == 1 ? "" : "s")
#define	LEN_BYTES2BLOCKS(A) ((A)->length / INDEX_TO_DIOLEN(max_disk_io_len))
#define	TRUE_OR_FALSE(A) ((A) ? "true" : "false")
/*
 * One entry of the table that maps an error handler name to the
 * function that implements it.
 */
struct error_handlers {
char *name; /* String that describes this error handler */
on_error_t func; /* error handling function */
int (*setup)(void); /* init routine for the error handler */
uint_t breaker:1; /* Is this the last error handler on the list */
uint_t rw:2; /* Does this handler apply to read or write or both */
};
/* Values for the rw field of struct error_handlers. */
#define READ_ERR 1
#define WRITE_ERR (READ_ERR << 1)
#define BOTH_ERR (READ_ERR | WRITE_ERR)
/*
 * The known error handlers. Columns are: name, handler, setup routine,
 * breaker flag and which kind of i/o the handler applies to.
 */
struct error_handlers on_error_table[] = {
{"EXIT", on_error_exit, return_one, 1, BOTH_ERR},
{"ABORT", on_error_abort, return_one, 1, BOTH_ERR},
{"CONTINUE", on_error_nop, return_one, 0, BOTH_ERR},
{"NONE", on_error_nop, return_one, 0, BOTH_ERR},
{"STOP", on_error_stop, init_stop_check, 1, BOTH_ERR},
{"PAUSE", on_error_pause, return_one, 0, BOTH_ERR},
{"RETRY", on_error_retry, return_one, 0, BOTH_ERR},
{"FAIL_PATH", on_error_fail_path, init_path_stop_check, 0, BOTH_ERR},
{"UADMIN", on_error_uadmin, uadmin_init, 1, BOTH_ERR},
{"REREAD", on_error_reread, return_one, 0, READ_ERR},
{"REWRITE", on_error_rewrite, return_one, 0, WRITE_ERR}
};
/*
 * TNF declarations.
 */
/*
 * The DEFINE should not have explicit mentions of the daio_ZZZZ
 * elements, they should be opaque.
 *
 * The record carries five members: buf, iolen, off and the
 * daio_return and daio_errno members of aio_res.result.
 */
TNF_DEFINE_RECORD_5(aio_str_t, aio_tnf_str,
tnf_opaque, buf, tnf_short, iolen, tnf_ulonglong, off,
tnf_longlong, aio_res.result.daio_return,
tnf_uint, aio_res.result.daio_errno)
- /*
- * locking functions.
- */
- static char *
- hostname(void)
- {
- static char hostname[MAXHOSTNAMELEN + 1];
- (void) sysinfo(SI_HOSTNAME, &hostname[0], MAXHOSTNAMELEN);
- return (&hostname[0]);
- }
- int
- this_proc(void)
- {
- return (proc_no);
- }
/*
 * return_one. Always returns 1; used as the setup routine of error
 * handlers that need no initialization.
 */
static int
return_one(void)
{
	const int one = 1;

	return (one);
}
/*
 * return_zero. Always returns 0; used as the default stop check hook.
 */
static int
return_zero(void)
{
	const int zero = 0;

	return (zero);
}
/*
 * nop. Does nothing; used as the default value of function pointer
 * hooks.
 */
void
nop(void)
{
	/* Deliberately empty. */
}
/*
 * not_null_free. Free ptr; a NULL ptr is accepted and ignored.
 */
static void
not_null_free(void *ptr)
{
	/* free() is defined to do nothing when handed NULL. */
	free(ptr);
}
/*
 * inf_secs_till_exit. Seconds-till-exit routine that always answers
 * LONG_MAX.
 */
static time_t
inf_secs_till_exit(void)
{
	time_t forever = LONG_MAX;

	return (forever);
}
- static time_t
- do_secs_till_exit(void)
- {
- return ((stoptime - gethrtime()) / BILLION);
- }
- off64_t
- start_offset(void)
- {
- return ((off64_t)(opts.start_offset *
- (ullong_t)INDEX_TO_DIOLEN(max_disk_io_len)));
- }
- static void
- return_aio_read_buf(struct aio_str *aiop)
- {
- if (!(aiop->count % opts.expert_release_read_buffers_after_n_uses) &&
- aiop->buf != NULL) {
- return_read_buf(aiop->buf);
- aiop->buf = NULL;
- }
- }
/*
 * do_stop_check. Return the shared stop flag that handle holds for
 * this process.
 */
static int
do_stop_check(void *handle)
{
	int me = this_proc();

	return (get_shared_stop_flag(handle, me));
}
- int
- is_readonly(void)
- {
- return (opts.o_rdonly == 1);
- }
- const char *
- rw_string(void)
- {
- return (is_readonly() ? read_str : write_str);
- }
- /*
- * background. disassociate from controlling tty make session leader
- * then fork. The parent exits and the child goes on in
- * the back ground.
- */
- static void
- background()
- {
- pid_t pid;
- (void) freopen("/dev/null", "+r", stdin);
- pid = opts.use_fork1 == 0 ? fork() : fork1();
- if (pid == 0) {
- if (setsid() == (pid_t)-1)
- pperror("setsid");
- return;
- }
- if (pid < 0) {
- FORK_ERROR(opts.use_fork1 == 0 ? "" : "s");
- exit(1);
- }
- exit(0);
- }
- static struct blks *
- aio_attach(struct aio_str *aiop)
- {
- int error_count = 0;
- struct blks *blocks;
- while ((blocks = shm_ops->attach(AIO_BLOCK_HANDLE(aiop))) ==
- NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(AIO_BLOCK_HANDLE(aiop));
- }
- if (error_count > 0)
- plog(LOG_WARNING, "attached o.k.\n");
- return (blocks);
- }
- static void
- update_aio_time_stats(struct aio_str *aiop, struct times *ts)
- {
- if (aiop->count > 0) {
- ullong_t len = LEN_BYTES2BLOCKS(aiop->dev);
- update_time_stats((100 * MIN(aiop->dev->block, len))/
- ((aiop->dev->length/
- INDEX_TO_DIOLEN(max_disk_io_len))),
- ts,
- DAIO_GET_TIME_TAKEN(aiop->aio_res), aiop);
- }
- }
- static void
- update_aio_read_stats(struct aio_str *aiop)
- {
- update_aio_time_stats(aiop, &aiop->fd->read_times);
- }
- static void
- update_aio_write_stats(struct aio_str *aiop)
- {
- update_aio_time_stats(aiop, &aiop->fd->write_times);
- }
- ullong_t
- diskomizer_off2byteoff(ullong_t off)
- {
- return ((off + opts.start_offset) * INDEX_TO_DIOLEN(max_disk_io_len));
- }
- static ullong_t
- byteoff2diskomizer_off(ullong_t off)
- {
- return ((off/INDEX_TO_DIOLEN(max_disk_io_len)) - opts.start_offset);
- }
/*
 * Sanity check. Assert that converting the block number X to a byte
 * offset and back again gives X. X is evaluated twice and is
 * parenthesized so that an expression argument compares as a whole.
 */
#define	ASSERT_OFFSET(X) \
	assert(byteoff2diskomizer_off(diskomizer_off2byteoff(X)) == (X))
- static ullong_t
- aio_str2byteoff(struct aio_str *aiop)
- {
- return (diskomizer_off2byteoff(aiop->off));
- }
- static int64_t
- aio_str2lba(struct aio_str *aiop)
- {
- long long byteoff;
- int64_t lba;
- if (aiop->dev->v_part == NULL || aiop->dev->device_block_size == 0) {
- return (-1);
- }
- byteoff = aio_str2byteoff(aiop);
- lba = byteoff / (int64_t)aiop->dev->device_block_size;
- return (aiop->dev->v_part->p_start + lba);
- }
- static void
- plog_dd(int pri, struct aio_str *aiop)
- {
- if ((INDEX_TO_DIOLEN(max_disk_io_len) %
- INDEX_TO_DIOLEN(aiop->iolen)) == 0) {
- daio->plog_dd(pri, aiop->fd->fd, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(max_disk_io_len),
- aio_str2byteoff(aiop));
- }
- }
- static void
- report_device(int pri, struct aio_str *aiop)
- {
- plog(pri, "Requested File %s (%s)\n",
- aiop->fd->name, aiop->dev->logicalname);
- }
- /*
- * report block.
- *
- * Report all the information about the block that was requested to be read
- */
- static void
- report_offset(int pri, struct aio_str *aiop)
- {
- long long byteoff;
- long long lba;
- byteoff = aio_str2byteoff(aiop);
- lba = aio_str2lba(aiop);
- if (lba >= 0) {
- plog(pri,
- "Requested File offset 0t%lld (0x%llx), block size "
- "0t%d (0x%x), LBA 0t%lld (0x%llx)\n",
- byteoff, byteoff, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen), lba, lba);
- } else {
- plog(pri, "Requested File offset 0t%lld (0x%llx), block size "
- "0t%d (0x%x)\n", byteoff, byteoff,
- INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- }
- }
/*
 * report_device_and_offset. Log the device of the i/o in aiop and
 * then the offset of the block within it.
 */
static void
report_device_and_offset(int pri, struct aio_str *aiop)
{
	/* The device line is always printed before the offset line. */
	report_device(pri, aiop);
	report_offset(pri, aiop);
}
- /*
- * report_error. This is the generic error reporting routine.
- * It reports all errors to stderr, giving similar information
- * and advise as to other commands that can be tried.
- */
- void
- report_error(struct aio_str *aiop, const union err_info u, err_type error)
- {
- struct timeval now_tv;
- long long disk_block;
- int pri;
- while (my_gettimeofday(&now_tv, NULL) == -1)
- pperror("gettimeofday");
- disk_block = aio_str2byteoff(aiop);
- mutex->stderr_enter();
- if (error == ERR_HUNG) {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- report_device(pri, aiop);
- if (is_readonly()) {
- plog(pri, "%s has %ld out of %ld read%s\n",
- aiop->dev->logicalname,
- aiop->fd->number_of_hung_read,
- aiop->fd->total_read,
- aiop->fd->number_of_hung_read == 1 ? "" : "s");
- } else {
- plog(pri, "%s has %ld out of %ld read%s and %ld "
- "out of %ld write%s\n",
- aiop->dev->logicalname,
- aiop->fd->number_of_hung_read,
- aiop->fd->total_read,
- aiop->fd->number_of_hung_read == 1 ? "" : "s",
- aiop->fd->number_of_hung_write,
- aiop->fd->total_write,
- aiop->fd->number_of_hung_write == 1 ? "" : "s");
- }
- plog(pri, "waiting for more than %ld second%s\n",
- u.time, PLURAL(u.time));
- if (is_readonly()) {
- plog(pri, "Last read took %lld\n",
- aiop->fd->last_read_time/BILLION);
- } else {
- plog(pri, "Last read took %lld, last write took "
- "%lld seconds\n",
- aiop->fd->last_read_time/BILLION,
- aiop->fd->last_write_time/BILLION);
- }
- plog(pri,
- "oldest i/o is a %s waiting for %ld second%s\n",
- is_read_io(aiop) ? "read" : "write",
- now_tv.tv_sec - aiop->tv.tv_sec,
- ((now_tv.tv_sec - aiop->tv.tv_sec) > 1) ? "s" : "");
- } else if (error == ERR_CORRUPT) {
- time_t request_time;
- time_t return_time;
- int read_count;
- struct blks *blocks;
- struct fds *fd;
- pri = LOG_ERR;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- report_device_and_offset(pri, aiop);
- report_error_desc(pri, aiop, u.str);
- blocks = aio_attach(aiop);
- fd = find_path(aiop->dev->fdhead,
- blocks[AIO_BLOCK_INDEX(aiop)].path_id);
- assert(fd != NULL);
- read_count = blocks[AIO_BLOCK_INDEX(aiop)].read_count;
- request_time = blocks[AIO_BLOCK_INDEX(aiop)].last_requested;
- return_time = request_time + GET_LAST_RETURN(
- blocks[AIO_BLOCK_INDEX(aiop)].last_returned_delta);
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- time_log(pri, blocks[AIO_BLOCK_INDEX(aiop)].last_requested,
- "Last %s to the requested block submitted", rw_string());
- time_log(pri, return_time,
- "Last %s to the requested block returned", rw_string());
- plog(pri, "Last %s to the requested block used path: %s\n",
- rw_string(), fd->name);
- if (!is_readonly()) {
- plog(pri, "Requested block has been read %d times "
- "since last written\n", read_count);
- }
- decode_errors(pri, aiop, read_count);
- } else if (error == ERR_DEFERRED) {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- plog(pri, "%s to device %s deferred\n",
- u.str, aiop->dev->logicalname);
- } else if (DAIO_RETURN(aiop->aio_res) < 0) {
- char *datestr;
- pri = LOG_ERR;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- (void) plog(pri, "%s %s%s error, errno %d %s\n",
- aiop->fd->name, u.str,
- aiop->retrycnt > 0 ? " retry" : "",
- DAIO_ERROR(aiop->aio_res),
- strerror(DAIO_ERROR(aiop->aio_res)));
- datestr = alloc_time_str_fmt(aiop->tv.tv_sec, "%b %e %H:%M");
- if (datestr != NULL) {
- plog(pri, "Try \"egrep '^%s.*%s' "
- "/var/adm/messages\"\n",
- datestr, hostname());
- free(datestr);
- }
- } else {
- pri = LOG_WARNING;
- time_log(pri, now_tv.tv_sec, "Time now");
- time_log(pri, aiop->tv.tv_sec, "Requested io requested at");
- plog(pri, "%s short %s%s, Transferred %ld (%#lx)"
- " bytes, requested %d (%#x) bytes.\n",
- aiop->fd->name, u.str,
- aiop->retrycnt > 0 ? " retry" : "",
- (long)DAIO_RETURN(aiop->aio_res),
- (long)DAIO_RETURN(aiop->aio_res),
- INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- }
- dlog(pri, "Block at byte offset 0t%lld (%#llx) block size %d (%#x)\n",
- disk_block, disk_block, INDEX_TO_DIOLEN(aiop->iolen),
- INDEX_TO_DIOLEN(aiop->iolen));
- plog_dd(pri, aiop);
- (void) fflush(stderr);
- (void) fsync(fileno(stderr));
- mutex->stderr_exit();
- }
- ulong_t
- my_lrand(void)
- {
- union {
- ulong_t l;
- uint32_t i[sizeof (ulong_t)/sizeof (uint32_t)];
- } u;
- int i;
- #ifdef __lint
- ZERO_OBJ(u);
- #endif
- for (i = 0; i < (sizeof (ulong_t)/sizeof (uint32_t)); i++)
- u.i[i] = (uint32_t)lrand48();
- return (u.l);
- }
- static void
- remove_from_all_aios(struct aio_str *aiop)
- {
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- }
- static void
- infantacide(void)
- {
- (void) killpg(pgrp, SIGTERM);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_exit(ullong_t start, struct aio_str *aiop)
- {
- union err_info err_info;
- err_info.str = "On error exit";
- DAIO_SET_RETURN(aiop->aio_res, 0);
- report_error(aiop, err_info, ERR_SYS);
- remove_from_all_aios(aiop);
- (void) sighold(SIGTERM);
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- exit_status = EXIT_FAILURE;
- exit(exit_status);
- /*NOTREACHED*/
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_stop(ullong_t start, struct aio_str *aiop)
- {
- pfprintf(stderr, "%s Set On error stop\n", aiop->fd->name);
- if (set_shared_stop_flag(aiop->dev->shared_data_handle) == -1)
- aiop->dev->need_to_stop = 1;
- aiop->dev->stop_flag = 1;
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_nop(ullong_t start, struct aio_str *aiop)
- {
- if (is_write_io(aiop)) {
- struct blks *blocks;
- /*
- * Clear the last io as the retries never suceeded
- * so we don't read this block which is now in an
- * undefined state.
- */
- blocks = aio_attach(aiop);
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io = NULL;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- }
- pfprintf(stderr, "%s On error continue\n", aiop->fd->name);
- return (BREAK);
- }
- /*ARGSUSED*/
- static loop_type
- on_error_abort(ullong_t start, struct aio_str *aiop)
- {
- union err_info err_info;
- err_info.str = "On error abort";
- report_error(aiop, err_info, ERR_SYS);
- /* pfprintf(stderr, "On error abort\n"); */
- (void) sighold(SIGTERM);
- if (incr_shared_device_error(aiop->dev->shared_data_handle,
- aiop->dev->errors) != -1) {
- aiop->dev->errors = 0;
- }
- abort(); /* On error abort. This one is o.k. */
- return (BREAK);
- }
- /*
- * report_hangers_fd. counts the number of I/O requests that
- * have been waiting for more than hanger_time seconds and then
- * calls report_error() with the i/o that has been waiting the
- * longest and a count of the number of i/o requests that are
- * over time. It only calls report_error() when the number of i/o
- * requests or the oldest outstanding i/o change or if the last
- * report was more than hanger_time seconds ago and there are some
- * i/o hung.
- */
- static int
- report_hangers_fd(struct fds *fd, time_t tyme, time_t hanger_time)
- {
- int total_hung_read = 0;
- int total_hung_write = 0;
- #ifdef IO_COUNT_DEBUG
- int total_read = 0;
- int total_write = 0;
- #endif
- struct aio_str *aiop;
- union err_info err_info;
- if (fd->error_path != 0 || fd->stop_flag != 0) {
- return (0);
- }
- /* fd->total_read = fd->total_write = 0; */
- err_info.time = hanger_time;
- for (aiop = fd->all_aios.head; aiop != NULL; aiop = aiop->next) {
- if (tyme - aiop->tv.tv_sec > hanger_time) {
- if (is_read_io(aiop)) {
- total_hung_read++;
- } else {
- total_hung_write++;
- }
- } else {
- break;
- }
- }
- #ifdef IO_COUNT_DEBUG
- assert(total_read == fd->total_read);
- assert(total_write == fd->total_write);
- #endif
- if (fd->oldest_io == NULL) {
- fd->oldest_io = fd->all_aios.head;
- }
- if (total_hung_read != fd->number_of_hung_read ||
- total_hung_write != fd->number_of_hung_write ||
- (fd->all_aios.head != fd->oldest_io &&
- (total_hung_read || total_hung_write))) {
- fd->number_of_hung_read = total_hung_read;
- fd->number_of_hung_write = total_hung_write;
- report_error(fd->all_aios.head, err_info, ERR_HUNG);
- fd->last_report = tyme;
- fd->oldest_io = fd->all_aios.head;
- } else if (total_hung_read + total_hung_write > 0 &&
- fd->all_aios.head != NULL && fd->last_report + hanger_time < tyme) {
- report_error(fd->all_aios.head, err_info, ERR_HUNG);
- fd->last_report = tyme;
- }
- return (total_hung_read + total_hung_write);
- }
- /*
- * Search the list of i/o that are currently outstanding and report
- * on any that have been outstanding for more than hanger_time.
- * Also display howlong the oldest i/o has been Waiting for and when
- * it was submitted.
- */
- static void
- report_hangers(struct device *dev, time_t tyme, time_t hanger_time)
- {
- int total = 0;
- struct fds *fd;
- for (fd = dev->fdhead; ; fd = fd->next) {
- total += report_hangers_fd(fd, tyme, hanger_time);
- if (dev->fdhead == fd->next)
- break;
- }
- }
- static void
- report_all_hangers(struct device *dev, time_t hanger_time)
- {
- struct timeval tv;
- while (my_gettimeofday(&tv, NULL) == -1)
- pperror("gettimeofday");
- for (; dev != NULL; dev = dev->next) {
- report_hangers(dev, tv.tv_sec, hanger_time);
- }
- }
/*
 * my_strdup. strdup() with the failure reported through
 * STRDUP_ERROR. Returns the copy, or NULL when there is no memory.
 */
char *
my_strdup(const char *s)
{
	char *copy;

	if ((copy = strdup(s)) != NULL)
		return (copy);
	STRDUP_ERROR(s);
	return (copy);
}
/*
 * my_calloc. calloc() with the failure reported through
 * CALLOC_ERROR. Returns the zeroed memory, or NULL when there is no
 * memory.
 */
void *
my_calloc(long a, long b)
{
	void *mem = calloc(a, b);

	if (mem == NULL) {
		CALLOC_ERROR(a, b);
	}
	return (mem);
}
- static void
- add_to_all_aios(struct aio_str *aiop)
- {
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- }
- static int
- init_read(struct aio_str aio[], ullong_t start)
- {
- struct aio_str *aiop;
- aiop = my_calloc(1, sizeof (struct aio_str));
- if (aiop == NULL) {
- pfprintf(stderr, "init_read, can't allocate memory\n");
- return (0);
- }
- aiop->buf = NULL;
- aiop->off = 0;
- aiop->handler = is_readonly() ? handle_readonly_rand : handle_read;
- aiop->dev = aio->dev;
- aiop->fd = aio->dev->fdhead;
- aio->dev->fdhead = aio->dev->fdhead->next;
- add_to_all_aios(aiop);
- (void) do_new_read(aiop, start,
- is_readonly() ? READ_ONLY_RAND : NORMAL_READ);
- return (1);
- }
- static int
- has_no_unwritten(struct aio_str *aiop)
- {
- return (aiop->dev->unwritten == NULL);
- }
- static void
- push_unwritten(struct aio_str *aiop)
- {
- struct offset_list *new;
- if ((new = calloc(1, sizeof (struct offset_list))) == NULL) {
- CALLOC_ERROR(1, sizeof (struct offset_list));
- aiop->dev->failed_to_push_unwritten = 1;
- } else {
- plog(LOG_DEBUG,
- "Block %#llx (0t%lld) %s pushed onto unwritten queue\n",
- aiop->off, aiop->off, aiop->dev->logicalname);
- new->offset = aiop->off;
- new->next = aiop->dev->unwritten;
- aiop->dev->unwritten = new;
- aiop->dev->choose_block = unwritten_block_seq;
- }
- }
- static int
- find_unwritten(bitmap_t *map, struct aio_str *aiop, int maplen)
- {
- struct offset_list *u, *p;
- int status = 0;
- p = NULL;
- for (u = aiop->dev->unwritten; u != NULL; u = u->next) {
- if (pend_write_with_lock(map, u->offset, maplen) == 0) {
- if (p == NULL)
- aiop->dev->unwritten = u->next;
- else
- p->next = u->next;
- aiop->off = u->offset;
- plog(LOG_DEBUG,
- "Block %#llx (0t%lld) %s locked and removed "
- "from unwritten queue\n",
- aiop->off, aiop->off, aiop->dev->logicalname);
- free(u);
- status = 1;
- break;
- }
- p = u;
- }
- return (status);
- }
- static int
- is_unwritten(struct aio_str *aiop)
- {
- struct offset_list *u;
- for (u = aiop->dev->unwritten; u != NULL; u = u->next) {
- if (u->offset == aiop->off)
- return (1);
- }
- return (0);
- }
- /*
- * randomish_block
- * return a random block to try to do io too or from. If we
- * are short of memory the block is less random to try and
- * decrease the number of attach/detach pairs that actually
- * result in system calls, and therefore reduce the number of
- * faults.
- *
- * In particular when short of memory the next io will tend to
- * be in the same block ob blks structures or the next block
- * for odd numbered processes and the previous block for even
- * numbered processes. The overall effect is still close to
- * random at the device, but individual processes thrash less.
- */
- ulong_t
- randomish_block(struct aio_str *aiop)
- {
- ulong_t t;
- if (shm_ops->is_short_of_mem()) {
- t = aiop->off + ((my_lrand() % (shm_ops->max_size() /
- sizeof (struct blks))) * this_proc() % 2 ? -1 : 1);
- } else {
- t = my_lrand();
- }
- return (t);
- }
- /*ARGSUSED2*/
- void
- rand_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- ulong_t t;
- ullong_t *next_io_blk_ptr;
- if (is_write_io(aiop)) {
- next_io_blk_ptr = &aiop->dev->next_write_blk;
- if (!(aiop->dev->next_write_blk %
- opts.expert_write_cluster_length)) {
- t = randomish_block(aiop);
- t = t - (t % opts.expert_write_cluster_length);
- } else {
- TNF_PROBE_1(cluster_write, "rand_block",
- "sunw%cte%diskomizer%blocks write cluster",
- tnf_ulonglong, next_read_blk,
- aiop->dev->next_read_blk);
- t = *next_io_blk_ptr;
- }
- } else {
- next_io_blk_ptr = &aiop->dev->next_read_blk;
- if (!(aiop->dev->next_read_blk %
- opts.expert_read_cluster_length)) {
- t = randomish_block(aiop);
- t = t - (t % opts.expert_read_cluster_length);
- } else {
- TNF_PROBE_1(cluster_read, "rand_block",
- "sunw%cte%diskomizer%blocks read cluster",
- tnf_ulonglong, next_read_blk,
- aiop->dev->next_read_blk);
- t = *next_io_blk_ptr;
- }
- }
- aiop->off = (t)%(len);
- aiop->off = find_next_free(map, aiop->off, len, maplen);
- if (next_io_blk_ptr != NULL)
- *next_io_blk_ptr = aiop->off + 1;
- assert(aiop->off < len);
- }
- static void
- unwritten_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen, choose_block_t chooser)
- {
- plog(LOG_DEBUG, "in unwritten_block %llx\n",
- aiop->dev->unwritten == NULL ?
- 0LL : aiop->dev->unwritten->offset);
- if (find_unwritten(map, aiop, maplen)) {
- struct blks *blocks;
- if (has_no_unwritten(aiop))
- aiop->dev->choose_block = chooser;
- blocks = aio_attach(aiop);
- blocks[AIO_BLOCK_INDEX(aiop)].u.was_unwritten = 1;
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- } else {
- chooser(map, aiop, start, len, maplen);
- }
- }
- /*
- * find the "next" block to read for this aio_str. Increment by
- * by the number of processes, so that when initializing the disk
- * each block only gets written once.
- */
- static void
- seq_block(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- int does_not_have_lock;
- assert((aiop->dev->block % opts.nprocs) == this_proc() ||
- aiop->dev->block == len);
- aiop->off = aiop->dev->block;
- if (aiop->dev->block >= len) {
- does_not_have_lock = 1;
- } else while ((does_not_have_lock =
- pend_write_with_lock(map, aiop->off, maplen)) != 0) {
- /*
- * Only push blocks that this process would have to write
- * onto the unwritten queue. This only effects the last
- * block on the device.
- */
- if ((aiop->off % opts.nprocs) == this_proc()) {
- push_unwritten(aiop);
- }
- aiop->dev->block += opts.nprocs;
- aiop->off = aiop->dev->block % len;
- if (aiop->dev->block >= len) {
- aiop->dev->block = len;
- break;
- }
- }
- if (does_not_have_lock != 0) {
- if (aiop->dev->seq_passes == 0 ||
- --aiop->dev->seq_passes == 0) {
- time_now_log(LOG_NOTICE,
- "Finished sequential %ss on %s count %d",
- is_readonly() ? read_str : write_str,
- aiop->dev->logicalname,
- aiop->count);
- if (has_no_unwritten(aiop)) {
- aiop->dev->choose_block = rand_block;
- } else {
- aiop->dev->choose_block = unwritten_block_rand;
- }
- } else {
- aiop->off = aiop->dev->block = this_proc();
- time_now_log(LOG_NOTICE,
- "Starting sequential series again on %s counts %d",
- aiop->dev->logicalname, aiop->count);
- }
- aiop->dev->choose_block(map, aiop, start, len, maplen);
- } else {
- assert(!does_not_have_lock);
- ASSERT_OFFSET(aiop->dev->block);
- aiop->dev->block += opts.nprocs;
- if (aiop->dev->block >= len) {
- aiop->dev->block = len;
- }
- }
- }
- static void
- unwritten_block_seq(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- unwritten_block(map, aiop, start, len, maplen, seq_block);
- }
- static void
- unwritten_block_rand(bitmap_t *map, struct aio_str *aiop,
- ullong_t start, ullong_t len, int maplen)
- {
- unwritten_block(map, aiop, start, len, maplen, rand_block);
- }
/*
 * I leave the source as this _may_ be useful in the future.
 *
 * set_write. Set the bit for block off in map. Returns 1 when the
 * bit was set by this call and 0 when it was already set.
 */
#ifdef NOT_USED_CODE
static char
set_write(bitmap_t map[], ullong_t off, int maplen)
{
	ulong_t slot = GET_OFF(off) % maplen;
	char status = 0;

	mutex->enter(slot);
	if (!(map[slot] & GET_BIT(off))) {
		/* Not locked yet, so lock it. */
		map[slot] |= GET_BIT(off);
		status = 1;
	}
	mutex->exit(slot);
	TNF_PROBE_3(set_write, "set_write", "sunw%cte%diskomizer",
	    tnf_opaque, off, off,
	    tnf_opaque, map, map,
	    tnf_char, status, status);
	return (status);
}
#endif
- void
- clear_write(bitmap_t map[], ullong_t off, ulong_t maplen)
- {
- ulong_t tmp = (GET_OFF(off) % maplen);
- ulong_t x;
- bitmap_t bit = ~(GET_BIT(off));
- mutex->enter(tmp);
- x = map[tmp];
- map[tmp] &= bit;
- assert(~bit != (ulong_t)0);
- if (x == map[tmp]) {
- plog(LOG_ALERT, "Ooops block %#llx (0t%lld) was not locked\n",
- diskomizer_off2byteoff(off), diskomizer_off2byteoff(off));
- TNF_PROBE_2(clear_write, "clear_write failed",
- "sunw%cte%diskomizer",
- tnf_opaque, off, off, tnf_opaque, map, map);
- } else {
- TNF_PROBE_2(clear_write, "clear_write ok",
- "sunw%cte%diskomizer",
- tnf_opaque, off, off,
- tnf_opaque, map, map);
- }
- mutex->exit(tmp);
- }
#ifdef NOT_USED_CODE
/*
 * print_bitmap. Print the index and value of each of the maplen
 * words of map, in hex, and flush stdout.
 */
static void
print_bitmap(bitmap_t map[], int maplen)
{
	int word = 0;

	while (word < maplen) {
		pprintf("%#8.8X %#8.8X\n", word, map[word]);
		word++;
	}
	(void) fflush(stdout);
}
#endif
- /*
-  * find_next_free finds the "next" block that is not locked, starting from
-  * off, and locks it (sets its bit in the map).
-  *
-  * The search steps one block at a time, wrapping modulo len, and moves
-  * from one map-word mutex to the next with mutex->getnext().  After 4 * len
-  * steps without success an alert is logged, the mutex is dropped for a
-  * one second sleep, and the search carries on.
-  *
-  * Returns the block offset that was locked.
-  */
- ullong_t
- find_next_free(bitmap_t map[], ullong_t off, int len, int maplen)
- {
-     ulong_t slot = (GET_OFF(off) % maplen);
-     ulong_t tries = 0;
-     bitmap_t bit = GET_BIT(off);
-     mutex->enter(slot);
-     for (;;) {
-         ulong_t next_slot;
-         if ((map[slot] & bit) == 0)
-             break;
-         /* this one is taken, step to the following block */
-         off = off + 1;
-         off %= len;
-         next_slot = (GET_OFF(off) % maplen);
-         bit = GET_BIT(off);
-         mutex->getnext(slot, next_slot);
-         slot = next_slot;
-         TNF_PROBE_2(find_next_free_trying, "find_next_free trying",
-             "sunw%cte%diskomizer", tnf_longlong, off, off,
-             tnf_opaque, map, map);
-         if (tries >= (4 * len)) {
-             (void) plog(LOG_ALERT,
-                 "Unable to find free entry in map %#lx"
-                 " of length %d\n",
-                 (ulong_t)&map[0], maplen);
-             (void) fflush(stderr);
-             /* print_bitmap(map, maplen); */
-             /* let go of the lock while we wait */
-             mutex->exit(slot);
-             (void) sleep(1);
-             mutex->enter(slot);
-             tries = 0;
-         }
-         tries++;
-     }
-     map[slot] |= GET_BIT(off);
-     mutex->exit(slot);
-     TNF_PROBE_2(find_next_free_found, "find_next_free found",
-         "sunw%cte%diskomizer", tnf_longlong, off, off,
-         tnf_opaque, map, map);
-     return (off);
- }
- /*
-  * Report whether the write bit is set for this offset: 1 if set, 0 if
-  * clear.  No locking is done here; the lock MUST already be held by the
-  * caller.
-  */
- static int
- test_write(bitmap_t map[], ullong_t off, int maplen)
- {
-     ulong_t slot = GET_OFF(off) % maplen;
-     if (map[slot] & GET_BIT(off))
-         return (1);
-     return (0);
- }
- /*
-  * If the bit for this block is already set in the map (the block is being
-  * read from or written to) return 1.  Otherwise set the bit, so locking
-  * the block, and return 0.  The map word's mutex is held for the duration.
-  */
- static int
- pend_write_with_lock(bitmap_t map[], ullong_t off, int maplen)
- {
-     ulong_t slot = GET_OFF(off) % maplen;
-     int status;
-     mutex->enter(slot);
-     plog(LOG_DEBUG, "Disk Block %lld\n", diskomizer_off2byteoff(off));
-     status = (map[slot] & GET_BIT(off)) ? 1 : 0;
-     if (status == 0) {
-         /* block was free, take it */
-         map[slot] |= GET_BIT(off);
-     }
-     mutex->exit(slot);
-     return (status);
- }
- /*
-  * Pick the io length index for this aio.  Buffers for which
-  * is_executable() is true always get max_disk_io_len; anything else gets
-  * an entry of opts.disk_io_sizes.weightings chosen with lrand48().
-  */
- static uchar_t
- choose_iolen(struct aio_str *aiop)
- {
-     return (is_executable(aiop->buf) ? max_disk_io_len :
-         opts.disk_io_sizes.weightings[lrand48() %
-         opts.disk_io_sizes.wlen]);
- }
- /*
-  * Run read_buffer_initializer() over the read buffer, bracketed by
-  * expect_signal()/cancel_expected_signal() for SIGBUS so that a SIGBUS
-  * raised during the initialization is registered as expected.
-  */
- static void
- init_read_buf(uchar_t *buf, ulong_t len, const uchar_t * const write_buf)
- {
-     void *expected;
-     expected = expect_signal(SIGBUS, "memset", buf, len);
-     read_buffer_initializer(buf, len, write_buf);
-     cancel_expected_signal(SIGBUS, expected);
- }
- /*
-  * Attach the device's write map through shm_ops->attach(), retrying until
-  * it succeeds.  ATTACH_ERROR is invoked on the first failure and on every
-  * 10000th failure after that; if there were any failures a warning is
-  * logged once the attach finally works.
-  *
-  * Returns the attached map (never NULL).
-  */
- static bitmap_t *
- attach_dev_writemap(struct device *dev)
- {
-     bitmap_t *map;
-     int failures = 0;
-     for (;;) {
-         map = (bitmap_t *)shm_ops->attach(dev->writemap_handle);
-         if (map != NULL)
-             break;
-         if ((failures++ % 10000) == 0) {
-             ATTACH_ERROR(dev->writemap_handle);
-         }
-     }
-     if (failures > 0) {
-         plog(LOG_WARNING, "attached o.k.\n");
-     }
-     return (map);
- }
- /*
-  * Convenience wrapper: attach the write map of the device this aio
-  * belongs to.
-  */
- static bitmap_t *
- attach_aio_writemap(struct aio_str *aiop)
- {
-     struct device *dev = aiop->dev;
-     return (attach_dev_writemap(dev));
- }
- /*
-  * Release the write-map bit for the block this aio refers to (aiop->off):
-  * attach the device's write map, clear the bit, detach again.
-  */
- static void
- clear_writemap(struct aio_str *aiop)
- {
-     bitmap_t *map;
-     map = attach_aio_writemap(aiop);
-     clear_write(map, aiop->off, aiop->dev->writemap_size);
-     shm_ops->detach(aiop->dev->writemap_handle);
- }
- /*
-  * Push this aio's block onto the device's "recent" list.  push_recent()
-  * hands back an offset which replaces aiop->off; unless that is -1 the
-  * write-map bit for the returned block is cleared.
-  */
- static void
- clear_writemap_success(struct aio_str *aiop)
- {
-     aiop->off = push_recent(aiop->dev->recent, aiop->off);
-     if (aiop->off == -1) {
-         /* nothing was handed back, so nothing to release */
-         return;
-     }
-     clear_writemap(aiop);
- }
- /*
-  * Choose the block for a new random read and return its attached blks
-  * array (the caller is left to detach AIO_BLOCK_HANDLE(aiop)).
-  *
-  * A block popped from the device's "recent" list is used if there is one.
-  * Otherwise rand_block() is called repeatedly until it picks a block that
-  * has something recorded in last_io: in read only mode the low bit of
-  * r.o.last_io must be set, otherwise r.w.last_io must be non-NULL.
-  * Rejected blocks are detached and their write-map bit cleared again.
-  */
- static struct blks *
- choose_new_random_read(struct aio_str *aiop, ullong_t start, ullong_t len)
- {
-     struct blks *blocks;
-     bitmap_t *map;
-     int usable;
-     aiop->off = pop_recent(aiop->dev->recent);
-     if (aiop->off != -1) {
-         return (aio_attach(aiop));
-     }
-     map = attach_aio_writemap(aiop);
-     aiop->retrycnt = 0;
-     do {
-         rand_block(map, aiop, start, len,
-             aiop->dev->writemap_size);
-         blocks = aio_attach(aiop);
-         if (is_readonly()) {
-             usable = (0x1 & (uint_t)
-                 blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io) != 0;
-         } else {
-             usable =
-                 blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io != NULL;
-         }
-         if (!usable) {
-             /* hand the block back and try another */
-             shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
-             blocks = NULL;
-             clear_write(map, aiop->off, aiop->dev->writemap_size);
-         }
-     } while (!usable);
-     shm_ops->detach(aiop->dev->writemap_handle);
-     return (blocks);
- }
- /*
- * do a new read.
- *
- * Set up aiop for a read of the kind selected by read_type and submit it
- * with daio->aread().
- *
- * aiop - the aio descriptor to use for the read.
- * start - passed on to the block choosers and to build_bufhr().
- * read_type - NORMAL_READ, RETRY_READ, WRITE_READ, READ_ONLY_RAND or
- * READ_ONLY_SEQ. Any other value gets no per-type set up.
- *
- * Before the read is issued the read buffer is initialized over
- * INDEX_TO_DIOLEN(max_disk_io_len) bytes by init_read_buf().
- *
- * If the device is stopping no read is issued: the block's write map bit
- * is cleared, the read buffer is returned and the aio is taken off the
- * fd's all_aios list.
- *
- * Returns aiop->tv.tv_sec as set by my_gettimeofday().
- */
- time_t
- do_new_read(struct aio_str *aiop, ullong_t start, read_type_t read_type)
- {
- ullong_t offset;
- int fd = aiop->fd->fd;
- ullong_t len;
- struct blks *blocks = NULL;
- /*
- * if opts.sequential_passes is equal to seq_passes then we are on the
- * first pass or opts.sequential_passes was zero to start with. In
- * the second case once the disk is full aiop->dev->block will contain
- * the address of the last block anyway.
- */
- if (aiop->dev->seq_passes == opts.sequential_passes) {
- len = aiop->dev->block;
- } else {
- len = LEN_BYTES2BLOCKS(aiop->dev);
- }
- if (read_type != RETRY_READ && /* retries do not use up the countdown */
- OPTION(nloops) != 0 && aiop->dev->countdown != 0) {
- if (--aiop->dev->countdown == 0) {
- time_now_log(LOG_INFO, "countdown on device %s is zero",
- aiop->dev->logicalname);
- }
- }
- if (read_type == NORMAL_READ) { /* random block, checked against its last write buffer */
- struct shadow_hdr const *shadow;
- blocks = choose_new_random_read(aiop, start, len);
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- aiop->iolen = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_iolen;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- aiop->daio_id.buf = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io;
- aiop->hdr = build_bufhr(aiop->dev, start, aiop->off);
- shadow = get_shadow_hdr(aiop->daio_id.buf);
- aiop->daio_id.chksum = shadow->chksums[aiop->iolen];
- aiop->daio_id.buf_id = get_write_buf_id(
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- } else if (read_type == RETRY_READ) {
- aiop->retrycnt++; /* everything else in aiop is reused as it stands */
- } else if (read_type == WRITE_READ) { /* block and iolen are already set in aiop */
- struct shadow_hdr const *shadow;
- blocks = aio_attach(aiop);
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- aiop->daio_id.buf = blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io;
- aiop->daio_id.buf_id = get_write_buf_id(
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- shadow = get_shadow_hdr(aiop->daio_id.buf);
- aiop->daio_id.chksum = shadow->chksums[aiop->iolen];
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- } else if (read_type == READ_ONLY_RAND) { /* random block, always read at max_disk_io_len */
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- blocks = choose_new_random_read(aiop, start, len);
- aiop->daio_id.buf =
- BIT2CHARSTAR(blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io);
- aiop->daio_id.chksum =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_chksum;
- aiop->iolen = max_disk_io_len;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- } else if (read_type == READ_ONLY_SEQ) { /* block comes from the device's choose_block routine */
- bitmap_t *map;
- if (aiop->buf == NULL)
- aiop->buf = get_read_buf();
- map = attach_aio_writemap(aiop);
- len = LEN_BYTES2BLOCKS(aiop->dev); /* always the whole device here */
- aiop->dev->choose_block(map, aiop, start, len,
- aiop->dev->writemap_size);
- shm_ops->detach(aiop->dev->writemap_handle);
- blocks = aio_attach(aiop);
- aiop->daio_id.buf =
- BIT2CHARSTAR(blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_io);
- aiop->daio_id.chksum =
- blocks[AIO_BLOCK_INDEX(aiop)].r.o.last_chksum;
- aiop->iolen = max_disk_io_len;
- aiop->daio_id.bufs = INDEX_TO_DIOLEN(aiop->iolen);
- }
- offset = aio_str2byteoff(aiop);
- while (my_gettimeofday(&aiop->tv, NULL) == -1)
- pperror("gettimeofday");
- if (blocks == NULL) /* e.g. RETRY_READ, which attaches nothing above */
- blocks = aio_attach(aiop);
- init_read_buf(aiop->buf, INDEX_TO_DIOLEN(max_disk_io_len),
- blocks[AIO_BLOCK_INDEX(aiop)].r.w.last_io);
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- for (;;) { /* goes round again only when aread fails with EAGAIN */
- if (aiop->dev->stop_flag ||
- stop_check(aiop->dev->shared_data_handle)) {
- if (aiop->dev->stop_flag == 0) {
- plog(LOG_NOTICE, "Stopping %s\n",
- aiop->dev->logicalname);
- aiop->dev->stop_flag = 1;
- } else if (aiop->dev->need_to_stop &&
- set_shared_stop_flag(
- aiop->dev->shared_data_handle) != -1) {
- aiop->dev->need_to_stop = 0;
- }
- clear_writemap(aiop); /* stopping: unlock the block, give up buffer and list slot */
- return_read_buf(aiop->buf);
- aiop->buf = NULL;
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- break;
- }
- ZERO_OBJ(aiop->error.desc);
- /* Move to the beginning of the all_aios list */
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- TNF_PROBE_4(aioread, "aioread",
- "sunw%cte%diskomizer%aio read",
- tnf_long, fd, aiop->fd->fd,
- tnf_opaque, offset, offset,
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- if (daio->aread(fd, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- &aiop->aio_res, &aiop->daio_id) < 0) {
- if (errno == EAGAIN) { /* report it and submit the same read again */
- AIOREAD_ERROR(fd, aiop->fd->name,
- aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- SEEK_SET, &aiop->aio_res);
- continue;
- } else {
- AIOREAD_ERROR(fd, aiop->fd->name, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen), offset,
- SEEK_SET, &aiop->aio_res);
- clear_writemap(aiop); /* NOTE(review): control still reaches total_read++ below - confirm intended */
- }
- }
- aiop->fd->total_read++;
- break;
- }
- return (aiop->tv.tv_sec);
- }
- /*
-  * Choose aiop->iolen for the write buffer in aiop->buf and return the
-  * buffer's shadow header.
-  *
-  * A buffer whose shadow header is not yet marked BUF_READY is set up
-  * first: init_buf() is run, a body checksum is stored for every io length
-  * index from 0 up to the last entry of opts.disk_io_sizes.weightings, the
-  * type is copied from the buffer header and BUF_READY is set.  With
-  * opts.obscure_execute set, an executable buffer is also run once via
-  * run_func().
-  *
-  * Ready buffers marked BUF_READ_ONLY always use max_disk_io_len; all
-  * others get their length from choose_iolen().
-  */
- static struct shadow_hdr const *
- set_io_len(struct aio_str *aiop)
- {
-     struct shadow_hdr const *shadow_hdr = get_shadow_hdr(aiop->buf);
-     struct shadow_hdr *shadow;
-     int idx;
-     if (shadow_hdr->type.BUF_READY) {
-         if (shadow_hdr->type.BUF_READ_ONLY) {
-             aiop->iolen = max_disk_io_len;
-         } else {
-             assert(*aiop->buf == 0xAA || *aiop->buf == 0x55);
-             aiop->iolen = choose_iolen(aiop);
-         }
-         return (shadow_hdr);
-     }
-     /* first use of this buffer: the shadow header has to be filled in */
-     shadow = (struct shadow_hdr *)shadow_hdr;
-     init_buf(aiop->buf);
-     idx = 0;
-     while (idx <= opts.disk_io_sizes.weightings[
-         opts.disk_io_sizes.wlen - 1]) {
-         shadow->chksums[idx] =
-             check_bufbody(aiop->buf,
-             INDEX_TO_DIOLEN(idx));
-         idx++;
-     }
-     shadow->type = get_bufhdr_a(aiop->buf).type;
-     shadow->type.BUF_READY = 1;
-     if (opts.obscure_execute && is_executable(aiop->buf)) {
-         /*
-          * NOTE(review): aiop->iolen here is whatever the aio held
-          * before this call; the new length is only chosen below.
-          */
-         run_func(aiop->buf,
-             opts.disk_io_sizes.vals[aiop->iolen] -
-             SIZEOF_BUFHDR);
-     }
-     aiop->iolen = choose_iolen(aiop);
-     return (shadow_hdr);
- }
- /*
-  * Return non-zero when the device's block chooser is one of the two
-  * sequential routines, seq_block or unwritten_block_seq.
-  */
- static int
- is_sequential(struct aio_str *aiop)
- {
-     if (aiop->dev->choose_block == seq_block)
-         return (1);
-     if (aiop->dev->choose_block == unwritten_block_seq)
-         return (1);
-     return (0);
- }
- /*
-  * Walk the device list starting at dev and pop the first deferred io
-  * found on any device's deferred_ios list.  Returns NULL if no device has
-  * one.
-  */
- static struct aio_str *
- get_deferred_io(struct device *dev)
- {
-     struct device *devp = dev;
-     struct aio_str *aiop = NULL;
-     while (devp != NULL &&
-         (aiop = pop_from_aio_list(&devp->deferred_ios)) == NULL) {
-         devp = devp->next;
-     }
-     return (aiop);
- }
- /*
-  * For each device on the list starting at dev, pop at most one deferred
-  * io and restart it through its handler, decrementing cancelled_count for
-  * each one restarted.  check_exit_flag() is called once per device.
-  *
-  * When no device has anything left on its deferred list, start_deferred
-  * is pointed at nop.
-  */
- static void
- deferred_starter(struct device *dev, ullong_t start)
- {
-     int still_deferred = 0;
-     struct device *devp = dev;
-     while (devp != NULL) {
-         struct aio_str *aiop;
-         check_exit_flag();
-         aiop = pop_from_aio_list(&devp->deferred_ios);
-         if (aiop != NULL) {
-             cancelled_count--;
-             aiop->handler(aiop, start);
-         }
-         if (is_aio_on_list(&devp->deferred_ios)) {
-             /* more waiting on this device, so come back later */
-             still_deferred = 1;
-         }
-         devp = devp->next;
-     }
-     if (!still_deferred) {
-         start_deferred = (void (*)(struct device *, ullong_t)) nop;
-     }
- }
- /*
-  * Add up total_write over every fd of the device.  The fd list is walked
-  * from dev->fdhead until the walk arrives back at dev->fdhead.
-  */
- static int
- number_of_writes(struct device *dev)
- {
-     struct fds *fd = dev->fdhead;
-     int total = 0;
-     for (;;) {
-         total += fd->total_write;
-         fd = fd->next;
-         if (fd == dev->fdhead)
-             break;
-     }
-     return (total);
- }
- /*
- * static void
- *
- * NOTE(review): the comment here used to read just "static void";
- * presumably the function was file-local at one time - confirm before
- * changing the linkage.
- *
- * do_new_write: set up aiop for a write and submit it with daio->awrite().
- *
- * aiop - the aio descriptor. If aiop->fd is NULL on entry the aio is
- * treated as a deferred io: it is given dev->fdhead and put on that fd's
- * all_aios list.
- * start - byte offset of the start of the area being tested; the write
- * goes to start + INDEX_TO_DIOLEN(max_disk_io_len) * aiop->off.
- * retry - zero to choose a new buffer, io length and block. Non-zero to
- * write aiop->off again (its lock is already held); the value 1 also
- * prints a retry message and bumps aiop->retrycnt.
- *
- * When no acceptable block can be found the aio is moved to the device's
- * deferred_ios list and nothing is written. When the device is stopping
- * the block is unlocked, the buffer returned and the aio taken off the
- * all_aios list.
- */
- void
- do_new_write(struct aio_str *aiop, ullong_t start, int retry)
- {
- ullong_t len;
- ullong_t offset;
- long writemap_size;
- struct shadow_hdr const *shadow_hdr;
- struct blks *blocks;
- struct blks *block;
- char deferred;
- if (aiop->fd == NULL) { /* no fd yet: this is a deferred io being started */
- aiop->fd = aiop->dev->fdhead;
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- deferred = 1;
- } else {
- deferred = 0;
- }
- len = LEN_BYTES2BLOCKS(aiop->dev);
- writemap_size = aiop->dev->writemap_size;
- if (!retry) { /* new write: pick a buffer, an io length and a block */
- bitmap_t *map;
- int i;
- if (write_loops) {
- if (--aiop->dev->countdown == 0) {
- time_now_log(LOG_INFO,
- "countdown on device %s is zero",
- aiop->dev->logicalname);
- }
- }
- if (aiop->buf == NULL) {
- aiop->buf = get_write_buf();
- }
- shadow_hdr = set_io_len(aiop);
- map = attach_aio_writemap(aiop);
- for (i = 0; /* cstyle */; i++) {
- aiop->dev->choose_block(map, aiop, start, len,
- writemap_size);
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- if (block->r.w.last_io == NULL || /* never written, or read enough and last written from another buffer */
- (block->read_count >= OPTION(read_minimum) &&
- block->r.w.last_io != aiop->buf)) {
- break;
- } else {
- if (block->r.w.last_io == aiop->buf) {
- uchar_t *buf;
- /*
- * Get the new write buf first so that
- * You definitely get a new buffer.
- */
- if ((buf = get_write_buf()) != NULL) {
- return_write_buf(aiop->buf);
- aiop->buf = buf;
- shadow_hdr = set_io_len(aiop);
- break;
- }
- }
- clear_write(map, aiop->off, writemap_size); /* block rejected: unlock it and detach */
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- block = blocks = NULL;
- if (i * OPTION(obscure_search_multiplier) >= /* searched long enough, or already deferred: (re)defer */
- len || deferred) {
- if (!deferred) {
- union err_info err_info;
- err_info.str = "write";
- report_error(aiop, err_info,
- ERR_DEFERRED);
- }
- remove_from_aio_list(
- &aiop->fd->all_aios, aiop);
- aiop->fd = NULL;
- add_to_aio_list(
- &aiop->dev->deferred_ios, aiop);
- return_write_buf(aiop->buf);
- aiop->buf = NULL;
- shm_ops->detach(
- aiop->dev->writemap_handle);
- if (number_of_writes(aiop->dev) == 0) { /* no write counted on any fd of this device */
- start_deferred =
- deferred_starter;
- }
- return;
- }
- }
- }
- aiop->retrycnt = 0;
- shm_ops->detach(aiop->dev->writemap_handle);
- } else {
- /* if we are retrying then we already have the lock. */
- ullong_t blockno = aio_str2byteoff(aiop);
- shadow_hdr = get_shadow_hdr(aiop->buf);
- if (retry == 1) {
- pfprintf(stderr,
- "%s Block 0t%lld (%#llx) retry count %d\n",
- aiop->fd->name, blockno, blockno,
- ++aiop->retrycnt);
- }
- blocks = aio_attach(aiop);
- block = &blocks[AIO_BLOCK_INDEX(aiop)];
- }
- offset = (ullong_t)start + (INDEX_TO_DIOLEN(max_disk_io_len)*aiop->off); /* byte offset of the block */
- assert((ullong_t)offset >= (ullong_t)start);
- assert((ullong_t)offset <= (ullong_t)(start + aiop->dev->length -
- INDEX_TO_DIOLEN(max_disk_io_len)));
- while (my_gettimeofday(&aiop->tv, NULL) == -1)
- pperror("gettimeofday");
- if (shadow_hdr->type.BUF_READ_ONLY == 0) {
- /*
- * Set up the buffer header and store away the path_id of the
- * path we are using, and the header checksum.
- */
- unprotect_buf(aiop->buf);
- toggle_bufhdr(aiop->buf);
- set_bufhdr_all(aiop->buf, shadow_hdr->chksums[aiop->iolen],
- INDEX_TO_DIOLEN(aiop->iolen),
- aiop->fd->devid, offset, shadow_hdr->type,
- ++block->sequence,
- aiop->tv.tv_sec);
- block->path_id = aiop->fd->path_id;
- block->hdrchksum =
- set_buf_hdrchksum(aiop->buf);
- protect_buf(aiop->buf);
- } else { /* read only buffer: its header is not rewritten here */
- block->hdrchksum = check_bufhdr(aiop->buf,
- get_bufhdr_hdrchksum(aiop->buf));
- }
- if (get_bufhdr_hdrchksum(aiop->buf) != block->hdrchksum) {
- pfprintf(stderr, "writing bad checksum buf %#lx\n",
- (ulong_t)aiop->buf);
- }
- if (opts.obscure_execute && is_executable(aiop->buf)) {
- plog(LOG_DEBUG, "Writing executable buffer\n");
- }
- /*
- * This if is saying that this process should have initialized
- * this block, during the sequential part of the run. So last_io
- * should be set. If not then something went wrong.
- *
- * The block could also have been skipped as it was busy and put on
- * the unwritten list, so only check if the unwritten list is
- * empty.
- */
- if (!is_sequential(aiop) && block->r.w.last_io == NULL &&
- (aiop->off % opts.nprocs) == this_proc() &&
- aiop->dev->failed_to_push_unwritten == 0 &&
- blocks[AIO_BLOCK_INDEX(aiop)].u.was_unwritten == 1 &&
- !is_unwritten(aiop)) {
- ullong_t blockno = aio_str2byteoff(aiop);
- pfprintf(stderr, "Device %s\n", aiop->fd->name);
- pfprintf(stderr, "Device len %#llx\n", aiop->dev->length);
- pfprintf(stderr, "This proc %d nprocs %ld\n", this_proc(),
- opts.nprocs);
- pfprintf(stderr, "Block %#llx (0t%lld) byte off %llx error\n",
- aiop->off, aiop->off, blockno);
- pfprintf(stderr, "Last Requested %ld\n",
- block->last_requested);
- pfprintf(stderr, "Last return delta %d\n",
- block->last_returned_delta);
- pfprintf(stderr, "Read Count %d\n", block->read_count);
- pfprintf(stderr, "Last Io Len %d\n",
- INDEX_TO_DIOLEN(block->r.w.last_iolen));
- assert(block->r.w.last_io != NULL); /* always fails here: last_io was NULL in the test above */
- }
- shm_ops->detach(AIO_BLOCK_HANDLE(aiop));
- for (;;) { /* goes round again only when awrite fails with EAGAIN */
- if (aiop->dev->stop_flag ||
- stop_check(aiop->dev->shared_data_handle)) {
- if (aiop->dev->stop_flag == 0) {
- plog(LOG_NOTICE, "Stopping %s\n",
- aiop->dev->logicalname);
- aiop->dev->stop_flag = 1;
- } else if (aiop->dev->need_to_stop &&
- set_shared_stop_flag(
- aiop->dev->shared_data_handle) != -1) {
- aiop->dev->need_to_stop = 0;
- }
- clear_writemap(aiop); /* stopping: unlock the block, give up buffer and list slot */
- return_write_buf(aiop->buf);
- aiop->buf = NULL;
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- break;
- }
- assert(*aiop->buf == 0xAA || *aiop->buf == 0x55);
- ZERO_OBJ(aiop->error.desc);
- TNF_PROBE_4(daiowrite, "aiowrite",
- "sunw%cte%diskomizer%aio write",
- tnf_long, fd, aiop->fd->fd,
- tnf_opaque, offset, offset,
- tnf_opaque, aiop, aiop,
- aio_tnf_str, *aiop, aiop);
- aiop->daio_id.buf = aiop->buf;
- aiop->daio_id.buf_id = get_write_buf_id(aiop->buf);
- aiop->daio_id.hdr_len = sizeof (aiop->hdr);
- aiop->daio_id.hdr = (uchar_t *)&aiop->hdr;
- (void) memcpy(&aiop->hdr, aiop->buf, sizeof (aiop->hdr)); /* keep a copy of the header in the aio */
- aiop->daio_id.footer_len = 0;
- /* Move to the beginning of the all_aios list */
- remove_from_aio_list(&aiop->fd->all_aios, aiop);
- add_to_aio_list(&aiop->fd->all_aios, aiop);
- if (daio->awrite(aiop->fd->fd, aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen),
- offset, &aiop->aio_res, &aiop->daio_id) == -1) {
- int serrno = errno; /* saved before the error report, which may change errno */
- AIOWRITE_ERROR(aiop->fd->fd, aiop->fd->name,
- (ulong_t)aiop->buf,
- INDEX_TO_DIOLEN(aiop->iolen),
- offset,
- SEEK_SET,
- (ulong_t)&aiop->aio_res);
- if (serrno == EAGAIN) {
- continue;
- } else {
- clear_writemap(aiop);
- }
- } else if (!deferred) {
- aiop->fd->total_write++;
- if (is_aio_on_list(&aiop->dev->deferred_ios)) { /* a write got through, so try one deferred io too */
- aiop = pop_from_aio_list(
- &aiop->dev->deferred_ios);
- do_new_write(aiop, start, 0);
- }
- } else {
- aiop->fd->total_write++;
- plog(LOG_NOTICE, "Started deferred io to %s\n",
- aiop->dev->logicalname);
- }
- break;
- }
- }
- /*
-  * run_func: call the data area of a buffer as a function.
-  *
-  * buf  - the buffer; the code to run starts at get_buf_data(buf).
-  * size - number of bytes of code.  Only used when built for SPARC, where
-  *        every 32 bit word in that range is passed to flush() before the
-  *        call.
-  *
-  * On SPARC the call is silently skipped if the data area is not 4 byte
-  * aligned.
-  */
- /*ARGSUSED1*/
- void
- run_func(uchar_t *buf, size_t size)
- {
-     uchar_t *cptr;
- #ifdef SPARC
-     uint32_t *last, *ptr;
- #else
-     uint32_t *ptr;
- #endif
-     void (*func)(void);
-     cptr = get_buf_data(buf);
- #ifdef SPARC
-     /* check alignment for SPARC */
-     if ((ulong_t)cptr % 4) {
-         return;
-     }
- #endif
-     /*LINTED*/
-     ptr = (uint32_t *)cptr;
-     func = (void (*)(void))(ptr);
- #ifdef SPARC
-     last = ptr + size / sizeof (uint32_t);
-     for (; ptr < last; ptr++)
-         flush((int32_t *)ptr);
- #endif
-     /*
-      * %#lx needs an unsigned long argument, so func is cast in the same
-      * way as buf rather than being passed as a raw function pointer.
-      */
-     plog(LOG_DEBUG, "Running func %#lx in buf %#lx, type %llx\n",
-         (ulong_t)func, (ulong_t)buf, get_bufhdr(buf).start);
-     TNF_PROBE_1(run_func, "run_func",
-         "sunw%cte%diskomizer%aio execute run",
-         tnf_opaque, buf, buf);
-     func();
- }
- /*
-  * find_path looks for the fd whose path_id matches.  The search begins at
-  * fdhead->next and follows the next pointers until it has come round to
-  * fdhead, which is the last entry checked.
-  *
-  * Returns the matching fd, or NULL if there is none.
-  */
- struct fds *
- find_path(struct fds *fdhead, char path_id)
- {
-     struct fds *fd = fdhead;
-     do {
-         fd = fd->next;
-         if (fd->path_id == path_id)
-             return (fd);
-     } while (fd != fdhead);
-     return (NULL);
- }
- static struct bufhdr
- build_bufhr(struct device *dev, ullong_t start, ullong_t off)
- {
- struct bufhdr hdr;
- struct shadow_hdr const *shadow_hdr;
- struct blks *block;
- struct blks *blocks;
- struct fds *fd;
- ushort16_t hdrchksum;
- int error_count = 0;
- ullong_t offset = (ullong_t)start +
- (ullong_t)(INDEX_TO_DIOLEN(max_disk_io_len)*off);
- while ((blocks = shm_ops->attach(DEV_BLOCK_HANDLE(dev, off))) == NULL) {
- if (error_count++ % 10000 == 0)
- ATTACH_ERROR(DEV_BLOCK_HANDLE(dev, off)…
Large files are truncated, but you can click here to view the full file