/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh

https://github.com/adilger/zfs · Korn Shell · 187 lines · 133 code · 12 blank · 42 comment · 4 complexity · 9f86b2087f6107a964bf361d3041c661 MD5 · raw file

  1. #!/bin/ksh -p
  2. #
  3. # CDDL HEADER START
  4. #
  5. # This file and its contents are supplied under the terms of the
  6. # Common Development and Distribution License ("CDDL"), version 1.0.
  7. # You may only use this file in accordance with the terms of version
  8. # 1.0 of the CDDL.
  9. #
  10. # A full copy of the text of the CDDL should have accompanied this
  11. # source. A copy of the CDDL is also available via the Internet at
  12. # http://www.illumos.org/license/CDDL.
  13. #
  14. # CDDL HEADER END
  15. #
  16. #
  17. # Copyright (c) 2019, Datto Inc. All rights reserved.
  18. #
  19. . $STF_SUITE/include/libtest.shlib
  20. . $STF_SUITE/tests/functional/replacement/replacement.cfg
  21. #
  22. # DESCRIPTION:
  23. # Testing resilver restart logic both with and without the deferred resilver
  24. # feature enabled, verifying that resilver is not restarted when it is
  25. # unnecessary.
  26. #
  27. # STRATEGY:
  28. # 1. Create a pool
  29. # 2. Create four filesystems with the primary cache disabled to force reads
  30. # 3. Write four files simultaneously, one to each filesystem
  31. # 4. Do with and without deferred resilvers enabled
  32. # a. Replace a vdev with a spare & suspend resilver immediately
  33. # b. Verify resilver starts properly
  34. # c. Offline / online another vdev to introduce a new DTL range
  35. # d. Verify that the resilver restarts or is deferred
  36. # e. Inject read errors on vdev that was offlined / onlined
  37. # f. Verify that resilver did not restart
  38. # g. Unsuspend resilver and wait for it to finish
  39. # h. Verify that there are two resilvers and nothing is deferred
  40. #
  #
  # Undo everything this test changed: restore the three saved tunables,
  # clear all zinject handlers, destroy the test pool and delete the
  # file-backed vdevs (including the spare).
  #
  function cleanup
  {
    log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
    log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
        $ORIG_SCAN_SUSPEND_PROGRESS
    log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
    log_must zinject -c all
    destroy_pool $TESTPOOL1
    # quote the paths (and end option parsing) so rm only ever sees the
    # exact vdev files, never word-split or glob-expanded fragments
    rm -f -- "${VDEV_FILES[@]}" "$SPARE_VDEV_FILE"
  }
  #
  # Count the resilver start events reported by `zpool events` and,
  # optionally, check which vdev (if any) carries the resilver-deferred flag
  # in the `zdb -C` output. Any mismatch is reported through log_fail.
  #
  # $1 msg   - suffix appended to the log messages (e.g. ' after zinject')
  # $2 cnt   - expected number of resilver_start events
  # $3 defer - ''  : skip the deferred-vdev check
  #            '-' : no vdev may have resilver deferred
  #            N   : exactly vdev N must have resilver deferred
  #
  # Returns 0 when every requested check passes.
  #
  function verify_restarts # <msg> <cnt> <defer>
  {
    # keep all working variables local to the function
    typeset msg=$1
    typeset cnt=$2
    typeset defer=$3
    typeset resilvers vdev_defers

    # check the number of resilver starts in the events log
    resilvers=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
    log_note "expected $cnt resilver start(s)$msg, found $resilvers"
    if [[ "$resilvers" -ne "$cnt" ]]
    then
      log_fail "expected $cnt resilver start(s)$msg, found $resilvers"
    fi

    # no deferred-vdev expectation was given: nothing more to check
    [[ -z "$defer" ]] && return 0

    # use zdb to find which vdevs have the resilver defer flag: remember the
    # digits of the most recent "children" line and print them whenever the
    # flag line follows
    vdev_defers=$(zdb -C $TESTPOOL1 | awk '
      /children/ { gsub(/[^0-9]/, ""); child = $0 }
      /com\.datto:resilver_defer$/ { print child }
    ')

    if [[ "$defer" == "-" ]]
    then
      if [[ -n "$vdev_defers" ]]
      then
        log_fail "didn't expect any vdevs to have resilver deferred"
      fi
      return 0
    fi

    # compare as strings: a numeric -eq would evaluate an empty list as 0
    # (falsely matching vdev 0) and cannot handle a multi-line list
    if [[ "$vdev_defers" != "$defer" ]]
    then
      log_fail "resilver deferred set on unexpected vdev: $vdev_defers"
    fi
    return 0
  }
  log_assert "Check for unnecessary resilver restarts"

  # save the current tunables so that cleanup can restore them
  ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
  ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
  ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)

  # Expected results at the four verify_restarts checkpoints of each pass.
  # RESTARTS / DEFER_RESTARTS hold the resilver start counts; VDEVS /
  # DEFER_VDEVS hold the deferred-vdev expectation ('' = not checked,
  # '-' = no vdev deferred, N = vdev N deferred). The DEFER_* values replace
  # the plain ones for the pass that runs with feature@resilver_defer enabled.
  set -A RESTARTS -- '1' '2' '2' '2'
  set -A VDEVS -- '' '' '' ''
  set -A DEFER_RESTARTS -- '1' '1' '1' '2'
  set -A DEFER_VDEVS -- '-' '2' '2' '-'

  # arguments for zpool replace; intentionally a single string that is
  # word-split into two device arguments when used
  VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"

  log_onexit cleanup

  # ensure that enough events will be saved
  log_must set_tunable32 ZEVENT_LEN_MAX 512

  log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE

  log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL1 \
      raidz ${VDEV_FILES[@]}

  # create 4 filesystems
  for fs in fs{0..3}
  do
    log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL1/$fs
  done

  # simultaneously write 16M to each of them
  set -A DATAPATHS /$TESTPOOL1/fs{0..3}/dat.0
  log_note "Writing data files"
  dd_pids=""
  for path in ${DATAPATHS[@]}
  do
    dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
    dd_pids="$dd_pids $!"
  done
  # reap every writer by pid: a bare `wait` would discard the exit statuses
  # and let the test continue without the data it is supposed to resilver
  for pid in $dd_pids
  do
    wait $pid || log_fail "failed to write data file (dd pid $pid)"
  done

  # test without and with deferred resilver feature enabled
  for test in "without" "with"
  do
    log_note "Testing $test deferred resilvers"

    if [[ $test == "with" ]]
    then
      log_must zpool set feature@resilver_defer=enabled $TESTPOOL1
      RESTARTS=( "${DEFER_RESTARTS[@]}" )
      VDEVS=( "${DEFER_VDEVS[@]}" )
      # the first pass left the spare in place, so this pass replaces in the
      # opposite direction
      VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
    fi

    # clear the events
    log_must zpool events -c

    # limit scanning time
    log_must set_tunable32 RESILVER_MIN_TIME_MS 50

    # initiate a resilver and suspend the scan as soon as possible
    log_must zpool replace $TESTPOOL1 $VDEV_REPLACE
    log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1

    # there should only be 1 resilver start
    verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"

    # offline then online a vdev to introduce a new DTL range after current
    # scan, which should restart (or defer) the resilver
    log_must zpool offline $TESTPOOL1 ${VDEV_FILES[2]}
    log_must zpool sync $TESTPOOL1
    log_must zpool online $TESTPOOL1 ${VDEV_FILES[2]}
    log_must zpool sync $TESTPOOL1

    # there should now be 2 resilver starts w/o defer, 1 with defer
    verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"

    # inject read io errors on vdev and verify resilver does not restart
    log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL1
    log_must cat ${DATAPATHS[1]} > /dev/null
    log_must zinject -c all

    # there should still be 2 resilver starts w/o defer, 1 with defer
    verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"

    # unsuspend resilver
    log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
    log_must set_tunable32 RESILVER_MIN_TIME_MS 3000

    # wait for resilver to finish
    log_must zpool wait -t resilver $TESTPOOL1
    log_must is_pool_resilvered $TESTPOOL1

    # wait for a few txg's to see if a resilver happens
    log_must zpool sync $TESTPOOL1
    log_must zpool sync $TESTPOOL1

    # there should now be 2 resilver starts
    verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
  done

  log_pass "Resilver did not restart unnecessarily"