/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh

https://github.com/adilger/zfs · Korn Shell · 191 lines · 88 code · 33 blank · 70 comment · 4 complexity · 2748009fb326683e952745e703679dab MD5 · raw file

  1. #!/bin/ksh -p
  2. #
  3. # CDDL HEADER START
  4. #
  5. # This file and its contents are supplied under the terms of the
  6. # Common Development and Distribution License ("CDDL"), version 1.0.
  7. # You may only use this file in accordance with the terms of version
  8. # 1.0 of the CDDL.
  9. #
  10. # A full copy of the text of the CDDL should have accompanied this
  11. # source. A copy of the CDDL is also available via the Internet at
  12. # http://www.illumos.org/license/CDDL.
  13. #
  14. # CDDL HEADER END
  15. #
  16. #
  17. # Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  18. #
  19. . $STF_SUITE/include/libtest.shlib
  20. . $STF_SUITE/tests/functional/events/events_common.kshlib
  21. . $STF_SUITE/tests/functional/fault/fault.cfg
  22. #
  23. # DESCRIPTION:
  24. # Testing Fault Management Agent ZED Logic - Physically removed device is
  25. # made unavail and onlined when reattached
  26. #
  27. # STRATEGY:
  28. # 1. Create a pool
  29. # 2. Simulate physical removal of one device
  30. # 3. Verify the device is unavailable
  31. # 4. Reattach the device
  32. # 5. Verify the device is onlined
  33. # 6. Repeat the same tests with a spare device:
  34. # zed will use the spare to handle the removed data device
  35. # 7. Repeat the same tests again with a faulted spare device:
  36. # the removed data device should be unavailable
  37. #
  38. # NOTE: the use of 'block_device_wait' throughout the test helps avoid race
  39. # conditions caused by mixing creation/removal events from partitioning the
  40. # disk (zpool create) and events from physically removing it (remove_disk).
  41. #
  42. # NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a
  43. # vdev to the unavailable state. The ZED does receive a removal notification
  44. # but only relies on it to activate a hot spare. Additional work is planned
  45. # to extend an existing ioctl interface to allow the ZED to transition the
  46. # vdev into a removed state.
  47. #
  48. verify_runnable "both"
  49. if is_linux; then
  50. # Add one 512b scsi_debug device (4Kn would generate IO errors)
  51. # NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
  52. # add 32m of fudge
  53. load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) 1 1 1 '512b'
  54. else
  55. log_unsupported "scsi debug module unsupported"
  56. fi
  57. function cleanup
  58. {
  59. destroy_pool $TESTPOOL
  60. rm -f $filedev1
  61. rm -f $filedev2
  62. rm -f $filedev3
  63. rm -f $sparedev
  64. unload_scsi_debug
  65. }
  66. log_assert "ZED detects physically removed devices"
  67. log_onexit cleanup
  68. filedev1="$TEST_BASE_DIR/file-vdev-1"
  69. filedev2="$TEST_BASE_DIR/file-vdev-2"
  70. filedev3="$TEST_BASE_DIR/file-vdev-3"
  71. sparedev="$TEST_BASE_DIR/file-vdev-spare"
  72. removedev=$(get_debug_device)
  73. typeset poolconfs=(
  74. "mirror $filedev1 $removedev"
  75. "raidz3 $filedev1 $filedev2 $filedev3 $removedev"
  76. "mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
  77. )
  78. log_must truncate -s $SPA_MINDEVSIZE $filedev1
  79. log_must truncate -s $SPA_MINDEVSIZE $filedev2
  80. log_must truncate -s $SPA_MINDEVSIZE $filedev3
  81. log_must truncate -s $SPA_MINDEVSIZE $sparedev
  82. for conf in "${poolconfs[@]}"
  83. do
  84. # 1. Create a pool
  85. log_must zpool create -f $TESTPOOL $conf
  86. block_device_wait ${DEV_DSKDIR}/${removedev}
  87. mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
  88. log_fail "get_prop mountpoint /$TESTPOOL"
  89. # 2. Simulate physical removal of one device
  90. remove_disk $removedev
  91. log_must mkfile 1m $mntpnt/file
  92. log_must zpool sync $TESTPOOL
  93. # 3. Verify the device is unvailable.
  94. log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
  95. # 4. Reattach the device
  96. insert_disk $removedev
  97. # 5. Verify the device is onlined
  98. log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
  99. # cleanup
  100. destroy_pool $TESTPOOL
  101. log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
  102. block_device_wait ${DEV_DSKDIR}/${removedev}
  103. done
  104. # 6. Repeat the same tests with a spare device: zed will use the spare to handle
  105. # the removed data device
  106. for conf in "${poolconfs[@]}"
  107. do
  108. # 1. Create a pool with a spare
  109. log_must zpool create -f $TESTPOOL $conf
  110. block_device_wait ${DEV_DSKDIR}/${removedev}
  111. log_must zpool add $TESTPOOL spare $sparedev
  112. mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
  113. log_fail "get_prop mountpoint /$TESTPOOL"
  114. # 2. Simulate physical removal of one device
  115. remove_disk $removedev
  116. log_must mkfile 1m $mntpnt/file
  117. log_must zpool sync $TESTPOOL
  118. # 3. Verify the device is handled by the spare.
  119. log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
  120. log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
  121. # 4. Reattach the device
  122. insert_disk $removedev
  123. # 5. Verify the device is onlined
  124. log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
  125. # cleanup
  126. destroy_pool $TESTPOOL
  127. log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
  128. block_device_wait ${DEV_DSKDIR}/${removedev}
  129. done
  130. # 7. Repeat the same tests again with a faulted spare device: zed should offline
  131. # the removed data device if no spare is available
  132. for conf in "${poolconfs[@]}"
  133. do
  134. # 1. Create a pool with a spare
  135. log_must zpool create -f $TESTPOOL $conf
  136. block_device_wait ${DEV_DSKDIR}/${removedev}
  137. log_must zpool add $TESTPOOL spare $sparedev
  138. mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
  139. log_fail "get_prop mountpoint /$TESTPOOL"
  140. # 2. Fault the spare device making it unavailable
  141. log_must zpool offline -f $TESTPOOL $sparedev
  142. log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
  143. # 3. Simulate physical removal of one device
  144. remove_disk $removedev
  145. log_must mkfile 1m $mntpnt/file
  146. log_must zpool sync $TESTPOOL
  147. # 4. Verify the device is unavailable
  148. log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
  149. # 5. Reattach the device
  150. insert_disk $removedev
  151. # 6. Verify the device is onlined
  152. log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
  153. # cleanup
  154. destroy_pool $TESTPOOL
  155. log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
  156. block_device_wait ${DEV_DSKDIR}/${removedev}
  157. done
  158. log_pass "ZED detects physically removed devices"