PageRenderTime 61ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/fs/nfs/blocklayout/dev.c

https://github.com/kvaneesh/linux
C | 548 lines | 448 code | 85 blank | 15 comment | 68 complexity | 60cb2090c81195b6cebd62204719dd5e MD5 | raw file
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2014-2016 Christoph Hellwig.
  4. */
  5. #include <linux/sunrpc/svc.h>
  6. #include <linux/blkdev.h>
  7. #include <linux/nfs4.h>
  8. #include <linux/nfs_fs.h>
  9. #include <linux/nfs_xdr.h>
  10. #include <linux/pr.h>
  11. #include "blocklayout.h"
  12. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  13. static void
  14. bl_free_device(struct pnfs_block_dev *dev)
  15. {
  16. if (dev->nr_children) {
  17. int i;
  18. for (i = 0; i < dev->nr_children; i++)
  19. bl_free_device(&dev->children[i]);
  20. kfree(dev->children);
  21. } else {
  22. if (dev->pr_registered) {
  23. const struct pr_ops *ops =
  24. dev->bdev->bd_disk->fops->pr_ops;
  25. int error;
  26. error = ops->pr_register(dev->bdev, dev->pr_key, 0,
  27. false);
  28. if (error)
  29. pr_err("failed to unregister PR key.\n");
  30. }
  31. if (dev->bdev)
  32. blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
  33. }
  34. }
  35. void
  36. bl_free_deviceid_node(struct nfs4_deviceid_node *d)
  37. {
  38. struct pnfs_block_dev *dev =
  39. container_of(d, struct pnfs_block_dev, node);
  40. bl_free_device(dev);
  41. kfree_rcu(dev, node.rcu);
  42. }
  43. static int
  44. nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
  45. {
  46. __be32 *p;
  47. int i;
  48. p = xdr_inline_decode(xdr, 4);
  49. if (!p)
  50. return -EIO;
  51. b->type = be32_to_cpup(p++);
  52. switch (b->type) {
  53. case PNFS_BLOCK_VOLUME_SIMPLE:
  54. p = xdr_inline_decode(xdr, 4);
  55. if (!p)
  56. return -EIO;
  57. b->simple.nr_sigs = be32_to_cpup(p++);
  58. if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
  59. dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
  60. return -EIO;
  61. }
  62. b->simple.len = 4 + 4;
  63. for (i = 0; i < b->simple.nr_sigs; i++) {
  64. p = xdr_inline_decode(xdr, 8 + 4);
  65. if (!p)
  66. return -EIO;
  67. p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
  68. b->simple.sigs[i].sig_len = be32_to_cpup(p++);
  69. if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
  70. pr_info("signature too long: %d\n",
  71. b->simple.sigs[i].sig_len);
  72. return -EIO;
  73. }
  74. p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
  75. if (!p)
  76. return -EIO;
  77. memcpy(&b->simple.sigs[i].sig, p,
  78. b->simple.sigs[i].sig_len);
  79. b->simple.len += 8 + 4 + \
  80. (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
  81. }
  82. break;
  83. case PNFS_BLOCK_VOLUME_SLICE:
  84. p = xdr_inline_decode(xdr, 8 + 8 + 4);
  85. if (!p)
  86. return -EIO;
  87. p = xdr_decode_hyper(p, &b->slice.start);
  88. p = xdr_decode_hyper(p, &b->slice.len);
  89. b->slice.volume = be32_to_cpup(p++);
  90. break;
  91. case PNFS_BLOCK_VOLUME_CONCAT:
  92. p = xdr_inline_decode(xdr, 4);
  93. if (!p)
  94. return -EIO;
  95. b->concat.volumes_count = be32_to_cpup(p++);
  96. if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
  97. dprintk("Too many volumes: %d\n", b->concat.volumes_count);
  98. return -EIO;
  99. }
  100. p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
  101. if (!p)
  102. return -EIO;
  103. for (i = 0; i < b->concat.volumes_count; i++)
  104. b->concat.volumes[i] = be32_to_cpup(p++);
  105. break;
  106. case PNFS_BLOCK_VOLUME_STRIPE:
  107. p = xdr_inline_decode(xdr, 8 + 4);
  108. if (!p)
  109. return -EIO;
  110. p = xdr_decode_hyper(p, &b->stripe.chunk_size);
  111. b->stripe.volumes_count = be32_to_cpup(p++);
  112. if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
  113. dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
  114. return -EIO;
  115. }
  116. p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
  117. if (!p)
  118. return -EIO;
  119. for (i = 0; i < b->stripe.volumes_count; i++)
  120. b->stripe.volumes[i] = be32_to_cpup(p++);
  121. break;
  122. case PNFS_BLOCK_VOLUME_SCSI:
  123. p = xdr_inline_decode(xdr, 4 + 4 + 4);
  124. if (!p)
  125. return -EIO;
  126. b->scsi.code_set = be32_to_cpup(p++);
  127. b->scsi.designator_type = be32_to_cpup(p++);
  128. b->scsi.designator_len = be32_to_cpup(p++);
  129. p = xdr_inline_decode(xdr, b->scsi.designator_len);
  130. if (!p)
  131. return -EIO;
  132. if (b->scsi.designator_len > 256)
  133. return -EIO;
  134. memcpy(&b->scsi.designator, p, b->scsi.designator_len);
  135. p = xdr_inline_decode(xdr, 8);
  136. if (!p)
  137. return -EIO;
  138. p = xdr_decode_hyper(p, &b->scsi.pr_key);
  139. break;
  140. default:
  141. dprintk("unknown volume type!\n");
  142. return -EIO;
  143. }
  144. return 0;
  145. }
  146. static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
  147. struct pnfs_block_dev_map *map)
  148. {
  149. map->start = dev->start;
  150. map->len = dev->len;
  151. map->disk_offset = dev->disk_offset;
  152. map->bdev = dev->bdev;
  153. return true;
  154. }
  155. static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
  156. struct pnfs_block_dev_map *map)
  157. {
  158. int i;
  159. for (i = 0; i < dev->nr_children; i++) {
  160. struct pnfs_block_dev *child = &dev->children[i];
  161. if (child->start > offset ||
  162. child->start + child->len <= offset)
  163. continue;
  164. child->map(child, offset - child->start, map);
  165. return true;
  166. }
  167. dprintk("%s: ran off loop!\n", __func__);
  168. return false;
  169. }
  170. static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
  171. struct pnfs_block_dev_map *map)
  172. {
  173. struct pnfs_block_dev *child;
  174. u64 chunk;
  175. u32 chunk_idx;
  176. u64 disk_offset;
  177. chunk = div_u64(offset, dev->chunk_size);
  178. div_u64_rem(chunk, dev->nr_children, &chunk_idx);
  179. if (chunk_idx >= dev->nr_children) {
  180. dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
  181. __func__, chunk_idx, offset, dev->chunk_size);
  182. /* error, should not happen */
  183. return false;
  184. }
  185. /* truncate offset to the beginning of the stripe */
  186. offset = chunk * dev->chunk_size;
  187. /* disk offset of the stripe */
  188. disk_offset = div_u64(offset, dev->nr_children);
  189. child = &dev->children[chunk_idx];
  190. child->map(child, disk_offset, map);
  191. map->start += offset;
  192. map->disk_offset += disk_offset;
  193. map->len = dev->chunk_size;
  194. return true;
  195. }
  196. static int
  197. bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
  198. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
  199. static int
  200. bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
  201. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  202. {
  203. struct pnfs_block_volume *v = &volumes[idx];
  204. struct block_device *bdev;
  205. dev_t dev;
  206. dev = bl_resolve_deviceid(server, v, gfp_mask);
  207. if (!dev)
  208. return -EIO;
  209. bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
  210. if (IS_ERR(bdev)) {
  211. printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
  212. MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
  213. return PTR_ERR(bdev);
  214. }
  215. d->bdev = bdev;
  216. d->len = i_size_read(d->bdev->bd_inode);
  217. d->map = bl_map_simple;
  218. printk(KERN_INFO "pNFS: using block device %s\n",
  219. d->bdev->bd_disk->disk_name);
  220. return 0;
  221. }
  222. static bool
  223. bl_validate_designator(struct pnfs_block_volume *v)
  224. {
  225. switch (v->scsi.designator_type) {
  226. case PS_DESIGNATOR_EUI64:
  227. if (v->scsi.code_set != PS_CODE_SET_BINARY)
  228. return false;
  229. if (v->scsi.designator_len != 8 &&
  230. v->scsi.designator_len != 10 &&
  231. v->scsi.designator_len != 16)
  232. return false;
  233. return true;
  234. case PS_DESIGNATOR_NAA:
  235. if (v->scsi.code_set != PS_CODE_SET_BINARY)
  236. return false;
  237. if (v->scsi.designator_len != 8 &&
  238. v->scsi.designator_len != 16)
  239. return false;
  240. return true;
  241. case PS_DESIGNATOR_T10:
  242. case PS_DESIGNATOR_NAME:
  243. pr_err("pNFS: unsupported designator "
  244. "(code set %d, type %d, len %d.\n",
  245. v->scsi.code_set,
  246. v->scsi.designator_type,
  247. v->scsi.designator_len);
  248. return false;
  249. default:
  250. pr_err("pNFS: invalid designator "
  251. "(code set %d, type %d, len %d.\n",
  252. v->scsi.code_set,
  253. v->scsi.designator_type,
  254. v->scsi.designator_len);
  255. return false;
  256. }
  257. }
  258. /*
  259. * Try to open the udev path for the WWN. At least on Debian the udev
  260. * by-id path will always point to the dm-multipath device if one exists.
  261. */
  262. static struct block_device *
  263. bl_open_udev_path(struct pnfs_block_volume *v)
  264. {
  265. struct block_device *bdev;
  266. const char *devname;
  267. devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
  268. v->scsi.designator_len, v->scsi.designator);
  269. if (!devname)
  270. return ERR_PTR(-ENOMEM);
  271. bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
  272. if (IS_ERR(bdev)) {
  273. pr_warn("pNFS: failed to open device %s (%ld)\n",
  274. devname, PTR_ERR(bdev));
  275. }
  276. kfree(devname);
  277. return bdev;
  278. }
  279. /*
  280. * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
  281. * wwn- links will only point to the first discovered SCSI device there.
  282. */
  283. static struct block_device *
  284. bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
  285. {
  286. struct block_device *bdev;
  287. const char *devname;
  288. devname = kasprintf(GFP_KERNEL,
  289. "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
  290. v->scsi.designator_type,
  291. v->scsi.designator_len, v->scsi.designator);
  292. if (!devname)
  293. return ERR_PTR(-ENOMEM);
  294. bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
  295. kfree(devname);
  296. return bdev;
  297. }
  298. static int
  299. bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
  300. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  301. {
  302. struct pnfs_block_volume *v = &volumes[idx];
  303. struct block_device *bdev;
  304. const struct pr_ops *ops;
  305. int error;
  306. if (!bl_validate_designator(v))
  307. return -EINVAL;
  308. bdev = bl_open_dm_mpath_udev_path(v);
  309. if (IS_ERR(bdev))
  310. bdev = bl_open_udev_path(v);
  311. if (IS_ERR(bdev))
  312. return PTR_ERR(bdev);
  313. d->bdev = bdev;
  314. d->len = i_size_read(d->bdev->bd_inode);
  315. d->map = bl_map_simple;
  316. d->pr_key = v->scsi.pr_key;
  317. pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
  318. d->bdev->bd_disk->disk_name, d->pr_key);
  319. ops = d->bdev->bd_disk->fops->pr_ops;
  320. if (!ops) {
  321. pr_err("pNFS: block device %s does not support reservations.",
  322. d->bdev->bd_disk->disk_name);
  323. error = -EINVAL;
  324. goto out_blkdev_put;
  325. }
  326. error = ops->pr_register(d->bdev, 0, d->pr_key, true);
  327. if (error) {
  328. pr_err("pNFS: failed to register key for block device %s.",
  329. d->bdev->bd_disk->disk_name);
  330. goto out_blkdev_put;
  331. }
  332. d->pr_registered = true;
  333. return 0;
  334. out_blkdev_put:
  335. blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
  336. return error;
  337. }
  338. static int
  339. bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
  340. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  341. {
  342. struct pnfs_block_volume *v = &volumes[idx];
  343. int ret;
  344. ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
  345. if (ret)
  346. return ret;
  347. d->disk_offset = v->slice.start;
  348. d->len = v->slice.len;
  349. return 0;
  350. }
  351. static int
  352. bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
  353. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  354. {
  355. struct pnfs_block_volume *v = &volumes[idx];
  356. u64 len = 0;
  357. int ret, i;
  358. d->children = kcalloc(v->concat.volumes_count,
  359. sizeof(struct pnfs_block_dev), GFP_KERNEL);
  360. if (!d->children)
  361. return -ENOMEM;
  362. for (i = 0; i < v->concat.volumes_count; i++) {
  363. ret = bl_parse_deviceid(server, &d->children[i],
  364. volumes, v->concat.volumes[i], gfp_mask);
  365. if (ret)
  366. return ret;
  367. d->nr_children++;
  368. d->children[i].start += len;
  369. len += d->children[i].len;
  370. }
  371. d->len = len;
  372. d->map = bl_map_concat;
  373. return 0;
  374. }
  375. static int
  376. bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
  377. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  378. {
  379. struct pnfs_block_volume *v = &volumes[idx];
  380. u64 len = 0;
  381. int ret, i;
  382. d->children = kcalloc(v->stripe.volumes_count,
  383. sizeof(struct pnfs_block_dev), GFP_KERNEL);
  384. if (!d->children)
  385. return -ENOMEM;
  386. for (i = 0; i < v->stripe.volumes_count; i++) {
  387. ret = bl_parse_deviceid(server, &d->children[i],
  388. volumes, v->stripe.volumes[i], gfp_mask);
  389. if (ret)
  390. return ret;
  391. d->nr_children++;
  392. len += d->children[i].len;
  393. }
  394. d->len = len;
  395. d->chunk_size = v->stripe.chunk_size;
  396. d->map = bl_map_stripe;
  397. return 0;
  398. }
  399. static int
  400. bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
  401. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  402. {
  403. switch (volumes[idx].type) {
  404. case PNFS_BLOCK_VOLUME_SIMPLE:
  405. return bl_parse_simple(server, d, volumes, idx, gfp_mask);
  406. case PNFS_BLOCK_VOLUME_SLICE:
  407. return bl_parse_slice(server, d, volumes, idx, gfp_mask);
  408. case PNFS_BLOCK_VOLUME_CONCAT:
  409. return bl_parse_concat(server, d, volumes, idx, gfp_mask);
  410. case PNFS_BLOCK_VOLUME_STRIPE:
  411. return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
  412. case PNFS_BLOCK_VOLUME_SCSI:
  413. return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
  414. default:
  415. dprintk("unsupported volume type: %d\n", volumes[idx].type);
  416. return -EIO;
  417. }
  418. }
  419. struct nfs4_deviceid_node *
  420. bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
  421. gfp_t gfp_mask)
  422. {
  423. struct nfs4_deviceid_node *node = NULL;
  424. struct pnfs_block_volume *volumes;
  425. struct pnfs_block_dev *top;
  426. struct xdr_stream xdr;
  427. struct xdr_buf buf;
  428. struct page *scratch;
  429. int nr_volumes, ret, i;
  430. __be32 *p;
  431. scratch = alloc_page(gfp_mask);
  432. if (!scratch)
  433. goto out;
  434. xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
  435. xdr_set_scratch_page(&xdr, scratch);
  436. p = xdr_inline_decode(&xdr, sizeof(__be32));
  437. if (!p)
  438. goto out_free_scratch;
  439. nr_volumes = be32_to_cpup(p++);
  440. volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
  441. gfp_mask);
  442. if (!volumes)
  443. goto out_free_scratch;
  444. for (i = 0; i < nr_volumes; i++) {
  445. ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
  446. if (ret < 0)
  447. goto out_free_volumes;
  448. }
  449. top = kzalloc(sizeof(*top), gfp_mask);
  450. if (!top)
  451. goto out_free_volumes;
  452. ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
  453. node = &top->node;
  454. nfs4_init_deviceid_node(node, server, &pdev->dev_id);
  455. if (ret)
  456. nfs4_mark_deviceid_unavailable(node);
  457. out_free_volumes:
  458. kfree(volumes);
  459. out_free_scratch:
  460. __free_page(scratch);
  461. out:
  462. return node;
  463. }