PageRenderTime 1703ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/fs/ksmbd/transport_rdma.c

https://github.com/kvaneesh/linux
C | 1804 lines | 1446 code | 267 blank | 91 comment | 185 complexity | f9e7546075d688ecee45524b52c82a59 MD5 | raw file
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2017, Microsoft Corporation.
  4. * Copyright (C) 2018, LG Electronics.
  5. *
  6. * Author(s): Long Li <longli@microsoft.com>,
  7. * Hyunchul Lee <hyc.lee@gmail.com>
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
  17. * the GNU General Public License for more details.
  18. */
  19. #define SUBMOD_NAME "smb_direct"
  20. #include <linux/kthread.h>
  21. #include <linux/list.h>
  22. #include <linux/mempool.h>
  23. #include <linux/highmem.h>
  24. #include <linux/scatterlist.h>
  25. #include <rdma/ib_verbs.h>
  26. #include <rdma/rdma_cm.h>
  27. #include <rdma/rw.h>
  28. #include "glob.h"
  29. #include "connection.h"
  30. #include "smb_common.h"
  31. #include "smbstatus.h"
  32. #include "transport_rdma.h"
  /* Port number and protocol version (little-endian) for SMB Direct */
  #define SMB_DIRECT_PORT                  5445
  #define SMB_DIRECT_VERSION_LE            cpu_to_le16(0x0100)

  /* SMB_DIRECT negotiation timeout, expressed in seconds */
  #define SMB_DIRECT_NEGOTIATE_TIMEOUT     120

  /* Number of scatter/gather entries per send / receive message */
  #define SMB_DIRECT_MAX_SEND_SGES         8
  #define SMB_DIRECT_MAX_RECV_SGES         1

  /*
   * Default maximum number of RDMA reads/writes outstanding on this
   * connection. The value may be lowered during QP creation to fit the
   * hardware limit.
   */
  #define SMB_DIRECT_CM_INITIATOR_DEPTH    8

  /* Maximum number of retries on data transfer operations */
  #define SMB_DIRECT_CM_RETRY              6

  /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
  #define SMB_DIRECT_CM_RNR_RETRY          0
  /*
   * User configurable initial values for each SMB_DIRECT transport
   * connection, as defined in [MS-SMBD] 3.1.1.1. They may change after a
   * SMB_DIRECT negotiation.
   */

  /* Maximum number of credits the local peer grants to the remote peer */
  static int smb_direct_receive_credit_max = 255;

  /* Number of credits the local peer requests from the remote peer */
  static int smb_direct_send_credit_target = 255;

  /* Largest single message that can be sent to the remote peer */
  static int smb_direct_max_send_size = 8192;

  /* Largest fragmented upper-layer payload that can be received */
  static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

  /* Largest single message that can be received */
  static int smb_direct_max_receive_size = 8192;

  /* Limits for RDMA read/write transfers: bytes and outstanding operations */
  static int smb_direct_max_read_write_size = 1024 * 1024;
  static int smb_direct_max_outstanding_rw_ops = 8;
  /* Module-wide listener state: holds the RDMA CM identifier */
  static struct smb_direct_listener {
      struct rdma_cm_id *cm_id;
  } smb_direct_listener;

  /* Workqueue on which this file queues its disconnect/send/credit work */
  static struct workqueue_struct *smb_direct_wq;
  /* Connection states of an SMB Direct transport */
  enum smb_direct_status {
      SMB_DIRECT_CS_NEW = 0,          /* set by alloc_transport() */
      SMB_DIRECT_CS_CONNECTED,
      SMB_DIRECT_CS_DISCONNECTING,    /* set just before rdma_disconnect() */
      SMB_DIRECT_CS_DISCONNECTED,
  };
  75. struct smb_direct_transport {
  76. struct ksmbd_transport transport;
  77. enum smb_direct_status status;
  78. bool full_packet_received;
  79. wait_queue_head_t wait_status;
  80. struct rdma_cm_id *cm_id;
  81. struct ib_cq *send_cq;
  82. struct ib_cq *recv_cq;
  83. struct ib_pd *pd;
  84. struct ib_qp *qp;
  85. int max_send_size;
  86. int max_recv_size;
  87. int max_fragmented_send_size;
  88. int max_fragmented_recv_size;
  89. int max_rdma_rw_size;
  90. spinlock_t reassembly_queue_lock;
  91. struct list_head reassembly_queue;
  92. int reassembly_data_length;
  93. int reassembly_queue_length;
  94. int first_entry_offset;
  95. wait_queue_head_t wait_reassembly_queue;
  96. spinlock_t receive_credit_lock;
  97. int recv_credits;
  98. int count_avail_recvmsg;
  99. int recv_credit_max;
  100. int recv_credit_target;
  101. spinlock_t recvmsg_queue_lock;
  102. struct list_head recvmsg_queue;
  103. spinlock_t empty_recvmsg_queue_lock;
  104. struct list_head empty_recvmsg_queue;
  105. int send_credit_target;
  106. atomic_t send_credits;
  107. spinlock_t lock_new_recv_credits;
  108. int new_recv_credits;
  109. atomic_t rw_avail_ops;
  110. wait_queue_head_t wait_send_credits;
  111. wait_queue_head_t wait_rw_avail_ops;
  112. mempool_t *sendmsg_mempool;
  113. struct kmem_cache *sendmsg_cache;
  114. mempool_t *recvmsg_mempool;
  115. struct kmem_cache *recvmsg_cache;
  116. wait_queue_head_t wait_send_payload_pending;
  117. atomic_t send_payload_pending;
  118. wait_queue_head_t wait_send_pending;
  119. atomic_t send_pending;
  120. struct delayed_work post_recv_credits_work;
  121. struct work_struct send_immediate_work;
  122. struct work_struct disconnect_work;
  123. bool negotiation_requested;
  124. };
  /* Address of the generic ksmbd transport embedded in an SMB Direct one */
  #define KSMBD_TRANS(t) \
      ((struct ksmbd_transport *)&((t)->transport))
  /* Values stored in smb_direct_recvmsg.type */
  enum {
      SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
      SMB_DIRECT_MSG_DATA_TRANSFER,
  };
  130. static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
  131. struct smb_direct_send_ctx {
  132. struct list_head msg_list;
  133. int wr_cnt;
  134. bool need_invalidate_rkey;
  135. unsigned int remote_key;
  136. };
  137. struct smb_direct_sendmsg {
  138. struct smb_direct_transport *transport;
  139. struct ib_send_wr wr;
  140. struct list_head list;
  141. int num_sge;
  142. struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
  143. struct ib_cqe cqe;
  144. u8 packet[];
  145. };
  146. struct smb_direct_recvmsg {
  147. struct smb_direct_transport *transport;
  148. struct list_head list;
  149. int type;
  150. struct ib_sge sge;
  151. struct ib_cqe cqe;
  152. bool first_segment;
  153. u8 packet[];
  154. };
  155. struct smb_direct_rdma_rw_msg {
  156. struct smb_direct_transport *t;
  157. struct ib_cqe cqe;
  158. struct completion *completion;
  159. struct rdma_rw_ctx rw_ctx;
  160. struct sg_table sgt;
  161. struct scatterlist sg_list[0];
  162. };
  163. static inline int get_buf_page_count(void *buf, int size)
  164. {
  165. return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
  166. (uintptr_t)buf / PAGE_SIZE;
  167. }
  /* Forward declarations of functions used before their definition */
  static void smb_direct_destroy_pools(struct smb_direct_transport *transport);

  static void smb_direct_post_recv_credits(struct work_struct *work);

  static int smb_direct_post_send_data(struct smb_direct_transport *t,
                                       struct smb_direct_send_ctx *send_ctx,
                                       struct kvec *iov,
                                       int niov,
                                       int remaining_data_length);
  174. static inline struct smb_direct_transport *
  175. smb_trans_direct_transfort(struct ksmbd_transport *t)
  176. {
  177. return container_of(t, struct smb_direct_transport, transport);
  178. }
  179. static inline void
  180. *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
  181. {
  182. return (void *)recvmsg->packet;
  183. }
  184. static inline bool is_receive_credit_post_required(int receive_credits,
  185. int avail_recvmsg_count)
  186. {
  187. return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
  188. avail_recvmsg_count >= (receive_credits >> 2);
  189. }
  190. static struct
  191. smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
  192. {
  193. struct smb_direct_recvmsg *recvmsg = NULL;
  194. spin_lock(&t->recvmsg_queue_lock);
  195. if (!list_empty(&t->recvmsg_queue)) {
  196. recvmsg = list_first_entry(&t->recvmsg_queue,
  197. struct smb_direct_recvmsg,
  198. list);
  199. list_del(&recvmsg->list);
  200. }
  201. spin_unlock(&t->recvmsg_queue_lock);
  202. return recvmsg;
  203. }
  204. static void put_recvmsg(struct smb_direct_transport *t,
  205. struct smb_direct_recvmsg *recvmsg)
  206. {
  207. ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
  208. recvmsg->sge.length, DMA_FROM_DEVICE);
  209. spin_lock(&t->recvmsg_queue_lock);
  210. list_add(&recvmsg->list, &t->recvmsg_queue);
  211. spin_unlock(&t->recvmsg_queue_lock);
  212. }
  213. static struct
  214. smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
  215. {
  216. struct smb_direct_recvmsg *recvmsg = NULL;
  217. spin_lock(&t->empty_recvmsg_queue_lock);
  218. if (!list_empty(&t->empty_recvmsg_queue)) {
  219. recvmsg = list_first_entry(&t->empty_recvmsg_queue,
  220. struct smb_direct_recvmsg, list);
  221. list_del(&recvmsg->list);
  222. }
  223. spin_unlock(&t->empty_recvmsg_queue_lock);
  224. return recvmsg;
  225. }
  226. static void put_empty_recvmsg(struct smb_direct_transport *t,
  227. struct smb_direct_recvmsg *recvmsg)
  228. {
  229. ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
  230. recvmsg->sge.length, DMA_FROM_DEVICE);
  231. spin_lock(&t->empty_recvmsg_queue_lock);
  232. list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
  233. spin_unlock(&t->empty_recvmsg_queue_lock);
  234. }
  235. static void enqueue_reassembly(struct smb_direct_transport *t,
  236. struct smb_direct_recvmsg *recvmsg,
  237. int data_length)
  238. {
  239. spin_lock(&t->reassembly_queue_lock);
  240. list_add_tail(&recvmsg->list, &t->reassembly_queue);
  241. t->reassembly_queue_length++;
  242. /*
  243. * Make sure reassembly_data_length is updated after list and
  244. * reassembly_queue_length are updated. On the dequeue side
  245. * reassembly_data_length is checked without a lock to determine
  246. * if reassembly_queue_length and list is up to date
  247. */
  248. virt_wmb();
  249. t->reassembly_data_length += data_length;
  250. spin_unlock(&t->reassembly_queue_lock);
  251. }
  252. static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
  253. {
  254. if (!list_empty(&t->reassembly_queue))
  255. return list_first_entry(&t->reassembly_queue,
  256. struct smb_direct_recvmsg, list);
  257. else
  258. return NULL;
  259. }
  260. static void smb_direct_disconnect_rdma_work(struct work_struct *work)
  261. {
  262. struct smb_direct_transport *t =
  263. container_of(work, struct smb_direct_transport,
  264. disconnect_work);
  265. if (t->status == SMB_DIRECT_CS_CONNECTED) {
  266. t->status = SMB_DIRECT_CS_DISCONNECTING;
  267. rdma_disconnect(t->cm_id);
  268. }
  269. }
  270. static void
  271. smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
  272. {
  273. if (t->status == SMB_DIRECT_CS_CONNECTED)
  274. queue_work(smb_direct_wq, &t->disconnect_work);
  275. }
  276. static void smb_direct_send_immediate_work(struct work_struct *work)
  277. {
  278. struct smb_direct_transport *t = container_of(work,
  279. struct smb_direct_transport, send_immediate_work);
  280. if (t->status != SMB_DIRECT_CS_CONNECTED)
  281. return;
  282. smb_direct_post_send_data(t, NULL, NULL, 0, 0);
  283. }
  284. static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
  285. {
  286. struct smb_direct_transport *t;
  287. struct ksmbd_conn *conn;
  288. t = kzalloc(sizeof(*t), GFP_KERNEL);
  289. if (!t)
  290. return NULL;
  291. t->cm_id = cm_id;
  292. cm_id->context = t;
  293. t->status = SMB_DIRECT_CS_NEW;
  294. init_waitqueue_head(&t->wait_status);
  295. spin_lock_init(&t->reassembly_queue_lock);
  296. INIT_LIST_HEAD(&t->reassembly_queue);
  297. t->reassembly_data_length = 0;
  298. t->reassembly_queue_length = 0;
  299. init_waitqueue_head(&t->wait_reassembly_queue);
  300. init_waitqueue_head(&t->wait_send_credits);
  301. init_waitqueue_head(&t->wait_rw_avail_ops);
  302. spin_lock_init(&t->receive_credit_lock);
  303. spin_lock_init(&t->recvmsg_queue_lock);
  304. INIT_LIST_HEAD(&t->recvmsg_queue);
  305. spin_lock_init(&t->empty_recvmsg_queue_lock);
  306. INIT_LIST_HEAD(&t->empty_recvmsg_queue);
  307. init_waitqueue_head(&t->wait_send_payload_pending);
  308. atomic_set(&t->send_payload_pending, 0);
  309. init_waitqueue_head(&t->wait_send_pending);
  310. atomic_set(&t->send_pending, 0);
  311. spin_lock_init(&t->lock_new_recv_credits);
  312. INIT_DELAYED_WORK(&t->post_recv_credits_work,
  313. smb_direct_post_recv_credits);
  314. INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
  315. INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
  316. conn = ksmbd_conn_alloc();
  317. if (!conn)
  318. goto err;
  319. conn->transport = KSMBD_TRANS(t);
  320. KSMBD_TRANS(t)->conn = conn;
  321. KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
  322. return t;
  323. err:
  324. kfree(t);
  325. return NULL;
  326. }
  327. static void free_transport(struct smb_direct_transport *t)
  328. {
  329. struct smb_direct_recvmsg *recvmsg;
  330. wake_up_interruptible(&t->wait_send_credits);
  331. ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
  332. wait_event(t->wait_send_payload_pending,
  333. atomic_read(&t->send_payload_pending) == 0);
  334. wait_event(t->wait_send_pending,
  335. atomic_read(&t->send_pending) == 0);
  336. cancel_work_sync(&t->disconnect_work);
  337. cancel_delayed_work_sync(&t->post_recv_credits_work);
  338. cancel_work_sync(&t->send_immediate_work);
  339. if (t->qp) {
  340. ib_drain_qp(t->qp);
  341. ib_destroy_qp(t->qp);
  342. }
  343. ksmbd_debug(RDMA, "drain the reassembly queue\n");
  344. do {
  345. spin_lock(&t->reassembly_queue_lock);
  346. recvmsg = get_first_reassembly(t);
  347. if (recvmsg) {
  348. list_del(&recvmsg->list);
  349. spin_unlock(&t->reassembly_queue_lock);
  350. put_recvmsg(t, recvmsg);
  351. } else {
  352. spin_unlock(&t->reassembly_queue_lock);
  353. }
  354. } while (recvmsg);
  355. t->reassembly_data_length = 0;
  356. if (t->send_cq)
  357. ib_free_cq(t->send_cq);
  358. if (t->recv_cq)
  359. ib_free_cq(t->recv_cq);
  360. if (t->pd)
  361. ib_dealloc_pd(t->pd);
  362. if (t->cm_id)
  363. rdma_destroy_id(t->cm_id);
  364. smb_direct_destroy_pools(t);
  365. ksmbd_conn_free(KSMBD_TRANS(t)->conn);
  366. kfree(t);
  367. }
  368. static struct smb_direct_sendmsg
  369. *smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
  370. {
  371. struct smb_direct_sendmsg *msg;
  372. msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
  373. if (!msg)
  374. return ERR_PTR(-ENOMEM);
  375. msg->transport = t;
  376. INIT_LIST_HEAD(&msg->list);
  377. msg->num_sge = 0;
  378. return msg;
  379. }
  380. static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
  381. struct smb_direct_sendmsg *msg)
  382. {
  383. int i;
  384. if (msg->num_sge > 0) {
  385. ib_dma_unmap_single(t->cm_id->device,
  386. msg->sge[0].addr, msg->sge[0].length,
  387. DMA_TO_DEVICE);
  388. for (i = 1; i < msg->num_sge; i++)
  389. ib_dma_unmap_page(t->cm_id->device,
  390. msg->sge[i].addr, msg->sge[i].length,
  391. DMA_TO_DEVICE);
  392. }
  393. mempool_free(msg, t->sendmsg_mempool);
  394. }
  395. static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
  396. {
  397. switch (recvmsg->type) {
  398. case SMB_DIRECT_MSG_DATA_TRANSFER: {
  399. struct smb_direct_data_transfer *req =
  400. (struct smb_direct_data_transfer *)recvmsg->packet;
  401. struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
  402. + le32_to_cpu(req->data_offset) - 4);
  403. ksmbd_debug(RDMA,
  404. "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
  405. le16_to_cpu(req->credits_granted),
  406. le16_to_cpu(req->credits_requested),
  407. req->data_length, req->remaining_data_length,
  408. hdr->ProtocolId, hdr->Command);
  409. break;
  410. }
  411. case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
  412. struct smb_direct_negotiate_req *req =
  413. (struct smb_direct_negotiate_req *)recvmsg->packet;
  414. ksmbd_debug(RDMA,
  415. "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
  416. le16_to_cpu(req->min_version),
  417. le16_to_cpu(req->max_version),
  418. le16_to_cpu(req->credits_requested),
  419. le32_to_cpu(req->preferred_send_size),
  420. le32_to_cpu(req->max_receive_size),
  421. le32_to_cpu(req->max_fragmented_size));
  422. if (le16_to_cpu(req->min_version) > 0x0100 ||
  423. le16_to_cpu(req->max_version) < 0x0100)
  424. return -EOPNOTSUPP;
  425. if (le16_to_cpu(req->credits_requested) <= 0 ||
  426. le32_to_cpu(req->max_receive_size) <= 128 ||
  427. le32_to_cpu(req->max_fragmented_size) <=
  428. 128 * 1024)
  429. return -ECONNABORTED;
  430. break;
  431. }
  432. default:
  433. return -EINVAL;
  434. }
  435. return 0;
  436. }
  437. static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
  438. {
  439. struct smb_direct_recvmsg *recvmsg;
  440. struct smb_direct_transport *t;
  441. recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
  442. t = recvmsg->transport;
  443. if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
  444. if (wc->status != IB_WC_WR_FLUSH_ERR) {
  445. pr_err("Recv error. status='%s (%d)' opcode=%d\n",
  446. ib_wc_status_msg(wc->status), wc->status,
  447. wc->opcode);
  448. smb_direct_disconnect_rdma_connection(t);
  449. }
  450. put_empty_recvmsg(t, recvmsg);
  451. return;
  452. }
  453. ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
  454. ib_wc_status_msg(wc->status), wc->status,
  455. wc->opcode);
  456. ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
  457. recvmsg->sge.length, DMA_FROM_DEVICE);
  458. switch (recvmsg->type) {
  459. case SMB_DIRECT_MSG_NEGOTIATE_REQ:
  460. t->negotiation_requested = true;
  461. t->full_packet_received = true;
  462. wake_up_interruptible(&t->wait_status);
  463. break;
  464. case SMB_DIRECT_MSG_DATA_TRANSFER: {
  465. struct smb_direct_data_transfer *data_transfer =
  466. (struct smb_direct_data_transfer *)recvmsg->packet;
  467. int data_length = le32_to_cpu(data_transfer->data_length);
  468. int avail_recvmsg_count, receive_credits;
  469. if (data_length) {
  470. if (t->full_packet_received)
  471. recvmsg->first_segment = true;
  472. if (le32_to_cpu(data_transfer->remaining_data_length))
  473. t->full_packet_received = false;
  474. else
  475. t->full_packet_received = true;
  476. enqueue_reassembly(t, recvmsg, data_length);
  477. wake_up_interruptible(&t->wait_reassembly_queue);
  478. spin_lock(&t->receive_credit_lock);
  479. receive_credits = --(t->recv_credits);
  480. avail_recvmsg_count = t->count_avail_recvmsg;
  481. spin_unlock(&t->receive_credit_lock);
  482. } else {
  483. put_empty_recvmsg(t, recvmsg);
  484. spin_lock(&t->receive_credit_lock);
  485. receive_credits = --(t->recv_credits);
  486. avail_recvmsg_count = ++(t->count_avail_recvmsg);
  487. spin_unlock(&t->receive_credit_lock);
  488. }
  489. t->recv_credit_target =
  490. le16_to_cpu(data_transfer->credits_requested);
  491. atomic_add(le16_to_cpu(data_transfer->credits_granted),
  492. &t->send_credits);
  493. if (le16_to_cpu(data_transfer->flags) &
  494. SMB_DIRECT_RESPONSE_REQUESTED)
  495. queue_work(smb_direct_wq, &t->send_immediate_work);
  496. if (atomic_read(&t->send_credits) > 0)
  497. wake_up_interruptible(&t->wait_send_credits);
  498. if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
  499. mod_delayed_work(smb_direct_wq,
  500. &t->post_recv_credits_work, 0);
  501. break;
  502. }
  503. default:
  504. break;
  505. }
  506. }
  507. static int smb_direct_post_recv(struct smb_direct_transport *t,
  508. struct smb_direct_recvmsg *recvmsg)
  509. {
  510. struct ib_recv_wr wr;
  511. int ret;
  512. recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
  513. recvmsg->packet, t->max_recv_size,
  514. DMA_FROM_DEVICE);
  515. ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
  516. if (ret)
  517. return ret;
  518. recvmsg->sge.length = t->max_recv_size;
  519. recvmsg->sge.lkey = t->pd->local_dma_lkey;
  520. recvmsg->cqe.done = recv_done;
  521. wr.wr_cqe = &recvmsg->cqe;
  522. wr.next = NULL;
  523. wr.sg_list = &recvmsg->sge;
  524. wr.num_sge = 1;
  525. ret = ib_post_recv(t->qp, &wr, NULL);
  526. if (ret) {
  527. pr_err("Can't post recv: %d\n", ret);
  528. ib_dma_unmap_single(t->cm_id->device,
  529. recvmsg->sge.addr, recvmsg->sge.length,
  530. DMA_FROM_DEVICE);
  531. smb_direct_disconnect_rdma_connection(t);
  532. return ret;
  533. }
  534. return ret;
  535. }
  536. static int smb_direct_read(struct ksmbd_transport *t, char *buf,
  537. unsigned int size)
  538. {
  539. struct smb_direct_recvmsg *recvmsg;
  540. struct smb_direct_data_transfer *data_transfer;
  541. int to_copy, to_read, data_read, offset;
  542. u32 data_length, remaining_data_length, data_offset;
  543. int rc;
  544. struct smb_direct_transport *st = smb_trans_direct_transfort(t);
  545. again:
  546. if (st->status != SMB_DIRECT_CS_CONNECTED) {
  547. pr_err("disconnected\n");
  548. return -ENOTCONN;
  549. }
  550. /*
  551. * No need to hold the reassembly queue lock all the time as we are
  552. * the only one reading from the front of the queue. The transport
  553. * may add more entries to the back of the queue at the same time
  554. */
  555. if (st->reassembly_data_length >= size) {
  556. int queue_length;
  557. int queue_removed = 0;
  558. /*
  559. * Need to make sure reassembly_data_length is read before
  560. * reading reassembly_queue_length and calling
  561. * get_first_reassembly. This call is lock free
  562. * as we never read at the end of the queue which are being
  563. * updated in SOFTIRQ as more data is received
  564. */
  565. virt_rmb();
  566. queue_length = st->reassembly_queue_length;
  567. data_read = 0;
  568. to_read = size;
  569. offset = st->first_entry_offset;
  570. while (data_read < size) {
  571. recvmsg = get_first_reassembly(st);
  572. data_transfer = smb_direct_recvmsg_payload(recvmsg);
  573. data_length = le32_to_cpu(data_transfer->data_length);
  574. remaining_data_length =
  575. le32_to_cpu(data_transfer->remaining_data_length);
  576. data_offset = le32_to_cpu(data_transfer->data_offset);
  577. /*
  578. * The upper layer expects RFC1002 length at the
  579. * beginning of the payload. Return it to indicate
  580. * the total length of the packet. This minimize the
  581. * change to upper layer packet processing logic. This
  582. * will be eventually remove when an intermediate
  583. * transport layer is added
  584. */
  585. if (recvmsg->first_segment && size == 4) {
  586. unsigned int rfc1002_len =
  587. data_length + remaining_data_length;
  588. *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
  589. data_read = 4;
  590. recvmsg->first_segment = false;
  591. ksmbd_debug(RDMA,
  592. "returning rfc1002 length %d\n",
  593. rfc1002_len);
  594. goto read_rfc1002_done;
  595. }
  596. to_copy = min_t(int, data_length - offset, to_read);
  597. memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
  598. to_copy);
  599. /* move on to the next buffer? */
  600. if (to_copy == data_length - offset) {
  601. queue_length--;
  602. /*
  603. * No need to lock if we are not at the
  604. * end of the queue
  605. */
  606. if (queue_length) {
  607. list_del(&recvmsg->list);
  608. } else {
  609. spin_lock_irq(&st->reassembly_queue_lock);
  610. list_del(&recvmsg->list);
  611. spin_unlock_irq(&st->reassembly_queue_lock);
  612. }
  613. queue_removed++;
  614. put_recvmsg(st, recvmsg);
  615. offset = 0;
  616. } else {
  617. offset += to_copy;
  618. }
  619. to_read -= to_copy;
  620. data_read += to_copy;
  621. }
  622. spin_lock_irq(&st->reassembly_queue_lock);
  623. st->reassembly_data_length -= data_read;
  624. st->reassembly_queue_length -= queue_removed;
  625. spin_unlock_irq(&st->reassembly_queue_lock);
  626. spin_lock(&st->receive_credit_lock);
  627. st->count_avail_recvmsg += queue_removed;
  628. if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
  629. spin_unlock(&st->receive_credit_lock);
  630. mod_delayed_work(smb_direct_wq,
  631. &st->post_recv_credits_work, 0);
  632. } else {
  633. spin_unlock(&st->receive_credit_lock);
  634. }
  635. st->first_entry_offset = offset;
  636. ksmbd_debug(RDMA,
  637. "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
  638. data_read, st->reassembly_data_length,
  639. st->first_entry_offset);
  640. read_rfc1002_done:
  641. return data_read;
  642. }
  643. ksmbd_debug(RDMA, "wait_event on more data\n");
  644. rc = wait_event_interruptible(st->wait_reassembly_queue,
  645. st->reassembly_data_length >= size ||
  646. st->status != SMB_DIRECT_CS_CONNECTED);
  647. if (rc)
  648. return -EINTR;
  649. goto again;
  650. }
  651. static void smb_direct_post_recv_credits(struct work_struct *work)
  652. {
  653. struct smb_direct_transport *t = container_of(work,
  654. struct smb_direct_transport, post_recv_credits_work.work);
  655. struct smb_direct_recvmsg *recvmsg;
  656. int receive_credits, credits = 0;
  657. int ret;
  658. int use_free = 1;
  659. spin_lock(&t->receive_credit_lock);
  660. receive_credits = t->recv_credits;
  661. spin_unlock(&t->receive_credit_lock);
  662. if (receive_credits < t->recv_credit_target) {
  663. while (true) {
  664. if (use_free)
  665. recvmsg = get_free_recvmsg(t);
  666. else
  667. recvmsg = get_empty_recvmsg(t);
  668. if (!recvmsg) {
  669. if (use_free) {
  670. use_free = 0;
  671. continue;
  672. } else {
  673. break;
  674. }
  675. }
  676. recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
  677. recvmsg->first_segment = false;
  678. ret = smb_direct_post_recv(t, recvmsg);
  679. if (ret) {
  680. pr_err("Can't post recv: %d\n", ret);
  681. put_recvmsg(t, recvmsg);
  682. break;
  683. }
  684. credits++;
  685. }
  686. }
  687. spin_lock(&t->receive_credit_lock);
  688. t->recv_credits += credits;
  689. t->count_avail_recvmsg -= credits;
  690. spin_unlock(&t->receive_credit_lock);
  691. spin_lock(&t->lock_new_recv_credits);
  692. t->new_recv_credits += credits;
  693. spin_unlock(&t->lock_new_recv_credits);
  694. if (credits)
  695. queue_work(smb_direct_wq, &t->send_immediate_work);
  696. }
  697. static void send_done(struct ib_cq *cq, struct ib_wc *wc)
  698. {
  699. struct smb_direct_sendmsg *sendmsg, *sibling;
  700. struct smb_direct_transport *t;
  701. struct list_head *pos, *prev, *end;
  702. sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
  703. t = sendmsg->transport;
  704. ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
  705. ib_wc_status_msg(wc->status), wc->status,
  706. wc->opcode);
  707. if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
  708. pr_err("Send error. status='%s (%d)', opcode=%d\n",
  709. ib_wc_status_msg(wc->status), wc->status,
  710. wc->opcode);
  711. smb_direct_disconnect_rdma_connection(t);
  712. }
  713. if (sendmsg->num_sge > 1) {
  714. if (atomic_dec_and_test(&t->send_payload_pending))
  715. wake_up(&t->wait_send_payload_pending);
  716. } else {
  717. if (atomic_dec_and_test(&t->send_pending))
  718. wake_up(&t->wait_send_pending);
  719. }
  720. /* iterate and free the list of messages in reverse. the list's head
  721. * is invalid.
  722. */
  723. for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
  724. prev != end; pos = prev, prev = prev->prev) {
  725. sibling = container_of(pos, struct smb_direct_sendmsg, list);
  726. smb_direct_free_sendmsg(t, sibling);
  727. }
  728. sibling = container_of(pos, struct smb_direct_sendmsg, list);
  729. smb_direct_free_sendmsg(t, sibling);
  730. }
  731. static int manage_credits_prior_sending(struct smb_direct_transport *t)
  732. {
  733. int new_credits;
  734. spin_lock(&t->lock_new_recv_credits);
  735. new_credits = t->new_recv_credits;
  736. t->new_recv_credits = 0;
  737. spin_unlock(&t->lock_new_recv_credits);
  738. return new_credits;
  739. }
  740. static int smb_direct_post_send(struct smb_direct_transport *t,
  741. struct ib_send_wr *wr)
  742. {
  743. int ret;
  744. if (wr->num_sge > 1)
  745. atomic_inc(&t->send_payload_pending);
  746. else
  747. atomic_inc(&t->send_pending);
  748. ret = ib_post_send(t->qp, wr, NULL);
  749. if (ret) {
  750. pr_err("failed to post send: %d\n", ret);
  751. if (wr->num_sge > 1) {
  752. if (atomic_dec_and_test(&t->send_payload_pending))
  753. wake_up(&t->wait_send_payload_pending);
  754. } else {
  755. if (atomic_dec_and_test(&t->send_pending))
  756. wake_up(&t->wait_send_pending);
  757. }
  758. smb_direct_disconnect_rdma_connection(t);
  759. }
  760. return ret;
  761. }
  762. static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
  763. struct smb_direct_send_ctx *send_ctx,
  764. bool need_invalidate_rkey,
  765. unsigned int remote_key)
  766. {
  767. INIT_LIST_HEAD(&send_ctx->msg_list);
  768. send_ctx->wr_cnt = 0;
  769. send_ctx->need_invalidate_rkey = need_invalidate_rkey;
  770. send_ctx->remote_key = remote_key;
  771. }
  772. static int smb_direct_flush_send_list(struct smb_direct_transport *t,
  773. struct smb_direct_send_ctx *send_ctx,
  774. bool is_last)
  775. {
  776. struct smb_direct_sendmsg *first, *last;
  777. int ret;
  778. if (list_empty(&send_ctx->msg_list))
  779. return 0;
  780. first = list_first_entry(&send_ctx->msg_list,
  781. struct smb_direct_sendmsg,
  782. list);
  783. last = list_last_entry(&send_ctx->msg_list,
  784. struct smb_direct_sendmsg,
  785. list);
  786. last->wr.send_flags = IB_SEND_SIGNALED;
  787. last->wr.wr_cqe = &last->cqe;
  788. if (is_last && send_ctx->need_invalidate_rkey) {
  789. last->wr.opcode = IB_WR_SEND_WITH_INV;
  790. last->wr.ex.invalidate_rkey = send_ctx->remote_key;
  791. }
  792. ret = smb_direct_post_send(t, &first->wr);
  793. if (!ret) {
  794. smb_direct_send_ctx_init(t, send_ctx,
  795. send_ctx->need_invalidate_rkey,
  796. send_ctx->remote_key);
  797. } else {
  798. atomic_add(send_ctx->wr_cnt, &t->send_credits);
  799. wake_up(&t->wait_send_credits);
  800. list_for_each_entry_safe(first, last, &send_ctx->msg_list,
  801. list) {
  802. smb_direct_free_sendmsg(t, first);
  803. }
  804. }
  805. return ret;
  806. }
  807. static int wait_for_credits(struct smb_direct_transport *t,
  808. wait_queue_head_t *waitq, atomic_t *credits)
  809. {
  810. int ret;
  811. do {
  812. if (atomic_dec_return(credits) >= 0)
  813. return 0;
  814. atomic_inc(credits);
  815. ret = wait_event_interruptible(*waitq,
  816. atomic_read(credits) > 0 ||
  817. t->status != SMB_DIRECT_CS_CONNECTED);
  818. if (t->status != SMB_DIRECT_CS_CONNECTED)
  819. return -ENOTCONN;
  820. else if (ret < 0)
  821. return ret;
  822. } while (true);
  823. }
  824. static int wait_for_send_credits(struct smb_direct_transport *t,
  825. struct smb_direct_send_ctx *send_ctx)
  826. {
  827. int ret;
  828. if (send_ctx &&
  829. (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
  830. ret = smb_direct_flush_send_list(t, send_ctx, false);
  831. if (ret)
  832. return ret;
  833. }
  834. return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
  835. }
  836. static int smb_direct_create_header(struct smb_direct_transport *t,
  837. int size, int remaining_data_length,
  838. struct smb_direct_sendmsg **sendmsg_out)
  839. {
  840. struct smb_direct_sendmsg *sendmsg;
  841. struct smb_direct_data_transfer *packet;
  842. int header_length;
  843. int ret;
  844. sendmsg = smb_direct_alloc_sendmsg(t);
  845. if (IS_ERR(sendmsg))
  846. return PTR_ERR(sendmsg);
  847. /* Fill in the packet header */
  848. packet = (struct smb_direct_data_transfer *)sendmsg->packet;
  849. packet->credits_requested = cpu_to_le16(t->send_credit_target);
  850. packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
  851. packet->flags = 0;
  852. packet->reserved = 0;
  853. if (!size)
  854. packet->data_offset = 0;
  855. else
  856. packet->data_offset = cpu_to_le32(24);
  857. packet->data_length = cpu_to_le32(size);
  858. packet->remaining_data_length = cpu_to_le32(remaining_data_length);
  859. packet->padding = 0;
  860. ksmbd_debug(RDMA,
  861. "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
  862. le16_to_cpu(packet->credits_requested),
  863. le16_to_cpu(packet->credits_granted),
  864. le32_to_cpu(packet->data_offset),
  865. le32_to_cpu(packet->data_length),
  866. le32_to_cpu(packet->remaining_data_length));
  867. /* Map the packet to DMA */
  868. header_length = sizeof(struct smb_direct_data_transfer);
  869. /* If this is a packet without payload, don't send padding */
  870. if (!size)
  871. header_length =
  872. offsetof(struct smb_direct_data_transfer, padding);
  873. sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
  874. (void *)packet,
  875. header_length,
  876. DMA_TO_DEVICE);
  877. ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
  878. if (ret) {
  879. smb_direct_free_sendmsg(t, sendmsg);
  880. return ret;
  881. }
  882. sendmsg->num_sge = 1;
  883. sendmsg->sge[0].length = header_length;
  884. sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
  885. *sendmsg_out = sendmsg;
  886. return 0;
  887. }
  888. static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
  889. {
  890. bool high = is_vmalloc_addr(buf);
  891. struct page *page;
  892. int offset, len;
  893. int i = 0;
  894. if (nentries < get_buf_page_count(buf, size))
  895. return -EINVAL;
  896. offset = offset_in_page(buf);
  897. buf -= offset;
  898. while (size > 0) {
  899. len = min_t(int, PAGE_SIZE - offset, size);
  900. if (high)
  901. page = vmalloc_to_page(buf);
  902. else
  903. page = kmap_to_page(buf);
  904. if (!sg_list)
  905. return -EINVAL;
  906. sg_set_page(sg_list, page, len, offset);
  907. sg_list = sg_next(sg_list);
  908. buf += PAGE_SIZE;
  909. size -= len;
  910. offset = 0;
  911. i++;
  912. }
  913. return i;
  914. }
  915. static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
  916. struct scatterlist *sg_list, int nentries,
  917. enum dma_data_direction dir)
  918. {
  919. int npages;
  920. npages = get_sg_list(buf, size, sg_list, nentries);
  921. if (npages <= 0)
  922. return -EINVAL;
  923. return ib_dma_map_sg(device, sg_list, npages, dir);
  924. }
  925. static int post_sendmsg(struct smb_direct_transport *t,
  926. struct smb_direct_send_ctx *send_ctx,
  927. struct smb_direct_sendmsg *msg)
  928. {
  929. int i;
  930. for (i = 0; i < msg->num_sge; i++)
  931. ib_dma_sync_single_for_device(t->cm_id->device,
  932. msg->sge[i].addr, msg->sge[i].length,
  933. DMA_TO_DEVICE);
  934. msg->cqe.done = send_done;
  935. msg->wr.opcode = IB_WR_SEND;
  936. msg->wr.sg_list = &msg->sge[0];
  937. msg->wr.num_sge = msg->num_sge;
  938. msg->wr.next = NULL;
  939. if (send_ctx) {
  940. msg->wr.wr_cqe = NULL;
  941. msg->wr.send_flags = 0;
  942. if (!list_empty(&send_ctx->msg_list)) {
  943. struct smb_direct_sendmsg *last;
  944. last = list_last_entry(&send_ctx->msg_list,
  945. struct smb_direct_sendmsg,
  946. list);
  947. last->wr.next = &msg->wr;
  948. }
  949. list_add_tail(&msg->list, &send_ctx->msg_list);
  950. send_ctx->wr_cnt++;
  951. return 0;
  952. }
  953. msg->wr.wr_cqe = &msg->cqe;
  954. msg->wr.send_flags = IB_SEND_SIGNALED;
  955. return smb_direct_post_send(t, &msg->wr);
  956. }
  957. static int smb_direct_post_send_data(struct smb_direct_transport *t,
  958. struct smb_direct_send_ctx *send_ctx,
  959. struct kvec *iov, int niov,
  960. int remaining_data_length)
  961. {
  962. int i, j, ret;
  963. struct smb_direct_sendmsg *msg;
  964. int data_length;
  965. struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
  966. ret = wait_for_send_credits(t, send_ctx);
  967. if (ret)
  968. return ret;
  969. data_length = 0;
  970. for (i = 0; i < niov; i++)
  971. data_length += iov[i].iov_len;
  972. ret = smb_direct_create_header(t, data_length, remaining_data_length,
  973. &msg);
  974. if (ret) {
  975. atomic_inc(&t->send_credits);
  976. return ret;
  977. }
  978. for (i = 0; i < niov; i++) {
  979. struct ib_sge *sge;
  980. int sg_cnt;
  981. sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
  982. sg_cnt = get_mapped_sg_list(t->cm_id->device,
  983. iov[i].iov_base, iov[i].iov_len,
  984. sg, SMB_DIRECT_MAX_SEND_SGES - 1,
  985. DMA_TO_DEVICE);
  986. if (sg_cnt <= 0) {
  987. pr_err("failed to map buffer\n");
  988. ret = -ENOMEM;
  989. goto err;
  990. } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
  991. pr_err("buffer not fitted into sges\n");
  992. ret = -E2BIG;
  993. ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
  994. DMA_TO_DEVICE);
  995. goto err;
  996. }
  997. for (j = 0; j < sg_cnt; j++) {
  998. sge = &msg->sge[msg->num_sge];
  999. sge->addr = sg_dma_address(&sg[j]);
  1000. sge->length = sg_dma_len(&sg[j]);
  1001. sge->lkey = t->pd->local_dma_lkey;
  1002. msg->num_sge++;
  1003. }
  1004. }
  1005. ret = post_sendmsg(t, send_ctx, msg);
  1006. if (ret)
  1007. goto err;
  1008. return 0;
  1009. err:
  1010. smb_direct_free_sendmsg(t, msg);
  1011. atomic_inc(&t->send_credits);
  1012. return ret;
  1013. }
  1014. static int smb_direct_writev(struct ksmbd_transport *t,
  1015. struct kvec *iov, int niovs, int buflen,
  1016. bool need_invalidate, unsigned int remote_key)
  1017. {
  1018. struct smb_direct_transport *st = smb_trans_direct_transfort(t);
  1019. int remaining_data_length;
  1020. int start, i, j;
  1021. int max_iov_size = st->max_send_size -
  1022. sizeof(struct smb_direct_data_transfer);
  1023. int ret;
  1024. struct kvec vec;
  1025. struct smb_direct_send_ctx send_ctx;
  1026. if (st->status != SMB_DIRECT_CS_CONNECTED)
  1027. return -ENOTCONN;
  1028. //FIXME: skip RFC1002 header..
  1029. buflen -= 4;
  1030. iov[0].iov_base += 4;
  1031. iov[0].iov_len -= 4;
  1032. remaining_data_length = buflen;
  1033. ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
  1034. smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
  1035. start = i = 0;
  1036. buflen = 0;
  1037. while (true) {
  1038. buflen += iov[i].iov_len;
  1039. if (buflen > max_iov_size) {
  1040. if (i > start) {
  1041. remaining_data_length -=
  1042. (buflen - iov[i].iov_len);
  1043. ret = smb_direct_post_send_data(st, &send_ctx,
  1044. &iov[start], i - start,
  1045. remaining_data_length);
  1046. if (ret)
  1047. goto done;
  1048. } else {
  1049. /* iov[start] is too big, break it */
  1050. int nvec = (buflen + max_iov_size - 1) /
  1051. max_iov_size;
  1052. for (j = 0; j < nvec; j++) {
  1053. vec.iov_base =
  1054. (char *)iov[start].iov_base +
  1055. j * max_iov_size;
  1056. vec.iov_len =
  1057. min_t(int, max_iov_size,
  1058. buflen - max_iov_size * j);
  1059. remaining_data_length -= vec.iov_len;
  1060. ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
  1061. remaining_data_length);
  1062. if (ret)
  1063. goto done;
  1064. }
  1065. i++;
  1066. if (i == niovs)
  1067. break;
  1068. }
  1069. start = i;
  1070. buflen = 0;
  1071. } else {
  1072. i++;
  1073. if (i == niovs) {
  1074. /* send out all remaining vecs */
  1075. remaining_data_length -= buflen;
  1076. ret = smb_direct_post_send_data(st, &send_ctx,
  1077. &iov[start], i - start,
  1078. remaining_data_length);
  1079. if (ret)
  1080. goto done;
  1081. break;
  1082. }
  1083. }
  1084. }
  1085. done:
  1086. ret = smb_direct_flush_send_list(st, &send_ctx, true);
  1087. /*
  1088. * As an optimization, we don't wait for individual I/O to finish
  1089. * before sending the next one.
  1090. * Send them all and wait for pending send count to get to 0
  1091. * that means all the I/Os have been out and we are good to return
  1092. */
  1093. wait_event(st->wait_send_payload_pending,
  1094. atomic_read(&st->send_payload_pending) == 0);
  1095. return ret;
  1096. }
  1097. static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
  1098. enum dma_data_direction dir)
  1099. {
  1100. struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
  1101. struct smb_direct_rdma_rw_msg, cqe);
  1102. struct smb_direct_transport *t = msg->t;
  1103. if (wc->status != IB_WC_SUCCESS) {
  1104. pr_err("read/write error. opcode = %d, status = %s(%d)\n",
  1105. wc->opcode, ib_wc_status_msg(wc->status), wc->status);
  1106. smb_direct_disconnect_rdma_connection(t);
  1107. }
  1108. if (atomic_inc_return(&t->rw_avail_ops) > 0)
  1109. wake_up(&t->wait_rw_avail_ops);
  1110. rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
  1111. msg->sg_list, msg->sgt.nents, dir);
  1112. sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
  1113. complete(msg->completion);
  1114. kfree(msg);
  1115. }
  1116. static void read_done(struct ib_cq *cq, struct ib_wc *wc)
  1117. {
  1118. read_write_done(cq, wc, DMA_FROM_DEVICE);
  1119. }
  1120. static void write_done(struct ib_cq *cq, struct ib_wc *wc)
  1121. {
  1122. read_write_done(cq, wc, DMA_TO_DEVICE);
  1123. }
  1124. static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
  1125. int buf_len, u32 remote_key, u64 remote_offset,
  1126. u32 remote_len, bool is_read)
  1127. {
  1128. struct smb_direct_rdma_rw_msg *msg;
  1129. int ret;
  1130. DECLARE_COMPLETION_ONSTACK(completion);
  1131. struct ib_send_wr *first_wr = NULL;
  1132. ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
  1133. if (ret < 0)
  1134. return ret;
  1135. /* TODO: mempool */
  1136. msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
  1137. sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
  1138. if (!msg) {
  1139. atomic_inc(&t->rw_avail_ops);
  1140. return -ENOMEM;
  1141. }
  1142. msg->sgt.sgl = &msg->sg_list[0];
  1143. ret = sg_alloc_table_chained(&msg->sgt,
  1144. get_buf_page_count(buf, buf_len),
  1145. msg->sg_list, SG_CHUNK_SIZE);
  1146. if (ret) {
  1147. atomic_inc(&t->rw_avail_ops);
  1148. kfree(msg);
  1149. return -ENOMEM;
  1150. }
  1151. ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
  1152. if (ret <= 0) {
  1153. pr_err("failed to get pages\n");
  1154. goto err;
  1155. }
  1156. ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
  1157. msg->sg_list, get_buf_page_count(buf, buf_len),
  1158. 0, remote_offset, remote_key,
  1159. is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  1160. if (ret < 0) {
  1161. pr_err("failed to init rdma_rw_ctx: %d\n", ret);
  1162. goto err;
  1163. }
  1164. msg->t = t;
  1165. msg->cqe.done = is_read ? read_done : write_done;
  1166. msg->completion = &completion;
  1167. first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
  1168. &msg->cqe, NULL);
  1169. ret = ib_post_send(t->qp, first_wr, NULL);
  1170. if (ret) {
  1171. pr_err("failed to post send wr: %d\n", ret);
  1172. goto err;
  1173. }
  1174. wait_for_completion(&completion);
  1175. return 0;
  1176. err:
  1177. atomic_inc(&t->rw_avail_ops);
  1178. if (first_wr)
  1179. rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
  1180. msg->sg_list, msg->sgt.nents,
  1181. is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  1182. sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
  1183. kfree(msg);
  1184. return ret;
  1185. }
  1186. static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
  1187. unsigned int buflen, u32 remote_key,
  1188. u64 remote_offset, u32 remote_len)
  1189. {
  1190. return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
  1191. remote_key, remote_offset,
  1192. remote_len, false);
  1193. }
  1194. static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
  1195. unsigned int buflen, u32 remote_key,
  1196. u64 remote_offset, u32 remote_len)
  1197. {
  1198. return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
  1199. remote_key, remote_offset,
  1200. remote_len, true);
  1201. }
  1202. static void smb_direct_disconnect(struct ksmbd_transport *t)
  1203. {
  1204. struct smb_direct_transport *st = smb_trans_direct_transfort(t);
  1205. ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
  1206. smb_direct_disconnect_rdma_work(&st->disconnect_work);
  1207. wait_event_interruptible(st->wait_status,
  1208. st->status == SMB_DIRECT_CS_DISCONNECTED);
  1209. free_transport(st);
  1210. }
  1211. static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
  1212. struct rdma_cm_event *event)
  1213. {
  1214. struct smb_direct_transport *t = cm_id->context;
  1215. ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
  1216. cm_id, rdma_event_msg(event->event), event->event);
  1217. switch (event->event) {
  1218. case RDMA_CM_EVENT_ESTABLISHED: {
  1219. t->status = SMB_DIRECT_CS_CONNECTED;
  1220. wake_up_interruptible(&t->wait_status);
  1221. break;
  1222. }
  1223. case RDMA_CM_EVENT_DEVICE_REMOVAL:
  1224. case RDMA_CM_EVENT_DISCONNECTED: {
  1225. t->status = SMB_DIRECT_CS_DISCONNECTED;
  1226. wake_up_interruptible(&t->wait_status);
  1227. wake_up_interruptible(&t->wait_reassembly_queue);
  1228. wake_up(&t->wait_send_credits);
  1229. break;
  1230. }
  1231. case RDMA_CM_EVENT_CONNECT_ERROR: {
  1232. t->status = SMB_DIRECT_CS_DISCONNECTED;
  1233. wake_up_interruptible(&t->wait_status);
  1234. break;
  1235. }
  1236. default:
  1237. pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
  1238. cm_id, rdma_event_msg(event->event),
  1239. event->event);
  1240. break;
  1241. }
  1242. return 0;
  1243. }
  1244. static void smb_direct_qpair_handler(struct ib_event *event, void *context)
  1245. {
  1246. struct smb_direct_transport *t = context;
  1247. ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
  1248. t->cm_id, ib_event_msg(event->event), event->event);
  1249. switch (event->event) {
  1250. case IB_EVENT_CQ_ERR:
  1251. case IB_EVENT_QP_FATAL:
  1252. smb_direct_disconnect_rdma_connection(t);
  1253. break;
  1254. default:
  1255. break;
  1256. }
  1257. }
  1258. static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
  1259. int failed)
  1260. {
  1261. struct smb_direct_sendmsg *sendmsg;
  1262. struct smb_direct_negotiate_resp *resp;
  1263. int ret;
  1264. sendmsg = smb_direct_alloc_sendmsg(t);
  1265. if (IS_ERR(sendmsg))
  1266. return -ENOMEM;
  1267. resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
  1268. if (failed) {
  1269. memset(resp, 0, sizeof(*resp));
  1270. resp->min_version = cpu_to_le16(0x0100);
  1271. resp->max_version = cpu_to_le16(0x0100);
  1272. resp->status = STATUS_NOT_SUPPORTED;
  1273. } else {
  1274. resp->status = STATUS_SUCCESS;
  1275. resp->min_version = SMB_DIRECT_VERSION_LE;
  1276. resp->max_version = SMB_DIRECT_VERSION_LE;
  1277. resp->negotiated_version = SMB_DIRECT_VERSION_LE;
  1278. resp->reserved = 0;
  1279. resp->credits_requested =
  1280. cpu_to_le16(t->send_credit_target);
  1281. resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
  1282. resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
  1283. resp->preferred_send_size = cpu_to_le32(t->max_send_size);
  1284. resp->max_receive_size = cpu_to_le32(t->max_recv_size);
  1285. resp->max_fragmented_size =
  1286. cpu_to_le32(t->max_fragmented_recv_size);
  1287. }
  1288. sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
  1289. (void *)resp, sizeof(*resp),
  1290. DMA_TO_DEVICE);
  1291. ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
  1292. if (ret) {
  1293. smb_direct_free_sendmsg(t, sendmsg);
  1294. return ret;
  1295. }
  1296. sendmsg->num_sge = 1;
  1297. sendmsg->sge[0].length = sizeof(*resp);
  1298. sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
  1299. ret = post_sendmsg(t, NULL, sendmsg);
  1300. if (ret) {
  1301. smb_direct_free_sendmsg(t, sendmsg);
  1302. return ret;
  1303. }
  1304. wait_event(t->wait_send_pending,
  1305. atomic_read(&t->send_pending) == 0);
  1306. return 0;
  1307. }
  1308. static int smb_direct_accept_client(struct smb_direct_transport *t)
  1309. {
  1310. struct rdma_conn_param conn_param;
  1311. struct ib_port_immutable port_immutable;
  1312. u32 ird_ord_hdr[2];
  1313. int ret;
  1314. memset(&conn_param, 0, sizeof(conn_param));
  1315. conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
  1316. SMB_DIRECT_CM_INITIATOR_DEPTH);
  1317. conn_param.responder_resources = 0;
  1318. t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
  1319. t->cm_id->port_num,
  1320. &port_immutable);
  1321. if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
  1322. ird_ord_hdr[0] = conn_param.responder_resources;
  1323. ird_ord_hdr[1] = 1;
  1324. conn_param.private_data = ird_ord_hdr;
  1325. conn_param.private_data_len = sizeof(ird_ord_hdr);
  1326. } else {
  1327. conn_param.private_data = NULL;
  1328. conn_param.private_data_len = 0;
  1329. }
  1330. conn_param.retry_count = SMB_DIRECT_CM_RETRY;
  1331. conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
  1332. conn_param.flow_control = 0;
  1333. ret = rdma_accept(t->cm_id, &conn_param);
  1334. if (ret) {
  1335. pr_err("error at rdma_accept: %d\n", ret);
  1336. return ret;
  1337. }
  1338. wait_event_interruptible(t->wait_status,
  1339. t->status != SMB_DIRECT_CS_NEW);
  1340. if (t->status != SMB_DIRECT_CS_CONNECTED)
  1341. return -ENOTCONN;
  1342. return 0;
  1343. }
  1344. static int smb_direct_negotiate(struct smb_direct_transport *t)
  1345. {
  1346. int ret;
  1347. struct smb_direct_recvmsg *recvmsg;
  1348. struct smb_direct_negotiate_req *req;
  1349. recvmsg = get_free_recvmsg(t);
  1350. if (!recvmsg)
  1351. return -ENOMEM;
  1352. recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
  1353. ret = smb_direct_post_recv(t, recvmsg);
  1354. if (ret) {
  1355. pr_err("Can't post recv: %d\n", ret);
  1356. goto out;
  1357. }
  1358. t->negotiation_requested = false;
  1359. ret = smb_direct_accept_client(t);
  1360. if (ret) {
  1361. pr_err("Can't accept client\n");
  1362. goto out;
  1363. }
  1364. smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
  1365. ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
  1366. ret = wait_event_interruptible_timeout(t->wait_status,
  1367. t->negotiation_requested ||
  1368. t->status == SMB_DIRECT_CS_DISCONNECTED,
  1369. SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
  1370. if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
  1371. ret = ret < 0 ? ret : -ETIMEDOUT;
  1372. goto out;
  1373. }
  1374. ret = smb_direct_check_recvmsg(recvmsg);
  1375. if (ret == -ECONNABORTED)
  1376. goto out;
  1377. req = (struct smb_direct_negotiate_req *)recvmsg->packet;
  1378. t->max_recv_size = min_t(int, t->max_recv_size,
  1379. le32_to_cpu(req->preferred_send_size));
  1380. t->max_send_size = min_t(int, t->max_send_size,
  1381. le32_to_cpu(req->max_receive_size));
  1382. t->max_fragmented_send_size =
  1383. le32_to_cpu(req->max_fragmented_size);
  1384. ret = smb_direct_send_negotiate_response(t, ret);
  1385. out:
  1386. if (recvmsg)
  1387. put_recvmsg(t, recvmsg);
  1388. return ret;
  1389. }
  1390. static int smb_direct_init_params(struct smb_direct_transport *t,
  1391. struct ib_qp_cap *cap)
  1392. {
  1393. struct ib_device *device = t->cm_id->device;
  1394. int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
  1395. /* need 2 more sge. because a SMB_DIRECT header will be mapped,
  1396. * and maybe a send buffer could be not page aligned.
  1397. */
  1398. t->max_send_size = smb_direct_max_send_size;
  1399. max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
  1400. if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
  1401. pr_err("max_send_size %d is too large\n", t->max_send_size);
  1402. return -EINVAL;
  1403. }
  1404. /*
  1405. * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
  1406. * read/writes. HCA guarantees at least max_send_sge of sges for
  1407. * a RDMA read/write work request, and if memory registration is used,
  1408. * we need reg_mr, local_inv wrs for each read/write.
  1409. */
  1410. t->max_rdma_rw_size = smb_direct_max_read_write_size;
  1411. max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
  1412. max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
  1413. max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
  1414. max_pages) * 2;
  1415. max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
  1416. max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
  1417. if (max_send_wrs > device->attrs.max_cqe ||
  1418. max_send_wrs > device->attrs.max_qp_wr) {
  1419. pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
  1420. smb_direct_send_credit_target,
  1421. smb_direct_max_outstanding_rw_ops);
  1422. pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
  1423. device->attrs.max_cqe, device->attrs.max_qp_wr);
  1424. return -EINVAL;
  1425. }
  1426. if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
  1427. smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
  1428. pr_err("consider lowering receive_credit_max = %d\n",
  1429. smb_direct_receive_credit_max);
  1430. pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
  1431. device->attrs.max_cqe, device->attrs.max_qp_wr);
  1432. return -EINVAL;
  1433. }
  1434. if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
  1435. pr_err("warning: device max_send_sge = %d too small\n",
  1436. device->attrs.max_send_sge);
  1437. return -EINVAL;
  1438. }
  1439. if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
  1440. pr_err("warning: device max_recv_sge = %d too small\n",
  1441. device->attrs.max_recv_sge);
  1442. return -EINVAL;
  1443. }
  1444. t->recv_credits = 0;
  1445. t->count_avail_recvmsg = 0;
  1446. t->recv_credit_max = smb_direct_receive_credit_max;
  1447. t->recv_credit_target = 10;
  1448. t->new_recv_credits = 0;
  1449. t->send_credit_target = smb_direct_send_credit_target;
  1450. atomic_set(&t->send_credits, 0);
  1451. atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
  1452. t->max_send_size = smb_direct_max_send_size;
  1453. t->max_recv_size = smb_direct_max_receive_size;
  1454. t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
  1455. cap->max_send_wr = max_send_wrs;
  1456. cap->max_recv_wr = t->recv_credit_max;
  1457. cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
  1458. cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
  1459. cap->max_inline_data = 0;
  1460. cap->max_rdma_ctxs = 0;
  1461. return 0;
  1462. }
  1463. static void smb_direct_destroy_pools(struct smb_direct_transport *t)
  1464. {
  1465. struct smb_direct_recvmsg *recvmsg;
  1466. while ((recvmsg = get_free_recvmsg(t)))
  1467. mempool_free(recvmsg, t->recvmsg_mempool);
  1468. while ((recvmsg = get_empty_recvmsg(t)))
  1469. mempool_free(recvmsg, t->recvmsg_mempool);
  1470. mempool_destroy(t->recvmsg_mempool);
  1471. t->recvmsg_mempool = NULL;
  1472. kmem_cache_destroy(t->recvmsg_cache);
  1473. t->recvmsg_cache = NULL;
  1474. mempool_destroy(t->sendmsg_mempool);
  1475. t->sendmsg_mempool = NULL;
  1476. kmem_cache_destroy(t->sendmsg_cache);
  1477. t->sendmsg_cache = NULL;
  1478. }
  1479. static int smb_direct_create_pools(struct smb_direct_transport *t)
  1480. {
  1481. char name[80];
  1482. int i;
  1483. struct smb_direct_recvmsg *recvmsg;
  1484. snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
  1485. t->sendmsg_cache = kmem_cache_create(name,
  1486. sizeof(struct smb_direct_sendmsg) +
  1487. sizeof(struct smb_direct_negotiate_resp),
  1488. 0, SLAB_HWCACHE_ALIGN, NULL);
  1489. if (!t->sendmsg_cache)
  1490. return -ENOMEM;
  1491. t->sendmsg_mempool = mempool_create(t->send_credit_target,
  1492. mempool_alloc_slab, mempool_free_slab,
  1493. t->sendmsg_cache);
  1494. if (!t->sendmsg_mempool)
  1495. goto err;
  1496. snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
  1497. t->recvmsg_cache = kmem_cache_create(name,
  1498. sizeof(struct smb_direct_recvmsg) +
  1499. t->max_recv_size,
  1500. 0, SLAB_HWCACHE_ALIGN, NULL);
  1501. if (!t->recvmsg_cache)
  1502. goto err;
  1503. t->recvmsg_mempool =
  1504. mempool_create(t->recv_credit_max, mempool_alloc_slab,
  1505. mempool_free_slab, t->recvmsg_cache);
  1506. if (!t->recvmsg_mempool)
  1507. goto err;
  1508. INIT_LIST_HEAD(&t->recvmsg_queue);
  1509. for (i = 0; i < t->recv_credit_max; i++) {
  1510. recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
  1511. if (!recvmsg)
  1512. goto err;
  1513. recvmsg->transport = t;
  1514. list_add(&recvmsg->list, &t->recvmsg_queue);
  1515. }
  1516. t->count_avail_recvmsg = t->recv_credit_max;
  1517. return 0;
  1518. err:
  1519. smb_direct_destroy_pools(t);
  1520. return -ENOMEM;
  1521. }
  1522. static int smb_direct_create_qpair(struct smb_direct_transport *t,
  1523. struct ib_qp_cap *cap)
  1524. {
  1525. int ret;
  1526. struct ib_qp_init_attr qp_attr;
  1527. t->pd = ib_alloc_pd(t->cm_id->device, 0);
  1528. if (IS_ERR(t->pd)) {
  1529. pr_err("Can't create RDMA PD\n");
  1530. ret = PTR_ERR(t->pd);
  1531. t->pd = NULL;
  1532. return ret;
  1533. }
  1534. t->send_cq = ib_alloc_cq(t->cm_id->device, t,
  1535. t->send_credit_target, 0, IB_POLL_WORKQUEUE);
  1536. if (IS_ERR(t->send_cq)) {
  1537. pr_err("Can't cre