
/fs/cifs/smbdirect.c

https://gitlab.com/kush/linux
  1. /*
  2. * Copyright (C) 2017, Microsoft Corporation.
  3. *
  4. * Author(s): Long Li <longli@microsoft.com>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
  14. * the GNU General Public License for more details.
  15. */
  16. #include <linux/module.h>
  17. #include <linux/highmem.h>
  18. #include "smbdirect.h"
  19. #include "cifs_debug.h"
  20. #include "cifsproto.h"
  21. #include "smb2proto.h"
  22. static struct smbd_response *get_empty_queue_buffer(
  23. struct smbd_connection *info);
  24. static struct smbd_response *get_receive_buffer(
  25. struct smbd_connection *info);
  26. static void put_receive_buffer(
  27. struct smbd_connection *info,
  28. struct smbd_response *response);
  29. static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
  30. static void destroy_receive_buffers(struct smbd_connection *info);
  31. static void put_empty_packet(
  32. struct smbd_connection *info, struct smbd_response *response);
  33. static void enqueue_reassembly(
  34. struct smbd_connection *info,
  35. struct smbd_response *response, int data_length);
  36. static struct smbd_response *_get_first_reassembly(
  37. struct smbd_connection *info);
  38. static int smbd_post_recv(
  39. struct smbd_connection *info,
  40. struct smbd_response *response);
  41. static int smbd_post_send_empty(struct smbd_connection *info);
  42. static int smbd_post_send_data(
  43. struct smbd_connection *info,
  44. struct kvec *iov, int n_vec, int remaining_data_length);
  45. static int smbd_post_send_page(struct smbd_connection *info,
  46. struct page *page, unsigned long offset,
  47. size_t size, int remaining_data_length);
  48. static void destroy_mr_list(struct smbd_connection *info);
  49. static int allocate_mr_list(struct smbd_connection *info);
  50. /* SMBD version number */
  51. #define SMBD_V1 0x0100
  52. /* Port numbers for SMBD transport */
  53. #define SMB_PORT 445
  54. #define SMBD_PORT 5445
  55. /* Address lookup and resolve timeout in ms */
  56. #define RDMA_RESOLVE_TIMEOUT 5000
  57. /* SMBD negotiation timeout in seconds */
  58. #define SMBD_NEGOTIATE_TIMEOUT 120
  59. /* SMBD minimum receive size and fragmented size defined in [MS-SMBD] */
  60. #define SMBD_MIN_RECEIVE_SIZE 128
  61. #define SMBD_MIN_FRAGMENTED_SIZE 131072
  62. /*
  63. * Default maximum number of RDMA read/write outstanding on this connection
  64. * This value may be decreased during QP creation if limited by the hardware
  65. */
  66. #define SMBD_CM_RESPONDER_RESOURCES 32
  67. /* Maximum number of retries on data transfer operations */
  68. #define SMBD_CM_RETRY 6
  69. /* No need to retry on Receiver Not Ready since SMBD manages credits */
  70. #define SMBD_CM_RNR_RETRY 0
  71. /*
  72. * User configurable initial values per SMBD transport connection
  73. * as defined in [MS-SMBD] 3.1.1.1
  74. * Those may change after a SMBD negotiation
  75. */
  76. /* The local peer's maximum number of credits to grant to the peer */
  77. int smbd_receive_credit_max = 255;
  78. /* The number of send credits the local peer requests of the remote peer */
  79. int smbd_send_credit_target = 255;
  80. /* The maximum size of a single message that can be sent to the remote peer */
  81. int smbd_max_send_size = 1364;
  82. /* The maximum fragmented upper-layer payload receive size supported */
  83. int smbd_max_fragmented_recv_size = 1024 * 1024;
  84. /* The maximum single-message size which can be received */
  85. int smbd_max_receive_size = 8192;
  86. /* The timeout (seconds) before sending a keepalive message on an idle connection */
  87. int smbd_keep_alive_interval = 120;
  88. /*
  89. * User configurable initial values for RDMA transport
  90. * The actual values used may be lower and are limited to hardware capabilities
  91. */
  92. /* Default maximum number of SGEs in a RDMA write/read */
  93. int smbd_max_frmr_depth = 2048;
  94. /* If the payload is smaller than this many bytes, use RDMA send/recv instead of read/write */
  95. int rdma_readwrite_threshold = 4096;
  96. /* Transport logging functions
  97. * Logging is defined as classes. They can be OR'ed to select what is logged
  98. * via the module parameter smbd_logging_class
  99. * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
  100. * log_rdma_event()
  101. */
  102. #define LOG_OUTGOING 0x1
  103. #define LOG_INCOMING 0x2
  104. #define LOG_READ 0x4
  105. #define LOG_WRITE 0x8
  106. #define LOG_RDMA_SEND 0x10
  107. #define LOG_RDMA_RECV 0x20
  108. #define LOG_KEEP_ALIVE 0x40
  109. #define LOG_RDMA_EVENT 0x80
  110. #define LOG_RDMA_MR 0x100
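/* For example, a class mask of (LOG_RDMA_SEND | LOG_RDMA_RECV) == 0x30 traces both send and receive completions */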
  111. static unsigned int smbd_logging_class;
  112. module_param(smbd_logging_class, uint, 0644);
  113. MODULE_PARM_DESC(smbd_logging_class,
  114. "Logging class for SMBD transport 0x0 to 0x100");
  115. #define ERR 0x0
  116. #define INFO 0x1
  117. static unsigned int smbd_logging_level = ERR;
  118. module_param(smbd_logging_level, uint, 0644);
  119. MODULE_PARM_DESC(smbd_logging_level,
  120. "Logging level for SMBD transport, 0 (default): error, 1: info");
  121. #define log_rdma(level, class, fmt, args...) \
  122. do { \
  123. if (level <= smbd_logging_level || class & smbd_logging_class) \
  124. cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
  125. } while (0)
  126. #define log_outgoing(level, fmt, args...) \
  127. log_rdma(level, LOG_OUTGOING, fmt, ##args)
  128. #define log_incoming(level, fmt, args...) \
  129. log_rdma(level, LOG_INCOMING, fmt, ##args)
  130. #define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args)
  131. #define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args)
  132. #define log_rdma_send(level, fmt, args...) \
  133. log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
  134. #define log_rdma_recv(level, fmt, args...) \
  135. log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
  136. #define log_keep_alive(level, fmt, args...) \
  137. log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
  138. #define log_rdma_event(level, fmt, args...) \
  139. log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
  140. #define log_rdma_mr(level, fmt, args...) \
  141. log_rdma(level, LOG_RDMA_MR, fmt, ##args)
  142. /*
  143. * Destroy the transport and related RDMA and memory resources
  144. * Need to go through all the pending counters and make sure no one is using
  145. * the transport while it is being destroyed
  146. */
  147. static void smbd_destroy_rdma_work(struct work_struct *work)
  148. {
  149. struct smbd_response *response;
  150. struct smbd_connection *info =
  151. container_of(work, struct smbd_connection, destroy_work);
  152. unsigned long flags;
  153. log_rdma_event(INFO, "destroying qp\n");
  154. ib_drain_qp(info->id->qp);
  155. rdma_destroy_qp(info->id);
  156. /* Unblock all I/O waiting on the send queue */
  157. wake_up_interruptible_all(&info->wait_send_queue);
  158. log_rdma_event(INFO, "cancelling idle timer\n");
  159. cancel_delayed_work_sync(&info->idle_timer_work);
  160. log_rdma_event(INFO, "cancelling send immediate work\n");
  161. cancel_delayed_work_sync(&info->send_immediate_work);
  162. log_rdma_event(INFO, "wait for all send to finish\n");
  163. wait_event(info->wait_smbd_send_pending,
  164. info->smbd_send_pending == 0);
  165. log_rdma_event(INFO, "wait for all recv to finish\n");
  166. wake_up_interruptible(&info->wait_reassembly_queue);
  167. wait_event(info->wait_smbd_recv_pending,
  168. info->smbd_recv_pending == 0);
  169. log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
  170. wait_event(info->wait_send_pending,
  171. atomic_read(&info->send_pending) == 0);
  172. wait_event(info->wait_send_payload_pending,
  173. atomic_read(&info->send_payload_pending) == 0);
  174. log_rdma_event(INFO, "freeing mr list\n");
  175. wake_up_interruptible_all(&info->wait_mr);
  176. wait_event(info->wait_for_mr_cleanup,
  177. atomic_read(&info->mr_used_count) == 0);
  178. destroy_mr_list(info);
  179. /* It's not possible for the upper layer to reach the reassembly queue now */
  180. log_rdma_event(INFO, "drain the reassembly queue\n");
  181. do {
  182. spin_lock_irqsave(&info->reassembly_queue_lock, flags);
  183. response = _get_first_reassembly(info);
  184. if (response) {
  185. list_del(&response->list);
  186. spin_unlock_irqrestore(
  187. &info->reassembly_queue_lock, flags);
  188. put_receive_buffer(info, response);
  189. } else
  190. spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
  191. } while (response);
  192. info->reassembly_data_length = 0;
  193. log_rdma_event(INFO, "free receive buffers\n");
  194. wait_event(info->wait_receive_queues,
  195. info->count_receive_queue + info->count_empty_packet_queue
  196. == info->receive_credit_max);
  197. destroy_receive_buffers(info);
  198. ib_free_cq(info->send_cq);
  199. ib_free_cq(info->recv_cq);
  200. ib_dealloc_pd(info->pd);
  201. rdma_destroy_id(info->id);
  202. /* free mempools */
  203. mempool_destroy(info->request_mempool);
  204. kmem_cache_destroy(info->request_cache);
  205. mempool_destroy(info->response_mempool);
  206. kmem_cache_destroy(info->response_cache);
  207. info->transport_status = SMBD_DESTROYED;
  208. wake_up_all(&info->wait_destroy);
  209. }
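/* Called when the RDMA CM layer reports a disconnect; schedules the transport teardown work */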
  210. static int smbd_process_disconnected(struct smbd_connection *info)
  211. {
  212. schedule_work(&info->destroy_work);
  213. return 0;
  214. }
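/* Work handler that starts an orderly rdma_disconnect() if the transport is still connected */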
  215. static void smbd_disconnect_rdma_work(struct work_struct *work)
  216. {
  217. struct smbd_connection *info =
  218. container_of(work, struct smbd_connection, disconnect_work);
  219. if (info->transport_status == SMBD_CONNECTED) {
  220. info->transport_status = SMBD_DISCONNECTING;
  221. rdma_disconnect(info->id);
  222. }
  223. }
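/* Queue the disconnect work; callers include completion handlers that must not block */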
  224. static void smbd_disconnect_rdma_connection(struct smbd_connection *info)
  225. {
  226. queue_work(info->workqueue, &info->disconnect_work);
  227. }
  228. /* Upcall from RDMA CM */
  229. static int smbd_conn_upcall(
  230. struct rdma_cm_id *id, struct rdma_cm_event *event)
  231. {
  232. struct smbd_connection *info = id->context;
  233. log_rdma_event(INFO, "event=%d status=%d\n",
  234. event->event, event->status);
  235. switch (event->event) {
  236. case RDMA_CM_EVENT_ADDR_RESOLVED:
  237. case RDMA_CM_EVENT_ROUTE_RESOLVED:
  238. info->ri_rc = 0;
  239. complete(&info->ri_done);
  240. break;
  241. case RDMA_CM_EVENT_ADDR_ERROR:
  242. info->ri_rc = -EHOSTUNREACH;
  243. complete(&info->ri_done);
  244. break;
  245. case RDMA_CM_EVENT_ROUTE_ERROR:
  246. info->ri_rc = -ENETUNREACH;
  247. complete(&info->ri_done);
  248. break;
  249. case RDMA_CM_EVENT_ESTABLISHED:
  250. log_rdma_event(INFO, "connected event=%d\n", event->event);
  251. info->transport_status = SMBD_CONNECTED;
  252. wake_up_interruptible(&info->conn_wait);
  253. break;
  254. case RDMA_CM_EVENT_CONNECT_ERROR:
  255. case RDMA_CM_EVENT_UNREACHABLE:
  256. case RDMA_CM_EVENT_REJECTED:
  257. log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
  258. info->transport_status = SMBD_DISCONNECTED;
  259. wake_up_interruptible(&info->conn_wait);
  260. break;
  261. case RDMA_CM_EVENT_DEVICE_REMOVAL:
  262. case RDMA_CM_EVENT_DISCONNECTED:
  263. /* This happens when we fail the negotiation */
  264. if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
  265. info->transport_status = SMBD_DISCONNECTED;
  266. wake_up(&info->conn_wait);
  267. break;
  268. }
  269. info->transport_status = SMBD_DISCONNECTED;
  270. smbd_process_disconnected(info);
  271. break;
  272. default:
  273. break;
  274. }
  275. return 0;
  276. }
  277. /* Upcall from RDMA QP */
  278. static void
  279. smbd_qp_async_error_upcall(struct ib_event *event, void *context)
  280. {
  281. struct smbd_connection *info = context;
  282. log_rdma_event(ERR, "%s on device %s info %p\n",
  283. ib_event_msg(event->event), event->device->name, info);
  284. switch (event->event) {
  285. case IB_EVENT_CQ_ERR:
  286. case IB_EVENT_QP_FATAL:
  287. smbd_disconnect_rdma_connection(info);
  288. default:
  289. break;
  290. }
  291. }
  292. static inline void *smbd_request_payload(struct smbd_request *request)
  293. {
  294. return (void *)request->packet;
  295. }
  296. static inline void *smbd_response_payload(struct smbd_response *response)
  297. {
  298. return (void *)response->packet;
  299. }
  300. /* Called when a RDMA send is done */
  301. static void send_done(struct ib_cq *cq, struct ib_wc *wc)
  302. {
  303. int i;
  304. struct smbd_request *request =
  305. container_of(wc->wr_cqe, struct smbd_request, cqe);
  306. log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
  307. request, wc->status);
  308. if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
  309. log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
  310. wc->status, wc->opcode);
  311. smbd_disconnect_rdma_connection(request->info);
  312. }
  313. for (i = 0; i < request->num_sge; i++)
  314. ib_dma_unmap_single(request->info->id->device,
  315. request->sge[i].addr,
  316. request->sge[i].length,
  317. DMA_TO_DEVICE);
  318. if (request->has_payload) {
  319. if (atomic_dec_and_test(&request->info->send_payload_pending))
  320. wake_up(&request->info->wait_send_payload_pending);
  321. } else {
  322. if (atomic_dec_and_test(&request->info->send_pending))
  323. wake_up(&request->info->wait_send_pending);
  324. }
  325. mempool_free(request, request->info->request_mempool);
  326. }
  327. static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
  328. {
  329. log_rdma_event(INFO, "resp message min_version %u max_version %u "
  330. "negotiated_version %u credits_requested %u "
  331. "credits_granted %u status %u max_readwrite_size %u "
  332. "preferred_send_size %u max_receive_size %u "
  333. "max_fragmented_size %u\n",
  334. resp->min_version, resp->max_version, resp->negotiated_version,
  335. resp->credits_requested, resp->credits_granted, resp->status,
  336. resp->max_readwrite_size, resp->preferred_send_size,
  337. resp->max_receive_size, resp->max_fragmented_size);
  338. }
  339. /*
  340. * Process a negotiation response message, according to [MS-SMBD] 3.1.5.7
  341. * response, packet_length: the negotiation response message
  342. * return value: true if negotiation succeeded, false otherwise
  343. */
  344. static bool process_negotiation_response(
  345. struct smbd_response *response, int packet_length)
  346. {
  347. struct smbd_connection *info = response->info;
  348. struct smbd_negotiate_resp *packet = smbd_response_payload(response);
  349. if (packet_length < sizeof(struct smbd_negotiate_resp)) {
  350. log_rdma_event(ERR,
  351. "error: packet_length=%d\n", packet_length);
  352. return false;
  353. }
  354. if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
  355. log_rdma_event(ERR, "error: negotiated_version=%x\n",
  356. le16_to_cpu(packet->negotiated_version));
  357. return false;
  358. }
  359. info->protocol = le16_to_cpu(packet->negotiated_version);
  360. if (packet->credits_requested == 0) {
  361. log_rdma_event(ERR, "error: credits_requested==0\n");
  362. return false;
  363. }
  364. info->receive_credit_target = le16_to_cpu(packet->credits_requested);
  365. if (packet->credits_granted == 0) {
  366. log_rdma_event(ERR, "error: credits_granted==0\n");
  367. return false;
  368. }
  369. atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));
  370. atomic_set(&info->receive_credits, 0);
  371. if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
  372. log_rdma_event(ERR, "error: preferred_send_size=%d\n",
  373. le32_to_cpu(packet->preferred_send_size));
  374. return false;
  375. }
  376. info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
  377. if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
  378. log_rdma_event(ERR, "error: max_receive_size=%d\n",
  379. le32_to_cpu(packet->max_receive_size));
  380. return false;
  381. }
  382. info->max_send_size = min_t(int, info->max_send_size,
  383. le32_to_cpu(packet->max_receive_size));
  384. if (le32_to_cpu(packet->max_fragmented_size) <
  385. SMBD_MIN_FRAGMENTED_SIZE) {
  386. log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
  387. le32_to_cpu(packet->max_fragmented_size));
  388. return false;
  389. }
  390. info->max_fragmented_send_size =
  391. le32_to_cpu(packet->max_fragmented_size);
  392. info->rdma_readwrite_threshold =
  393. rdma_readwrite_threshold > info->max_fragmented_send_size ?
  394. info->max_fragmented_send_size :
  395. rdma_readwrite_threshold;
  396. info->max_readwrite_size = min_t(u32,
  397. le32_to_cpu(packet->max_readwrite_size),
  398. info->max_frmr_depth * PAGE_SIZE);
  399. info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
  400. return true;
  401. }
  402. /*
  403. * Check and schedule sending an immediate packet
  404. * This is used to extend credits to the remote peer to keep the transport busy
  405. */
  406. static void check_and_send_immediate(struct smbd_connection *info)
  407. {
  408. if (info->transport_status != SMBD_CONNECTED)
  409. return;
  410. info->send_immediate = true;
  411. /*
  412. * Promptly send a packet if our peer is running low on receive
  413. * credits
  414. */
  415. if (atomic_read(&info->receive_credits) <
  416. info->receive_credit_target - 1)
  417. queue_delayed_work(
  418. info->workqueue, &info->send_immediate_work, 0);
  419. }
  420. static void smbd_post_send_credits(struct work_struct *work)
  421. {
  422. int ret = 0;
  423. int use_receive_queue = 1;
  424. int rc;
  425. struct smbd_response *response;
  426. struct smbd_connection *info =
  427. container_of(work, struct smbd_connection,
  428. post_send_credits_work);
  429. if (info->transport_status != SMBD_CONNECTED) {
  430. wake_up(&info->wait_receive_queues);
  431. return;
  432. }
  433. if (info->receive_credit_target >
  434. atomic_read(&info->receive_credits)) {
  435. while (true) {
  436. if (use_receive_queue)
  437. response = get_receive_buffer(info);
  438. else
  439. response = get_empty_queue_buffer(info);
  440. if (!response) {
  441. /* now switch to the empty packet queue */
  442. if (use_receive_queue) {
  443. use_receive_queue = 0;
  444. continue;
  445. } else
  446. break;
  447. }
  448. response->type = SMBD_TRANSFER_DATA;
  449. response->first_segment = false;
  450. rc = smbd_post_recv(info, response);
  451. if (rc) {
  452. log_rdma_recv(ERR,
  453. "post_recv failed rc=%d\n", rc);
  454. put_receive_buffer(info, response);
  455. break;
  456. }
  457. ret++;
  458. }
  459. }
  460. spin_lock(&info->lock_new_credits_offered);
  461. info->new_credits_offered += ret;
  462. spin_unlock(&info->lock_new_credits_offered);
  463. atomic_add(ret, &info->receive_credits);
  464. /* Check if we can post new receives and grant credits to the peer */
  465. check_and_send_immediate(info);
  466. }
  467. static void smbd_recv_done_work(struct work_struct *work)
  468. {
  469. struct smbd_connection *info =
  470. container_of(work, struct smbd_connection, recv_done_work);
  471. /*
  472. * We may have new send credits granted from the remote peer
  473. * If any sender is blocked on lack of credits, unblock it
  474. */
  475. if (atomic_read(&info->send_credits))
  476. wake_up_interruptible(&info->wait_send_queue);
  477. /*
  478. * Check if we need to send something to remote peer to
  479. * grant more credits or respond to KEEP_ALIVE packet
  480. */
  481. check_and_send_immediate(info);
  482. }
  483. /* Called from softirq, when recv is done */
  484. static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
  485. {
  486. struct smbd_data_transfer *data_transfer;
  487. struct smbd_response *response =
  488. container_of(wc->wr_cqe, struct smbd_response, cqe);
  489. struct smbd_connection *info = response->info;
  490. int data_length = 0;
  491. log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d "
  492. "byte_len=%d pkey_index=%x\n",
  493. response, response->type, wc->status, wc->opcode,
  494. wc->byte_len, wc->pkey_index);
  495. if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
  496. log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
  497. wc->status, wc->opcode);
  498. smbd_disconnect_rdma_connection(info);
  499. goto error;
  500. }
  501. ib_dma_sync_single_for_cpu(
  502. wc->qp->device,
  503. response->sge.addr,
  504. response->sge.length,
  505. DMA_FROM_DEVICE);
  506. switch (response->type) {
  507. /* SMBD negotiation response */
  508. case SMBD_NEGOTIATE_RESP:
  509. dump_smbd_negotiate_resp(smbd_response_payload(response));
  510. info->full_packet_received = true;
  511. info->negotiate_done =
  512. process_negotiation_response(response, wc->byte_len);
  513. complete(&info->negotiate_completion);
  514. break;
  515. /* SMBD data transfer packet */
  516. case SMBD_TRANSFER_DATA:
  517. data_transfer = smbd_response_payload(response);
  518. data_length = le32_to_cpu(data_transfer->data_length);
  519. /*
  520. * If this is a packet with a data payload, place the data in the
  521. * reassembly queue and wake up the reading thread
  522. */
  523. if (data_length) {
  524. if (info->full_packet_received)
  525. response->first_segment = true;
  526. if (le32_to_cpu(data_transfer->remaining_data_length))
  527. info->full_packet_received = false;
  528. else
  529. info->full_packet_received = true;
  530. enqueue_reassembly(
  531. info,
  532. response,
  533. data_length);
  534. } else
  535. put_empty_packet(info, response);
  536. if (data_length)
  537. wake_up_interruptible(&info->wait_reassembly_queue);
  538. atomic_dec(&info->receive_credits);
  539. info->receive_credit_target =
  540. le16_to_cpu(data_transfer->credits_requested);
  541. atomic_add(le16_to_cpu(data_transfer->credits_granted),
  542. &info->send_credits);
  543. log_incoming(INFO, "data flags %d data_offset %d "
  544. "data_length %d remaining_data_length %d\n",
  545. le16_to_cpu(data_transfer->flags),
  546. le32_to_cpu(data_transfer->data_offset),
  547. le32_to_cpu(data_transfer->data_length),
  548. le32_to_cpu(data_transfer->remaining_data_length));
  549. /* Send a KEEP_ALIVE response right away if requested */
  550. info->keep_alive_requested = KEEP_ALIVE_NONE;
  551. if (le16_to_cpu(data_transfer->flags) &
  552. SMB_DIRECT_RESPONSE_REQUESTED) {
  553. info->keep_alive_requested = KEEP_ALIVE_PENDING;
  554. }
  555. queue_work(info->workqueue, &info->recv_done_work);
  556. return;
  557. default:
  558. log_rdma_recv(ERR,
  559. "unexpected response type=%d\n", response->type);
  560. }
  561. error:
  562. put_receive_buffer(info, response);
  563. }
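/* Create an RDMA CM ID for this connection and resolve the server address and route */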
  564. static struct rdma_cm_id *smbd_create_id(
  565. struct smbd_connection *info,
  566. struct sockaddr *dstaddr, int port)
  567. {
  568. struct rdma_cm_id *id;
  569. int rc;
  570. __be16 *sport;
  571. id = rdma_create_id(&init_net, smbd_conn_upcall, info,
  572. RDMA_PS_TCP, IB_QPT_RC);
  573. if (IS_ERR(id)) {
  574. rc = PTR_ERR(id);
  575. log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
  576. return id;
  577. }
  578. if (dstaddr->sa_family == AF_INET6)
  579. sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
  580. else
  581. sport = &((struct sockaddr_in *)dstaddr)->sin_port;
  582. *sport = htons(port);
  583. init_completion(&info->ri_done);
  584. info->ri_rc = -ETIMEDOUT;
  585. rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
  586. RDMA_RESOLVE_TIMEOUT);
  587. if (rc) {
  588. log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
  589. goto out;
  590. }
  591. wait_for_completion_interruptible_timeout(
  592. &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
  593. rc = info->ri_rc;
  594. if (rc) {
  595. log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
  596. goto out;
  597. }
  598. info->ri_rc = -ETIMEDOUT;
  599. rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
  600. if (rc) {
  601. log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
  602. goto out;
  603. }
  604. wait_for_completion_interruptible_timeout(
  605. &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
  606. rc = info->ri_rc;
  607. if (rc) {
  608. log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
  609. goto out;
  610. }
  611. return id;
  612. out:
  613. rdma_destroy_id(id);
  614. return ERR_PTR(rc);
  615. }
  616. /*
  617. * Test if FRWR (Fast Registration Work Requests) is supported on the device
  618. * This implementation requires FRWR for RDMA read/write
  619. * return value: true if it is supported
  620. */
  621. static bool frwr_is_supported(struct ib_device_attr *attrs)
  622. {
  623. if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
  624. return false;
  625. if (attrs->max_fast_reg_page_list_len == 0)
  626. return false;
  627. return true;
  628. }
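/* Open the RDMA interface: create the CM ID, check FRWR support and allocate the protection domain */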
  629. static int smbd_ia_open(
  630. struct smbd_connection *info,
  631. struct sockaddr *dstaddr, int port)
  632. {
  633. int rc;
  634. info->id = smbd_create_id(info, dstaddr, port);
  635. if (IS_ERR(info->id)) {
  636. rc = PTR_ERR(info->id);
  637. goto out1;
  638. }
  639. if (!frwr_is_supported(&info->id->device->attrs)) {
  640. log_rdma_event(ERR,
  641. "Fast Registration Work Requests "
  642. "(FRWR) is not supported\n");
  643. log_rdma_event(ERR,
  644. "Device capability flags = %llx "
  645. "max_fast_reg_page_list_len = %u\n",
  646. info->id->device->attrs.device_cap_flags,
  647. info->id->device->attrs.max_fast_reg_page_list_len);
  648. rc = -EPROTONOSUPPORT;
  649. goto out2;
  650. }
  651. info->max_frmr_depth = min_t(int,
  652. smbd_max_frmr_depth,
  653. info->id->device->attrs.max_fast_reg_page_list_len);
  654. info->mr_type = IB_MR_TYPE_MEM_REG;
  655. if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
  656. info->mr_type = IB_MR_TYPE_SG_GAPS;
  657. info->pd = ib_alloc_pd(info->id->device, 0);
  658. if (IS_ERR(info->pd)) {
  659. rc = PTR_ERR(info->pd);
  660. log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
  661. goto out2;
  662. }
  663. return 0;
  664. out2:
  665. rdma_destroy_id(info->id);
  666. info->id = NULL;
  667. out1:
  668. return rc;
  669. }
  670. /*
  671. * Send a negotiation request message to the peer
  672. * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
  673. * After negotiation, the transport is connected and ready for
  674. * carrying upper layer SMB payload
  675. */
  676. static int smbd_post_send_negotiate_req(struct smbd_connection *info)
  677. {
  678. struct ib_send_wr send_wr;
  679. int rc = -ENOMEM;
  680. struct smbd_request *request;
  681. struct smbd_negotiate_req *packet;
  682. request = mempool_alloc(info->request_mempool, GFP_KERNEL);
  683. if (!request)
  684. return rc;
  685. request->info = info;
  686. packet = smbd_request_payload(request);
  687. packet->min_version = cpu_to_le16(SMBD_V1);
  688. packet->max_version = cpu_to_le16(SMBD_V1);
  689. packet->reserved = 0;
  690. packet->credits_requested = cpu_to_le16(info->send_credit_target);
  691. packet->preferred_send_size = cpu_to_le32(info->max_send_size);
  692. packet->max_receive_size = cpu_to_le32(info->max_receive_size);
  693. packet->max_fragmented_size =
  694. cpu_to_le32(info->max_fragmented_recv_size);
  695. request->num_sge = 1;
  696. request->sge[0].addr = ib_dma_map_single(
  697. info->id->device, (void *)packet,
  698. sizeof(*packet), DMA_TO_DEVICE);
  699. if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
  700. rc = -EIO;
  701. goto dma_mapping_failed;
  702. }
  703. request->sge[0].length = sizeof(*packet);
  704. request->sge[0].lkey = info->pd->local_dma_lkey;
  705. ib_dma_sync_single_for_device(
  706. info->id->device, request->sge[0].addr,
  707. request->sge[0].length, DMA_TO_DEVICE);
  708. request->cqe.done = send_done;
  709. send_wr.next = NULL;
  710. send_wr.wr_cqe = &request->cqe;
  711. send_wr.sg_list = request->sge;
  712. send_wr.num_sge = request->num_sge;
  713. send_wr.opcode = IB_WR_SEND;
  714. send_wr.send_flags = IB_SEND_SIGNALED;
  715. log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n",
  716. request->sge[0].addr,
  717. request->sge[0].length, request->sge[0].lkey);
  718. request->has_payload = false;
  719. atomic_inc(&info->send_pending);
  720. rc = ib_post_send(info->id->qp, &send_wr, NULL);
  721. if (!rc)
  722. return 0;
  723. /* if we reach here, post send failed */
  724. log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
  725. atomic_dec(&info->send_pending);
  726. ib_dma_unmap_single(info->id->device, request->sge[0].addr,
  727. request->sge[0].length, DMA_TO_DEVICE);
  728. smbd_disconnect_rdma_connection(info);
  729. dma_mapping_failed:
  730. mempool_free(request, info->request_mempool);
  731. return rc;
  732. }
  733. /*
  734. * Extend the credits to remote peer
  735. * This implements [MS-SMBD] 3.1.5.9
  736. * The idea is that we should extend credits to the remote peer as quickly as
  737. * allowed, to maintain data flow. We allocate as many receive
  738. * buffers as possible, and extend the receive credits to the remote peer
  739. * return value: the new credits being granted.
  740. */
  741. static int manage_credits_prior_sending(struct smbd_connection *info)
  742. {
  743. int new_credits;
  744. spin_lock(&info->lock_new_credits_offered);
  745. new_credits = info->new_credits_offered;
  746. info->new_credits_offered = 0;
  747. spin_unlock(&info->lock_new_credits_offered);
  748. return new_credits;
  749. }
  750. /*
  751. * Check if we need to send a KEEP_ALIVE message
  752. * The idle connection timer triggers a KEEP_ALIVE message when it expires
  753. * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send
  754. * back a response.
  755. * return value:
  756. * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
  757. * 0: otherwise
  758. */
  759. static int manage_keep_alive_before_sending(struct smbd_connection *info)
  760. {
  761. if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
  762. info->keep_alive_requested = KEEP_ALIVE_SENT;
  763. return 1;
  764. }
  765. return 0;
  766. }
  767. /*
  768. * Build and prepare the SMBD packet header
  769. * This function waits for available send credits and builds an SMBD packet
  770. * header. The caller can then optionally append a payload to the packet after
  771. * the header
  772. * input values
  773. * size: the size of the payload
  774. * remaining_data_length: remaining data to send if this is part of a
  775. * fragmented packet
  776. * output values
  777. * request_out: the request allocated from this function
  778. * return values: 0 on success, otherwise actual error code returned
  779. */
  780. static int smbd_create_header(struct smbd_connection *info,
  781. int size, int remaining_data_length,
  782. struct smbd_request **request_out)
  783. {
  784. struct smbd_request *request;
  785. struct smbd_data_transfer *packet;
  786. int header_length;
  787. int rc;
  788. /* Wait for send credits. A SMBD packet needs one credit */
  789. rc = wait_event_interruptible(info->wait_send_queue,
  790. atomic_read(&info->send_credits) > 0 ||
  791. info->transport_status != SMBD_CONNECTED);
  792. if (rc)
  793. return rc;
  794. if (info->transport_status != SMBD_CONNECTED) {
  795. log_outgoing(ERR, "disconnected not sending\n");
  796. return -ENOENT;
  797. }
  798. atomic_dec(&info->send_credits);
  799. request = mempool_alloc(info->request_mempool, GFP_KERNEL);
  800. if (!request) {
  801. rc = -ENOMEM;
  802. goto err;
  803. }
  804. request->info = info;
  805. /* Fill in the packet header */
  806. packet = smbd_request_payload(request);
  807. packet->credits_requested = cpu_to_le16(info->send_credit_target);
  808. packet->credits_granted =
  809. cpu_to_le16(manage_credits_prior_sending(info));
  810. info->send_immediate = false;
  811. packet->flags = 0;
  812. if (manage_keep_alive_before_sending(info))
  813. packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
  814. packet->reserved = 0;
  815. if (!size)
  816. packet->data_offset = 0;
  817. else
  818. packet->data_offset = cpu_to_le32(24);
  819. packet->data_length = cpu_to_le32(size);
  820. packet->remaining_data_length = cpu_to_le32(remaining_data_length);
  821. packet->padding = 0;
  822. log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
  823. "data_offset=%d data_length=%d remaining_data_length=%d\n",
  824. le16_to_cpu(packet->credits_requested),
  825. le16_to_cpu(packet->credits_granted),
  826. le32_to_cpu(packet->data_offset),
  827. le32_to_cpu(packet->data_length),
  828. le32_to_cpu(packet->remaining_data_length));
  829. /* Map the packet to DMA */
  830. header_length = sizeof(struct smbd_data_transfer);
  831. /* If this is a packet without payload, don't send padding */
  832. if (!size)
  833. header_length = offsetof(struct smbd_data_transfer, padding);
  834. request->num_sge = 1;
  835. request->sge[0].addr = ib_dma_map_single(info->id->device,
  836. (void *)packet,
  837. header_length,
  838. DMA_BIDIRECTIONAL);
  839. if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
  840. mempool_free(request, info->request_mempool);
  841. rc = -EIO;
  842. goto err;
  843. }
  844. request->sge[0].length = header_length;
  845. request->sge[0].lkey = info->pd->local_dma_lkey;
  846. *request_out = request;
  847. return 0;
  848. err:
  849. atomic_inc(&info->send_credits);
  850. return rc;
  851. }
  852. static void smbd_destroy_header(struct smbd_connection *info,
  853. struct smbd_request *request)
  854. {
  855. ib_dma_unmap_single(info->id->device,
  856. request->sge[0].addr,
  857. request->sge[0].length,
  858. DMA_TO_DEVICE);
  859. mempool_free(request, info->request_mempool);
  860. atomic_inc(&info->send_credits);
  861. }
  862. /* Post the send request */
  863. static int smbd_post_send(struct smbd_connection *info,
  864. struct smbd_request *request, bool has_payload)
  865. {
  866. struct ib_send_wr send_wr;
  867. int rc, i;
  868. for (i = 0; i < request->num_sge; i++) {
  869. log_rdma_send(INFO,
  870. "rdma_request sge[%d] addr=%llu length=%u\n",
  871. i, request->sge[i].addr, request->sge[i].length);
  872. ib_dma_sync_single_for_device(
  873. info->id->device,
  874. request->sge[i].addr,
  875. request->sge[i].length,
  876. DMA_TO_DEVICE);
  877. }
  878. request->cqe.done = send_done;
  879. send_wr.next = NULL;
  880. send_wr.wr_cqe = &request->cqe;
  881. send_wr.sg_list = request->sge;
  882. send_wr.num_sge = request->num_sge;
  883. send_wr.opcode = IB_WR_SEND;
  884. send_wr.send_flags = IB_SEND_SIGNALED;
  885. if (has_payload) {
  886. request->has_payload = true;
  887. atomic_inc(&info->send_payload_pending);
  888. } else {
  889. request->has_payload = false;
  890. atomic_inc(&info->send_pending);
  891. }
  892. rc = ib_post_send(info->id->qp, &send_wr, NULL);
  893. if (rc) {
  894. log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
  895. if (has_payload) {
  896. if (atomic_dec_and_test(&info->send_payload_pending))
  897. wake_up(&info->wait_send_payload_pending);
  898. } else {
  899. if (atomic_dec_and_test(&info->send_pending))
  900. wake_up(&info->wait_send_pending);
  901. }
  902. smbd_disconnect_rdma_connection(info);
  903. } else
  904. /* Reset timer for idle connection after packet is sent */
  905. mod_delayed_work(info->workqueue, &info->idle_timer_work,
  906. info->keep_alive_interval*HZ);
  907. return rc;
  908. }
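/* Build an SMBD data packet header, map the scatterlist and post the whole message as a single send */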
  909. static int smbd_post_send_sgl(struct smbd_connection *info,
  910. struct scatterlist *sgl, int data_length, int remaining_data_length)
  911. {
  912. int num_sgs;
  913. int i, rc;
  914. struct smbd_request *request;
  915. struct scatterlist *sg;
  916. rc = smbd_create_header(
  917. info, data_length, remaining_data_length, &request);
  918. if (rc)
  919. return rc;
  920. num_sgs = sgl ? sg_nents(sgl) : 0;
  921. for_each_sg(sgl, sg, num_sgs, i) {
  922. request->sge[i+1].addr =
  923. ib_dma_map_page(info->id->device, sg_page(sg),
  924. sg->offset, sg->length, DMA_BIDIRECTIONAL);
  925. if (ib_dma_mapping_error(
  926. info->id->device, request->sge[i+1].addr)) {
  927. rc = -EIO;
  928. request->sge[i+1].addr = 0;
  929. goto dma_mapping_failure;
  930. }
  931. request->sge[i+1].length = sg->length;
  932. request->sge[i+1].lkey = info->pd->local_dma_lkey;
  933. request->num_sge++;
  934. }
  935. rc = smbd_post_send(info, request, data_length);
  936. if (!rc)
  937. return 0;
  938. dma_mapping_failure:
  939. for (i = 1; i < request->num_sge; i++)
  940. if (request->sge[i].addr)
  941. ib_dma_unmap_single(info->id->device,
  942. request->sge[i].addr,
  943. request->sge[i].length,
  944. DMA_TO_DEVICE);
  945. smbd_destroy_header(info, request);
  946. return rc;
  947. }
  948. /*
  949. * Send a page
  950. * page: the page to send
  951. * offset: offset in the page to send
  952. * size: length in the page to send
  953. * remaining_data_length: remaining data to send in this payload
  954. */
  955. static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
  956. unsigned long offset, size_t size, int remaining_data_length)
  957. {
  958. struct scatterlist sgl;
  959. sg_init_table(&sgl, 1);
  960. sg_set_page(&sgl, page, size, offset);
  961. return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
  962. }
  963. /*
  964. * Send an empty message
  965. * An empty message is used to extend credits to the peer or to keep the
  966. * connection alive while there is no upper layer payload to send
  967. */
  968. static int smbd_post_send_empty(struct smbd_connection *info)
  969. {
  970. info->count_send_empty++;
  971. return smbd_post_send_sgl(info, NULL, 0, 0);
  972. }
  973. /*
  974. * Send a data buffer
  975. * iov: the iov array describing the data buffers
  976. * n_vec: number of entries in the iov array
  977. * remaining_data_length: remaining data to send following this packet
  978. * in segmented SMBD packet
  979. */
  980. static int smbd_post_send_data(
  981. struct smbd_connection *info, struct kvec *iov, int n_vec,
  982. int remaining_data_length)
  983. {
  984. int i;
  985. u32 data_length = 0;
  986. struct scatterlist sgl[SMBDIRECT_MAX_SGE];
  987. if (n_vec > SMBDIRECT_MAX_SGE) {
  988. cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
  989. return -ENOMEM;
  990. }
  991. sg_init_table(sgl, n_vec);
  992. for (i = 0; i < n_vec; i++) {
  993. data_length += iov[i].iov_len;
  994. sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
  995. }
  996. return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
  997. }
  998. /*
  999. * Post a receive request to the transport
  1000. * The remote peer can only send data when a receive request is posted
  1001. * The interaction is controlled by the send/receive credit system
  1002. */
  1003. static int smbd_post_recv(
  1004. struct smbd_connection *info, struct smbd_response *response)
  1005. {
  1006. struct ib_recv_wr recv_wr;
  1007. int rc = -EIO;
  1008. response->sge.addr = ib_dma_map_single(
  1009. info->id->device, response->packet,
  1010. info->max_receive_size, DMA_FROM_DEVICE);
  1011. if (ib_dma_mapping_error(info->id->device, response->sge.addr))
  1012. return rc;
  1013. response->sge.length = info->max_receive_size;
  1014. response->sge.lkey = info->pd->local_dma_lkey;
  1015. response->cqe.done = recv_done;
  1016. recv_wr.wr_cqe = &response->cqe;
  1017. recv_wr.next = NULL;
  1018. recv_wr.sg_list = &response->sge;
  1019. recv_wr.num_sge = 1;
  1020. rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
  1021. if (rc) {
  1022. ib_dma_unmap_single(info->id->device, response->sge.addr,
  1023. response->sge.length, DMA_FROM_DEVICE);
  1024. smbd_disconnect_rdma_connection(info);
  1025. log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
  1026. }
  1027. return rc;
  1028. }
  1029. /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
  1030. static int smbd_negotiate(struct smbd_connection *info)
  1031. {
  1032. int rc;
  1033. struct smbd_response *response = get_receive_buffer(info);
  1034. response->type = SMBD_NEGOTIATE_RESP;
  1035. rc = smbd_post_recv(info, response);
  1036. log_rdma_event(INFO,
  1037. "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
  1038. "iov.lkey=%x\n",
  1039. rc, response->sge.addr,
  1040. response->sge.length, response->sge.lkey);
  1041. if (rc)
  1042. return rc;
  1043. init_completion(&info->negotiate_completion);
  1044. info->negotiate_done = false;
  1045. rc = smbd_post_send_negotiate_req(info);
  1046. if (rc)
  1047. return rc;
  1048. rc = wait_for_completion_interruptible_timeout(
  1049. &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
  1050. log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
  1051. if (info->negotiate_done)
  1052. return 0;
  1053. if (rc == 0)
  1054. rc = -ETIMEDOUT;
  1055. else if (rc == -ERESTARTSYS)
  1056. rc = -EINTR;
  1057. else
  1058. rc = -ENOTCONN;
  1059. return rc;
  1060. }
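/* Return a credit-only (empty) packet buffer to the empty packet queue for later reuse */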
  1061. static void put_empty_packet(
  1062. struct smbd_connection *info, struct smbd_response *response)
  1063. {
  1064. spin_lock(&info->empty_packet_queue_lock);
  1065. list_add_tail(&response->list, &info->empty_packet_queue);
  1066. info->count_empty_packet_queue++;
  1067. spin_unlock(&info->empty_packet_queue_lock);
  1068. queue_work(info->workqueue, &info->post_send_credits_work);
  1069. }
  1070. /*
  1071. * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
  1072. * This is a queue for reassembling upper layer payload and present to upper
  1073. * layer. All the inncoming payload go to the reassembly queue, regardless of
  1074. * if reassembly is required. The uuper layer code reads from the queue for all
  1075. * incoming payloads.
  1076. * Put a received packet to the reassembly queue
  1077. * response: the packet received
  1078. * data_length: the size of payload in this packet
  1079. */
  1080. static void enqueue_reassembly(
  1081. struct smbd_connection *info,
  1082. struct smbd_response *response,
  1083. int data_length)
  1084. {
  1085. spin_lock(&info->reassembly_queue_lock);
  1086. list_add_tail(&response->list, &info->reassembly_queue);
  1087. info->reassembly_queue_length++;
  1088. /*
  1089. * Make sure reassembly_data_length is updated after list and
  1090. * reassembly_queue_length are updated. On the dequeue side
  1091. * reassembly_data_length is checked without a lock to determine
  1092. * if reassembly_queue_length and the list are up to date
  1093. */
  1094. virt_wmb();
  1095. info->reassembly_data_length += data_length;
  1096. spin_unlock(&info->reassembly_queue_lock);
  1097. info->count_reassembly_queue++;
  1098. info->count_enqueue_reassembly_queue++;
  1099. }
  1100. /*
  1101. * Get the first entry at the front of reassembly queue
  1102. * Caller is responsible for locking
  1103. * return value: the first entry if any, NULL if queue is empty
  1104. */
  1105. static struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
  1106. {
  1107. struct smbd_response *ret = NULL;
  1108. if (!list_empty(&info->reassembly_queue)) {
  1109. ret = list_first_entry(
  1110. &info->reassembly_queue,
  1111. struct smbd_response, list);
  1112. }
  1113. return ret;
  1114. }
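/* Take a buffer from the empty packet queue, if available, so a new receive can be posted */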
  1115. static struct smbd_response *get_empty_queue_buffer(
  1116. struct smbd_connection *info)
  1117. {
  1118. struct smbd_response *ret = NULL;
  1119. unsigned long flags;
  1120. spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
  1121. if (!list_empty(&info->empty_packet_queue)) {
  1122. ret = list_first_entry(
  1123. &info->empty_packet_queue,
  1124. struct smbd_response, list);
  1125. list_del(&ret->list);
  1126. info->count_empty_packet_queue--;
  1127. }
  1128. spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
  1129. return ret;
  1130. }
  1131. /*
  1132. * Get a receive buffer
  1133. * For each remote send, we need to post a receive. The receive buffers are
  1134. * pre-allocated when the transport is established.
  1135. * return value: the receive buffer, NULL if none is available
  1136. */
  1137. static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
  1138. {
  1139. struct smbd_response *ret = NULL;
  1140. unsigned long flags;
  1141. spin_lock_irqsave(&info->receive_queue_lock, flags);
  1142. if (!list_empty(&info->receive_queue)) {
  1143. ret = list_first_entry(
  1144. &info->receive_queue,
  1145. struct smbd_response, list);
  1146. list_del(&ret->list);
  1147. info->count_receive_queue--;
  1148. info->count_get_receive_buffer++;
  1149. }
  1150. spin_unlock_irqrestore(&info->receive_queue_lock, flags);
  1151. return ret;
  1152. }
  1153. /*
  1154. * Return a receive buffer
  1155. * Upon returning a receive buffer, we can post a new receive and extend
  1156. * more receive credits to the remote peer. This is done immediately after a
  1157. * receive buffer is returned.
  1158. */
  1159. static void put_receive_buffer(
  1160. struct smbd_connection *info, struct smbd_response *response)
  1161. {
  1162. unsigned long flags;
  1163. ib_dma_unmap_single(info->id->device, response->sge.addr,
  1164. response->sge.length, DMA_FROM_DEVICE);
  1165. spin_lock_irqsave(&info->receive_queue_lock, flags);
  1166. list_add_tail(&response->list, &info->receive_queue);
  1167. info->count_receive_queue++;
  1168. info->count_put_receive_buffer++;
  1169. spin_unlock_irqrestore(&info->receive_queue_lock, flags);
  1170. queue_work(info->workqueue, &info->post_send_credits_work);
  1171. }
  1172. /* Preallocate all receive buffers on transport establishment */
  1173. static int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
  1174. {
  1175. int i;
  1176. struct smbd_response *response;
  1177. INIT_LIST_HEAD(&info->reassembly_queue);
  1178. spin_lock_init(&info->reassembly_queue_lock);
  1179. info->reassembly_data_length = 0;
  1180. info->reassembly_queue_length = 0;
  1181. INIT_LIST_HEAD(&info->receive_queue);
  1182. spin_lock_init(&info->receive_queue_lock);
  1183. info->count_receive_queue = 0;
  1184. INIT_LIST_HEAD(&info->empty_packet_queue);
  1185. spin_lock_init(&info->empty_packet_queue_lock);
  1186. info->count_empty_packet_queue = 0;
  1187. init_waitqueue_head(&info->wait_receive_queues);
  1188. for (i = 0; i < num_buf; i++) {
  1189. response = mempool_alloc(info->response_mempool, GFP_KERNEL);
  1190. if (!response)
  1191. goto allocate_failed;
  1192. response->info = info;
  1193. list_add_tail(&response->list, &info->receive_queue);
  1194. info->count_receive_queue++;
  1195. }
  1196. return 0;
  1197. allocate_failed:
  1198. while (!list_empty(&info->receive_queue)) {
  1199. response = list_first_entry(
  1200. &info->receive_queue,
  1201. struct smbd_response, list);
  1202. list_del(&response->list);
  1203. info->count_receive_queue--;
  1204. mempool_free(response, info->response_mempool);
  1205. }
  1206. return -ENOMEM;
  1207. }
  1208. static void destroy_receive_buffers(struct smbd_connection *info)
  1209. {
  1210. struct smbd_response *response;
  1211. while ((response = get_receive_buffer(info)))
  1212. mempool_free(response, info->response_mempool);
  1213. while ((response = get_empty_queue_buffer(info)))
  1214. mempool_free(response, info->response_mempool);
  1215. }
  1216. /*
  1217. * Check and send an immediate or keep alive packet
  1218. * The conditions for sending those packets are defined in [MS-SMBD] 3.1.1.1
  1219. * Connection.KeepaliveRequested and Connection.SendImmediate
  1220. * The idea is to extend credits to the server as soon as they become available
  1221. */
  1222. static void send_immediate_work(struct work_struct *work)
  1223. {
  1224. struct smbd_connection *info = container_of(
  1225. work, struct smbd_connection,
  1226. send_immediate_work.work);
  1227. if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
  1228. info->send_immediate) {
  1229. log_keep_alive(INFO, "send an empty message\n");
  1230. smbd_post_send_empty(info);
  1231. }
  1232. }
  1233. /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
  1234. static void idle_connection_timer(struct work_struct *work)
  1235. {
  1236. struct smbd_connection *info = container_of(
  1237. work, struct smbd_connection,
  1238. idle_timer_work.work);
  1239. if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
  1240. log_keep_alive(ERR,
  1241. "error status info->keep_alive_requested=%d\n",
  1242. info->keep_alive_requested);
  1243. smbd_disconnect_rdma_connection(info);
  1244. return;
  1245. }
  1246. log_keep_alive(INFO, "about to send an empty idle message\n");
  1247. smbd_post_send_empty(info);
  1248. /* Setup the next idle timeout work */
  1249. queue_delayed_work(info->workqueue, &info->idle_timer_work,
  1250. info->keep_alive_interval*HZ);
  1251. }
  1252. /* Destroy this SMBD connection, called from upper layer */
  1253. void smbd_destroy(struct smbd_connection *info)
  1254. {
  1255. log_rdma_event(INFO, "destroying rdma session\n");
  1256. /* Kick off the disconnection process */
  1257. smbd_disconnect_rdma_connection(info);
  1258. log_rdma_event(INFO, "wait for transport being destroyed\n");
  1259. wait_event(info->wait_destroy,
  1260. info->transport_status == SMBD_DESTROYED);
  1261. destroy_workqueue(info->workqueue);
  1262. kfree(info);
  1263. }
  1264. /*
  1265. * Reconnect this SMBD connection, called from upper layer
  1266. * return value: 0 on success, or actual error code
  1267. */
  1268. int smbd_reconnect(struct TCP_Server_Info *server)
  1269. {
  1270. log_rdma_event(INFO, "reconnecting rdma session\n");
  1271. if (!server->smbd_conn) {
  1272. log_rdma_event(INFO, "rdma session already destroyed\n");
  1273. goto create_conn;
  1274. }
  1275. /*
  1276. * This is possible if the transport is disconnected and we haven't received
  1277. * the notification from RDMA, but the upper layer has detected a timeout
  1278. */
  1279. if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
  1280. log_rdma_event(INFO, "disconnecting transport\n");
  1281. smbd_disconnect_rdma_connection(server->smbd_conn);
  1282. }
  1283. /* wait until the transport is destroyed */
  1284. if (!wait_event_timeout(server->smbd_conn->wait_destroy,
  1285. server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ))
  1286. return -EAGAIN;
  1287. destroy_workqueue(server->smbd_conn->workqueue);
  1288. kfree(server->smbd_conn);
  1289. create_conn:
  1290. log_rdma_event(INFO, "creating rdma session\n");
  1291. server->smbd_conn = smbd_get_connection(
  1292. server, (struct sockaddr *) &server->dstaddr);
  1293. log_rdma_event(INFO, "created rdma session info=%p\n",
  1294. server->smbd_conn);
  1295. return server->smbd_conn ? 0 : -ENOENT;
  1296. }
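/* Free the receive buffers, workqueue, mempools and slab caches owned by this connection */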
  1297. static void destroy_caches_and_workqueue(struct smbd_connection *info)
  1298. {
  1299. destroy_receive_buffers(info);
  1300. destroy_workqueue(info->workqueue);
  1301. mempool_destroy(info->response_mempool);
  1302. kmem_cache_destroy(info->response_cache);
  1303. mempool_destroy(info->request_mempool);
  1304. kmem_cache_destroy(info->request_cache);
  1305. }
  1306. #define MAX_NAME_LEN 80
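/* Allocate the slab caches, mempools, workqueue and receive buffers used by this connection */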
  1307. static int allocate_caches_and_workqueue(struct smbd_connection *info)
  1308. {
  1309. char name[MAX_NAME_LEN];
  1310. int rc;
  1311. scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
  1312. info->request_cache =
  1313. kmem_cache_create(
  1314. name,
  1315. sizeof(struct smbd_request) +
  1316. sizeof(struct smbd_data_transfer),
  1317. 0, SLAB_HWCACHE_ALIGN, NULL);
  1318. if (!info->request_cache)
  1319. return -ENOMEM;
  1320. info->request_mempool =
  1321. mempool_create(info->send_credit_target, mempool_alloc_slab,
  1322. mempool_free_slab, info->request_cache);
  1323. if (!info->request_mempool)
  1324. goto out1;
  1325. scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
  1326. info->response_cache =
  1327. kmem_cache_create(
  1328. name,
  1329. sizeof(struct smbd_response) +
  1330. info->max_receive_size,
  1331. 0, SLAB_HWCACHE_ALIGN, NULL);
  1332. if (!info->response_cache)
  1333. goto out2;
  1334. info->response_mempool =
  1335. mempool_create(info->receive_credit_max, mempool_alloc_slab,
  1336. mempool_free_slab, info->response_cache);
  1337. if (!info->response_mempool)
  1338. goto out3;
  1339. scnprintf(name, MAX_NAME_LEN, "smbd_%p", info);
  1340. info->workqueue = create_workqueue(name);
  1341. if (!info->workqueue)
  1342. goto out4;
  1343. rc = allocate_receive_buffers(info, info->receive_credit_max);
  1344. if (rc) {
  1345. log_rdma_event(ERR, "failed to allocate receive buffers\n");
  1346. goto out5;
  1347. }
  1348. return 0;
  1349. out5:
  1350. destroy_workqueue(info->workqueue);
  1351. out4:
  1352. mempool_destroy(info->response_mempool);
  1353. out3:
  1354. kmem_cache_destroy(info->response_cache);
  1355. out2:
  1356. mempool_destroy(info->request_mempool);
  1357. out1:
  1358. kmem_cache_destroy(info->request_cache);
  1359. return -ENOMEM;
  1360. }
  1361. /* Create a SMBD connection, called by upper layer */
  1362. static struct smbd_connection *_smbd_get_connection(
  1363. struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port)
  1364. {
  1365. int rc;
  1366. struct smbd_connection *info;
  1367. struct rdma_conn_param conn_param;
  1368. struct ib_qp_init_attr qp_attr;
  1369. struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
  1370. struct ib_port_immutable port_immutable;
  1371. u32 ird_ord_hdr[2];
  1372. info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
  1373. if (!info)
  1374. return NULL;
  1375. info->transport_status = SMBD_CONNECTING;
  1376. rc = smbd_ia_open(info, dstaddr, port);
  1377. if (rc) {
  1378. log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
  1379. goto create_id_failed;
  1380. }
  1381. if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
  1382. smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
  1383. log_rdma_event(ERR,
  1384. "consider lowering send_credit_target = %d. "
  1385. "Possible CQE overrun, device "
  1386. "reporting max_cpe %d max_qp_wr %d\n",
  1387. smbd_send_credit_target,
  1388. info->id->device->attrs.max_cqe,
  1389. info->id->device->attrs.max_qp_wr);
  1390. goto config_failed;
  1391. }
  1392. if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
  1393. smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
  1394. log_rdma_event(ERR,
  1395. "consider lowering receive_credit_max = %d. "
  1396. "Possible CQE overrun, device "
  1397. "reporting max_cpe %d max_qp_wr %d\n",
  1398. smbd_receive_credit_max,
  1399. info->id->device->attrs.max_cqe,
  1400. info->id->device->attrs.max_qp_wr);
  1401. goto config_failed;
  1402. }
  1403. info->receive_credit_max = smbd_receive_credit_max;
  1404. info->send_credit_target = smbd_send_credit_target;
  1405. info->max_send_size = smbd_max_send_size;
  1406. info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
  1407. info->max_receive_size = smbd_max_receive_size;
  1408. info->keep_alive_interval = smbd_keep_alive_interval;
  1409. if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
  1410. log_rdma_event(ERR,
  1411. "warning: device max_send_sge = %d too small\n",
  1412. info->id->device->attrs.max_send_sge);
  1413. log_rdma_event(ERR, "Queue Pair creation may fail\n");
  1414. }
  1415. if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
  1416. log_rdma_event(ERR,
  1417. "warning: device max_recv_sge = %d too small\n",
  1418. info->id->device->attrs.max_recv_sge);
  1419. log_rdma_event(ERR, "Queue Pair creation may fail\n");
  1420. }
  1421. info->send_cq = NULL;
  1422. info->recv_cq = NULL;
  1423. info->send_cq = ib_alloc_cq(info->id->device, info,
  1424. info->send_credit_target, 0, IB_POLL_SOFTIRQ);
  1425. if (IS_ERR(info->send_cq)) {
  1426. info->send_cq = NULL;
  1427. goto alloc_cq_failed;
  1428. }
  1429. info->recv_cq = ib_alloc_cq(info->id->device, info,
  1430. info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
  1431. if (IS_ERR(info->recv_cq)) {
  1432. info->recv_cq = NULL;
  1433. goto alloc_cq_failed;
  1434. }
  1435. memset(&qp_attr, 0, sizeof(qp_attr));
  1436. qp_attr.event_handler = smbd_qp_async_error_upcall;
  1437. qp_attr.qp_context = info;
  1438. qp_attr.cap.max_send_wr = info->send_credit_target;
  1439. qp_attr.cap.max_recv_wr = info->receive_credit_max;
  1440. qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
  1441. qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
  1442. qp_attr.cap.max_inline_data = 0;
  1443. qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
  1444. qp_attr.qp_type = IB_QPT_RC;
  1445. qp_attr.send_cq = info->send_cq;
  1446. qp_attr.recv_cq = info->recv_cq;
  1447. qp_attr.port_num = ~0;
  1448. rc = rdma_create_qp(info->id, info->pd, &qp_attr);
  1449. if (rc) {
  1450. log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
  1451. goto create_qp_failed;
  1452. }
  1453. memset(&conn_param, 0, sizeof(conn_param));
  1454. conn_param.initiator_depth = 0;
  1455. conn_param.responder_resources =
  1456. info->id->device->attrs.max_qp_rd_atom
  1457. < SMBD_CM_RESPONDER_RESOURCES ?
  1458. info->id->device->attrs.max_qp_rd_atom :
  1459. SMBD_CM_RESPONDER_RESOURCES;
  1460. info->responder_resources = conn_param.responder_resources;
  1461. log_rdma_mr(INFO, "responder_resources=%d\n",
  1462. info->responder_resources);
  1463. /* Need to send IRD/ORD in private data for iWARP */
  1464. info->id->device->ops.get_port_immutable(
  1465. info->id->device, info