PageRenderTime 1245ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c

https://github.com/kvaneesh/linux
C | 300 lines | 160 code | 42 blank | 98 comment | 26 complexity | 4a363aff0da0219545e154c6e859eec3 MD5 | raw file
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include <linux/dma-mapping.h>
  24. #include "amdgpu.h"
  25. #include "amdgpu_ih.h"
  26. /**
  27. * amdgpu_ih_ring_init - initialize the IH state
  28. *
  29. * @adev: amdgpu_device pointer
  30. * @ih: ih ring to initialize
  31. * @ring_size: ring size to allocate
  32. * @use_bus_addr: true when we can use dma_alloc_coherent
  33. *
  34. * Initializes the IH state and allocates a buffer
  35. * for the IH ring buffer.
  36. * Returns 0 for success, errors for failure.
  37. */
  38. int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
  39. unsigned ring_size, bool use_bus_addr)
  40. {
  41. u32 rb_bufsz;
  42. int r;
  43. /* Align ring size */
  44. rb_bufsz = order_base_2(ring_size / 4);
  45. ring_size = (1 << rb_bufsz) * 4;
  46. ih->ring_size = ring_size;
  47. ih->ptr_mask = ih->ring_size - 1;
  48. ih->rptr = 0;
  49. ih->use_bus_addr = use_bus_addr;
  50. if (use_bus_addr) {
  51. dma_addr_t dma_addr;
  52. if (ih->ring)
  53. return 0;
  54. /* add 8 bytes for the rptr/wptr shadows and
  55. * add them to the end of the ring allocation.
  56. */
  57. ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
  58. &dma_addr, GFP_KERNEL);
  59. if (ih->ring == NULL)
  60. return -ENOMEM;
  61. ih->gpu_addr = dma_addr;
  62. ih->wptr_addr = dma_addr + ih->ring_size;
  63. ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
  64. ih->rptr_addr = dma_addr + ih->ring_size + 4;
  65. ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
  66. } else {
  67. unsigned wptr_offs, rptr_offs;
  68. r = amdgpu_device_wb_get(adev, &wptr_offs);
  69. if (r)
  70. return r;
  71. r = amdgpu_device_wb_get(adev, &rptr_offs);
  72. if (r) {
  73. amdgpu_device_wb_free(adev, wptr_offs);
  74. return r;
  75. }
  76. r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
  77. AMDGPU_GEM_DOMAIN_GTT,
  78. &ih->ring_obj, &ih->gpu_addr,
  79. (void **)&ih->ring);
  80. if (r) {
  81. amdgpu_device_wb_free(adev, rptr_offs);
  82. amdgpu_device_wb_free(adev, wptr_offs);
  83. return r;
  84. }
  85. ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
  86. ih->wptr_cpu = &adev->wb.wb[wptr_offs];
  87. ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
  88. ih->rptr_cpu = &adev->wb.wb[rptr_offs];
  89. }
  90. init_waitqueue_head(&ih->wait_process);
  91. return 0;
  92. }
  93. /**
  94. * amdgpu_ih_ring_fini - tear down the IH state
  95. *
  96. * @adev: amdgpu_device pointer
  97. * @ih: ih ring to tear down
  98. *
  99. * Tears down the IH state and frees buffer
  100. * used for the IH ring buffer.
  101. */
  102. void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
  103. {
  104. if (!ih->ring)
  105. return;
  106. if (ih->use_bus_addr) {
  107. /* add 8 bytes for the rptr/wptr shadows and
  108. * add them to the end of the ring allocation.
  109. */
  110. dma_free_coherent(adev->dev, ih->ring_size + 8,
  111. (void *)ih->ring, ih->gpu_addr);
  112. ih->ring = NULL;
  113. } else {
  114. amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
  115. (void **)&ih->ring);
  116. amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
  117. amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
  118. }
  119. }
  120. /**
  121. * amdgpu_ih_ring_write - write IV to the ring buffer
  122. *
  123. * @ih: ih ring to write to
  124. * @iv: the iv to write
  125. * @num_dw: size of the iv in dw
  126. *
  127. * Writes an IV to the ring buffer using the CPU and increment the wptr.
  128. * Used for testing and delegating IVs to a software ring.
  129. */
  130. void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
  131. unsigned int num_dw)
  132. {
  133. uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
  134. unsigned int i;
  135. for (i = 0; i < num_dw; ++i)
  136. ih->ring[wptr++] = cpu_to_le32(iv[i]);
  137. wptr <<= 2;
  138. wptr &= ih->ptr_mask;
  139. /* Only commit the new wptr if we don't overflow */
  140. if (wptr != READ_ONCE(ih->rptr)) {
  141. wmb();
  142. WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
  143. }
  144. }
  145. /* Waiter helper that checks current rptr matches or passes checkpoint wptr */
  146. static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
  147. struct amdgpu_ih_ring *ih,
  148. uint32_t checkpoint_wptr,
  149. uint32_t *prev_rptr)
  150. {
  151. uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
  152. /* rptr has wrapped. */
  153. if (cur_rptr < *prev_rptr)
  154. cur_rptr += ih->ptr_mask + 1;
  155. *prev_rptr = cur_rptr;
  156. /* check ring is empty to workaround missing wptr overflow flag */
  157. return cur_rptr >= checkpoint_wptr ||
  158. (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
  159. }
  160. /**
  161. * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
  162. *
  163. * @adev: amdgpu_device pointer
  164. * @ih: ih ring to process
  165. *
  166. * Used to ensure ring has processed IVs up to the checkpoint write pointer.
  167. */
  168. int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
  169. struct amdgpu_ih_ring *ih)
  170. {
  171. uint32_t checkpoint_wptr, rptr;
  172. if (!ih->enabled || adev->shutdown)
  173. return -ENODEV;
  174. checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
  175. /* Order wptr with rptr. */
  176. rmb();
  177. rptr = READ_ONCE(ih->rptr);
  178. /* wptr has wrapped. */
  179. if (rptr > checkpoint_wptr)
  180. checkpoint_wptr += ih->ptr_mask + 1;
  181. return wait_event_interruptible(ih->wait_process,
  182. amdgpu_ih_has_checkpoint_processed(adev, ih,
  183. checkpoint_wptr, &rptr));
  184. }
  185. /**
  186. * amdgpu_ih_process - interrupt handler
  187. *
  188. * @adev: amdgpu_device pointer
  189. * @ih: ih ring to process
  190. *
  191. * Interrupt hander (VI), walk the IH ring.
  192. * Returns irq process return code.
  193. */
  194. int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
  195. {
  196. unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
  197. u32 wptr;
  198. if (!ih->enabled || adev->shutdown)
  199. return IRQ_NONE;
  200. wptr = amdgpu_ih_get_wptr(adev, ih);
  201. restart_ih:
  202. DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
  203. /* Order reading of wptr vs. reading of IH ring data */
  204. rmb();
  205. while (ih->rptr != wptr && --count) {
  206. amdgpu_irq_dispatch(adev, ih);
  207. ih->rptr &= ih->ptr_mask;
  208. }
  209. amdgpu_ih_set_rptr(adev, ih);
  210. wake_up_all(&ih->wait_process);
  211. /* make sure wptr hasn't changed while processing */
  212. wptr = amdgpu_ih_get_wptr(adev, ih);
  213. if (wptr != ih->rptr)
  214. goto restart_ih;
  215. return IRQ_HANDLED;
  216. }
  217. /**
  218. * amdgpu_ih_decode_iv_helper - decode an interrupt vector
  219. *
  220. * @adev: amdgpu_device pointer
  221. * @ih: ih ring to process
  222. * @entry: IV entry
  223. *
  224. * Decodes the interrupt vector at the current rptr
  225. * position and also advance the position for for Vega10
  226. * and later GPUs.
  227. */
  228. void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
  229. struct amdgpu_ih_ring *ih,
  230. struct amdgpu_iv_entry *entry)
  231. {
  232. /* wptr/rptr are in bytes! */
  233. u32 ring_index = ih->rptr >> 2;
  234. uint32_t dw[8];
  235. dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
  236. dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
  237. dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
  238. dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
  239. dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
  240. dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
  241. dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
  242. dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
  243. entry->client_id = dw[0] & 0xff;
  244. entry->src_id = (dw[0] >> 8) & 0xff;
  245. entry->ring_id = (dw[0] >> 16) & 0xff;
  246. entry->vmid = (dw[0] >> 24) & 0xf;
  247. entry->vmid_src = (dw[0] >> 31);
  248. entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
  249. entry->timestamp_src = dw[2] >> 31;
  250. entry->pasid = dw[3] & 0xffff;
  251. entry->pasid_src = dw[3] >> 31;
  252. entry->src_data[0] = dw[4];
  253. entry->src_data[1] = dw[5];
  254. entry->src_data[2] = dw[6];
  255. entry->src_data[3] = dw[7];
  256. /* wptr/rptr are in bytes! */
  257. ih->rptr += 32;
  258. }