/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c

https://gitlab.com/sunny256/linux

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>

#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
#include "../../radeon/cik_reg.h"

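/*
 * Overview (editorial summary of this file): a debug device (dbgdev)
 * exposes wave-control and address-watch operations for a KFD process.
 * Two back-ends are implemented below. The "nodiq" variant programs the
 * watch and wave-control registers directly through the kfd2kgd interface;
 * the "diq" variant builds PM4 packets and submits them through a Debug
 * Interface Queue (DIQ) created on the process's queue manager.
 * kfd_dbgdev_init() at the bottom of the file selects between them.
 */
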
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}

static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from the DIQ. The packets we received sit in
	 * the indirect buffer (IB), and into the PQ we put the IB packet
	 * plus the sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * For now we use RELEASE_MEM for GPU-CPU synchronization
	 * (consider WaitRegMem + WriteData as a better alternative).
	 * We get a GART allocation (GPU/CPU mapping) for the sync
	 * variable, and wait until:
	 * (a) we sync with the HW, and
	 * (b) the sync variable is written by the CP to memory.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till the CP writes the sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * No action is needed in this case;
	 * just make sure the DIQ will not be used.
	 */
	dbgdev->kq = NULL;

	return 0;
}

static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
	struct queue_properties properties;
	unsigned int qid;
	struct kernel_queue *kq = NULL;
	int status;

	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, &qid);
	if (status) {
		pr_err("Failed to create DIQ\n");
		return status;
	}

	pr_debug("DIQ Created with queue id: %d\n", qid);

	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
	if (!kq) {
		pr_err("Error getting DIQ\n");
		pqm_destroy_queue(dbgdev->pqm, qid);
		return -EFAULT;
	}

	dbgdev->kq = kq;

	return status;
}

static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}

static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}

static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	addr.quad_part = 0;

	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	if (adw_info->watch_mask)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}

static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
				      struct dbg_address_watch_info *adw_info)
{
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_process_device *pdd;
	unsigned int i;

	/* Take the VMID for that process the safe way, via the PDD. */
	pdd = kfd_get_process_device_data(dbgdev->dev,
					adw_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
						&cntl, i, pdd->qpd.vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		pdd->dev->kfd2kgd->address_watch_execute(
					dbgdev->dev->kgd,
					i,
					cntl.u32All,
					addrHi.u32All,
					addrLo.u32All);
	}

	return 0;
}

static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* We do not control the VMID in DIQ mode; this is just a placeholder. */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* Enable the watch point only if the address is not zero. */
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to a single wave. */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process's own waves
		 * by setting the VMID check.
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	/* Send command to all waves with a matching VMID. */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
		break;

	/* Send command to all CU waves with a matching VMID. */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
		break;

	default:
		return -EINVAL;
	}

	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}

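/*
 * Worked example (editorial, illustrative): for wac_info->mode ==
 * HSA_DBG_WAVEMODE_BROADCAST_PROCESS and wac_info->operand ==
 * HSA_DBG_WAVEOP_KILL, the function above yields
 *	reg_sq_cmd.bits.cmd  == SQ_IND_CMD_CMD_KILL and
 *	reg_sq_cmd.bits.mode == SQ_IND_CMD_MODE_BROADCAST,
 * with all three broadcast_writes bits set in reg_gfx_index. Note that
 * reg_sq_cmd.bits.vm_id is never filled in here; each caller below patches
 * it (or zeroes it, in the DIQ case) before issuing the command.
 */
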
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
				   struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
						&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register. */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
				     struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* Take the VMID for that process the safe way, via the PDD. */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
						&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* For the non-DIQ path we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}

int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/*
	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING to check to which VMID the current
	 * process is mapped.
	 */
	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
				(dev->kgd, vmid)) {
			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
					(dev->kgd, vmid) == p->pasid) {
				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
						vmid, p->pasid);
				break;
			}
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
		return -EFAULT;
	}

	/* Take the VMID for that process the safe way, via the PDD. */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
						&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* For the non-DIQ path we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type)
{
	pdbgdev->dev = pdev;
	pdbgdev->kq = NULL;
	pdbgdev->type = type;
	pdbgdev->pqm = NULL;

	switch (type) {
	case DBGDEV_TYPE_NODIQ:
		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
		break;
	case DBGDEV_TYPE_DIQ:
	default:
		pdbgdev->dbgdev_register = dbgdev_register_diq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
		break;
	}
}
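
For orientation, here is a minimal caller-side sketch of how this back-end is selected and registered. It mirrors the wiring done by the KFD debug manager (the kfd_dbgmgr code that includes this file's header), but the helper name, the hws_enabled parameter, and the error handling here are illustrative assumptions, not code from this repository.

/*
 * Illustrative sketch only; the example_* name and the hws_enabled flag
 * are assumptions made for this example, not part of amdkfd.
 */
static int example_attach_debugger(struct kfd_dev *dev, struct kfd_process *p,
				   struct kfd_dbgdev *dbgdev, bool hws_enabled)
{
	/*
	 * Without the hardware scheduler there is no DIQ to submit PM4
	 * packets to, so fall back to direct register programming.
	 */
	enum DBGDEV_TYPE type = hws_enabled ? DBGDEV_TYPE_DIQ :
					      DBGDEV_TYPE_NODIQ;
	int err;

	kfd_dbgdev_init(dbgdev, dev, type);

	/*
	 * The DIQ back-end creates its kernel queue through the process's
	 * queue manager (see dbgdev_register_diq() above), so wire that up
	 * before registering. Assumes kfd_process exposes its
	 * process_queue_manager as p->pqm.
	 */
	dbgdev->pqm = &p->pqm;

	err = dbgdev->dbgdev_register(dbgdev);
	if (err)
		return err;

	/*
	 * From here on, wave control and address watch go through the
	 * callbacks selected by kfd_dbgdev_init(), e.g.
	 * dbgdev->dbgdev_wave_control(dbgdev, &wac_info).
	 */
	return 0;
}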