
/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c

Source: https://gitlab.com/sunny256/linux
C | 850 lines | 621 code | 166 blank | 63 comment | MD5 179fbdd593e3197334cd4441d38815b8 | raw file
  1/*
  2 * Copyright 2014 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/device.h>
 31
 32#include "kfd_pm4_headers.h"
 33#include "kfd_pm4_headers_diq.h"
 34#include "kfd_kernel_queue.h"
 35#include "kfd_priv.h"
 36#include "kfd_pm4_opcodes.h"
 37#include "cik_regs.h"
 38#include "kfd_dbgmgr.h"
 39#include "kfd_dbgdev.h"
 40#include "kfd_device_queue_manager.h"
 41#include "../../radeon/cik_reg.h"
 42
/* Disable all address-watch points on @dev via the kfd2kgd interface. */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
 47
/*
 * Submit an indirect buffer (IB) through the Debug Interface Queue (DIQ)
 * and wait for the CP to signal completion.
 *
 * Two packets are placed on the DIQ ring: an INDIRECT_BUFFER_PASID packet
 * pointing at the caller's command buffer (@vmid0_address / @size_in_bytes),
 * followed by a RELEASE_MEM packet used as a GPU->CPU fence.  A GART-backed
 * sync word is initialized to ACTIVE_COMPLETION_PENDING and polled until the
 * CP writes QUEUESTATE__ACTIVE into it.
 *
 * NOTE(review): @packet_buff is not referenced in this function body —
 * presumably it is the CPU mapping of @vmid0_address; confirm with callers.
 *
 * Returns 0 on success or a negative errno on allocation/timeout failure.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* Room for both the IB packet and the fence packet on the ring. */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* IB base address is 4-byte aligned: low dword is stored >> 2. */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/* NOTE(review): bits 23/31 are magic control flags here — presumably
	 * valid/privilege bits; confirm against the PM4 packet spec. */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	/* One 64-bit sync word, GPU/CPU visible. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Armed: the CP overwrites this with QUEUESTATE__ACTIVE when done. */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	/* Destination of the fence write: the GART sync word (addr >> 2). */
	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
167
/*
 * Register the no-DIQ debug path.  Nothing to set up: registers are
 * programmed directly through kfd2kgd, so just make sure no kernel
 * queue is attached.  Always returns 0.
 */
static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * no action is needed in this case,
	 * just make sure diq will not be used
	 */

	dbgdev->kq = NULL;

	return 0;
}
179
180static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
181{
182	struct queue_properties properties;
183	unsigned int qid;
184	struct kernel_queue *kq = NULL;
185	int status;
186
187	properties.type = KFD_QUEUE_TYPE_DIQ;
188
189	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
190				&properties, &qid);
191
192	if (status) {
193		pr_err("Failed to create DIQ\n");
194		return status;
195	}
196
197	pr_debug("DIQ Created with queue id: %d\n", qid);
198
199	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
200
201	if (!kq) {
202		pr_err("Error getting DIQ\n");
203		pqm_destroy_queue(dbgdev->pqm, qid);
204		return -EFAULT;
205	}
206
207	dbgdev->kq = kq;
208
209	return status;
210}
211
/*
 * Unregister the no-DIQ debug path: just turn off any active address
 * watch points.  Always returns 0.
 */
static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}
218
/*
 * Unregister the DIQ debug path: destroy the debug interface queue and
 * drop the kernel-queue reference.  Address watch points are not yet
 * disabled here (see todo below).
 *
 * Returns the pqm_destroy_queue() status.
 */
static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}
230
231static void dbgdev_address_watch_set_registers(
232			const struct dbg_address_watch_info *adw_info,
233			union TCP_WATCH_ADDR_H_BITS *addrHi,
234			union TCP_WATCH_ADDR_L_BITS *addrLo,
235			union TCP_WATCH_CNTL_BITS *cntl,
236			unsigned int index, unsigned int vmid)
237{
238	union ULARGE_INTEGER addr;
239
240	addr.quad_part = 0;
241	addrHi->u32All = 0;
242	addrLo->u32All = 0;
243	cntl->u32All = 0;
244
245	if (adw_info->watch_mask)
246		cntl->bitfields.mask =
247			(uint32_t) (adw_info->watch_mask[index] &
248					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
249	else
250		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
251
252	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
253
254	addrHi->bitfields.addr = addr.u.high_part &
255					ADDRESS_WATCH_REG_ADDHIGH_MASK;
256	addrLo->bitfields.addr =
257			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
258
259	cntl->bitfields.mode = adw_info->watch_mode[index];
260	cntl->bitfields.vmid = (uint32_t) vmid;
261	/* for now assume it is an ATC address */
262	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
263
264	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
265	pr_debug("\t\t%20s %08x\n", "set reg add high :",
266			addrHi->bitfields.addr);
267	pr_debug("\t\t%20s %08x\n", "set reg add low :",
268			addrLo->bitfields.addr);
269}
270
271static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
272				      struct dbg_address_watch_info *adw_info)
273{
274	union TCP_WATCH_ADDR_H_BITS addrHi;
275	union TCP_WATCH_ADDR_L_BITS addrLo;
276	union TCP_WATCH_CNTL_BITS cntl;
277	struct kfd_process_device *pdd;
278	unsigned int i;
279
280	/* taking the vmid for that process on the safe way using pdd */
281	pdd = kfd_get_process_device_data(dbgdev->dev,
282					adw_info->process);
283	if (!pdd) {
284		pr_err("Failed to get pdd for wave control no DIQ\n");
285		return -EFAULT;
286	}
287
288	addrHi.u32All = 0;
289	addrLo.u32All = 0;
290	cntl.u32All = 0;
291
292	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
293			(adw_info->num_watch_points == 0)) {
294		pr_err("num_watch_points is invalid\n");
295		return -EINVAL;
296	}
297
298	if (!adw_info->watch_mode || !adw_info->watch_address) {
299		pr_err("adw_info fields are not valid\n");
300		return -EINVAL;
301	}
302
303	for (i = 0; i < adw_info->num_watch_points; i++) {
304		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
305						&cntl, i, pdd->qpd.vmid);
306
307		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
308		pr_debug("\t\t%20s %08x\n", "register index :", i);
309		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
310		pr_debug("\t\t%20s %08x\n", "Address Low is :",
311				addrLo.bitfields.addr);
312		pr_debug("\t\t%20s %08x\n", "Address high is :",
313				addrHi.bitfields.addr);
314		pr_debug("\t\t%20s %08x\n", "Address high is :",
315				addrHi.bitfields.addr);
316		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
317				cntl.bitfields.mask);
318		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
319				cntl.bitfields.mode);
320		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
321				cntl.bitfields.vmid);
322		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
323				cntl.bitfields.atc);
324		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
325
326		pdd->dev->kfd2kgd->address_watch_execute(
327						dbgdev->dev->kgd,
328						i,
329						cntl.u32All,
330						addrHi.u32All,
331						addrLo.u32All);
332	}
333
334	return 0;
335}
336
/*
 * Program the TCP address-watch registers via the DIQ path.
 *
 * Builds, per watch point, a 4-packet SET_CONFIG_REG indirect buffer
 * (CNTL without valid bit, ADDR_HI, ADDR_LO, then CNTL again with the
 * valid bit) and submits it through the debug interface queue.  The
 * first and last CNTL writes carry insert_vmid so the CP patches in the
 * queue's VMID; the middle two do not.
 *
 * Returns 0 on success, -EINVAL on a malformed @adw_info, or the error
 * from GART allocation / IB submission.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	/* one SET_CONFIG_REG packet per register write, 4 per watch point */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	/* GART-backed IB, reused for every watch point iteration. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/* Common headers; packets 0 and 3 get the VMID patched in by CP. */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* Packet 0: write CNTL (valid bit still clear). */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		/* Packet 1: write ADDR_HI. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* Packet 2: write ADDR_LO. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* Packet 3: rewrite CNTL, now with the valid bit decided. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
486
487static int dbgdev_wave_control_set_registers(
488				struct dbg_wave_control_info *wac_info,
489				union SQ_CMD_BITS *in_reg_sq_cmd,
490				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
491{
492	int status = 0;
493	union SQ_CMD_BITS reg_sq_cmd;
494	union GRBM_GFX_INDEX_BITS reg_gfx_index;
495	struct HsaDbgWaveMsgAMDGen2 *pMsg;
496
497	reg_sq_cmd.u32All = 0;
498	reg_gfx_index.u32All = 0;
499	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
500
501	switch (wac_info->mode) {
502	/* Send command to single wave */
503	case HSA_DBG_WAVEMODE_SINGLE:
504		/*
505		 * Limit access to the process waves only,
506		 * by setting vmid check
507		 */
508		reg_sq_cmd.bits.check_vmid = 1;
509		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
510		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
511		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
512
513		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
514		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
515		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
516
517		break;
518
519	/* Send command to all waves with matching VMID */
520	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
521
522		reg_gfx_index.bits.sh_broadcast_writes = 1;
523		reg_gfx_index.bits.se_broadcast_writes = 1;
524		reg_gfx_index.bits.instance_broadcast_writes = 1;
525
526		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
527
528		break;
529
530	/* Send command to all CU waves with matching VMID */
531	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
532
533		reg_sq_cmd.bits.check_vmid = 1;
534		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
535
536		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
537		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
538		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
539
540		break;
541
542	default:
543		return -EINVAL;
544	}
545
546	switch (wac_info->operand) {
547	case HSA_DBG_WAVEOP_HALT:
548		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
549		break;
550
551	case HSA_DBG_WAVEOP_RESUME:
552		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
553		break;
554
555	case HSA_DBG_WAVEOP_KILL:
556		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
557		break;
558
559	case HSA_DBG_WAVEOP_DEBUG:
560		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
561		break;
562
563	case HSA_DBG_WAVEOP_TRAP:
564		if (wac_info->trapId < MAX_TRAPID) {
565			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
566			reg_sq_cmd.bits.trap_id = wac_info->trapId;
567		} else {
568			status = -EINVAL;
569		}
570		break;
571
572	default:
573		status = -EINVAL;
574		break;
575	}
576
577	if (status == 0) {
578		*in_reg_sq_cmd = reg_sq_cmd;
579		*in_reg_gfx_index = reg_gfx_index;
580	}
581
582	return status;
583}
584
/*
 * Execute a wave-control command through the DIQ.
 *
 * Builds a 3-packet indirect buffer: (0) SET_UCONFIG_REG to select the
 * target SE/SH/CU via GRBM_GFX_INDEX, (1) SET_CONFIG_REG to write SQ_CMD
 * with the queue's VMID patched in by the CP (insert_vmid), and (2) a
 * restore of GRBM_GFX_INDEX to full broadcast.  The packet order matters:
 * GRBM_GFX_INDEX must frame the SQ_CMD write.
 *
 * Returns 0 on success or a negative errno from register setup, GART
 * allocation or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	/* Packet 0: select target via GRBM_GFX_INDEX (uconfig space). */
	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* Packet 1: write SQ_CMD; CP patches the queue's VMID into it. */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	/* Packet 2: put GRBM_GFX_INDEX back to full broadcast. */
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
					USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
701
/*
 * Execute a wave-control command directly through kfd2kgd (no-DIQ path).
 *
 * Unlike the DIQ path, the VMID is not patched by the CP, so it is taken
 * from the process device data and written into SQ_CMD here.
 *
 * Returns -EFAULT if the process has no pdd on this device, a negative
 * errno from register setup, or the result of wave_control_execute().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
764
765int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
766{
767	int status = 0;
768	unsigned int vmid;
769	union SQ_CMD_BITS reg_sq_cmd;
770	union GRBM_GFX_INDEX_BITS reg_gfx_index;
771	struct kfd_process_device *pdd;
772	struct dbg_wave_control_info wac_info;
773	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
774	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
775
776	reg_sq_cmd.u32All = 0;
777	status = 0;
778
779	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
780	wac_info.operand = HSA_DBG_WAVEOP_KILL;
781
782	pr_debug("Killing all process wavefronts\n");
783
784	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
785	 * ATC_VMID15_PASID_MAPPING
786	 * to check which VMID the current process is mapped to.
787	 */
788
789	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
790		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
791				(dev->kgd, vmid)) {
792			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
793					(dev->kgd, vmid) == p->pasid) {
794				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
795						vmid, p->pasid);
796				break;
797			}
798		}
799	}
800
801	if (vmid > last_vmid_to_scan) {
802		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
803		return -EFAULT;
804	}
805
806	/* taking the VMID for that process on the safe way using PDD */
807	pdd = kfd_get_process_device_data(dev, p);
808	if (!pdd)
809		return -EFAULT;
810
811	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
812			&reg_gfx_index);
813	if (status != 0)
814		return -EINVAL;
815
816	/* for non DIQ we need to patch the VMID: */
817	reg_sq_cmd.bits.vm_id = vmid;
818
819	dev->kfd2kgd->wave_control_execute(dev->kgd,
820					reg_gfx_index.u32All,
821					reg_sq_cmd.u32All);
822
823	return 0;
824}
825
826void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
827			enum DBGDEV_TYPE type)
828{
829	pdbgdev->dev = pdev;
830	pdbgdev->kq = NULL;
831	pdbgdev->type = type;
832	pdbgdev->pqm = NULL;
833
834	switch (type) {
835	case DBGDEV_TYPE_NODIQ:
836		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
837		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
838		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
839		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
840		break;
841	case DBGDEV_TYPE_DIQ:
842	default:
843		pdbgdev->dbgdev_register = dbgdev_register_diq;
844		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
845		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
846		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
847		break;
848	}
849
850}