
/drivers/gpu/drm/amd/amdkfd/kfd_topology.c

http://github.com/mirrors/linux-2.6
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/hash.h>
#include <linux/cpufreq.h>
#include <linux/log2.h>
#include <linux/dmi.h>
#include <linux/atomic.h>

#include "kfd_priv.h"
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
static struct kfd_system_properties sys_props;

static DECLARE_RWSEM(topology_lock);
static atomic_t topology_crat_proximity_domain;
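/*
 * Locking note (derived from the code below): topology_device_list and
 * sys_props are protected by topology_lock; lookups take it for read,
 * topology updates take it for write. topology_crat_proximity_domain is
 * an atomic counter so that GPU hot-add can reserve a new proximity
 * domain without holding the lock.
 */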
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
						uint32_t proximity_domain)
{
	struct kfd_topology_device *top_dev;
	struct kfd_topology_device *device = NULL;

	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list)
		if (top_dev->proximity_domain == proximity_domain) {
			device = top_dev;
			break;
		}

	up_read(&topology_lock);

	return device;
}
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
{
	struct kfd_topology_device *top_dev = NULL;
	struct kfd_topology_device *ret = NULL;

	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list)
		if (top_dev->gpu_id == gpu_id) {
			ret = top_dev;
			break;
		}

	up_read(&topology_lock);

	return ret;
}
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
{
	struct kfd_topology_device *top_dev;

	top_dev = kfd_topology_device_by_id(gpu_id);
	if (!top_dev)
		return NULL;

	return top_dev->gpu;
}
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
{
	struct kfd_topology_device *top_dev;
	struct kfd_dev *device = NULL;

	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list)
		if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
			device = top_dev->gpu;
			break;
		}

	up_read(&topology_lock);

	return device;
}
struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
{
	struct kfd_topology_device *top_dev;
	struct kfd_dev *device = NULL;

	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list)
		if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
			device = top_dev->gpu;
			break;
		}

	up_read(&topology_lock);

	return device;
}
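/*
 * Note: the lookup helpers above return raw pointers without taking a
 * reference; topology_lock only protects the list walk itself. Callers
 * are expected to ensure the topology device outlives the returned
 * pointer (e.g. by not racing with kfd_topology_remove_device()).
 */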
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
	struct kfd_mem_properties *mem;
	struct kfd_cache_properties *cache;
	struct kfd_iolink_properties *iolink;
	struct kfd_perf_properties *perf;

	list_del(&dev->list);

	while (dev->mem_props.next != &dev->mem_props) {
		mem = container_of(dev->mem_props.next,
				struct kfd_mem_properties, list);
		list_del(&mem->list);
		kfree(mem);
	}

	while (dev->cache_props.next != &dev->cache_props) {
		cache = container_of(dev->cache_props.next,
				struct kfd_cache_properties, list);
		list_del(&cache->list);
		kfree(cache);
	}

	while (dev->io_link_props.next != &dev->io_link_props) {
		iolink = container_of(dev->io_link_props.next,
				struct kfd_iolink_properties, list);
		list_del(&iolink->list);
		kfree(iolink);
	}

	while (dev->perf_props.next != &dev->perf_props) {
		perf = container_of(dev->perf_props.next,
				struct kfd_perf_properties, list);
		list_del(&perf->list);
		kfree(perf);
	}

	kfree(dev);
}
void kfd_release_topology_device_list(struct list_head *device_list)
{
	struct kfd_topology_device *dev;

	while (!list_empty(device_list)) {
		dev = list_first_entry(device_list,
				struct kfd_topology_device, list);
		kfd_release_topology_device(dev);
	}
}

static void kfd_release_live_view(void)
{
	kfd_release_topology_device_list(&topology_device_list);
	memset(&sys_props, 0, sizeof(sys_props));
}
struct kfd_topology_device *kfd_create_topology_device(
				struct list_head *device_list)
{
	struct kfd_topology_device *dev;

	dev = kfd_alloc_struct(dev);
	if (!dev) {
		pr_err("No memory to allocate a topology device");
		return NULL;
	}

	INIT_LIST_HEAD(&dev->mem_props);
	INIT_LIST_HEAD(&dev->cache_props);
	INIT_LIST_HEAD(&dev->io_link_props);
	INIT_LIST_HEAD(&dev->perf_props);

	list_add_tail(&dev->list, device_list);

	return dev;
}
#define sysfs_show_gen_prop(buffer, fmt, ...) \
		snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
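/*
 * These helpers "append" by re-printing the buffer into itself via the
 * leading "%s". For example, sysfs_show_32bit_prop(buffer, "simd_count", 4)
 * appends the line "simd_count 4\n". Note that passing the destination
 * buffer as a source argument to snprintf() is technically undefined
 * behavior in C; newer upstream kernels reworked these macros to track an
 * explicit offset instead.
 */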
static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
			     char *buffer)
{
	ssize_t ret;

	/* Making sure that the buffer is an empty string */
	buffer[0] = 0;

	if (attr == &sys_props.attr_genid) {
		ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
	} else if (attr == &sys_props.attr_props) {
		sysfs_show_64bit_prop(buffer, "platform_oem",
				sys_props.platform_oem);
		sysfs_show_64bit_prop(buffer, "platform_id",
				sys_props.platform_id);
		ret = sysfs_show_64bit_prop(buffer, "platform_rev",
				sys_props.platform_rev);
	} else {
		ret = -EINVAL;
	}

	return ret;
}
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops sysprops_ops = {
	.show = sysprops_show,
};

static struct kobj_type sysprops_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &sysprops_ops,
};
static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
			   char *buffer)
{
	ssize_t ret;
	struct kfd_iolink_properties *iolink;

	/* Making sure that the buffer is an empty string */
	buffer[0] = 0;

	iolink = container_of(attr, struct kfd_iolink_properties, attr);
	if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
		return -EPERM;
	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
	sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
	sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
	sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
	sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
	sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
	sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
	sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
	sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
			iolink->rec_transfer_size);
	ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);

	return ret;
}

static const struct sysfs_ops iolink_ops = {
	.show = iolink_show,
};

static struct kobj_type iolink_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &iolink_ops,
};
static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
			char *buffer)
{
	ssize_t ret;
	struct kfd_mem_properties *mem;

	/* Making sure that the buffer is an empty string */
	buffer[0] = 0;

	mem = container_of(attr, struct kfd_mem_properties, attr);
	if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
		return -EPERM;
	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
	sysfs_show_32bit_prop(buffer, "width", mem->width);
	ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);

	return ret;
}

static const struct sysfs_ops mem_ops = {
	.show = mem_show,
};

static struct kobj_type mem_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &mem_ops,
};
static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
			      char *buffer)
{
	ssize_t ret;
	uint32_t i, j;
	struct kfd_cache_properties *cache;

	/* Making sure that the buffer is an empty string */
	buffer[0] = 0;

	cache = container_of(attr, struct kfd_cache_properties, attr);
	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
		return -EPERM;
	sysfs_show_32bit_prop(buffer, "processor_id_low",
			cache->processor_id_low);
	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
	sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
	sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
	sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
			cache->cachelines_per_tag);
	sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
			/* Check each bit */
			if (cache->sibling_map[i] & (1 << j))
				ret = snprintf(buffer, PAGE_SIZE,
					"%s%d%s", buffer, 1, ",");
			else
				ret = snprintf(buffer, PAGE_SIZE,
					"%s%d%s", buffer, 0, ",");
		}
	/* Replace the last "," with end of line */
	*(buffer + strlen(buffer) - 1) = 0xA;

	return ret;
}
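/*
 * The sibling_map above is emitted one bit per processor, comma
 * separated, with the trailing comma overwritten by a newline (0xA),
 * e.g.:
 *	sibling_map 1,0,0,0,...
 */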
static const struct sysfs_ops cache_ops = {
	.show = kfd_cache_show,
};

static struct kobj_type cache_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &cache_ops,
};
/****** Sysfs of Performance Counters ******/

struct kfd_perf_attr {
	struct kobj_attribute attr;
	uint32_t data;
};

static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
			char *buf)
{
	struct kfd_perf_attr *attr;

	buf[0] = 0;
	attr = container_of(attrs, struct kfd_perf_attr, attr);
	if (!attr->data) /* invalid data for PMC */
		return 0;
	else
		return sysfs_show_32bit_val(buf, attr->data);
}

#define KFD_PERF_DESC(_name, _data)			\
{							\
	.attr = __ATTR(_name, 0444, perf_show, NULL),	\
	.data = _data,					\
}

static struct kfd_perf_attr perf_attr_iommu[] = {
	KFD_PERF_DESC(max_concurrent, 0),
	KFD_PERF_DESC(num_counters, 0),
	KFD_PERF_DESC(counter_ids, 0),
};
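/*
 * perf_attr_iommu is a single static template shared by every node:
 * its .data fields start out 0, which perf_show() treats as "invalid"
 * and shows as an empty value. max_concurrent is patched in
 * kfd_build_sysfs_node_entry() when an "iommu" perf block is present.
 */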
/****************************************/
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
			 char *buffer)
{
	struct kfd_topology_device *dev;
	uint32_t log_max_watch_addr;

	/* Making sure that the buffer is an empty string */
	buffer[0] = 0;

	if (strcmp(attr->name, "gpu_id") == 0) {
		dev = container_of(attr, struct kfd_topology_device,
				attr_gpuid);
		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
			return -EPERM;
		return sysfs_show_32bit_val(buffer, dev->gpu_id);
	}

	if (strcmp(attr->name, "name") == 0) {
		dev = container_of(attr, struct kfd_topology_device,
				attr_name);

		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
			return -EPERM;
		return sysfs_show_str_val(buffer, dev->node_props.name);
	}

	dev = container_of(attr, struct kfd_topology_device,
			attr_props);
	if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
		return -EPERM;
	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
			dev->node_props.cpu_cores_count);
	sysfs_show_32bit_prop(buffer, "simd_count",
			dev->node_props.simd_count);
	sysfs_show_32bit_prop(buffer, "mem_banks_count",
			dev->node_props.mem_banks_count);
	sysfs_show_32bit_prop(buffer, "caches_count",
			dev->node_props.caches_count);
	sysfs_show_32bit_prop(buffer, "io_links_count",
			dev->node_props.io_links_count);
	sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
			dev->node_props.cpu_core_id_base);
	sysfs_show_32bit_prop(buffer, "simd_id_base",
			dev->node_props.simd_id_base);
	sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
			dev->node_props.max_waves_per_simd);
	sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
			dev->node_props.lds_size_in_kb);
	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
			dev->node_props.gds_size_in_kb);
	sysfs_show_32bit_prop(buffer, "num_gws",
			dev->node_props.num_gws);
	sysfs_show_32bit_prop(buffer, "wave_front_size",
			dev->node_props.wave_front_size);
	sysfs_show_32bit_prop(buffer, "array_count",
			dev->node_props.array_count);
	sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
			dev->node_props.simd_arrays_per_engine);
	sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
			dev->node_props.cu_per_simd_array);
	sysfs_show_32bit_prop(buffer, "simd_per_cu",
			dev->node_props.simd_per_cu);
	sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
			dev->node_props.max_slots_scratch_cu);
	sysfs_show_32bit_prop(buffer, "vendor_id",
			dev->node_props.vendor_id);
	sysfs_show_32bit_prop(buffer, "device_id",
			dev->node_props.device_id);
	sysfs_show_32bit_prop(buffer, "location_id",
			dev->node_props.location_id);
	sysfs_show_32bit_prop(buffer, "drm_render_minor",
			dev->node_props.drm_render_minor);
	sysfs_show_64bit_prop(buffer, "hive_id",
			dev->node_props.hive_id);
	sysfs_show_32bit_prop(buffer, "num_sdma_engines",
			dev->node_props.num_sdma_engines);
	sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
			dev->node_props.num_sdma_xgmi_engines);
	sysfs_show_32bit_prop(buffer, "num_sdma_queues_per_engine",
			dev->node_props.num_sdma_queues_per_engine);
	sysfs_show_32bit_prop(buffer, "num_cp_queues",
			dev->node_props.num_cp_queues);
	sysfs_show_64bit_prop(buffer, "unique_id",
			dev->node_props.unique_id);

	if (dev->gpu) {
		log_max_watch_addr =
			__ilog2_u32(dev->gpu->device_info->num_of_watch_points);

		if (log_max_watch_addr) {
			dev->node_props.capability |=
					HSA_CAP_WATCH_POINTS_SUPPORTED;

			dev->node_props.capability |=
				((log_max_watch_addr <<
					HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
		}

		if (dev->gpu->device_info->asic_family == CHIP_TONGA)
			dev->node_props.capability |=
					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;

		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
			dev->node_props.max_engine_clk_fcompute);

		sysfs_show_64bit_prop(buffer, "local_mem_size",
				(unsigned long long int) 0);

		sysfs_show_32bit_prop(buffer, "fw_version",
				dev->gpu->mec_fw_version);
		sysfs_show_32bit_prop(buffer, "capability",
				dev->node_props.capability);
		sysfs_show_32bit_prop(buffer, "sdma_fw_version",
				dev->gpu->sdma_fw_version);
	}

	return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
					cpufreq_quick_get_max(0)/1000);
}
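/*
 * Note that reading the node "properties" file has a side effect:
 * node_show() ORs the watch-point bits and (on Tonga) the AQL
 * double-map bit into dev->node_props.capability at read time, so the
 * reported capability mask is computed on the fly rather than fixed at
 * topology init.
 */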
static const struct sysfs_ops node_ops = {
	.show = node_show,
};

static struct kobj_type node_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &node_ops,
};
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);
	kobject_del(kobj);
	kobject_put(kobj);
}
static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
	struct kfd_iolink_properties *iolink;
	struct kfd_cache_properties *cache;
	struct kfd_mem_properties *mem;
	struct kfd_perf_properties *perf;

	if (dev->kobj_iolink) {
		list_for_each_entry(iolink, &dev->io_link_props, list)
			if (iolink->kobj) {
				kfd_remove_sysfs_file(iolink->kobj,
							&iolink->attr);
				iolink->kobj = NULL;
			}
		kobject_del(dev->kobj_iolink);
		kobject_put(dev->kobj_iolink);
		dev->kobj_iolink = NULL;
	}

	if (dev->kobj_cache) {
		list_for_each_entry(cache, &dev->cache_props, list)
			if (cache->kobj) {
				kfd_remove_sysfs_file(cache->kobj,
							&cache->attr);
				cache->kobj = NULL;
			}
		kobject_del(dev->kobj_cache);
		kobject_put(dev->kobj_cache);
		dev->kobj_cache = NULL;
	}

	if (dev->kobj_mem) {
		list_for_each_entry(mem, &dev->mem_props, list)
			if (mem->kobj) {
				kfd_remove_sysfs_file(mem->kobj, &mem->attr);
				mem->kobj = NULL;
			}
		kobject_del(dev->kobj_mem);
		kobject_put(dev->kobj_mem);
		dev->kobj_mem = NULL;
	}

	if (dev->kobj_perf) {
		list_for_each_entry(perf, &dev->perf_props, list) {
			kfree(perf->attr_group);
			perf->attr_group = NULL;
		}
		kobject_del(dev->kobj_perf);
		kobject_put(dev->kobj_perf);
		dev->kobj_perf = NULL;
	}

	if (dev->kobj_node) {
		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
		sysfs_remove_file(dev->kobj_node, &dev->attr_props);
		kobject_del(dev->kobj_node);
		kobject_put(dev->kobj_node);
		dev->kobj_node = NULL;
	}
}
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
		uint32_t id)
{
	struct kfd_iolink_properties *iolink;
	struct kfd_cache_properties *cache;
	struct kfd_mem_properties *mem;
	struct kfd_perf_properties *perf;
	int ret;
	uint32_t i, num_attrs;
	struct attribute **attrs;

	if (WARN_ON(dev->kobj_node))
		return -EEXIST;

	/*
	 * Creating the sysfs folders
	 */
	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
	if (!dev->kobj_node)
		return -ENOMEM;

	ret = kobject_init_and_add(dev->kobj_node, &node_type,
			sys_props.kobj_nodes, "%d", id);
	if (ret < 0)
		return ret;

	dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
	if (!dev->kobj_mem)
		return -ENOMEM;

	dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
	if (!dev->kobj_cache)
		return -ENOMEM;

	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
	if (!dev->kobj_iolink)
		return -ENOMEM;

	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
	if (!dev->kobj_perf)
		return -ENOMEM;

	/*
	 * Creating sysfs files for node properties
	 */
	dev->attr_gpuid.name = "gpu_id";
	dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(&dev->attr_gpuid);
	dev->attr_name.name = "name";
	dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(&dev->attr_name);
	dev->attr_props.name = "properties";
	dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(&dev->attr_props);
	ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
	if (ret < 0)
		return ret;
	ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
	if (ret < 0)
		return ret;
	ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
	if (ret < 0)
		return ret;

	i = 0;
	list_for_each_entry(mem, &dev->mem_props, list) {
		mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
		if (!mem->kobj)
			return -ENOMEM;
		ret = kobject_init_and_add(mem->kobj, &mem_type,
				dev->kobj_mem, "%d", i);
		if (ret < 0)
			return ret;

		mem->attr.name = "properties";
		mem->attr.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&mem->attr);
		ret = sysfs_create_file(mem->kobj, &mem->attr);
		if (ret < 0)
			return ret;
		i++;
	}

	i = 0;
	list_for_each_entry(cache, &dev->cache_props, list) {
		cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
		if (!cache->kobj)
			return -ENOMEM;
		ret = kobject_init_and_add(cache->kobj, &cache_type,
				dev->kobj_cache, "%d", i);
		if (ret < 0)
			return ret;

		cache->attr.name = "properties";
		cache->attr.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&cache->attr);
		ret = sysfs_create_file(cache->kobj, &cache->attr);
		if (ret < 0)
			return ret;
		i++;
	}

	i = 0;
	list_for_each_entry(iolink, &dev->io_link_props, list) {
		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
		if (!iolink->kobj)
			return -ENOMEM;
		ret = kobject_init_and_add(iolink->kobj, &iolink_type,
				dev->kobj_iolink, "%d", i);
		if (ret < 0)
			return ret;

		iolink->attr.name = "properties";
		iolink->attr.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&iolink->attr);
		ret = sysfs_create_file(iolink->kobj, &iolink->attr);
		if (ret < 0)
			return ret;
		i++;
	}

	/* All hardware blocks have the same number of attributes. */
	num_attrs = ARRAY_SIZE(perf_attr_iommu);
	list_for_each_entry(perf, &dev->perf_props, list) {
		perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
			* num_attrs + sizeof(struct attribute_group),
			GFP_KERNEL);
		if (!perf->attr_group)
			return -ENOMEM;

		attrs = (struct attribute **)(perf->attr_group + 1);
		if (!strcmp(perf->block_name, "iommu")) {
			/* Information of IOMMU's num_counters and counter_ids is shown
			 * under /sys/bus/event_source/devices/amd_iommu. We don't
			 * duplicate here.
			 */
			perf_attr_iommu[0].data = perf->max_concurrent;
			for (i = 0; i < num_attrs; i++)
				attrs[i] = &perf_attr_iommu[i].attr.attr;
		}
		perf->attr_group->name = perf->block_name;
		perf->attr_group->attrs = attrs;
		ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
		if (ret < 0)
			return ret;
	}

	return 0;
}
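/*
 * On a mid-build failure the function above returns without unwinding
 * the kobjects it already created; cleanup appears to rely on a later
 * kfd_remove_sysfs_node_entry(), which checks each kobject pointer for
 * NULL and so tolerates partially built entries.
 */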
/* Called with write topology lock acquired */
static int kfd_build_sysfs_node_tree(void)
{
	struct kfd_topology_device *dev;
	int ret;
	uint32_t i = 0;

	list_for_each_entry(dev, &topology_device_list, list) {
		ret = kfd_build_sysfs_node_entry(dev, i);
		if (ret < 0)
			return ret;
		i++;
	}

	return 0;
}
/* Called with write topology lock acquired */
static void kfd_remove_sysfs_node_tree(void)
{
	struct kfd_topology_device *dev;

	list_for_each_entry(dev, &topology_device_list, list)
		kfd_remove_sysfs_node_entry(dev);
}
static int kfd_topology_update_sysfs(void)
{
	int ret;

	pr_info("Creating topology SYSFS entries\n");
	if (!sys_props.kobj_topology) {
		sys_props.kobj_topology =
				kfd_alloc_struct(sys_props.kobj_topology);
		if (!sys_props.kobj_topology)
			return -ENOMEM;

		ret = kobject_init_and_add(sys_props.kobj_topology,
				&sysprops_type, &kfd_device->kobj,
				"topology");
		if (ret < 0)
			return ret;

		sys_props.kobj_nodes = kobject_create_and_add("nodes",
				sys_props.kobj_topology);
		if (!sys_props.kobj_nodes)
			return -ENOMEM;

		sys_props.attr_genid.name = "generation_id";
		sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&sys_props.attr_genid);
		ret = sysfs_create_file(sys_props.kobj_topology,
				&sys_props.attr_genid);
		if (ret < 0)
			return ret;

		sys_props.attr_props.name = "system_properties";
		sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&sys_props.attr_props);
		ret = sysfs_create_file(sys_props.kobj_topology,
				&sys_props.attr_props);
		if (ret < 0)
			return ret;
	}

	kfd_remove_sysfs_node_tree();

	return kfd_build_sysfs_node_tree();
}
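/*
 * The top-level "topology" and "nodes" kobjects are created once and
 * then reused; every update tears down and rebuilds the entire per-node
 * subtree instead of patching individual nodes, which keeps the sysfs
 * tree consistent with topology_device_list at the cost of some churn.
 */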
static void kfd_topology_release_sysfs(void)
{
	kfd_remove_sysfs_node_tree();
	if (sys_props.kobj_topology) {
		sysfs_remove_file(sys_props.kobj_topology,
				&sys_props.attr_genid);
		sysfs_remove_file(sys_props.kobj_topology,
				&sys_props.attr_props);
		if (sys_props.kobj_nodes) {
			kobject_del(sys_props.kobj_nodes);
			kobject_put(sys_props.kobj_nodes);
			sys_props.kobj_nodes = NULL;
		}
		kobject_del(sys_props.kobj_topology);
		kobject_put(sys_props.kobj_topology);
		sys_props.kobj_topology = NULL;
	}
}
/* Called with write topology_lock acquired */
static void kfd_topology_update_device_list(struct list_head *temp_list,
					struct list_head *master_list)
{
	while (!list_empty(temp_list)) {
		list_move_tail(temp_list->next, master_list);
		sys_props.num_devices++;
	}
}
static void kfd_debug_print_topology(void)
{
	struct kfd_topology_device *dev;

	down_read(&topology_lock);

	dev = list_last_entry(&topology_device_list,
			struct kfd_topology_device, list);
	if (dev) {
		if (dev->node_props.cpu_cores_count &&
				dev->node_props.simd_count) {
			pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
				dev->node_props.device_id,
				dev->node_props.vendor_id);
		} else if (dev->node_props.cpu_cores_count)
			pr_info("Topology: Add CPU node\n");
		else if (dev->node_props.simd_count)
			pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
				dev->node_props.device_id,
				dev->node_props.vendor_id);
	}
	up_read(&topology_lock);
}
/* Helper function for initializing platform_xx members of
 * kfd_system_properties. Uses OEM info from the last CPU/APU node.
 */
static void kfd_update_system_properties(void)
{
	struct kfd_topology_device *dev;

	down_read(&topology_lock);
	dev = list_last_entry(&topology_device_list,
			struct kfd_topology_device, list);
	if (dev) {
		sys_props.platform_id =
			(*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
		sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
		sys_props.platform_rev = dev->oem_revision;
	}
	up_read(&topology_lock);
}
static void find_system_memory(const struct dmi_header *dm,
	void *private)
{
	struct kfd_mem_properties *mem;
	u16 mem_width, mem_clock;
	struct kfd_topology_device *kdev =
		(struct kfd_topology_device *)private;
	const u8 *dmi_data = (const u8 *)(dm + 1);

	if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
		mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
		mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
		list_for_each_entry(mem, &kdev->mem_props, list) {
			if (mem_width != 0xFFFF && mem_width != 0)
				mem->width = mem_width;
			if (mem_clock != 0)
				mem->mem_clk_max = mem_clock;
		}
	}
}
/*
 * Performance counters information is not part of CRAT but we would like to
 * put them in the sysfs under topology directory for Thunk to get the data.
 * This function is called before updating the sysfs.
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
	/* These are the only counters supported so far */
	return kfd_iommu_add_perf_counters(kdev);
}
/* kfd_add_non_crat_information - Add information that is not currently
 * defined in CRAT but is necessary for KFD topology
 * @kdev - topology device to which additional info is added
 */
static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
{
	/* Check if CPU only node. */
	if (!kdev->gpu) {
		/* Add system memory information */
		dmi_walk(find_system_memory, kdev);
	}
	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
}
/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
 * Ignore CRAT for all other devices. An AMD APU is identified if both CPU
 * and GPU cores are present.
 * @device_list - topology device list created by parsing ACPI CRAT table.
 * @return - TRUE if invalid, FALSE if valid.
 */
static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
{
	struct kfd_topology_device *dev;

	list_for_each_entry(dev, device_list, list) {
		if (dev->node_props.cpu_cores_count &&
		    dev->node_props.simd_count)
			return false;
	}
	pr_info("Ignoring ACPI CRAT on non-APU system\n");
	return true;
}
int kfd_topology_init(void)
{
	void *crat_image = NULL;
	size_t image_size = 0;
	int ret;
	struct list_head temp_topology_device_list;
	int cpu_only_node = 0;
	struct kfd_topology_device *kdev;
	int proximity_domain;

	/* topology_device_list - Master list of all topology devices
	 * temp_topology_device_list - temporary list created while parsing
	 * CRAT or VCRAT. Once parsing is complete the contents of the list
	 * are moved to topology_device_list
	 */

	/* Initialize the heads of both lists */
	INIT_LIST_HEAD(&topology_device_list);
	INIT_LIST_HEAD(&temp_topology_device_list);
	init_rwsem(&topology_lock);

	memset(&sys_props, 0, sizeof(sys_props));

	/* Proximity domains in ACPI CRAT tables start counting at
	 * 0. The same should be true for virtual CRAT tables created
	 * at this stage. GPUs added later in kfd_topology_add_device
	 * use a counter.
	 */
	proximity_domain = 0;

	/*
	 * Get the CRAT image from ACPI. If ACPI doesn't have one, or if the
	 * ACPI CRAT is invalid, create a virtual CRAT.
	 * NOTE: The current implementation expects all AMD APUs to have a
	 * CRAT. If no CRAT is available, it is assumed to be a CPU-only
	 * system.
	 */
	ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
	if (!ret) {
		ret = kfd_parse_crat_table(crat_image,
					   &temp_topology_device_list,
					   proximity_domain);
		if (ret ||
		    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
			kfd_release_topology_device_list(
				&temp_topology_device_list);
			kfd_destroy_crat_image(crat_image);
			crat_image = NULL;
		}
	}

	if (!crat_image) {
		ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
						    COMPUTE_UNIT_CPU, NULL,
						    proximity_domain);
		cpu_only_node = 1;
		if (ret) {
			pr_err("Error creating VCRAT table for CPU\n");
			return ret;
		}

		ret = kfd_parse_crat_table(crat_image,
					   &temp_topology_device_list,
					   proximity_domain);
		if (ret) {
			pr_err("Error parsing VCRAT table for CPU\n");
			goto err;
		}
	}

	kdev = list_first_entry(&temp_topology_device_list,
				struct kfd_topology_device, list);
	kfd_add_perf_to_topology(kdev);

	down_write(&topology_lock);
	kfd_topology_update_device_list(&temp_topology_device_list,
					&topology_device_list);
	atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
	ret = kfd_topology_update_sysfs();
	up_write(&topology_lock);

	if (!ret) {
		sys_props.generation_count++;
		kfd_update_system_properties();
		kfd_debug_print_topology();
		pr_info("Finished initializing topology\n");
	} else
		pr_err("Failed to update topology in sysfs ret=%d\n", ret);

	/* For nodes with GPU, this information gets added
	 * when GPU is detected (kfd_topology_add_device).
	 */
	if (cpu_only_node) {
		/* Add additional information to CPU only node created above */
		down_write(&topology_lock);
		kdev = list_first_entry(&topology_device_list,
				struct kfd_topology_device, list);
		up_write(&topology_lock);
		kfd_add_non_crat_information(kdev);
	}

err:
	kfd_destroy_crat_image(crat_image);
	return ret;
}
void kfd_topology_shutdown(void)
{
	down_write(&topology_lock);
	kfd_topology_release_sysfs();
	kfd_release_live_view();
	up_write(&topology_lock);
}
static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
{
	uint32_t hashout;
	uint32_t buf[7];
	uint64_t local_mem_size;
	int i;
	struct kfd_local_mem_info local_mem_info;

	if (!gpu)
		return 0;

	amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);

	local_mem_size = local_mem_info.local_mem_size_private +
			local_mem_info.local_mem_size_public;

	buf[0] = gpu->pdev->devfn;
	buf[1] = gpu->pdev->subsystem_vendor |
		(gpu->pdev->subsystem_device << 16);
	buf[2] = pci_domain_nr(gpu->pdev->bus);
	buf[3] = gpu->pdev->device;
	buf[4] = gpu->pdev->bus->number;
	buf[5] = lower_32_bits(local_mem_size);
	buf[6] = upper_32_bits(local_mem_size);

	for (i = 0, hashout = 0; i < 7; i++)
		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);

	return hashout;
}
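/*
 * The gpu_id is an XOR of hash_32() values over the device's PCI
 * identity (domain, bus, devfn, device and subsystem IDs) and its
 * local memory size, so it should remain stable across reboots for
 * unchanged hardware; 0 is returned only when there is no GPU.
 */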
/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
 *		the GPU device is not already present in the topology device
 *		list then return NULL. This means a new topology device has to
 *		be created for this GPU.
 */
static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
{
	struct kfd_topology_device *dev;
	struct kfd_topology_device *out_dev = NULL;
	struct kfd_mem_properties *mem;
	struct kfd_cache_properties *cache;
	struct kfd_iolink_properties *iolink;

	down_write(&topology_lock);
	list_for_each_entry(dev, &topology_device_list, list) {
		/* Discrete GPUs need their own topology device list
		 * entries. Don't assign them to CPU/APU nodes.
		 */
		if (!gpu->device_info->needs_iommu_device &&
		    dev->node_props.cpu_cores_count)
			continue;

		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
			dev->gpu = gpu;
			out_dev = dev;

			list_for_each_entry(mem, &dev->mem_props, list)
				mem->gpu = dev->gpu;
			list_for_each_entry(cache, &dev->cache_props, list)
				cache->gpu = dev->gpu;
			list_for_each_entry(iolink, &dev->io_link_props, list)
				iolink->gpu = dev->gpu;
			break;
		}
	}
	up_write(&topology_lock);
	return out_dev;
}
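/*
 * kfd_assign_gpu() binds the new GPU to the first GPU-less topology
 * node that reports SIMDs, i.e. an APU node parsed from an ACPI CRAT.
 * Discrete GPUs (which don't need the IOMMU device) skip nodes with
 * CPU cores, so they never attach to a CPU/APU entry and instead get
 * their own node via the virtual CRAT path in kfd_topology_add_device().
 */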
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/*
	 * TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
 *		patch this after CRAT parsing.
 */
static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
{
	struct kfd_mem_properties *mem;
	struct kfd_local_mem_info local_mem_info;

	if (!dev)
		return;

	/* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
	 * single bank of VRAM local memory.
	 * for dGPUs - VCRAT reports only one bank of Local Memory
	 * for APUs - If CRAT from ACPI reports more than one bank, then
	 *	all the banks will report the same mem_clk_max information
	 */
	amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);

	list_for_each_entry(mem, &dev->mem_props, list)
		mem->mem_clk_max = local_mem_info.mem_clk_max;
}
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
	struct kfd_iolink_properties *link, *cpu_link;
	struct kfd_topology_device *cpu_dev;
	uint32_t cap;
	uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
	uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;

	if (!dev || !dev->gpu)
		return;

	pcie_capability_read_dword(dev->gpu->pdev,
			PCI_EXP_DEVCAP2, &cap);

	if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
		     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
		cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;

	if (!dev->gpu->pci_atomic_requested ||
	    dev->gpu->device_info->asic_family == CHIP_HAWAII)
		flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;

	/* GPU only creates direct links so apply flags setting to all */
	list_for_each_entry(link, &dev->io_link_props, list) {
		link->flags = flag;
		cpu_dev = kfd_topology_device_by_proximity_domain(
				link->node_to);
		if (cpu_dev) {
			list_for_each_entry(cpu_link,
					    &cpu_dev->io_link_props, list)
				if (cpu_link->node_to == link->node_from)
					cpu_link->flags = cpu_flag;
		}
	}
}
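/*
 * I/O link flags are not described by CRAT: the CPU->GPU direction is
 * derived from the GPU's PCIe atomic-completer capabilities (DEVCAP2),
 * the GPU->CPU direction from whether PCIe atomics were successfully
 * requested (with Hawaii always treated as lacking atomics), and the
 * matching CPU-side return link is patched to carry the CPU flags.
 */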
int kfd_topology_add_device(struct kfd_dev *gpu)
{
	uint32_t gpu_id;
	struct kfd_topology_device *dev;
	struct kfd_cu_info cu_info;
	int res = 0;
	struct list_head temp_topology_device_list;
	void *crat_image = NULL;
	size_t image_size = 0;
	int proximity_domain;
	struct amdgpu_ras *ctx;

	INIT_LIST_HEAD(&temp_topology_device_list);

	gpu_id = kfd_generate_gpu_id(gpu);

	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);

	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);

	/* Check to see if this gpu device exists in the topology_device_list.
	 * If so, assign the gpu to that device,
	 * else create a Virtual CRAT for this gpu device and then parse that
	 * CRAT to create a new topology device. Once created assign the gpu to
	 * that topology device
	 */
	dev = kfd_assign_gpu(gpu);
	if (!dev) {
		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
						    COMPUTE_UNIT_GPU, gpu,
						    proximity_domain);
		if (res) {
			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
			       gpu_id);
			return res;
		}
		res = kfd_parse_crat_table(crat_image,
					   &temp_topology_device_list,
					   proximity_domain);
		if (res) {
			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
			       gpu_id);
			goto err;
		}

		down_write(&topology_lock);
		kfd_topology_update_device_list(&temp_topology_device_list,
			&topology_device_list);

		/* Update the SYSFS tree, since we added another topology
		 * device
		 */
		res = kfd_topology_update_sysfs();
		up_write(&topology_lock);

		if (!res)
			sys_props.generation_count++;
		else
			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
			       gpu_id, res);
		dev = kfd_assign_gpu(gpu);
		if (WARN_ON(!dev)) {
			res = -ENODEV;
			goto err;
		}
	}

	dev->gpu_id = gpu_id;
	gpu->id = gpu_id;

	/* TODO: Move the following lines to function
	 *	kfd_add_non_crat_information
	 */

	/* Fill-in additional information that is not available in CRAT but
	 * needed for the topology
	 */

	amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);

	strncpy(dev->node_props.name, gpu->device_info->asic_name,
			KFD_TOPOLOGY_PUBLIC_NAME_SIZE);

	dev->node_props.simd_arrays_per_engine =
		cu_info.num_shader_arrays_per_engine;

	dev->node_props.vendor_id = gpu->pdev->vendor;
	dev->node_props.device_id = gpu->pdev->device;
	dev->node_props.location_id = pci_dev_id(gpu->pdev);
	dev->node_props.max_engine_clk_fcompute =
		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
	dev->node_props.max_engine_clk_ccompute =
		cpufreq_quick_get_max(0) / 1000;
	dev->node_props.drm_render_minor =
		gpu->shared_resources.drm_render_minor;

	dev->node_props.hive_id = gpu->hive_id;
	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
	dev->node_props.num_sdma_xgmi_engines =
				gpu->device_info->num_xgmi_sdma_engines;
	dev->node_props.num_sdma_queues_per_engine =
				gpu->device_info->num_sdma_queues_per_engine;
	dev->node_props.num_gws = (hws_gws_support &&
		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
	dev->node_props.unique_id = gpu->unique_id;

	kfd_fill_mem_clk_max_info(dev);
	kfd_fill_iolink_non_crat_info(dev);

	switch (dev->gpu->device_info->asic_family) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_TONGA:
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pr_debug("Adding doorbell packet type capability\n");
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_RENOIR:
	case CHIP_ARCTURUS:
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->gpu->device_info->asic_family);
	}

	/*
	 * Overwrite ATS capability according to needs_iommu_device to fix
	 * potential missing corresponding bit in CRAT of BIOS.
	 */
	if (dev->gpu->device_info->needs_iommu_device)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
	else
		dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;

	/* Fix errors in CZ CRAT.
	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
	 *		because it doesn't consider masked out CUs
	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
	 */
	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
		dev->node_props.simd_count =
			cu_info.simd_per_cu * cu_info.cu_active_number;
		dev->node_props.max_waves_per_simd = 10;
	}

	ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
	if (ctx) {
		/* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
		dev->node_props.capability |=
			(((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
			 ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
			HSA_CAP_SRAM_EDCSUPPORTED : 0;
		dev->node_props.capability |=
			((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
			HSA_CAP_MEM_EDCSUPPORTED : 0;
		dev->node_props.capability |= (ctx->features != 0) ?
			HSA_CAP_RASEVENTNOTIFY : 0;
	}

	kfd_debug_print_topology();

	if (!res)
		kfd_notify_gpu_change(gpu_id, 1);
err:
	kfd_destroy_crat_image(crat_image);
	return res;
}
int kfd_topology_remove_device(struct kfd_dev *gpu)
{
	struct kfd_topology_device *dev, *tmp;
	uint32_t gpu_id;
	int res = -ENODEV;

	down_write(&topology_lock);

	list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
		if (dev->gpu == gpu) {
			gpu_id = dev->gpu_id;
			kfd_remove_sysfs_node_entry(dev);
			kfd_release_topology_device(dev);
			sys_props.num_devices--;
			res = 0;
			if (kfd_topology_update_sysfs() < 0)
				kfd_topology_release_sysfs();
			break;
		}

	up_write(&topology_lock);

	if (!res)
		kfd_notify_gpu_change(gpu_id, 0);

	return res;
}
/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
 * topology. If a GPU device is found at @idx, a valid kfd_dev pointer is
 * returned through @kdev.
 * Return - 0: On success (@kdev will be NULL for non GPU nodes)
 *	    -1: If end of list
 */
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
{
	struct kfd_topology_device *top_dev;
	uint8_t device_idx = 0;

	*kdev = NULL;
	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list) {
		if (device_idx == idx) {
			*kdev = top_dev->gpu;
			up_read(&topology_lock);
			return 0;
		}

		device_idx++;
	}

	up_read(&topology_lock);

	return -1;
}
static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
{
	int first_cpu_of_numa_node;

	if (!cpumask || cpumask == cpu_none_mask)
		return -1;
	first_cpu_of_numa_node = cpumask_first(cpumask);
	if (first_cpu_of_numa_node >= nr_cpu_ids)
		return -1;
#ifdef CONFIG_X86_64
	return cpu_data(first_cpu_of_numa_node).apicid;
#else
	return first_cpu_of_numa_node;
#endif
}
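/*
 * On x86-64 the helper above maps a cpumask to the APIC ID of its first
 * CPU; other architectures fall back to returning the CPU number itself
 * as a stand-in identifier.
 */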
/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical
 *	processor of the given NUMA node (numa_node_id)
 * Return -1 on failure
 */
int kfd_numa_node_to_apic_id(int numa_node_id)
{
	if (numa_node_id == -1) {
		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
		return kfd_cpumask_to_apic_id(cpu_online_mask);
	}
	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
{
	struct kfd_topology_device *dev;
	unsigned int i = 0;
	int r = 0;

	down_read(&topology_lock);

	list_for_each_entry(dev, &topology_device_list, list) {
		if (!dev->gpu) {
			i++;
			continue;
		}

		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
		if (r)
			break;
	}

	up_read(&topology_lock);

	return r;
}

int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
{
	struct kfd_topology_device *dev;
	unsigned int i = 0;
	int r = 0;

	down_read(&topology_lock);

	list_for_each_entry(dev, &topology_device_list, list) {
		if (!dev->gpu) {
			i++;
			continue;
		}

		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
		if (r)
			break;
	}

	up_read(&topology_lock);

	return r;
}

#endif