]> www.infradead.org Git - users/hch/block.git/commitdiff
drm/amdgpu: Use NPS ranges from discovery table
author Lijo Lazar <lijo.lazar@amd.com>
Thu, 9 May 2024 11:32:33 +0000 (17:02 +0530)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 17 May 2024 21:40:36 +0000 (17:40 -0400)
Add a GMC API to fetch NPS range information from the discovery table. Use
the NPS range information on GMC 9.4.3 SOCs when available; otherwise fall
back to the software method.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 824dd5b57d0d397db46e514da58c8d802aa9b19f..7446b351895f49968386952199606ef8358d3a72 100644 (file)
@@ -367,6 +367,35 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
        }
 }
 
+/*
+ * Sanity-check the NPS info table that the discovery binary header points to.
+ *
+ * The table is accepted only when its table_id equals NPS_INFO_TABLE_ID and
+ * amdgpu_discovery_verify_checksum() returns non-zero for size_bytes bytes of
+ * the table against the checksum stored in bhdr->table_list[NPS_INFO].
+ *
+ * Returns 0 when both checks pass, -EINVAL otherwise.
+ */
+static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
+                                          struct binary_header *bhdr)
+{
+       struct table_info *nps_entry = &bhdr->table_list[NPS_INFO];
+       uint16_t table_off = le16_to_cpu(nps_entry->offset);
+       uint16_t expected_sum = le16_to_cpu(nps_entry->checksum);
+       struct nps_info_header *hdr;
+
+       hdr = (struct nps_info_header *)(adev->mman.discovery_bin + table_off);
+
+       if (le32_to_cpu(hdr->table_id) != NPS_INFO_TABLE_ID) {
+               dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
+               return -EINVAL;
+       }
+
+       if (amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + table_off,
+                                            le32_to_cpu(hdr->size_bytes),
+                                            expected_sum))
+               return 0;
+
+       /* Reported at debug level only, same as the table id mismatch above. */
+       dev_dbg(adev->dev, "invalid nps info data table checksum\n");
+       return -EINVAL;
+}
+
 static int amdgpu_discovery_init(struct amdgpu_device *adev)
 {
        struct table_info *info;
@@ -1667,6 +1696,69 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
        return 0;
 }
 
+/*
+ * View over the NPS info table in the discovery binary. The parser picks a
+ * member based on header.version_major; only the v1 layout is declared here.
+ */
+union nps_info {
+       struct nps_info_v1_0 v1;
+};
+
+/*
+ * Read the NPS info table from the IP discovery binary.
+ *
+ * @adev:      device whose discovery binary (adev->mman.discovery_bin) is parsed
+ * @nps_type:  out, nps_type value stored in the table
+ * @ranges:    out, newly allocated array with one entry per table instance
+ * @range_cnt: out, number of entries in @ranges
+ *
+ * The out parameters are written only on success. The array is allocated
+ * with kvzalloc(); the caller owns it and must release it with kvfree().
+ *
+ * Returns 0 on success, -EINVAL when an out pointer is NULL, the discovery
+ * binary is not loaded or the table major version is not handled, -ENOENT
+ * when the table is absent or fails verification, and -ENOMEM when the range
+ * array cannot be allocated.
+ */
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+                                 uint32_t *nps_type,
+                                 struct amdgpu_gmc_memrange **ranges,
+                                 int *range_cnt)
+{
+       struct amdgpu_gmc_memrange *mem_ranges;
+       struct binary_header *bhdr;
+       union nps_info *nps_info;
+       u16 offset;
+       int count;
+       int i;
+
+       if (!nps_type || !range_cnt || !ranges)
+               return -EINVAL;
+
+       if (!adev->mman.discovery_bin) {
+               dev_err(adev->dev,
+                       "fetch mem range failed, ip discovery uninitialized\n");
+               return -EINVAL;
+       }
+
+       bhdr = (struct binary_header *)adev->mman.discovery_bin;
+       offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+
+       /* A zero offset means the binary carries no NPS info table. */
+       if (!offset)
+               return -ENOENT;
+
+       /* If verification fails, return as if NPS table doesn't exist */
+       if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+               return -ENOENT;
+
+       nps_info = (union nps_info *)(adev->mman.discovery_bin + offset);
+
+       switch (le16_to_cpu(nps_info->v1.header.version_major)) {
+       case 1:
+               /*
+                * NOTE(review): nps_type, count and the addresses are read
+                * without le*_to_cpu(), unlike the version fields - confirm
+                * whether a conversion is needed on big-endian hosts.
+                */
+               count = nps_info->v1.count;
+               mem_ranges = kvzalloc(count * sizeof(*mem_ranges), GFP_KERNEL);
+               /* The copy loop below must never run on a failed allocation. */
+               if (!mem_ranges)
+                       return -ENOMEM;
+
+               for (i = 0; i < count; i++) {
+                       mem_ranges[i].base_address =
+                               nps_info->v1.instance_info[i].base_address;
+                       mem_ranges[i].limit_address =
+                               nps_info->v1.instance_info[i].limit_address;
+                       mem_ranges[i].nid_mask = -1;
+                       mem_ranges[i].flags = 0;
+               }
+
+               /* Publish the results only once everything has succeeded. */
+               *nps_type = nps_info->v1.nps_type;
+               *range_cnt = count;
+               *ranges = mem_ranges;
+               break;
+       default:
+               dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
+                       le16_to_cpu(nps_info->v1.header.version_major),
+                       le16_to_cpu(nps_info->v1.header.version_minor));
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
 {
        /* what IP to use for this? */
index 4d03cd5b3410f97952eddd44735ee58b734b4cfb..f5d36525ec3efa97d6fe4ed1bbbea974761b1f9c 100644 (file)
@@ -30,4 +30,9 @@
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
 
+/*
+ * Fetch NPS type and memory ranges from the discovery table. On success
+ * (return 0) *ranges points to a newly allocated array of *range_cnt entries
+ * that the caller must free; a non-zero return is a negative errno.
+ */
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+                                 uint32_t *nps_type,
+                                 struct amdgpu_gmc_memrange **ranges,
+                                 int *range_cnt);
+
 #endif /* __AMDGPU_DISCOVERY__ */
index f8ed886ffca36df6157d8406e0f13d7b7fff477c..78cd31e929c63988791dcad6a41f910234a31a46 100644 (file)
@@ -1165,3 +1165,79 @@ void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
 {
        device_remove_file(adev->dev, &dev_attr_current_memory_partition);
 }
+
+/*
+ * Fill @mem_ranges from the NPS ranges reported by the discovery table.
+ *
+ * @adev:       device to query
+ * @mem_ranges: out, array with room for at least @exp_ranges entries
+ * @exp_ranges: number of ranges the caller expects the table to describe
+ *
+ * Each discovery range must satisfy base < limit and must not overlap any
+ * earlier range. For every accepted range, fpfn/lpfn are the base/limit
+ * addresses relative to adev->vm_manager.vram_base_offset, shifted right by
+ * AMDGPU_GPU_PAGE_SHIFT, and size is limit - base + 1.
+ *
+ * Returns 0 on success, -EINVAL when @mem_ranges is NULL, the range count
+ * differs from @exp_ranges or a range is invalid/overlapping, or the error
+ * returned by amdgpu_discovery_get_nps_info(). On a validation failure the
+ * entries of @mem_ranges before the offending index have already been
+ * written.
+ */
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+                                struct amdgpu_mem_partition_info *mem_ranges,
+                                int exp_ranges)
+{
+       struct amdgpu_gmc_memrange *ranges;
+       int range_cnt, ret, i, j;
+       uint32_t nps_type;
+
+       if (!mem_ranges)
+               return -EINVAL;
+
+       ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+                                           &range_cnt);
+       if (ret)
+               return ret;
+
+       /* TODO: For now, expect ranges and partition count to be the same.
+        * Adjust if there are holes expected in any NPS domain.
+        */
+       if (range_cnt != exp_ranges) {
+               dev_warn(adev->dev,
+                        "NPS config mismatch - expected ranges: %d discovery - nps mode: %u, nps ranges: %d\n",
+                        exp_ranges, nps_type, range_cnt);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       for (i = 0; i < exp_ranges; ++i) {
+               if (ranges[i].base_address >= ranges[i].limit_address) {
+                       dev_warn(adev->dev,
+                                "Invalid NPS range - nps mode: %u, range[%d]: base: %llx limit: %llx\n",
+                                nps_type, i, ranges[i].base_address,
+                                ranges[i].limit_address);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               /* Check for overlaps, not expecting any now */
+               for (j = i - 1; j >= 0; j--) {
+                       /* Two ranges intersect when the larger base is <= the smaller limit. */
+                       if (max(ranges[j].base_address,
+                               ranges[i].base_address) <=
+                           min(ranges[j].limit_address,
+                               ranges[i].limit_address)) {
+                               dev_warn(adev->dev,
+                                        "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]\n",
+                                        ranges[j].base_address,
+                                        ranges[j].limit_address,
+                                        ranges[i].base_address,
+                                        ranges[i].limit_address);
+                               ret = -EINVAL;
+                               goto out;
+                       }
+               }
+
+               mem_ranges[i].range.fpfn =
+                       (ranges[i].base_address -
+                        adev->vm_manager.vram_base_offset) >>
+                       AMDGPU_GPU_PAGE_SHIFT;
+               mem_ranges[i].range.lpfn =
+                       (ranges[i].limit_address -
+                        adev->vm_manager.vram_base_offset) >>
+                       AMDGPU_GPU_PAGE_SHIFT;
+               mem_ranges[i].size =
+                       ranges[i].limit_address - ranges[i].base_address + 1;
+       }
+
+out:
+       /*
+        * Reached on success as well. The array is allocated with kvzalloc()
+        * by the discovery code, so it has to be released with kvfree().
+        */
+       kvfree(ranges);
+
+       return ret;
+}
index 17f40ea1104b00bcfd1596337a2848b9dc5ef210..febca3130497ea23485f4ad48abf6843ceda8946 100644 (file)
@@ -199,6 +199,13 @@ struct amdgpu_mem_partition_info {
 
 #define INVALID_PFN    -1
 
+/*
+ * One memory range as reported by the NPS info table; filled in by
+ * amdgpu_discovery_get_nps_info().
+ */
+struct amdgpu_gmc_memrange {
+       /* Start address of the range. */
+       uint64_t base_address;
+       /* End address; consumers compute the size as limit - base + 1. */
+       uint64_t limit_address;
+       /* Set to 0 by the discovery parser. */
+       uint32_t flags;
+       /* Set to -1 by the discovery parser. */
+       int nid_mask;
+};
+
 enum amdgpu_gart_placement {
        AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
        AMDGPU_GART_PLACEMENT_HIGH,
@@ -439,4 +446,8 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
 int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
 void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
 
+/*
+ * Populate mem_ranges[0..exp_ranges-1] from the NPS ranges in the discovery
+ * table. Returns 0 on success or a negative errno when the table is missing,
+ * does not describe exactly exp_ranges ranges, or contains invalid or
+ * overlapping ranges.
+ */
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+                                struct amdgpu_mem_partition_info *mem_ranges,
+                                int exp_ranges);
+
 #endif
index 7c4e2adae7b3e8d948dcc440aa8ca458c4740eee..094c08cb98e7639b28f97dbafc0fe7536aa11c90 100644 (file)
@@ -1895,7 +1895,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
 {
        enum amdgpu_memory_partition mode;
        u32 start_addr = 0, size;
-       int i;
+       int i, r, l;
 
        mode = gmc_v9_0_query_memory_partition(adev);
 
@@ -1918,23 +1918,39 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
                break;
        }
 
-       size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
-       size /= adev->gmc.num_mem_partitions;
+       /* Use NPS range info, if populated */
+       r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+                                        adev->gmc.num_mem_partitions);
+       if (!r) {
+               l = 0;
+               for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+                       if (mem_ranges[i].range.lpfn >
+                           mem_ranges[i - 1].range.lpfn)
+                               l = i;
+               }
+
+       } else {
+               /* Fallback to sw based calculation */
+               size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+               size /= adev->gmc.num_mem_partitions;
+
+               for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+                       mem_ranges[i].range.fpfn = start_addr;
+                       mem_ranges[i].size =
+                               ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+                       mem_ranges[i].range.lpfn = start_addr + size - 1;
+                       start_addr += size;
+               }
 
-       for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
-               mem_ranges[i].range.fpfn = start_addr;
-               mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
-               mem_ranges[i].range.lpfn = start_addr + size - 1;
-               start_addr += size;
+               l = adev->gmc.num_mem_partitions - 1;
        }
 
        /* Adjust the last one */
-       mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
+       mem_ranges[l].range.lpfn =
                (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
-       mem_ranges[adev->gmc.num_mem_partitions - 1].size =
+       mem_ranges[l].size =
                adev->gmc.real_vram_size -
-               ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
-                << AMDGPU_GPU_PAGE_SHIFT);
+               ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
 }
 
 static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)