atomic64_add(incr, >->stats.counters[id]);
}
+#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
+
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
- "svm_pagefault_count",
- "tlb_inval_count",
- "vma_pagefault_count",
- "vma_pagefault_kb",
+ DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"),
+ DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"),
+ DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"),
+ DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"),
+ DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"),
+ DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"),
+ DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"),
+ DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"),
+ DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"),
+ DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"),
+ DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"),
+ DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"),
+ DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"),
+ DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"),
+ DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"),
+ DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"),
};
/**
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_US,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_KB,
+ XE_GT_STATS_ID_SVM_CPU_COPY_KB,
+ XE_GT_STATS_ID_SVM_4K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_64K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_2M_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_4K_BIND_US,
+ XE_GT_STATS_ID_SVM_64K_BIND_US,
+ XE_GT_STATS_ID_SVM_2M_BIND_US,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
#include <drm/drm_drv.h>
#include "xe_bo.h"
+#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_migrate.h"
#include "xe_module.h"
&vm->svm.garbage_collector.work);
}
+static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
+{
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
+}
+
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
const struct mmu_notifier_range *mmu_range,
*/
for_each_tile(tile, xe, id)
if (xe_pt_zap_ptes_range(tile, vm, range)) {
- tile_mask |= BIT(id);
/*
* WRITE_ONCE pairs with READ_ONCE in
* xe_vm_has_valid_gpu_mapping()
*/
WRITE_ONCE(range->tile_invalidated,
range->tile_invalidated | BIT(id));
+
+ if (!(tile_mask & BIT(id))) {
+ xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
+ tile_mask |= BIT(id);
+ }
}
return tile_mask;
mmu_range);
}
+static s64 xe_svm_stats_ktime_us_delta(ktime_t start)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ?
+ ktime_us_delta(ktime_get(), start) : 0;
+}
+
+static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
+}
+
+static ktime_t xe_svm_stats_ktime_get(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
+}
+
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
struct xe_device *xe = vm->xe;
struct drm_gpusvm_range *r, *first;
+ struct xe_tile *tile;
+ ktime_t start = xe_svm_stats_ktime_get();
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
- u8 tile_mask = 0;
+ u8 tile_mask = 0, id;
long err;
xe_svm_assert_in_notifier(vm);
r = first;
drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
xe_svm_range_notifier_event_end(vm, r, mmu_range);
+ for_each_tile(tile, xe, id) {
+ if (tile_mask & BIT(id)) {
+ xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
+ }
+ }
}
static int __xe_svm_garbage_collector(struct xe_vm *vm,
XE_SVM_COPY_TO_SRAM,
};
+static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ int kb)
+{
+ if (dir == XE_SVM_COPY_TO_VRAM)
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
+ else
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
+}
+
+static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ unsigned long npages,
+ ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ if (dir == XE_SVM_COPY_TO_VRAM) {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ us_delta);
+ } else {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ us_delta);
+ }
+}
+
static int xe_svm_copy(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
unsigned long npages, const enum xe_svm_copy_dir dir)
{
struct xe_vram_region *vr = NULL;
+ struct xe_gt *gt = NULL;
struct xe_device *xe;
struct dma_fence *fence = NULL;
unsigned long i;
u64 vram_addr = XE_VRAM_ADDR_INVALID;
int err = 0, pos = 0;
bool sram = dir == XE_SVM_COPY_TO_SRAM;
+ ktime_t start = xe_svm_stats_ktime_get();
/*
* This flow is complex: it locates physically contiguous device pages,
if (!vr && spage) {
vr = page_to_vr(spage);
+ gt = xe_migrate_exec_queue(vr->migrate)->gt;
xe = vr->xe;
}
XE_WARN_ON(spage && page_to_vr(spage) != vr);
int incr = (match && last) ? 1 : 0;
if (vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (i - pos + incr) *
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
/* Extra mismatched device page, copy it */
if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
dma_fence_put(fence);
}
+ /*
+ * XXX: We can't derive the GT here (or anywhere in this functions, but
+ * compute always uses the primary GT so accumlate stats on the likely
+ * GT of the fault.
+ */
+ if (gt)
+ xe_svm_copy_us_stats_incr(gt, dir, npages, start);
+
return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
return true;
}
+#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
+static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range) \
+{ \
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)
+
+#define DECL_SVM_RANGE_US_STATS(elem, stat) \
+static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range, \
+ ktime_t start) \
+{ \
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start); \
+\
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
+ us_delta); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
+DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
+DECL_SVM_RANGE_US_STATS(bind, BIND)
+DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)
+
static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_gt *gt, u64 fault_addr,
bool need_vram)
struct xe_tile *tile = gt_to_tile(gt);
int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
+ ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start;
int err;
lockdep_assert_held_write(&vm->lock);
if (IS_ERR(range))
return PTR_ERR(range);
- if (ctx.devmem_only && !range->base.flags.migrate_devmem)
- return -EACCES;
+ xe_svm_range_fault_count_stats_incr(gt, range);
- if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
- return 0;
+ if (ctx.devmem_only && !range->base.flags.migrate_devmem) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) {
+ xe_svm_range_valid_fault_count_stats_incr(gt, range);
+ range_debug(range, "PAGE FAULT - VALID");
+ goto out;
+ }
range_debug(range, "PAGE FAULT");
dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ ktime_t migrate_start = xe_svm_stats_ktime_get();
+
/* TODO : For multi-device dpagemap will be used to find the
* remote tile and remote device. Will need to modify
* xe_svm_alloc_vram to use dpagemap for future multi-device
* support.
*/
+ xe_svm_range_migrate_count_stats_incr(gt, range);
err = xe_svm_alloc_vram(tile, range, &ctx);
+ xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
if (migrate_try_count || !ctx.devmem_only) {
}
}
+ get_pages_start = xe_svm_stats_ktime_get();
+
range_debug(range, "GET PAGES");
err = xe_svm_range_get_pages(vm, range, &ctx);
/* Corner where CPU mappings have changed */
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
- goto err_out;
+ goto out;
}
+ xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
range_debug(range, "PAGE FAULT - BIND");
+ bind_start = xe_svm_stats_ktime_get();
retry_bind:
xe_vm_lock(vm, false);
fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
}
if (xe_vm_validate_should_retry(NULL, err, &end))
goto retry_bind;
- goto err_out;
+ goto out;
}
xe_vm_unlock(vm);
dma_fence_wait(fence, false);
dma_fence_put(fence);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
-err_out:
+out:
+ xe_svm_range_fault_us_stats_incr(gt, range, start);
return err;
}