398 lines
10 KiB
C
398 lines
10 KiB
C
|
// SPDX-License-Identifier: MIT
|
||
|
/*
|
||
|
* Copyright © 2022 Intel Corporation
|
||
|
*/
|
||
|
|
||
|
#include "i915_selftest.h"
|
||
|
|
||
|
#include "gem/i915_gem_internal.h"
|
||
|
#include "gem/i915_gem_region.h"
|
||
|
|
||
|
#include "gen8_engine_cs.h"
|
||
|
#include "i915_gem_ww.h"
|
||
|
#include "intel_engine_regs.h"
|
||
|
#include "intel_gpu_commands.h"
|
||
|
#include "intel_context.h"
|
||
|
#include "intel_gt.h"
|
||
|
#include "intel_ring.h"
|
||
|
|
||
|
#include "selftests/igt_flush_test.h"
|
||
|
#include "selftests/i915_random.h"
|
||
|
|
||
|
/*
 * Store the 64-bit value @val into the CPU mapping of @vma's object at
 * the byte offset corresponding to GPU address @addr.
 *
 * The object's pages must already be mapped (vma->obj->mm.mapping is
 * used directly), and the whole qword must lie inside the vma.
 */
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	/* The full 8 byte store, not just its first byte, must fit in the vma */
	GEM_BUG_ON(addr + sizeof(val) >
		   i915_vma_offset(vma) + i915_vma_size(vma));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}
|
||
|
|
||
|
static int
|
||
|
pte_tlbinv(struct intel_context *ce,
|
||
|
struct i915_vma *va,
|
||
|
struct i915_vma *vb,
|
||
|
u64 align,
|
||
|
void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
|
||
|
u64 length,
|
||
|
struct rnd_state *prng)
|
||
|
{
|
||
|
const unsigned int pat_index =
|
||
|
i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
|
||
|
struct drm_i915_gem_object *batch;
|
||
|
struct drm_mm_node vb_node;
|
||
|
struct i915_request *rq;
|
||
|
struct i915_vma *vma;
|
||
|
u64 addr;
|
||
|
int err;
|
||
|
u32 *cs;
|
||
|
|
||
|
batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
|
||
|
if (IS_ERR(batch))
|
||
|
return PTR_ERR(batch);
|
||
|
|
||
|
vma = i915_vma_instance(batch, ce->vm, NULL);
|
||
|
if (IS_ERR(vma)) {
|
||
|
err = PTR_ERR(vma);
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||
|
if (err)
|
||
|
goto out;
|
||
|
|
||
|
/* Pin va at random but aligned offset after vma */
|
||
|
addr = round_up(vma->node.start + vma->node.size, align);
|
||
|
/* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
|
||
|
addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
|
||
|
va->size, align);
|
||
|
err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
|
||
|
if (err) {
|
||
|
pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
|
||
|
goto out;
|
||
|
}
|
||
|
GEM_BUG_ON(i915_vma_offset(va) != addr);
|
||
|
if (vb != va) {
|
||
|
vb_node = vb->node;
|
||
|
vb->node = va->node; /* overwrites the _same_ PTE */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Now choose random dword at the 1st pinned page.
|
||
|
*
|
||
|
* SZ_64K pages on dg1 require that the whole PT be marked
|
||
|
* containing 64KiB entries. So we make sure that vma
|
||
|
* covers the whole PT, despite being randomly aligned to 64KiB
|
||
|
* and restrict our sampling to the 2MiB PT within where
|
||
|
* we know that we will be using 64KiB pages.
|
||
|
*/
|
||
|
if (align == SZ_64K)
|
||
|
addr = round_up(addr, SZ_2M);
|
||
|
addr = igt_random_offset(prng, addr, addr + align, 8, 8);
|
||
|
|
||
|
if (va != vb)
|
||
|
pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
|
||
|
ce->engine->name, va->obj->mm.region->name ?: "smem",
|
||
|
addr, align, va->resource->page_sizes_gtt,
|
||
|
va->page_sizes.phys, va->page_sizes.sg,
|
||
|
addr & -length, length);
|
||
|
|
||
|
cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
|
||
|
*cs++ = MI_NOOP; /* for later termination */
|
||
|
/*
|
||
|
* Sample the target to see if we spot the updated backing store.
|
||
|
* Gen8 VCS compares immediate value with bitwise-and of two
|
||
|
* consecutive DWORDS pointed by addr, other gen/engines compare value
|
||
|
* with DWORD pointed by addr. Moreover we want to exercise DWORD size
|
||
|
* invalidations. To fulfill all these requirements below values
|
||
|
* have been chosen.
|
||
|
*/
|
||
|
*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
|
||
|
*cs++ = 0; /* break if *addr == 0 */
|
||
|
*cs++ = lower_32_bits(addr);
|
||
|
*cs++ = upper_32_bits(addr);
|
||
|
vma_set_qw(va, addr, -1);
|
||
|
vma_set_qw(vb, addr, 0);
|
||
|
|
||
|
/* Keep sampling until we get bored */
|
||
|
*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
|
||
|
*cs++ = lower_32_bits(i915_vma_offset(vma));
|
||
|
*cs++ = upper_32_bits(i915_vma_offset(vma));
|
||
|
|
||
|
i915_gem_object_flush_map(batch);
|
||
|
|
||
|
rq = i915_request_create(ce);
|
||
|
if (IS_ERR(rq)) {
|
||
|
err = PTR_ERR(rq);
|
||
|
goto out_va;
|
||
|
}
|
||
|
|
||
|
err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
|
||
|
if (err) {
|
||
|
i915_request_add(rq);
|
||
|
goto out_va;
|
||
|
}
|
||
|
|
||
|
i915_request_get(rq);
|
||
|
i915_request_add(rq);
|
||
|
|
||
|
/* Short sleep to sanitycheck the batch is spinning before we begin */
|
||
|
msleep(10);
|
||
|
if (va == vb) {
|
||
|
if (!i915_request_completed(rq)) {
|
||
|
pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
|
||
|
ce->engine->name, va->obj->mm.region->name ?: "smem",
|
||
|
addr, align, va->resource->page_sizes_gtt,
|
||
|
va->page_sizes.phys, va->page_sizes.sg);
|
||
|
err = -EIO;
|
||
|
}
|
||
|
} else if (!i915_request_completed(rq)) {
|
||
|
struct i915_vma_resource vb_res = {
|
||
|
.bi.pages = vb->obj->mm.pages,
|
||
|
.bi.page_sizes = vb->obj->mm.page_sizes,
|
||
|
.start = i915_vma_offset(vb),
|
||
|
.vma_size = i915_vma_size(vb)
|
||
|
};
|
||
|
unsigned int pte_flags = 0;
|
||
|
|
||
|
/* Flip the PTE between A and B */
|
||
|
if (i915_gem_object_is_lmem(vb->obj))
|
||
|
pte_flags |= PTE_LM;
|
||
|
ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
|
||
|
|
||
|
/* Flush the PTE update to concurrent HW */
|
||
|
tlbinv(ce->vm, addr & -length, length);
|
||
|
|
||
|
if (wait_for(i915_request_completed(rq), HZ / 2)) {
|
||
|
pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
|
||
|
ce->engine->name);
|
||
|
err = -EINVAL;
|
||
|
}
|
||
|
} else {
|
||
|
pr_err("Spinner ended unexpectedly\n");
|
||
|
err = -EIO;
|
||
|
}
|
||
|
i915_request_put(rq);
|
||
|
|
||
|
cs = page_mask_bits(batch->mm.mapping);
|
||
|
*cs = MI_BATCH_BUFFER_END;
|
||
|
wmb();
|
||
|
|
||
|
out_va:
|
||
|
if (vb != va)
|
||
|
vb->node = vb_node;
|
||
|
i915_vma_unpin(va);
|
||
|
if (i915_vma_unbind_unlocked(va))
|
||
|
err = -EIO;
|
||
|
out:
|
||
|
i915_gem_object_put(batch);
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
|
||
|
{
|
||
|
struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
|
||
|
resource_size_t size = SZ_1G;
|
||
|
|
||
|
/*
|
||
|
* Allocation of largest possible page size allows to test all types
|
||
|
* of pages. To succeed with both allocations, especially in case of Small
|
||
|
* BAR, try to allocate no more than quarter of mappable memory.
|
||
|
*/
|
||
|
if (mr && size > mr->io_size / 4)
|
||
|
size = mr->io_size / 4;
|
||
|
|
||
|
return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
|
||
|
}
|
||
|
|
||
|
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
|
||
|
{
|
||
|
/*
|
||
|
* SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
|
||
|
* While that does not require the whole 2M block to be contiguous
|
||
|
* it is easier to make it so, since we need that for SZ_2M pagees.
|
||
|
* Since we randomly offset the start of the vma, we need a 4M object
|
||
|
* so that there is a 2M range within it is suitable for SZ_64K PTE.
|
||
|
*/
|
||
|
return i915_gem_object_create_internal(gt->i915, SZ_4M);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
mem_tlbinv(struct intel_gt *gt,
|
||
|
struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
|
||
|
void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
|
||
|
{
|
||
|
unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
|
||
|
struct intel_engine_cs *engine;
|
||
|
struct drm_i915_gem_object *A, *B;
|
||
|
struct i915_ppgtt *ppgtt;
|
||
|
struct i915_vma *va, *vb;
|
||
|
enum intel_engine_id id;
|
||
|
I915_RND_STATE(prng);
|
||
|
void *vaddr;
|
||
|
int err;
|
||
|
|
||
|
/*
|
||
|
* Check that the TLB invalidate is able to revoke an active
|
||
|
* page. We load a page into a spinning COND_BBE loop and then
|
||
|
* remap that page to a new physical address. The old address, and
|
||
|
* so the loop keeps spinning, is retained in the TLB cache until
|
||
|
* we issue an invalidate.
|
||
|
*/
|
||
|
|
||
|
A = create_fn(gt);
|
||
|
if (IS_ERR(A))
|
||
|
return PTR_ERR(A);
|
||
|
|
||
|
vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
|
||
|
if (IS_ERR(vaddr)) {
|
||
|
err = PTR_ERR(vaddr);
|
||
|
goto out_a;
|
||
|
}
|
||
|
|
||
|
B = create_fn(gt);
|
||
|
if (IS_ERR(B)) {
|
||
|
err = PTR_ERR(B);
|
||
|
goto out_a;
|
||
|
}
|
||
|
|
||
|
vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
|
||
|
if (IS_ERR(vaddr)) {
|
||
|
err = PTR_ERR(vaddr);
|
||
|
goto out_b;
|
||
|
}
|
||
|
|
||
|
GEM_BUG_ON(A->base.size != B->base.size);
|
||
|
if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
|
||
|
pr_warn("Failed to allocate contiguous pages for size %zx\n",
|
||
|
A->base.size);
|
||
|
|
||
|
ppgtt = i915_ppgtt_create(gt, 0);
|
||
|
if (IS_ERR(ppgtt)) {
|
||
|
err = PTR_ERR(ppgtt);
|
||
|
goto out_b;
|
||
|
}
|
||
|
|
||
|
va = i915_vma_instance(A, &ppgtt->vm, NULL);
|
||
|
if (IS_ERR(va)) {
|
||
|
err = PTR_ERR(va);
|
||
|
goto out_vm;
|
||
|
}
|
||
|
|
||
|
vb = i915_vma_instance(B, &ppgtt->vm, NULL);
|
||
|
if (IS_ERR(vb)) {
|
||
|
err = PTR_ERR(vb);
|
||
|
goto out_vm;
|
||
|
}
|
||
|
|
||
|
err = 0;
|
||
|
for_each_engine(engine, gt, id) {
|
||
|
struct i915_gem_ww_ctx ww;
|
||
|
struct intel_context *ce;
|
||
|
int bit;
|
||
|
|
||
|
ce = intel_context_create(engine);
|
||
|
if (IS_ERR(ce)) {
|
||
|
err = PTR_ERR(ce);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
i915_vm_put(ce->vm);
|
||
|
ce->vm = i915_vm_get(&ppgtt->vm);
|
||
|
|
||
|
for_i915_gem_ww(&ww, err, true)
|
||
|
err = intel_context_pin_ww(ce, &ww);
|
||
|
if (err)
|
||
|
goto err_put;
|
||
|
|
||
|
for_each_set_bit(bit,
|
||
|
(unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
|
||
|
BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
|
||
|
unsigned int len;
|
||
|
|
||
|
if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
|
||
|
continue;
|
||
|
|
||
|
/* sanitycheck the semaphore wake up */
|
||
|
err = pte_tlbinv(ce, va, va,
|
||
|
BIT_ULL(bit),
|
||
|
NULL, SZ_4K,
|
||
|
&prng);
|
||
|
if (err)
|
||
|
goto err_unpin;
|
||
|
|
||
|
for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
|
||
|
err = pte_tlbinv(ce, va, vb,
|
||
|
BIT_ULL(bit),
|
||
|
tlbinv,
|
||
|
BIT_ULL(len),
|
||
|
&prng);
|
||
|
if (err)
|
||
|
goto err_unpin;
|
||
|
if (len == ppgtt_size)
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
err_unpin:
|
||
|
intel_context_unpin(ce);
|
||
|
err_put:
|
||
|
intel_context_put(ce);
|
||
|
if (err)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (igt_flush_test(gt->i915))
|
||
|
err = -EIO;
|
||
|
|
||
|
out_vm:
|
||
|
i915_vm_put(&ppgtt->vm);
|
||
|
out_b:
|
||
|
i915_gem_object_put(B);
|
||
|
out_a:
|
||
|
i915_gem_object_put(A);
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
/*
 * Invalidation callback that ignores the requested range (@addr, @length)
 * and invalidates the TLB of @vm's GT, using the GT's current TLB seqno
 * with bit 0 set.
 */
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	struct intel_gt *gt = vm->gt;

	intel_gt_invalidate_tlb(gt, intel_gt_tlb_seqno(gt) | 1);
}
|
||
|
|
||
|
static int invalidate_full(void *arg)
|
||
|
{
|
||
|
struct intel_gt *gt = arg;
|
||
|
int err;
|
||
|
|
||
|
if (GRAPHICS_VER(gt->i915) < 8)
|
||
|
return 0; /* TLB invalidate not implemented */
|
||
|
|
||
|
err = mem_tlbinv(gt, create_smem, tlbinv_full);
|
||
|
if (err == 0)
|
||
|
err = mem_tlbinv(gt, create_lmem, tlbinv_full);
|
||
|
if (err == -ENODEV || err == -ENXIO)
|
||
|
err = 0;
|
||
|
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
int intel_tlb_live_selftests(struct drm_i915_private *i915)
|
||
|
{
|
||
|
static const struct i915_subtest tests[] = {
|
||
|
SUBTEST(invalidate_full),
|
||
|
};
|
||
|
struct intel_gt *gt;
|
||
|
unsigned int i;
|
||
|
|
||
|
for_each_gt(gt, i915, i) {
|
||
|
int err;
|
||
|
|
||
|
if (intel_gt_is_wedged(gt))
|
||
|
continue;
|
||
|
|
||
|
err = intel_gt_live_subtests(tests, gt);
|
||
|
if (err)
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|