linux-zen-server/drivers/gpu/drm/etnaviv/etnaviv_buffer.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014-2018 Etnaviv Project
 */

#include <drm/drm_drv.h>

#include "etnaviv_cmdbuf.h"
#include "etnaviv_gpu.h"
#include "etnaviv_gem.h"
#include "etnaviv_mmu.h"

#include "common.xml.h"
#include "state.xml.h"
#include "state_blt.xml.h"
#include "state_hi.xml.h"
#include "state_3d.xml.h"
#include "cmdstream.xml.h"

/*
 * Command Buffer helper:
 */


/*
 * Append one 32-bit word to the command buffer at the current write
 * offset (user_size, in bytes) and advance the offset by four bytes.
 * Triggers BUG_ON() if the write offset has already reached the buffer
 * size.
 */
static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
{
	u32 *slot;

	BUG_ON(buffer->user_size >= buffer->size);

	/* user_size counts bytes; the buffer is addressed in 32-bit words */
	slot = (u32 *)buffer->vaddr + buffer->user_size / 4;
	*slot = data;
	buffer->user_size += 4;
}

/*
 * Emit a LOAD_STATE command that writes a single value to the state
 * register 'reg': one header word followed by the value word.  The write
 * offset is rounded up to an 8-byte boundary before the header is
 * written.
 */
static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
	u32 reg, u32 value)
{
	u32 header;

	/* the header carries the register address shifted down by __SHR */
	header = VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
		 VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
		 VIV_FE_LOAD_STATE_HEADER_OFFSET(reg >>
				VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, header);
	OUT(buffer, value);
}

static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_END_HEADER_OP_END);
}

static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
}

/*
 * Emit a LINK command at the next 8-byte boundary: a header word carrying
 * the 'prefetch' count followed by the target 'address'.
 */
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
	u16 prefetch, u32 address)
{
	const u32 header = VIV_FE_LINK_HEADER_OP_LINK |
			   VIV_FE_LINK_HEADER_PREFETCH(prefetch);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, header);
	OUT(buffer, address);
}

/*
 * Emit a STALL command at the next 8-byte boundary: the STALL header word
 * followed by a token word encoding the 'from' and 'to' recipients.
 */
static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
	u32 from, u32 to)
{
	const u32 token = VIV_FE_STALL_TOKEN_FROM(from) |
			  VIV_FE_STALL_TOKEN_TO(to);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
	OUT(buffer, token);
}

/*
 * Emit a semaphore token: a LOAD_STATE of VIVS_GL_SEMAPHORE_TOKEN with
 * the 'from' and 'to' recipients encoded in the value.
 */
static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
{
	const u32 token = VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
			  VIVS_GL_SEMAPHORE_TOKEN_TO(to);

	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, token);
}

/*
 * Emit the command sequence that switches the GPU to 'pipe': a cache
 * flush chosen from the currently active pipe (gpu->exec_state), an
 * FE -> PE semaphore/stall pair, and finally the pipe select itself.
 * Four 64-bit words are written in total.  Caller must hold gpu->lock.
 */
static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, u8 pipe)
{
	u32 flush;

	lockdep_assert_held(&gpu->lock);

	/*
	 * The flush is picked from the pipe we are leaving, on the
	 * assumption that a switch is always 2D <-> 3D: leaving 2D flushes
	 * the 2D pixel engine cache, leaving 3D flushes the depth and color
	 * caches.  Any other current state flushes nothing.
	 */
	switch (gpu->exec_state) {
	case ETNA_PIPE_2D:
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
		break;
	case ETNA_PIPE_3D:
		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
		break;
	default:
		flush = 0;
		break;
	}

	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
}

/*
 * Debug helper: log a one-line summary for 'buf' and hex-dump 'len'
 * 32-bit words starting 'off' bytes into it.  The summary shows the
 * kernel virtual address of the dumped region, the address returned by
 * etnaviv_cmdbuf_get_va() plus 'off', and the number of bytes in the
 * buffer beyond the dumped region.
 */
static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
{
	u32 *start = buf->vaddr + off;
	u32 dump_bytes = len * 4;
	u32 gpu_va = etnaviv_cmdbuf_get_va(buf,
			&gpu->mmu_context->cmdbuf_mapping) + off;
	u32 remaining = buf->size - dump_bytes - off;

	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
		 start, gpu_va, remaining);

	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
		       start, dump_bytes, 0);
}

/*
 * Safely replace the WAIT of a waitlink with a new command and argument.
 * The GPU may be executing this WAIT while we're modifying it, so we have
 * to write it in a specific order to avoid the GPU branching to somewhere
 * else.  'wl_offset' is the offset to the first byte of the WAIT command.
 *
 * 'cmd' becomes the new header word (first 32-bit word) and 'arg' the new
 * second word of the 8-byte slot at 'wl_offset'.
 */
static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
	unsigned int wl_offset, u32 cmd, u32 arg)
{
	u32 *lw = buffer->vaddr + wl_offset;

	/*
	 * Store the argument first, while the header word is still the old
	 * WAIT, and order that store before the header update.
	 */
	lw[1] = arg;
	mb();
	/* Only now replace the header, so the new command never pairs with a stale argument. */
	lw[0] = cmd;
	mb();
}

/*
 * Make sure 'cmd_dwords' 64-bit words can be written contiguously at the
 * current write offset.  If they would run past the end of the buffer,
 * the write offset is reset to the start of the buffer.
 *
 * Returns the address obtained from etnaviv_cmdbuf_get_va() for the
 * (possibly reset) write offset, i.e. where the next command will land.
 */
static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
{
	const size_t bytes_needed = cmd_dwords * sizeof(u64);
	u32 base_va;

	/* not enough room before the end: wrap to the beginning */
	if (buffer->user_size + bytes_needed > buffer->size)
		buffer->user_size = 0;

	base_va = etnaviv_cmdbuf_get_va(buffer,
					&gpu->mmu_context->cmdbuf_mapping);

	return base_va + buffer->user_size;
}

u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	/* initialize buffer */
	buffer->user_size = 0;

	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	return buffer->user_size / 8;
}

/*
 * Emit the MMUv2 setup sequence for one pipe: select the pipe, load the
 * MMUv2 configuration (MTLB address with the MODE4_K mode bits) and the
 * safe address, then an FE -> PE semaphore/stall pair.
 */
static void etnaviv_buffer_emit_mmuv2_setup(struct etnaviv_cmdbuf *buffer,
	u32 pipe, u32 mtlb_addr, u32 safe_addr)
{
	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
	CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
		       mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
	CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
}

/*
 * Fill the kernel ring buffer, from its start, with a command stream
 * that configures MMUv2 on every pipe the GPU advertises (3D first, then
 * 2D), terminated by an END command.  Caller must hold gpu->lock.
 *
 * Returns the length of the stream in 64-bit words.
 */
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
{
	struct etnaviv_cmdbuf *ring = &gpu->buffer;
	const bool has_3d = gpu->identity.features & chipFeatures_PIPE_3D;
	const bool has_2d = gpu->identity.features & chipFeatures_PIPE_2D;

	lockdep_assert_held(&gpu->lock);

	ring->user_size = 0;

	if (has_3d)
		etnaviv_buffer_emit_mmuv2_setup(ring, ETNA_PIPE_3D,
						mtlb_addr, safe_addr);

	if (has_2d)
		etnaviv_buffer_emit_mmuv2_setup(ring, ETNA_PIPE_2D,
						mtlb_addr, safe_addr);

	CMD_END(ring);

	/* END writes a single word; round up so the count covers it */
	ring->user_size = ALIGN(ring->user_size, 8);

	return ring->user_size / 8;
}

u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	buffer->user_size = 0;

	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));

	CMD_END(buffer);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	return buffer->user_size / 8;
}

/*
 * Terminate execution of the kernel ring buffer.  If a pipe is active
 * (gpu->exec_state is 2D or 3D), a final stall/flush/stall sequence ending
 * in an END command is appended and the last WAIT is replaced by a LINK to
 * it.  Otherwise the last WAIT is directly replaced by an END command.
 * Caller must hold gpu->lock.
 */
void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	/*
	 * The last waitlink is the final 16 bytes written: the WAIT word
	 * (padded to 8 bytes by the LINK's alignment) plus the 8-byte LINK.
	 */
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 link_target, flush = 0;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	/* Pick the caches to flush from the currently active pipe. */
	if (gpu->exec_state == ETNA_PIPE_2D)
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
	else if (gpu->exec_state == ETNA_PIPE_3D)
		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
			VIVS_GL_FLUSH_CACHE_COLOR |
			VIVS_GL_FLUSH_CACHE_TEXTURE |
			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
			VIVS_GL_FLUSH_CACHE_SHADER_L2;

	if (flush) {
		/*
		 * Worst case without BLT: 2 sem/stall + 1 flush + 1 TS flush +
		 * 2 sem/stall + 1 end = 7 64-bit words.
		 */
		unsigned int dwords = 7;

		/*
		 * With BLT: two 4-word enable/sem/stall/disable sequences plus
		 * a 3-word BLT flush that takes the place of the 1-word TS flush.
		 */
		if (has_blt)
			dwords += 10;

		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);

		/* Stall the FE on the PE (and BLT, if present) before flushing. */
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
		if (gpu->exec_state == ETNA_PIPE_3D) {
			if (has_blt) {
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
			} else {
				CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
					       VIVS_TS_FLUSH_CACHE_FLUSH);
			}
		}
		/* Stall again after the flush, then stop the FE. */
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_END(buffer);

		/* Redirect the old WAIT to the sequence written above. */
		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_LINK_HEADER_OP_LINK |
					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
					    link_target);
	} else {
		/* Replace the last link-wait with an "END" command */
		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_END_HEADER_OP_END, 0);
	}
}

/*
 * Append a 'sync point' to the ring buffer.
 *
 * Writes an event signal for 'event', an END command and a fresh waitlink
 * into the ring, then replaces the previous WAIT with a LINK to that
 * sequence.  Caller must hold gpu->lock.
 */
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	/*
	 * The previous waitlink is the final 16 bytes written; remember where
	 * its WAIT sits before the write offset moves.
	 */
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 dwords, target;

	lockdep_assert_held(&gpu->lock);

	/*
	 * We need 4 dwords in the return target:
	 * 1 event + 1 end + 1 wait + 1 link.
	 */
	dwords = 4;
	target = etnaviv_buffer_reserve(gpu, buffer, dwords);

	/* Signal sync point event */
	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);

	/* Stop the FE to 'pause' the GPU */
	CMD_END(buffer);

	/* Append waitlink */
	CMD_WAIT(buffer);
	/*
	 * The LINK target is the WAIT just written: its header is the last
	 * four bytes before the current write offset.
	 */
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	/*
	 * Kick off the 'sync point' command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
				    target);
}

/*
 * Append a command buffer to the ring buffer.
 *
 * @gpu:         GPU whose kernel ring (gpu->buffer) is extended
 * @exec_state:  pipe the submitted buffer must run on; a pipe switch is
 *               emitted if it differs from gpu->exec_state
 * @mmu_context: MMU context the submitted buffer must run in; a context
 *               switch and MMU flush are emitted if it differs from
 *               gpu->mmu_context
 * @event:       event ID signalled after the submitted buffer returns
 * @cmdbuf:      the submitted command buffer; a LINK back into the ring
 *               is appended to it
 *
 * The ring receives, as needed, a maintenance sequence (MMU flush, PTA
 * load, pipe switch) followed by a LINK to @cmdbuf, and then the return
 * sequence (cache flush, stall, event, new waitlink).  Finally the
 * previous WAIT in the ring is replaced by a LINK that starts execution.
 * Caller must hold gpu->lock.
 */
void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
	struct etnaviv_iommu_context *mmu_context, unsigned int event,
	struct etnaviv_cmdbuf *cmdbuf)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	/*
	 * The previous waitlink is the final 16 bytes written; remember where
	 * its WAIT sits before the write offset moves.
	 */
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 return_target, return_dwords;
	u32 link_target, link_dwords;
	bool switch_context = gpu->exec_state != exec_state;
	bool switch_mmu_context = gpu->mmu_context != mmu_context;
	/*
	 * Sample the flush sequence of the context this submit will run in.
	 * The value is stored in gpu->flush_seq below, after gpu->mmu_context
	 * has been switched to 'mmu_context', so it has to belong to the
	 * incoming context.  Reading it from the outgoing context would
	 * leave gpu->flush_seq describing the wrong address space and could
	 * cause a later flush to be skipped.  When no switch happens both
	 * pointers are the same context, so the flush decision for this
	 * submit is unaffected.
	 */
	unsigned int new_flush_seq = READ_ONCE(mmu_context->flush_seq);
	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);

	/* Default: the old WAIT will link straight to the submitted buffer. */
	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					    &gpu->mmu_context->cmdbuf_mapping);
	link_dwords = cmdbuf->size / 8;

	/*
	 * If we need maintenance prior to submitting this buffer, we emit
	 * the required commands (MMU flush, PTA load, pipe switch) into the
	 * ring, followed by a new link to this buffer.  extra_dwords counts
	 * the 64-bit words this takes.
	 */
	if (need_flush || switch_context) {
		u32 target, extra_dwords;

		/* link command */
		extra_dwords = 1;

		/* flush command */
		if (need_flush) {
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
				extra_dwords += 1;
			else
				extra_dwords += 3;
		}

		/* pipe switch commands */
		if (switch_context)
			extra_dwords += 4;

		/* PTA load command */
		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
			extra_dwords += 1;

		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
		/*
		 * Switch MMU context if necessary. Must be done after the
		 * link target has been calculated, as the jump forward in the
		 * kernel ring still uses the last active MMU context before
		 * the switch.
		 */
		if (switch_mmu_context) {
			struct etnaviv_iommu_context *old_context = gpu->mmu_context;

			gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);
			etnaviv_iommu_context_put(old_context);
		}

		if (need_flush) {
			/* Add the MMU flush */
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
			} else {
				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;

				if (switch_mmu_context &&
				    gpu->sec_mode == ETNA_SEC_KERNEL) {
					unsigned short id =
						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
					CMD_LOAD_STATE(buffer,
						VIVS_MMUv2_PTA_CONFIG,
						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
				}

				if (gpu->sec_mode == ETNA_SEC_NONE)
					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);

				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
					       flush);
				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
					SYNC_RECIPIENT_PE);
				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
					SYNC_RECIPIENT_PE);
			}

			/* Record the sequence of the now-active context. */
			gpu->flush_seq = new_flush_seq;
		}

		if (switch_context) {
			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
			gpu->exec_state = exec_state;
		}

		/* And the link to the submitted buffer */
		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					&gpu->mmu_context->cmdbuf_mapping);
		CMD_LINK(buffer, link_dwords, link_target);

		/* Update the link target to point to above instructions */
		link_target = target;
		link_dwords = extra_dwords;
	}

	/*
	 * Append a LINK to the submitted command buffer to return to
	 * the ring buffer.  return_target is the ring target address.
	 * We need at most 7 dwords in the return target: 2 cache flush +
	 * 2 semaphore stall + 1 event + 1 wait + 1 link.
	 */
	return_dwords = 7;

	/*
	 * When the BLT engine is present we need 6 more dwords in the return
	 * target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
	 * but we don't need the normal TS flush state.
	 */
	if (has_blt)
		return_dwords += 6;

	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
	CMD_LINK(cmdbuf, return_dwords, return_target);

	/*
	 * Append a cache flush, stall, event, wait and link pointing back to
	 * the wait command to the ring buffer.
	 */
	if (gpu->exec_state == ETNA_PIPE_2D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
				       VIVS_GL_FLUSH_CACHE_PE2D);
	} else {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
				       VIVS_GL_FLUSH_CACHE_DEPTH |
				       VIVS_GL_FLUSH_CACHE_COLOR);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		} else {
			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
					       VIVS_TS_FLUSH_CACHE_FLUSH);
		}
	}
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	if (has_blt) {
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
	}

	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);
	CMD_WAIT(buffer);
	/*
	 * The LINK target is the WAIT just written: its header is the last
	 * four bytes before the current write offset.
	 */
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
			return_target,
			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
			cmdbuf->vaddr);

	if (drm_debug_enabled(DRM_UT_DRIVER)) {
		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
			       cmdbuf->vaddr, cmdbuf->size, 0);

		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
		pr_info("addr: 0x%08x\n", link_target);
		pr_info("back: 0x%08x\n", return_target);
		pr_info("event: %d\n", event);
	}

	/*
	 * Kick off the submitted command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
				    link_target);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
}
Initial commit 2023-08-30 17:53:23 +02:00			`// SPDX-License-Identifier: GPL-2.0`
			`/*`
			`* Copyright (C) 2014-2018 Etnaviv Project`
			`*/`

			`#include <drm/drm_drv.h>`

			`#include "etnaviv_cmdbuf.h"`
			`#include "etnaviv_gpu.h"`
			`#include "etnaviv_gem.h"`
			`#include "etnaviv_mmu.h"`

			`#include "common.xml.h"`
			`#include "state.xml.h"`
			`#include "state_blt.xml.h"`
			`#include "state_hi.xml.h"`
			`#include "state_3d.xml.h"`
			`#include "cmdstream.xml.h"`

			`/*`
			`* Command Buffer helper:`
			`*/`


			`static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)`
			`{`
			`u32 vaddr = (u32 )buffer->vaddr;`

			`BUG_ON(buffer->user_size >= buffer->size);`

			`vaddr[buffer->user_size / 4] = data;`
			`buffer->user_size += 4;`
			`}`

			`static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,`
			`u32 reg, u32 value)`
			`{`
			`u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;`

			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`/* write a register via cmd stream */`
			`OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE \|`
			`VIV_FE_LOAD_STATE_HEADER_COUNT(1) \|`
			`VIV_FE_LOAD_STATE_HEADER_OFFSET(index));`
			`OUT(buffer, value);`
			`}`

			`static inline void CMD_END(struct etnaviv_cmdbuf *buffer)`
			`{`
			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`OUT(buffer, VIV_FE_END_HEADER_OP_END);`
			`}`

			`static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)`
			`{`
			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT \| 200);`
			`}`

			`static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,`
			`u16 prefetch, u32 address)`
			`{`
			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK \|`
			`VIV_FE_LINK_HEADER_PREFETCH(prefetch));`
			`OUT(buffer, address);`
			`}`

			`static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,`
			`u32 from, u32 to)`
			`{`
			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);`
			`OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) \| VIV_FE_STALL_TOKEN_TO(to));`
			`}`

			`static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)`
			`{`
			`CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,`
			`VIVS_GL_SEMAPHORE_TOKEN_FROM(from) \|`
			`VIVS_GL_SEMAPHORE_TOKEN_TO(to));`
			`}`

			`static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,`
			`struct etnaviv_cmdbuf *buffer, u8 pipe)`
			`{`
			`u32 flush = 0;`

			`lockdep_assert_held(&gpu->lock);`

			`/*`
			`* This assumes that if we're switching to 2D, we're switching`
			`* away from 3D, and vice versa. Hence, if we're switching to`
			`* the 2D core, we need to flush the 3D depth and color caches,`
			`* otherwise we need to flush the 2D pixel engine cache.`
			`*/`
			`if (gpu->exec_state == ETNA_PIPE_2D)`
			`flush = VIVS_GL_FLUSH_CACHE_PE2D;`
			`else if (gpu->exec_state == ETNA_PIPE_3D)`
			`flush = VIVS_GL_FLUSH_CACHE_DEPTH \| VIVS_GL_FLUSH_CACHE_COLOR;`

			`CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`

			`CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,`
			`VIVS_GL_PIPE_SELECT_PIPE(pipe));`
			`}`

			`static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,`
			`struct etnaviv_cmdbuf *buf, u32 off, u32 len)`
			`{`
			`u32 size = buf->size;`
			`u32 *ptr = buf->vaddr + off;`

			`dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",`
			`ptr, etnaviv_cmdbuf_get_va(buf,`
			`&gpu->mmu_context->cmdbuf_mapping) +`
			`off, size - len * 4 - off);`

			`print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,`
			`ptr, len * 4, 0);`
			`}`

			`/*`
			`* Safely replace the WAIT of a waitlink with a new command and argument.`
			`* The GPU may be executing this WAIT while we're modifying it, so we have`
			`* to write it in a specific order to avoid the GPU branching to somewhere`
			`* else. 'wl_offset' is the offset to the first byte of the WAIT command.`
			`*/`
			`static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,`
			`unsigned int wl_offset, u32 cmd, u32 arg)`
			`{`
			`u32 *lw = buffer->vaddr + wl_offset;`

			`lw[1] = arg;`
			`mb();`
			`lw[0] = cmd;`
			`mb();`
			`}`

			`/*`
			`* Ensure that there is space in the command buffer to contiguously write`
			`* 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.`
			`*/`
			`static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,`
			`struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)`
			`{`
			`if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)`
			`buffer->user_size = 0;`

			`return etnaviv_cmdbuf_get_va(buffer,`
			`&gpu->mmu_context->cmdbuf_mapping) +`
			`buffer->user_size;`
			`}`

			`u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`

			`lockdep_assert_held(&gpu->lock);`

			`/* initialize buffer */`
			`buffer->user_size = 0;`

			`CMD_WAIT(buffer);`
			`CMD_LINK(buffer, 2,`
			`etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)`
			`+ buffer->user_size - 4);`

			`return buffer->user_size / 8;`
			`}`

			`u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`

			`lockdep_assert_held(&gpu->lock);`

			`buffer->user_size = 0;`

			`if (gpu->identity.features & chipFeatures_PIPE_3D) {`
			`CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,`
			`VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));`
			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,`
			`mtlb_addr \| VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);`
			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`}`

			`if (gpu->identity.features & chipFeatures_PIPE_2D) {`
			`CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,`
			`VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));`
			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,`
			`mtlb_addr \| VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);`
			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`}`

			`CMD_END(buffer);`

			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`return buffer->user_size / 8;`
			`}`

			`u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`

			`lockdep_assert_held(&gpu->lock);`

			`buffer->user_size = 0;`

			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,`
			`VIVS_MMUv2_PTA_CONFIG_INDEX(id));`

			`CMD_END(buffer);`

			`buffer->user_size = ALIGN(buffer->user_size, 8);`

			`return buffer->user_size / 8;`
			`}`

			`void etnaviv_buffer_end(struct etnaviv_gpu *gpu)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`
			`unsigned int waitlink_offset = buffer->user_size - 16;`
			`u32 link_target, flush = 0;`
			`bool has_blt = !!(gpu->identity.minor_features5 &`
			`chipMinorFeatures5_BLT_ENGINE);`

			`lockdep_assert_held(&gpu->lock);`

			`if (gpu->exec_state == ETNA_PIPE_2D)`
			`flush = VIVS_GL_FLUSH_CACHE_PE2D;`
			`else if (gpu->exec_state == ETNA_PIPE_3D)`
			`flush = VIVS_GL_FLUSH_CACHE_DEPTH \|`
			`VIVS_GL_FLUSH_CACHE_COLOR \|`
			`VIVS_GL_FLUSH_CACHE_TEXTURE \|`
			`VIVS_GL_FLUSH_CACHE_TEXTUREVS \|`
			`VIVS_GL_FLUSH_CACHE_SHADER_L2;`

			`if (flush) {`
			`unsigned int dwords = 7;`

			`if (has_blt)`
			`dwords += 10;`

			`link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);`

			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`if (has_blt) {`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);`
			`}`
			`CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);`
			`if (gpu->exec_state == ETNA_PIPE_3D) {`
			`if (has_blt) {`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);`
			`} else {`
			`CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,`
			`VIVS_TS_FLUSH_CACHE_FLUSH);`
			`}`
			`}`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`if (has_blt) {`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);`
			`}`
			`CMD_END(buffer);`

			`etnaviv_buffer_replace_wait(buffer, waitlink_offset,`
			`VIV_FE_LINK_HEADER_OP_LINK \|`
			`VIV_FE_LINK_HEADER_PREFETCH(dwords),`
			`link_target);`
			`} else {`
			`/* Replace the last link-wait with an "END" command */`
			`etnaviv_buffer_replace_wait(buffer, waitlink_offset,`
			`VIV_FE_END_HEADER_OP_END, 0);`
			`}`
			`}`

			`/* Append a 'sync point' to the ring buffer. */`
			`void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`
			`unsigned int waitlink_offset = buffer->user_size - 16;`
			`u32 dwords, target;`

			`lockdep_assert_held(&gpu->lock);`

			`/*`
			`* We need at most 3 dwords in the return target:`
			`* 1 event + 1 end + 1 wait + 1 link.`
			`*/`
			`dwords = 4;`
			`target = etnaviv_buffer_reserve(gpu, buffer, dwords);`

			`/* Signal sync point event */`
			`CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) \|`
			`VIVS_GL_EVENT_FROM_PE);`

			`/* Stop the FE to 'pause' the GPU */`
			`CMD_END(buffer);`

			`/* Append waitlink */`
			`CMD_WAIT(buffer);`
			`CMD_LINK(buffer, 2,`
			`etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)`
			`+ buffer->user_size - 4);`

			`/*`
			`* Kick off the 'sync point' command by replacing the previous`
			`* WAIT with a link to the address in the ring buffer.`
			`*/`
			`etnaviv_buffer_replace_wait(buffer, waitlink_offset,`
			`VIV_FE_LINK_HEADER_OP_LINK \|`
			`VIV_FE_LINK_HEADER_PREFETCH(dwords),`
			`target);`
			`}`

			`/* Append a command buffer to the ring buffer. */`
			`void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,`
			`struct etnaviv_iommu_context *mmu_context, unsigned int event,`
			`struct etnaviv_cmdbuf *cmdbuf)`
			`{`
			`struct etnaviv_cmdbuf *buffer = &gpu->buffer;`
			`unsigned int waitlink_offset = buffer->user_size - 16;`
			`u32 return_target, return_dwords;`
			`u32 link_target, link_dwords;`
			`bool switch_context = gpu->exec_state != exec_state;`
			`bool switch_mmu_context = gpu->mmu_context != mmu_context;`
			`unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);`
			`bool need_flush = switch_mmu_context \|\| gpu->flush_seq != new_flush_seq;`
			`bool has_blt = !!(gpu->identity.minor_features5 &`
			`chipMinorFeatures5_BLT_ENGINE);`

			`lockdep_assert_held(&gpu->lock);`

			`if (drm_debug_enabled(DRM_UT_DRIVER))`
			`etnaviv_buffer_dump(gpu, buffer, 0, 0x50);`

			`link_target = etnaviv_cmdbuf_get_va(cmdbuf,`
			`&gpu->mmu_context->cmdbuf_mapping);`
			`link_dwords = cmdbuf->size / 8;`

			`/*`
			`* If we need maintenance prior to submitting this buffer, we will`
			`* need to append a mmu flush load state, followed by a new`
			`* link to this buffer - a total of four additional words.`
			`*/`
			`if (need_flush \|\| switch_context) {`
			`u32 target, extra_dwords;`

			`/* link command */`
			`extra_dwords = 1;`

			`/* flush command */`
			`if (need_flush) {`
			`if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)`
			`extra_dwords += 1;`
			`else`
			`extra_dwords += 3;`
			`}`

			`/* pipe switch commands */`
			`if (switch_context)`
			`extra_dwords += 4;`

			`/* PTA load command */`
			`if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)`
			`extra_dwords += 1;`

			`target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);`
			`/*`
			`* Switch MMU context if necessary. Must be done after the`
			`* link target has been calculated, as the jump forward in the`
			`* kernel ring still uses the last active MMU context before`
			`* the switch.`
			`*/`
			`if (switch_mmu_context) {`
			`struct etnaviv_iommu_context *old_context = gpu->mmu_context;`

			`gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);`
			`etnaviv_iommu_context_put(old_context);`
			`}`

			`if (need_flush) {`
			`/* Add the MMU flush */`
			`if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {`
			`CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,`
			`VIVS_GL_FLUSH_MMU_FLUSH_FEMMU \|`
			`VIVS_GL_FLUSH_MMU_FLUSH_UNK1 \|`
			`VIVS_GL_FLUSH_MMU_FLUSH_UNK2 \|`
			`VIVS_GL_FLUSH_MMU_FLUSH_PEMMU \|`
			`VIVS_GL_FLUSH_MMU_FLUSH_UNK4);`
			`} else {`
			`u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK \|`
			`VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;`

			`if (switch_mmu_context &&`
			`gpu->sec_mode == ETNA_SEC_KERNEL) {`
			`unsigned short id =`
			`etnaviv_iommuv2_get_pta_id(gpu->mmu_context);`
			`CMD_LOAD_STATE(buffer,`
			`VIVS_MMUv2_PTA_CONFIG,`
			`VIVS_MMUv2_PTA_CONFIG_INDEX(id));`
			`}`

			`if (gpu->sec_mode == ETNA_SEC_NONE)`
			`flush \|= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);`

			`CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,`
			`flush);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE,`
			`SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE,`
			`SYNC_RECIPIENT_PE);`
			`}`

			`gpu->flush_seq = new_flush_seq;`
			`}`

			`if (switch_context) {`
			`etnaviv_cmd_select_pipe(gpu, buffer, exec_state);`
			`gpu->exec_state = exec_state;`
			`}`

			`/* And the link to the submitted buffer */`
			`link_target = etnaviv_cmdbuf_get_va(cmdbuf,`
			`&gpu->mmu_context->cmdbuf_mapping);`
			`CMD_LINK(buffer, link_dwords, link_target);`

			`/* Update the link target to point to above instructions */`
			`link_target = target;`
			`link_dwords = extra_dwords;`
			`}`

			`/*`
			`* Append a LINK to the submitted command buffer to return to`
			`* the ring buffer. return_target is the ring target address.`
			`* We need at most 7 dwords in the return target: 2 cache flush +`
			`* 2 semaphore stall + 1 event + 1 wait + 1 link.`
			`*/`
			`return_dwords = 7;`

			`/*`
			`* When the BLT engine is present we need 6 more dwords in the return`
			`* target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,`
			`* but we don't need the normal TS flush state.`
			`*/`
			`if (has_blt)`
			`return_dwords += 6;`

			`return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);`
			`CMD_LINK(cmdbuf, return_dwords, return_target);`

			`/*`
			`* Append a cache flush, stall, event, wait and link pointing back to`
			`* the wait command to the ring buffer.`
			`*/`
			`if (gpu->exec_state == ETNA_PIPE_2D) {`
			`CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,`
			`VIVS_GL_FLUSH_CACHE_PE2D);`
			`} else {`
			`CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,`
			`VIVS_GL_FLUSH_CACHE_DEPTH \|`
			`VIVS_GL_FLUSH_CACHE_COLOR);`
			`if (has_blt) {`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);`
			`} else {`
			`CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,`
			`VIVS_TS_FLUSH_CACHE_FLUSH);`
			`}`
			`}`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);`

			`if (has_blt) {`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);`
			`CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);`
			`CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);`
			`}`

			`CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) \|`
			`VIVS_GL_EVENT_FROM_PE);`
			`CMD_WAIT(buffer);`
			`CMD_LINK(buffer, 2,`
			`etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)`
			`+ buffer->user_size - 4);`

			`if (drm_debug_enabled(DRM_UT_DRIVER))`
			`pr_info("stream link to 0x%08x @ 0x%08x %p\n",`
			`return_target,`
			`etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),`
			`cmdbuf->vaddr);`

			`if (drm_debug_enabled(DRM_UT_DRIVER)) {`
			`print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,`
			`cmdbuf->vaddr, cmdbuf->size, 0);`

			`pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);`
			`pr_info("addr: 0x%08x\n", link_target);`
			`pr_info("back: 0x%08x\n", return_target);`
			`pr_info("event: %d\n", event);`
			`}`

			`/*`
			`* Kick off the submitted command by replacing the previous`
			`* WAIT with a link to the address in the ring buffer.`
			`*/`
			`etnaviv_buffer_replace_wait(buffer, waitlink_offset,`
			`VIV_FE_LINK_HEADER_OP_LINK \|`
			`VIV_FE_LINK_HEADER_PREFETCH(link_dwords),`
			`link_target);`

			`if (drm_debug_enabled(DRM_UT_DRIVER))`
			`etnaviv_buffer_dump(gpu, buffer, 0, 0x50);`
			`}`