366 lines
7.8 KiB
C
366 lines
7.8 KiB
C
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
||
|
/*
|
||
|
* FP/SIMD state saving and restoring macros
|
||
|
*
|
||
|
* Copyright (C) 2012 ARM Ltd.
|
||
|
* Author: Catalin Marinas <catalin.marinas@arm.com>
|
||
|
*/
|
||
|
|
||
|
#include <asm/assembler.h>
|
||
|
|
||
|
/*
 * Store the FP/SIMD register file to memory.
 *
 * \state: base register of the save area.  q0-q31 are written to byte
 *         offsets 0..511, FPSR to offset 512 and FPCR to offset 516.
 * \tmpnr: number N of a scratch general register; xN/wN is clobbered.
 *
 * The last stp uses pre-index writeback, so \state is left advanced by
 * 16 * 30 bytes when the macro completes.
 */
.macro fpsimd_save state, tmpnr
	stp	q0,  q1,  [\state, #16 * 0]
	stp	q2,  q3,  [\state, #16 * 2]
	stp	q4,  q5,  [\state, #16 * 4]
	stp	q6,  q7,  [\state, #16 * 6]
	stp	q8,  q9,  [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	/* Writeback: \state now points at the q30 slot. */
	stp	q30, q31, [\state, #16 * 30]!
	/* The status/control words sit directly after q31. */
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm
|
||
|
|
||
|
/*
 * Install a new FPCR value, skipping the system register write when
 * FPCR already holds that value.
 *
 * \state: register holding the FPCR value to install.
 * \tmp:   scratch register; overwritten with the current FPCR.
 *
 * The comparison clobbers the condition flags.
 */
.macro fpsimd_restore_fpcr state, tmp
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	/*
	 * A write to fpcr may be self-synchronising, so it is only
	 * performed when the value actually differs.
	 */
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm
|
||
|
|
||
|
/*
 * Reload the FP/SIMD register file from memory.
 *
 * \state: base register of the save area, laid out as q0-q31 at byte
 *         offsets 0..511, FPSR at offset 512 and FPCR at offset 516.
 * \tmpnr: number N of a scratch general register; xN/wN is clobbered.
 *
 * Clobbers \state: the last ldp advances it by 16 * 30 bytes and it is
 * then handed to fpsimd_restore_fpcr as that macro's scratch register.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0,  q1,  [\state, #16 * 0]
	ldp	q2,  q3,  [\state, #16 * 2]
	ldp	q4,  q5,  [\state, #16 * 4]
	ldp	q6,  q7,  [\state, #16 * 6]
	ldp	q8,  q9,  [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	/* Writeback: \state now points at the q30 slot. */
	ldp	q30, q31, [\state, #16 * 30]!
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	/* x\tmpnr carries the new FPCR value; \state is the scratch. */
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
|
||
|
|
||
|
/* Sanity-check macros to help avoid encoding garbage instructions */
|
||
|
|
||
|
/*
 * Fail the build unless \nr is a general purpose register number
 * in the range 0..30.
 */
.macro _check_general_reg nr
	.if (\nr) > 30 || (\nr) < 0
		.error "Bad register number \nr."
	.endif
.endm
|
||
|
|
||
|
/*
 * Fail the build unless \znr is an SVE vector register number
 * in the range 0..31.
 */
.macro _sve_check_zreg znr
	.if (\znr) > 31 || (\znr) < 0
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm
|
||
|
|
||
|
/*
 * Fail the build unless \pnr is an SVE predicate register number
 * in the range 0..15.
 */
.macro _sve_check_preg pnr
	.if (\pnr) > 15 || (\pnr) < 0
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm
|
||
|
|
||
|
/*
 * Fail the build unless \min <= \n <= \max (both bounds inclusive).
 */
.macro _check_num n, min, max
	.if (\n) > (\max) || (\n) < (\min)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
|
||
|
|
||
|
/*
 * Fail the build unless \v is a valid SME vector select register
 * number, i.e. one of 12..15.
 */
.macro _sme_check_wv v
	.if (\v) > 15 || (\v) < 12
		.error "Bad vector select register \v."
	.endif
.endm
|
||
|
|
||
|
/* SVE instruction encodings for non-SVE-capable assemblers */
|
||
|
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
|
||
|
|
||
|
/*
 * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * \nz:     vector register number, 0..31
 * \nxbase: base register number, 0..30
 * \offset: signed offset in the range -0x100..0xff; its low three bits
 *          are placed at bit 10 and its next six bits at bit 16.
 */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000				\
		| (((\offset) & 0x1f8) << 13)		\
		| (((\offset) & 7) << 10)		\
		| ((\nxbase) << 5)			\
		| (\nz)
.endm
|
||
|
|
||
|
/*
 * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * \nz:     vector register number, 0..31
 * \nxbase: base register number, 0..30
 * \offset: signed offset in the range -0x100..0xff; its low three bits
 *          are placed at bit 10 and its next six bits at bit 16.
 */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000				\
		| (((\offset) & 0x1f8) << 13)		\
		| (((\offset) & 7) << 10)		\
		| ((\nxbase) << 5)			\
		| (\nz)
.endm
|
||
|
|
||
|
/*
 * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * \np:     predicate register number, 0..15
 * \nxbase: base register number, 0..30
 * \offset: signed offset in the range -0x100..0xff; its low three bits
 *          are placed at bit 10 and its next six bits at bit 16.
 */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000				\
		| (((\offset) & 0x1f8) << 13)		\
		| (((\offset) & 7) << 10)		\
		| ((\nxbase) << 5)			\
		| (\np)
.endm
|
||
|
|
||
|
/*
 * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * \np:     predicate register number, 0..15
 * \nxbase: base register number, 0..30
 * \offset: signed offset in the range -0x100..0xff; its low three bits
 *          are placed at bit 10 and its next six bits at bit 16.
 */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000				\
		| (((\offset) & 0x1f8) << 13)		\
		| (((\offset) & 7) << 10)		\
		| ((\nxbase) << 5)			\
		| (\np)
.endm
|
||
|
|
||
|
/*
 * RDVL X\nx, #\imm
 *
 * \nx:  destination register number, 0..30
 * \imm: signed multiplier in the range -0x20..0x1f, encoded as a six
 *       bit field at bit 5.
 */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000				\
		| (((\imm) & 0x3f) << 5)		\
		| (\nx)
.endm
|
||
|
|
||
|
/*
 * RDFFR (unpredicated): RDFFR P\np.B
 *
 * \np: destination predicate register number, 0..15
 */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000 | (\np)
.endm
|
||
|
|
||
|
/*
 * WRFFR P\np.B
 *
 * \np: source predicate register number, 0..15 (encoded at bit 5)
 */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000 | ((\np) << 5)
.endm
|
||
|
|
||
|
/*
 * PFALSE P\np.B
 *
 * \np: destination predicate register number, 0..15
 */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400 | (\np)
.endm
|
||
|
|
||
|
/* SME instruction encodings for non-SME-capable assemblers */
|
||
|
/* (pre binutils 2.38/LLVM 13) */
|
||
|
|
||
|
/*
 * RDSVL X\nx, #\imm
 *
 * \nx:  destination register number, 0..30
 * \imm: signed multiplier in the range -0x20..0x1f, encoded as a six
 *       bit field at bit 5.
 */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800				\
		| (((\imm) & 0x3f) << 5)		\
		| (\nx)
.endm
|
||
|
|
||
|
/*
 * STR (vector from ZA array):
 *	STR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * \nw:     vector select register number, 12..15 (encoded as \nw & 3
 *          at bit 13)
 * \nxbase: base register number, 0..30
 * \offset: unsigned vector offset, 0..15.  The instruction carries it
 *          in a four bit field at bits 3:0, so the full nibble must be
 *          encoded and anything outside 0..15 is rejected rather than
 *          silently truncated.
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1200000				\
		| (((\nw) & 3) << 13)			\
		| ((\nxbase) << 5)			\
		| ((\offset) & 0xf)
.endm
|
||
|
|
||
|
/*
 * LDR (vector to ZA array):
 *	LDR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * \nw:     vector select register number, 12..15 (encoded as \nw & 3
 *          at bit 13)
 * \nxbase: base register number, 0..30
 * \offset: unsigned vector offset, 0..15.  The instruction carries it
 *          in a four bit field at bits 3:0, so the full nibble must be
 *          encoded and anything outside 0..15 is rejected rather than
 *          silently truncated.
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1000000				\
		| (((\nw) & 3) << 13)			\
		| ((\nxbase) << 5)			\
		| ((\offset) & 0xf)
.endm
|
||
|
|
||
|
/*
 * LDR (ZT0)
 *
 *	LDR ZT0, [X\nx]
 *
 * \nx: base register number, 0..30.  The argument is parenthesised
 *      before shifting so that an expression argument is encoded as a
 *      whole, matching the other encoding macros in this file.
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000	\
		 | ((\nx) << 5)
.endm
|
||
|
|
||
|
/*
 * STR (ZT0)
 *
 *	STR ZT0, [X\nx]
 *
 * \nx: base register number, 0..30.  The argument is parenthesised
 *      before shifting so that an expression argument is encoded as a
 *      whole, matching the other encoding macros in this file.
 */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000		\
		 | ((\nx) << 5)
.endm
|
||
|
|
||
|
/*
 * ZERO ZA: clear the whole ZA array.
 *
 * Emitted as a raw instruction word; takes no operands.
 */
.macro zero_za
	.inst	0xc00800ff
.endm
|
||
|
|
||
|
/*
 * Recursive helper for _for: invoke _for__body once for every integer in
 * [\from, \to], in ascending order.
 *
 * The range is split in half at each level and each half is handled by a
 * nested expansion, ending when the range holds a single value.  The
 * macro is expanded in altmacro mode (see _for), whose %expr syntax
 * reduces each argument to a plain number before it is passed on.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm
|
||
|
|
||
|
/*
 * Expand \insn once for each integer value of \var in [\from, \to].
 *
 * \var:  name of the loop variable; \insn refers to it as \<name>
 * \from: first value (inclusive)
 * \to:   last value (inclusive)
 * \insn: the statement to repeat, taken as the rest of the line
 *
 * A temporary macro _for__body is defined around \insn, driven through
 * __for and then purged.  \insn itself is expanded with altmacro mode
 * switched off; altmacro is only enabled around the __for recursion.
 * The assembler is left in noaltmacro mode afterwards.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
|
||
|
|
||
|
/*
 * Update ZCR_EL1.LEN with the new VQ.
 *
 * \xvqminus1: register holding the new LEN field value (VQ - 1)
 * \xtmp:      scratch; receives the current ZCR_EL1 value
 * \xtmp2:     scratch; receives the updated ZCR_EL1 value
 *
 * The system register is only written when the value changes.  The
 * condition flags are clobbered.
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_ZCR_EL1
		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f
		msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm
|
||
|
|
||
|
/*
 * Update SMCR_EL1.LEN with the new VQ.
 *
 * \xvqminus1: register holding the new LEN field value (VQ - 1)
 * \xtmp:      scratch; receives the current SMCR_EL1 value
 * \xtmp2:     scratch; receives the updated SMCR_EL1 value
 *
 * The system register is only written when the value changes.  The
 * condition flags are clobbered.
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_SMCR_EL1
		bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f
		msr_s		SYS_SMCR_EL1, \xtmp2	//self-synchronising
921:
.endm
|
||
|
|
||
|
/*
 * Keep the low 128 bits of Z\nz and zero everything above them.
 *
 * This is done by moving V\nz onto itself as a 16-byte vector.
 *
 * \nz: vector register number, 0..31
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm
|
||
|
|
||
|
/* Apply _sve_flush_z to every vector register, Z0 through Z31. */
.macro sve_flush_z
 _for n, 0, 31,	_sve_flush_z	\n
.endm
|
||
|
/* Issue PFALSE on every predicate register, P0 through P15. */
.macro sve_flush_p
 _for n, 0, 15,	_sve_pfalse	\n
.endm
|
||
|
/*
 * Write FFR from P0.
 *
 * NOTE(review): P0 is not modified here, so FFR ends up cleared only if
 * P0 is already all-false (e.g. after sve_flush_p) - confirm at callers.
 */
.macro sve_flush_ffr
		_sve_wrffr	0
.endm
|
||
|
|
||
|
/*
 * Save the SVE register state together with FPSR and FPCR.
 *
 * \nxbase:   number of the X register pointing at the FFR slot of the
 *            save area.  Z0-Z31 are stored at vector offsets -34..-3 and
 *            P0-P15 at predicate offsets -16..-1 relative to it.
 * \xpfpsr:   register pointing at the FPSR word; FPCR is stored 4 bytes
 *            after it.
 * \save_ffr: register; when zero FFR is not read and an all-zeroes FFR
 *            image is stored instead.
 * \nxtmp:    number N of a scratch register; xN/wN is clobbered.
 *
 * The FFR slot is one predicate register long, so it is always written
 * with a predicate store of P0.  P0 is loaded with either the real FFR
 * or PFALSE for that purpose; it has already been saved by then and is
 * reloaded from its slot afterwards.  Storing a fixed-size zero here
 * would not match the slot size for every vector length.
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		cbz		\save_ffr, 921f
		_sve_rdffr	0
		b		922f
921:
		_sve_pfalse	0			// Zero out FFR
922:
		_sve_str_p	0, \nxbase
		_sve_ldr_p	0, \nxbase, -16
		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm
|
||
|
|
||
|
/*
 * Reload the SVE register state together with FPSR and FPCR.
 *
 * \nxbase:      number of the X register pointing at the FFR slot of the
 *               save area.  Z0-Z31 are loaded from vector offsets
 *               -34..-3 and P0-P15 from predicate offsets -16..-1
 *               relative to it.
 * \xpfpsr:      register pointing at the FPSR word; FPCR is read from
 *               4 bytes after it.
 * \restore_ffr: register; when zero FFR is left untouched.
 * \nxtmp:       number N of a scratch register; xN/wN is clobbered.
 *
 * FFR is restored by way of P0, which is why the predicate registers
 * are loaded only after that step.
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		cbz		\restore_ffr, 921f
		_sve_ldr_p	0, \nxbase
		_sve_wrffr	0
921:
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm
|
||
|
|
||
|
/*
 * Store the ZA array to memory, one vector per loop iteration.
 *
 * \nxbase: number of the X register pointing at the destination; it is
 *          advanced by \xvl after every vector and is not restored.
 * \xvl:    register used both as the byte stride between vectors and as
 *          the number of vectors to store.
 * \nw:     number (12..15) of the register used as the vector select
 *          index; it counts up from zero and is clobbered.
 *
 * The condition flags are clobbered.
 */
.macro sme_save_za nxbase, xvl, nw
	mov		w\nw, #0

423:
	_sme_str_zav	\nw, \nxbase
	add		x\nxbase, x\nxbase, \xvl
	add		x\nw, x\nw, #1
	cmp		\xvl, x\nw
	b.ne		423b
.endm
|
||
|
|
||
|
/*
 * Load the ZA array from memory, one vector per loop iteration.
 *
 * \nxbase: number of the X register pointing at the source; it is
 *          advanced by \xvl after every vector and is not restored.
 * \xvl:    register used both as the byte stride between vectors and as
 *          the number of vectors to load.
 * \nw:     number (12..15) of the register used as the vector select
 *          index; it counts up from zero and is clobbered.
 *
 * The condition flags are clobbered.
 */
.macro sme_load_za nxbase, xvl, nw
	mov		w\nw, #0

423:
	_sme_ldr_zav	\nw, \nxbase
	add		x\nxbase, x\nxbase, \xvl
	add		x\nw, x\nw, #1
	cmp		\xvl, x\nw
	b.ne		423b
.endm
|