/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3
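
	/*
	 * Note that t1 and t2 alias the 'in' and 'out' argument registers:
	 * 'in' is dead once the input block has been loaded, and 'out' is
	 * spilled by the initial push in do_crypt and reloaded from the
	 * stack before the final stores. t0 aliases lr, which is saved and
	 * restored as well.
	 */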

	/*
	 * __select - extract byte \idx of \in into \out
	 *
	 * ARMv7 and later have ubfx; older architectures merely mask the
	 * byte in place, and __load compensates (see below).
	 */
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	/*
	 * __load - look up byte index \in in the table at ttab and load the
	 * result into \out, scaling the index by \sz (2 for 32-bit entries,
	 * 0 for byte tables) and using load instruction 'ldr\op'
	 */
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm
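
	/*
	 * On the pre-v7 path, a value produced by __select with \idx > 0 is
	 * still left-shifted by 8 * \idx, so the 'lsr #(8 * \idx) - \sz'
	 * addressing mode above cancels that shift and applies the table
	 * entry scaling in a single operation.
	 */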

	/*
	 * __hround - compute two output columns of one AES round
	 *
	 * Each output word is the XOR of four rotated table lookups, one per
	 * input byte, plus one round key word. \enc selects between the byte
	 * permutations of ShiftRows and InvShiftRows. When \oldcpsr is given,
	 * this is the final round, and interrupts are re-enabled once the
	 * last data-dependent table lookup has been issued.
	 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm
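
	/*
	 * For reference, one output word of a forward round corresponds
	 * roughly to this C sketch (illustrative only, not built; 'ft'
	 * stands for the crypto_ft_tab lookup table used below):
	 *
	 *	out0 = ft[in0 & 0xff] ^
	 *	       ror32(ft[(in1 >>  8) & 0xff], 24) ^
	 *	       ror32(ft[(in2 >> 16) & 0xff], 16) ^
	 *	       ror32(ft[(in3 >> 24) & 0xff],  8) ^
	 *	       *rk++;
	 */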

	/*
	 * fround/iround - one full forward/inverse AES round, as two calls
	 * to __hround covering all four columns. The second call passes two
	 * of its input registers as the \t3/\t4 scratch operands; their
	 * remaining bytes have already been consumed by the time __hround
	 * overwrites them.
	 */
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm
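
	/*
	 * do_crypt - instantiate the encryption or decryption routine
	 *
	 * \round:	fround or iround
	 * \ttab:	lookup table used for the full rounds
	 * \ltab:	lookup table for the last round; if omitted, the last
	 *		round reuses \ttab with byte accesses
	 * \bsz:	log2 of the entry size of the last round's table
	 */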
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	// Round 0: XOR the input with the first round key (AddRoundKey)
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32. This is a hardening measure
	 * intended to make cache-timing attacks more difficult. They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f
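
	/*
	 * The loop below runs four rounds per iteration: two at 0:, one
	 * after the 'subs', and one more before looping back. AES-128 and
	 * AES-256 (10 and 14 rounds) have rounds % 4 == 2, so they enter at
	 * 1: and skip the first two; AES-192 (12 rounds) starts at 0:.
	 */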

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	// No dedicated last-round table: advance ttab by one byte so that
	// the byte loads of the final round pick the plain S-box value out
	// of each 32-bit \ttab entry.
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	// Final round: byte-wide table loads; passing oldcpsr ('rounds' now
	// holds the saved PSR) lets __hround re-enable interrupts.
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	// Reload 'out', which aliases t2 and was clobbered by the rounds
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm
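
	/*
	 * The C glue code calls these with the expanded key in rk (r0), the
	 * number of rounds in 'rounds' (r1), and the input and output block
	 * pointers in r2 and r3, i.e. roughly:
	 *
	 *	void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in,
	 *			       u8 *out);
	 */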
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)
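
	/*
	 * Decryption uses the 256-byte inverse S-box directly for the final
	 * round, hence ltab=crypto_aes_inv_sbox and bsz=0.
	 */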
	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)