/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_QSPINLOCK_H
#define _ASM_POWERPC_QSPINLOCK_H

#include <linux/compiler.h>
#include <asm/qspinlock_types.h>
#include <asm/paravirt.h>

#ifdef CONFIG_PPC64
/*
 * Use the EH=1 hint for accesses that result in the lock being acquired.
 * The hardware is supposed to optimise this pattern by holding the lock
 * cacheline longer, and releasing when a store to the same memory (the
 * unlock) is performed.
 */
#define _Q_SPIN_EH_HINT 1
#else
#define _Q_SPIN_EH_HINT 0
#endif
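/*
 * For reference: the hint is consumed as the fourth (EH) operand of the
 * lwarx instructions in the trylock implementations below, via the
 * "i" (_Q_SPIN_EH_HINT) constraint.
 */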

/*
 * The trylock itself may steal. This makes trylocks slightly stronger, and
 * makes locks slightly more efficient when stealing.
 *
 * This is compile-time, so if true then there may always be stealers, so the
 * nosteal paths become unused.
 */
#define _Q_SPIN_TRY_LOCK_STEAL 1

/*
 * Put a speculation barrier after testing the lock/node and finding it
 * busy. Try to prevent pointless speculation in slow paths.
 *
 * Slows down the lockstorm microbenchmark with no stealing, where locking
 * is purely FIFO through the queue. May have more benefit in real workloads
 * where speculating into the wrong place could have a greater cost.
 */
#define _Q_SPIN_SPEC_BARRIER 0

#ifdef CONFIG_PPC64
/*
 * Execute a miso instruction after passing the MCS lock ownership to the
 * queue head. Miso is intended to make stores visible to other CPUs sooner.
 *
 * This seems to make the lockstorm microbenchmark nospin test go slightly
 * faster on POWER10, but it is disabled for now.
 */
#define _Q_SPIN_MISO 0
#else
#define _Q_SPIN_MISO 0
#endif

#ifdef CONFIG_PPC64
/*
 * This executes miso after an unlock of the lock word, having ownership
 * pass to the next CPU sooner. This will slow the uncontended path to some
 * degree. No evidence it helps yet.
 */
#define _Q_SPIN_MISO_UNLOCK 0
#else
#define _Q_SPIN_MISO_UNLOCK 0
#endif
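/*
 * Note: _Q_SPIN_MISO_UNLOCK is consumed by queued_spin_unlock() below.
 * _Q_SPIN_MISO is not used in this header; it is consumed by the
 * out-of-line slow path.
 */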

/*
 * This seems to slow down the lockstorm microbenchmark; the suspicion is
 * that the queue node just has to become shared again right afterwards
 * when its waiter spins on the lock field.
 */
#define _Q_SPIN_PREFETCH_NEXT 0

static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
	return READ_ONCE(lock->val);
}

static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
	return !lock.val;
}

static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
{
	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
}

static __always_inline u32 queued_spin_encode_locked_val(void)
{
	/* XXX: make this use lock value in paca like simple spinlocks? */
	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
}
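/*
 * Illustration only: this header touches three parts of the 32-bit lock
 * word (the authoritative layout is in asm/qspinlock_types.h): the locked
 * bits (_Q_LOCKED_VAL), the owner CPU field (shifted by
 * _Q_OWNER_CPU_OFFSET), and the queued tail CPU field (_Q_TAIL_CPU_MASK).
 */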

static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev;

	/* Trylock succeeds only when unlocked and no queued nodes */
	asm volatile(
"1:	lwarx	%0,0,%1,%3	# __queued_spin_trylock_nosteal	\n"
"	cmpwi	0,%0,0						\n"
"	bne-	2f						\n"
"	stwcx.	%2,0,%1						\n"
"	bne-	1b						\n"
"\t" PPC_ACQUIRE_BARRIER "					\n"
"2:								\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (new),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(prev == 0);
}
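/*
 * Sketch of the nosteal trylock in C-like pseudocode (illustration only;
 * the larx/stcx. loop above is the real implementation):
 *
 *	if (lock->val != 0)
 *		return 0;	// locked, or waiters queued
 *	lock->val = new;	// done atomically by lwarx/stwcx.
 *	acquire_barrier();	// PPC_ACQUIRE_BARRIER on success
 *	return 1;
 */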

static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev, tmp;

	/* Trylock may get ahead of queued nodes if it finds unlocked */
	asm volatile(
"1:	lwarx	%0,0,%2,%5	# __queued_spin_trylock_steal	\n"
"	andc.	%1,%0,%4					\n"
"	bne-	2f						\n"
"	and	%1,%0,%4					\n"
"	or	%1,%1,%3					\n"
"	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t" PPC_ACQUIRE_BARRIER "					\n"
"2:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
}
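/*
 * Sketch of the steal variant in C-like pseudocode (illustration only):
 * any queued tail is ignored when deciding whether to take the lock, so
 * this can "steal" ahead of queued waiters, but the tail bits are kept
 * when the new lock word is stored:
 *
 *	if (lock->val & ~_Q_TAIL_CPU_MASK)
 *		return 0;	// already locked
 *	lock->val = (lock->val & _Q_TAIL_CPU_MASK) | new;	// atomic
 *	acquire_barrier();
 *	return 1;
 */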

static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
	if (!_Q_SPIN_TRY_LOCK_STEAL)
		return __queued_spin_trylock_nosteal(lock);
	else
		return __queued_spin_trylock_steal(lock);
}

void queued_spin_lock_slowpath(struct qspinlock *lock);

static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	if (!queued_spin_trylock(lock))
		queued_spin_lock_slowpath(lock);
}

static inline void queued_spin_unlock(struct qspinlock *lock)
{
	smp_store_release(&lock->locked, 0);
	if (_Q_SPIN_MISO_UNLOCK)
		asm volatile("miso" ::: "memory");
}
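/*
 * Minimal usage sketch (illustration only; kernel code normally goes
 * through spinlock_t and the arch_spin_* hooks below rather than calling
 * these helpers directly). The __ARCH_SPIN_LOCK_UNLOCKED initializer is
 * assumed to come from asm/qspinlock_types.h:
 *
 *	struct qspinlock l = __ARCH_SPIN_LOCK_UNLOCKED;
 *
 *	queued_spin_lock(&l);
 *	// ... critical section ...
 *	queued_spin_unlock(&l);
 */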

#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
#define arch_spin_lock(l)		queued_spin_lock(l)
#define arch_spin_trylock(l)		queued_spin_trylock(l)
#define arch_spin_unlock(l)		queued_spin_unlock(l)

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void);
#else
static inline void pv_spinlocks_init(void) { }
#endif

#endif /* _ASM_POWERPC_QSPINLOCK_H */