mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:46:16 +00:00
c6f4a90022
The arch_spin_value_unlocked() of ticket-lock causes the compiler to
generate inefficient asm code on the riscv architecture because of an
unnecessary memory access to the contended value.

Before the patch:

  void lockref_get(struct lockref *lockref)
  {
    78: fd010113    add sp,sp,-48
    7c: 02813023    sd s0,32(sp)
    80: 02113423    sd ra,40(sp)
    84: 03010413    add s0,sp,48

  0000000000000088 <.LBB296>:
          CMPXCHG_LOOP(
    88: 00053783    ld a5,0(a0)

After the patch:

  void lockref_get(struct lockref *lockref)
  {
          CMPXCHG_LOOP(
    78: 00053783    ld a5,0(a0)

After the patch, lockref_get() can enter the fast path directly instead of
first executing the function's prologue. This is because the ticket lock's
complex logic limits compiler optimization of the spinlock fast path,
whereas qspinlock's does not.

Every caller of arch_spin_value_unlocked() can benefit from this change.
Currently, the only caller is lockref.

Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Waiman Long <longman@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230908154339.3250567-1-guoren@kernel.org
95 lines
2.7 KiB
C
95 lines
2.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/*
 * 'Generic' ticket-lock implementation.
 *
 * It relies on atomic_fetch_add() having well-defined forward progress
 * guarantees under contention. If your architecture cannot provide this, stick
 * to a test-and-set lock.
 *
 * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
 * sub-word of the value. This is generally true for anything LL/SC, although
 * you'd be hard pressed to find anything useful in architecture specifications
 * about this. If your architecture cannot do this, you might be better off with
 * a test-and-set.
 *
 * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
 * uses atomic_fetch_add(), which is RCsc, to create an RCsc hot path, along with
 * a full fence after the spin to upgrade the otherwise-RCpc
 * atomic_cond_read_acquire().
 *
 * The implementation uses smp_cond_load_acquire() to spin, so if the
 * architecture has WFE-like instructions to sleep instead of poll for word
 * modifications, be sure to implement that (see ARM64 for example).
 *
 */
|
|
|
|
#ifndef __ASM_GENERIC_SPINLOCK_H
|
|
#define __ASM_GENERIC_SPINLOCK_H
|
|
|
|
#include <linux/atomic.h>
|
|
#include <asm-generic/spinlock_types.h>
|
|
|
|
/*
 * arch_spin_lock - acquire the ticket lock, spinning until it is our turn.
 * @lock: lock word. The upper 16 bits hold the next ticket to hand out, the
 *        lower 16 bits hold the ticket currently being served.
 *
 * A ticket is taken by atomically adding 1 to the upper half. If the value
 * observed by that add already had both halves equal, the lock was free and
 * we own it immediately. Otherwise we wait until the lower half reaches our
 * ticket number.
 */
static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	u32 val = atomic_fetch_add(1<<16, lock);
	u16 ticket = val >> 16;

	/* Uncontended fast path: our ticket is already the one being served. */
	if (ticket == (u16)val)
		return;

	/*
	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
	 * custom cond_read_rcsc() here we just emit a full fence. We only
	 * need the prior reads before subsequent writes ordering from
	 * smp_mb(), but as atomic_cond_read_acquire() just emits reads and we
	 * have no outstanding writes due to the atomic_fetch_add() the extra
	 * orderings are free.
	 */
	atomic_cond_read_acquire(lock, ticket == (u16)VAL);
	smp_mb();
}
|
|
|
|
/*
 * arch_spin_trylock - make a single attempt to acquire the ticket lock.
 * @lock: lock word (upper 16 bits: next ticket, lower 16 bits: current owner).
 *
 * Returns true if the lock was acquired. Returns false if the lock was
 * observed held (the two halves differ), or if the lock word changed between
 * the read and the compare-and-exchange. This function never spins.
 */
static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
{
	u32 old = atomic_read(lock);

	/* Someone holds the lock (or is queued): do not take a ticket. */
	if ((old >> 16) != (old & 0xffff))
		return false;

	/* Take the next ticket only if the lock word is still unchanged. */
	return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
}
|
|
|
|
/*
 * arch_spin_unlock - release the ticket lock by serving the next ticket.
 * @lock: lock word (upper 16 bits: next ticket, lower 16 bits: current owner).
 *
 * Only the owner half is written, using a 16-bit release store. Because the
 * store goes through a u16 pointer, the increment cannot carry into the
 * ticket half that other CPUs update with atomic_fetch_add().
 */
static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	/*
	 * The owner half is the low-order 16 bits of the word: half-word
	 * index 0 on little-endian, index 1 on big-endian.
	 */
	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
	u32 val = atomic_read(lock);

	smp_store_release(ptr, (u16)val + 1);
}
|
|
|
|
/*
 * arch_spin_value_unlocked - test whether a lock value represents "unlocked".
 * @lock: a copy of the lock word, passed by value.
 *
 * Returns non-zero when the next-ticket half (upper 16 bits) equals the
 * owner half (lower 16 bits), i.e. no ticket is outstanding. The check works
 * on the copy's counter directly rather than through a pointer, so it needs
 * no further memory access.
 */
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	u32 val = lock.counter;

	return ((val >> 16) == (val & 0xffff));
}
|
|
|
|
/*
 * arch_spin_is_locked - test whether the lock is currently held.
 * @lock: lock to inspect.
 *
 * Takes one READ_ONCE() snapshot of the lock word and returns non-zero if
 * that snapshot does not represent the unlocked state. The result may be
 * stale by the time the caller uses it.
 */
static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	arch_spinlock_t val = READ_ONCE(*lock);

	return !arch_spin_value_unlocked(val);
}
|
|
|
|
/*
 * arch_spin_is_contended - test whether anyone is waiting for the lock.
 * @lock: lock to inspect.
 *
 * The difference between the next-ticket half and the owner half is the
 * number of outstanding tickets. A difference of 1 is just the holder, so
 * the lock is contended only when the difference exceeds 1. The difference
 * is truncated to a signed 16-bit value so that it stays correct when the
 * 16-bit counters wrap around.
 */
static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	u32 val = atomic_read(lock);

	return (s16)((val >> 16) - (val & 0xffff)) > 1;
}
|
|
|
|
#include <asm/qrwlock.h>
|
|
|
|
#endif /* __ASM_GENERIC_SPINLOCK_H */
|