linux/arch/x86/include/asm/barrier.h
Linus Torvalds 7453b94851 x86: improve array_index_mask_nospec() code generation
Don't force the inputs to be 'unsigned long', when the comparison can
easily be done in 32-bit if that's more appropriate.

Note that while we can look at the inputs to choose an appropriate size
for the compare instruction, the output is fixed at 'unsigned long'.
That's not technically optimal either, since a 32-bit 'sbbl' would often
be sufficient.

But for the outgoing mask we don't know how the mask ends up being used
(ie we have uses that have an incoming 32-bit array index, but end up
using the mask for other things).  That said, it only costs the extra
REX prefix to always generate the 64-bit mask.

[ A 'sbbl' also always technically generates a 64-bit mask, but with the
  upper 32 bits clear: that's fine for when the incoming index that will
  be masked is already 32-bit, but not if you use the mask to mask a
  pointer afterwards, like the file table lookup does ]

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-05-22 14:12:11 -07:00
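
To make the pointer-masking concern concrete, here is a minimal sketch (illustrative names, not the kernel's actual fdtable code) of the pattern the last paragraph describes: a 32-bit index is checked against a 32-bit size, but the resulting mask is applied to a 64-bit pointer, so all 64 mask bits must be set when the bounds check succeeds.

/*
 * Illustrative sketch only: lookup_entry(), 'table' and 'nr_entries'
 * are made-up names, not kernel APIs.  A 32-bit 'sbbl' mask would
 * leave the upper 32 bits clear and fail to NULL out the pointer.
 */
static struct file *lookup_entry(struct file **table, unsigned int idx,
				 unsigned int nr_entries)
{
	unsigned long mask;

	if (idx >= nr_entries)
		return NULL;

	mask = array_index_mask_nospec(idx, nr_entries);
	/*
	 * Under misspeculation past the bounds check, mask is 0 and the
	 * speculatively loaded pointer is forced to NULL.
	 */
	return (struct file *)(mask & (unsigned long)table[idx]);
}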


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_BARRIER_H
#define _ASM_X86_BARRIER_H
#include <asm/alternative.h>
#include <asm/nops.h>
/*
 * Force strict CPU ordering.
 * And yes, this might be required on UP too when we're talking
 * to devices.
 */
#ifdef CONFIG_X86_32
#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
				      X86_FEATURE_XMM2) ::: "memory", "cc")
#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
				       X86_FEATURE_XMM2) ::: "memory", "cc")
#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
				       X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define __mb() asm volatile("mfence":::"memory")
#define __rmb() asm volatile("lfence":::"memory")
#define __wmb() asm volatile("sfence" ::: "memory")
#endif
/**
 * array_index_mask_nospec() - generate a mask that is ~0UL when the
 *	bounds check succeeds and 0 otherwise
 * @index: array element index
 * @size: number of elements in array
 *
 * Returns:
 *	0 - (index < size)
 */
#define array_index_mask_nospec(idx,sz) ({	\
	typeof((idx)+(sz)) __idx = (idx);	\
	typeof(__idx) __sz = (sz);		\
	unsigned long __mask;			\
	asm volatile ("cmp %1,%2; sbb %0,%0"	\
			:"=r" (__mask)		\
			:ASM_INPUT_G (__sz),	\
			 "r" (__idx)		\
			:"cc");			\
	__mask; })
/* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)

#define __dma_rmb()	barrier()
#define __dma_wmb()	barrier()

#define __smp_mb()	asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")

#define __smp_rmb()	dma_rmb()
#define __smp_wmb()	barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#define __smp_store_release(p, v)				\
do {								\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	WRITE_ONCE(*p, v);					\
} while (0)

#define __smp_load_acquire(p)					\
({								\
	typeof(*p) ___p1 = READ_ONCE(*p);			\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	___p1;							\
})
/* Atomic operations are already serializing on x86 */
#define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() do { } while (0)
/* Writing to CR3 provides a full memory barrier in switch_mm(). */
#define smp_mb__after_switch_mm() do { } while (0)
#include <asm-generic/barrier.h>
#endif /* _ASM_X86_BARRIER_H */
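
For context, this is roughly how generic code consumes the mask: a hedged sketch of the array_index_nospec() pattern from include/linux/nospec.h. The '_sketch' suffix marks it as illustrative, and the real macro's BUILD_BUG_ON() size checks are omitted here.

/*
 * Illustrative sketch of an array_index_nospec()-style wrapper; not the
 * kernel's exact implementation.
 */
#define array_index_nospec_sketch(index, size)			\
({								\
	typeof(index) _i = (index);				\
	typeof(size) _s = (size);				\
	unsigned long _mask = array_index_mask_nospec(_i, _s);	\
								\
	/* Clamp the index to 0 on a misspeculated check. */	\
	(typeof(_i)) (_i & _mask);				\
})

/*
 * Typical use (fragment; 'idx', 'arr' and 'val' are illustrative):
 * sanitize a user-controlled index before the dependent load.
 */
if (idx < ARRAY_SIZE(arr)) {
	idx = array_index_nospec_sketch(idx, ARRAY_SIZE(arr));
	val = arr[idx];
}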