209 lines
4.7 KiB
C
209 lines
4.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2016 Red Hat, Inc.
|
|
* Author: Michael S. Tsirkin <mst@redhat.com>
|
|
*
|
|
* Common macros and functions for ring benchmarking.
|
|
*/
|
|
#ifndef MAIN_H
|
|
#define MAIN_H
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
|
|
extern int param;
|
|
|
|
extern bool do_exit;
|
|
|
|
#if defined(__x86_64__) || defined(__i386__)
|
|
#include "x86intrin.h"
|
|
|
|
static inline void wait_cycles(unsigned long long cycles)
|
|
{
|
|
unsigned long long t;
|
|
|
|
t = __rdtsc();
|
|
while (__rdtsc() - t < cycles) {}
|
|
}
|
|
|
|
#define VMEXIT_CYCLES 500
|
|
#define VMENTRY_CYCLES 500
|
|
|
|
#elif defined(__s390x__)
|
|
static inline void wait_cycles(unsigned long long cycles)
|
|
{
|
|
asm volatile("0: brctg %0,0b" : : "d" (cycles));
|
|
}
|
|
|
|
/* tweak me */
|
|
#define VMEXIT_CYCLES 200
|
|
#define VMENTRY_CYCLES 200
|
|
|
|
#else
|
|
static inline void wait_cycles(unsigned long long cycles)
|
|
{
|
|
_Exit(5);
|
|
}
|
|
#define VMEXIT_CYCLES 0
|
|
#define VMENTRY_CYCLES 0
|
|
#endif
|
|
|
|
static inline void vmexit(void)
|
|
{
|
|
if (!do_exit)
|
|
return;
|
|
|
|
wait_cycles(VMEXIT_CYCLES);
|
|
}
|
|
static inline void vmentry(void)
|
|
{
|
|
if (!do_exit)
|
|
return;
|
|
|
|
wait_cycles(VMENTRY_CYCLES);
|
|
}
|
|
|
|
/* implemented by ring */
|
|
void alloc_ring(void);
|
|
/* guest side */
|
|
int add_inbuf(unsigned, void *, void *);
|
|
void *get_buf(unsigned *, void **);
|
|
void disable_call();
|
|
bool used_empty();
|
|
bool enable_call();
|
|
void kick_available();
|
|
/* host side */
|
|
void disable_kick();
|
|
bool avail_empty();
|
|
bool enable_kick();
|
|
bool use_buf(unsigned *, void **);
|
|
void call_used();
|
|
|
|
/* implemented by main */
|
|
extern bool do_sleep;
|
|
void kick(void);
|
|
void wait_for_kick(void);
|
|
void call(void);
|
|
void wait_for_call(void);
|
|
|
|
extern unsigned ring_size;
|
|
|
|
/* Compiler barrier - similar to what Linux uses */
|
|
#define barrier() asm volatile("" ::: "memory")
|
|
|
|
/* Is there a portable way to do this? */
|
|
#if defined(__x86_64__) || defined(__i386__)
|
|
#define cpu_relax() asm ("rep; nop" ::: "memory")
|
|
#elif defined(__s390x__)
|
|
#define cpu_relax() barrier()
|
|
#elif defined(__aarch64__)
|
|
#define cpu_relax() asm ("yield" ::: "memory")
|
|
#else
|
|
#define cpu_relax() assert(0)
|
|
#endif
|
|
|
|
extern bool do_relax;
|
|
|
|
static inline void busy_wait(void)
|
|
{
|
|
if (do_relax)
|
|
cpu_relax();
|
|
else
|
|
/* prevent compiler from removing busy loops */
|
|
barrier();
|
|
}
|
|
|
|
#if defined(__x86_64__) || defined(__i386__)
|
|
#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
|
|
#elif defined(__aarch64__)
|
|
#define smp_mb() asm volatile("dmb ish" ::: "memory")
|
|
#else
|
|
/*
|
|
* Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
|
|
* with other __ATOMIC_SEQ_CST calls.
|
|
*/
|
|
#define smp_mb() __sync_synchronize()
|
|
#endif
|
|
|
|
/*
|
|
* This abuses the atomic builtins for thread fences, and
|
|
* adds a compiler barrier.
|
|
*/
|
|
#define smp_release() do { \
|
|
barrier(); \
|
|
__atomic_thread_fence(__ATOMIC_RELEASE); \
|
|
} while (0)
|
|
|
|
#define smp_acquire() do { \
|
|
__atomic_thread_fence(__ATOMIC_ACQUIRE); \
|
|
barrier(); \
|
|
} while (0)
|
|
|
|
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
|
|
#define smp_wmb() barrier()
|
|
#elif defined(__aarch64__)
|
|
#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
|
|
#else
|
|
#define smp_wmb() smp_release()
|
|
#endif
|
|
|
|
#ifndef __always_inline
|
|
#define __always_inline inline __attribute__((always_inline))
|
|
#endif
|
|
|
|
static __always_inline
|
|
void __read_once_size(const volatile void *p, void *res, int size)
|
|
{
|
|
switch (size) {
|
|
case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
|
|
case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
|
|
case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
|
|
case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
|
|
default:
|
|
barrier();
|
|
__builtin_memcpy((void *)res, (const void *)p, size);
|
|
barrier();
|
|
}
|
|
}
|
|
|
|
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
|
|
{
|
|
switch (size) {
|
|
case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
|
|
case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
|
|
case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
|
|
case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
|
|
default:
|
|
barrier();
|
|
__builtin_memcpy((void *)p, (const void *)res, size);
|
|
barrier();
|
|
}
|
|
}
|
|
|
|
#ifdef __alpha__
|
|
#define READ_ONCE(x) \
|
|
({ \
|
|
union { typeof(x) __val; char __c[1]; } __u; \
|
|
__read_once_size(&(x), __u.__c, sizeof(x)); \
|
|
smp_mb(); /* Enforce dependency ordering from x */ \
|
|
__u.__val; \
|
|
})
|
|
#else
|
|
#define READ_ONCE(x) \
|
|
({ \
|
|
union { typeof(x) __val; char __c[1]; } __u; \
|
|
__read_once_size(&(x), __u.__c, sizeof(x)); \
|
|
__u.__val; \
|
|
})
|
|
#endif
|
|
|
|
#define WRITE_ONCE(x, val) \
|
|
({ \
|
|
union { typeof(x) __val; char __c[1]; } __u = \
|
|
{ .__val = (typeof(x)) (val) }; \
|
|
__write_once_size(&(x), __u.__c, sizeof(x)); \
|
|
__u.__val; \
|
|
})
|
|
|
|
#endif
|