/*
 * linux-zen-desktop/kernel/sched/alt_sched.h
 *
 * Repository viewer metadata: 912 lines, 22 KiB, C.
 * Last change shown for the lines below: 2023-08-30 17:31:07 +02:00.
 */
#ifndef ALT_SCHED_H
#define ALT_SCHED_H
#include <linux/context_tracking.h>
#include <linux/profile.h>
#include <linux/stop_machine.h>
#include <linux/syscalls.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <trace/events/sched.h>
#include "../workqueue_internal.h"
#include "cpupri.h"
/* First priority level of the NORMAL range (see the layout below). */
#define MIN_SCHED_NORMAL_PRIO	(32)

/*
 * Priority level layout:
 *   RT (0-24), reserved (25-31), NORMAL (32-63), cpu idle task (64)
 *
 * -- BMQ --
 * NORMAL: (lower boost range 12, NICE_WIDTH 40, higher boost range 12) / 2
 * -- PDS --
 * NORMAL: SCHED_EDGE_DELTA + ((NICE_WIDTH 40) / 2)
 */
#define SCHED_LEVELS		(64 + 1)

/* The idle task uses the last level, i.e. 64. */
#define IDLE_TASK_SCHED_PRIO	(SCHED_LEVELS - 1)
/*
 * SCHED_WARN_ON(x): with CONFIG_SCHED_DEBUG this expands to
 * WARN_ONCE(x, #x). Without it, x is still evaluated but the
 * expression always yields 0.
 */
#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
extern void resched_latency_warn(int cpu, u64 latency);
#else
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
/* Empty stand-in used when CONFIG_SCHED_DEBUG is off. */
static inline void resched_latency_warn(int cpu, u64 latency) {}
#endif
/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
 * are pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency always enable it on 64-bit platforms.
 *
 * On 64-bit, scale_load() shifts a weight left by SCHED_FIXEDPOINT_SHIFT.
 * scale_load_down() shifts it back right, but a non-zero weight never
 * drops below 2; a zero weight stays zero.
 * On 32-bit both macros return their argument unchanged.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w) \
({ \
unsigned long __w = (w); \
if (__w) \
__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
__w; \
})
#else
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
# define scale_load_down(w) (w)
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities
 * queued on it, so the weight of an entity should not be too large,
 * and neither should the shares value of a task group.
 * (The default weight is 1024 - so there's no practical
 * limitation from this.)
 *
 * MIN_SHARES is 2 and MAX_SHARES is 262144 (1 << 18).
 */
#define MIN_SHARES (1UL << 1)
#define MAX_SHARES (1UL << 18)
#endif
/*
 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
 * const_debug expands to __read_mostly with CONFIG_SCHED_DEBUG and to
 * plain const without it.
 */
#ifdef CONFIG_SCHED_DEBUG
# define const_debug __read_mostly
#else
# define const_debug const
#endif
/*
 * task_struct::on_rq states. task_on_rq_queued() and
 * task_on_rq_migrating() compare p->on_rq against these values.
 */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
/* Return 1 when @p->on_rq equals TASK_ON_RQ_QUEUED, 0 otherwise. */
static inline int task_on_rq_queued(struct task_struct *p)
{
	if (p->on_rq == TASK_ON_RQ_QUEUED)
		return 1;

	return 0;
}
/*
 * Return 1 when @p->on_rq, read once via READ_ONCE(), equals
 * TASK_ON_RQ_MIGRATING, 0 otherwise.
 */
static inline int task_on_rq_migrating(struct task_struct *p)
{
	if (READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING)
		return 1;

	return 0;
}
/*
 * wake flags
 *
 * Each flag is a distinct bit, so they can be OR-ed together.
 */
#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */
#define WF_MIGRATED 0x04 /* internal use, task got migrated */
/* Number of bits in sched_queue::bitmap: one fewer than SCHED_LEVELS. */
#define SCHED_QUEUE_BITS (SCHED_LEVELS - 1)
/*
 * Per-level task lists plus a bitmap.
 *
 * heads[] has SCHED_LEVELS entries while bitmap has only SCHED_QUEUE_BITS
 * (SCHED_LEVELS - 1) bits.
 * NOTE(review): presumably the last (idle) level needs no bit - confirm
 * against the code that sets and scans the bitmap.
 */
struct sched_queue {
DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS);
struct list_head heads[SCHED_LEVELS];
};
/* Forward declarations; only pointers to these are needed below. */
struct rq;
struct cpuidle_state;
/*
 * Singly linked node (via ->next) carrying a function that takes the
 * runqueue as its only argument. struct rq holds the list head in its
 * balance_callback member.
 */
struct balance_callback {
struct balance_callback *next;
void (*func)(struct rq *rq);
};
/*
* This is the main, per-CPU runqueue data structure.
* This data should only be modified by the local cpu.
*/
struct rq {
/* runqueue lock: */
2023-10-24 12:59:35 +02:00
raw_spinlock_t lock;
2023-08-30 17:31:07 +02:00
2023-10-24 12:59:35 +02:00
struct task_struct __rcu *curr;
struct task_struct *idle;
struct task_struct *stop;
struct task_struct *skip;
struct mm_struct *prev_mm;
2023-08-30 17:31:07 +02:00
struct sched_queue queue;
#ifdef CONFIG_SCHED_PDS
u64 time_edge;
#endif
unsigned long prio;
/* switch count */
u64 nr_switches;
atomic_t nr_iowait;
#ifdef CONFIG_SCHED_DEBUG
u64 last_seen_need_resched_ns;
int ticks_without_resched;
#endif
#ifdef CONFIG_MEMBARRIER
int membarrier_state;
#endif
#ifdef CONFIG_SMP
int cpu; /* cpu of this runqueue */
bool online;
unsigned int ttwu_pending;
unsigned char nohz_idle_balance;
unsigned char idle_balance;
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
struct sched_avg avg_irq;
#endif
#ifdef CONFIG_SCHED_SMT
int active_balance;
struct cpu_stop_work active_balance_work;
#endif
struct balance_callback *balance_callback;
#ifdef CONFIG_HOTPLUG_CPU
struct rcuwait hotplug_wait;
#endif
unsigned int nr_pinned;
#endif /* CONFIG_SMP */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_PARAVIRT
u64 prev_steal_time;
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
u64 prev_steal_time_rq;
#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
/* For genenal cpu load util */
s32 load_history;
u64 load_block;
u64 load_stamp;
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
2023-10-24 12:59:35 +02:00
/* Ensure that all clocks are in the same cache line */
u64 clock ____cacheline_aligned;
u64 clock_task;
#ifdef CONFIG_SCHED_BMQ
u64 last_ts_switch;
#endif
2023-08-30 17:31:07 +02:00
unsigned int nr_running;
unsigned long nr_uninterruptible;
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
call_single_data_t hrtick_csd;
#endif
struct hrtimer hrtick_timer;
ktime_t hrtick_time;
#endif
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
unsigned long long rq_cpu_time;
/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
/* sys_sched_yield() stats */
unsigned int yld_count;
/* schedule() stats */
unsigned int sched_switch;
unsigned int sched_count;
unsigned int sched_goidle;
/* try_to_wake_up() stats */
unsigned int ttwu_count;
unsigned int ttwu_local;
#endif /* CONFIG_SCHEDSTATS */
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
call_single_data_t nohz_csd;
#endif
atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
/* Scratch cpumask to be temporarily used under rq_lock */
cpumask_var_t scratch_mask;
};
/* Load-tracking helpers and globals; defined outside this header. */
extern unsigned long rq_load_util(struct rq *rq, unsigned long max);
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
/* One struct rq per CPU, plus accessors for it. */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
/* Runqueue of the given CPU. */
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
/* Runqueue of the executing CPU, via this_cpu_ptr(). */
#define this_rq() this_cpu_ptr(&runqueues)
/* Runqueue of the CPU that task_cpu(p) reports for @p. */
#define task_rq(p) cpu_rq(task_cpu(p))
/* The ->curr task pointer of the given CPU's runqueue. */
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
/* Runqueue of the executing CPU, via raw_cpu_ptr(). */
#define raw_rq() raw_cpu_ptr(&runqueues)
#ifdef CONFIG_SMP
/*
 * The sched-domain sysctl hooks are real functions only when both
 * CONFIG_SCHED_DEBUG and CONFIG_SYSCTL are set; otherwise they are
 * empty inline stubs.
 */
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
void register_sched_domain_sysctl(void);
void unregister_sched_domain_sysctl(void);
#else
static inline void register_sched_domain_sysctl(void)
{
}
static inline void unregister_sched_domain_sysctl(void)
{
}
#endif
extern bool sched_smp_initialized;
/*
 * Slots of the per-cpu sched_cpu_topo_masks array. The SMT slot exists
 * only with CONFIG_SCHED_SMT, so NR_CPU_AFFINITY_LEVELS is 5 with SMT and
 * 4 without.
 */
enum {
ITSELF_LEVEL_SPACE_HOLDER,
#ifdef CONFIG_SCHED_SMT
SMT_LEVEL_SPACE_HOLDER,
#endif
COREGROUP_LEVEL_SPACE_HOLDER,
CORE_LEVEL_SPACE_HOLDER,
OTHER_LEVEL_SPACE_HOLDER,
NR_CPU_AFFINITY_LEVELS
};
/* Per-cpu array of NR_CPU_AFFINITY_LEVELS cpumasks; walked by __best_mask_cpu(). */
DECLARE_PER_CPU_ALIGNED(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks);
/*
 * Walk the @mask array from its first element and return a CPU from the
 * first element that intersects @cpumask.
 *
 * There is no bound on the walk: the caller must guarantee that some
 * element of @mask intersects @cpumask.
 * NOTE(review): presumably the last topology mask covers every CPU that
 * can appear in @cpumask - confirm at the site that fills the masks.
 */
static inline int
__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask)
{
	for (;;) {
		int cpu = cpumask_any_and(cpumask, mask);

		if (cpu < nr_cpu_ids)
			return cpu;

		/* No overlap with this mask; try the next one. */
		mask++;
	}
}
/*
 * Pick a CPU from @mask by walking @cpu's per-cpu sched_cpu_topo_masks
 * array with __best_mask_cpu().
 */
static inline int best_mask_cpu(int cpu, const cpumask_t *mask)
{
	const cpumask_t *topo_masks = per_cpu(sched_cpu_topo_masks, cpu);

	return __best_mask_cpu(mask, topo_masks);
}
/* Defined elsewhere on SMP builds; an empty inline stub otherwise. */
extern void flush_smp_call_function_queue(void);
#else /* !CONFIG_SMP */
static inline void flush_smp_call_function_queue(void) { }
#endif
/*
 * Default arch_scale_freq_tick(): used only when no macro of that name
 * is already defined. Does nothing.
 */
#ifndef arch_scale_freq_tick
static __always_inline
void arch_scale_freq_tick(void)
{
}
#endif
/*
 * Default arch_scale_freq_capacity(): used only when no macro of that
 * name is already defined. Ignores @cpu and always returns
 * SCHED_CAPACITY_SCALE.
 */
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
{
return SCHED_CAPACITY_SCALE;
}
#endif
/* Return rq->clock, read once through READ_ONCE(). */
static inline u64 __rq_clock_broken(struct rq *rq)
{
return READ_ONCE(rq->clock);
}
/* Return rq->clock with a plain load and no lock assertion. */
static inline u64 rq_clock(struct rq *rq)
{
/*
 * The lockdep_assert_held() check is relaxed here: in VRQ, callers
 * of sched_info_xxxx() may not hold rq->lock.
 * lockdep_assert_held(&rq->lock);
 */
return rq->clock;
}
/* Return rq->clock_task with a plain load and no lock assertion. */
static inline u64 rq_clock_task(struct rq *rq)
{
/*
 * The lockdep_assert_held() check is relaxed here: in VRQ, callers
 * of sched_info_xxxx() may not hold rq->lock.
 * lockdep_assert_held(&rq->lock);
 */
return rq->clock_task;
}
/*
 * {de,en}queue flags:
 *
 * DEQUEUE_SLEEP - task is no longer runnable
 * ENQUEUE_WAKEUP - task just became runnable
 *
 * Both flags share the value 0x01.
 */
#define DEQUEUE_SLEEP 0x01
#define ENQUEUE_WAKEUP 0x01
/*
 * The scheduler APIs below are used by other kernel code.
 * They take a dummy rq_flags.
 * ToDo : BMQ needs to support these APIs for compatibility with the
 * mainline scheduler code.
 */
struct rq_flags {
/* Saved IRQ flags; task_rq_unlock() restores from this field. */
unsigned long flags;
};
/*
 * Lock entry points, defined outside this header. Per the annotations,
 * __task_rq_lock() acquires rq->lock and task_rq_lock() acquires
 * p->pi_lock and rq->lock. Both return a runqueue pointer.
 */
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(rq->lock);
struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
__acquires(rq->lock);
/* Release rq->lock. @rf is not used by this implementation. */
static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
raw_spin_unlock(&rq->lock);
}
/*
 * Release rq->lock first, then p->pi_lock, restoring the IRQ state saved
 * in rf->flags.
 */
static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock)
__releases(p->pi_lock)
{
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
/*
 * Acquire rq->lock without changing the IRQ state.
 * @rf is not used by this implementation.
 */
static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock(&rq->lock);
}
/*
 * Release rq->lock without changing the IRQ state.
 * @rf is not used by this implementation.
 */
static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
raw_spin_unlock(&rq->lock);
}
/*
 * Acquire rq->lock through raw_spin_lock_irq().
 * @rf is not used by this implementation.
 */
static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock_irq(&rq->lock);
}
/*
 * Release rq->lock through raw_spin_unlock_irq().
 * @rf is not used by this implementation.
 */
static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
raw_spin_unlock_irq(&rq->lock);
}
/*
 * Disable local interrupts, then look up and lock this CPU's runqueue.
 * Returns the locked runqueue. @rf is not used by this implementation.
 */
static inline struct rq *
this_rq_lock_irq(struct rq_flags *rf)
__acquires(rq->lock)
{
struct rq *rq;
/* Interrupts go off before this_rq() is evaluated. */
local_irq_disable();
rq = this_rq();
raw_spin_lock(&rq->lock);
return rq;
}
/* Return the address of @rq's own lock member. */
static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
{
return &rq->lock;
}
/* Same result as __rq_lockp(): the address of @rq's lock. */
static inline raw_spinlock_t *rq_lockp(struct rq *rq)
{
return __rq_lockp(rq);
}
/* Lockdep assertion that @rq's lock is held. */
static inline void lockdep_assert_rq_held(struct rq *rq)
{
lockdep_assert_held(__rq_lockp(rq));
}
/* Runqueue lock primitives; defined outside this header. */
extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
extern void raw_spin_rq_unlock(struct rq *rq);
/* Lock @rq via raw_spin_rq_lock_nested() with subclass 0. */
static inline void raw_spin_rq_lock(struct rq *rq)
{
raw_spin_rq_lock_nested(rq, 0);
}
/* Disable local interrupts, then lock @rq. */
static inline void raw_spin_rq_lock_irq(struct rq *rq)
{
local_irq_disable();
raw_spin_rq_lock(rq);
}
/* Unlock @rq, then enable local interrupts (reverse of the lock order). */
static inline void raw_spin_rq_unlock_irq(struct rq *rq)
{
raw_spin_rq_unlock(rq);
local_irq_enable();
}
/* Return 1 when @p is the task stored in @rq->curr, 0 otherwise. */
static inline int task_current(struct rq *rq, struct task_struct *p)
{
	if (rq->curr == p)
		return 1;

	return 0;
}
/* Return @p->on_cpu converted to bool. */
static inline bool task_on_cpu(struct task_struct *p)
{
return p->on_cpu;
}
/* Defined outside this header. */
extern int task_running_nice(struct task_struct *p);
/* Static key declared here, defined elsewhere. */
extern struct static_key_false sched_schedstats;
/*
 * Accessors for rq->idle_state. That member exists only with
 * CONFIG_CPU_IDLE; without it the setter does nothing and the getter
 * returns NULL.
 */
#ifdef CONFIG_CPU_IDLE
/* Store @idle_state in @rq. */
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
rq->idle_state = idle_state;
}
/* Return @rq's idle state; warns when not inside an RCU read-side section. */
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
WARN_ON(!rcu_read_lock_held());
return rq->idle_state;
}
#else
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
}
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
return NULL;
}
#endif
/*
 * Return the CPU number stored in @rq. Builds without CONFIG_SMP have
 * no rq->cpu member and always report CPU 0.
 */
static inline int cpu_of(const struct rq *rq)
{
	int cpu = 0;

#ifdef CONFIG_SMP
	cpu = rq->cpu;
#endif
	return cpu;
}
#include "stats.h"
#ifdef CONFIG_NO_HZ_COMMON
/* Bit positions within rq->nohz_flags. */
#define NOHZ_BALANCE_KICK_BIT 0
#define NOHZ_STATS_KICK_BIT 1
/* Bit masks built from the positions above. */
#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
/* Both kick bits combined. */
#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
/* Address of the nohz_flags atomic of the given CPU's runqueue. */
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
/* TODO: needed? (disabled code, kept for reference)
extern void nohz_balance_exit_idle(struct rq *rq);
#else
static inline void nohz_balance_exit_idle(struct rq *rq) { }
*/
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Per-cpu IRQ time bookkeeping. irq_time_read() reads ->total inside a
 * retry loop driven by ->sync.
 */
struct irqtime {
u64 total;
u64 tick_delta;
u64 irq_start_time;
struct u64_stats_sync sync;
};
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
/*
 * Returns the irqtime minus the softirq time computed by ksoftirqd.
 * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
 * subtracted from it and would never move forward.
 *
 * The function itself returns @cpu's irqtime->total, re-reading it
 * until the u64_stats fetch_begin/fetch_retry pair reports no retry.
 */
static inline u64 irq_time_read(int cpu)
{
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
unsigned int seq;
u64 total;
/* Retry until the read was not interrupted by an update. */
do {
seq = __u64_stats_fetch_begin(&irqtime->sync);
total = irqtime->total;
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
/* Per-cpu, RCU-annotated pointer to struct update_util_data. */
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
#endif /* CONFIG_CPU_FREQ */
/*
 * sched_tick_offload_init() is a real __init function only with
 * CONFIG_NO_HZ_FULL; otherwise it is a stub that returns 0.
 */
#ifdef CONFIG_NO_HZ_FULL
extern int __init sched_tick_offload_init(void);
#else
static inline int sched_tick_offload_init(void) { return 0; }
#endif
/*
 * arch_scale_freq_invariant(): when arch_scale_freq_capacity is defined
 * as a macro and no arch_scale_freq_invariant macro exists yet, default
 * it to (true). When arch_scale_freq_capacity is not a macro, it is
 * (false).
 */
#ifdef arch_scale_freq_capacity
#ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant() (true)
#endif
#else /* arch_scale_freq_capacity */
#define arch_scale_freq_invariant() (false)
#endif
extern void schedule_idle(void);
/* Multiply @v by @s, then shift the product right by SCHED_CAPACITY_SHIFT. */
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks, but still be able to sleep. We need this on platforms that cannot
 * atomically change clock frequency. Remove once fast switching will be
 * available on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 *
 * The flag value is a single bit (bit 28).
 */
#define SCHED_FLAG_SUGOV 0x10000000
#ifdef CONFIG_MEMBARRIER
/*
 * The scheduler provides memory barriers required by membarrier between:
 * - prior user-space memory accesses and store to rq->membarrier_state,
 * - store to rq->membarrier_state and following user-space memory accesses.
 * In the same way it provides those guarantees around store to rq->curr.
 *
 * When the mm changes, copy next_mm's membarrier state into
 * rq->membarrier_state, skipping the store if the value is already there.
 */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
	if (prev_mm != next_mm) {
		int next_state = atomic_read(&next_mm->membarrier_state);

		/* Only write when the cached value actually differs. */
		if (READ_ONCE(rq->membarrier_state) != next_state)
			WRITE_ONCE(rq->membarrier_state, next_state);
	}
}
#else
/* Without CONFIG_MEMBARRIER there is no state to propagate. */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
}
#endif
/*
 * sched_numa_find_closest() is defined elsewhere with CONFIG_NUMA.
 * Without it, the stub ignores its arguments and returns nr_cpu_ids.
 */
#ifdef CONFIG_NUMA
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
{
return nr_cpu_ids;
}
#endif
/* Simple-waitqueue helpers; defined outside this header. */
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
/* Dynamic preemption interface, declared only with CONFIG_PREEMPT_DYNAMIC. */
#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
/* No-op stub. */
static inline void nohz_run_idle_balance(int cpu) { }
/* Stub: returns @util unchanged; @rq and @p are ignored. */
static inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
return util;
}
/* Stub: always reports false. */
static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
#ifdef CONFIG_SCHED_MM_CID
/* Both periods are 100ms, expressed in nanoseconds and milliseconds. */
#define SCHED_MM_CID_PERIOD_NS	(100ULL * 1000000)	/* 100ms */
#define MM_CID_SCAN_DELAY	100			/* 100ms */
/* Lock and flag used by __mm_cid_get(); defined outside this header. */
extern raw_spinlock_t cid_lock;
extern int use_cid_lock;
/* mm_cid entry points; defined outside this header. */
extern void sched_mm_cid_migrate_from(struct task_struct *t);
extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t, int src_cpu);
extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
extern void init_sched_mm_cid(struct task_struct *t);
/* Clear bit @cid in @mm's cid mask; negative values are ignored. */
static inline void __mm_cid_put(struct mm_struct *mm, int cid)
{
	if (cid >= 0)
		cpumask_clear_cpu(cid, mm_cidmask(mm));
}
/*
 * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
 * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
 * be held to transition to other states.
 *
 * State transitions synchronized with cmpxchg or try_cmpxchg need to be
 * consistent across cpus, which prevents use of this_cpu_cmpxchg.
 *
 * If this CPU's cid for @t->mm is in the lazy-put state, move it to
 * MM_CID_UNSET and release the underlying cid. Must run with IRQs disabled.
 */
static inline void mm_cid_put_lazy(struct task_struct *t)
{
	struct mm_struct *mm = t->mm;
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
	int cid;

	lockdep_assert_irqs_disabled();
	cid = __this_cpu_read(pcpu_cid->cid);

	/* Nothing to do unless the cid is flagged lazy-put. */
	if (!mm_cid_is_lazy_put(cid))
		return;

	/* Bail out if the transition to unset did not succeed. */
	if (!try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
		return;

	__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}
/*
 * Move this CPU's cid for @mm to MM_CID_UNSET.
 *
 * Returns MM_CID_UNSET if it was already unset, otherwise the value that
 * was replaced. Must run with IRQs disabled.
 */
static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
{
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
int cid, res;
lockdep_assert_irqs_disabled();
cid = __this_cpu_read(pcpu_cid->cid);
for (;;) {
if (mm_cid_is_unset(cid))
return MM_CID_UNSET;
/*
 * Attempt transition from valid or lazy-put to unset.
 */
res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
if (res == cid)
break;
/* The value changed under us; retry with what cmpxchg() observed. */
cid = res;
}
return cid;
}
/*
 * Unset this CPU's cid for @mm and, if one was set, release it back to
 * the mm's cid mask. Must run with IRQs disabled.
 */
static inline void mm_cid_put(struct mm_struct *mm)
{
	int old_cid;

	lockdep_assert_irqs_disabled();
	old_cid = mm_cid_pcpu_unset(mm);
	if (old_cid != MM_CID_UNSET)
		__mm_cid_put(mm, mm_cid_clear_lazy_put(old_cid));
}
static inline int __mm_cid_try_get(struct mm_struct *mm)
2023-08-30 17:31:07 +02:00
{
struct cpumask *cpumask;
int cid;
cpumask = mm_cidmask(mm);
2023-10-24 12:59:35 +02:00
/*
* Retry finding first zero bit if the mask is temporarily
* filled. This only happens during concurrent remote-clear
* which owns a cid without holding a rq lock.
*/
for (;;) {
cid = cpumask_first_zero(cpumask);
if (cid < nr_cpu_ids)
break;
cpu_relax();
}
if (cpumask_test_and_set_cpu(cid, cpumask))
2023-08-30 17:31:07 +02:00
return -1;
return cid;
}
2023-10-24 12:59:35 +02:00
/*
* Save a snapshot of the current runqueue time of this cpu
* with the per-cpu cid value, allowing to estimate how recently it was used.
*/
static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
2023-08-30 17:31:07 +02:00
{
2023-10-24 12:59:35 +02:00
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));
lockdep_assert_rq_held(rq);
WRITE_ONCE(pcpu_cid->time, rq->clock);
2023-08-30 17:31:07 +02:00
}
2023-10-24 12:59:35 +02:00
static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
2023-08-30 17:31:07 +02:00
{
2023-10-24 12:59:35 +02:00
int cid;
2023-08-30 17:31:07 +02:00
2023-10-24 12:59:35 +02:00
/*
* All allocations (even those using the cid_lock) are lock-free. If
* use_cid_lock is set, hold the cid_lock to perform cid allocation to
* guarantee forward progress.
*/
if (!READ_ONCE(use_cid_lock)) {
cid = __mm_cid_try_get(mm);
if (cid >= 0)
goto end;
raw_spin_lock(&cid_lock);
} else {
raw_spin_lock(&cid_lock);
cid = __mm_cid_try_get(mm);
if (cid >= 0)
goto unlock;
}
/*
* cid concurrently allocated. Retry while forcing following
* allocations to use the cid_lock to ensure forward progress.
*/
WRITE_ONCE(use_cid_lock, 1);
/*
* Set use_cid_lock before allocation. Only care about program order
* because this is only required for forward progress.
*/
barrier();
/*
* Retry until it succeeds. It is guaranteed to eventually succeed once
* all newcoming allocations observe the use_cid_lock flag set.
*/
do {
cid = __mm_cid_try_get(mm);
cpu_relax();
} while (cid < 0);
/*
* Allocate before clearing use_cid_lock. Only care about
* program order because this is for forward progress.
*/
barrier();
WRITE_ONCE(use_cid_lock, 0);
unlock:
raw_spin_unlock(&cid_lock);
end:
mm_cid_snapshot_time(rq, mm);
return cid;
}
/*
 * Return the cid this CPU should use for @mm.
 *
 * A valid per-cpu cid is reused after refreshing its time snapshot.
 * Otherwise a lazy-put cid is released first (if the transition to unset
 * succeeds), and a fresh cid is allocated and stored in the per-cpu slot.
 * The rq lock must be held.
 */
static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
{
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
	int cid;

	lockdep_assert_rq_held(rq);
	cid = __this_cpu_read(pcpu_cid->cid);
	if (mm_cid_is_valid(cid)) {
		mm_cid_snapshot_time(rq, mm);
		return cid;
	}
	if (mm_cid_is_lazy_put(cid)) {
		if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
			__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
	}
	cid = __mm_cid_get(rq, mm);
	__this_cpu_write(pcpu_cid->cid, cid);
	return cid;
}
2023-10-24 12:59:35 +02:00
static inline void switch_mm_cid(struct rq *rq,
struct task_struct *prev,
struct task_struct *next)
2023-08-30 17:31:07 +02:00
{
2023-10-24 12:59:35 +02:00
/*
* Provide a memory barrier between rq->curr store and load of
* {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
*
* Should be adapted if context_switch() is modified.
*/
if (!next->mm) { // to kernel
/*
* user -> kernel transition does not guarantee a barrier, but
* we can use the fact that it performs an atomic operation in
* mmgrab().
*/
if (prev->mm) // from user
smp_mb__after_mmgrab();
/*
* kernel -> kernel transition does not change rq->curr->mm
* state. It stays NULL.
*/
} else { // to user
/*
* kernel -> user transition does not provide a barrier
* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
* Provide it here.
*/
if (!prev->mm) // from kernel
smp_mb();
/*
* user -> user transition guarantees a memory barrier through
* switch_mm() when current->mm changes. If current->mm is
* unchanged, no barrier is needed.
*/
}
2023-08-30 17:31:07 +02:00
if (prev->mm_cid_active) {
2023-10-24 12:59:35 +02:00
mm_cid_snapshot_time(rq, prev->mm);
mm_cid_put_lazy(prev);
2023-08-30 17:31:07 +02:00
prev->mm_cid = -1;
}
if (next->mm_cid_active)
2023-10-24 12:59:35 +02:00
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
2023-08-30 17:31:07 +02:00
}
#else
/* Without CONFIG_SCHED_MM_CID every mm_cid hook is an empty stub. */
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t, int src_cpu) { }
static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif
#endif /* ALT_SCHED_H */