/* File-viewer metadata captured with the source: 912 lines, 22 KiB, C */
#ifndef ALT_SCHED_H
|
|
#define ALT_SCHED_H
|
|
|
|
#include <linux/context_tracking.h>
|
|
#include <linux/profile.h>
|
|
#include <linux/stop_machine.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/tick.h>
|
|
|
|
#include <trace/events/power.h>
|
|
#include <trace/events/sched.h>
|
|
|
|
#include "../workqueue_internal.h"
|
|
|
|
#include "cpupri.h"
|
|
|
|
/*
 * Priority level layout:
 *   RT (0-24), reserved (25-31), NORMAL (32-63), cpu idle task (64)
 *
 * -- BMQ --
 * NORMAL: (lower boost range 12, NICE_WIDTH 40, higher boost range 12) / 2
 * -- PDS --
 * NORMAL: SCHED_EDGE_DELTA + ((NICE_WIDTH 40) / 2)
 */

/* First level of the NORMAL range. */
#define MIN_SCHED_NORMAL_PRIO	(32)

/* Number of levels: 64 task levels plus one for the cpu idle task. */
#define SCHED_LEVELS		(64 + 1)

/* The cpu idle task sits on the last level. */
#define IDLE_TASK_SCHED_PRIO	(SCHED_LEVELS - 1)
|
|
|
|
#ifdef CONFIG_SCHED_DEBUG
|
|
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
|
|
extern void resched_latency_warn(int cpu, u64 latency);
|
|
#else
|
|
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
|
|
static inline void resched_latency_warn(int cpu, u64 latency) {}
|
|
#endif
|
|
|
|
/*
 * Nice-level calculations use a higher resolution on 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (e.g. nice +19 on an autogroup) and of deeper
 * taskgroup hierarchies, especially on larger systems. This is not a
 * user-visible change and does not change the user interface for setting
 * shares/weights.
 *
 * The resolution is only increased when there are enough bits for it
 * (i.e. 64-bit). On 32-bit the cost of the increased resolution is high
 * and the returns do not justify it.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency it is always enabled on 64-bit platforms.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
/* Zero stays zero; any other weight scales down to no less than 2. */
# define scale_load_down(w)						\
({									\
	unsigned long __w = (w);					\
									\
	__w ? max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT) : 0UL;		\
})
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD

/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities
 * queued on it, so neither the weight of an entity nor the shares
 * value of a task group should be too large.
 * (The default weight is 1024 - so there's no practical
 *  limitation from this.)
 */
#define MIN_SHARES		(1UL <<  1)
#define MAX_SHARES		(1UL << 18)
#endif
|
|
|
|
/*
 * Qualifier for scheduler tunables: they are plain constants unless
 * CONFIG_SCHED_DEBUG is on, in which case they are __read_mostly.
 */
#ifndef CONFIG_SCHED_DEBUG
# define const_debug const
#else
# define const_debug __read_mostly
#endif
|
|
|
|
/* Values stored in task_struct::on_rq: */
#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2
|
|
|
|
static inline int task_on_rq_queued(struct task_struct *p)
|
|
{
|
|
return p->on_rq == TASK_ON_RQ_QUEUED;
|
|
}
|
|
|
|
static inline int task_on_rq_migrating(struct task_struct *p)
|
|
{
|
|
return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
|
|
}
|
|
|
|
/* Wake flags */
#define WF_SYNC		0x01	/* waker goes to sleep after wakeup */
#define WF_FORK		0x02	/* child wakeup after fork */
#define WF_MIGRATED	0x04	/* internal use, task got migrated */
|
|
|
|
#define SCHED_QUEUE_BITS (SCHED_LEVELS - 1)
|
|
|
|
struct sched_queue {
|
|
DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS);
|
|
struct list_head heads[SCHED_LEVELS];
|
|
};
|
|
|
|
/* Forward declarations; only pointers to these are needed at this point. */
struct rq;
struct cpuidle_state;

/* Singly linked callback node; @func takes the runqueue as its argument. */
struct balance_callback {
	struct balance_callback	*next;
	void			(*func)(struct rq *rq);
};
|
|
|
|
/*
|
|
* This is the main, per-CPU runqueue data structure.
|
|
* This data should only be modified by the local cpu.
|
|
*/
|
|
struct rq {
|
|
/* runqueue lock: */
|
|
raw_spinlock_t lock;
|
|
|
|
struct task_struct __rcu *curr;
|
|
struct task_struct *idle;
|
|
struct task_struct *stop;
|
|
struct task_struct *skip;
|
|
struct mm_struct *prev_mm;
|
|
|
|
struct sched_queue queue;
|
|
#ifdef CONFIG_SCHED_PDS
|
|
u64 time_edge;
|
|
#endif
|
|
unsigned long prio;
|
|
|
|
/* switch count */
|
|
u64 nr_switches;
|
|
|
|
atomic_t nr_iowait;
|
|
|
|
#ifdef CONFIG_SCHED_DEBUG
|
|
u64 last_seen_need_resched_ns;
|
|
int ticks_without_resched;
|
|
#endif
|
|
|
|
#ifdef CONFIG_MEMBARRIER
|
|
int membarrier_state;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
int cpu; /* cpu of this runqueue */
|
|
bool online;
|
|
|
|
unsigned int ttwu_pending;
|
|
unsigned char nohz_idle_balance;
|
|
unsigned char idle_balance;
|
|
|
|
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
|
struct sched_avg avg_irq;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
int active_balance;
|
|
struct cpu_stop_work active_balance_work;
|
|
#endif
|
|
struct balance_callback *balance_callback;
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
struct rcuwait hotplug_wait;
|
|
#endif
|
|
unsigned int nr_pinned;
|
|
|
|
#endif /* CONFIG_SMP */
|
|
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
|
u64 prev_irq_time;
|
|
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
|
|
#ifdef CONFIG_PARAVIRT
|
|
u64 prev_steal_time;
|
|
#endif /* CONFIG_PARAVIRT */
|
|
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
|
|
u64 prev_steal_time_rq;
|
|
#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
|
|
|
|
/* For genenal cpu load util */
|
|
s32 load_history;
|
|
u64 load_block;
|
|
u64 load_stamp;
|
|
|
|
/* calc_load related fields */
|
|
unsigned long calc_load_update;
|
|
long calc_load_active;
|
|
|
|
/* Ensure that all clocks are in the same cache line */
|
|
u64 clock ____cacheline_aligned;
|
|
u64 clock_task;
|
|
#ifdef CONFIG_SCHED_BMQ
|
|
u64 last_ts_switch;
|
|
#endif
|
|
|
|
unsigned int nr_running;
|
|
unsigned long nr_uninterruptible;
|
|
|
|
#ifdef CONFIG_SCHED_HRTICK
|
|
#ifdef CONFIG_SMP
|
|
call_single_data_t hrtick_csd;
|
|
#endif
|
|
struct hrtimer hrtick_timer;
|
|
ktime_t hrtick_time;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SCHEDSTATS
|
|
|
|
/* latency stats */
|
|
struct sched_info rq_sched_info;
|
|
unsigned long long rq_cpu_time;
|
|
/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
|
|
|
|
/* sys_sched_yield() stats */
|
|
unsigned int yld_count;
|
|
|
|
/* schedule() stats */
|
|
unsigned int sched_switch;
|
|
unsigned int sched_count;
|
|
unsigned int sched_goidle;
|
|
|
|
/* try_to_wake_up() stats */
|
|
unsigned int ttwu_count;
|
|
unsigned int ttwu_local;
|
|
#endif /* CONFIG_SCHEDSTATS */
|
|
|
|
#ifdef CONFIG_CPU_IDLE
|
|
/* Must be inspected within a rcu lock section */
|
|
struct cpuidle_state *idle_state;
|
|
#endif
|
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
|
#ifdef CONFIG_SMP
|
|
call_single_data_t nohz_csd;
|
|
#endif
|
|
atomic_t nohz_flags;
|
|
#endif /* CONFIG_NO_HZ_COMMON */
|
|
|
|
/* Scratch cpumask to be temporarily used under rq_lock */
|
|
cpumask_var_t scratch_mask;
|
|
};
|
|
|
|
extern unsigned long rq_load_util(struct rq *rq, unsigned long max);
|
|
|
|
extern unsigned long calc_load_update;
|
|
extern atomic_long_t calc_load_tasks;
|
|
|
|
extern void calc_global_load_tick(struct rq *this_rq);
|
|
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
|
|
|
|
/* One struct rq per CPU, plus the accessors built on top of it. */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

#define cpu_rq(cpu)	(&per_cpu(runqueues, (cpu)))	/* runqueue of @cpu */
#define this_rq()	this_cpu_ptr(&runqueues)	/* runqueue of this CPU */
#define task_rq(p)	cpu_rq(task_cpu(p))		/* runqueue of task_cpu(@p) */
#define cpu_curr(cpu)	(cpu_rq(cpu)->curr)		/* ->curr of @cpu's runqueue */
#define raw_rq()	raw_cpu_ptr(&runqueues)		/* raw_cpu_ptr() variant of this_rq() */
|
|
|
|
#ifdef CONFIG_SMP
|
|
/*
 * Sched-domain sysctl hooks. They are only real functions when both
 * CONFIG_SCHED_DEBUG and CONFIG_SYSCTL are enabled; otherwise they are
 * empty inline stubs.
 */
#if !defined(CONFIG_SCHED_DEBUG) || !defined(CONFIG_SYSCTL)
static inline void register_sched_domain_sysctl(void) { }
static inline void unregister_sched_domain_sysctl(void) { }
#else
void register_sched_domain_sysctl(void);
void unregister_sched_domain_sysctl(void);
#endif
|
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): presumably becomes true once SMP scheduler setup has
 * finished - confirm against the definition.
 */
extern bool sched_smp_initialized;
|
|
|
|
/*
 * Index space for the per-CPU topology mask array. The SMT slot only
 * exists with CONFIG_SCHED_SMT; NR_CPU_AFFINITY_LEVELS is the resulting
 * number of slots.
 */
enum {
	ITSELF_LEVEL_SPACE_HOLDER,
#ifdef CONFIG_SCHED_SMT
	SMT_LEVEL_SPACE_HOLDER,
#endif
	COREGROUP_LEVEL_SPACE_HOLDER,
	CORE_LEVEL_SPACE_HOLDER,
	OTHER_LEVEL_SPACE_HOLDER,
	NR_CPU_AFFINITY_LEVELS
};
|
|
|
|
/* Per-CPU array of NR_CPU_AFFINITY_LEVELS cpumasks, walked by __best_mask_cpu(). */
DECLARE_PER_CPU_ALIGNED(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks);
|
|
|
|
static inline int
|
|
__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask)
|
|
{
|
|
int cpu;
|
|
|
|
while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids)
|
|
mask++;
|
|
|
|
return cpu;
|
|
}
|
|
|
|
static inline int best_mask_cpu(int cpu, const cpumask_t *mask)
|
|
{
|
|
return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu));
|
|
}
|
|
|
|
/* CONFIG_SMP build: implemented outside this header. */
extern void flush_smp_call_function_queue(void);
|
|
|
|
#else /* !CONFIG_SMP */
|
|
/* !CONFIG_SMP build: empty stub so callers need no #ifdef. */
static inline void flush_smp_call_function_queue(void) { }
|
|
#endif
|
|
|
|
#ifndef arch_scale_freq_tick
|
|
static __always_inline
|
|
void arch_scale_freq_tick(void)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
#ifndef arch_scale_freq_capacity
|
|
static __always_inline
|
|
unsigned long arch_scale_freq_capacity(int cpu)
|
|
{
|
|
return SCHED_CAPACITY_SCALE;
|
|
}
|
|
#endif
|
|
|
|
static inline u64 __rq_clock_broken(struct rq *rq)
|
|
{
|
|
return READ_ONCE(rq->clock);
|
|
}
|
|
|
|
static inline u64 rq_clock(struct rq *rq)
|
|
{
|
|
/*
|
|
* Relax lockdep_assert_held() checking as in VRQ, call to
|
|
* sched_info_xxxx() may not held rq->lock
|
|
* lockdep_assert_held(&rq->lock);
|
|
*/
|
|
return rq->clock;
|
|
}
|
|
|
|
static inline u64 rq_clock_task(struct rq *rq)
|
|
{
|
|
/*
|
|
* Relax lockdep_assert_held() checking as in VRQ, call to
|
|
* sched_info_xxxx() may not held rq->lock
|
|
* lockdep_assert_held(&rq->lock);
|
|
*/
|
|
return rq->clock_task;
|
|
}
|
|
|
|
/*
 * {de,en}queue flags:
 *
 * DEQUEUE_SLEEP  - task is no longer runnable
 * ENQUEUE_WAKEUP - task just became runnable
 */
#define DEQUEUE_SLEEP		0x01
#define ENQUEUE_WAKEUP		0x01
|
|
|
|
|
|
/*
 * The scheduler APIs below are used by other kernel code and take a
 * dummy rq_flags.
 * TODO: BMQ needs to support these APIs for compatibility with the
 * mainline scheduler code.
 */
struct rq_flags {
	unsigned long	flags;	/* saved irq flags, see task_rq_unlock() */
};
|
|
|
|
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
|
|
__acquires(rq->lock);
|
|
|
|
struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
|
|
__acquires(p->pi_lock)
|
|
__acquires(rq->lock);
|
|
|
|
static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
|
|
__releases(rq->lock)
|
|
{
|
|
raw_spin_unlock(&rq->lock);
|
|
}
|
|
|
|
static inline void
|
|
task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
|
|
__releases(rq->lock)
|
|
__releases(p->pi_lock)
|
|
{
|
|
raw_spin_unlock(&rq->lock);
|
|
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
|
|
}
|
|
|
|
static inline void
|
|
rq_lock(struct rq *rq, struct rq_flags *rf)
|
|
__acquires(rq->lock)
|
|
{
|
|
raw_spin_lock(&rq->lock);
|
|
}
|
|
|
|
static inline void
|
|
rq_unlock(struct rq *rq, struct rq_flags *rf)
|
|
__releases(rq->lock)
|
|
{
|
|
raw_spin_unlock(&rq->lock);
|
|
}
|
|
|
|
static inline void
|
|
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
|
|
__acquires(rq->lock)
|
|
{
|
|
raw_spin_lock_irq(&rq->lock);
|
|
}
|
|
|
|
static inline void
|
|
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
|
|
__releases(rq->lock)
|
|
{
|
|
raw_spin_unlock_irq(&rq->lock);
|
|
}
|
|
|
|
static inline struct rq *
|
|
this_rq_lock_irq(struct rq_flags *rf)
|
|
__acquires(rq->lock)
|
|
{
|
|
struct rq *rq;
|
|
|
|
local_irq_disable();
|
|
rq = this_rq();
|
|
raw_spin_lock(&rq->lock);
|
|
|
|
return rq;
|
|
}
|
|
|
|
static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
|
|
{
|
|
return &rq->lock;
|
|
}
|
|
|
|
/* Same as __rq_lockp(): return the address of @rq's lock. */
static inline raw_spinlock_t *rq_lockp(struct rq *rq)
{
	raw_spinlock_t *lock = __rq_lockp(rq);

	return lock;
}
|
|
|
|
static inline void lockdep_assert_rq_held(struct rq *rq)
|
|
{
|
|
lockdep_assert_held(__rq_lockp(rq));
|
|
}
|
|
|
|
/* Runqueue lock primitives implemented outside this header. */
extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
extern void raw_spin_rq_unlock(struct rq *rq);
|
|
|
|
/* Lock @rq with lockdep subclass 0. */
static inline void raw_spin_rq_lock(struct rq *rq)
{
	const int subclass = 0;

	raw_spin_rq_lock_nested(rq, subclass);
}
|
|
|
|
/* Disable local interrupts, then lock @rq. */
static inline void raw_spin_rq_lock_irq(struct rq *rq)
{
	/* Order matters: interrupts go off before the lock is taken. */
	local_irq_disable();
	raw_spin_rq_lock(rq);
}
|
|
|
|
/* Unlock @rq, then re-enable local interrupts. */
static inline void raw_spin_rq_unlock_irq(struct rq *rq)
{
	/* Mirror of raw_spin_rq_lock_irq(): drop the lock before enabling irqs. */
	raw_spin_rq_unlock(rq);
	local_irq_enable();
}
|
|
|
|
static inline int task_current(struct rq *rq, struct task_struct *p)
|
|
{
|
|
return rq->curr == p;
|
|
}
|
|
|
|
static inline bool task_on_cpu(struct task_struct *p)
|
|
{
|
|
return p->on_cpu;
|
|
}
|
|
|
|
/* Implemented outside this header; return-value semantics are not visible here. */
extern int task_running_nice(struct task_struct *p);
|
|
|
|
/* Static key defined outside this header; NOTE(review): presumably gates schedstats collection - confirm. */
extern struct static_key_false sched_schedstats;
|
|
|
|
#ifdef CONFIG_CPU_IDLE
|
|
static inline void idle_set_state(struct rq *rq,
|
|
struct cpuidle_state *idle_state)
|
|
{
|
|
rq->idle_state = idle_state;
|
|
}
|
|
|
|
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
|
|
{
|
|
WARN_ON(!rcu_read_lock_held());
|
|
return rq->idle_state;
|
|
}
|
|
#else
|
|
static inline void idle_set_state(struct rq *rq,
|
|
struct cpuidle_state *idle_state)
|
|
{
|
|
}
|
|
|
|
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
|
|
{
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
/* Return the CPU number of @rq; always 0 on !CONFIG_SMP builds. */
static inline int cpu_of(const struct rq *rq)
{
#ifndef CONFIG_SMP
	return 0;
#else
	return rq->cpu;
#endif
}
|
|
|
|
#include "stats.h"
|
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
/* Bit numbers and the matching masks for rq->nohz_flags. */
#define NOHZ_BALANCE_KICK_BIT	0
#define NOHZ_STATS_KICK_BIT	1

#define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
#define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)

#define NOHZ_KICK_MASK		(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)

/* Address of the nohz_flags word of @cpu's runqueue. */
#define nohz_flags(cpu)		(&cpu_rq(cpu)->nohz_flags)

/*
 * TODO: needed? (kept disabled)
 *
 * extern void nohz_balance_exit_idle(struct rq *rq);
 * #else
 * static inline void nohz_balance_exit_idle(struct rq *rq) { }
 */
#endif
|
|
|
|
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
|
struct irqtime {
|
|
u64 total;
|
|
u64 tick_delta;
|
|
u64 irq_start_time;
|
|
struct u64_stats_sync sync;
|
|
};
|
|
|
|
/* One struct irqtime per CPU; read through irq_time_read(). */
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
|
|
|
|
/*
|
|
* Returns the irqtime minus the softirq time computed by ksoftirqd.
|
|
* Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
|
|
* and never move forward.
|
|
*/
|
|
static inline u64 irq_time_read(int cpu)
|
|
{
|
|
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
|
|
unsigned int seq;
|
|
u64 total;
|
|
|
|
do {
|
|
seq = __u64_stats_fetch_begin(&irqtime->sync);
|
|
total = irqtime->total;
|
|
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
|
|
|
|
return total;
|
|
}
|
|
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
|
|
|
|
#ifdef CONFIG_CPU_FREQ
|
|
/* Per-CPU, RCU-annotated pointer to the cpufreq utilisation-update hook data. */
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
|
|
#endif /* CONFIG_CPU_FREQ */
|
|
|
|
/*
 * Tick offload setup: a real __init function with CONFIG_NO_HZ_FULL,
 * otherwise a stub that reports success (0).
 */
#ifndef CONFIG_NO_HZ_FULL
static inline int sched_tick_offload_init(void)
{
	return 0;
}
#else
extern int __init sched_tick_offload_init(void);
#endif
|
|
|
|
/*
 * arch_scale_freq_invariant() defaults:
 *  - (false) when the architecture provides no arch_scale_freq_capacity macro;
 *  - (true) when it does, unless the architecture supplies its own
 *    arch_scale_freq_invariant.
 */
#ifndef arch_scale_freq_capacity
#define arch_scale_freq_invariant()	(false)
#else /* arch_scale_freq_capacity */
#ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant()	(true)
#endif
#endif
|
|
|
|
/* Implemented outside this header. */
extern void schedule_idle(void);
|
|
|
|
/* Scale @v by @s, where @s is a fixed-point factor with SCHED_CAPACITY_SHIFT fractional bits. */
#define cap_scale(v, s)		(((v) * (s)) >> SCHED_CAPACITY_SHIFT)
|
|
|
|
/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It makes the schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks while still letting them sleep. This is needed on platforms that
 * cannot change clock frequency atomically. Remove once fast switching
 * is available on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 */
#define SCHED_FLAG_SUGOV	0x10000000
|
|
|
|
#ifdef CONFIG_MEMBARRIER
/*
 * The scheduler provides memory barriers required by membarrier between:
 * - prior user-space memory accesses and store to rq->membarrier_state,
 * - store to rq->membarrier_state and following user-space memory accesses.
 * In the same way it provides those guarantees around store to rq->curr.
 *
 * Copies @next_mm's membarrier state into @rq, skipping the store when
 * the mm does not change or the cached state already matches.
 */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
	int next_state;

	if (prev_mm != next_mm) {
		next_state = atomic_read(&next_mm->membarrier_state);
		if (READ_ONCE(rq->membarrier_state) != next_state)
			WRITE_ONCE(rq->membarrier_state, next_state);
	}
}
#else
/* !CONFIG_MEMBARRIER: nothing to track. */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
}
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
|
|
#else
|
|
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
|
|
{
|
|
return nr_cpu_ids;
|
|
}
|
|
#endif
|
|
|
|
/* Simple-waitqueue helpers implemented outside this header. */
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q,
			       struct swait_queue *wait);
|
|
|
|
#ifdef CONFIG_PREEMPT_DYNAMIC
/* Dynamic preemption mode state and controls, defined outside this header. */
extern int  preempt_dynamic_mode;
extern int  sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
|
|
|
|
/* Empty stub: this header provides no NOHZ idle balancing for @cpu. */
static inline void nohz_run_idle_balance(int cpu)
{
}
|
|
|
|
/* Stub: no clamping is applied; @util is returned unchanged. @rq and @p are not used. */
static inline unsigned long
uclamp_rq_util_with(struct rq *rq, unsigned long util, struct task_struct *p)
{
	return util;
}
|
|
|
|
/* Stub: always reports @rq as not capped. */
static inline bool uclamp_rq_is_capped(struct rq *rq)
{
	return false;
}
|
|
|
|
#ifdef CONFIG_SCHED_MM_CID
|
|
|
|
#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
|
|
#define MM_CID_SCAN_DELAY 100 /* 100ms */
|
|
|
|
extern raw_spinlock_t cid_lock;
|
|
extern int use_cid_lock;
|
|
|
|
extern void sched_mm_cid_migrate_from(struct task_struct *t);
|
|
extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t, int src_cpu);
|
|
extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
|
|
extern void init_sched_mm_cid(struct task_struct *t);
|
|
|
|
/* Clear @cid in @mm's cid mask; negative values are ignored. */
static inline void __mm_cid_put(struct mm_struct *mm, int cid)
{
	if (cid >= 0)
		cpumask_clear_cpu(cid, mm_cidmask(mm));
}
|
|
|
|
/*
|
|
* The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
|
|
* the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
|
|
* be held to transition to other states.
|
|
*
|
|
* State transitions synchronized with cmpxchg or try_cmpxchg need to be
|
|
* consistent across cpus, which prevents use of this_cpu_cmpxchg.
|
|
*/
|
|
static inline void mm_cid_put_lazy(struct task_struct *t)
|
|
{
|
|
struct mm_struct *mm = t->mm;
|
|
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
|
|
int cid;
|
|
|
|
lockdep_assert_irqs_disabled();
|
|
cid = __this_cpu_read(pcpu_cid->cid);
|
|
if (!mm_cid_is_lazy_put(cid) ||
|
|
!try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
|
|
return;
|
|
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
|
|
}
|
|
|
|
static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
|
|
{
|
|
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
|
|
int cid, res;
|
|
|
|
lockdep_assert_irqs_disabled();
|
|
cid = __this_cpu_read(pcpu_cid->cid);
|
|
for (;;) {
|
|
if (mm_cid_is_unset(cid))
|
|
return MM_CID_UNSET;
|
|
/*
|
|
* Attempt transition from valid or lazy-put to unset.
|
|
*/
|
|
res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
|
|
if (res == cid)
|
|
break;
|
|
cid = res;
|
|
}
|
|
return cid;
|
|
}
|
|
|
|
static inline void mm_cid_put(struct mm_struct *mm)
|
|
{
|
|
int cid;
|
|
|
|
lockdep_assert_irqs_disabled();
|
|
cid = mm_cid_pcpu_unset(mm);
|
|
if (cid == MM_CID_UNSET)
|
|
return;
|
|
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
|
|
}
|
|
|
|
static inline int __mm_cid_try_get(struct mm_struct *mm)
|
|
{
|
|
struct cpumask *cpumask;
|
|
int cid;
|
|
|
|
cpumask = mm_cidmask(mm);
|
|
/*
|
|
* Retry finding first zero bit if the mask is temporarily
|
|
* filled. This only happens during concurrent remote-clear
|
|
* which owns a cid without holding a rq lock.
|
|
*/
|
|
for (;;) {
|
|
cid = cpumask_first_zero(cpumask);
|
|
if (cid < nr_cpu_ids)
|
|
break;
|
|
cpu_relax();
|
|
}
|
|
if (cpumask_test_and_set_cpu(cid, cpumask))
|
|
return -1;
|
|
return cid;
|
|
}
|
|
|
|
/*
|
|
* Save a snapshot of the current runqueue time of this cpu
|
|
* with the per-cpu cid value, allowing to estimate how recently it was used.
|
|
*/
|
|
static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
|
|
{
|
|
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));
|
|
|
|
lockdep_assert_rq_held(rq);
|
|
WRITE_ONCE(pcpu_cid->time, rq->clock);
|
|
}
|
|
|
|
static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
|
|
{
|
|
int cid;
|
|
|
|
/*
|
|
* All allocations (even those using the cid_lock) are lock-free. If
|
|
* use_cid_lock is set, hold the cid_lock to perform cid allocation to
|
|
* guarantee forward progress.
|
|
*/
|
|
if (!READ_ONCE(use_cid_lock)) {
|
|
cid = __mm_cid_try_get(mm);
|
|
if (cid >= 0)
|
|
goto end;
|
|
raw_spin_lock(&cid_lock);
|
|
} else {
|
|
raw_spin_lock(&cid_lock);
|
|
cid = __mm_cid_try_get(mm);
|
|
if (cid >= 0)
|
|
goto unlock;
|
|
}
|
|
|
|
/*
|
|
* cid concurrently allocated. Retry while forcing following
|
|
* allocations to use the cid_lock to ensure forward progress.
|
|
*/
|
|
WRITE_ONCE(use_cid_lock, 1);
|
|
/*
|
|
* Set use_cid_lock before allocation. Only care about program order
|
|
* because this is only required for forward progress.
|
|
*/
|
|
barrier();
|
|
/*
|
|
* Retry until it succeeds. It is guaranteed to eventually succeed once
|
|
* all newcoming allocations observe the use_cid_lock flag set.
|
|
*/
|
|
do {
|
|
cid = __mm_cid_try_get(mm);
|
|
cpu_relax();
|
|
} while (cid < 0);
|
|
/*
|
|
* Allocate before clearing use_cid_lock. Only care about
|
|
* program order because this is for forward progress.
|
|
*/
|
|
barrier();
|
|
WRITE_ONCE(use_cid_lock, 0);
|
|
unlock:
|
|
raw_spin_unlock(&cid_lock);
|
|
end:
|
|
mm_cid_snapshot_time(rq, mm);
|
|
return cid;
|
|
}
|
|
|
|
static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
|
|
{
|
|
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
|
|
struct cpumask *cpumask;
|
|
int cid;
|
|
|
|
lockdep_assert_rq_held(rq);
|
|
cpumask = mm_cidmask(mm);
|
|
cid = __this_cpu_read(pcpu_cid->cid);
|
|
if (mm_cid_is_valid(cid)) {
|
|
mm_cid_snapshot_time(rq, mm);
|
|
return cid;
|
|
}
|
|
if (mm_cid_is_lazy_put(cid)) {
|
|
if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
|
|
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
|
|
}
|
|
cid = __mm_cid_get(rq, mm);
|
|
__this_cpu_write(pcpu_cid->cid, cid);
|
|
return cid;
|
|
}
|
|
|
|
static inline void switch_mm_cid(struct rq *rq,
|
|
struct task_struct *prev,
|
|
struct task_struct *next)
|
|
{
|
|
/*
|
|
* Provide a memory barrier between rq->curr store and load of
|
|
* {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
|
|
*
|
|
* Should be adapted if context_switch() is modified.
|
|
*/
|
|
if (!next->mm) { // to kernel
|
|
/*
|
|
* user -> kernel transition does not guarantee a barrier, but
|
|
* we can use the fact that it performs an atomic operation in
|
|
* mmgrab().
|
|
*/
|
|
if (prev->mm) // from user
|
|
smp_mb__after_mmgrab();
|
|
/*
|
|
* kernel -> kernel transition does not change rq->curr->mm
|
|
* state. It stays NULL.
|
|
*/
|
|
} else { // to user
|
|
/*
|
|
* kernel -> user transition does not provide a barrier
|
|
* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
|
|
* Provide it here.
|
|
*/
|
|
if (!prev->mm) // from kernel
|
|
smp_mb();
|
|
/*
|
|
* user -> user transition guarantees a memory barrier through
|
|
* switch_mm() when current->mm changes. If current->mm is
|
|
* unchanged, no barrier is needed.
|
|
*/
|
|
}
|
|
if (prev->mm_cid_active) {
|
|
mm_cid_snapshot_time(rq, prev->mm);
|
|
mm_cid_put_lazy(prev);
|
|
prev->mm_cid = -1;
|
|
}
|
|
if (next->mm_cid_active)
|
|
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
|
|
}
|
|
|
|
#else
|
|
/* !CONFIG_SCHED_MM_CID: every mm_cid hook is an empty stub. */
static inline void switch_mm_cid(struct rq *rq,
				 struct task_struct *prev,
				 struct task_struct *next)
{
}

static inline void sched_mm_cid_migrate_from(struct task_struct *t)
{
}

static inline void sched_mm_cid_migrate_to(struct rq *dst_rq,
					   struct task_struct *t,
					   int src_cpu)
{
}

static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
{
}

static inline void init_sched_mm_cid(struct task_struct *t)
{
}
|
|
#endif
|
|
|
|
#endif /* ALT_SCHED_H */
|