#ifndef ALT_SCHED_H
#define ALT_SCHED_H

#include <linux/context_tracking.h>
#include <linux/profile.h>
#include <linux/stop_machine.h>
#include <linux/syscalls.h>
#include <linux/tick.h>

#include <trace/events/power.h>
#include <trace/events/sched.h>

#include "../workqueue_internal.h"

#include "cpupri.h"

#ifdef CONFIG_SCHED_BMQ
/* bits:
 * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */
#define SCHED_LEVELS	(MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1)
#endif

#ifdef CONFIG_SCHED_PDS
/* bits: RT(0-24), reserved(25-31), SCHED_NORMAL_PRIO_NUM(32), cpu idle task(1) */
#define SCHED_LEVELS	(64 + 1)
#endif /* CONFIG_SCHED_PDS */

#define IDLE_TASK_SCHED_PRIO	(SCHED_LEVELS - 1)

#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
extern void resched_latency_warn(int cpu, u64 latency);
#else
# define SCHED_WARN_ON(x)	({ (void)(x), 0; })
static inline void resched_latency_warn(int cpu, u64 latency) {}
#endif

/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
 * are pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency always enable it on 64-bit platforms.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w)					\
({								\
	unsigned long __w = (w);				\
	if (__w)						\
		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT);	\
	__w;							\
})
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD

/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities queued on
 * it, so the weight of an entity should not be too large, and neither should
 * the shares value of a task group.
 * (The default weight is 1024 - so there's no practical limitation from this.)
 */
#define MIN_SHARES		(1UL << 1)
#define MAX_SHARES		(1UL << 18)
#endif

/*
 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
 */
#ifdef CONFIG_SCHED_DEBUG
# define const_debug __read_mostly
#else
# define const_debug const
#endif

/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2

static inline int task_on_rq_queued(struct task_struct *p)
{
	return p->on_rq == TASK_ON_RQ_QUEUED;
}

static inline int task_on_rq_migrating(struct task_struct *p)
{
	return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
}

/*
 * wake flags
 */
#define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
#define WF_FORK		0x02		/* child wakeup after fork */
#define WF_MIGRATED	0x04		/* internal use, task got migrated */

#define SCHED_QUEUE_BITS	(SCHED_LEVELS - 1)

struct sched_queue {
	DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS);
	struct list_head heads[SCHED_LEVELS];
};
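/*
 * Illustrative sketch (not the actual enqueue/pick code, which lives in the
 * scheduler core): each priority level owns one list head in @heads, and the
 * matching bit in @bitmap mirrors "this level is non-empty".  Field and
 * helper names below are hypothetical, shown only to make the intent of the
 * data structure concrete:
 *
 *	// enqueue a task at level @prio
 *	list_add_tail(&p->sq_node, &q->heads[prio]);
 *	set_bit(prio, q->bitmap);
 *
 *	// find the highest-priority non-empty level
 *	prio = find_first_bit(q->bitmap, SCHED_QUEUE_BITS);
 *
 * Note that find_first_bit() returns its size argument when no bit is set,
 * and SCHED_QUEUE_BITS == SCHED_LEVELS - 1 == IDLE_TASK_SCHED_PRIO, so an
 * otherwise empty queue presumably resolves to the idle level.
 */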
struct rq;
struct cpuidle_state;

struct balance_callback {
	struct balance_callback *next;
	void (*func)(struct rq *rq);
};

/*
 * This is the main, per-CPU runqueue data structure.
 * This data should only be modified by the local cpu.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t			lock;

	struct task_struct __rcu	*curr;
	struct task_struct		*idle, *stop, *skip;
	struct mm_struct		*prev_mm;

	struct sched_queue		queue;
#ifdef CONFIG_SCHED_PDS
	u64				time_edge;
#endif
	unsigned long			prio;

	/* switch count */
	u64				nr_switches;

	atomic_t			nr_iowait;

#ifdef CONFIG_SCHED_DEBUG
	u64				last_seen_need_resched_ns;
	int				ticks_without_resched;
#endif

#ifdef CONFIG_MEMBARRIER
	int				membarrier_state;
#endif

#ifdef CONFIG_SMP
	int				cpu;	/* cpu of this runqueue */
	bool				online;

	unsigned int			ttwu_pending;
	unsigned char			nohz_idle_balance;
	unsigned char			idle_balance;

#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
	struct sched_avg		avg_irq;
#endif

#ifdef CONFIG_SCHED_SMT
	int				active_balance;
	struct cpu_stop_work		active_balance_work;
#endif
	struct balance_callback		*balance_callback;
#ifdef CONFIG_HOTPLUG_CPU
	struct rcuwait			hotplug_wait;
#endif
	unsigned int			nr_pinned;
#endif /* CONFIG_SMP */

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64				prev_irq_time;
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_PARAVIRT
	u64				prev_steal_time;
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	u64				prev_steal_time_rq;
#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */

	/* For general cpu load util */
	s32				load_history;
	u64				load_block;
	u64				load_stamp;

	/* calc_load related fields */
	unsigned long			calc_load_update;
	long				calc_load_active;

	u64				clock, last_tick;
	u64				last_ts_switch;
	u64				clock_task;

	unsigned int			nr_running;
	unsigned long			nr_uninterruptible;

#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
	call_single_data_t		hrtick_csd;
#endif
	struct hrtimer			hrtick_timer;
	ktime_t				hrtick_time;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info		rq_sched_info;
	unsigned long long		rq_cpu_time;
	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */

	/* sys_sched_yield() stats */
	unsigned int			yld_count;

	/* schedule() stats */
	unsigned int			sched_switch;
	unsigned int			sched_count;
	unsigned int			sched_goidle;

	/* try_to_wake_up() stats */
	unsigned int			ttwu_count;
	unsigned int			ttwu_local;
#endif /* CONFIG_SCHEDSTATS */

#ifdef CONFIG_CPU_IDLE
	/* Must be inspected within an RCU read-side critical section */
	struct cpuidle_state		*idle_state;
#endif

#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
	call_single_data_t		nohz_csd;
#endif
	atomic_t			nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */

	/* Scratch cpumask to be temporarily used under rq_lock */
	cpumask_var_t			scratch_mask;
};

extern unsigned long rq_load_util(struct rq *rq, unsigned long max);

extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);

DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_rq(cpu)	(&per_cpu(runqueues, (cpu)))
#define this_rq()	this_cpu_ptr(&runqueues)
#define task_rq(p)	cpu_rq(task_cpu(p))
#define cpu_curr(cpu)	(cpu_rq(cpu)->curr)
#define raw_rq()	raw_cpu_ptr(&runqueues)
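/*
 * Usage sketch for the accessors above (illustrative, not taken from the
 * scheduler core): peek at a remote CPU's runqueue and at the task that is
 * currently running there.  Anything beyond a racy peek must follow the
 * locking helpers further below.
 *
 *	struct rq *rq = cpu_rq(cpu);
 *	unsigned int nr = READ_ONCE(rq->nr_running);
 *	// under rcu_read_lock():
 *	struct task_struct *curr = rcu_dereference(cpu_curr(cpu));
 */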
#ifdef CONFIG_SMP
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
void register_sched_domain_sysctl(void);
void unregister_sched_domain_sysctl(void);
#else
static inline void register_sched_domain_sysctl(void)
{
}
static inline void unregister_sched_domain_sysctl(void)
{
}
#endif

extern bool sched_smp_initialized;

enum {
	ITSELF_LEVEL_SPACE_HOLDER,
#ifdef CONFIG_SCHED_SMT
	SMT_LEVEL_SPACE_HOLDER,
#endif
	COREGROUP_LEVEL_SPACE_HOLDER,
	CORE_LEVEL_SPACE_HOLDER,
	OTHER_LEVEL_SPACE_HOLDER,
	NR_CPU_AFFINITY_LEVELS
};

DECLARE_PER_CPU_ALIGNED(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks);

static inline int
__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask)
{
	int cpu;

	while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids)
		mask++;

	return cpu;
}

static inline int best_mask_cpu(int cpu, const cpumask_t *mask)
{
	return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu));
}
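/*
 * best_mask_cpu() walks @cpu's per-CPU topology masks from the innermost
 * level (the CPU itself, then its SMT siblings, and so on) outwards and
 * returns the first CPU that is also present in @mask; the caller has to
 * guarantee that @mask intersects at least the outermost level, or the walk
 * runs past the array.  Illustrative use (a sketch, not code from the
 * scheduler core):
 *
 *	// pick the CPU closest to @this_cpu among those @p is allowed on
 *	int target = best_mask_cpu(this_cpu, p->cpus_ptr);
 */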
extern void flush_smp_call_function_queue(void);

#else /* !CONFIG_SMP */

static inline void flush_smp_call_function_queue(void) { }

#endif

#ifndef arch_scale_freq_tick
static __always_inline void arch_scale_freq_tick(void)
{
}
#endif

#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
{
	return SCHED_CAPACITY_SCALE;
}
#endif

static inline u64 __rq_clock_broken(struct rq *rq)
{
	return READ_ONCE(rq->clock);
}

static inline u64 rq_clock(struct rq *rq)
{
	/*
	 * Relax lockdep_assert_held() checking: as in VRQ, callers such as
	 * sched_info_xxxx() may not hold rq->lock.
	 * lockdep_assert_held(&rq->lock);
	 */
	return rq->clock;
}

static inline u64 rq_clock_task(struct rq *rq)
{
	/*
	 * Relax lockdep_assert_held() checking: as in VRQ, callers such as
	 * sched_info_xxxx() may not hold rq->lock.
	 * lockdep_assert_held(&rq->lock);
	 */
	return rq->clock_task;
}

/*
 * {de,en}queue flags:
 *
 * DEQUEUE_SLEEP  - task is no longer runnable
 * ENQUEUE_WAKEUP - task just became runnable
 *
 */
#define DEQUEUE_SLEEP		0x01

#define ENQUEUE_WAKEUP		0x01

/*
 * Below are scheduler APIs that are used by other kernel code.
 * They use a dummy rq_flags.
 * TODO: BMQ needs to support these APIs for compatibility with the mainline
 * scheduler code.
 */
struct rq_flags {
	unsigned long flags;
};

struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
	__acquires(rq->lock);

struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
	__acquires(p->pi_lock)
	__acquires(rq->lock);

static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
	__releases(rq->lock)
{
	raw_spin_unlock(&rq->lock);
}

static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
	__releases(rq->lock)
	__releases(p->pi_lock)
{
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
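/*
 * Typical use of the pair above (an illustrative sketch): task_rq_lock()
 * takes p->pi_lock (saving IRQ flags into @rf) and then the runqueue lock,
 * so @p cannot change runqueues while they are held; task_rq_unlock()
 * releases both in the reverse order.
 *
 *	struct rq_flags rf;
 *	struct rq *rq;
 *
 *	rq = task_rq_lock(p, &rf);
 *	// ... inspect or modify @p's scheduling state ...
 *	task_rq_unlock(rq, p, &rf);
 */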
static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_lock(&rq->lock);
}

static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
	__releases(rq->lock)
{
	raw_spin_unlock(&rq->lock);
}

static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_lock_irq(&rq->lock);
}

static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
	__releases(rq->lock)
{
	raw_spin_unlock_irq(&rq->lock);
}

static inline struct rq *
this_rq_lock_irq(struct rq_flags *rf)
	__acquires(rq->lock)
{
	struct rq *rq;

	local_irq_disable();
	rq = this_rq();
	raw_spin_lock(&rq->lock);

	return rq;
}

static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
{
	return &rq->lock;
}

static inline raw_spinlock_t *rq_lockp(struct rq *rq)
{
	return __rq_lockp(rq);
}

static inline void lockdep_assert_rq_held(struct rq *rq)
{
	lockdep_assert_held(__rq_lockp(rq));
}

extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
extern void raw_spin_rq_unlock(struct rq *rq);

static inline void raw_spin_rq_lock(struct rq *rq)
{
	raw_spin_rq_lock_nested(rq, 0);
}

static inline void raw_spin_rq_lock_irq(struct rq *rq)
{
	local_irq_disable();
	raw_spin_rq_lock(rq);
}

static inline void raw_spin_rq_unlock_irq(struct rq *rq)
{
	raw_spin_rq_unlock(rq);
	local_irq_enable();
}

static inline int task_current(struct rq *rq, struct task_struct *p)
{
	return rq->curr == p;
}

static inline bool task_on_cpu(struct task_struct *p)
{
	return p->on_cpu;
}

extern int task_running_nice(struct task_struct *p);

extern struct static_key_false sched_schedstats;

#ifdef CONFIG_CPU_IDLE
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
	rq->idle_state = idle_state;
}

static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	WARN_ON(!rcu_read_lock_held());
	return rq->idle_state;
}
#else
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
}

static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	return NULL;
}
#endif

static inline int cpu_of(const struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}

#include "stats.h"

#ifdef CONFIG_NO_HZ_COMMON
#define NOHZ_BALANCE_KICK_BIT	0
#define NOHZ_STATS_KICK_BIT	1

#define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
#define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)

#define NOHZ_KICK_MASK		(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)

#define nohz_flags(cpu)		(&cpu_rq(cpu)->nohz_flags)

/* TODO: needed?
extern void nohz_balance_exit_idle(struct rq *rq);
#else
static inline void nohz_balance_exit_idle(struct rq *rq) { }
*/
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
	u64			total;
	u64			tick_delta;
	u64			irq_start_time;
	struct u64_stats_sync	sync;
};

DECLARE_PER_CPU(struct irqtime, cpu_irqtime);

/*
 * Returns the irqtime minus the softirq time computed by ksoftirqd.
 * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
 * subtracted from it and would never move forward.
 */
static inline u64 irq_time_read(int cpu)
{
	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
	unsigned int seq;
	u64 total;

	do {
		seq = __u64_stats_fetch_begin(&irqtime->sync);
		total = irqtime->total;
	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));

	return total;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
#endif /* CONFIG_CPU_FREQ */

#ifdef CONFIG_NO_HZ_FULL
extern int __init sched_tick_offload_init(void);
#else
static inline int sched_tick_offload_init(void) { return 0; }
#endif

#ifdef arch_scale_freq_capacity
#ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant()	(true)
#endif
#else /* arch_scale_freq_capacity */
#define arch_scale_freq_invariant()	(false)
#endif

extern void schedule_idle(void);

#define cap_scale(v, s)	((v)*(s) >> SCHED_CAPACITY_SHIFT)

/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks, but still be able to sleep. We need this on platforms that cannot
 * atomically change clock frequency. Remove once fast switching is available
 * on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 */
#define SCHED_FLAG_SUGOV	0x10000000

#ifdef CONFIG_MEMBARRIER
/*
 * The scheduler provides memory barriers required by membarrier between:
 * - prior user-space memory accesses and store to rq->membarrier_state,
 * - store to rq->membarrier_state and following user-space memory accesses.
 * In the same way it provides those guarantees around store to rq->curr.
 */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
	int membarrier_state;

	if (prev_mm == next_mm)
		return;

	membarrier_state = atomic_read(&next_mm->membarrier_state);
	if (READ_ONCE(rq->membarrier_state) == membarrier_state)
		return;

	WRITE_ONCE(rq->membarrier_state, membarrier_state);
}
#else
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
}
#endif

#ifdef CONFIG_NUMA
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
{
	return nr_cpu_ids;
}
#endif

extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);

#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif

static inline void nohz_run_idle_balance(int cpu) { }

static inline unsigned long
uclamp_rq_util_with(struct rq *rq, unsigned long util, struct task_struct *p)
{
	return util;
}

static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }

#ifdef CONFIG_SCHED_MM_CID
static inline int __mm_cid_get(struct mm_struct *mm)
{
	struct cpumask *cpumask;
	int cid;

	cpumask = mm_cidmask(mm);
	cid = cpumask_first_zero(cpumask);
	if (cid >= nr_cpu_ids)
		return -1;
	__cpumask_set_cpu(cid, cpumask);
	return cid;
}

static inline void mm_cid_put(struct mm_struct *mm, int cid)
{
	lockdep_assert_irqs_disabled();
	if (cid < 0)
		return;
	raw_spin_lock(&mm->cid_lock);
	__cpumask_clear_cpu(cid, mm_cidmask(mm));
	raw_spin_unlock(&mm->cid_lock);
}
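/*
 * mm_cid_get()/mm_cid_put() hand out per-mm concurrency IDs from the
 * mm_cidmask() bitmap: the lowest clear bit becomes the ID, so values stay
 * compact and below nr_cpu_ids.  Allocation and release are serialized by
 * mm->cid_lock and both expect IRQs to already be disabled, as on the
 * context-switch path below.  Illustrative flow (a sketch, not code from
 * the scheduler core):
 *
 *	int cid = mm_cid_get(mm);	// -1 if all IDs are in use
 *	// ... run with this concurrency ID ...
 *	mm_cid_put(mm, cid);		// no-op for cid < 0
 */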
static inline int mm_cid_get(struct mm_struct *mm)
{
	int ret;

	lockdep_assert_irqs_disabled();
	raw_spin_lock(&mm->cid_lock);
	ret = __mm_cid_get(mm);
	raw_spin_unlock(&mm->cid_lock);
	return ret;
}

static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
{
	if (prev->mm_cid_active) {
		if (next->mm_cid_active && next->mm == prev->mm) {
			/*
			 * Context switch between threads in same mm, hand over
			 * the mm_cid from prev to next.
			 */
			next->mm_cid = prev->mm_cid;
			prev->mm_cid = -1;
			return;
		}
		mm_cid_put(prev->mm, prev->mm_cid);
		prev->mm_cid = -1;
	}
	if (next->mm_cid_active)
		next->mm_cid = mm_cid_get(next->mm);
}
#else
static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
#endif

#endif /* ALT_SCHED_H */