Patch 2.6.32.28 to 2.6.32.32
/kernel/sched.c
blob:95297f2c284ebeed72c63256397c783efbc289d4 -> blob:5beca3ec439ef9104775b09d51b7de679f4f3289
--- kernel/sched.c
+++ kernel/sched.c
@@ -237,7 +237,7 @@ static void destroy_rt_bandwidth(struct
*/
static DEFINE_MUTEX(sched_domains_mutex);
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
#include <linux/cgroup.h>
@@ -247,13 +247,7 @@ static LIST_HEAD(task_groups);
/* task group related information */
struct task_group {
-#ifdef CONFIG_CGROUP_SCHED
struct cgroup_subsys_state css;
-#endif
-
-#ifdef CONFIG_USER_SCHED
- uid_t uid;
-#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
@@ -278,35 +272,7 @@ struct task_group {
struct list_head children;
};
-#ifdef CONFIG_USER_SCHED
-
-/* Helper function to pass uid information to create_sched_user() */
-void set_tg_uid(struct user_struct *user)
-{
- user->tg->uid = user->uid;
-}
-
-/*
- * Root task group.
- * Every UID task group (including init_task_group aka UID-0) will
- * be a child to this group.
- */
-struct task_group root_task_group;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/* Default task group's sched entity on each cpu */
-static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
-/* Default task group's cfs_rq on each cpu */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
-#endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
-#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
@@ -322,11 +288,7 @@ static int root_task_group_empty(void)
}
#endif
-#ifdef CONFIG_USER_SCHED
-# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
-#else /* !CONFIG_USER_SCHED */
# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
-#endif /* CONFIG_USER_SCHED */
/*
* A weight of 0 or 1 can cause arithmetics problems.
@@ -352,13 +314,7 @@ static inline struct task_group *task_gr
{
struct task_group *tg;
-#ifdef CONFIG_USER_SCHED
- rcu_read_lock();
- tg = __task_cred(p)->user->tg;
- rcu_read_unlock();
-
- return tg;
-#elif defined(CONFIG_CGROUP_SCHED)
+#ifdef CONFIG_CGROUP_SCHED
struct cgroup_subsys_state *css;
css = task_subsys_state(p, cpu_cgroup_subsys_id);
@@ -394,7 +350,7 @@ static inline struct task_group *task_gr
return NULL;
}
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
/* CFS-related fields in a runqueue */
struct cfs_rq {
@@ -578,6 +534,7 @@ struct rq {
struct mm_struct *prev_mm;
u64 clock;
+ u64 clock_task;
atomic_t nr_iowait;
@@ -585,6 +542,8 @@ struct rq {
struct root_domain *rd;
struct sched_domain *sd;
+ unsigned long cpu_power;
+
unsigned char idle_at_tick;
/* For active balancing */
int post_schedule;
@@ -605,6 +564,10 @@ struct rq {
u64 avg_idle;
#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
+#endif
+
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
@@ -642,11 +605,7 @@ struct rq {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
- rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
static inline int cpu_of(struct rq *rq)
{
@@ -673,9 +632,20 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
+static u64 irq_time_cpu(int cpu);
+static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
+
inline void update_rq_clock(struct rq *rq)
{
+ int cpu = cpu_of(rq);
+ u64 irq_time;
+
- rq->clock = sched_clock_cpu(cpu_of(rq));
+ rq->clock = sched_clock_cpu(cpu); /* reuse the cpu looked up above */
+ irq_time = irq_time_cpu(cpu);
+ if (rq->clock - irq_time > rq->clock_task) /* clock_task only moves forward */
+ rq->clock_task = rq->clock - irq_time;
+
+ sched_irq_time_avg_update(rq, irq_time);
}
/*
@@ -1308,6 +1278,10 @@ static void resched_task(struct task_str
static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
}
+
+static void sched_avg_update(struct rq *rq)
+{ /* empty stub; presumably the !CONFIG_SMP variant - confirm against the enclosing #else */
+}
#endif /* CONFIG_SMP */
#if BITS_PER_LONG == 32
@@ -1557,24 +1531,9 @@ static unsigned long target_load(int cpu
return max(rq->cpu_load[type-1], total);
}
-static struct sched_group *group_of(int cpu)
-{
- struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
-
- if (!sd)
- return NULL;
-
- return sd->groups;
-}
-
static unsigned long power_of(int cpu)
{
- struct sched_group *group = group_of(cpu);
-
- if (!group)
- return SCHED_LOAD_SCALE;
-
- return group->cpu_power;
+ return cpu_rq(cpu)->cpu_power; /* per-rq copy, written by update_cpu_power() and sched_init() */
}
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1856,6 +1815,94 @@ static inline void __set_task_cpu(struct
#endif
}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
+static DEFINE_PER_CPU(u64, cpu_hardirq_time);
+static DEFINE_PER_CPU(u64, cpu_softirq_time);
+
+static DEFINE_PER_CPU(u64, irq_start_time);
+static int sched_clock_irqtime;
+
+void enable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 1; /* flag tested by irq_time_cpu() and account_system_vtime() */
+}
+
+void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0; /* irq_time_cpu() returns 0 and account_system_vtime() bails out */
+}
+
+static u64 irq_time_cpu(int cpu)
+{
+ if (!sched_clock_irqtime)
+ return 0; /* accounting switched off: report no irq time */
+
+ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); /* unlocked read of both counters */
+}
+
+void account_system_vtime(struct task_struct *curr)
+{
+ unsigned long flags;
+ int cpu;
+ u64 now, delta;
+
+ if (!sched_clock_irqtime)
+ return; /* irq time accounting is switched off */
+
+ local_irq_save(flags); /* the per-cpu counters below are updated with irqs off */
+
+ cpu = smp_processor_id();
+ now = sched_clock_cpu(cpu);
+ delta = now - per_cpu(irq_start_time, cpu); /* time since the previous call on this cpu */
+ per_cpu(irq_start_time, cpu) = now;
+ /*
+ * We do not account for softirq time from ksoftirqd here.
+ * We want to continue accounting softirq time to ksoftirqd thread
+ * in that case, so as not to confuse scheduler with a special task
+ * that does not consume any time, but still wants to run.
+ */
+ if (hardirq_count())
+ per_cpu(cpu_hardirq_time, cpu) += delta;
+ else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ per_cpu(cpu_softirq_time, cpu) += delta;
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(account_system_vtime);
+
+static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+{
+ if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
+ u64 delta_irq = curr_irq_time - rq->prev_irq_time; /* irq time since the previous update */
+ rq->prev_irq_time = curr_irq_time;
+ sched_rt_avg_update(rq, delta_irq); /* irq delta is handed on as rt_delta */
+ }
+}
+
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+ return 0; /* !CONFIG_IRQ_TIME_ACCOUNTING: no irq time is tracked */
+}
+
+static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } /* no-op without CONFIG_IRQ_TIME_ACCOUNTING */
+
+#endif
+
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -1882,8 +1929,8 @@ static void dec_nr_running(struct rq *rq
static void set_load_weight(struct task_struct *p)
{
if (task_has_rt_policy(p)) {
- p->se.load.weight = prio_to_weight[0] * 2;
- p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+ p->se.load.weight = 0;
+ p->se.load.inv_weight = WMULT_CONST;
return;
}
@@ -2064,6 +2111,9 @@ task_hot(struct task_struct *p, u64 now,
if (p->sched_class != &fair_sched_class)
return 0;
+ if (unlikely(p->policy == SCHED_IDLE))
+ return 0;
+
/*
* Buddy candidates are cache hot:
*/
@@ -2335,6 +2385,24 @@ void task_oncpu_function_call(struct tas
preempt_enable();
}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+ const struct sched_class *class;
+
+ if (p->sched_class == rq->curr->sched_class) { /* same class: let that class decide */
+ rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+ } else {
+ for_each_class(class) { /* whichever of the two classes is reached first wins */
+ if (class == rq->curr->sched_class)
+ break; /* curr's class is reached first: leave curr running */
+ if (class == p->sched_class) {
+ resched_task(rq->curr); /* p's class is reached first: reschedule curr */
+ break;
+ }
+ }
+ }
+}
+
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -3156,6 +3224,8 @@ static void update_cpu_load(struct rq *t
this_rq->calc_load_update += LOAD_FREQ;
calc_load_account_active(this_rq);
}
+
+ sched_avg_update(this_rq);
}
#ifdef CONFIG_SMP
@@ -3287,7 +3357,7 @@ int can_migrate_task(struct task_struct
* 2) too many balance attempts have failed.
*/
- tsk_cache_hot = task_hot(p, rq->clock, sd);
+ tsk_cache_hot = task_hot(p, rq->clock_task, sd);
if (!tsk_cache_hot ||
sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
@@ -3470,12 +3540,17 @@ struct sd_lb_stats {
unsigned long this_load;
unsigned long this_load_per_task;
unsigned long this_nr_running;
+ unsigned long this_has_capacity;
+ unsigned int this_idle_cpus;
/* Statistics of the busiest group */
+ unsigned int busiest_idle_cpus;
unsigned long max_load;
unsigned long busiest_load_per_task;
unsigned long busiest_nr_running;
unsigned long busiest_group_capacity;
+ unsigned long busiest_has_capacity;
+ unsigned int busiest_group_weight;
int group_imb; /* Is there imbalance in this sd */
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -3497,7 +3572,10 @@ struct sg_lb_stats {
unsigned long sum_nr_running; /* Nr tasks running in the group */
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long group_capacity;
+ unsigned long idle_cpus;
+ unsigned long group_weight;
int group_imb; /* Is there an imbalance in the group ? */
+ int group_has_capacity; /* Is there extra capacity in the group? */
};
/**
@@ -3707,10 +3785,14 @@ unsigned long scale_rt_power(int cpu)
struct rq *rq = cpu_rq(cpu);
u64 total, available;
- sched_avg_update(rq);
-
total = sched_avg_period() + (rq->clock - rq->age_stamp);
- available = total - rq->rt_avg;
+
+ if (unlikely(total < rq->rt_avg)) {
+ /* Ensures that power won't end up being negative */
+ available = 0;
+ } else {
+ available = total - rq->rt_avg;
+ }
if (unlikely((s64)total < SCHED_LOAD_SCALE))
total = SCHED_LOAD_SCALE;
@@ -3748,6 +3830,7 @@ static void update_cpu_power(struct sche
if (!power)
power = 1;
+ cpu_rq(cpu)->cpu_power = power;
sdg->cpu_power = power;
}
@@ -3792,7 +3875,7 @@ static inline void update_sg_lb_stats(st
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
{
- unsigned long load, max_cpu_load, min_cpu_load;
+ unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
int i;
unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long avg_load_per_task = 0;
@@ -3806,6 +3889,7 @@ static inline void update_sg_lb_stats(st
/* Tally up the load of all CPUs in the group */
max_cpu_load = 0;
min_cpu_load = ~0UL;
+ max_nr_running = 0;
for_each_cpu_and(i, sched_group_cpus(group), cpus) {
struct rq *rq = cpu_rq(i);
@@ -3823,8 +3907,10 @@ static inline void update_sg_lb_stats(st
load = target_load(i, load_idx);
} else {
load = source_load(i, load_idx);
- if (load > max_cpu_load)
+ if (load > max_cpu_load) {
max_cpu_load = load;
+ max_nr_running = rq->nr_running;
+ }
if (min_cpu_load > load)
min_cpu_load = load;
}
@@ -3832,7 +3918,8 @@ static inline void update_sg_lb_stats(st
sgs->group_load += load;
sgs->sum_nr_running += rq->nr_running;
sgs->sum_weighted_load += weighted_cpuload(i);
-
+ if (idle_cpu(i))
+ sgs->idle_cpus++;
}
/*
@@ -3862,11 +3949,14 @@ static inline void update_sg_lb_stats(st
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+ if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
sgs->group_imb = 1;
- sgs->group_capacity =
- DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+ sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+ sgs->group_weight = group->group_weight;
+
+ if (sgs->group_capacity > sgs->sum_nr_running)
+ sgs->group_has_capacity = 1;
}
/**
@@ -3913,9 +4003,14 @@ static inline void update_sd_lb_stats(st
/*
* In case the child domain prefers tasks go to siblings
* first, lower the group capacity to one so that we'll try
- * and move all the excess tasks away.
+ * and move all the excess tasks away. We lower the capacity
+ * of a group only if the local group has the capacity to fit
+ * these excess tasks, i.e. nr_running < group_capacity. The
+ * extra check prevents the case where you always pull from the
+ * heaviest group when it is already under-utilized (possible
+ * when a large weight task outweighs the tasks on the system).
*/
- if (prefer_sibling)
+ if (prefer_sibling && !local_group && sds->this_has_capacity)
sgs.group_capacity = min(sgs.group_capacity, 1UL);
if (local_group) {
@@ -3923,14 +4018,19 @@ static inline void update_sd_lb_stats(st
sds->this = group;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
+ sds->this_has_capacity = sgs.group_has_capacity;
+ sds->this_idle_cpus = sgs.idle_cpus;
} else if (sgs.avg_load > sds->max_load &&
(sgs.sum_nr_running > sgs.group_capacity ||
sgs.group_imb)) {
sds->max_load = sgs.avg_load;
sds->busiest = group;
sds->busiest_nr_running = sgs.sum_nr_running;
+ sds->busiest_idle_cpus = sgs.idle_cpus;
sds->busiest_group_capacity = sgs.group_capacity;
+ sds->busiest_group_weight = sgs.group_weight;
sds->busiest_load_per_task = sgs.sum_weighted_load;
+ sds->busiest_has_capacity = sgs.group_has_capacity;
sds->group_imb = sgs.group_imb;
}
@@ -4076,6 +4176,7 @@ static inline void calculate_imbalance(s
return fix_small_imbalance(sds, this_cpu, imbalance);
}
+
/******* find_busiest_group() helpers end here *********************/
/**
@@ -4127,6 +4228,11 @@ find_busiest_group(struct sched_domain *
* 4) This group is more busy than the avg busieness at this
* sched_domain.
* 5) The imbalance is within the specified limit.
+ *
+ * Note: when doing newidle balance, if the local group has excess
+ * capacity (i.e. nr_running < group_capacity) and the busiest group
+ * does not have any capacity, we force a load balance to pull tasks
+ * to the local group. In this case, we skip past checks 3, 4 and 5.
*/
if (balance && !(*balance))
goto ret;
@@ -4134,6 +4240,11 @@ find_busiest_group(struct sched_domain *
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
+ /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+ if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
+ !sds.busiest_has_capacity)
+ goto force_balance;
+
if (sds.this_load >= sds.max_load)
goto out_balanced;
@@ -4142,9 +4253,28 @@ find_busiest_group(struct sched_domain *
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
+ /*
+ * In the CPU_NEWLY_IDLE case, use imbalance_pct to be conservative.
+ * And to check for busy balance use !idle_cpu instead of
+ * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+ * even when they are idle.
+ */
+ if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
+ } else {
+ /*
+ * This cpu is idle. If the busiest group load doesn't
+ * have more tasks than the number of available cpu's and
+ * there is no imbalance between this and busiest group
+ * wrt to idle cpu's, it is balanced.
+ */
+ if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+ sds.busiest_nr_running <= sds.busiest_group_weight)
+ goto out_balanced;
+ }
+force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(&sds, this_cpu, imbalance);
return sds.busiest;
@@ -4300,7 +4430,14 @@ redo:
if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]);
- sd->nr_balance_failed++;
+ /*
+ * Increment the failure counter only on periodic balance.
+ * We do not want newidle balance, which can be very
+ * frequent, to pollute the failure counter, causing
+ * excessive cache_hot migrations and active balances.
+ */
+ if (idle != CPU_NEWLY_IDLE)
+ sd->nr_balance_failed++;
if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
@@ -5045,7 +5182,7 @@ static u64 do_task_delta_exec(struct tas
if (task_current(rq, p)) {
update_rq_clock(rq);
- ns = rq->clock - p->se.exec_start;
+ ns = rq->clock_task - p->se.exec_start;
if ((s64)ns < 0)
ns = 0;
}
@@ -5189,7 +5326,7 @@ void account_system_time(struct task_str
tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
cpustat->irq = cputime64_add(cpustat->irq, tmp);
- else if (softirq_count())
+ else if (in_serving_softirq())
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
else
cpustat->system = cputime64_add(cpustat->system, tmp);
@@ -7133,7 +7270,19 @@ void __cpuinit init_idle(struct task_str
idle->se.exec_start = sched_clock();
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+ /*
+ * We're having a chicken and egg problem, even though we are
+ * holding rq->lock, the cpu isn't yet set to this cpu so the
+ * lockdep check in task_group() will fail.
+ *
+ * Similar case to sched_fork(). / Alternatively we could
+ * use task_rq_lock() here and obtain the other rq->lock.
+ *
+ * Silence PROVE_RCU
+ */
+ rcu_read_lock();
__set_task_cpu(idle, cpu);
+ rcu_read_unlock();
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
@@ -8640,6 +8789,8 @@ static void init_sched_groups_power(int
if (cpu != group_first_cpu(sd->groups))
return;
+ sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
child = sd->child;
sd->groups->cpu_power = 0;
@@ -9523,9 +9674,6 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
-#ifdef CONFIG_USER_SCHED
- alloc_size *= 2;
-#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
alloc_size += num_possible_cpus() * cpumask_size();
#endif
@@ -9543,13 +9691,6 @@ void __init sched_init(void)
init_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
-#ifdef CONFIG_USER_SCHED
- root_task_group.se = (struct sched_entity **)ptr;
- ptr += nr_cpu_ids * sizeof(void **);
-
- root_task_group.cfs_rq = (struct cfs_rq **)ptr;
- ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
init_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -9558,13 +9699,6 @@ void __init sched_init(void)
init_task_group.rt_rq = (struct rt_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
-#ifdef CONFIG_USER_SCHED
- root_task_group.rt_se = (struct sched_rt_entity **)ptr;
- ptr += nr_cpu_ids * sizeof(void **);
-
- root_task_group.rt_rq = (struct rt_rq **)ptr;
- ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CPUMASK_OFFSTACK
for_each_possible_cpu(i) {
@@ -9584,24 +9718,14 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
init_rt_bandwidth(&init_task_group.rt_bandwidth,
global_rt_period(), global_rt_runtime());
-#ifdef CONFIG_USER_SCHED
- init_rt_bandwidth(&root_task_group.rt_bandwidth,
- global_rt_period(), RUNTIME_INF);
-#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
list_add(&init_task_group.list, &task_groups);
INIT_LIST_HEAD(&init_task_group.children);
-#ifdef CONFIG_USER_SCHED
- INIT_LIST_HEAD(&root_task_group.children);
- init_task_group.parent = &root_task_group;
- list_add(&init_task_group.siblings, &root_task_group.children);
-#endif /* CONFIG_USER_SCHED */
-
- autogroup_init(&init_task);
-#endif /* CONFIG_GROUP_SCHED */
+ autogroup_init(&init_task);
+#endif /* CONFIG_CGROUP_SCHED */
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
@@ -9641,25 +9765,6 @@ void __init sched_init(void)
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
*/
init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#elif defined CONFIG_USER_SCHED
- root_task_group.shares = NICE_0_LOAD;
- init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL);
- /*
- * In case of task-groups formed thr' the user id of tasks,
- * init_task_group represents tasks belonging to root user.
- * Hence it forms a sibling of all subsequent groups formed.
- * In this case, init_task_group gets only a fraction of overall
- * system cpu resource, based on the weight assigned to root
- * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
- * by letting tasks of init_task_group sit in a separate cfs_rq
- * (init_tg_cfs_rq) and having one entity represent this group of
- * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
- */
- init_tg_cfs_entry(&init_task_group,
- &per_cpu(init_tg_cfs_rq, i),
- &per_cpu(init_sched_entity, i), i, 1,
- root_task_group.se[i]);
-
#endif
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -9682,6 +9787,7 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
+ rq->cpu_power = SCHED_LOAD_SCALE;
rq->post_schedule = 0;
rq->active_balance = 0;
rq->next_balance = jiffies;
@@ -10065,7 +10171,7 @@ static inline void unregister_rt_sched_g
}
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
@@ -10156,12 +10262,12 @@ void __sched_move_task(struct task_struc
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
- set_task_rq(tsk, task_cpu(tsk));
-
#ifdef CONFIG_FAIR_GROUP_SCHED
- if (tsk->sched_class->moved_group)
- tsk->sched_class->moved_group(tsk, on_rq);
+ if (tsk->sched_class->task_move_group)
+ tsk->sched_class->task_move_group(tsk, on_rq);
+ else
#endif
+ set_task_rq(tsk, task_cpu(tsk));
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
@@ -10179,7 +10285,7 @@ void sched_move_task(struct task_struct
task_rq_unlock(rq, &flags);
}
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
static void __set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -10321,13 +10427,6 @@ static int tg_schedulable(struct task_gr
runtime = d->rt_runtime;
}
-#ifdef CONFIG_USER_SCHED
- if (tg == &root_task_group) {
- period = global_rt_period();
- runtime = global_rt_runtime();
- }
-#endif
-
/*
* Cannot have more runtime than the period.
*/
@@ -11103,4 +11202,4 @@ void synchronize_sched_expedited(void)
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */
-#endif /* CONFIG_SCHED_BFS */
+#endif /* CONFIG_SCHED_BFS */