Skip to main content
 首页 » 操作系统

Linux 调度器之/proc/sched_debug文件

2022年07月19日 184 tintown

一、打印说明

1. 打印内容

# cat /proc/sched_debug 
Sched Debug Version: v0.11, 5.10.xx-android12-x-xx-xxxxxxxxx #1 
ktime                                   : 20791668.206238 
sched_clk                               : 20791683.112454 
cpu_clk                                 : 20791683.112454 
jiffies                                 : 4300090214 
 
sysctl_sched 
  .sysctl_sched_latency                    : 10.000000 
  .sysctl_sched_min_granularity            : 3.000000 
  .sysctl_sched_wakeup_granularity         : 2.000000 
  .sysctl_sched_child_runs_first           : 0 
  .sysctl_sched_features                   : 16722747 //使能了哪些调度feature,见features.h 
  .sysctl_sched_tunable_scaling            : 0 (none) 
 
cpu#0 
  .nr_running                    : 0 
  .nr_switches                   : 515789 
  .nr_uninterruptible            : -348 
  .next_balance                  : 4300.090217 
  .curr->pid                     : 0 
  .clock                         : 20791690.941377 
  .clock_task                    : 20393319.574563 
  .avg_idle                      : 681678 
  .max_idle_balance_cost         : 347294 
  .yld_count                     : 45325 
  .sched_count                   : 558874 
  .sched_goidle                  : 174803 
  .ttwu_count                    : 2261133 
  .ttwu_local                    : 155736 
 
cfs_rq[0]:/ 
  .exec_clock                    : 150058.081435 
  .MIN_vruntime                  : 0.000001 
  .min_vruntime                  : 1032733.837701 //最小虚拟时间 
  .max_vruntime                  : 0.000001 
  .spread                        : 0.000000 
  .spread0                       : 0.000000 
  .nr_spread_over                : 7046 
  .nr_running                    : 0 
  .load                          : 0 //负载信息 
  .load_avg                      : 0 
  .runnable_avg                  : 0 
  .util_avg                      : 0 
  .util_est_enqueued             : 0 
  .removed.load_avg              : 0 
  .removed.util_avg              : 0 
  .removed.runnable_avg          : 0 
  .tg_load_avg_contrib           : 0 
  .tg_load_avg                   : 0 
 
rt_rq[0]: 
  .rt_nr_running                 : 0 
  .rt_nr_migratory               : 0 
  .rt_throttled                  : 0 
  .rt_time                       : 3.853386 
  .rt_runtime                    : 950.000000 
 
dl_rq[0]: 
  .dl_nr_running                 : 0 
  .dl_nr_migratory               : 0 
  .dl_bw->bw                     : 996147 
  .dl_bw->total_bw               : 0 
 
runnable tasks: 
 S            task   PID         tree-key  switches  prio     wait-time             sum-exec        sum-sleep 
------------------------------------------------------------------------------------------------------------- 
 I      rcu_par_gp     4         8.725293         2   100         0.000000         0.009155         0.000000 / 
 D     hang_detect   152         0.000000       675     0         0.000000        87.342714         0.000000 / 
>R   Binder:1061_1 17584      1122.927614       598   120       189.109238      1271.457995       183.664618 /foreground 
 S    Binder:799_2   844        20.058758         2   120         0.950462         0.123000         0.000000 /foreground 
 S HwBinder:1154_1  1722      1420.876848        11   120         5.680075         1.564693         2.107003 /top-app 
 S   Binder:3472_3  3539    555381.752165        65   120        24.436231        92.525768  20237325.907593 /background 
 ... 
 S irq/520-event_0   156         0.000000         4    49         0.000000         0.307768         0.000000 / 
 
 
// 每个 CPU 都会按上面的格式打印一遍,这里只保留了 cpu0 的输出。

2. header部分打印函数

//kernel/sched/debug.c 
/*
 * Print the global header of the sched_debug output into @m: the version
 * banner, the ktime/sched_clk/cpu_clk/jiffies values, and the "sysctl_sched"
 * section with the scheduler tunables.
 */
static void sched_debug_header(struct seq_file *m) 
{ 
    u64 ktime, sched_clk, cpu_clk; 
    unsigned long flags; 
 
    /* Read the three clocks back to back with local interrupts disabled. */
    local_irq_save(flags); 
    ktime = ktime_to_ns(ktime_get()); 
    sched_clk = sched_clock(); 
    cpu_clk = local_clock(); 
    local_irq_restore(flags); 
 
    /*
     * "%.*s" with the strcspn() length prints the version string only up to
     * (not including) its first space.
     */
    SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n", 
        init_utsname()->release, 
        (int)strcspn(init_utsname()->version, " "), 
        init_utsname()->version); 
 
/*
 * P(x):  print x as a plain integer, labelled with its own name.
 * PN(x): print what SPLIT_NS(x) expands to as "<int>.<6 digits>".
 *        NOTE(review): SPLIT_NS is defined outside this excerpt; presumably
 *        it splits a nanosecond value into ms and the ns remainder - confirm.
 */
#define P(x) \ 
    SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) 
#define PN(x) \ 
    SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) 
    PN(ktime); 
    PN(sched_clk); 
    PN(cpu_clk); 
    P(jiffies); 
#undef PN 
#undef P 
 
    SEQ_printf(m, "\n"); 
    SEQ_printf(m, "sysctl_sched\n"); 
 
/* Same helpers again, now with the indented "  ." prefix used inside a section. */
#define P(x) \ 
    SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x)) 
#define PN(x) \ 
    SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) 
    PN(sysctl_sched_latency); 
    PN(sysctl_sched_min_granularity); 
    PN(sysctl_sched_wakeup_granularity); 
    P(sysctl_sched_child_runs_first); 
    P(sysctl_sched_features); 
#undef PN 
#undef P 
 
    /* Print the tunable-scaling mode as a number plus its name from the table. */
    SEQ_printf(m, "  .%-40s: %d (%s)\n", 
        "sysctl_sched_tunable_scaling", 
        sysctl_sched_tunable_scaling, 
        sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); 
    SEQ_printf(m, "\n"); 
} 
 
/*
 * Print one element of the sched_debug output into @m.
 *
 * @v encodes which element to print: subtracting 2 from it gives either -1,
 * which selects the global header, or the number of the CPU to print.
 *
 * Always returns 0.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
    int cpu = (unsigned long)(v - 2);

    /* -1 is the slot reserved for the header. */
    if (cpu == -1) {
        sched_debug_header(m);
        return 0;
    }

    print_cpu(m, cpu);
    return 0;
}

3. cpu#0 下的打印

/*
 * Print the whole "cpu#N" part of the output for @cpu: the runqueue's own
 * fields first, then the cfs_rq / rt_rq / dl_rq sections, then the task list.
 */
static void print_cpu(struct seq_file *m, int cpu) 
{ 
    struct rq *rq = cpu_rq(cpu); 
 
    SEQ_printf(m, "cpu#%d\n", cpu); 
 
/*
 * P(x): print rq->x as a signed integer; a 4-byte field goes through "%ld",
 * anything else through "%Ld".
 */
#define P(x)                                \ 
do {                                    \ 
    if (sizeof(rq->x) == 4)                        \ 
        SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \ 
    else                                \ 
        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\ 
} while (0) 
 
/* PN(x): print rq->x through SPLIT_NS() as "<int>.<6 digits>". */
#define PN(x) \ 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) 
 
    P(nr_running); 
    P(nr_switches); 
    P(nr_uninterruptible); // an unsigned long printed with a signed conversion
    PN(next_balance); 
    SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); 
    PN(clock); 
    PN(clock_task); 
#undef P 
#undef PN 
 
/* These two fields are only printed on CONFIG_SMP builds. */
#ifdef CONFIG_SMP 
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n); 
    P64(avg_idle); 
    P64(max_idle_balance_cost); 
#undef P64 
#endif 
 
/* The counters below are printed only when schedstat_enabled() is true. */
#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n)); 
    if (schedstat_enabled()) { 
        P(yld_count); 
        P(sched_count); 
        P(sched_goidle); 
        P(ttwu_count); 
        P(ttwu_local); 
    } 
#undef P 
 
    /* The calls below print the "cfs_rq[N]:", "rt_rq[N]:" and "dl_rq[N]:" sections. */
    print_cfs_stats(m, cpu); 
    print_rt_stats(m, cpu); 
    print_dl_stats(m, cpu); 
 
    print_rq(m, rq, cpu); 
    SEQ_printf(m, "\n"); 
}

4. cfs_rq[0]: 下的打印

void print_cfs_stats(struct seq_file *m, int cpu) 
{ 
    struct cfs_rq *cfs_rq, *pos; 
 
    rcu_read_lock(); 
    //对于rq->leaf_cfs_rq_list上的每一个叶子cfs_rq都调用,若没有使能组调度,就只打印 rq->cfs_rq 
    for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos) 
        print_cfs_rq(m, cpu, cfs_rq); 
    rcu_read_unlock(); 
}

如果需要 CFS 支持组调度管理,就得把本 CPU 上各个任务组对应的 cfs_rq 串到一个链表中:rq->leaf_cfs_rq_list 是这个链表的表头,每个 cfs_rq 通过自己的 leaf_cfs_rq_list 成员挂到该链表上。cfs_rq 里还有一个 on_list 成员,用来表示当前这个 cfs_rq 是否已经挂在 rq->leaf_cfs_rq_list 链表上。

/*
 * Print the "cfs_rq[N]:" section for one cfs_rq of @cpu: execution clock,
 * vruntime figures, run-queue counts and (depending on the config options
 * below) load-tracking, bandwidth and group-entity fields.
 */
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 
{ 
    /* MIN_vruntime / max_vruntime stay at -1 when no entity is queued. */
    s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, spread, rq0_min_vruntime, spread0; 
    struct rq *rq = cpu_rq(cpu); 
    struct sched_entity *last; 
    unsigned long flags; 
 
    /* With group scheduling the section title also takes a "%s" for the group path. */
#ifdef CONFIG_FAIR_GROUP_SCHED 
    SEQ_printf(m, "\n"); 
    SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); 
#else 
    SEQ_printf(m, "\n"); 
    SEQ_printf(m, "cfs_rq[%d]:\n", cpu); 
#endif 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); // format: ms.ns
 
    /* Take all vruntime readings under rq->lock, then print after unlocking. */
    raw_spin_lock_irqsave(&rq->lock, flags); 
    if (rb_first_cached(&cfs_rq->tasks_timeline)) 
        MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; 
    last = __pick_last_entity(cfs_rq); 
    if (last) 
        max_vruntime = last->vruntime; 
    min_vruntime = cfs_rq->min_vruntime; 
    rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; 
    raw_spin_unlock_irqrestore(&rq->lock, flags); 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime)); 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime", SPLIT_NS(max_vruntime)); 
    /* spread: distance between the first and the last queued entity's vruntime. */
    spread = max_vruntime - MIN_vruntime; 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); 
    /* spread0: this cfs_rq's min_vruntime relative to CPU 0's root cfs_rq. */
    spread0 = min_vruntime - rq0_min_vruntime; 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); 
    SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); 
    SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running); 
    SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight); 
#ifdef CONFIG_SMP 
    SEQ_printf(m, "  .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg); 
    SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg", cfs_rq->avg.runnable_avg); 
    SEQ_printf(m, "  .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg); 
    SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued", cfs_rq->avg.util_est.enqueued); 
    SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg", cfs_rq->removed.load_avg); 
    SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg", cfs_rq->removed.util_avg); 
    SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg", cfs_rq->removed.runnable_avg); 
#ifdef CONFIG_FAIR_GROUP_SCHED 
    SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib", cfs_rq->tg_load_avg_contrib); 
    SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg", atomic_long_read(&cfs_rq->tg->load_avg)); 
#endif 
#endif 
#ifdef CONFIG_CFS_BANDWIDTH 
    SEQ_printf(m, "  .%-30s: %d\n", "throttled", cfs_rq->throttled); 
    SEQ_printf(m, "  .%-30s: %d\n", "throttle_count", cfs_rq->throttle_count); 
#endif 
 
#ifdef CONFIG_FAIR_GROUP_SCHED 
    print_cfs_group_stats(m, cpu, cfs_rq->tg); // the se that represents this task_group on this cpu
#endif 
} 
 
/*
 * Print the fields of the sched_entity that represents task group @tg on
 * @cpu. Nothing is printed when tg->se[cpu] is NULL.
 */
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) 
{ 
    struct sched_entity *se = tg->se[cpu]; // the se that represents the task_group on this cpu
 
/*
 * P / PN print an expression as an integer / through SPLIT_NS(); the
 * *_SCHEDSTAT variants wrap the expression in schedstat_val() first.
 * Each line is labelled with the expression text itself (e.g. "se->vruntime").
 */
#define P(F)        SEQ_printf(m, "  .%-30s: %lld\n",    #F, (long long)F) 
#define P_SCHEDSTAT(F)    SEQ_printf(m, "  .%-30s: %lld\n",    #F, (long long)schedstat_val(F)) 
#define PN(F)        SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) 
#define PN_SCHEDSTAT(F)    SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) 
 
    if (!se) 
        return; 
 
    PN(se->exec_start); 
    PN(se->vruntime); 
    PN(se->sum_exec_runtime); 
 
    /* The statistics fields are printed only when schedstat_enabled() is true. */
    if (schedstat_enabled()) { 
        PN_SCHEDSTAT(se->statistics.wait_start); 
        PN_SCHEDSTAT(se->statistics.sleep_start); 
        PN_SCHEDSTAT(se->statistics.block_start); 
        PN_SCHEDSTAT(se->statistics.sleep_max); 
        PN_SCHEDSTAT(se->statistics.block_max); 
        PN_SCHEDSTAT(se->statistics.exec_max); 
        PN_SCHEDSTAT(se->statistics.slice_max); 
        PN_SCHEDSTAT(se->statistics.wait_max); 
        PN_SCHEDSTAT(se->statistics.wait_sum); 
        P_SCHEDSTAT(se->statistics.wait_count); 
    } 
 
    P(se->load.weight); 
#ifdef CONFIG_SMP 
    P(se->avg.load_avg); 
    P(se->avg.util_avg); 
    P(se->avg.runnable_avg); 
#endif 
 
#undef PN_SCHEDSTAT 
#undef PN 
#undef P_SCHEDSTAT 
#undef P 
}

5. rt_rq[0] 下的打印

void print_rt_stats(struct seq_file *m, int cpu) 
{ 
    rt_rq_iter_t iter; 
    struct rt_rq *rt_rq; 
 
    rcu_read_lock(); 
    for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) 
        print_rt_rq(m, cpu, rt_rq); 
    rcu_read_unlock(); 
} 
 
/*
 * Print the "rt_rq[N]:" section for @rt_rq: a blank line, the title with
 * @cpu, then the run-queue counts, the throttled flag and the two time values.
 */
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) 
{ 
    SEQ_printf(m, "\n"); 
    SEQ_printf(m, "rt_rq[%d]:\n", cpu); 
 
/*
 * P(x):  print rt_rq->x as a signed long long.
 * PU(x): print rt_rq->x as an unsigned long.
 * PN(x): print rt_rq->x through SPLIT_NS() as "<int>.<6 digits>".
 */
#define P(x) \ 
    SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) 
#define PU(x) \ 
    SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x)) 
#define PN(x) \ 
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) 
 
    PU(rt_nr_running); 
    /* rt_nr_migratory is only printed on CONFIG_SMP builds. */
#ifdef CONFIG_SMP 
    PU(rt_nr_migratory); 
#endif 
    P(rt_throttled); 
    PN(rt_time); 
    PN(rt_runtime); 
 
#undef PN 
#undef PU 
#undef P 
}

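原生内核使能了 CONFIG_FAIR_GROUP_SCHED,却没有使能 CONFIG_RT_GROUP_SCHED。因此每个 CPU 下 cfs_rq 会按任务组打印多份(标题后带 cgroup 路径,如 "cfs_rq[0]:/"),而 rt_rq 只打印 rq 自带的那一个(标题为 "rt_rq[0]:",不带路径)。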

6. dl_rq[0] 下的打印

/*
 * Print the "dl_rq[N]:" section for the deadline run queue embedded in
 * @cpu's runqueue.
 */
void print_dl_stats(struct seq_file *m, int cpu)
{
    struct dl_rq *dl = &cpu_rq(cpu)->dl;

    print_dl_rq(m, cpu, dl);
}
 
/*
 * Print the "dl_rq[N]:" section for @dl_rq: a blank line, the title with
 * @cpu, the two task counts of @dl_rq, and the bw / total_bw values read
 * from the dl_bw of @cpu's root domain.
 */
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
{
    struct dl_bw *bw_info;

    SEQ_printf(m, "\n");
    SEQ_printf(m, "dl_rq[%d]:\n", cpu);

    /* Both counts are printed as unsigned long. */
    SEQ_printf(m, "  .%-30s: %lu\n", "dl_nr_running",
           (unsigned long)(dl_rq->dl_nr_running));
    SEQ_printf(m, "  .%-30s: %lu\n", "dl_nr_migratory",
           (unsigned long)(dl_rq->dl_nr_migratory));

    /* The bandwidth figures come from the root domain, not from @dl_rq. */
    bw_info = &cpu_rq(cpu)->rd->dl_bw;
    SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", bw_info->bw);
    SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", bw_info->total_bw);
}

7. runnable tasks: 下的打印

/*
 * Print the "runnable tasks:" table for @rq: the column header, a separator
 * line, then one row per thread whose task_cpu() equals @rq_cpu. The thread
 * walk is done inside an RCU read-side critical section.
 */
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
    struct task_struct *leader, *thread;

    SEQ_printf(m, "\n");
    SEQ_printf(m, "runnable tasks:\n");
    SEQ_printf(m, " S            task   PID         tree-key  switches  prio"
           "     wait-time             sum-exec        sum-sleep\n");
    SEQ_printf(m, "-------------------------------------------------------"
           "------------------------------------------------------\n");

    rcu_read_lock();
    for_each_process_thread(leader, thread) {
        /* Print every thread for which task_cpu(p) == rq_cpu; skip the rest. */
        if (task_cpu(thread) == rq_cpu)
            print_task(m, rq, thread);
    }
    rcu_read_unlock();
}
 
 
/*
 * Print one row of the "runnable tasks:" table for task @p: state marker,
 * name, pid, vruntime, context-switch count, priority, the wait / exec /
 * sleep time columns and, with CONFIG_CGROUP_SCHED, the task group path.
 */
static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 
{ 
    /* The task currently running on @rq is marked ">R"; others get their state letter. */
    if (rq->curr == p) 
        SEQ_printf(m, ">R"); 
    else 
        SEQ_printf(m, " %c", task_state_to_char(p)); // every task on this CPU is listed, sleeping ones included
 
    SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ", 
        p->comm, task_pid_nr(p), 
        SPLIT_NS(p->se.vruntime), // format: ms.ns
        (long long)(p->nvcsw + p->nivcsw), // voluntary switches (gave up the cpu) + involuntary ones (preempted)
        p->prio); 
 
    SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", 
        SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)), // total wait time
        SPLIT_NS(p->se.sum_exec_runtime), // total execution time
        SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime))); // pure sleep time (sleep + D state)
 
    /*
     * NOTE(review): this call has no trailing ';'. Whether that is valid
     * depends on how SEQ_printf_task_group_path expands, and the macro is
     * defined outside this excerpt - confirm.
     */
#ifdef CONFIG_CGROUP_SCHED 
    SEQ_printf_task_group_path(m, task_group(p), " %s") // the task's cgroup group
#endif 
 
    SEQ_printf(m, "\n"); 
}

本文参考链接:https://www.cnblogs.com/hellokitty2/p/15664139.html