Skip to main content
 首页 » 操作系统

Linux 调度器之CPU拓扑结构和调度域建立

2022年07月19日24EasonJim

基于MTK kernel-4.14

一、CPU 拓扑结构的构建

1. Arm64 架构的cpu拓扑结构存储在全局数组 struct cpu_topology cpu_topology[NR_CPUS] 变量当中

2. struct cpu_topology 结构的定义:

// include/asm/topology.h 
struct cpu_topology {   
    int thread_id;   
    int core_id;   
    int cluster_id;   
    cpumask_t thread_sibling;   
    cpumask_t core_sibling;   
};   
   
extern struct cpu_topology cpu_topology[NR_CPUS];   
#define topology_physical_package_id(cpu)   (cpu_topology[cpu].cluster_id)   
#define topology_core_id(cpu)       (cpu_topology[cpu].core_id)   
#define topology_core_cpumask(cpu)  (&cpu_topology[cpu].core_sibling)   
#define topology_sibling_cpumask(cpu)   (&cpu_topology[cpu].thread_sibling) 

3. cpu_topology[] 数组的构建来自设备 dts 文件的 "/cpus" 节点下的 "cpu-map" 节点,更新函数为:

//kernel_init() --> kernel_init_freeable() --> smp_prepare_cpus() --> init_cpu_topology() --> parse_dt_topology() --> parse_cluster() --> parse_core() 
static int __init parse_core(struct device_node *core, int cluster_id, int core_id) 
{ 
    ... 
    cpu = get_cpu_for_node(core); 
    if (cpu >= 0) { 
        cpu_topology[cpu].cluster_id = cluster_id; 
        cpu_topology[cpu].core_id = core_id; 
    } 
    ... 
} 
 
//kernel_init() --> kernel_init_freeable()  --> smp_prepare_cpus() --> store_cpu_topology() --> update_siblings_masks() 
static void update_siblings_masks(unsigned int cpuid) 
{ 
    struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; 
    int cpu; 
 
    /* update core and thread sibling masks */ 
    for_each_possible_cpu(cpu) { 
        cpu_topo = &cpu_topology[cpu]; 
 
        if (cpuid_topo->cluster_id != cpu_topo->cluster_id) 
            continue; 
 
        cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); 
        if (cpu != cpuid) 
            cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); //互相设置,只更新了一个cluster的! 
 
        if (cpuid_topo->core_id != cpu_topo->core_id) 
            continue; 
 
        cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); 
        if (cpu != cpuid) 
            cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); 
    } 
}

4. 由附上的debug打印可知,解析后的cpu拓扑结构中各个成员得值如下:

---------------------debug_printf_cpu_topology---------------------- 
thread_id = -1, core_id = 0, cluster_id = 0, thread_sibling = 0, core_sibling = 0-3 
thread_id = -1, core_id = 1, cluster_id = 0, thread_sibling = 1, core_sibling = 0-3 
thread_id = -1, core_id = 2, cluster_id = 0, thread_sibling = 2, core_sibling = 0-3 
thread_id = -1, core_id = 3, cluster_id = 0, thread_sibling = 3, core_sibling = 0-3 
thread_id = -1, core_id = 0, cluster_id = 1, thread_sibling = 4, core_sibling = 4-6 
thread_id = -1, core_id = 1, cluster_id = 1, thread_sibling = 5, core_sibling = 4-6 
thread_id = -1, core_id = 2, cluster_id = 1, thread_sibling = 6, core_sibling = 4-6 
thread_id = -1, core_id = 0, cluster_id = 2, thread_sibling = 7, core_sibling = 7 
-------------------------------------------------

疑问:update_siblings_masks()中只是更新了一个cluster的,为什么会所有的cpu的 thread_sibling/core_sibling 都有更新到 

二、调度域的构建

1. 相关结构描述

/* 
 * 对于ARM的大小核架构只有两个调度域层级,MC 和 DIE,SMT是超线程,只有X86平台上有。 
 */ 
static struct sched_domain_topology_level default_topology[] = { 
#ifdef CONFIG_SCHED_SMT 
    { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, //&cpu_topology[cpu].thread_sibling 这个没有使能 
#endif 
#ifdef CONFIG_SCHED_MC 
    { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, //&cpu_topology[cpu].core_sibling,初始化了sd_flags这个成员函数指针 
#endif 
    { cpu_cpu_mask, SD_INIT_NAME(DIE) }, //return node_to_cpumask_map[per_cpu(numa_node, cpu)] 
    { NULL, }, 
}; 
 
/* 系统中构建调度域使用的是 sched_domain_topology,其指向 default_topology 数组 */ 
static struct sched_domain_topology_level *sched_domain_topology = default_topology; 
 
#define for_each_sd_topology(tl)  for (tl = sched_domain_topology; tl->mask; tl++)

2. 调度域的构建函数

// sched_init_smp --> init_sched_domains --> build_sched_domains 
static int build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) 
{ 
    enum s_alloc alloc_state; 
    struct sched_domain *sd; 
    struct s_data d; 
    int i, ret = -ENOMEM; 
 
    /* 
     * 对全局 default_topology 的 tl->data 的各个per-cpu的变量成员分配内存,对 s_data.rd 分配内存。 
     */ 
    alloc_state = __visit_domain_allocation_hell(&d, cpu_map); /*初始化时是cpu_active_mask*/ 
    if (alloc_state != sa_rootdomain) 
        goto error; 
 
    /* Set up domains for CPUs specified by the cpu_map */ 
    for_each_cpu(i, cpu_map) { 
        struct sched_domain_topology_level *tl; 
 
        sd = NULL; 
        /*只有MC和DIE两个*/ 
        for_each_sd_topology(tl) { 
            //为每个cpu 的每个层级都 build 调度域 
            sd = build_sched_domain(tl, cpu_map, attr, sd, i); 
            //相等说明 d.sd 只指向MC层级的 sd 
            if (tl == sched_domain_topology) 
                *per_cpu_ptr(d.sd, i) = sd; 
            if (tl->flags & SDTL_OVERLAP) //defalut的层级数组每个元素的tl->flags都是没有初始化的,这个不执行 
                sd->flags |= SD_OVERLAP; 
        } 
    } 
 
    /* Build the groups for the domains */ 
    for_each_cpu(i, cpu_map) { 
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { //d.sd 是指向MC的每个domain的,此时子层级的已经指向了父层级的,父子的都可以遍历到了 
            sd->span_weight = cpumask_weight(sched_domain_span(sd)); //参数 sd->span 
            if (sd->flags & SD_OVERLAP) {//不成立 
                if (build_overlap_sched_groups(sd, i)) 
                    goto error; 
            } else { 
                if (build_sched_groups(sd, i)) //sd->span 内的sg构成一个环形单链表 
                    goto error; 
            } 
        } 
    } 
 
    /* Calculate CPU capacity for physical packages and nodes */ 
    for (i = nr_cpumask_bits-1; i >= 0; i--) { 
        struct sched_domain_topology_level *tl = sched_domain_topology; 
 
        if (!cpumask_test_cpu(i, cpu_map)) 
            continue; 
 
        //从这里可以看出每个CPU,每个层级都有一个 sd 结构 
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) { 
            init_sched_groups_energy(i, sd, tl->energy); //第三个参数恒为NULL 
            claim_allocations(i, sd); 
            init_sched_groups_capacity(i, sd); //这里可以看出sdg->sgc->capacity是sd中所有cpu的算力之和 
        } 
    } 
 
    /* Attach the domains 为每一个rq attach domain*/ 
    rcu_read_lock(); 
    for_each_cpu(i, cpu_map) { 
        /*读取的初始化为-1的*/ 
        int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu); 
        int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu); 
 
        sd = *per_cpu_ptr(d.sd, i); 
 
        /*找出最小和最大算力的cpu,sd的算力内容还是从rq里面来的*/ 
        if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig > cpu_rq(max_cpu)->cpu_capacity_orig)) 
            WRITE_ONCE(d.rd->max_cap_orig_cpu, i); 
 
        if ((min_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig < cpu_rq(min_cpu)->cpu_capacity_orig)) 
            WRITE_ONCE(d.rd->min_cap_orig_cpu, i); 
 
        /*传参sd = *per_cpu_ptr(d.sd, i),d.rd 为系统唯一的root_domain结构,i是cpu id */ 
        cpu_attach_domain(sd, d.rd, i); 
    } 
    rcu_read_unlock(); 
 
    if (!cpumask_empty(cpu_map)) 
        update_asym_cpucapacity(cpumask_first(cpu_map)); 
 
    ret = 0; 
 
error: 
    /* 
     * 这里面除了 root_domain 判断了引用计数,没有释放外,之前所有申请初始化的成员都已经free掉了。 
     * 自己加debug打印证实的确是释放掉了,但是cpu的domain拓扑还是存在,为什么? 
     */ 
    __free_domain_allocs(&d, alloc_state, cpu_map); 
    return ret; 
}

3. 对于 4+3+1 cpu架构的Soc,加debug后画出的拓扑图如下:

注:sds 为 struct sched_domain_shared *shared 的缩写。

由实验结果可知

(1). 小核和中核的 cpu_rq(cpu)->sd 指向其MC层级的 sched_domain 结构,大核没有MC层级的 sched_domain 结构,大核的 cpu_rq(cpu)->sd 指向其DIE层级的sched_domain 结构。

(2). 所有cpu的 cpu_rq(cpu)->rd 指向同一个全局 root_domain 结构。

(3). DIE层级的 sched_group_capacity.capacity 是其对应的 MC 层级的所有cpu的算力的和,本cluster有几个核就是几倍。

补充:对应层级的 sched_domain 的 private 成员会指向其对应层级的 sched_domain_topology_level.data 成员,也就是 default_topology[i].data,但是 data 中的各个 per-cpu 的指针在build完成后就全释放干净了。

4. 对于 4+4 cpu架构的soc,其调度域拓扑结构如下:

 

5. cache 相关per-cpu变量的构建

// kernel/sched/sched.h 
//select_idle_cpu/__select_idle_sibling/select_idle_sibling_cstate_aware/set_cpu_sd_state_busy/set_cpu_sd_state_idle/nohz_kick_needed 中使用 
DECLARE_PER_CPU(struct sched_domain *, sd_llc); 
DECLARE_PER_CPU(int, sd_llc_size); //fair.c中唤醒路径中的 wake_wide 中使用 
DECLARE_PER_CPU(int, sd_llc_id); //cpus_share_cache 中通过cpu的这个变量来判断是否共享cache 
DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); //set_idle_cores/test_idle_cores 
DECLARE_PER_CPU(struct sched_domain *, sd_numa); //task_numa_migrate中使用 
DECLARE_PER_CPU(struct sched_domain *, sd_asym); //fair.c nohz_kick_needed中使用 
DECLARE_PER_CPU(struct sched_domain *, sd_ea); //find_best_target 中有使用 
DECLARE_PER_CPU(struct sched_domain *, sd_scs); //compute_energy/mtk_idle_power/mtk_busy_power 中都有使用

更新函数:

//sched_init_smp --> init_sched_domains --> build_sched_domains --> cpu_attach_domain --> update_top_cache_domain 
static void update_top_cache_domain(int cpu) 
{ 
    struct sched_domain_shared *sds = NULL; 
    struct sched_domain *sd; 
    struct sched_domain *ea_sd = NULL; 
    int id = cpu; 
    int size = 1; 
 
    /* 找到最顶层的 SD_SHARE_PKG_RESOURCES 来初始化*/ 
    sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); 
    if (sd) { 
        id = cpumask_first(sched_domain_span(sd)); 
        size = cpumask_weight(sched_domain_span(sd)); 
        sds = sd->shared; 
    } 
 
    rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); 
    per_cpu(sd_llc_size, cpu) = size; 
    per_cpu(sd_llc_id, cpu) = id; 
    rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); 
 
    /* parent->parent是 NULL,没找到的话就返回NULL */ 
    sd = lowest_flag_domain(cpu, SD_NUMA); 
    rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); 
 
    sd = highest_flag_domain(cpu, SD_ASYM_PACKING); 
    rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); 
 
    //从下往上遍历,找到一个初始化的就赋值过去 
    for_each_domain(cpu, sd) { 
        if (sd->groups->sge) 
            ea_sd = sd; 
        else 
            break; 
    } 
    rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); 
 
    sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES); 
    rcu_assign_pointer(per_cpu(sd_scs, cpu), sd); 
} 
 
/* default_topology 层级数组初始化时传的函数指针 */ 
static inline int cpu_core_flags(void) 
{ 
    return SD_SHARE_PKG_RESOURCES; 
} 
 
 
static inline struct sched_domain *highest_flag_domain(int cpu, int flag) 
{ 
    struct sched_domain *sd, *hsd = NULL; 
 
    for_each_domain(cpu, sd) { //由child domain开始向上查找 
        if (!(sd->flags & flag)) 
            break; 
        hsd = sd; 
    } 
 
    return hsd; //返回的一个DIE层级的sd 
}

由 highest_flag_domain() 和MC层级传的flag函数指针可知,获取的是一个DIE层级的调度域,也就是说一个cluster内的cpu是共享cache的,但是除了大核,只有CPU7(这里说的cache应该是二级cache)。

对每个cpu进行debug打印的各个成员得内容:

cache: sd = 000000007607d14d, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 0000000005eb165b, sdscs=000000007607d14d 
cache: sd = 0000000073059b52, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 000000001b3b27f3, sdscs=0000000073059b52 
cache: sd = 000000005b94d1f5, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 000000009c9ce4e7, sdscs=000000005b94d1f5 
cache: sd = 0000000034f5aa86, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 00000000aa75a8a2, sdscs=0000000034f5aa86 
cache: sd = 0000000015506191, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000375c082c, sdscs=0000000015506191 
cache: sd = 0000000013e36405, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000f629682d, sdscs=0000000013e36405 
cache: sd = 00000000687439ee, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000d6ba54d5, sdscs=00000000687439ee 
cache: sd =           (null), size = 1, id = 7, sds=           (null), sdnuma = (null), sdasym = (null), sdea = 00000000b81ad593, sdscs=          (null)

(1) 为 NULL 的就是没有找到相关的调度域。

(2) sd_llc 和 sd_scs 具有相同指向,和 cpu_rq(cpu)->sd 和 MC层级的sd 四者是相同的指向。由于大核CPU7没有MC层级的调度域,所以其的为NULL。

(3) sd_llc_size 表示此cpu所在的cluster的cpu的个数。

(3) sd_llc_id 表示此cpu所在的cluster的首个cpu的id。

(4) sd_llc_shared 同一个cluster的cpu指向同一个 struct sched_domain_shared 结构,这个指向和MC层级调度域的 sd->sds 指向相同。

(5) sd_numa 和 sd_asym 由于 default_topology 变量中没有指定相关的flag,所以为NULL.

(6) sd_ea 和每个cpu的DIE层级的 sched_domain 成员具有相同指向。

设置位置:

//kernel/sched/topology.c kernel-5.10 
//是定义,sched.h中是声明 
//update_top_cache_domain: 唯一设置位置,指向MC层级的sd 
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); 
//update_top_cache_domain: 唯一设置位置,MC层级的sd的cpu个数,也就是各个cpu所在cluster中cpu个数 
DEFINE_PER_CPU(int, sd_llc_size); 
//update_top_cache_domain: 唯一设置位置,cpu所在cluster的首个cpu的id 
DEFINE_PER_CPU(int, sd_llc_id); 
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); 
//update_top_cache_domain: 唯一设置位置,全部指向NULL 
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa); 
//update_top_cache_domain: 唯一设置位置,全部指向NULL 
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); 
//update_top_cache_domain: 唯一设置位置,指向每个CPU对应的DIE层级的sd 
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); 
DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); 
 
static void update_top_cache_domain(int cpu) 
{ 
    struct sched_domain_shared *sds = NULL; 
    struct sched_domain *sd; 
    int id = cpu; 
    int size = 1; 
 
    sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); //返回MC层级的sd 
    if (sd) { 
        id = cpumask_first(sched_domain_span(sd)); 
        size = cpumask_weight(sched_domain_span(sd)); 
        sds = sd->shared; 
    } 
 
    rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); //指向MC层级的sd 
    per_cpu(sd_llc_size, cpu) = size; //MC层级的sd的cpu个数,也就是各个cpu所在cluster中cpu个数 
    per_cpu(sd_llc_id, cpu) = id; //cpu所在cluster的首个cpu的id 
    rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); 
 
    sd = lowest_flag_domain(cpu, SD_NUMA); //MC和DIE层级都没定义这个flag,所以返回NULL 
    rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); 
 
    sd = highest_flag_domain(cpu, SD_ASYM_PACKING); //MC和DIE层级都没定义这个flag,所以返回NULL 
    rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);  
 
    sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY); //返回此CPU对应的DIE层级的sd 
    rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd); 
}

各层级domain的flag:

//kernel-5.10 
# cat /proc/sys/kernel/sched_domain/cpu0/domain0/flags 
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_PKG_RESOURCES //MC 
# cat /proc/sys/kernel/sched_domain/cpu0/domain1/flags 
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_CPUCAPACITY SD_PREFER_SIBLING //DIE 
... 
# cat /proc/sys/kernel/sched_domain/cpu7/domain0/flags 
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_CPUCAPACITY SD_PREFER_SIBLING //DIE

6. Debug打印代码

/* 放到 kernel/sched 下面 */ 
 
#define pr_fmt(fmt) "topo_debug: " fmt 
 
#include <linux/fs.h> 
#include <linux/sched.h> 
#include <linux/proc_fs.h> 
#include <linux/seq_file.h> 
#include <linux/string.h> 
#include <linux/printk.h> 
#include <asm/topology.h> 
#include <linux/cpumask.h> 
#include <linux/sched/topology.h> 
#include "sched.h" 
 
extern struct cpu_topology cpu_topology[NR_CPUS]; 
 
extern struct sched_domain_topology_level *sched_domain_topology; //需要去掉原文件中的static修饰 
 
#define for_each_sd_topology(tl) for (tl = sched_domain_topology; tl->mask; tl++) 
 
 
struct domain_topo_debug_t { 
    int cmd; 
}; 
 
static struct domain_topo_debug_t dtd; 
 
 
static void debug_printf_sched_domain(struct seq_file *m, struct sched_domain *sd, char *level, int cpu) 
{ 
    if (!sd) { 
        if (level) { 
            seq_printf(m, "TL = %s, cpu = %d, sd = NULL\n", level, cpu); 
        } else { 
            seq_printf(m, "sd = NULL\n"); 
        } 
    } else { 
        seq_printf(m, "sched_domain sd = %p: parent = %p, child = %p, groups = %p, name = %s, private = %p, shared = %p, span = %*pbl, flags=0x%x\n", 
                sd, sd->parent, sd->child, sd->groups, sd->name, sd->private, sd->shared, cpumask_pr_args(to_cpumask(sd->span)), sd->flags); 
    } 
} 
 
static void debug_printf_sched_group(struct seq_file *m, struct sched_group *sg, char *level, int cpu) 
{ 
    if (!sg) { 
        if (level) { 
            seq_printf(m, "TL = %s, cpu = %d, sg = NULL\n", level, cpu); 
        } else { 
            seq_printf(m, "sg = NULL\n"); 
        } 
    } else { 
        seq_printf(m, "sched_group sg = %p: next = %p, ref = %d, group_weight = %d, sgc = %p, asym_prefer_cpu = %d, sge = %p, cpumask = %*pbl\n", 
                sg, sg->next, atomic_read(&sg->ref), sg->group_weight, sg->sgc, sg->asym_prefer_cpu, sg->sge, cpumask_pr_args(to_cpumask(sg->cpumask))); 
    } 
} 
 
 
static void debug_printf_sched_group_capacity(struct seq_file *m, struct sched_group_capacity *sgc, char *level, int cpu) 
{ 
    if (!sgc) { 
        if (level) { 
            seq_printf(m, "TL = %s, cpu = %d, sgc = NULL\n", level, cpu); 
        } else { 
            seq_printf(m, "sgc = NULL\n"); 
        } 
    } else { 
        seq_printf(m, "sched_group_capacity sgc = %p: ref = %d, capacity = %d, min_capacity = %d, max_capacity = %d, next_update = %d, imbalance = %d, id = %d, cpumask = %*pbl\n", 
                sgc, atomic_read(&sgc->ref), sgc->capacity, sgc->min_capacity, sgc->max_capacity, sgc->next_update, sgc->imbalance, sgc->id, 
                cpumask_pr_args(to_cpumask(sgc->cpumask))); 
    } 
} 
 
static void debug_printf_sched_domain_shared(struct seq_file *m, struct sched_domain_shared *sds, char *level, int cpu) 
{ 
    if (!sds) { 
        if (level) { 
            seq_printf(m, "TL = %s, cpu = %d, sds = NULL\n", level, cpu); 
        } else { 
            seq_printf(m, "sds = NULL\n"); 
        } 
    } else { 
        seq_printf(m, "sched_domain_shared sds = %p: ref = %d, nr_busy_cpus = %d, has_idle_cores = %d, overutilized = %d\n", 
                sds, atomic_read(&sds->ref), atomic_read(&sds->nr_busy_cpus), sds->has_idle_cores, sds->overutilized); 
    } 
} 
 
 
static void debug_printf_sd_sds_sg_sgc(struct seq_file *m, struct sched_domain_topology_level *tl, int cpu) 
{ 
    struct sd_data *sdd; 
    struct sched_domain *sd; 
    struct sched_domain_shared *sds; 
    struct sched_group *sg; 
    struct sched_group_capacity *sgc; 
 
    sdd = &tl->data; 
    seq_printf(m, "-------------------------TL = %s, cpu = %d, sdd = %p, %s--------------------------------\n", tl->name, cpu, sdd, __func__); 
 
    if (!sdd->sd) { 
        seq_printf(m, "TL = %s, cpu = %d, sdd->sd = NULL\n", tl->name, cpu); 
    } else { 
        sd = *per_cpu_ptr(sdd->sd, cpu); 
        debug_printf_sched_domain(m, sd, tl->name, cpu); 
    } 
 
    if (!sdd->sds) { 
        seq_printf(m, "TL = %s, cpu = %d, sdd->sds = NULL\n", tl->name, cpu); 
    } else { 
        sds = *per_cpu_ptr(sdd->sds, cpu); 
        debug_printf_sched_domain_shared(m, sds, tl->name, cpu); 
    } 
 
    if (!sdd->sg) { 
        seq_printf(m, "TL = %s, cpu = %d, sdd->sg = NULL\n", tl->name, cpu); 
    } else { 
        sg = *per_cpu_ptr(sdd->sg, cpu); 
        debug_printf_sched_group(m, sg, tl->name, cpu); 
    } 
 
    if (!sdd->sgc) { 
        seq_printf(m, "TL = %s, cpu = %d, sdd->sgc = NULL\n", tl->name, cpu); 
    } else { 
        sgc = *per_cpu_ptr(sdd->sgc, cpu); 
        debug_printf_sched_group_capacity(m, sgc, tl->name, cpu); 
    } 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
 
static void debug_printf_sd_sds_sg_sgc_cpu_rq(struct seq_file *m, int cpu) 
{ 
    struct rq *rq = cpu_rq(cpu); 
    seq_printf(m, "---------------------cpu=%d, %s----------------------\n", cpu, __func__); 
    if (rq->sd) { 
        seq_printf(m, "rq->sd:\n"); 
        debug_printf_sched_domain(m, rq->sd, NULL, cpu); 
        seq_printf(m, "rq->sd->groups:\n"); 
        debug_printf_sched_group(m, rq->sd->groups, NULL, cpu); 
        if (rq->sd->groups) { 
            seq_printf(m, "rq->sd->groups->sgc:\n"); 
            debug_printf_sched_group_capacity(m, rq->sd->groups->sgc, NULL, cpu); 
        } 
        seq_printf(m, "rq->sd->shared:\n"); 
        debug_printf_sched_domain_shared(m, rq->sd->shared, NULL, cpu);     
    } 
    if (rq->sd && rq->sd->parent) { 
        seq_printf(m, "rq->sd->parent:\n"); 
        debug_printf_sched_domain(m, rq->sd->parent, NULL, cpu); 
        seq_printf(m, "rq->sd->parent->groups:\n"); 
        debug_printf_sched_group(m, rq->sd->parent->groups, NULL, cpu); 
        if (rq->sd->parent->groups) { 
            seq_printf(m, "rq->sd->parent->groups->sgc:\n"); 
            debug_printf_sched_group_capacity(m, rq->sd->parent->groups->sgc, NULL, cpu); 
        } 
        seq_printf(m, "rq->sd->parent->shared:\n"); 
        debug_printf_sched_domain_shared(m, rq->sd->parent->shared, NULL, cpu);     
    } 
    if (rq->sd && rq->sd->child) { 
        seq_printf(m, "rq->sd->child:\n"); 
        debug_printf_sched_domain(m, rq->sd->child, NULL, cpu); 
        seq_printf(m, "rq->sd->child->groups:\n"); 
        debug_printf_sched_group(m, rq->sd->child->groups, NULL, cpu); 
        if (rq->sd->child->groups) { 
            seq_printf(m, "rq->sd->child->groups->sgc:\n"); 
            debug_printf_sched_group_capacity(m, rq->sd->child->groups->sgc, NULL, cpu); 
        } 
        seq_printf(m, "rq->sd->child->shared:\n"); 
        debug_printf_sched_domain_shared(m, rq->sd->child->shared, NULL, cpu);     
    } 
    if (rq->sd && rq->sd->parent && rq->sd->parent->parent) { 
        seq_printf(m, "rq->sd->parent->parent:\n"); 
        debug_printf_sched_domain(m, rq->sd->parent->parent, NULL, cpu); 
        seq_printf(m, "rq->sd->parent->parent->groups:\n"); 
        debug_printf_sched_group(m, rq->sd->parent->parent->groups, NULL, cpu); 
        if (rq->sd->parent->parent->groups) { 
            seq_printf(m, "rq->sd->parent->parent->groups->sgc:\n"); 
            debug_printf_sched_group_capacity(m, rq->sd->parent->parent->groups->sgc, NULL, cpu); 
        } 
        seq_printf(m, "rq->sd->parent->parent->shared:\n"); 
        debug_printf_sched_domain_shared(m, rq->sd->parent->parent->shared, NULL, cpu);     
    } 
    if (rq->sd && rq->sd->child && rq->sd->child->child) { 
        seq_printf(m, "rq->sd->child->child:\n"); 
        debug_printf_sched_domain(m, rq->sd->child->child, NULL, cpu); 
        seq_printf(m, "rq->sd->child->child->groups:\n"); 
        debug_printf_sched_group(m, rq->sd->child->child->groups, NULL, cpu); 
        if (rq->sd->child->child->groups) { 
            seq_printf(m, "rq->sd->child->child->groups->sgc:\n"); 
            debug_printf_sched_group_capacity(m, rq->sd->child->child->groups->sgc, NULL, cpu); 
        } 
        seq_printf(m, "rq->sd->child->child->shared:\n"); 
        debug_printf_sched_domain_shared(m, rq->sd->child->child->shared, NULL, cpu);     
    } 
 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
 
static void debug_printf_cpu_rq(struct seq_file *m, int cpu) 
{ 
    struct callback_head *callback; 
    struct rq *rq = cpu_rq(cpu); 
 
    seq_printf(m, "---------------------cpu=%d, %s----------------------\n", cpu, __func__); 
    seq_printf(m, "rq = %p: rd = %p, sd = %p, cpu_capacity = %d, cpu_capacity_orig = %d, max_idle_balance_cost = %lu\n", 
            rq, rq->rd, rq->sd, rq->cpu_capacity, rq->cpu_capacity_orig, rq->max_idle_balance_cost); 
 
    seq_printf(m, "balance_callback: \n"); 
    callback = rq->balance_callback; 
    while(callback) { 
        seq_printf(m, "%pf\n", callback->func); 
        callback = callback->next; 
    } 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
 
 
static void debug_printf_root_domain(struct seq_file *m) 
{ 
    struct rq *rq = cpu_rq(0); 
    struct root_domain *rd = rq->rd; 
 
    seq_printf(m, "---------------------%s----------------------\n", __func__); 
    if (rd) { 
        seq_printf(m, "refcount = %d, span = %*pbl, max_cpu_capacity.val = %d, max_cpu_capacity.cpu = %d, max_cap_orig_cpu = %d, min_cap_orig_cpu = %d\n", 
            atomic_read(&rd->refcount), cpumask_pr_args(rd->span), rd->max_cpu_capacity.val, rd->max_cpu_capacity.cpu, rd->max_cap_orig_cpu, rd->min_cap_orig_cpu); 
    } 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
 
static void debug_printf_cpu_topology(struct seq_file *m) 
{ 
    int i; 
    struct cpu_topology *ct = cpu_topology; 
 
    seq_printf(m, "---------------------%s----------------------\n", __func__); 
    for (i = 0; i < NR_CPUS; i++) { 
        seq_printf(m, "thread_id = %d, core_id = %d, cluster_id = %d, thread_sibling = %*pbl, core_sibling = %*pbl\n", 
            ct->thread_id, ct->core_id, ct->cluster_id, cpumask_pr_args(&ct->thread_sibling), cpumask_pr_args(&ct->core_sibling)); 
        ct++; //new add 
    } 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
static void debug_printf_cache(struct seq_file *m, int cpu) 
{ 
    struct sched_domain *sd, *sdnuma, *sdasym, *sdea, *sdscs; 
    struct sched_domain_shared *sds; 
    int size, id; 
 
    sd = rcu_dereference(per_cpu(sd_llc, cpu)); 
    size = per_cpu(sd_llc_size, cpu); 
    id = per_cpu(sd_llc_id, cpu); 
    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); 
    sdnuma = rcu_dereference(per_cpu(sd_numa, cpu)); 
    sdasym = rcu_dereference(per_cpu(sd_asym, cpu)); 
    sdea = rcu_dereference(per_cpu(sd_ea, cpu)); 
    sdscs = rcu_dereference(per_cpu(sd_scs, cpu)); 
 
    seq_printf(m, "---------------------%s----------------------\n", __func__); 
    seq_printf(m, "cache: sd = %p, size = %d, id = %d, sds= %p, sdnuma = %p, sdasym = %p, sdea = %p, sdscs=%p\n", 
            sd, size, id, sds, sdnuma, sdasym, sdea, sdscs); 
    seq_printf(m, "-------------------------------------------------\n\n"); 
} 
 
 
static void debug_printf_all(struct seq_file *m, const struct cpumask *cpu_mask) 
{ 
    int cpu; 
    struct sched_domain_topology_level *tl; 
 
    for_each_cpu(cpu, cpu_mask) { 
        for_each_sd_topology(tl) { 
            debug_printf_sd_sds_sg_sgc(m, tl, cpu); 
        } 
    } 
 
    for_each_cpu(cpu, cpu_mask) { //分开更方便查看 
        debug_printf_cpu_rq(m, cpu); 
        debug_printf_sd_sds_sg_sgc_cpu_rq(m, cpu); 
        debug_printf_cache(m, cpu); 
    } 
 
 
    debug_printf_root_domain(m); 
    debug_printf_cpu_topology(m); 
} 
 
 
static ssize_t domain_topo_debug_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 
{ 
 
    int ret, cmd_value; 
    char buffer[32] = {0}; 
 
    if (count >= sizeof(buffer)) { 
        count = sizeof(buffer) - 1; 
    } 
    if (copy_from_user(buffer, buf, count)) { 
        pr_info("copy_from_user failed\n"); 
        return -EFAULT; 
    } 
    ret = sscanf(buffer, "%d", &cmd_value); 
    if(ret <= 0){ 
        pr_info("sscanf dec failed\n"); 
        return -EINVAL; 
    } 
    pr_info("cmd_value=%d\n", cmd_value); 
 
    dtd.cmd = cmd_value; 
 
    return count; 
} 
 
static int domain_topo_debug_show(struct seq_file *m, void *v) 
{ 
    switch (dtd.cmd) { 
    case 0: 
        debug_printf_all(m, cpu_possible_mask); 
        break; 
    case 1: 
        debug_printf_all(m, cpu_online_mask); 
        break; 
    case 2: 
        debug_printf_all(m, cpu_present_mask); 
        break; 
    case 3: 
        debug_printf_all(m, cpu_active_mask); 
        break; 
    case 4: 
        debug_printf_all(m, cpu_isolated_mask); 
        break; 
 
    default: 
        pr_info("dtd.cmd is invalid!\n"); 
        break; 
    } 
 
    return 0; 
} 
 
 
static int domain_topo_debug_open(struct inode *inode, struct file *file) 
{ 
    return single_open(file, domain_topo_debug_show, NULL); 
} 
 
static const struct file_operations domain_topo_debug_fops = { 
    .open    = domain_topo_debug_open, 
    .read    = seq_read, 
    .write   = domain_topo_debug_write, 
    .llseek  = seq_lseek, 
    .release = single_release, 
}; 
 
 
static int __init domain_topo_debug_init(void) 
{ 
    proc_create("domain_topo_debug", S_IRUGO | S_IWUGO, NULL, &domain_topo_debug_fops); 
 
    pr_info("domain_topo_debug probed\n", nr_cpu_ids); 
 
    return 0; 
} 
fs_initcall(domain_topo_debug_init);

Debug打印内容:

# cat domain_topo_debug 
-------------------------TL = MC, cpu = 0, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 0, sdd->sd = NULL 
TL = MC, cpu = 0, sdd->sds = NULL 
TL = MC, cpu = 0, sdd->sg = NULL 
TL = MC, cpu = 0, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 0, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 0, sdd->sd = NULL 
TL = DIE, cpu = 0, sdd->sds = NULL 
TL = DIE, cpu = 0, sdd->sg = NULL 
TL = DIE, cpu = 0, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 1, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 1, sdd->sd = NULL 
TL = MC, cpu = 1, sdd->sds = NULL 
TL = MC, cpu = 1, sdd->sg = NULL 
TL = MC, cpu = 1, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 1, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 1, sdd->sd = NULL 
TL = DIE, cpu = 1, sdd->sds = NULL 
TL = DIE, cpu = 1, sdd->sg = NULL 
TL = DIE, cpu = 1, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 2, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 2, sdd->sd = NULL 
TL = MC, cpu = 2, sdd->sds = NULL 
TL = MC, cpu = 2, sdd->sg = NULL 
TL = MC, cpu = 2, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 2, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 2, sdd->sd = NULL 
TL = DIE, cpu = 2, sdd->sds = NULL 
TL = DIE, cpu = 2, sdd->sg = NULL 
TL = DIE, cpu = 2, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 3, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 3, sdd->sd = NULL 
TL = MC, cpu = 3, sdd->sds = NULL 
TL = MC, cpu = 3, sdd->sg = NULL 
TL = MC, cpu = 3, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 3, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 3, sdd->sd = NULL 
TL = DIE, cpu = 3, sdd->sds = NULL 
TL = DIE, cpu = 3, sdd->sg = NULL 
TL = DIE, cpu = 3, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 4, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 4, sdd->sd = NULL 
TL = MC, cpu = 4, sdd->sds = NULL 
TL = MC, cpu = 4, sdd->sg = NULL 
TL = MC, cpu = 4, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 4, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 4, sdd->sd = NULL 
TL = DIE, cpu = 4, sdd->sds = NULL 
TL = DIE, cpu = 4, sdd->sg = NULL 
TL = DIE, cpu = 4, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 5, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 5, sdd->sd = NULL 
TL = MC, cpu = 5, sdd->sds = NULL 
TL = MC, cpu = 5, sdd->sg = NULL 
TL = MC, cpu = 5, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 5, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 5, sdd->sd = NULL 
TL = DIE, cpu = 5, sdd->sds = NULL 
TL = DIE, cpu = 5, sdd->sg = NULL 
TL = DIE, cpu = 5, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 6, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 6, sdd->sd = NULL 
TL = MC, cpu = 6, sdd->sds = NULL 
TL = MC, cpu = 6, sdd->sg = NULL 
TL = MC, cpu = 6, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 6, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 6, sdd->sd = NULL 
TL = DIE, cpu = 6, sdd->sds = NULL 
TL = DIE, cpu = 6, sdd->sg = NULL 
TL = DIE, cpu = 6, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = MC, cpu = 7, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = MC, cpu = 7, sdd->sd = NULL 
TL = MC, cpu = 7, sdd->sds = NULL 
TL = MC, cpu = 7, sdd->sg = NULL 
TL = MC, cpu = 7, sdd->sgc = NULL 
------------------------------------------------- 
 
-------------------------TL = DIE, cpu = 7, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc-------------------------------- 
TL = DIE, cpu = 7, sdd->sd = NULL 
TL = DIE, cpu = 7, sdd->sds = NULL 
TL = DIE, cpu = 7, sdd->sg = NULL 
TL = DIE, cpu = 7, sdd->sgc = NULL 
------------------------------------------------- 
 
---------------------cpu=0, debug_printf_cpu_rq---------------------- 
rq = 000000003429aaa4: rd = 000000001b6257a5, sd = 00000000d7f4f9d1, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=1, debug_printf_cpu_rq---------------------- 
rq = 00000000ed35f55b: rd = 000000001b6257a5, sd = 00000000b6a75a4c, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=2, debug_printf_cpu_rq---------------------- 
rq = 000000007ab2924d: rd = 000000001b6257a5, sd = 00000000baa86fe9, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=3, debug_printf_cpu_rq---------------------- 
rq = 00000000fccf4c48: rd = 000000001b6257a5, sd = 0000000088de345a, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=4, debug_printf_cpu_rq---------------------- 
rq = 000000002b3d7ce9: rd = 000000001b6257a5, sd = 00000000c5314037, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=5, debug_printf_cpu_rq---------------------- 
rq = 000000000ffd99ea: rd = 000000001b6257a5, sd = 00000000bcdabb79, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=6, debug_printf_cpu_rq---------------------- 
rq = 00000000ecf5161f: rd = 000000001b6257a5, sd = 00000000a7438b3c, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=7, debug_printf_cpu_rq---------------------- 
rq = 000000001e90f615: rd = 000000001b6257a5, sd = 00000000e6bf0308, cpu_capacity = 906, cpu_capacity_orig = 1024, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
 
---------------------cpu=0, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000d7f4f9d1: parent = 000000002e35d366, child =           (null), groups = 00000000020e03b4, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f 
rq->sd->groups: 
sched_group sg = 00000000020e03b4: next = 00000000ab25b4ff, ref = 4, group_weight = 1, sgc = 00000000c0fb3d29, asym_prefer_cpu = 0, sge = 0000000045497311, cpumask = 0 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 00000000c0fb3d29: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71656, imbalance = 0, id = 0, cpumask = 0 
rq->sd->shared: 
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 000000002e35d366: parent =           (null), child = 00000000d7f4f9d1, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=1, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000b6a75a4c: parent = 0000000052738a20, child =           (null), groups = 00000000ab25b4ff, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f 
rq->sd->groups: 
sched_group sg = 00000000ab25b4ff: next = 00000000b80ca367, ref = 4, group_weight = 1, sgc = 00000000c7f86843, asym_prefer_cpu = 0, sge = 000000004ba0e6e4, cpumask = 1 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 00000000c7f86843: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71656, imbalance = 0, id = 1, cpumask = 1 
rq->sd->shared: 
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 0000000052738a20: parent =           (null), child = 00000000b6a75a4c, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=2, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000baa86fe9: parent = 00000000ad441716, child =           (null), groups = 00000000b80ca367, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f 
rq->sd->groups: 
sched_group sg = 00000000b80ca367: next = 00000000d5827f1a, ref = 4, group_weight = 1, sgc = 00000000186b5ec8, asym_prefer_cpu = 0, sge = 00000000ad61ee0b, cpumask = 2 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 00000000186b5ec8: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71655, imbalance = 0, id = 2, cpumask = 2 
rq->sd->shared: 
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 00000000ad441716: parent =           (null), child = 00000000baa86fe9, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=3, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 0000000088de345a: parent = 00000000f40b8d8f, child =           (null), groups = 00000000d5827f1a, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f 
rq->sd->groups: 
sched_group sg = 00000000d5827f1a: next = 00000000020e03b4, ref = 4, group_weight = 1, sgc = 00000000cd39f15f, asym_prefer_cpu = 0, sge = 000000002fe36ed1, cpumask = 3 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 00000000cd39f15f: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71661, imbalance = 0, id = 3, cpumask = 3 
rq->sd->shared: 
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 00000000f40b8d8f: parent =           (null), child = 0000000088de345a, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=4, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000c5314037: parent = 000000009aaa92f2, child =           (null), groups = 0000000021c86a5c, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f 
rq->sd->groups: 
sched_group sg = 0000000021c86a5c: next = 000000007e6c07b8, ref = 3, group_weight = 1, sgc = 00000000ae0ee0b8, asym_prefer_cpu = 0, sge = 00000000e1287c6b, cpumask = 4 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 00000000ae0ee0b8: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 71257, imbalance = 0, id = 4, cpumask = 4 
rq->sd->shared: 
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 000000009aaa92f2: parent =           (null), child = 00000000c5314037, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=5, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000bcdabb79: parent = 000000005357fe0e, child =           (null), groups = 000000007e6c07b8, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f 
rq->sd->groups: 
sched_group sg = 000000007e6c07b8: next = 000000006871b1c7, ref = 3, group_weight = 1, sgc = 000000008745da85, asym_prefer_cpu = 0, sge = 00000000ff253c30, cpumask = 5 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 000000008745da85: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 70617, imbalance = 0, id = 5, cpumask = 5 
rq->sd->shared: 
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 000000005357fe0e: parent =           (null), child = 00000000bcdabb79, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=6, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000a7438b3c: parent = 00000000779b3b0b, child =           (null), groups = 000000006871b1c7, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f 
rq->sd->groups: 
sched_group sg = 000000006871b1c7: next = 0000000021c86a5c, ref = 3, group_weight = 1, sgc = 0000000043aa756f, asym_prefer_cpu = 0, sge = 000000000c95e6d7, cpumask = 6 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 0000000043aa756f: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 70116, imbalance = 0, id = 6, cpumask = 6 
rq->sd->shared: 
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 00000000779b3b0b: parent =           (null), child = 00000000a7438b3c, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->parent->groups: 
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------cpu=7, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 00000000e6bf0308: parent =           (null), child =           (null), groups = 000000000679d876, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f 
rq->sd->groups: 
sched_group sg = 000000000679d876: next = 00000000e4ad4cdb, ref = 8, group_weight = 1, sgc = 0000000011c34dc4, asym_prefer_cpu = 0, sge = 00000000287ffa86, cpumask = 7 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 0000000011c34dc4: ref = 8, capacity = 906, min_capacity = 906, max_capacity = 906, next_update = 70126, imbalance = 0, id = 7, cpumask = 7 
rq->sd->shared: 
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000d7f4f9d1, size = 4, id = 0, sds= 0000000024396c1d, sdnuma =           (null), sdasym =           (null), sdea = 000000002e35d366, sdscs=00000000d7f4f9d1 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000b6a75a4c, size = 4, id = 0, sds= 0000000024396c1d, sdnuma =           (null), sdasym =           (null), sdea = 0000000052738a20, sdscs=00000000b6a75a4c 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000baa86fe9, size = 4, id = 0, sds= 0000000024396c1d, sdnuma =           (null), sdasym =           (null), sdea = 00000000ad441716, sdscs=00000000baa86fe9 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 0000000088de345a, size = 4, id = 0, sds= 0000000024396c1d, sdnuma =           (null), sdasym =           (null), sdea = 00000000f40b8d8f, sdscs=0000000088de345a 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000c5314037, size = 3, id = 4, sds= 000000007b30bb71, sdnuma =           (null), sdasym =           (null), sdea = 000000009aaa92f2, sdscs=00000000c5314037 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000bcdabb79, size = 3, id = 4, sds= 000000007b30bb71, sdnuma =           (null), sdasym =           (null), sdea = 000000005357fe0e, sdscs=00000000bcdabb79 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd = 00000000a7438b3c, size = 3, id = 4, sds= 000000007b30bb71, sdnuma =           (null), sdasym =           (null), sdea = 00000000779b3b0b, sdscs=00000000a7438b3c 
------------------------------------------------- 
 
---------------------debug_printf_cache---------------------- 
cache: sd =           (null), size = 1, id = 7, sds=           (null), sdnuma =           (null), sdasym =           (null), sdea = 00000000e6bf0308, sdscs=          (null) 
------------------------------------------------- 
 
---------------------debug_printf_root_domain---------------------- 
refcount = 8, span = 0-7, max_cpu_capacity.val = 906, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0 
------------------------------------------------- 
 
---------------------debug_printf_cpu_topology---------------------- 
thread_id = -1, core_id = 0, cluster_id = 0, thread_sibling = 0, core_sibling = 0-3 
thread_id = -1, core_id = 1, cluster_id = 0, thread_sibling = 1, core_sibling = 0-3 
thread_id = -1, core_id = 2, cluster_id = 0, thread_sibling = 2, core_sibling = 0-3 
thread_id = -1, core_id = 3, cluster_id = 0, thread_sibling = 3, core_sibling = 0-3 
thread_id = -1, core_id = 0, cluster_id = 1, thread_sibling = 4, core_sibling = 4-6 
thread_id = -1, core_id = 1, cluster_id = 1, thread_sibling = 5, core_sibling = 4-6 
thread_id = -1, core_id = 2, cluster_id = 1, thread_sibling = 6, core_sibling = 4-6 
thread_id = -1, core_id = 0, cluster_id = 2, thread_sibling = 7, core_sibling = 7 
-------------------------------------------------

7. Qcom BSP 的状态一致。

8. sched_domain sd->flags 这个掩码标志在代码中的很多位置作为开关进行判断,看起来像是一个常量。也在 /proc/sys/kernel/sched_domain/cpuX/domainY/flags 也导出了这些flag。

9. balance_callback 链表都是空。


三、DEBUG核隔离和offline的影响

1. 被 isolated 的 cpu 的 sched_group->group_weight=0,比如中核中 CPU4 被 isolated 了,CPU4 的 sched_group->group_weight=0,中核DIE层级的 sched_group->group_weight=2(3-1=2).

2. 被 offline 状态的 cpu 的 rq->sd = NULL,这就导致这个 offline 的 cpu 与整个调度域完全脱离了,因为cpu找调度域是通过 rq->sd 去路由的。其 cache 相关的 per-cpu 的指针
全部变为指向NULL(重新online后会恢复)。小核 DIE层级 cluster 对应的 sched_group->group_weight 的值也是减去 offline 状态的 cpu 后的。

3. 唯一的大核CPU7被isolate后,DIE层级的 sched_group.next 构成的单链表的指向并不会改变。唯一的大核CPU7被 offline 后,DIE层级的 sched_group.next 变为小核和中核互相指向,
已经不存在大核的了。

4. CPU核被 isolate 了或被 offline 了,其DIE层级对应的 sd->groups->sgc 的 capacity 也会变化,其是此cluster内 online 和没被isolate的cpu的算力之和。

offline cpu2 后的效果:

---------------------cpu=2, debug_printf_cpu_rq---------------------- 
rq = 000000001f8a123c: rd = 0000000035a93b24, sd =           (null), cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
---------------------cpu=2, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
------------------------------------------------- 
---------------------debug_printf_cache---------------------- 
cache: sd =           (null), size = 1, id = 2, sds=           (null), sdnuma =           (null), sdasym =           (null), sdea =           (null), sdscs=          (null) 
-------------------------------------------------

同时的 isolate cpu3 后的效果:

---------------------cpu=3, debug_printf_cpu_rq---------------------- 
rq = 00000000d72c33eb: rd = 0000000014388aae, sd = 0000000005fa314e, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000 
balance_callback: 
------------------------------------------------- 
---------------------cpu=3, debug_printf_sd_sds_sg_sgc_cpu_rq---------------------- 
rq->sd: 
sched_domain sd = 0000000005fa314e: parent = 000000009e86ff7e, child =           (null), groups = 000000000018df41, name = MC, private = 0000000076e60833, shared = 000000005f941234, span = 0-1,3 
rq->sd->groups: 
sched_group sg = 000000000018df41: next = 0000000000f13ee1, ref = 3, group_weight = 0, sgc = 000000003d6525d0, asym_prefer_cpu = 0, sge = 00000000ab7de946, cpumask = 3 
rq->sd->groups->sgc: 
sched_group_capacity sgc = 000000003d6525d0: ref = 3, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 8909791, imbalance = 0, id = 3, cpumask = 3 
rq->sd->shared: 
sched_domain_shared sds = 000000005f941234: ref = 3, nr_busy_cpus = 2, has_idle_cores = 0, overutilized = 0 
rq->sd->parent: 
sched_domain sd = 000000009e86ff7e: parent =           (null), child = 0000000005fa314e, groups = 0000000052c6c8c4, name = DIE, private = 000000002e14f724, shared = 000000000392d96b, span = 0-1,3-6 
rq->sd->parent->groups: 
sched_group sg = 0000000052c6c8c4: next = 00000000cf354652, ref = 6, group_weight = 2, sgc = 00000000de302455, asym_prefer_cpu = 0, sge = 0000000042cf348e, cpumask = 0-1,3 
rq->sd->parent->groups->sgc: 
sched_group_capacity sgc = 00000000de302455: ref = 6, capacity = 460, min_capacity = 230, max_capacity = 230, next_update = 8912959, imbalance = 0, id = 0, cpumask = 0-1,3 
rq->sd->parent->shared: 
sched_domain_shared sds = 000000000392d96b: ref = 6, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0 
------------------------------------------------- 
---------------------debug_printf_cache---------------------- 
cache: sd = 0000000005fa314e, size = 3, id = 0, sds= 000000005f941234, sdnuma =           (null), sdasym =           (null), sdea = 000000009e86ff7e, sdscs=0000000005fa314e 
-------------------------------------------------

offline的cpu其 rq->sd 为空,也就是说offline的cpu从调度域中删除了。但是被isolate的cpu还在调度域中(TODO: 待再次确认)。

5. 将CPU7 isolate后,max_cpu_capacity.cpu = 7,将CPU7 offline后,max_cpu_capacity.cpu = 4。但是实验看起来不是很可靠,span的内容也不可靠。

isolate cpu7后的 root_domain: 
---------------------debug_printf_root_domain---------------------- 
refcount = 8, span = 0-7, max_cpu_capacity.val = 1024, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0 
------------------------------------------------- 
 
offline cpu2 cpu7,isolate cpu3后的 root_domain: 
---------------------debug_printf_root_domain---------------------- 
refcount = 6, span = 0-1,3-6, max_cpu_capacity.val = 790, max_cpu_capacity.cpu = 4, max_cap_orig_cpu = 4, min_cap_orig_cpu = 0 
------------------------------------------------- 
 
offline cpu3,isolate cpu7后的 root_domain: 
---------------------debug_printf_root_domain---------------------- 
refcount = 7, span = 0-2,4-7, max_cpu_capacity.val = 790, max_cpu_capacity.cpu = 5, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0 
------------------------------------------------- 
 
还出现一次: 
---------------------debug_printf_root_domain---------------------- 
refcount = 3, span = 0-1, max_cpu_capacity.val = 906, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = -1, min_cap_orig_cpu = -1 
-------------------------------------------------

6. 无论怎样 isolate 和 offline,全局 cpu_topology[NR_CPUS] 数组的值不会变化。

7. isolate或offline cpu后,cpu的拓扑结构会重新build!

四、补充

1. kernel-5.10 上cpu 拓扑相关成员描述

struct cpu_topology { 
    int thread_id; //不支持多线程,所有cpu的都是-1 
    int core_id; //每个cluster内的cpu id,比如 0 1 2 3, 0 1 2, 0 
    int package_id; //cpuster的id,比如 0 1 2 
    int llc_id; //cpu所在cluster的首个cpu的id 
    cpumask_t thread_sibling; //每个cpu_topology[cpu]中只有一个,就是cpu id自身,比如 0 1 2 3 4 5 6 7 
    cpumask_t core_sibling; //cpu所在cluster的cpu mask,比如小核的就是0-3,中核的是4-6,大核的是7 
    cpumask_t llc_sibling; 
 
    cpumask_t android_vendor_data1; 
}; 
#define topology_physical_package_id(cpu)    (cpu_topology[cpu].package_id) //cpu属于的cluster id,从0开始 
#define topology_core_id(cpu)        (cpu_topology[cpu].core_id) 
#define topology_core_cpumask(cpu)    (&cpu_topology[cpu].core_sibling) 
#define topology_sibling_cpumask(cpu)    (&cpu_topology[cpu].thread_sibling) 
#define topology_llc_cpumask(cpu)    (&cpu_topology[cpu].llc_sibling)

本文参考链接:https://www.cnblogs.com/hellokitty2/p/15487560.html