Based on MTK kernel-4.14.
I. Building the CPU Topology
1. On the arm64 architecture, the CPU topology is stored in the global array struct cpu_topology cpu_topology[NR_CPUS].
2. Definition of struct cpu_topology:
// arch/arm64/include/asm/topology.h
struct cpu_topology {
    int thread_id;
    int core_id;
    int cluster_id;
    cpumask_t thread_sibling;
    cpumask_t core_sibling;
};

extern struct cpu_topology cpu_topology[NR_CPUS];

#define topology_physical_package_id(cpu)   (cpu_topology[cpu].cluster_id)
#define topology_core_id(cpu)               (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu)          (&cpu_topology[cpu].core_sibling)
#define topology_sibling_cpumask(cpu)       (&cpu_topology[cpu].thread_sibling)
3. The cpu_topology[] array is populated from the "cpu-map" node under the "/cpus" node of the device's dts file. The update functions are:
// kernel_init() --> kernel_init_freeable() --> smp_prepare_cpus() --> init_cpu_topology()
//     --> parse_dt_topology() --> parse_cluster() --> parse_core()
static int __init parse_core(struct device_node *core, int cluster_id, int core_id)
{
    ...
    cpu = get_cpu_for_node(core);
    if (cpu >= 0) {
        cpu_topology[cpu].cluster_id = cluster_id;
        cpu_topology[cpu].core_id = core_id;
    }
    ...
}

// kernel_init() --> kernel_init_freeable() --> smp_prepare_cpus() --> store_cpu_topology()
//     --> update_siblings_masks()
static void update_siblings_masks(unsigned int cpuid)
{
    struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
    int cpu;

    /* update core and thread sibling masks */
    for_each_possible_cpu(cpu) {
        cpu_topo = &cpu_topology[cpu];

        if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
            continue;

        cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
        if (cpu != cpuid)
            cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); /* set in both directions; only cpuid's own cluster is updated here! */

        if (cpuid_topo->core_id != cpu_topo->core_id)
            continue;

        cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
        if (cpu != cpuid)
            cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
    }
}
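For reference, the cpu-map layout that parse_cluster()/parse_core() walk would look roughly like the sketch below for the 4+3+1 SoC discussed later. The clusterN/coreN node names follow the standard topology binding, but the cpu phandle labels (&cpu0 ...) are illustrative, not copied from an actual MTK dts:

cpus {
    ...
    cpu-map {
        cluster0 {                      /* cluster_id 0: little cores */
            core0 { cpu = <&cpu0>; };
            core1 { cpu = <&cpu1>; };
            core2 { cpu = <&cpu2>; };
            core3 { cpu = <&cpu3>; };
        };
        cluster1 {                      /* cluster_id 1: medium cores */
            core0 { cpu = <&cpu4>; };
            core1 { cpu = <&cpu5>; };
            core2 { cpu = <&cpu6>; };
        };
        cluster2 {                      /* cluster_id 2: big core */
            core0 { cpu = <&cpu7>; };
        };
    };
};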
4. From the attached debug output, the members of the parsed CPU topology have the following values:
---------------------debug_printf_cpu_topology----------------------
thread_id = -1, core_id = 0, cluster_id = 0, thread_sibling = 0, core_sibling = 0-3
thread_id = -1, core_id = 1, cluster_id = 0, thread_sibling = 1, core_sibling = 0-3
thread_id = -1, core_id = 2, cluster_id = 0, thread_sibling = 2, core_sibling = 0-3
thread_id = -1, core_id = 3, cluster_id = 0, thread_sibling = 3, core_sibling = 0-3
thread_id = -1, core_id = 0, cluster_id = 1, thread_sibling = 4, core_sibling = 4-6
thread_id = -1, core_id = 1, cluster_id = 1, thread_sibling = 5, core_sibling = 4-6
thread_id = -1, core_id = 2, cluster_id = 1, thread_sibling = 6, core_sibling = 4-6
thread_id = -1, core_id = 0, cluster_id = 2, thread_sibling = 7, core_sibling = 7
-------------------------------------------------
Question: update_siblings_masks() only updates one cluster per call, so why do all CPUs end up with their thread_sibling/core_sibling updated? Answer: store_cpu_topology() calls update_siblings_masks() once on every CPU as it boots; each call fills in the masks only for that CPU's own cluster, so once all CPUs have come up, every cluster's masks are complete.
II. Building the Sched Domains
1. Relevant structures
/*
 * For Arm big.LITTLE there are only two sched-domain levels, MC and DIE.
 * SMT is for hyper-threading; it is not used on these ARM mobile SoCs
 * and is typically seen on x86 platforms.
 */
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
    { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },       /* &cpu_topology[cpu].thread_sibling; not enabled here */
#endif
#ifdef CONFIG_SCHED_MC
    { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, /* &cpu_topology[cpu].core_sibling; initializes the sd_flags function pointer */
#endif
    { cpu_cpu_mask, SD_INIT_NAME(DIE) },                      /* returns node_to_cpumask_map[per_cpu(numa_node, cpu)] */
    { NULL, },
};

/* the system builds sched domains from sched_domain_topology, which points at default_topology */
static struct sched_domain_topology_level *sched_domain_topology = default_topology;

#define for_each_sd_topology(tl) for (tl = sched_domain_topology; tl->mask; tl++)
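For completeness, here is where those mask callbacks resolve. To the best of my reading of the 4.14 sources (file paths from memory), cpu_coregroup_mask() returns the cluster mask built in part I, and cpu_cpu_mask() returns the node mask, which on this non-NUMA SoC covers all CPUs:

// arch/arm64/kernel/topology.c
const struct cpumask *cpu_coregroup_mask(int cpu)
{
    return &cpu_topology[cpu].core_sibling;     /* MC level spans the cpu's cluster */
}

// include/linux/topology.h
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
    return cpumask_of_node(cpu_to_node(cpu));   /* DIE level spans the whole node, i.e. cpus 0-7 here */
}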
2. The domain build function
// sched_init_smp --> init_sched_domains --> build_sched_domains
static int build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
    enum s_alloc alloc_state;
    struct sched_domain *sd;
    struct s_data d;
    int i, ret = -ENOMEM;

    /*
     * Allocate memory for each per-cpu member of tl->data in the global
     * default_topology, and allocate s_data.rd.
     */
    alloc_state = __visit_domain_allocation_hell(&d, cpu_map); /* cpu_map is cpu_active_mask at init time */
    if (alloc_state != sa_rootdomain)
        goto error;

    /* Set up domains for CPUs specified by the cpu_map */
    for_each_cpu(i, cpu_map) {
        struct sched_domain_topology_level *tl;

        sd = NULL;
        /* only the MC and DIE levels */
        for_each_sd_topology(tl) {
            /* build a sched domain for every level of every cpu */
            sd = build_sched_domain(tl, cpu_map, attr, sd, i);
            /* equality means d.sd only ever points at the MC-level sd */
            if (tl == sched_domain_topology)
                *per_cpu_ptr(d.sd, i) = sd;
            if (tl->flags & SDTL_OVERLAP) /* tl->flags is never initialized in default_topology, so this does not run */
                sd->flags |= SD_OVERLAP;
        }
    }

    /* Build the groups for the domains */
    for_each_cpu(i, cpu_map) {
        /* d.sd points at each MC-level domain; the child level already points at its parent, so both can be walked */
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
            sd->span_weight = cpumask_weight(sched_domain_span(sd)); /* argument is sd->span */
            if (sd->flags & SD_OVERLAP) { /* false here */
                if (build_overlap_sched_groups(sd, i))
                    goto error;
            } else {
                if (build_sched_groups(sd, i)) /* the sg's within sd->span form a circular singly linked list */
                    goto error;
            }
        }
    }

    /* Calculate CPU capacity for physical packages and nodes */
    for (i = nr_cpumask_bits - 1; i >= 0; i--) {
        struct sched_domain_topology_level *tl = sched_domain_topology;

        if (!cpumask_test_cpu(i, cpu_map))
            continue;

        /* as seen here, every cpu has one sd structure per level */
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
            init_sched_groups_energy(i, sd, tl->energy); /* the third argument is always NULL */
            claim_allocations(i, sd);
            init_sched_groups_capacity(i, sd); /* this shows sdg->sgc->capacity is the sum of the capacities of all cpus in sd */
        }
    }

    /* Attach the domains: attach a domain to every rq */
    rcu_read_lock();
    for_each_cpu(i, cpu_map) {
        /* both were initialized to -1 */
        int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
        int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);

        sd = *per_cpu_ptr(d.sd, i);

        /* find the cpus with the largest and smallest capacity; the capacity values still come from the rq */
        if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig > cpu_rq(max_cpu)->cpu_capacity_orig))
            WRITE_ONCE(d.rd->max_cap_orig_cpu, i);

        if ((min_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig < cpu_rq(min_cpu)->cpu_capacity_orig))
            WRITE_ONCE(d.rd->min_cap_orig_cpu, i);

        /* sd = *per_cpu_ptr(d.sd, i); d.rd is the system's single root_domain; i is the cpu id */
        cpu_attach_domain(sd, d.rd, i);
    }
    rcu_read_unlock();

    if (!cpumask_empty(cpu_map))
        update_asym_cpucapacity(cpumask_first(cpu_map));

    ret = 0;
error:
    /*
     * Apart from root_domain (whose refcount is checked instead of being
     * freed), everything allocated above appears to be freed here; debug
     * prints confirm the per-cpu pointer arrays are indeed freed, yet the
     * cpu domain topology survives. Why? Because claim_allocations() above
     * has already NULLed the per-cpu slots of every sd/sds/sg/sgc that got
     * attached, so only the unclaimed leftovers are freed; see the
     * claim_allocations() listing that follows.
     */
    __free_domain_allocs(&d, alloc_state, cpu_map);
    return ret;
}
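The answer to the question in the error path lies in claim_allocations(), called above for every attached sd. It NULLs the per-cpu slots of every structure that got referenced, so the final free only releases what was allocated but never attached. This is also why the sdd->sd/sds/sg/sgc slots all print as NULL in the debug dump of item 6 below. For reference, the mainline 4.14 version:

/*
 * NULL the sd_data elements we've used to build the sched_domain and
 * sched_group structures, so that the later free only releases what was
 * allocated but never attached.
 */
static void claim_allocations(int cpu, struct sched_domain *sd)
{
    struct sd_data *sdd = sd->private;

    WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
    *per_cpu_ptr(sdd->sd, cpu) = NULL;

    if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
        *per_cpu_ptr(sdd->sds, cpu) = NULL;

    if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
        *per_cpu_ptr(sdd->sg, cpu) = NULL;

    if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
        *per_cpu_ptr(sdd->sgc, cpu) = NULL;
}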
3. For an SoC with a 4+3+1 CPU architecture, the topology diagram drawn from the debug output is as follows:
Note: sds is short for struct sched_domain_shared *shared.
From the experimental results:
(1) For the little and medium cores, cpu_rq(cpu)->sd points at their MC-level sched_domain. The big core has no MC-level sched_domain, so its cpu_rq(cpu)->sd points at its DIE-level sched_domain.
(2) All CPUs' cpu_rq(cpu)->rd point at the same global root_domain structure.
(3) The DIE-level sched_group_capacity.capacity is the sum of the capacities of all CPUs at the corresponding MC level, i.e. the per-cpu capacity multiplied by the number of cores in the cluster.
Supplement: each level's sched_domain private member points at that level's sched_domain_topology_level.data, i.e. default_topology[i].data; however, the per-cpu pointer slots inside data all read NULL after the build completes (the claimed structures were NULLed by claim_allocations(), the unclaimed ones were freed).
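A minimal sketch of how this hierarchy is meant to be walked at runtime (this is essentially what the debug module in item 6 does; the function name walk_domains is made up for illustration, and sd->name assumes CONFIG_SCHED_DEBUG):

static void walk_domains(int cpu)
{
    struct sched_domain *sd;
    struct sched_group *sg;

    rcu_read_lock();                    /* rq->sd is RCU-protected */
    for_each_domain(cpu, sd) {          /* MC first, then its parent DIE */
        pr_info("%s: span=%*pbl\n", sd->name,
            cpumask_pr_args(sched_domain_span(sd)));

        /* the groups of a domain form a circular singly linked list */
        sg = sd->groups;
        do {
            pr_info("  group=%*pbl weight=%u\n",
                cpumask_pr_args(to_cpumask(sg->cpumask)),
                sg->group_weight);
            sg = sg->next;
        } while (sg != sd->groups);
    }
    rcu_read_unlock();
}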
4. For an SoC with a 4+4 CPU architecture, the sched-domain topology is as follows:
5. Building the cache-related per-cpu variables
// kernel/sched/sched.h
// used in select_idle_cpu/__select_idle_sibling/select_idle_sibling_cstate_aware/
// set_cpu_sd_state_busy/set_cpu_sd_state_idle/nohz_kick_needed
DECLARE_PER_CPU(struct sched_domain *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);      /* used by wake_wide in the wakeup path in fair.c */
DECLARE_PER_CPU(int, sd_llc_id);        /* cpus_share_cache uses this per-cpu value to decide whether two cpus share a cache */
DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);   /* set_idle_cores/test_idle_cores */
DECLARE_PER_CPU(struct sched_domain *, sd_numa);    /* used in task_numa_migrate */
DECLARE_PER_CPU(struct sched_domain *, sd_asym);    /* used in nohz_kick_needed in fair.c */
DECLARE_PER_CPU(struct sched_domain *, sd_ea);      /* used in find_best_target */
DECLARE_PER_CPU(struct sched_domain *, sd_scs);     /* used in compute_energy/mtk_idle_power/mtk_busy_power */
The update function:
// sched_init_smp --> init_sched_domains --> build_sched_domains --> cpu_attach_domain --> update_top_cache_domain
static void update_top_cache_domain(int cpu)
{
    struct sched_domain_shared *sds = NULL;
    struct sched_domain *sd;
    struct sched_domain *ea_sd = NULL;
    int id = cpu;
    int size = 1;

    /* initialize from the highest domain that has SD_SHARE_PKG_RESOURCES set */
    sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
    if (sd) {
        id = cpumask_first(sched_domain_span(sd));
        size = cpumask_weight(sched_domain_span(sd));
        sds = sd->shared;
    }

    rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
    per_cpu(sd_llc_size, cpu) = size;
    per_cpu(sd_llc_id, cpu) = id;
    rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);

    /* parent->parent is NULL; returns NULL if nothing is found */
    sd = lowest_flag_domain(cpu, SD_NUMA);
    rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);

    sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
    rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);

    /* walk bottom-up; keep the last sd whose groups have sge initialized */
    for_each_domain(cpu, sd) {
        if (sd->groups->sge)
            ea_sd = sd;
        else
            break;
    }
    rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);

    sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
    rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
}

/* the flags function pointer passed when the default_topology level array is initialized */
static inline int cpu_core_flags(void)
{
    return SD_SHARE_PKG_RESOURCES;
}

static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
{
    struct sched_domain *sd, *hsd = NULL;

    for_each_domain(cpu, sd) { /* search upward starting from the child domain */
        if (!(sd->flags & flag))
            break;
        hsd = sd;
    }

    return hsd; /* the highest sd that still carries the flag; for SD_SHARE_PKG_RESOURCES this is the MC-level sd */
}
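As an example of how these per-cpu variables are consumed, cpus_share_cache() in kernel/sched/core.c reduces to a comparison of sd_llc_id, so two CPUs "share cache" exactly when they sit in the same MC span:

// kernel/sched/core.c
bool cpus_share_cache(int this_cpu, int that_cpu)
{
    return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}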
From highest_flag_domain() and the flags callback passed at the MC level, what is obtained is the MC-level sched domain (SD_SHARE_PKG_RESOURCES is only set at MC), which means the CPUs within a cluster share a cache (the cache meant here should be the L2 cache). The exception is the big-core cluster, which contains only CPU7 and has no MC level.
Debug output of these members for each CPU:
cache: sd = 000000007607d14d, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 0000000005eb165b, sdscs=000000007607d14d
cache: sd = 0000000073059b52, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 000000001b3b27f3, sdscs=0000000073059b52
cache: sd = 000000005b94d1f5, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 000000009c9ce4e7, sdscs=000000005b94d1f5
cache: sd = 0000000034f5aa86, size = 4, id = 0, sds= 00000000d3d7a536, sdnuma = (null), sdasym = (null), sdea = 00000000aa75a8a2, sdscs=0000000034f5aa86
cache: sd = 0000000015506191, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000375c082c, sdscs=0000000015506191
cache: sd = 0000000013e36405, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000f629682d, sdscs=0000000013e36405
cache: sd = 00000000687439ee, size = 3, id = 4, sds= 000000001048be02, sdnuma = (null), sdasym = (null), sdea = 00000000d6ba54d5, sdscs=00000000687439ee
cache: sd = (null), size = 1, id = 7, sds= (null), sdnuma = (null), sdasym = (null), sdea = 00000000b81ad593, sdscs= (null)
(1) A NULL value means no matching sched domain was found.
(2) sd_llc and sd_scs point at the same structure; together with cpu_rq(cpu)->sd and the MC-level sd, all four are the same pointer. Since the big core CPU7 has no MC-level domain, its values are NULL.
(3) sd_llc_size is the number of CPUs in the cluster this CPU belongs to.
(4) sd_llc_id is the id of the first CPU in the cluster this CPU belongs to.
(5) sd_llc_shared: CPUs in the same cluster point at the same struct sched_domain_shared, the same pointer as the MC-level domain's sd->shared.
(6) sd_numa and sd_asym are NULL because default_topology does not specify the corresponding flags.
(7) sd_ea points at each CPU's DIE-level sched_domain.
Where they are set (kernel-5.10):
// kernel/sched/topology.c, kernel-5.10 (these are the definitions; sched.h holds the declarations)
// update_top_cache_domain is the only place it is set: points at the MC-level sd
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
// update_top_cache_domain is the only place it is set: number of cpus in the MC-level sd, i.e. in the cpu's cluster
DEFINE_PER_CPU(int, sd_llc_size);
// update_top_cache_domain is the only place it is set: id of the first cpu in the cpu's cluster
DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
// update_top_cache_domain is the only place it is set: always NULL here
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
// update_top_cache_domain is the only place it is set: always NULL here
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
// update_top_cache_domain is the only place it is set: points at each cpu's DIE-level sd
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);

static void update_top_cache_domain(int cpu)
{
    struct sched_domain_shared *sds = NULL;
    struct sched_domain *sd;
    int id = cpu;
    int size = 1;

    sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); /* returns the MC-level sd */
    if (sd) {
        id = cpumask_first(sched_domain_span(sd));
        size = cpumask_weight(sched_domain_span(sd));
        sds = sd->shared;
    }

    rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);   /* points at the MC-level sd */
    per_cpu(sd_llc_size, cpu) = size;               /* number of cpus in the cpu's cluster */
    per_cpu(sd_llc_id, cpu) = id;                   /* id of the first cpu in the cpu's cluster */
    rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);

    sd = lowest_flag_domain(cpu, SD_NUMA);          /* neither MC nor DIE defines this flag, so NULL */
    rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);

    sd = highest_flag_domain(cpu, SD_ASYM_PACKING); /* neither MC nor DIE defines this flag, so NULL */
    rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);

    sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY); /* returns this cpu's DIE-level sd */
    rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
}
The flags of each domain level:
// kernel-5.10
# cat /proc/sys/kernel/sched_domain/cpu0/domain0/flags
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_PKG_RESOURCES                  // MC
# cat /proc/sys/kernel/sched_domain/cpu0/domain1/flags
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_CPUCAPACITY SD_PREFER_SIBLING   // DIE
...
# cat /proc/sys/kernel/sched_domain/cpu7/domain0/flags
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_CPUCAPACITY SD_PREFER_SIBLING   // DIE
6. Debug print code
/* Place this file under kernel/sched/ */
#define pr_fmt(fmt) "topo_debug: " fmt

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/printk.h>
#include <asm/topology.h>
#include <linux/cpumask.h>
#include <linux/sched/topology.h>
#include "sched.h"

extern struct cpu_topology cpu_topology[NR_CPUS];
extern struct sched_domain_topology_level *sched_domain_topology; /* the static qualifier must be removed in the original file */

#define for_each_sd_topology(tl) for (tl = sched_domain_topology; tl->mask; tl++)

struct domain_topo_debug_t {
    int cmd;
};
static struct domain_topo_debug_t dtd;

static void debug_printf_sched_domain(struct seq_file *m, struct sched_domain *sd, char *level, int cpu)
{
    if (!sd) {
        if (level)
            seq_printf(m, "TL = %s, cpu = %d, sd = NULL\n", level, cpu);
        else
            seq_printf(m, "sd = NULL\n");
    } else {
        seq_printf(m, "sched_domain sd = %p: parent = %p, child = %p, groups = %p, name = %s, private = %p, shared = %p, span = %*pbl, flags=0x%x\n",
            sd, sd->parent, sd->child, sd->groups, sd->name, sd->private, sd->shared,
            cpumask_pr_args(to_cpumask(sd->span)), sd->flags);
    }
}

static void debug_printf_sched_group(struct seq_file *m, struct sched_group *sg, char *level, int cpu)
{
    if (!sg) {
        if (level)
            seq_printf(m, "TL = %s, cpu = %d, sg = NULL\n", level, cpu);
        else
            seq_printf(m, "sg = NULL\n");
    } else {
        seq_printf(m, "sched_group sg = %p: next = %p, ref = %d, group_weight = %d, sgc = %p, asym_prefer_cpu = %d, sge = %p, cpumask = %*pbl\n",
            sg, sg->next, atomic_read(&sg->ref), sg->group_weight, sg->sgc,
            sg->asym_prefer_cpu, sg->sge, cpumask_pr_args(to_cpumask(sg->cpumask)));
    }
}

static void debug_printf_sched_group_capacity(struct seq_file *m, struct sched_group_capacity *sgc, char *level, int cpu)
{
    if (!sgc) {
        if (level)
            seq_printf(m, "TL = %s, cpu = %d, sgc = NULL\n", level, cpu);
        else
            seq_printf(m, "sgc = NULL\n");
    } else {
        seq_printf(m, "sched_group_capacity sgc = %p: ref = %d, capacity = %lu, min_capacity = %lu, max_capacity = %lu, next_update = %lu, imbalance = %d, id = %d, cpumask = %*pbl\n",
            sgc, atomic_read(&sgc->ref), sgc->capacity, sgc->min_capacity, sgc->max_capacity,
            sgc->next_update, sgc->imbalance, sgc->id, cpumask_pr_args(to_cpumask(sgc->cpumask)));
    }
}

static void debug_printf_sched_domain_shared(struct seq_file *m, struct sched_domain_shared *sds, char *level, int cpu)
{
    if (!sds) {
        if (level)
            seq_printf(m, "TL = %s, cpu = %d, sds = NULL\n", level, cpu);
        else
            seq_printf(m, "sds = NULL\n");
    } else {
        seq_printf(m, "sched_domain_shared sds = %p: ref = %d, nr_busy_cpus = %d, has_idle_cores = %d, overutilized = %d\n",
            sds, atomic_read(&sds->ref), atomic_read(&sds->nr_busy_cpus),
            sds->has_idle_cores, sds->overutilized);
    }
}

static void debug_printf_sd_sds_sg_sgc(struct seq_file *m, struct sched_domain_topology_level *tl, int cpu)
{
    struct sd_data *sdd;
    struct sched_domain *sd;
    struct sched_domain_shared *sds;
    struct sched_group *sg;
    struct sched_group_capacity *sgc;

    sdd = &tl->data;

    seq_printf(m, "-------------------------TL = %s, cpu = %d, sdd = %p, %s--------------------------------\n",
        tl->name, cpu, sdd, __func__);

    if (!sdd->sd) {
        seq_printf(m, "TL = %s, cpu = %d, sdd->sd = NULL\n", tl->name, cpu);
    } else {
        sd = *per_cpu_ptr(sdd->sd, cpu);
        debug_printf_sched_domain(m, sd, tl->name, cpu);
    }

    if (!sdd->sds) {
        seq_printf(m, "TL = %s, cpu = %d, sdd->sds = NULL\n", tl->name, cpu);
    } else {
        sds = *per_cpu_ptr(sdd->sds, cpu);
        debug_printf_sched_domain_shared(m, sds, tl->name, cpu);
    }

    if (!sdd->sg) {
        seq_printf(m, "TL = %s, cpu = %d, sdd->sg = NULL\n", tl->name, cpu);
    } else {
        sg = *per_cpu_ptr(sdd->sg, cpu);
        debug_printf_sched_group(m, sg, tl->name, cpu);
    }

    if (!sdd->sgc) {
        seq_printf(m, "TL = %s, cpu = %d, sdd->sgc = NULL\n", tl->name, cpu);
    } else {
        sgc = *per_cpu_ptr(sdd->sgc, cpu);
        debug_printf_sched_group_capacity(m, sgc, tl->name, cpu);
    }

    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_sd_sds_sg_sgc_cpu_rq(struct seq_file *m, int cpu)
{
    struct rq *rq = cpu_rq(cpu);

    seq_printf(m, "---------------------cpu=%d, %s----------------------\n", cpu, __func__);

    if (rq->sd) {
        seq_printf(m, "rq->sd:\n");
        debug_printf_sched_domain(m, rq->sd, NULL, cpu);
        seq_printf(m, "rq->sd->groups:\n");
        debug_printf_sched_group(m, rq->sd->groups, NULL, cpu);
        if (rq->sd->groups) {
            seq_printf(m, "rq->sd->groups->sgc:\n");
            debug_printf_sched_group_capacity(m, rq->sd->groups->sgc, NULL, cpu);
        }
        seq_printf(m, "rq->sd->shared:\n");
        debug_printf_sched_domain_shared(m, rq->sd->shared, NULL, cpu);
    }

    if (rq->sd && rq->sd->parent) {
        seq_printf(m, "rq->sd->parent:\n");
        debug_printf_sched_domain(m, rq->sd->parent, NULL, cpu);
        seq_printf(m, "rq->sd->parent->groups:\n");
        debug_printf_sched_group(m, rq->sd->parent->groups, NULL, cpu);
        if (rq->sd->parent->groups) {
            seq_printf(m, "rq->sd->parent->groups->sgc:\n");
            debug_printf_sched_group_capacity(m, rq->sd->parent->groups->sgc, NULL, cpu);
        }
        seq_printf(m, "rq->sd->parent->shared:\n");
        debug_printf_sched_domain_shared(m, rq->sd->parent->shared, NULL, cpu);
    }

    if (rq->sd && rq->sd->child) {
        seq_printf(m, "rq->sd->child:\n");
        debug_printf_sched_domain(m, rq->sd->child, NULL, cpu);
        seq_printf(m, "rq->sd->child->groups:\n");
        debug_printf_sched_group(m, rq->sd->child->groups, NULL, cpu);
        if (rq->sd->child->groups) {
            seq_printf(m, "rq->sd->child->groups->sgc:\n");
            debug_printf_sched_group_capacity(m, rq->sd->child->groups->sgc, NULL, cpu);
        }
        seq_printf(m, "rq->sd->child->shared:\n");
        debug_printf_sched_domain_shared(m, rq->sd->child->shared, NULL, cpu);
    }

    if (rq->sd && rq->sd->parent && rq->sd->parent->parent) {
        seq_printf(m, "rq->sd->parent->parent:\n");
        debug_printf_sched_domain(m, rq->sd->parent->parent, NULL, cpu);
        seq_printf(m, "rq->sd->parent->parent->groups:\n");
        debug_printf_sched_group(m, rq->sd->parent->parent->groups, NULL, cpu);
        if (rq->sd->parent->parent->groups) {
            seq_printf(m, "rq->sd->parent->parent->groups->sgc:\n");
            debug_printf_sched_group_capacity(m, rq->sd->parent->parent->groups->sgc, NULL, cpu);
        }
        seq_printf(m, "rq->sd->parent->parent->shared:\n");
        debug_printf_sched_domain_shared(m, rq->sd->parent->parent->shared, NULL, cpu);
    }

    if (rq->sd && rq->sd->child && rq->sd->child->child) {
        seq_printf(m, "rq->sd->child->child:\n");
        debug_printf_sched_domain(m, rq->sd->child->child, NULL, cpu);
        seq_printf(m, "rq->sd->child->child->groups:\n");
        debug_printf_sched_group(m, rq->sd->child->child->groups, NULL, cpu);
        if (rq->sd->child->child->groups) {
            seq_printf(m, "rq->sd->child->child->groups->sgc:\n");
            debug_printf_sched_group_capacity(m, rq->sd->child->child->groups->sgc, NULL, cpu);
        }
        seq_printf(m, "rq->sd->child->child->shared:\n");
        debug_printf_sched_domain_shared(m, rq->sd->child->child->shared, NULL, cpu);
    }

    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_cpu_rq(struct seq_file *m, int cpu)
{
    struct callback_head *callback;
    struct rq *rq = cpu_rq(cpu);

    seq_printf(m, "---------------------cpu=%d, %s----------------------\n", cpu, __func__);
    seq_printf(m, "rq = %p: rd = %p, sd = %p, cpu_capacity = %lu, cpu_capacity_orig = %lu, max_idle_balance_cost = %lu\n",
        rq, rq->rd, rq->sd, rq->cpu_capacity, rq->cpu_capacity_orig, rq->max_idle_balance_cost);

    seq_printf(m, "balance_callback: \n");
    callback = rq->balance_callback;
    while (callback) {
        seq_printf(m, "%pf\n", callback->func);
        callback = callback->next;
    }
    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_root_domain(struct seq_file *m)
{
    struct rq *rq = cpu_rq(0);
    struct root_domain *rd = rq->rd;

    seq_printf(m, "---------------------%s----------------------\n", __func__);
    if (rd) {
        seq_printf(m, "refcount = %d, span = %*pbl, max_cpu_capacity.val = %lu, max_cpu_capacity.cpu = %d, max_cap_orig_cpu = %d, min_cap_orig_cpu = %d\n",
            atomic_read(&rd->refcount), cpumask_pr_args(rd->span),
            rd->max_cpu_capacity.val, rd->max_cpu_capacity.cpu,
            rd->max_cap_orig_cpu, rd->min_cap_orig_cpu);
    }
    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_cpu_topology(struct seq_file *m)
{
    int i;
    struct cpu_topology *ct = cpu_topology;

    seq_printf(m, "---------------------%s----------------------\n", __func__);
    for (i = 0; i < NR_CPUS; i++) {
        seq_printf(m, "thread_id = %d, core_id = %d, cluster_id = %d, thread_sibling = %*pbl, core_sibling = %*pbl\n",
            ct->thread_id, ct->core_id, ct->cluster_id,
            cpumask_pr_args(&ct->thread_sibling), cpumask_pr_args(&ct->core_sibling));
        ct++; /* advance to the next cpu's entry */
    }
    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_cache(struct seq_file *m, int cpu)
{
    struct sched_domain *sd, *sdnuma, *sdasym, *sdea, *sdscs;
    struct sched_domain_shared *sds;
    int size, id;

    rcu_read_lock(); /* the per-cpu domain pointers are RCU-protected */
    sd = rcu_dereference(per_cpu(sd_llc, cpu));
    size = per_cpu(sd_llc_size, cpu);
    id = per_cpu(sd_llc_id, cpu);
    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
    sdnuma = rcu_dereference(per_cpu(sd_numa, cpu));
    sdasym = rcu_dereference(per_cpu(sd_asym, cpu));
    sdea = rcu_dereference(per_cpu(sd_ea, cpu));
    sdscs = rcu_dereference(per_cpu(sd_scs, cpu));
    rcu_read_unlock();

    seq_printf(m, "---------------------%s----------------------\n", __func__);
    seq_printf(m, "cache: sd = %p, size = %d, id = %d, sds= %p, sdnuma = %p, sdasym = %p, sdea = %p, sdscs=%p\n",
        sd, size, id, sds, sdnuma, sdasym, sdea, sdscs);
    seq_printf(m, "-------------------------------------------------\n\n");
}

static void debug_printf_all(struct seq_file *m, const struct cpumask *cpu_mask)
{
    int cpu;
    struct sched_domain_topology_level *tl;

    for_each_cpu(cpu, cpu_mask) {
        for_each_sd_topology(tl) {
            debug_printf_sd_sds_sg_sgc(m, tl, cpu);
        }
    }

    for_each_cpu(cpu, cpu_mask) { /* a separate loop makes the output easier to read */
        debug_printf_cpu_rq(m, cpu);
        debug_printf_sd_sds_sg_sgc_cpu_rq(m, cpu);
        debug_printf_cache(m, cpu);
    }

    debug_printf_root_domain(m);
    debug_printf_cpu_topology(m);
}

static ssize_t domain_topo_debug_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
    int ret, cmd_value;
    char buffer[32] = {0};

    if (count >= sizeof(buffer))
        count = sizeof(buffer) - 1;

    if (copy_from_user(buffer, buf, count)) {
        pr_info("copy_from_user failed\n");
        return -EFAULT;
    }

    ret = sscanf(buffer, "%d", &cmd_value);
    if (ret <= 0) {
        pr_info("sscanf dec failed\n");
        return -EINVAL;
    }

    pr_info("cmd_value=%d\n", cmd_value);
    dtd.cmd = cmd_value;

    return count;
}

static int domain_topo_debug_show(struct seq_file *m, void *v)
{
    switch (dtd.cmd) {
    case 0:
        debug_printf_all(m, cpu_possible_mask);
        break;
    case 1:
        debug_printf_all(m, cpu_online_mask);
        break;
    case 2:
        debug_printf_all(m, cpu_present_mask);
        break;
    case 3:
        debug_printf_all(m, cpu_active_mask);
        break;
    case 4:
        debug_printf_all(m, cpu_isolated_mask);
        break;
    default:
        pr_info("dtd.cmd is invalid!\n");
        break;
    }

    return 0;
}

static int domain_topo_debug_open(struct inode *inode, struct file *file)
{
    return single_open(file, domain_topo_debug_show, NULL);
}

static const struct file_operations domain_topo_debug_fops = {
    .open = domain_topo_debug_open,
    .read = seq_read,
    .write = domain_topo_debug_write,
    .llseek = seq_lseek,
    .release = single_release,
};

static int __init domain_topo_debug_init(void)
{
    proc_create("domain_topo_debug", S_IRUGO | S_IWUGO, NULL, &domain_topo_debug_fops);
    pr_info("domain_topo_debug probed\n");
    return 0;
}
fs_initcall(domain_topo_debug_init);
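Assuming the file is saved as kernel/sched/domain_topo_debug.c (the file name is my choice), it can be wired into the build with a one-line Makefile addition and then driven from the shell:

# kernel/sched/Makefile
obj-y += domain_topo_debug.o

# choose the cpumask to dump: 0=possible, 1=online, 2=present, 3=active, 4=isolated
echo 0 > /proc/domain_topo_debug
cat /proc/domain_topo_debug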
Debug output:
# cat domain_topo_debug
-------------------------TL = MC, cpu = 0, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 0, sdd->sd = NULL
TL = MC, cpu = 0, sdd->sds = NULL
TL = MC, cpu = 0, sdd->sg = NULL
TL = MC, cpu = 0, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 0, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 0, sdd->sd = NULL
TL = DIE, cpu = 0, sdd->sds = NULL
TL = DIE, cpu = 0, sdd->sg = NULL
TL = DIE, cpu = 0, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 1, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 1, sdd->sd = NULL
TL = MC, cpu = 1, sdd->sds = NULL
TL = MC, cpu = 1, sdd->sg = NULL
TL = MC, cpu = 1, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 1, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 1, sdd->sd = NULL
TL = DIE, cpu = 1, sdd->sds = NULL
TL = DIE, cpu = 1, sdd->sg = NULL
TL = DIE, cpu = 1, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 2, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 2, sdd->sd = NULL
TL = MC, cpu = 2, sdd->sds = NULL
TL = MC, cpu = 2, sdd->sg = NULL
TL = MC, cpu = 2, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 2, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 2, sdd->sd = NULL
TL = DIE, cpu = 2, sdd->sds = NULL
TL = DIE, cpu = 2, sdd->sg = NULL
TL = DIE, cpu = 2, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 3, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 3, sdd->sd = NULL
TL = MC, cpu = 3, sdd->sds = NULL
TL = MC, cpu = 3, sdd->sg = NULL
TL = MC, cpu = 3, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 3, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 3, sdd->sd = NULL
TL = DIE, cpu = 3, sdd->sds = NULL
TL = DIE, cpu = 3, sdd->sg = NULL
TL = DIE, cpu = 3, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 4, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 4, sdd->sd = NULL
TL = MC, cpu = 4, sdd->sds = NULL
TL = MC, cpu = 4, sdd->sg = NULL
TL = MC, cpu = 4, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 4, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 4, sdd->sd = NULL
TL = DIE, cpu = 4, sdd->sds = NULL
TL = DIE, cpu = 4, sdd->sg = NULL
TL = DIE, cpu = 4, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 5, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 5, sdd->sd = NULL
TL = MC, cpu = 5, sdd->sds = NULL
TL = MC, cpu = 5, sdd->sg = NULL
TL = MC, cpu = 5, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 5, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 5, sdd->sd = NULL
TL = DIE, cpu = 5, sdd->sds = NULL
TL = DIE, cpu = 5, sdd->sg = NULL
TL = DIE, cpu = 5, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 6, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 6, sdd->sd = NULL
TL = MC, cpu = 6, sdd->sds = NULL
TL = MC, cpu = 6, sdd->sg = NULL
TL = MC, cpu = 6, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 6, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 6, sdd->sd = NULL
TL = DIE, cpu = 6, sdd->sds = NULL
TL = DIE, cpu = 6, sdd->sg = NULL
TL = DIE, cpu = 6, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = MC, cpu = 7, sdd = 000000009e66189d, debug_printf_sd_sds_sg_sgc--------------------------------
TL = MC, cpu = 7, sdd->sd = NULL
TL = MC, cpu = 7, sdd->sds = NULL
TL = MC, cpu = 7, sdd->sg = NULL
TL = MC, cpu = 7, sdd->sgc = NULL
-------------------------------------------------

-------------------------TL = DIE, cpu = 7, sdd = 0000000079bc9f22, debug_printf_sd_sds_sg_sgc--------------------------------
TL = DIE, cpu = 7, sdd->sd = NULL
TL = DIE, cpu = 7, sdd->sds = NULL
TL = DIE, cpu = 7, sdd->sg = NULL
TL = DIE, cpu = 7, sdd->sgc = NULL
-------------------------------------------------

---------------------cpu=0, debug_printf_cpu_rq----------------------
rq = 000000003429aaa4: rd = 000000001b6257a5, sd = 00000000d7f4f9d1, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=1, debug_printf_cpu_rq----------------------
rq = 00000000ed35f55b: rd = 000000001b6257a5, sd = 00000000b6a75a4c, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=2, debug_printf_cpu_rq----------------------
rq = 000000007ab2924d: rd = 000000001b6257a5, sd = 00000000baa86fe9, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=3, debug_printf_cpu_rq----------------------
rq = 00000000fccf4c48: rd = 000000001b6257a5, sd = 0000000088de345a, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=4, debug_printf_cpu_rq----------------------
rq = 000000002b3d7ce9: rd = 000000001b6257a5, sd = 00000000c5314037, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=5, debug_printf_cpu_rq----------------------
rq = 000000000ffd99ea: rd = 000000001b6257a5, sd = 00000000bcdabb79, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=6, debug_printf_cpu_rq----------------------
rq = 00000000ecf5161f: rd = 000000001b6257a5, sd = 00000000a7438b3c, cpu_capacity = 790, cpu_capacity_orig = 917, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=7, debug_printf_cpu_rq----------------------
rq = 000000001e90f615: rd = 000000001b6257a5, sd = 00000000e6bf0308, cpu_capacity = 906, cpu_capacity_orig = 1024, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=0, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000d7f4f9d1: parent = 000000002e35d366, child = (null), groups = 00000000020e03b4, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f
rq->sd->groups:
sched_group sg = 00000000020e03b4: next = 00000000ab25b4ff, ref = 4, group_weight = 1, sgc = 00000000c0fb3d29, asym_prefer_cpu = 0, sge = 0000000045497311, cpumask = 0
rq->sd->groups->sgc:
sched_group_capacity sgc = 00000000c0fb3d29: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71656, imbalance = 0, id = 0, cpumask = 0
rq->sd->shared:
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 000000002e35d366: parent = (null), child = 00000000d7f4f9d1, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=1, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000b6a75a4c: parent = 0000000052738a20, child = (null), groups = 00000000ab25b4ff, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f
rq->sd->groups:
sched_group sg = 00000000ab25b4ff: next = 00000000b80ca367, ref = 4, group_weight = 1, sgc = 00000000c7f86843, asym_prefer_cpu = 0, sge = 000000004ba0e6e4, cpumask = 1
rq->sd->groups->sgc:
sched_group_capacity sgc = 00000000c7f86843: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71656, imbalance = 0, id = 1, cpumask = 1
rq->sd->shared:
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 0000000052738a20: parent = (null), child = 00000000b6a75a4c, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=2, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000baa86fe9: parent = 00000000ad441716, child = (null), groups = 00000000b80ca367, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f
rq->sd->groups:
sched_group sg = 00000000b80ca367: next = 00000000d5827f1a, ref = 4, group_weight = 1, sgc = 00000000186b5ec8, asym_prefer_cpu = 0, sge = 00000000ad61ee0b, cpumask = 2
rq->sd->groups->sgc:
sched_group_capacity sgc = 00000000186b5ec8: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71655, imbalance = 0, id = 2, cpumask = 2
rq->sd->shared:
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 00000000ad441716: parent = (null), child = 00000000baa86fe9, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=3, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 0000000088de345a: parent = 00000000f40b8d8f, child = (null), groups = 00000000d5827f1a, name = MC, private = 000000009e66189d, shared = 0000000024396c1d, span = 0-3, flags=0x823f
rq->sd->groups:
sched_group sg = 00000000d5827f1a: next = 00000000020e03b4, ref = 4, group_weight = 1, sgc = 00000000cd39f15f, asym_prefer_cpu = 0, sge = 000000002fe36ed1, cpumask = 3
rq->sd->groups->sgc:
sched_group_capacity sgc = 00000000cd39f15f: ref = 4, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 71661, imbalance = 0, id = 3, cpumask = 3
rq->sd->shared:
sched_domain_shared sds = 0000000024396c1d: ref = 4, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 00000000f40b8d8f: parent = (null), child = 0000000088de345a, groups = 00000000e4ad4cdb, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 00000000e4ad4cdb: next = 0000000044121b65, ref = 8, group_weight = 4, sgc = 0000000076821083, asym_prefer_cpu = 0, sge = 00000000be40d7aa, cpumask = 0-3
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 0000000076821083: ref = 8, capacity = 920, min_capacity = 230, max_capacity = 230, next_update = 71662, imbalance = 0, id = 0, cpumask = 0-3
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=4, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000c5314037: parent = 000000009aaa92f2, child = (null), groups = 0000000021c86a5c, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f
rq->sd->groups:
sched_group sg = 0000000021c86a5c: next = 000000007e6c07b8, ref = 3, group_weight = 1, sgc = 00000000ae0ee0b8, asym_prefer_cpu = 0, sge = 00000000e1287c6b, cpumask = 4
rq->sd->groups->sgc:
sched_group_capacity sgc = 00000000ae0ee0b8: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 71257, imbalance = 0, id = 4, cpumask = 4
rq->sd->shared:
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 000000009aaa92f2: parent = (null), child = 00000000c5314037, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=5, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000bcdabb79: parent = 000000005357fe0e, child = (null), groups = 000000007e6c07b8, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f
rq->sd->groups:
sched_group sg = 000000007e6c07b8: next = 000000006871b1c7, ref = 3, group_weight = 1, sgc = 000000008745da85, asym_prefer_cpu = 0, sge = 00000000ff253c30, cpumask = 5
rq->sd->groups->sgc:
sched_group_capacity sgc = 000000008745da85: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 70617, imbalance = 0, id = 5, cpumask = 5
rq->sd->shared:
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 000000005357fe0e: parent = (null), child = 00000000bcdabb79, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=6, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000a7438b3c: parent = 00000000779b3b0b, child = (null), groups = 000000006871b1c7, name = MC, private = 000000009e66189d, shared = 000000007b30bb71, span = 4-6, flags=0x823f
rq->sd->groups:
sched_group sg = 000000006871b1c7: next = 0000000021c86a5c, ref = 3, group_weight = 1, sgc = 0000000043aa756f, asym_prefer_cpu = 0, sge = 000000000c95e6d7, cpumask = 6
rq->sd->groups->sgc:
sched_group_capacity sgc = 0000000043aa756f: ref = 3, capacity = 790, min_capacity = 790, max_capacity = 790, next_update = 70116, imbalance = 0, id = 6, cpumask = 6
rq->sd->shared:
sched_domain_shared sds = 000000007b30bb71: ref = 3, nr_busy_cpus = 1, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 00000000779b3b0b: parent = (null), child = 00000000a7438b3c, groups = 0000000044121b65, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->parent->groups:
sched_group sg = 0000000044121b65: next = 000000000679d876, ref = 8, group_weight = 3, sgc = 000000006f38e38f, asym_prefer_cpu = 0, sge = 0000000000aea201, cpumask = 4-6
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 000000006f38e38f: ref = 8, capacity = 2370, min_capacity = 790, max_capacity = 790, next_update = 71260, imbalance = 0, id = 4, cpumask = 4-6
rq->sd->parent->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------cpu=7, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 00000000e6bf0308: parent = (null), child = (null), groups = 000000000679d876, name = DIE, private = 0000000079bc9f22, shared = 000000001c246b7a, span = 0-7, flags=0x107f
rq->sd->groups:
sched_group sg = 000000000679d876: next = 00000000e4ad4cdb, ref = 8, group_weight = 1, sgc = 0000000011c34dc4, asym_prefer_cpu = 0, sge = 00000000287ffa86, cpumask = 7
rq->sd->groups->sgc:
sched_group_capacity sgc = 0000000011c34dc4: ref = 8, capacity = 906, min_capacity = 906, max_capacity = 906, next_update = 70126, imbalance = 0, id = 7, cpumask = 7
rq->sd->shared:
sched_domain_shared sds = 000000001c246b7a: ref = 8, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000d7f4f9d1, size = 4, id = 0, sds= 0000000024396c1d, sdnuma = (null), sdasym = (null), sdea = 000000002e35d366, sdscs=00000000d7f4f9d1
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000b6a75a4c, size = 4, id = 0, sds= 0000000024396c1d, sdnuma = (null), sdasym = (null), sdea = 0000000052738a20, sdscs=00000000b6a75a4c
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000baa86fe9, size = 4, id = 0, sds= 0000000024396c1d, sdnuma = (null), sdasym = (null), sdea = 00000000ad441716, sdscs=00000000baa86fe9
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 0000000088de345a, size = 4, id = 0, sds= 0000000024396c1d, sdnuma = (null), sdasym = (null), sdea = 00000000f40b8d8f, sdscs=0000000088de345a
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000c5314037, size = 3, id = 4, sds= 000000007b30bb71, sdnuma = (null), sdasym = (null), sdea = 000000009aaa92f2, sdscs=00000000c5314037
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000bcdabb79, size = 3, id = 4, sds= 000000007b30bb71, sdnuma = (null), sdasym = (null), sdea = 000000005357fe0e, sdscs=00000000bcdabb79
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 00000000a7438b3c, size = 3, id = 4, sds= 000000007b30bb71, sdnuma = (null), sdasym = (null), sdea = 00000000779b3b0b, sdscs=00000000a7438b3c
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = (null), size = 1, id = 7, sds= (null), sdnuma = (null), sdasym = (null), sdea = 00000000e6bf0308, sdscs= (null)
-------------------------------------------------

---------------------debug_printf_root_domain----------------------
refcount = 8, span = 0-7, max_cpu_capacity.val = 906, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0
-------------------------------------------------

---------------------debug_printf_cpu_topology----------------------
thread_id = -1, core_id = 0, cluster_id = 0, thread_sibling = 0, core_sibling = 0-3
thread_id = -1, core_id = 1, cluster_id = 0, thread_sibling = 1, core_sibling = 0-3
thread_id = -1, core_id = 2, cluster_id = 0, thread_sibling = 2, core_sibling = 0-3
thread_id = -1, core_id = 3, cluster_id = 0, thread_sibling = 3, core_sibling = 0-3
thread_id = -1, core_id = 0, cluster_id = 1, thread_sibling = 4, core_sibling = 4-6
thread_id = -1, core_id = 1, cluster_id = 1, thread_sibling = 5, core_sibling = 4-6
thread_id = -1, core_id = 2, cluster_id = 1, thread_sibling = 6, core_sibling = 4-6
thread_id = -1, core_id = 0, cluster_id = 2, thread_sibling = 7, core_sibling = 7
-------------------------------------------------
7. The Qcom BSP shows the same state.
8. The sched_domain sd->flags mask is tested as a switch in many places in the code and effectively behaves like a constant. These flags are also exported at /proc/sys/kernel/sched_domain/cpuX/domainY/flags.
9. The balance_callback lists are all empty.
III. Effects of CPU Isolation and Offlining (Debug)
1. An isolated CPU's sched_group->group_weight becomes 0. For example, when medium core CPU4 is isolated, CPU4's sched_group->group_weight = 0, and the medium cluster's DIE-level sched_group->group_weight becomes 2 (3 - 1 = 2).
2. An offline CPU has rq->sd = NULL, which detaches it completely from the sched-domain hierarchy, because a CPU reaches its domains through rq->sd. Its cache-related per-cpu pointers all become NULL (they are restored when the CPU comes back online). The group_weight of the little cluster's DIE-level sched_group is likewise reduced by the offline CPU.
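This matches the mainline teardown path used when domains are rebuilt: CPUs excluded from the new cpu_map are explicitly attached to a NULL domain and the default root domain, which is what makes rq->sd NULL and, via update_top_cache_domain(), clears the cache-related per-cpu pointers. For reference:

// kernel/sched/topology.c
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
    int i;

    rcu_read_lock();
    for_each_cpu(i, cpu_map)
        cpu_attach_domain(NULL, &def_root_domain, i); /* rq->sd becomes NULL */
    rcu_read_unlock();
}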
3. After the only big core, CPU7, is isolated, the singly linked list formed by the DIE-level sched_group.next does not change. After CPU7 is offlined, the DIE-level sched_group.next links become the little and medium groups pointing at each other; the big-core group no longer exists.
4. When a CPU is isolated or offlined, the capacity of its cluster's DIE-level sd->groups->sgc also changes: it is the sum of the capacities of the cluster's CPUs that are online and not isolated.
Effect after offlining cpu2:
---------------------cpu=2, debug_printf_cpu_rq----------------------
rq = 000000001f8a123c: rd = 0000000035a93b24, sd = (null), cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=2, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = (null), size = 1, id = 2, sds= (null), sdnuma = (null), sdasym = (null), sdea = (null), sdscs= (null)
-------------------------------------------------
Effect after additionally isolating cpu3:
---------------------cpu=3, debug_printf_cpu_rq----------------------
rq = 00000000d72c33eb: rd = 0000000014388aae, sd = 0000000005fa314e, cpu_capacity = 230, cpu_capacity_orig = 241, max_idle_balance_cost = 200000
balance_callback:
-------------------------------------------------

---------------------cpu=3, debug_printf_sd_sds_sg_sgc_cpu_rq----------------------
rq->sd:
sched_domain sd = 0000000005fa314e: parent = 000000009e86ff7e, child = (null), groups = 000000000018df41, name = MC, private = 0000000076e60833, shared = 000000005f941234, span = 0-1,3
rq->sd->groups:
sched_group sg = 000000000018df41: next = 0000000000f13ee1, ref = 3, group_weight = 0, sgc = 000000003d6525d0, asym_prefer_cpu = 0, sge = 00000000ab7de946, cpumask = 3
rq->sd->groups->sgc:
sched_group_capacity sgc = 000000003d6525d0: ref = 3, capacity = 230, min_capacity = 230, max_capacity = 230, next_update = 8909791, imbalance = 0, id = 3, cpumask = 3
rq->sd->shared:
sched_domain_shared sds = 000000005f941234: ref = 3, nr_busy_cpus = 2, has_idle_cores = 0, overutilized = 0
rq->sd->parent:
sched_domain sd = 000000009e86ff7e: parent = (null), child = 0000000005fa314e, groups = 0000000052c6c8c4, name = DIE, private = 000000002e14f724, shared = 000000000392d96b, span = 0-1,3-6
rq->sd->parent->groups:
sched_group sg = 0000000052c6c8c4: next = 00000000cf354652, ref = 6, group_weight = 2, sgc = 00000000de302455, asym_prefer_cpu = 0, sge = 0000000042cf348e, cpumask = 0-1,3
rq->sd->parent->groups->sgc:
sched_group_capacity sgc = 00000000de302455: ref = 6, capacity = 460, min_capacity = 230, max_capacity = 230, next_update = 8912959, imbalance = 0, id = 0, cpumask = 0-1,3
rq->sd->parent->shared:
sched_domain_shared sds = 000000000392d96b: ref = 6, nr_busy_cpus = 0, has_idle_cores = 0, overutilized = 0
-------------------------------------------------

---------------------debug_printf_cache----------------------
cache: sd = 0000000005fa314e, size = 3, id = 0, sds= 000000005f941234, sdnuma = (null), sdasym = (null), sdea = 000000009e86ff7e, sdscs=0000000005fa314e
-------------------------------------------------
An offline CPU's rq->sd is NULL, i.e. it has been removed from the sched domains, whereas an isolated CPU is still inside the sched domains (TODO: confirm again).
5. After isolating CPU7, max_cpu_capacity.cpu = 7; after offlining CPU7, max_cpu_capacity.cpu = 4. However, the experiment does not look entirely reliable, and the span contents are not reliable either.
root_domain after isolating cpu7:
---------------------debug_printf_root_domain----------------------
refcount = 8, span = 0-7, max_cpu_capacity.val = 1024, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0
-------------------------------------------------

root_domain after offlining cpu2 and cpu7 and isolating cpu3:
---------------------debug_printf_root_domain----------------------
refcount = 6, span = 0-1,3-6, max_cpu_capacity.val = 790, max_cpu_capacity.cpu = 4, max_cap_orig_cpu = 4, min_cap_orig_cpu = 0
-------------------------------------------------

root_domain after offlining cpu3 and isolating cpu7:
---------------------debug_printf_root_domain----------------------
refcount = 7, span = 0-2,4-7, max_cpu_capacity.val = 790, max_cpu_capacity.cpu = 5, max_cap_orig_cpu = 7, min_cap_orig_cpu = 0
-------------------------------------------------

Also observed once:
---------------------debug_printf_root_domain----------------------
refcount = 3, span = 0-1, max_cpu_capacity.val = 906, max_cpu_capacity.cpu = 7, max_cap_orig_cpu = -1, min_cap_orig_cpu = -1
-------------------------------------------------
6. No matter how CPUs are isolated or offlined, the global cpu_topology[NR_CPUS] array never changes.
7. After a CPU is isolated or offlined, the sched-domain topology is rebuilt!
IV. Supplement
1. CPU-topology members on kernel-5.10
struct cpu_topology {
    int thread_id;                  /* no SMT support, so -1 for every cpu */
    int core_id;                    /* cpu id within each cluster, e.g. 0 1 2 3, 0 1 2, 0 */
    int package_id;                 /* cluster id, e.g. 0 1 2 */
    int llc_id;                     /* id of the first cpu in the cpu's cluster */
    cpumask_t thread_sibling;       /* each cpu_topology[cpu] contains just the cpu itself, e.g. 0 1 2 3 4 5 6 7 */
    cpumask_t core_sibling;         /* cpu mask of the cpu's cluster, e.g. 0-3 for little, 4-6 for medium, 7 for big */
    cpumask_t llc_sibling;
    cpumask_t android_vendor_data1;
};

#define topology_physical_package_id(cpu)   (cpu_topology[cpu].package_id) /* the cpu's cluster id, starting from 0 */
#define topology_core_id(cpu)               (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu)          (&cpu_topology[cpu].core_sibling)
#define topology_sibling_cpumask(cpu)       (&cpu_topology[cpu].thread_sibling)
#define topology_llc_cpumask(cpu)           (&cpu_topology[cpu].llc_sibling)
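These fields back the standard sysfs topology interface, so they can be checked from the shell; on the 4+3+1 layout above, a medium core would read roughly as follows (expected values inferred from the comments above):

# cat /sys/devices/system/cpu/cpu4/topology/physical_package_id    # package_id  -> 1
# cat /sys/devices/system/cpu/cpu4/topology/core_id                # core_id     -> 0
# cat /sys/devices/system/cpu/cpu4/topology/core_siblings_list     # core_sibling -> 4-6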
Reference: https://www.cnblogs.com/hellokitty2/p/15487560.html