Skip to main content
 首页 » 操作系统

Linux内核中的各种时间频率

2022年07月19日167duanxz

一、各种时间的打印

1. per-cpu的各种类型的使用时间

# ls -l /proc/stat 
-r--r--r-- 1 root root 0 2021-01-01 19:46 /proc/stat 
# cat /proc/stat 
cpu  203632 46353 386930 31815547 3869 274339 68486 0 0 0 
cpu0 26704 7709 39012 3916272 49 87626 23620 0 0 0 
cpu1 14682 9898 25125 4055433 68 8755 3338 0 0 0 
cpu2 5588 8202 7818 4098854 47 2215 901 0 0 0 
cpu3 21765 10971 40654 4014299 341 19606 3900 0 0 0 
cpu4 28157 1362 52559 3983416 725 25697 6661 0 0 0 
cpu5 58390 2212 140189 3718682 1273 96146 17063 0 0 0 
cpu6 42753 1587 70162 3930832 1008 32193 11836 0 0 0 
cpu7 5588 4407 11408 4097755 355 2097 1164 0 0 0 
intr 71408793 0 32194638 9259224 0 0 56084 91247 0 0 0 0 0 0 0 0 0 0 0 0 0 23940117 0 0 0 0 1022833 0 0 0 0 0 0 0 0 739 1176966 83 213 253 2243389 758 207033 6503 1916 0 0 9173 0 12210 0 0 0 0 0 140 0 0 10 2058 554 0 0 0 18070 0 0 5083 0 0 0 0 224 0 48 0 0 0 2984 0 0 0 29162 0 49591 0 9466 0 0 0 0 0 0 0 0 159 159 0 0 374 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8365 0 0 0 0 25095 0 0 0 3686 0 0 7767 0 0 0 0 0 0 0 0 0 16034 0 0 0 0 0 231848 0 0 0 25090 0 0 0 3558 0 0 8736 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3144 0 3036 181465 0 0 1400 2 1403 1 504929 32592 637 0 0 12 15 0 0 3 0 3 30 0 0 2 0 6653 9 0 279 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 168 0 0 0 0 96 0 8 0 0 0 0 0 0 0 0 0 0 520 40 0 0 0 0 131 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 98 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 133 0 1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 8 0 0 0 2 67 98 126 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
ctxt 61029826 
btime 1609501574 
processes 27212 
procs_running 1 
procs_blocked 0 
softirq 8564148 1172 1338008 1 3 243852 0 1611 5229125 0 1750376

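对应的时间类型定义在内核头文件 include/linux/kernel_stat.h。注意:上面输出中 cpu[0...7] 后的各列并不是按该枚举的定义顺序排列,而是按 show_stat() 的打印顺序,依次为 user、nice、system、idle、iowait、irq、softirq、steal、guest、guest_nice: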

/* 
 * 'kernel_stat.h' contains the definitions needed for doing 
 * some kernel statistics (CPU usage, context switches ...), 
 * used by rstatd/perfmeter 
 */ 
/* Index of each CPU-time category; NR_STATS, being last, is the number of categories. */
enum cpu_usage_stat { 
    CPUTIME_USER, // CPU time spent in user space 
    CPUTIME_NICE, // user-space CPU time of tasks with a high nice value (low priority) 
    CPUTIME_SYSTEM, // CPU time spent in kernel mode 
    CPUTIME_SOFTIRQ, // CPU time spent handling softirqs 
    CPUTIME_IRQ, // CPU time spent handling hard interrupts 
    CPUTIME_IDLE, // CPU idle time 
    CPUTIME_IOWAIT, // CPU time spent waiting for I/O 
    CPUTIME_STEAL, // time a guest OS spent waiting for a real CPU 
    CPUTIME_GUEST, // time consumed by a guest OS 
    CPUTIME_GUEST_NICE, // guest OS time of tasks with a high nice value (low priority) 
    NR_STATS, 
};

打印函数为 fs/proc/stat.c 中的 show_stat(),打印前会经 nsec_to_clock_t() 换算,单位为 clock_t(即 1/USER_HZ 秒)。在 Linux 系统中,cputime 模块具有重要的意义:它记录了设备中所有 CPU 在各个状态下经过的时间。我们所熟悉的 top 工具就是用 cputime 换算出 CPU 利用率的。

2. per-cluster的在其各个频点下驻留的时间

cpufreq_stats 模块的开启需要使能 CONFIG_CPU_FREQ_STAT 宏。当系统使能该特性后,cpufreq driver sysfs下生成 stats 目录:

/sys/devices/system/cpu/cpufreq/policy0/stats # ls -l 
total 0 
--w-------    reset //可以对统计进行reset 
-r--r--r--    time_in_state //本cluster在各频点下驻留的时间,单位jiffy 
-r--r--r--    total_trans //频点之间总切换次数 
-r--r--r--    trans_table //频点转换表 
 
# cat /sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state 
1800000 5647 
1700000 7 
... 
200000 4221664

表示的是该 cpufreq policy 内分别处于各个频点的时间,单位为 jiffies。有了这个功能,我们就能获取每个 cluster 运行最多的频点是哪些,进而针对性的对系统功耗性能进行优化。

3. per-线程在各个频点下驻留的时间

# cat /proc/913/time_in_state 
cpu0 
1800000 0 
... 
1250000 2638 
... 
200000 0 
cpu4 
2850000 0 
... 
200000 0 
cpu7 
3050000 0 
... 
1300000 9

该节点记录了该线程在各个 cpufreq policy 的各个频点下驻留的时间, 单位为 clock_t。clock_t 是由 USER_HZ 来决定,该系统中 USER_HZ 为250,则 clock_t 代表4ms。

4. per-cpu的cpuidle time

# ls -l /sys/devices/system/cpu/cpu0/cpuidle 
drwxr-xr-x    driver 
drwxr-xr-x    state0 
drwxr-xr-x    state1 
drwxr-xr-x    state2 
drwxr-xr-x    state3 
drwxr-xr-x    state4 
drwxr-xr-x    state5 
drwxr-xr-x    state6 
 
# ls -l /sys/devices/system/cpu/cpu0/cpuidle/state0 
... 
-r--r--r-- 1 root root 4096 2021-01-02 19:51 time 
 
# cat /sys/devices/system/cpu/cpu0/cpuidle/state*/time 
2675541339 
13746613328 
0 
0 
460 
24621035515 
0

cpuidle time 模块的工作就是记录每个cpu在各层深度中睡了多久,即每次开机以来,每个核在每个 C-state下的时长,单位为 us。


二、各种时间统计原理

1. per-cpu的各种类型的使用时间

cputime 模块代码位于 kernel/sched/cputime.c。由上图可见,统计的时间精度是1个tick。当每次timer中断来临时,kernel经过由中断处理函数调用到 irqtime_account_process_tick()(需要使能特性宏 CONFIG_IRQ_TIME_ACCOUNTING,将irq/softirq的统计囊括其中)。通过判断当前task是否为 softirq/user tick/idle进程/guest系统进程/内核进程,将经历的cpu时间(通常为1个tick)统计到各个类型中去。

/*
 * irqtime_account_process_tick - charge tick time to a task and to cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: non-zero when the tick came from userspace
 * @ticks: number of ticks to account; the budget is TICK_NSEC * @ticks
 *
 * Whatever account_other_time() reports is deducted from the budget first.
 * If that consumes the whole budget nothing further is charged. Otherwise
 * the remainder goes to exactly one category, tested in this order:
 *   1. softirq - @p is this CPU's ksoftirqd
 *   2. user    - @user_tick is set
 *   3. idle    - @p is this runqueue's idle task
 *   4. guest   - @p has PF_VCPU set
 *   5. system  - everything else
 *
 * The hardirq check is done for both system and user time because no timer
 * goes off while in hardirq, so there may never be an opportunity to update
 * it solely in system time. p->stime and friends are only updated on system
 * time, not on irq/softirq time, as those no longer count in the task's
 * exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick, int ticks)
{
    u64 remaining = TICK_NSEC * ticks;
    u64 already_accounted;

    /*
     * On return from idle many ticks can be accounted at once, including
     * some ticks of steal, irq and softirq time. Those are removed from
     * the amount that would otherwise go to idle (or to user or system
     * time). Because of rounding, the other time can occasionally exceed
     * the tick budget, in which case there is nothing left to charge.
     */
    already_accounted = account_other_time(ULONG_MAX);
    if (already_accounted >= remaining)
        return;
    remaining -= already_accounted;

    /*
     * ksoftirqd time is not accounted in cpu_softirq_time, so it is
     * handled separately here; p->stime must also be updated for
     * ksoftirqd.
     */
    if (this_cpu_ksoftirqd() == p) {
        account_system_index_time(p, remaining, CPUTIME_SOFTIRQ);
        return;
    }

    if (user_tick) {
        account_user_time(p, remaining);
        return;
    }

    if (p == this_rq()->idle) {
        account_idle_time(remaining);
        return;
    }

    /* From here on it is either guest time or plain system time. */
    if (p->flags & PF_VCPU) {
        account_guest_time(p, remaining);
        return;
    }

    account_system_index_time(p, remaining, CPUTIME_SYSTEM);
}

2. per-cluster的在其各个频点下驻留的时间

cpufreq_times 模块代码位于 drivers/cpufreq/cpufreq_times.c(注:该模块提供的是按线程统计的 /proc/<pid>/time_in_state,对应第一部分第 3 小节的内容)。它的更新涉及到 cpufreq driver 与 cputime 两个模块。当 cpufreq policy 频率改变时,cpufreq driver 通过 cpufreq_notify_transition(普通调频模式)或者 cpufreq_driver_fast_switch(快速调频模式)调用 cpufreq_times_record_transition 函数,通知 cpufreq_times 模块当前该 policy 处于哪一个频点。当 cputime 模块接收到 timer 中断后,会调用 cpufreq_acct_update_power(),将该 tick 添加到 cpufreq_times 模块当前任务及当前频点的统计上。

3. per-线程在各个频点下驻留的时间

cpufreq_stats 模块代码位于 drivers/cpufreq/cpufreq_stats.c(注:该模块提供的是按 cpufreq policy 统计的 stats/time_in_state,对应第一部分第 2 小节的内容)。它的更新有些类似于 cpufreq_times,但与其不同的是只涉及 cpufreq driver 一个外部模块。当 cpufreq policy 频率改变时,cpufreq driver 通过 cpufreq_notify_transition(普通调频模式)或者 cpufreq_driver_fast_switch(快速调频模式)调用 cpufreq_stats_record_transition 函数,通知 cpufreq_stats 模块此刻发生调频以及要切换到哪一个目标频点。cpufreq_stats 模块则调用 cpufreq_stats_update 获取当前 jiffies,并与上一次更新时的 jiffies 相减,最后将差值添加到上个频点的时间统计中:

//drivers\cpufreq\cpufreq_stats.c 
/*
 * cpufreq_stats_update - credit elapsed time to the last recorded frequency
 * @stats: statistics record to update
 * @time: start of the interval being credited
 *
 * Adds (get_jiffies_64() - @time) to the time_in_state slot selected by
 * stats->last_index, then stores the sampled value in stats->last_time.
 */
static void cpufreq_stats_update(struct cpufreq_stats *stats, unsigned long long time)
{
    unsigned long long now = get_jiffies_64();
    unsigned long long elapsed = now - time;

    stats->time_in_state[stats->last_index] += elapsed;
    stats->last_time = now;
}

4. per-cpu的cpuidle time

cpuidle time 模块代码在 drivers/cpuidle/cpuidle.c。当某个 cpu runqueue 上没有 runnable 状态的任务时,该cpu调度到idle进程,经过层层调用,最后执行到 cpuidle_enter_state()函数。

/** 
 * cpuidle_enter_state - enter the state and update stats 
 * @dev: cpuidle device for this cpu 
 * @drv: cpuidle driver for this cpu 
 * @index: index into the states table in @drv of the state to enter 
 */ 
/*
 * Abridged excerpt: each "..." line stands for code omitted from this
 * listing. NOTE(review): `target_state` and `diff` are used below without a
 * visible declaration, so they are presumably declared in the omitted code,
 * as is the function's return statement.
 */
int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) //drivers/cpuidle/cpuidle.c 
{ 
    int entered_state; 
    ktime_t time_start, time_end; 
     
    ... 
    /* Sample the local clock just before entering the idle state. */
    time_start = ns_to_ktime(local_clock()); 
    ... 
    /* Call the state's enter callback; its return value is recorded as the state entered. */
    entered_state = target_state->enter(dev, drv, index); 
    ... 
    /* Sample the local clock again after the enter callback returns. */
    time_end = ns_to_ktime(local_clock()); 
    ... 
    /* Time between the two samples. */
    diff = ktime_sub(time_end, time_start); 
    ... 
    /* Record this interval and add it to the running total of the entered state. */
    dev->last_residency_ns = diff; 
    dev->states_usage[entered_state].time_ns += diff; 
    ... 
}

三、内核中打印时间

1. 内核中打印UTC时间:

/*
 * kernel_printk_utc_time - log the current time as a UTC timestamp
 * @annotation: free-form tag printed right after the "PM: wakeup_count" prefix
 *
 * Reads the current time with getnstimeofday(), breaks the seconds down into
 * calendar fields with rtc_time_to_tm(), and prints the result at info level
 * as YYYY-MM-DD hh:mm:ss.nnnnnnnnn. Based on kernel-4.19.
 *
 * NOTE(review): getnstimeofday()/rtc_time_to_tm() appear to be legacy
 * interfaces; newer kernels offer ktime_get_real_ts64()/rtc_time64_to_tm().
 * Confirm against the target kernel version before reusing this snippet.
 */
static void kernel_printk_utc_time(char *annotation)
{
    struct timespec ts;
    struct rtc_time tm;

    getnstimeofday(&ts);
    rtc_time_to_tm(ts.tv_sec, &tm);

    /*
     * tm_year is offset by 1900 and tm_mon by 1 for display. tv_nsec is a
     * signed long, so it is printed with %ld rather than %lu.
     */
    pr_info("PM: wakeup_count %s %d-%02d-%02d %02d:%02d:%02d.%09ld UTC\n",
        annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
        tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
}

2. kernel log中打印内核时间

/*
 * print_time - format a nanosecond timestamp as "[seconds.microseconds]"
 * @ts: timestamp in nanoseconds (printk.c passes ts = local_clock())
 * @buf: destination buffer; no size is passed, so the caller must make it
 *       large enough for the formatted text
 *
 * Returns the value returned by sprintf().
 */
static size_t print_time(u64 ts, char *buf)
{
    unsigned long nsec_part;
    unsigned long sec_part;

    /* do_div() leaves the quotient (whole seconds) in ts and yields the remainder. */
    nsec_part = do_div(ts, 1000000000);
    sec_part = (unsigned long)ts;

    /* The remainder is scaled from nanoseconds to microseconds for display. */
    return sprintf(buf, "[%5lu.%06lu]", sec_part, nsec_part / 1000);
}

printk()的打印路径:

printk //printk.c 
    vprintk_func 
        vprintk_default 
            vprintk_emit 
                vprintk_store //printk.c 
                    log_output //printk.c 
                        log_store //ts_nsec参数传0了,若传个案发的时间将更精确一些

3. 获取一段代码的执行时间:

/* Measure how long a section of code takes using two local_clock() samples. */
t1 = local_clock();
/* ... code being timed goes here ... */
t2 = local_clock();
delta_t = t2 - t1;

本文参考链接:https://www.cnblogs.com/hellokitty2/p/15666357.html