Skip to main content
 首页 » 操作系统

Linux tracer ftrace笔记(3)——宏展开和hook和注册

2022年07月19日30grandyang

一、ftrace的宏

1. struct tracepoint 结构

使用 struct tracepoint 变量来描述一个 trace point。

//include/linux/tracepoint-defs.h 
struct tracepoint { 
    const char *name; //Name of the trace point. The kernel manages all trace points through a hash table; once the hash slot is found, the name is what identifies the specific trace point. 
    struct static_key key; //Trace point state: 0 means disabled, 1 means enabled. static_key_false(&key) effectively just tests whether key is true. 
    struct static_call_key *static_call_key; //DEFINE_TRACE_FN points this at __SCK__tp_func_##_name 
    void *static_call_tramp; //DEFINE_TRACE_FN initializes this to NULL 
    void *iterator; //DEFINE_TRACE_FN points this at __traceiter_##_name 
    int (*regfunc)(void); //Function used when adding probe functions; DEFINE_TRACE_FN fills it from its _reg argument 
    void (*unregfunc)(void); //Function used when removing probe functions; DEFINE_TRACE_FN fills it from its _unreg argument 
    struct tracepoint_func __rcu *funcs; //All probe functions attached to this trace point; stored as an array 
}; 
 
/* One entry of the struct tracepoint::funcs array, i.e. one registered probe. */ 
struct tracepoint_func { 
    void *func; //Probe callback; __traceiter_##_name casts it to void (*)(void *, proto) before calling it 
    void *data; //Passed to the probe as its first argument 
    int prio; //Priority supplied at registration; NOTE(review): presumably orders entries inside funcs[] - confirm against tracepoint.c 
};

static key使用见:https://www.cnblogs.com/hellokitty2/p/15026568.html

2. DEFINE_TRACE_FN 展开后是

/* 
 * include/linux/tracepoint.h 
 * Defines a struct tracepoint named __tracepoint_##_name, 
 * then defines a function named __traceiter_##_name that calls every function in the struct tracepoint::funcs[] member array; the array must be terminated by a NULL entry. 
 */ 
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \ 
    static const char __tpstrtab_##_name[]                \ 
    __section("__tracepoints_strings") = #_name;            \ 
    extern struct static_call_key __SCK__tp_func_##_name;    \ 
    int __traceiter_##_name(void *__data, proto);            \ 
    struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \ 
        .name = __tpstrtab_##_name,                \ 
        .key = STATIC_KEY_INIT_FALSE,                \ 
        .static_call_key = &__SCK__tp_func_##_name,    \ 
        .static_call_tramp = NULL, \ 
        .iterator = &__traceiter_##_name,            \ 
        .regfunc = _reg,                    \ 
        .unregfunc = _unreg,                    \ 
        .funcs = NULL    \ 
    };                    \ 
    __TRACEPOINT_ENTRY(_name);                    \ 
    int __nocfi __traceiter_##_name(void *__data, proto)            \ 
    {                                \ 
        struct tracepoint_func *it_func_ptr;            \ 
        void *it_func;                        \ 
        it_func_ptr = rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ 
        if (it_func_ptr) {                    \ 
            do {                        \ 
                it_func = (it_func_ptr)->func;        \ 
                __data = (it_func_ptr)->data;        \ 
                ((void(*)(void *, proto))(it_func))(__data, args); \ 
            } while ((++it_func_ptr)->func);        \ 
        }                            \ 
        return 0;                        \ 
    }                                \ 
    extern struct static_call_key __SCK__tp_func_##_name;  \ 
    extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \ 
    struct static_call_key __SCK__tp_func_##_name = {      \ 
        .func = __traceiter_##_name,                        \ 
    }

3. __DECLARE_TRACE 宏展开后就是:

/*
 * include/linux/tracepoint.h
 * Declares the per-tracepoint helper functions. The commonly used ones are
 * register_trace_##name() and trace_##name##_enabled(). rcuidle callers get
 * a dedicated trace_##name##_rcuidle(), and register_trace_prio_##name()
 * registers a probe together with a priority.
 *
 * trace_##name() and trace_##name##_rcuidle() only enter their body when
 * __tracepoint_##name.key is true, and return early when cond is false.
 *
 * Every expanded `do { ... } while (0)` is terminated with ';' so that it
 * forms a complete statement before the code that follows it.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
    extern int __traceiter_##name(data_proto);            \
    extern struct static_call_key __SCK__tp_func_##name;        \
    extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
    extern struct tracepoint __tracepoint_##name;            \
    static inline void __nocfi trace_##name(proto)                \
    {                                \
        if (static_key_false(&__tracepoint_##name.key))        \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                     \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);            \
                }                            \
                preempt_enable_notrace();                \
            } while (0);    \
        if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {        \
            rcu_read_lock_sched_notrace();            \
            rcu_dereference_sched(__tracepoint_##name.funcs);\
            rcu_read_unlock_sched_notrace();        \
        }                            \
    }                                \
    static inline void trace_##name##_rcuidle(proto)        \
    {                                \
        if (static_key_false(&__tracepoint_##name.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                     \
                                            \
                /* srcu can't be used from NMI */            \
                WARN_ON_ONCE(in_nmi());            \
                                            \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                                            \
                /*                            \
                 * For rcuidle callers, use srcu since sched-rcu    \
                 * doesn't work from the idle path.         \
                 */                         \
                __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                rcu_irq_enter_irqson();             \
                                            \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);            \
                }                            \
                                            \
                rcu_irq_exit_irqson();                \
                srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                            \
                preempt_enable_notrace();                \
            } while (0);    \
    }    \
    static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return tracepoint_probe_register(&__tracepoint_##name, (void *)probe, data);    \
    }                                \
    static inline int register_trace_prio_##name(void (*probe)(data_proto), void *data, int prio) \
    {                                \
        return tracepoint_probe_register_prio(&__tracepoint_##name, (void *)probe, data, prio); \
    }                                \
    static inline int unregister_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return tracepoint_probe_unregister(&__tracepoint_##name, (void *)probe, data);    \
    }                                \
    static inline void check_trace_callback_type_##name(void (*cb)(data_proto))    \
    {                                \
    }                                \
    static inline bool trace_##name##_enabled(void)                    \
    {                                \
        return static_key_false(&__tracepoint_##name.key);    \
    }

trace_##name(proto) 中判断 __tracepoint_##name.key 的值为真才会调用执行各个钩子函数,在下面路径中会将这个key设置为真。

register_trace_##name() //具体tracepoint的define位置 
    tracepoint_probe_register //tracepoint.c 
        tracepoint_probe_register_prio //tracepoint.c 
            tracepoint_add_func //tracepoint.c 
                static_key_enable(&tp->key);

也就是说注册了 hook 才会真,否则为假。 

4. 使用 DECLARE_TRACE 的宏

#define DEFINE_TRACE(name, proto, args)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); 
 
//为空 
#define TRACE_EVENT_FLAGS(event, flag) 
 
//为空 
#define TRACE_EVENT_PERF_PERM(event, expr...) 
 
/* 
 * include/linux/tracepoint-defs.h 
 * Not recommended for direct use; this header is one that gets included at the very top. 
 * Expands to an extern declaration of __tracepoint_##tp. 
 */ 
#define DECLARE_TRACEPOINT(tp) extern struct tracepoint __tracepoint_##tp 
 
/* 
 * Recommended. It does the same job as trace_##name##_enabled(void), but is 
 * safe to use in header files, whereas trace_##name##_enabled(void) is not 
 * safe in headers - presumably because a function must not be defined 
 * more than once. 
 */ 
#define tracepoint_enabled(tp) static_key_false(&(__tracepoint_##tp).key) 
 
/* 
 * include/linux/tracepoint.h 
 * Expands to the set of functions produced by __DECLARE_TRACE, including register_trace_##name, trace_##name##_enabled, etc. 
 * The condition passed on is cpu_online(raw_smp_processor_id()). 
 */ 
#define DECLARE_TRACE(name, proto, args)                \ 
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args)) 
/* 
 * The difference from DECLARE_TRACE is that the 4th argument is additionally ANDed with the cond parameter. It matters mainly in trace_##name and trace_##name##_rcuidle, 
 * which return immediately when the condition evaluates to false. 
 */ 
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)        \ 
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), PARAMS(void *__data, proto), PARAMS(__data, args)) 
 
/* include/linux/tracepoint.h */ 
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) 
 
#define DEFINE_EVENT(template, name, proto, args)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond)) 
 
#define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#define TRACE_EVENT_FN(name, proto, args, struct, assign, print, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, assign, print, reg, unreg) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond)) 
 
#define TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, print) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond)) 
 
#define TRACE_EVENT_FLAGS(event, flag) 
 
#define TRACE_EVENT_PERF_PERM(event, expr...) 
 
/* No-op variant: trace_##name() has an empty body and trace_##name##_enabled() always returns false. */ 
#define DECLARE_EVENT_NOP(name, proto, args)                \ 
    static inline void trace_##name(proto)                \ 
    { }                                \ 
    static inline bool trace_##name##_enabled(void)            \ 
    {                                \ 
        return false;                        \ 
    } 
 
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) 
 
#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print) 
 
#define DEFINE_EVENT_NOP(template, name, proto, args)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

tracepoint.h 中的定义可能不是最终的,因为有的文件中会先执行 #undef XXX,然后重新进行 define。观察可以发现,这些宏主要使用的是 DECLARE_TRACE,对照展开后的函数,显然是不完整的,因为 DEFINE_TRACE 相关的部分没有。因此每个trace应该还存在对 DEFINE_TRACE 进行使用的一部分。两者都存在,一个trace才圆满。


5. 使用 DEFINE_TRACE 的部分

/* include/trace/define_trace.h */ 
#undef TRACE_EVENT 
#define TRACE_EVENT(name, proto, args, tstruct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#undef TRACE_EVENT_CONDITION 
#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ 
    TRACE_EVENT(name, PARAMS(proto), PARAMS(args), PARAMS(tstruct), PARAMS(assign),    PARAMS(print)) 
 
#undef TRACE_EVENT_FN 
#define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg)    \ 
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 
 
#undef TRACE_EVENT_FN_COND 
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, assign, print, reg, unreg)    \ 
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 
 
#undef TRACE_EVENT_NOP 
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) 
 
#undef DEFINE_EVENT_NOP 
#define DEFINE_EVENT_NOP(template, name, proto, args) 
 
#undef DEFINE_EVENT 
#define DEFINE_EVENT(template, name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#undef DEFINE_EVENT_FN 
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ 
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 
 
#undef DEFINE_EVENT_PRINT 
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)    \ 
    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 
 
#undef DEFINE_EVENT_CONDITION 
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ 
    DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 
 
#undef DECLARE_TRACE 
#define DECLARE_TRACE(name, proto, args)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

6. EXPORT_TRACEPOINT_SYMBOL_GPL 和 EXPORT_TRACEPOINT_SYMBOL

导出这些trace符号后,才能在模块中使用

/* 
 * include/linux/tracepoint.h 
 * After expansion: exports __tracepoint_##name, __traceiter_##name and 
 * __SCK__tp_func_##name through EXPORT_SYMBOL_GPL(). 
 */ 
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)                \ 
    EXPORT_SYMBOL_GPL(__tracepoint_##name);                \ 
    EXPORT_SYMBOL_GPL(__traceiter_##name);                \ 
    EXPORT_SYMBOL_GPL(__SCK__tp_func_##name); 
 
/* Exports the same three per-tracepoint symbols through EXPORT_SYMBOL(). */ 
#define EXPORT_TRACEPOINT_SYMBOL(name)                    \ 
    EXPORT_SYMBOL(__tracepoint_##name);                \ 
    EXPORT_SYMBOL(__traceiter_##name);                \ 
    EXPORT_SYMBOL(__SCK__tp_func_##name)

7. 定义一个trace,TRACE_EVENT 各个成员使用的宏

/* include/linux/tracepoint.h */ 
#define PARAMS(args...) args 
#define TP_PROTO(args...)    args 
#define TP_ARGS(args...)    args 
#define TP_CONDITION(args...)    args 
//include/trace/trace_events.h 
#define TP_STRUCT__entry(args...) args 
#define TP_fast_assign(args...) args 
#define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)

include/trace/events/sched.h 文件中定义了大量的CPU调度相关的trace,但是它只include了 linux/tracepoint.h 文件,说明其使用的宏全部都是来自linux/tracepoint.h 文件的,但是 tracepoint.h 中又包含了其它头文件,不排除其它头文件中又包含了其它头文件,比如 include/trace/trace_events.h 。

8. 以 sched_migrate_task 为例来看 TRACE_EVENT

//include/trace/events/sched.h 
TRACE_EVENT(sched_migrate_task, 
 
    TP_PROTO(struct task_struct *p, int dest_cpu), 
 
    TP_ARGS(p, dest_cpu), 
 
    TP_STRUCT__entry( 
        __array(    char,    comm,    TASK_COMM_LEN    ) 
        __field(    pid_t,    pid            ) 
        __field(    int,    prio            ) 
        __field(    int,    orig_cpu        ) 
        __field(    int,    dest_cpu        ) 
        __field(    int,    running            ) 
    ), 
 
    TP_fast_assign( 
        memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 
        __entry->pid        = p->pid; 
        __entry->prio        = p->prio; /* XXX SCHED_DEADLINE */ 
        __entry->orig_cpu    = task_cpu(p); 
        __entry->dest_cpu    = dest_cpu; 
        __entry->running    = (p->state == TASK_RUNNING); 
    ), 
 
    TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d", 
          __entry->comm, __entry->pid, __entry->prio, 
          __entry->orig_cpu, __entry->dest_cpu, 
          __entry->running) 
);

include/linux/tracepoint.h 中有注释:__field(pid_t, prev_pid) 等于 pid_t prev_pid; __array(char, prev_comm, TASK_COMM_LEN) 等于 char prev_comm[TASK_COMM_LEN];
声明的 'local variable' 叫做 '__entry',可以在 TP_fast_assign 中使用 __entry->XX 来引用。TP_STRUCT__entry 指定环形缓冲区中的存储格式,也是 /sys/kernel/debug/tracing/events/<*>/format 导出到用户空间的格式。

按照如下宏定义进行展开:

#define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 
#define DECLARE_TRACE(name, proto, args)                \ 
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args)) 
//直接映射也就是: 
#define TRACE_EVENT(name, proto, args, struct, assign, print) \ 
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args)) 
 
 
#define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 
#define DEFINE_TRACE(name, proto, args)        DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); 
//直接映射也就是: 
#define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

全部展开后为:

/*
 * Hand expansion of __DECLARE_TRACE for the sched_migrate_task event:
 * name is sched_migrate_task, proto is (struct task_struct *p, int dest_cpu),
 * args is (p, dest_cpu) and cond is cpu_online(raw_smp_processor_id()).
 * The #define line is kept only as a label for the listing.
 *
 * Every expanded `do { ... } while (0)` is terminated with ';' so that it
 * forms a complete statement before the code that follows it.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
    extern int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \
    extern struct static_call_key __SCK__tp_func_sched_migrate_task;        \
    extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;    \
    extern struct tracepoint __tracepoint_sched_migrate_task;            \
    static inline void __nocfi trace_sched_migrate_task(struct task_struct *p, int dest_cpu)                \
    {                                \
        if (static_key_false(&__tracepoint_sched_migrate_task.key))        \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!cpu_online(raw_smp_processor_id()))                        \
                    return;                     \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                }                            \
                preempt_enable_notrace();                \
            } while (0);    \
        if (IS_ENABLED(CONFIG_LOCKDEP) && cpu_online(raw_smp_processor_id())) {        \
            rcu_read_lock_sched_notrace();            \
            rcu_dereference_sched(__tracepoint_sched_migrate_task.funcs);\
            rcu_read_unlock_sched_notrace();        \
        }                            \
    }                                \
    static inline void trace_sched_migrate_task_rcuidle(struct task_struct *p, int dest_cpu)        \
    {                                \
        if (static_key_false(&__tracepoint_sched_migrate_task.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!cpu_online(raw_smp_processor_id()))                        \
                    return;                     \
                                            \
                /* srcu can't be used from NMI */            \
                WARN_ON_ONCE(in_nmi());            \
                                            \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                                            \
                /*                            \
                 * For rcuidle callers, use srcu since sched-rcu    \
                 * doesn't work from the idle path.         \
                 */                         \
                __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                rcu_irq_enter_irqson();             \
                                            \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                }                            \
                                            \
                rcu_irq_exit_irqson();                \
                srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                            \
                preempt_enable_notrace();                \
            } while (0);    \
    }    \
    static inline int register_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
    {                                \
        return tracepoint_probe_register(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
    }                                \
    static inline int register_trace_prio_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data, int prio) \
    {                                \
        return tracepoint_probe_register_prio(&__tracepoint_sched_migrate_task, (void *)probe, data, prio); \
    }                                \
    static inline int unregister_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
    {                                \
        return tracepoint_probe_unregister(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
    }                                \
    static inline void check_trace_callback_type_sched_migrate_task(void (*cb)(void *__data, struct task_struct *p, int dest_cpu))    \
    {                                \
    }                                \
    static inline bool trace_sched_migrate_task_enabled(void)                    \
    {                                \
        return static_key_false(&__tracepoint_sched_migrate_task.key);    \
    }
 
/* 
 * Hand expansion of DEFINE_TRACE_FN for the sched_migrate_task event, with 
 * NULL substituted for _reg and _unreg. The #define line is kept only as a 
 * label for the listing. It defines __tracepoint_sched_migrate_task and the 
 * iterator __traceiter_sched_migrate_task, which calls each entry of funcs[] 
 * until it reaches an entry whose func is NULL. 
 */ 
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \ 
        static const char __tpstrtab_sched_migrate_task[]                \ 
        __section("__tracepoints_strings") = "sched_migrate_task";            \ 
        extern struct static_call_key __SCK__tp_func_sched_migrate_task;    \ 
        int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \ 
        struct tracepoint __tracepoint_sched_migrate_task    __used __section("__tracepoints") = {    \ 
            .name = __tpstrtab_sched_migrate_task,             \ 
            .key = STATIC_KEY_INIT_FALSE,                \ 
            .static_call_key = &__SCK__tp_func_sched_migrate_task, \ 
            .static_call_tramp = NULL, \ 
            .iterator = &__traceiter_sched_migrate_task,            \ 
            .regfunc = NULL,                    \ 
            .unregfunc = NULL,                    \ 
            .funcs = NULL    \ 
        };                    \ 
        __TRACEPOINT_ENTRY(sched_migrate_task);                    \ 
        int __nocfi __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu)            \ 
        {                                \ 
            struct tracepoint_func *it_func_ptr;            \ 
            void *it_func;                        \ 
            it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \ 
            if (it_func_ptr) {                    \ 
                do {                        \ 
                    it_func = (it_func_ptr)->func;        \ 
                    __data = (it_func_ptr)->data;        \ 
                    ((void(*)(void *, struct task_struct *p, int dest_cpu))(it_func))(__data, p, dest_cpu); \ 
                } while ((++it_func_ptr)->func);        \ 
            }                            \ 
            return 0;                        \ 
        }                                \ 
        extern struct static_call_key __SCK__tp_func_sched_migrate_task;  \ 
        extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;        \ 
        struct static_call_key __SCK__tp_func_sched_migrate_task = {      \ 
            .func = __traceiter_sched_migrate_task,                        \ 
        }

TODO: 其它部分是怎么起作用的?

从展开后的内容可以看到,当调用 trace_sched_migrate_task() 进行trace的时候,会调用 __traceiter_sched_migrate_task() 来遍历 struct tracepoint::funcs 数组中的每一个函数进行trace,也就是说一个trace上可以注册多个hook函数

若使用 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_migrate_task) 导出,上面加黑加粗的 __tracepoint_sched_migrate_task __traceiter_sched_migrate_task __SCK__tp_func_sched_migrate_task 三个符号会被导出来。

9. 一个trace上注册多个hook

既然一个trace上可以注册多个hook,那么一定会涉及到这些hook函数的调用次序的问题,见 tracepoint_probe_register 实现可知,有一个默认优先级 TRACEPOINT_DEFAULT_PRIO=10,注册函数中会传递给 struct tracepoint_func::prio,在插入到 struct tracepoint::funcs 数组时会判断优先级,优先级数值越大,越插在靠前的位置,相同优先级的话,后注册的插在后面。 比如此例子中,注册默认优先级的使用函数 register_trace_sched_migrate_task,自己指定优先级使用函数 register_trace_prio_sched_migrate_task。

/*
 * Register @probe (with its private @data) on tracepoint @tp using the
 * default priority. Returns whatever tracepoint_probe_register_prio() returns.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
    const int default_prio = TRACEPOINT_DEFAULT_PRIO;

    return tracepoint_probe_register_prio(tp, probe, data, default_prio);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

二、Google搞的vendor hook

1. hook 的 DEFINE_HOOK_FN 解析后是

//include/trace/hooks/vendor_hooks.h 
/*
 * Vendor-hook counterpart of DEFINE_TRACE_FN: defines the struct tracepoint
 * __tracepoint_##_name and the iterator __traceiter_##_name, which calls
 * every probe in funcs[] until it reaches an entry whose func is NULL.
 *
 * The annotations inside the body use block comments: a line comment placed
 * before a line-continuation backslash would absorb the following macro
 * lines.
 */
#define DEFINE_HOOK_FN(_name, _reg, _unreg, proto, args)        \
    static const char __tpstrtab_##_name[]                \
    __section("__tracepoints_strings") = #_name;            \
    extern struct static_call_key __SCK__tp_func_##_name;    \
    int __traceiter_##_name(void *__data, proto);            \
    struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \
        .name = __tpstrtab_##_name,             \
        .key = STATIC_KEY_INIT_FALSE,                \
        .static_call_key = &__SCK__tp_func_##_name,    \
        .static_call_tramp = NULL,    \
        .iterator = &__traceiter_##_name,            \
        .regfunc = _reg,                    \
        .unregfunc = _unreg,                    \
        .funcs = NULL };                    \
    __TRACEPOINT_ENTRY(_name);                    \
    int __nocfi __traceiter_##_name(void *__data, proto)            \
    {                                \
        struct tracepoint_func *it_func_ptr;            \
        void *it_func;                        \
                                    \
        /* Difference from ftrace: funcs is read directly here, while DEFINE_TRACE_FN uses rcu_dereference_raw() */    \
        it_func_ptr = (&__tracepoint_##_name)->funcs;    \
        /* Difference from ftrace: the first func is fetched up front, while DEFINE_TRACE_FN checks it_func_ptr first */    \
        it_func = (it_func_ptr)->func;    \
        do {                            \
            __data = (it_func_ptr)->data;            \
            ((void(*)(void *, proto))(it_func))(__data, args); \
            it_func = READ_ONCE((++it_func_ptr)->func); \
        } while (it_func);    \
        return 0;                        \
    }                                   \
    extern struct static_call_key __SCK__tp_func_##_name;  \
    extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \
    struct static_call_key __SCK__tp_func_##_name = {      \
        .func = __traceiter_##_name,                        \
    }

注意备注上的一些和ftrace之间的不同点。

2. hook 的 __DECLARE_HOOK 解析后是:

//include/trace/hooks/vendor_hooks.h 
/*
 * Vendor-hook counterpart of __DECLARE_TRACE. Declares trace_##name(),
 * trace_##name##_enabled() and register_trace_##name(); registration goes
 * through android_rvh_probe_register(). No unregister helper is generated.
 *
 * Every line of the body carries its continuation backslash, the expanded
 * `do { ... } while (0)` is terminated with ';', and the macro ends on the
 * closing brace of register_trace_##name() rather than on a dangling
 * continuation.
 */
#define __DECLARE_HOOK(name, proto, args, cond, data_proto, data_args)    \
    extern int __traceiter_##name(data_proto);            \
    extern struct static_call_key __SCK__tp_func_##name;        \
    extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
    extern struct tracepoint __tracepoint_##name;            \
                                                            \
    static inline void __nocfi trace_##name(proto)            \
    {                                \
        if (static_key_false(&__tracepoint_##name.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                        \
                                            \
                it_func_ptr = (&__tracepoint_##name)->funcs;        \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);     \
                }                            \
            } while (0);    \
    }                                \
    static inline bool trace_##name##_enabled(void)        \
    {                                \
        return static_key_false(&__tracepoint_##name.key);    \
    }                                \
    static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return android_rvh_probe_register(&__tracepoint_##name, (void *)probe, data);    \
    }
    /* vendor hooks cannot be unregistered */

相比于ftrace,hook的trace 删除了 trace_##name##_rcuidle()、register_trace_prio_##name()、unregister_trace_##name()、check_trace_callback_type_##name()

3. 其它宏

/* 
 * NOTE(review): both definitions below use the same macro name, so as listed 
 * the second one replaces the first. Presumably they sit in opposite branches 
 * of a preprocessor conditional in vendor_hooks.h that this excerpt omits - 
 * confirm against the header. 
 * 
 * Variant 1: expands to DEFINE_HOOK_FN() with NULL for _reg and _unreg. 
 */ 
#undef DECLARE_RESTRICTED_HOOK 
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \ 
    DEFINE_HOOK_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)) 
 
/* Variant 2: expands to __DECLARE_HOOK(), forwarding cond and prepending void *__data to the prototype and argument list. */ 
#undef DECLARE_RESTRICTED_HOOK 
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \ 
    __DECLARE_HOOK(name, PARAMS(proto), PARAMS(args), cond, PARAMS(void *__data, proto),PARAMS(__data, args))

4. 总结

Google的vendor hook在ftrace的基础上做了改动,由于Google的Hook宏删除了ftrace中的 register_trace_prio_##name(),因此不能注册带有优先级的钩子函数了。

三、实验

1. 对5.10内核中的 util_est_update 中的trace添加hook

static inline void util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) //fair.c 
{ 
    ... 
    //Google 搞的 vendor hook 
    trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret); 
    if (ret) 
        return; 
    ... 
    //普通的ftrace 
    trace_sched_util_est_se_tp(&p->se); 
}

这两个trace符号Google已经导出来了:

EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_util_est_update); //vendor_hooks.c 
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); //core.c

2. 实验代码

/* 1. 包含头文件 */ 
#include <trace/events/sched.h> 
 
 
/* 2. 实现handler钩子函数,类型要与 trace##name() 的类型相同 */ 
//util_est_update() //fair.c 
/*
 * First probe for the android_rvh_util_est_update vendor hook.
 *
 * Logs the util_est values of task @p and of @cfs_rq plus @task_sleep via
 * trace_printk(), then stores 0 in *@ret_o; the call site in
 * util_est_update() returns early only when that value is non-zero.
 *
 * @data is the private pointer supplied at registration and is unused here.
 *
 * NOTE(review): enqueued/ewma are printed with %u on the assumption that
 * they are unsigned int, as in mainline struct util_est - confirm for the
 * kernel tree in use.
 */
void android_rvh_util_est_update_handler(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
    struct util_est *se_ue = &p->se.avg.util_est;
    struct util_est *rq_ue = &cfs_rq->avg.util_est;

    trace_printk("start: first_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
            se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);
    *ret_o = 0;
}
 
/*
 * Second probe for the android_rvh_util_est_update vendor hook; identical to
 * the first one except for the "second_register" tag in its log line, so the
 * call order of the two probes can be read from the trace output.
 *
 * Stores 0 in *@ret_o; the call site in util_est_update() returns early only
 * when that value is non-zero. @data is unused.
 *
 * NOTE(review): enqueued/ewma are printed with %u on the assumption that
 * they are unsigned int, as in mainline struct util_est - confirm for the
 * kernel tree in use.
 */
void android_rvh_util_est_update_handler_second(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
    struct util_est *se_ue = &p->se.avg.util_est;
    struct util_est *rq_ue = &cfs_rq->avg.util_est;

    trace_printk("start: second_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
            se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);
    *ret_o = 0;
}
 
//只改变这一个debug优先级, 默认优先级是10 
void sched_util_est_se_tp_handler(void *data, struct sched_entity *se) 
{ 
    static int count = 0; 
    int prio = 10; 
 
    if (entity_is_task(se)) { 
        struct task_struct *p = container_of(se, struct task_struct, se); 
        struct rq *rq = cpu_rq(task_cpu(p)); 
        struct cfs_rq *cfs_rq = &rq->cfs; 
        struct util_est *se_ue = &p->se.avg.util_est; 
        struct util_est *rq_ue = &cfs_rq->avg.util_est; 
        trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n", 
                count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma); 
    } else { 
        trace_printk("end: se is not task\n"); 
    } 
} 
 
void sched_util_est_se_tp_handler_prio_12(void *data, struct sched_entity *se) 
{ 
    static int count = 0; 
    int prio = 12; 
 
    if (entity_is_task(se)) { 
        struct task_struct *p = container_of(se, struct task_struct, se); 
        struct rq *rq = cpu_rq(task_cpu(p)); 
        struct cfs_rq *cfs_rq = &rq->cfs; 
        struct util_est *se_ue = &p->se.avg.util_est; 
        struct util_est *rq_ue = &cfs_rq->avg.util_est; 
        trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n", 
                count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma); 
    } else { 
        trace_printk("end: se is not task\n"); 
    } 
} 
 
 
/* 3. Register the handlers */ 
// NOTE(review): the results of these calls are discarded here; presumably each returns an error code on failure — confirm and check it in real code.
//common register 
register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler, NULL); 
register_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL); 
//google vendor couldn't use prio, because not defined. 
register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler_second, NULL); 
//ftrace register with prio. 
register_trace_prio_sched_util_est_se_tp(sched_util_est_se_tp_handler_prio_12, NULL, 12);

3. 实验结果,打印的前后关系:

# echo 1 > tracing_on 
# cat trace_pipe 
<...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler_prio_12: end: count=28494, prio=12, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0 
<...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler: end: count=28493, prio=10, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0 
 
<...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler: start: first_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1 
<...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler_second: start: second_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1

普通ftrace中,注册时指定的优先级数值越大,钩子函数越先被调用。vendor hook 没有带优先级的注册接口,钩子函数按注册的先后顺序依次调用:先注册的先调用,后注册的后调用。

从代码实现来看,即使不执行 “echo 1 > tracing_on”,这些钩子函数应该也会被调用执行,只不过 trace_printk 的内容不会被打印出来。

4. 另一种注册trace hook的方法

/* One entry of the table used to attach probes to tracepoints looked up by name. */
struct tracepoints_table { 
    const char *name; /* tracepoint name, compared against tracepoint->name during lookup */
    void *func; /* probe function passed to tracepoint_probe_register()/unregister() */
    struct tracepoint *tp; /* matching tracepoint, filled in by lookup_tracepoints() */
    bool registered; /* set to true by register_tracepoints_table(), cleared by tracepoint_cleanup() */
}; 
 
/*
 * Probes to attach, keyed by tracepoint name. Only name and func are given
 * here; tp and registered are left zero-initialized.
 */
static struct tracepoints_table g_tracepoints_table[] = {
    {
        .name = "android_rvh_util_est_update",
        .func = android_rvh_util_est_update_handler,
    },
    {
        .name = "sched_util_est_se_tp",
        .func = sched_util_est_se_tp_handler,
    },
};
 
static void lookup_tracepoints(struct tracepoint *tp, void *ignore) 
{ 
    int i; 
 
    for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) { 
        if (!strcmp(g_tracepoints_table[i].name, tp->name)) 
            g_tracepoints_table[i].tp = tp; 
    } 
} 
 
/* Defined further down; declared here because it is called before its definition. */
void tracepoint_cleanup(int index);

/*
 * Resolve every g_tracepoints_table entry to its struct tracepoint and
 * attach the entry's probe to it.
 *
 * Entries whose tracepoint was not found are reported and skipped. If
 * attaching a probe fails, the probes attached so far are removed again via
 * tracepoint_cleanup() and registration stops; the failed entry is never
 * marked as registered.
 */
static void register_tracepoints_table(void)
{
    int i, ret;
    struct tracepoints_table *tt;

    /* Fill in tt->tp for every entry whose name matches a tracepoint. */
    for_each_kernel_tracepoint(lookup_tracepoints, NULL);

    for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) {
        tt = &g_tracepoints_table[i];
        if (!tt->tp) {
            pr_info("tracepoint %s not found\n", tt->name);
            continue;
        }

        ret = tracepoint_probe_register(tt->tp, tt->func, NULL);
        if (ret) {
            /* %ps prints the symbol name of the probe; %pf is no longer a function-pointer specifier. */
            pr_info("couldn't activate tracepoint %ps\n", tt->func);
            /* Undo entries [0, i) and stop: entry i itself was not registered. */
            tracepoint_cleanup(i);
            return;
        }
        tt->registered = true;
    }
}
 
void tracepoint_cleanup(int index) 
{ 
    int i; 
    struct tracepoints_table *tt; 
 
    for (i = 0; i < index; i++) {  
        tracepoints_table *tt = &g_tracepoints_table[i]; 
        if (tt->registered) { 
            tracepoint_probe_unregister(tt->tp, tt->func, NULL); 
            tt->registered = false; 
        } 
    } 
}

可见这种注册方式需要遍历 tracepoint 区域并逐个对name进行对比,效率比较低,优点是涉及的文件比较少。


本文参考链接:https://www.cnblogs.com/hellokitty2/p/15522289.html