文章目录
- 背景
- 1. static-key的使用方法
- 1.1. static-key定义
- 1.2 初始化
- 1.3 条件判断
- 1.4 修改判断条件
- 2、示例代码
- 参考链接
背景
内核中有很多判断条件在正常情况下的结果都是固定的,除非极其罕见的场景才会改变。通常单个这种判断的代价很低,可以忽略,但是如果这种判断数量巨大且被频繁执行,就会带来性能损失。内核的static-key机制就是为了优化这种场景,其优化的结果是:对于大多数情况,对应的判断被优化为一条NOP指令;只有在极其罕见的场景下,这条指令才会被改写成jump XXX一类的跳转指令,使得对应的代码段得到执行。
1. static-key的使用方法
1.1. static-key定义
static_key 结构体的定义如下:
#ifdef CONFIG_JUMP_LABELstruct static_key {atomic_t enabled;
/** Note:* To make anonymous unions work with old compilers, the static* initialization of them requires brackets. This creates a dependency* on the order of the struct with the initializers. If any fields* are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need* to be modified.** bit 0 => 1 if key is initially true* 0 if initially false* bit 1 => 1 if points to struct static_key_mod* 0 if points to struct jump_entry*/union {unsigned long type;struct jump_entry *entries;struct static_key_mod *next;};
};#else
struct static_key {atomic_t enabled;
};
#endif /* CONFIG_JUMP_LABEL */
如果没有定义 CONFIG_JUMP_LABEL,则 static_key 退化成只包含一个 atomic_t 变量(enabled)的结构体。
1.2 初始化
/*
 * Define a static key object named 'name' whose initial state is
 * true (DEFINE_STATIC_KEY_TRUE) or false (DEFINE_STATIC_KEY_FALSE).
 */
#define DEFINE_STATIC_KEY_TRUE(name) \
	struct static_key_true name = STATIC_KEY_TRUE_INIT

#define DEFINE_STATIC_KEY_FALSE(name) \
	struct static_key_false name = STATIC_KEY_FALSE_INIT

/* Compound-literal initializers for the typed wrappers around static_key. */
#define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }

/*
 * Initializers for the embedded struct static_key: 'enabled' starts at
 * 1 or 0, and 'entries' carries the matching JUMP_TYPE_* value.
 */
#define STATIC_KEY_INIT_TRUE \
	{ .enabled = { 1 }, \
	  .entries = (void *)JUMP_TYPE_TRUE }
#define STATIC_KEY_INIT_FALSE \
	{ .enabled = { 0 }, \
	  .entries = (void *)JUMP_TYPE_FALSE }
false和true的主要区别就是enabled的初始值是否为1(同时entries中分别记录了JUMP_TYPE_FALSE和JUMP_TYPE_TRUE)。
1.3 条件判断
#ifdef CONFIG_JUMP_LABEL

/*
 * Combine the right initial value (type) with the right branch order
 * to generate the desired result.
 *
 *
 * type\branch|    likely (1)         |    unlikely (0)
 * -----------+-----------------------+------------------
 *            |                       |
 *  true (1)  |    ...                |    ...
 *            |    NOP                |    JMP L
 *            |    <br-stmts>         | 1: ...
 *            | L: ...                |
 *            |                       |
 *            |                       | L: <br-stmts>
 *            |                       |    jmp 1b
 *            |                       |
 * -----------+-----------------------+------------------
 *            |                       |
 *  false (0) |    ...                |    ...
 *            |    JMP L              |    NOP
 *            |    <br-stmts>         | 1: ...
 *            | L: ...                |
 *            |                       |
 *            |                       | L: <br-stmts>
 *            |                       |    jmp 1b
 *            |                       |
 * -----------+-----------------------+------------------
 *
 * The initial value is encoded in the LSB of static_key::entries,
 * type: 0 = false, 1 = true.
 *
 * The branch type is encoded in the LSB of jump_entry::key,
 * branch: 0 = unlikely, 1 = likely.
 *
 * This gives the following logic table:
 *
 *   enabled  type  branch    instruction
 * -----------------------------+-----------
 *   0        0     0           | NOP
 *   0        0     1           | JMP
 *   0        1     0           | NOP
 *   0        1     1           | JMP
 *
 *   1        0     0           | JMP
 *   1        0     1           | NOP
 *   1        1     0           | JMP
 *   1        1     1           | NOP
 *
 * Which gives the following functions:
 *
 *   dynamic: instruction = enabled ^ branch
 *   static:  instruction = type ^ branch
 *
 * See jump_label_type() / jump_label_init_type().
 */

/*
 * static_branch_likely(x) - test key x, laying the code out for "true".
 *
 * Picks arch_static_branch() or arch_static_branch_jump() at compile time
 * from the declared type of *x; any other type resolves to
 * ____wrong_branch_error().
 */
#define static_branch_likely(x) \
({ \
	bool branch; \
	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
		branch = !arch_static_branch(&(x)->key, true); \
	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
		branch = !arch_static_branch_jump(&(x)->key, true); \
	else \
		branch = ____wrong_branch_error(); \
	likely(branch); \
})

/*
 * static_branch_unlikely(x) - test key x, laying the code out for "false".
 *
 * Mirror image of static_branch_likely(): the two arch helpers are swapped
 * and their result is not negated.
 */
#define static_branch_unlikely(x) \
({ \
	bool branch; \
	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
		branch = arch_static_branch_jump(&(x)->key, false); \
	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
		branch = arch_static_branch(&(x)->key, false); \
	else \
		branch = ____wrong_branch_error(); \
	unlikely(branch); \
})

#else /* !CONFIG_JUMP_LABEL */

/* Without jump labels both forms are plain runtime checks of the key. */
#define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
#define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))

#endif /* CONFIG_JUMP_LABEL */
可见同样依赖CONFIG_JUMP_LABEL。如果没有定义的话,直接退化成likely和unlikely。
static_branch_unlikely
和 static_branch_likely
只是填充指令的方式不同(可以参考上面的代码注释), 当static_key
为false时,都会进入else逻辑语句中。
if (static_branch_unlikely((&static_key)))
    do likely work;
else
    do unlikely work
1.4 修改判断条件
使用static_branch_enable
和 static_branch_disable
可以改变static_key 状态
/*
 * Convenience wrappers: forward to static_key_enable() /
 * static_key_disable() on the 'key' member embedded in x.
 */
#define static_branch_enable(x) static_key_enable(&(x)->key)
#define static_branch_disable(x) static_key_disable(&(x)->key)
底层是调用static_key_slow_inc、static_key_slow_dec来改变key->enabled计数。
/*
 * static_key_enable - turn the key on if it is currently off.
 * @key: key to enable
 *
 * Reads the current count, warns once if it is outside the range 0..1,
 * and increments it through static_key_slow_inc() only when it is 0.
 */
static inline void static_key_enable(struct static_key *key)
{
	int count = static_key_count(key);

	WARN_ON_ONCE(count < 0 || count > 1);

	if (!count)
		static_key_slow_inc(key);
}
/*
 * static_key_disable - turn the key off if it is currently on.
 * @key: key to disable
 *
 * Reads the current count, warns once if it is outside the range 0..1,
 * and decrements it through static_key_slow_dec() only when it is non-zero.
 */
static inline void static_key_disable(struct static_key *key)
{
	int count = static_key_count(key);

	WARN_ON_ONCE(count < 0 || count > 1);

	if (count)
		static_key_slow_dec(key);
}
/*
 * static_key_slow_inc - bump the key's 'enabled' counter by one.
 * @key: key to modify
 *
 * NOTE(review): this plain atomic form looks like the variant used when
 * CONFIG_JUMP_LABEL is not set - confirm against the kernel version quoted.
 */
static inline void static_key_slow_inc(struct static_key *key)
{
	STATIC_KEY_CHECK_USE(key);
	atomic_inc(&key->enabled);
}

/*
 * static_key_slow_dec - drop the key's 'enabled' counter by one.
 * @key: key to modify
 */
static inline void static_key_slow_dec(struct static_key *key)
{
	STATIC_KEY_CHECK_USE(key);
	atomic_dec(&key->enabled);
}
2、示例代码
下面我们用一段代码来分析static-key对程序分支跳转硬编码的影响。
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/static_key.h>

/* Static key guarding the demo feature; starts out false (disabled). */
DEFINE_STATIC_KEY_FALSE(key);

/*
 * func - report whether the feature guarded by 'key' is enabled.
 * @a: not used by this variant.
 *
 * NOTE(review): left non-static, presumably so that it is emitted as its
 * own <func> symbol for disassembly - confirm before changing linkage.
 */
void func(int a)
{
	if (static_branch_unlikely(&key)) {
		printk("my_module: Feature is enabled\n");
	} else {
		printk("my_module: Feature is disabled\n");
	}
}

/*
 * Module entry point: call func() once with the key disabled, enable the
 * key, then call func() again so both branches are exercised.
 */
static int __init my_module_init(void)
{
	/* Declared ahead of the first statement (-Wdeclaration-after-statement). */
	int a = 1;

	pr_info("my_module: Module loaded\n");

	func(a);
	static_branch_enable(&key);
	func(a);

	return 0;
}

/* Module exit point: only logs the unload. */
static void __exit my_module_exit(void)
{
	pr_info("my_module: Module unloaded\n");
}

module_init(my_module_init);
module_exit(my_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Sample Kernel Module with Static Key");
func汇编代码如下:
0000000000000000 <func>:
   0:   a9bf7bfd    stp   x29, x30, [sp, #-16]!
   4:   910003fd    mov   x29, sp
   8:   d503201f    nop
   c:   90000000    adrp  x0, 0 <func>
  10:   91000000    add   x0, x0, #0x0
  14:   94000000    bl    0 <printk>
  18:   a8c17bfd    ldp   x29, x30, [sp], #16
  1c:   d65f03c0    ret
  20:   90000000    adrp  x0, 0 <func>
  24:   91000000    add   x0, x0, #0x0
  28:   94000000    bl    0 <printk>
  2c:   17fffffb    b     18 <func+0x18>
func中不使用static-key时,代码及其汇编如下:
void func(int a){
    if (a) {
        printk("my_module: Feature is enabled\n");
    } else {
        printk("my_module: Feature is disabled\n");
    }
}

0000000000000000 <func>:
   0:   a9bf7bfd    stp   x29, x30, [sp, #-16]!
   4:   910003fd    mov   x29, sp
   8:   340000a0    cbz   w0, 1c <func+0x1c>
   c:   90000000    adrp  x0, 0 <func>
  10:   91000000    add   x0, x0, #0x0
  14:   94000000    bl    0 <printk>
  18:   14000004    b     28 <func+0x28>
  1c:   90000000    adrp  x0, 0 <func>
  20:   91000000    add   x0, x0, #0x0
  24:   94000000    bl    0 <printk>
  28:   a8c17bfd    ldp   x29, x30, [sp], #16
  2c:   d65f03c0    ret
对比可以发现,在0x8地址处,使用static-key的版本在编译时生成的是一条nop指令,而不是cbz条件跳转指令,因此在key未使能的常见情况下,运行时不需要再做比较判断;当调用static_branch_enable之后,这条nop会被改写为跳转指令(跳到0x20处的分支),对应的代码才会被执行。
参考链接
- Linux内核中的static-key机制
- Linux内核jump label与static key的原理与示例
- static-keys.html | 静态键
- Linux Jump Label/static-key机制详解