Ptmalloc源码分析：free

前几天被 free 给坑了，就一直报错搞得我很崩溃，痛定思痛，下定决心，看看 free 是怎么实现的

libc-2.23

杂项宏定义：

#ifndef RETURN_ADDRESS /* 如果没有定义RETURN_ADDRESS */
#define RETURN_ADDRESS(X_) (NULL) /* 宏定义为NULL */
#endif

#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA) /* 作为“参考数据” */
#define chunksize(p)         ((p)->size & ~(SIZE_BITS)) /* 获取真正的size */

#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ)) /* 获取chunk头 */

关键结构体：

1 2	typedef struct malloc_state mstate; / 管理arena的结构体 / typedef struct malloc_chunk mchunkptr; /* 管理chunk的结构体 */

和M位相关的宏定义

1 2	#define IS_MMAPPED 0x2 /* M位的具体位置 / #define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED) / 获取M位 */

和A位相关的宏定义

#define NON_MAIN_ARENA 0x4 /* A位的具体位置 */
#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA) /* 获取A位 */

#define heap_for_ptr(ptr) \ /* 根据chunk头地址,计算对应的thread_arena */
  ((heap_info *) ((unsigned long) (ptr) & ~(HEAP_MAX_SIZE - 1)))

#define arena_for_chunk(ptr) \ /* 根据chunk头地址,获取对应的arena */
  (chunk_non_main_arena (ptr) ? heap_for_ptr (ptr)->ar_ptr : &main_arena)
/* chunk->size中的A位可以检查程序是否属于main_arena,如果是就直接分配,如果不是就通过heap_for_ptr获取对应的thread_arena */

和P位相关的宏定义

#define inuse(p) \
  ((((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
/* 获取当前chunk的状态(通过nextchunk->P) */

#define set_inuse(p) \
  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
/* 设置当前chunk为alloc(通过nextchunk->P) */

#define clear_inuse(p) \
  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
/* 设置当前chunk为free(通过nextchunk->P) */

#define prev_inuse(p)       ((p)->size & PREV_INUSE) 
/* 获取lastchunk的状态(lastchunk的状态设置比较简单,不需要格外的宏定义) */

#define inuse_bit_at_offset(p, s) \
  (((mchunkptr) (((char *) (p)) + (s)))->size & PREV_INUSE) 
/* 获取nextchunk的状态(需要人工输入chunk->size) */

#define set_inuse_bit_at_offset(p, s) \
  (((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)
/* 设置nextchunk为alloc(需要人工输入chunk->size) */

#define clear_inuse_bit_at_offset(p, s) \
  (((mchunkptr) (((char *) (p)) + (s)))->size &= ~(PREV_INUSE))
/* 设置nextchunk为free(需要人工输入chunk->size) */

__libc_free：free函数的具体实现

void
__libc_free(void* mem)
{
    mstate ar_ptr; /* 管理arena的结构体 */
    mchunkptr p; /* 管理chunk的结构体 */

    void (*hook) (void*, const void*)
        = atomic_forced_read(__free_hook); /* 原子读获取__free_hook */
    if (__builtin_expect(hook != NULL, 0)) /* __builtin_expect可以提高效率 */
    {
        (*hook)(mem, RETURN_ADDRESS(0)); /* 执行__free_hook */
        return;
    }

    if (mem == 0) /* free(0) has no effect */
        return;

    p = mem2chunk(mem); /* 通过指向chunk->FD的指针,获取指向chunk->head的指针 */

    if (chunk_is_mmapped(p)) /* 检查M位,看看该chunk是不是由mmap分配的 */
    {
        /* 查看动态brk/mmap阈值是否需要调整(不是重点) */
        if (!mp_.no_dyn_threshold
            && p->size > mp_.mmap_threshold
            && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
        {
            mp_.mmap_threshold = chunksize(p);
            mp_.trim_threshold = 2 * mp_.mmap_threshold;
            LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2,
                mp_.mmap_threshold, mp_.trim_threshold);
        }
        munmap_chunk(p); /* 调用munmap_chunk进行chunk的回收(不是重点) */
        return;
    }

    ar_ptr = arena_for_chunk(p); /* 根据chunk头地址,获取对应的arena */
    _int_free(ar_ptr, p, 0); /* 核心 */
}
libc_hidden_def(__libc_free) /* 标志修饰的函数在动态链接的过程中进行延迟绑定 */

其实就是检查一下 __free_hook 中有没有东西（如果有就执行），再检查一下M位（进行调整），最后获取“对应的arena”，作为 _int_free 的参数

杂项宏定义：

#define INTERNAL_SIZE_T size_t /* 将其定义为"size_t" */
#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T)) /* 定义其大小为"1字 */

#ifndef TRIM_FASTBINS /* 与"fastbin是否会和top chunk合并"有关的标志 */
#define TRIM_FASTBINS  0 /* 默认为0(不合并) */
#endif

#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s))) /* 强制使p指针加上s,但不改变其指向 */

#define fastbin_index(sz) \ /* 获取对应的下标 */
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL) /* 和fastbin合并机制有关 */
/* 
通常fastbins中的fastchunk是不会进行合并的
当被free的fastchunk与相邻的chunk合并后的大小大于FASTBIN_CONSOLIDATION_THRESHOLD时
此时内存碎片可能就比较多了，我们就需要将fastbins中的chunk都进行合并
*/

static void
free_perturb (char *p, size_t n) /* 设置perturb_byte(不清楚作用) */
{
  if (__glibc_unlikely (perturb_byte))
    memset (p, perturb_byte, n);
}

和优化有关的宏定义：

1 2	#define likely(x) __builtin_expect(!!(x), 1) /* 优化:大概率发生 / #define unlikely(x) __builtin_expect(!!(x), 0) / 优化:大概率不发生 */

和对齐有关的宏定义：

#ifndef MALLOC_ALIGNMENT /* MALLOC_ALIGNMENT的定义比较复杂,用于实现内存对齐 */
# if !SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_16)
#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ < __alignof__ (long double)      \
                                  ? __alignof__ (long double) : 2 *SIZE_SZ)
# else
#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ)
# endif
#endif

#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)

#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0) /* m判断是否对齐(这里的m通常为chunk->size) */

#define misaligned_chunk(p) \
  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
   & MALLOC_ALIGN_MASK) /* 实现p的对齐(具体什么原理我没有看懂) */

和更新 size，presize 有关的宏定义：

#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
/* 更新chunk->size,不干扰其标志位 */
#define set_head(p, s)       ((p)->size = (s)) 
/* 更新chunk->size,需要手动写入标志位 */
#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->prev_size = (s))
/* 更新nextchunk->prevsize,需要手动写入标志位 */

_int_free：__libc_free 的核心部分

static void
_int_free(mstate av, mchunkptr p, int have_lock) /* have_lock==0 */
{
    INTERNAL_SIZE_T size;        /* target chunk->size */
    mfastbinptr* fb;             /* associated fastbin */
    mchunkptr nextchunk;         /* next contiguous chunk */
    INTERNAL_SIZE_T nextsize;    /* its size */
    int nextinuse;               /* true if nextchunk is used */
    INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
    mchunkptr bck;               /* misc temp for linking */
    mchunkptr fwd;               /* misc temp for linking */

    const char* errstr = NULL;
    int locked = 0;

    size = chunksize(p); /* 获取目标chunk真正的大小 */

    if (__builtin_expect((uintptr_t)p > (uintptr_t)-size, 0)
        || __builtin_expect(misaligned_chunk(p), 0))
        /* 检查1: 简单检查一下chunk头指针的合理性 && 实现对齐后的chunk不为空 */
    {
        errstr = "free(): invalid pointer";
    errout:
        if (!have_lock && locked) /* 如果有互斥锁 */
            (void)mutex_unlock(&av->mutex); /* 互斥锁解锁 */
        malloc_printerr(check_action, errstr, chunk2mem(p), av);
        return;
    }

    if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size)))
        /* 检查2: chunk的大小必须大于MINSIZE(4字) && 检查chunk->size是否对齐 */
    {
        errstr = "free(): invalid size";
        goto errout;
    }

    check_inuse_chunk(av, p); /* DEBUG:检查chunk的前一个chunk是否不被使用 */

      /* 根据if语句,觉得是否把chunk放入fastbin */
    if ((unsigned long)(size) <= (unsigned long)(get_max_fast())

#if TRIM_FASTBINS /* 允许fastbin和top chunk进行合并(附加一个if条件) */
        && (chunk_at_offset(p, size) != av->top)
        /* 附加if条件为:目标chunk不和top chunk相邻(否则进行合并) */
#endif /* 不允许fastbin和top chunk进行合并(没有附加if条件) */
        ) {
        /* <--情况1:将会把该chunk放入fastbin--> */
        if (__builtin_expect(chunk_at_offset(p, size)->size <= 2 * SIZE_SZ, 0)
            || __builtin_expect(chunksize(chunk_at_offset(p, size))
                >= av->system_mem, 0))
        {
            /* 检查3: nextchunk->size必须大于2字,小于malloc_state->system_mem */
            if (have_lock /* have_lock==0(表示没有锁) */
                || ({ assert(locked == 0); /* 断言没有锁 */
                  mutex_lock(&av->mutex); /* 加互斥锁 */
                  locked = 1;
                  chunk_at_offset(p, size)->size <= 2 * SIZE_SZ
                    || chunksize(chunk_at_offset(p, size)) >= av->system_mem;
                    }))
                /* 检查4: 加锁后重复检查3 */
            {
                errstr = "free(): invalid next size (fast)";
                goto errout;
            }
                    if (!have_lock) /* have_lock==0(恒成立) */
                    {
                        (void)mutex_unlock(&av->mutex); /* 解互斥锁 */
                        locked = 0;
                    }
        }

        free_perturb(chunk2mem(p), size - 2 * SIZE_SZ); /* 将chunk的mem部分全部设置为perturb_byte */

        set_fastchunks(av); /* 设置fastchunks标记位 */
        unsigned int idx = fastbin_index(size);
        fb = &fastbin(av, idx); /* 取出fastbin的头部 */

        mchunkptr old = *fb, old2; /* 使用原子操作,将P插入链表中 */
        unsigned int old_idx = ~0u;
        do
        {
            if (__builtin_expect(old == p, 0))
                /* 检查5: 检查原链表头的chunk是否等于p,经典Double free检查 */
            {
                errstr = "double free or corruption (fasttop)";
                goto errout;
            }
            if (have_lock && old != NULL)
                old_idx = fastbin_index(chunksize(old)); /* 获取对应的下标 */
            p->fd = old2 = old; /* 让p的fd指针变为原来的头部 */
        } while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2)) != old2); /* 如果fb等于old2，就将fb赋值为p */

        if (have_lock && old != NULL && __builtin_expect(old_idx != idx, 0))
            /* 检查6: 先前的old_idx不属于当前的idx(说明修改了size),这个一般遇不到,因为从free到chunk进入fastbin的过程中,我们都是不能控制size的,一般都改不了size */
        {
            errstr = "invalid fastbin entry (free)";
            goto errout;
        }
    }

    else if (!chunk_is_mmapped(p)) { 
        /* <--情况2:将会在其他未映射的块到达时合并chunk--> */
        if (!have_lock) {
            (void)mutex_lock(&av->mutex);
            locked = 1;
        }

        nextchunk = chunk_at_offset(p, size); /* 获取物理相邻的下一个chunk */

        if (__glibc_unlikely(p == av->top))
            /* 检查7: 将要合并的chunk不能是top chunk */
        {
            errstr = "double free or corruption (top)";
            goto errout;
        }
        if (__builtin_expect(contiguous(av)
            && (char*)nextchunk
            >= ((char*)av->top + chunksize(av->top)), 0))
            /* 检查8: nextchunk不能超出top chunk的范围 */
        {
            errstr = "double free or corruption (out)";
            goto errout;
        }
        if (__glibc_unlikely(!prev_inuse(nextchunk)))
            /* 检查9: "nextchunk->P位==0"不能成立(等于"0"表示chunk已经是free状态) */
        {
            errstr = "double free or corruption (!prev)";
            goto errout;
        }

        nextsize = chunksize(nextchunk); /* 获取物理相邻的下一个chunk的大小 */
        if (__builtin_expect(nextchunk->size <= 2 * SIZE_SZ, 0)
            || __builtin_expect(nextsize >= av->system_mem, 0))
            /* 检查10: nextchunk->size必须大于2字,小于malloc_state->system_mem */
        {
            errstr = "free(): invalid next size (normal)";
            goto errout;
        }

        free_perturb(chunk2mem(p), size - 2 * SIZE_SZ); /* 将chunk的mem部分全部设置为perturb_byte */

        /* 后向合并(前一个chunk为free) */
        if (!prev_inuse(p)) {
            prevsize = p->prev_size;
            size += prevsize; /* 合并chunk */
            p = chunk_at_offset(p, -((long)prevsize)); /* 更新chunk->head */
            unlink(av, p, bck, fwd); /* 合并后的chunk脱链 */
        }

        if (nextchunk != av->top) {
            nextinuse = inuse_bit_at_offset(nextchunk, nextsize); /* 表示nextchunk是否free(nextchunk->nextchunk->P位) */

            /* 前向合并(后一个chunk为free) */
            if (!nextinuse) {
                unlink(av, nextchunk, bck, fwd); /* 后一个chunk脱链 */
                size += nextsize; /* 进行合并(不需要更新chunk->head) */
            }
            else
                clear_inuse_bit_at_offset(nextchunk, 0);

            /* 合并后的chunk插入unsortedbin的链表头 */
            bck = unsorted_chunks(av); 
            fwd = bck->fd;
            if (__glibc_unlikely(fwd->bk != bck))
                /* 检查11: 相当于 bck->fd->bk != bck,只要不破坏main_arena,通过这个检查应该没有什么问题(就是在进行main_arena劫持的时候,会造成影响) */
            {
                errstr = "free(): corrupted unsorted chunks";
                goto errout;
            }
            p->fd = fwd;
            p->bk = bck;
            if (!in_smallbin_range(size)) /* 如果属于largebin的范围 */
            {
                p->fd_nextsize = NULL;
                p->bk_nextsize = NULL;
            }
            bck->fd = p;
            fwd->bk = p;

            set_head(p, size | PREV_INUSE); /* 更新P位 */
            set_foot(p, size);

            check_free_chunk(av, p); /* DEBUG:简单检查 */
        }

        else {
            /* <--情况3:将会和top chunk进行合并--> */
            size += nextsize;
            set_head(p, size | PREV_INUSE);
            av->top = p;
            check_chunk(av, p);
        }

        if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
            /* 触发fastbin合并机制 */
            if (have_fastchunks(av)) /* 如果存在fastbin就进行合并 */
                malloc_consolidate(av); /* 合并所有的fastbin */

            if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM /* 和heap_trim有关的标志位 */
                if ((unsigned long)(chunksize(av->top)) >=
                    (unsigned long)(mp_.trim_threshold))
                    systrim(mp_.top_pad, av);
#endif
            }
            else {
                heap_info* heap = heap_for_ptr(top(av));

                assert(heap->ar_ptr == av);
                heap_trim(heap, mp_.top_pad); /* 调用heap_trim尝试缩小该heap */
            }
        }

        if (!have_lock) {
            assert(locked);
            (void)mutex_unlock(&av->mutex);
        }
    }

    else {
        munmap_chunk(p); /* 如果是mmap分配出来的chunk,直接进行回收 */
    }
}

现在 free 函数就分析完毕了：

__libc_free 里真正进行free的函数是 _int_free 函数，首先执行 __free_hook
然后该函数会判断当前释放的 chunk 是否为 fastbin（分3种情况）
- 情况1:将会把该chunk放入fastbin
  - 查看当前的头部与释放的chunk是否一致
  - 然后检查其 old chunk（原来的bin头）头部的 size 是否满足属于该 bin 的 idx 下标条件
  - 最后插入对应 fastbin 的链表头
- 情况2:将会在其他未映射的块到达时合并chunk
  - 判断该chunk应该进行“前向合并”，“后向合并”，还是两者都有
  - 然后就是一堆检查
  - 最后插入对应 unsortedbin 的链表头
- 情况3:将会和top chunk进行合并
  - 进行 top chunk 合并
  - 最后成为 top chunk 的一部分
接下来会判断是否触发 fastbin 合并机制
- 会调用heap_trim尝试缩小该heap
最后如果chunk是mmap出来的，则直接释放

最后再看下 unlink 的检查：

#define unlink(AV, P, BK, FD) {                                           
    FD = P->fd;								      
    BK = P->bk;								      
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))	
        /* 检查1: 检查nextchunk->bk是不是chunk,检查lastchunk->fd是不是chunk */
      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  
    else { /* 进行脱链 */								      
        FD->bk = BK;					      
        BK->fd = FD;							      
        if (!in_smallbin_range (P->size)				      
            && __builtin_expect (P->fd_nextsize != NULL, 0)) {
            /* 如果检测到目标chunk属于largebin,并且chunk->fd_nextsize不为空 */
	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      
		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))
            /* 检查2: 检查1的largebin版本,主要用于横向链表(用fd_nextsize和bk_nextsize组织的链表,用于管理同一largebin中,不同size的large chunk) */
	      malloc_printerr (check_action,				      
			       "corrupted double-linked list (not small)",    
			       P, AV);					      
            if (FD->fd_nextsize == NULL) { 
                /* 表示largebin中:脱链chunk的下一个chunk不是纵向链表的链表头 */
                if (P->fd_nextsize == P)
                    /* 表示脱链chunk是当前largebin中唯一纵向链表的链表头 */
                  FD->fd_nextsize = FD->bk_nextsize = FD;		      
                else {
                    FD->fd_nextsize = P->fd_nextsize; /* 继承p的fd_nextsize */
                    FD->bk_nextsize = P->bk_nextsize; /* 继承p的bk_nextsize */
                    /* p必须是纵向链表头,以下操作才有意义 */
                    P->fd_nextsize->bk_nextsize = FD; 
                    P->bk_nextsize->fd_nextsize = FD;			      
                  }							      
              } else { /* 表示脱链的chunk是纵向链表的链表头 */
                /* 如果某个纵向链表头脱链,则需要在纵向链表中完成脱链操作 */
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      
              }								      
          }								      
      }									      
}

这个 unlink 的过程，会在 unlink 攻击中起作用
后面关于 largebin 的部分，则会在 largebin attack 中起作用

libc-2.27

杂项宏定义：

#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)

#define MINSIZE  \
  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

#define MALLOC_ALIGNMENT (2 * SIZE_SZ < __alignof__ (long double) \
			  ? __alignof__ (long double) : 2 * SIZE_SZ)

# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)

和 arena 有关的宏定义：

#define arena_lock(ptr, size) do { \
      if (ptr) \
        __libc_lock_lock (ptr->mutex); \
      else \
        ptr = arena_get2 ((size), NULL); \
  } while (0) /* 对malloc_state->mutex进行加锁 */

#define arena_get(ptr, size) do { \
      ptr = thread_arena; \
      arena_lock (ptr, size); \
  } while (0) /* 从thread_arena获得一个分配区 */

和 tcache 有关的函数和结构体：

# define TCACHE_MAX_BINS		64 /* 最大tcachebin数量 */
# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1) /* 最大tcachebin大小 */
# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ) /* 根据TCACHE_MAX_BINS计算MAX_TCACHE_SIZE(固定值) */

# define MAYBE_INIT_TCACHE() \
  if (__glibc_unlikely (tcache == NULL)) \
    tcache_init(); /* 对tcache进行初始化 */

typedef struct tcache_perthread_struct
{
    char counts[TCACHE_MAX_BINS]; /* 这里是1字节,在libc-2.31中就变成了2字节 */
    tcache_entry* entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

__libc_free：free函数的具体实现

void
__libc_free(void* mem)
{
    mstate ar_ptr; /* 管理arena的结构体 */
    mchunkptr p; /* 管理chunk的结构体 */

    void (*hook) (void*, const void*)
        = atomic_forced_read(__free_hook); /* 原子读__free_hook */
    if (__builtin_expect(hook != NULL, 0))
    {
        (*hook)(mem, RETURN_ADDRESS(0)); /* 执行__free_hook */
        return;
    }

    if (mem == 0)
        return;

    p = mem2chunk(mem); /* 获取chunk头 */

    if (chunk_is_mmapped(p)) /* 如果该chunk是mmap分配的(通过检查M位) */
    {
        if (!mp_.no_dyn_threshold
            && chunksize_nomask(p) > mp_.mmap_threshold
            && chunksize_nomask(p) <= DEFAULT_MMAP_THRESHOLD_MAX
            && !DUMPED_MAIN_ARENA_CHUNK(p))
        {
            mp_.mmap_threshold = chunksize(p);
            mp_.trim_threshold = 2 * mp_.mmap_threshold;
            LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2,
                mp_.mmap_threshold, mp_.trim_threshold);
        }
        munmap_chunk(p); /* 调用munmap_chunk进行chunk的回收(不是重点) */
        return;
    }

    MAYBE_INIT_TCACHE(); /* 新增:对tcache进行初始化(libc-2.27开启了tcache) */

    ar_ptr = arena_for_chunk(p); /* 根据chunk头地址,获取对应的arena */
    _int_free(ar_ptr, p, 0); /* 核心 */
}
libc_hidden_def(__libc_free) /* 进行延迟绑定 */

和 chunk->size 有关的宏定义：

1 2	#define chunksize_nomask(p) ((p)->mchunk_size) /* 获取chunk->size / #define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS)) / 获取真正的size */

_int_free：__libc_free 的核心部分

static void
_int_free(mstate av, mchunkptr p, int have_lock)
{
    INTERNAL_SIZE_T size;        /* 记录chunk->size */
    mfastbinptr* fb;             /* 用于关联fastbin */
    mchunkptr nextchunk;         /* 物理相邻的下一个chunk */
    INTERNAL_SIZE_T nextsize;    /* 记录nextchunk->size */
    int nextinuse;               /* 如果使用nextchunk,则为true */
    INTERNAL_SIZE_T prevsize;    /* 记录lastchunk->size */
    mchunkptr bck;               /* link使用的"临时上一个chunk" */
    mchunkptr fwd;               /* link使用的"临时下一个chunk" */

    size = chunksize(p); /* 获取真正的chunk->size(为了和chunksize_nomask区分) */

    if (__builtin_expect((uintptr_t)p > (uintptr_t)-size, 0)
        || __builtin_expect(misaligned_chunk(p), 0))
        malloc_printerr("free(): invalid pointer");
    /* 检查1: 简单检查一下chunk头指针的合理性 && 实现对齐后的chunk不为空 */

    if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size)))
        malloc_printerr("free(): invalid size");
    /* 检查2: chunk的大小必须大于MINSIZE(4字) && 检查chunk->size是否对齐 */

    check_inuse_chunk(av, p); /* DEBUG:检查chunk的前一个chunk是否不被使用 */

#if USE_TCACHE /* 如果启用TCACHE */
    {
        size_t tc_idx = csize2tidx(size); /* 获取对应tcachebin */

        if (tcache
            && tc_idx < mp_.tcache_bins
            /* 初始化为TCACHE_MAX_BINS,表示tcachebin的最大数量 */
            && tcache->counts[tc_idx] < mp_.tcache_count)
            /* 初始化为7,表示在同一个tcachebin中,chunk的最大值 */
        {
            tcache_put(p, tc_idx); /* 会将该free的块插入到对应idx的bin的第一个位置上(插头) */
            return;
        }
        /* tcache_perthread_struct有和main_arena相似的性质:
        会根据tcachebin的大小来决定tcachebin->count和tcachebin->entry的位置
        如果不对tc_idx进行限制,那么过大的size很可能超过tcache_perthread_struct的范围
        */
    }
#endif

    if ((unsigned long)(size) <= (unsigned long)(get_max_fast())
#if TRIM_FASTBINS
        && (chunk_at_offset(p, size) != av->top)
        /* 如果会和top chunk合并,则新增一个条件:nextchunk不是top chunk */
#endif
        ) {
        /* 情况1:会进入fastbin */
        if (__builtin_expect(chunksize_nomask(chunk_at_offset(p, size))
            <= 2 * SIZE_SZ, 0)
            || __builtin_expect(chunksize(chunk_at_offset(p, size))
                >= av->system_mem, 0))
            /* 检查3: nextchunk->size必须大于2字(当size刚好等于2字时,nextchunk->P不能为0) ,nextchunk->size不能大于或等于system_mem */
        {
            bool fail = true;
            if (!have_lock) /* 如果有锁,就进行解锁(过程有点看不懂) */
            {
                __libc_lock_lock(av->mutex);
                fail = (chunksize_nomask(chunk_at_offset(p, size)) <= 2 * SIZE_SZ
                    || chunksize(chunk_at_offset(p, size)) >= av->system_mem);
                __libc_lock_unlock(av->mutex);
            }

            if (fail)
                malloc_printerr("free(): invalid next size (fast)");
        }

        free_perturb(chunk2mem(p), size - 2 * SIZE_SZ); /* 设置perturb_byte */

        atomic_store_relaxed(&av->have_fastchunks, true); /* 检查是否有fastbin */
        unsigned int idx = fastbin_index(size); /* 获取对应的index */
        fb = &fastbin(av, idx); /* 获取对应的fastbin */

        mchunkptr old = *fb, old2;

        if (SINGLE_THREAD_P) /* 如果是单线程 */
        {
            if (__builtin_expect(old == p, 0))
                malloc_printerr("double free or corruption (fasttop)");
            /* 检查4: 检查原链表头的chunk是否等于p,经典Double free检查 */
            p->fd = old;
            *fb = p;
        }
        else /* 如果是多线程 */
            do
            {
                if (__builtin_expect(old == p, 0))
                    malloc_printerr("double free or corruption (fasttop)");
                p->fd = old2 = old;
                /* 检查4: 检查原链表头的chunk是否等于p,经典Double free检查 */
            } while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2))
                != old2);
            if (have_lock && old != NULL
                && __builtin_expect(fastbin_index(chunksize(old)) != idx, 0))
                malloc_printerr("invalid fastbin entry (free)");
            /* 检查5: 检查顶部fastbin区块的大小是否是我们要添加的块的大小 */
    }
    /* 情况2:不会进入fastbin,并且不是mmap分配的(在其他未映射的块到达时合并它们) */
    else if (!chunk_is_mmapped(p)) {
        if (SINGLE_THREAD_P) /* 如果是单线程,则不需要锁定arena(SINGLE_THREAD_P=1) */
            have_lock = true;

        if (!have_lock)
            __libc_lock_lock(av->mutex);

        nextchunk = chunk_at_offset(p, size); /* 获取nextchunk->head */

        if (__glibc_unlikely(p == av->top))
            malloc_printerr("double free or corruption (top)");
        /* 检查6: 当前chunk不能是top chunk(不能free top chunk) */

        if (__builtin_expect(contiguous(av)
            && (char*)nextchunk
            >= ((char*)av->top + chunksize(av->top)), 0))
            malloc_printerr("double free or corruption (out)");
        /* 检查7: 检查nextchunk是否超出了竞技场的边界 */

        if (__glibc_unlikely(!prev_inuse(nextchunk)))
            malloc_printerr("double free or corruption (!prev)");
        /* 检查8: 该chunk是否在使用(必须是alloc状态) */

        nextsize = chunksize(nextchunk);
        if (__builtin_expect(chunksize_nomask(nextchunk) <= 2 * SIZE_SZ, 0)
            || __builtin_expect(nextsize >= av->system_mem, 0))
            malloc_printerr("free(): invalid next size (normal)");
        /* 检查9: nextchunk->size必须大于2字,小于malloc_state->system_mem(同检查3) */
        
        free_perturb(chunk2mem(p), size - 2 * SIZE_SZ);

        /* 后向合并 */
        if (!prev_inuse(p)) {
            prevsize = prev_size(p);
            size += prevsize;
            p = chunk_at_offset(p, -((long)prevsize));
            unlink(av, p, bck, fwd);
        }

        if (nextchunk != av->top) {
            nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

            /* 前向合并 */
            if (!nextinuse) {
                unlink(av, nextchunk, bck, fwd);
                size += nextsize;
            }
            else
                clear_inuse_bit_at_offset(nextchunk, 0);

            /* 合并后的chunk插入unsortedbin的链表头 */
            bck = unsorted_chunks(av);
            fwd = bck->fd;
            if (__glibc_unlikely(fwd->bk != bck))
                malloc_printerr("free(): corrupted unsorted chunks");
            /* 检查10: 相当于 bck->fd->bk != bck,只要不破坏main_arena,通过这个检查应该没有什么问题(就是在进行main_arena劫持的时候,会造成影响) */
            
            p->fd = fwd;
            p->bk = bck;
            if (!in_smallbin_range(size))
            {
                p->fd_nextsize = NULL;
                p->bk_nextsize = NULL;
            }
            bck->fd = p;
            fwd->bk = p;

            set_head(p, size | PREV_INUSE);
            set_foot(p, size);

            check_free_chunk(av, p);
        }
        /* 和top chunk合并 */
        else {
            size += nextsize;
            set_head(p, size | PREV_INUSE);
            av->top = p;
            check_chunk(av, p); /* DEBUG:简单检查 */
        }

        if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
            if (atomic_load_relaxed(&av->have_fastchunks))
                malloc_consolidate(av);

            if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
                if ((unsigned long)(chunksize(av->top)) >=
                    (unsigned long)(mp_.trim_threshold))
                    systrim(mp_.top_pad, av);
#endif
            }
            else {
                heap_info* heap = heap_for_ptr(top(av));
                assert(heap->ar_ptr == av);
                heap_trim(heap, mp_.top_pad);
            }
        }
        if (!have_lock)
            __libc_lock_unlock(av->mutex);
    }
    else {
        munmap_chunk(p); /* 如果是mmap分配出来的chunk,直接进行回收 */
    }
}

现在 free 函数就分析完毕了：

libc-2.27 加入了 tcache 机制，_int_free 会首先尝试把 free chunk 放入 tcachebin
和 libc-2.23 相比，总体上没有什么变化

最后看 unlink：

#define unlink(AV, P, BK, FD) {
	if (__builtin_expect(chunksize(P) != prev_size(next_chunk(P)), 0))
        /* 检查1: 检查chunk->size是否等于nextchunk->presize */
		malloc_printerr("corrupted size vs. prev_size");
    FD = P->fd;
    BK = P->bk;
    if (__builtin_expect(FD->bk != P || BK->fd != P, 0))
        /* 检查2: 检查nextchunk->bk是不是chunk,检查lastchunk->fd是不是chunk */
    	malloc_printerr("corrupted double-linked list");
    else {
        FD->bk = BK;
        BK->fd = FD;
        if (!in_smallbin_range(chunksize_nomask(P))
            && __builtin_expect(P->fd_nextsize != NULL, 0)) {
            if (__builtin_expect(P->fd_nextsize->bk_nextsize != P, 0)
                || __builtin_expect(P->bk_nextsize->fd_nextsize != P, 0))
                malloc_printerr("corrupted double-linked list (not small)");
            /* 检查3: 检查1的largebin版本,主要用于横向链表(用fd_nextsize和bk_nextsize组织的链表,用于管理同一largebin中,不同size的large chunk) */
            if (FD->fd_nextsize == NULL) {
                if (P->fd_nextsize == P)
                    FD->fd_nextsize = FD->bk_nextsize = FD;
                else {
                    FD->fd_nextsize = P->fd_nextsize;
                    FD->bk_nextsize = P->bk_nextsize;
                    P->fd_nextsize->bk_nextsize = FD;
                    P->bk_nextsize->fd_nextsize = FD;
                }
            }
            else {
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;
            }
        }
    }
}

只多了一个检查而已