LLVM 基础知识

LLVM IR

LLVM 项目是模块化、可重用的编译器以及工具链技术的集合，用于优化以任意程序语言编写的程序的编译时间 (compile-time)，链接时间 (link-time)，运行时间 (run-time)，以及空闲时间 (idle-time)

编译可以分为五个基本步骤：

词法分析
语法分析
语义分析
中间代码的生成、优化
目标代码的生成

这是每个编译器都必须的基本步骤和流程，从源头输入高级语言源程序输出目标语言代码

IR主要有以下四部分组成：

Module：模块
Function：函数
BasicBlock：基本块（对应复合语句，用 {} 包裹起来）
Instruction：指令

基础流程

具体流程如下：

源代码 ==>[词法分析]==> 词法单元流(token) ==>[语法分析]==> 语法树(AST) ==>[语义分析]==> 语法树(AST) ==>[中间代码的生成,优化]==> 中间代码 ==>[目标代码的生成]==> 目标机器代码

LLVM 则使用 LLVM IR 为中间代码：

源代码 ==>[词法分析]==> 词法单元流(token) ==>[语法/语义分析]==> 语法树(AST) ==>[中间代码的生成]==> LLVM IR ==>[中间代码的优化]==> 生成汇编代码 ==>[汇编+链接]==> 目标机器代码

传给 LLVM PASS 进行优化的数据是 LLVM IR，即代码的中间表示，LLVM IR有三种表示形式：这三种中间格式是完全等价的：

在内存中的编译中间语言（我们无法通过文件的形式得到）
在硬盘上存储的二进制中间语言（格式为 .bc）
人类可读的代码语言（格式为 .ll）

从对应格式转化到另一格式的命令如下：

.c -> .ll：clang -emit-llvm -S a.c -o a.ll
.c -> .bc: clang -emit-llvm -c a.c -o a.bc
.ll -> .bc: llvm-as a.ll -o a.bc
.bc -> .ll: llvm-dis a.bc -o a.ll
.bc -> .s: llc a.bc -o a.s

LLVM 使用案例：

#include <stdio.h>
#include <unistd.h>

void fun1(int *a,int *b){
	int c;
	if(a>b){
		c=*a;
		*a=*b;
		*b=c;
	}else{
		printf("%d %d\n",*a,*b);
	}
}

void fun2(int n){
	int i=0;
	while(i<n){
		printf("%d\n",i);
	}
}

int main() {
	char name[0x10];
	read(0,&name[0],0x10);
	write(1,&name[1],0x10);
	printf("bye\n");
	return 0;
}

1	clang -emit-llvm -S test.c -o test.ll

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

@.str = private unnamed_addr constant [7 x i8] c"%d %d\0A\00", align 1
@.str.1 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@.str.2 = private unnamed_addr constant [5 x i8] c"bye\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @fun1(i32* %0, i32* %1) #0 { /* Function */
  %3 = alloca i32*, align 8 /* Instruction */
  %4 = alloca i32*, align 8
  %5 = alloca i32, align 4
  store i32* %0, i32** %3, align 8
  store i32* %1, i32** %4, align 8
  %6 = load i32*, i32** %3, align 8
  %7 = load i32*, i32** %4, align 8
  %8 = icmp ugt i32* %6, %7
  br i1 %8, label %9, label %17

9:                                                ; preds = %2 /* BasicBlock */
  %10 = load i32*, i32** %3, align 8
  %11 = load i32, i32* %10, align 4
  store i32 %11, i32* %5, align 4
  %12 = load i32*, i32** %4, align 8
  %13 = load i32, i32* %12, align 4
  %14 = load i32*, i32** %3, align 8
  store i32 %13, i32* %14, align 4
  %15 = load i32, i32* %5, align 4
  %16 = load i32*, i32** %4, align 8
  store i32 %15, i32* %16, align 4
  br label %23

17:                                               ; preds = %2
  %18 = load i32*, i32** %3, align 8
  %19 = load i32, i32* %18, align 4
  %20 = load i32*, i32** %4, align 8
  %21 = load i32, i32* %20, align 4
  %22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 %19, i32 %21)
  br label %23

23:                                               ; preds = %17, %9
  ret void
}

declare dso_local i32 @printf(i8*, ...) #1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @fun2(i32 %0) #0 {
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  store i32 %0, i32* %2, align 4
  store i32 0, i32* %3, align 4
  br label %4

4:                                                ; preds = %8, %1
  %5 = load i32, i32* %3, align 4
  %6 = load i32, i32* %2, align 4
  %7 = icmp slt i32 %5, %6
  br i1 %7, label %8, label %11

8:                                                ; preds = %4
  %9 = load i32, i32* %3, align 4
  %10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i32 %9)
  br label %4

11:                                               ; preds = %4
  ret void
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca [16 x i8], align 16
  store i32 0, i32* %1, align 4
  %3 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 0
  %4 = call i64 @read(i32 0, i8* %3, i64 16)
  %5 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 1
  %6 = call i64 @write(i32 1, i8* %5, i64 16)
  %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i64 0, i64 0))
  ret i32 0
}

declare dso_local i64 @read(i32, i8*, i64) #1

declare dso_local i64 @write(i32, i8*, i64) #1

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 10.0.0-4ubuntu1 "}

@代表全局标识符（函数，全局变量）
%代表局部标识符（寄存器名称也就是局部变量，类型）
alloca 指令：用于分配内存堆栈给当前执行的函数，当这个函数返回其调用者时自动释放
br 指令：第一个参数是 i1 类型的值，用于作判断，第二和第三个参数分别是值为 true 和 false 时需要跳转到的标签

指令分析

GetElementPtr 指令是一条指针计算语句，本身并不进行任何数据的访问或修改，只进行指针的计算

1
2
3

<result> = getelementptr <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
<result> = getelementptr inbounds <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
<result> = getelementptr <ty>, <ptr vector> <ptrval>, [inrange] <vector index type> <idx>

第一个 <ty> 是 “第一个索引” 使用的基本类型
第二个 <ty>* 表示其后的基地址 <ptrval> 的类型

案例：数组处理

#include <stdio.h>
#include <unistd.h>

int main() {
   char name1[0x10];
   char name2[0x10][0x10];
   char* name3[0x10];
   name1[0x8] = 'a';
   name2[0x4][0x4] = 'b';
   name1[0]=name1[4];
   name3[0]=&name1[5];
}

%1 = alloca [16 x i8], align 16 /* 创建变量 */
%2 = alloca [16 x [16 x i8]], align 16
%3 = alloca [16 x i8*], align 16
%4 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 8
store i8 97, i8* %4, align 8
%5 = getelementptr inbounds [16 x [16 x i8]], [16 x [16 x i8]]* %2, i64 0, i64 4
%6 = getelementptr inbounds [16 x i8], [16 x i8]* %5, i64 0, i64 4
store i8 98, i8* %6, align 4
%7 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 4
%8 = load i8, i8* %7, align 4 /* 取数据 */
%9 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 0
store i8 %8, i8* %9, align 16
%10 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 5
%11 = getelementptr inbounds [16 x i8*], [16 x i8*]* %3, i64 0, i64 0
store i8* %10, i8** %11, align 16

name 和 &name[0] 是等价的
对于多级数组，需要分多次进行表示

案例：结构体处理

#include <stdio.h>
#include <unistd.h>

struct RT {
	char* a;
	int b[10][20];
	char c[5];
};
struct ST {
	int x;
	double y;
	struct RT r;
};

int main() {
	struct RT rt;
	struct ST st;
	st.r = rt;
	st.x = 1;
	st.y = 2.0;
	rt.a = &rt.c[3];
	rt.b[0x5][0x5] = 4;
	rt.c[3] = 'c';
}

%1 = alloca %struct.RT, align 8
%2 = alloca %struct.ST, align 8
%3 = getelementptr inbounds %struct.ST, %struct.ST* %2, i32 0, i32 2
%4 = bitcast %struct.RT* %3 to i8*
%5 = bitcast %struct.RT* %1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %4, i8* align 8 %5, i64 816, i1 false)
%6 = getelementptr inbounds %struct.ST, %struct.ST* %2, i32 0, i32 0
store i32 1, i32* %6, align 8
%7 = getelementptr inbounds %struct.ST, %struct.ST* %2, i32 0, i32 1
store double 2.000000e+00, double* %7, align 8
    /* rt.c */
%8 = getelementptr inbounds %struct.RT, %struct.RT* %1, i32 0, i32 2
    /* &c[3] */
%9 = getelementptr inbounds [5 x i8], [5 x i8]* %8, i64 0, i64 3
    /* rt.a */
%10 = getelementptr inbounds %struct.RT, %struct.RT* %1, i32 0, i32 0
store i8* %9, i8** %10, align 8
    /* rt.b */
%11 = getelementptr inbounds %struct.RT, %struct.RT* %1, i32 0, i32 1
%12 = getelementptr inbounds [10 x [20 x i32]], [10 x [20 x i32]]* %11, i64 0, i64 5
%13 = getelementptr inbounds [20 x i32], [20 x i32]* %12, i64 0, i64 5
store i32 4, i32* %13, align 4
    /* rt.c */
%14 = getelementptr inbounds %struct.RT, %struct.RT* %1, i32 0, i32 2
%15 = getelementptr inbounds [5 x i8], [5 x i8]* %14, i64 0, i64 3
store i8 99, i8* %15, align 1

数组中的索引在结构体中依然适用
在使用结构体之前，需要先找到对应的结构体条目

Call 指令用于调用一个函数：

1 2	<result> = [tail \| musttail \| notail ] call [fast-math flags] [cconv] [ret attrs] [addrspace(<num>)]<ty>\|<fnty> <fnptrval>(<function args>) [fn attrs] [ operand bundles ]

tail 和 musttail 标记指示优化器应执行尾(tail)调用优化，notail 标记表示优化器不应该向调用添加 tail 或 musttail 标记，它用于防止对调用执行尾调用优化
- tail 标记是可以忽略的提示
- musttail 标记意味着该调用必须经过尾(tail)调用优化，以使程序正确
- musttail 标记提供以下保证：
  - 如果该调用是调用图中的递归循环的一部分，则该调用将不会导致堆栈无限增长
  - 具有 inalloca 或 preallocated 属性的参数将就地转发
  - 如果 musttail 调用出现在带有 thunk 属性的函数中，并且调用者和被调用者都具有可变参数，那么寄存器或内存中的任何非原型参数都将转发给被调用者，类似地，被调用者的返回值返回给调用者的调用者，即使使用了空返回类型
fast-math flags 标记表示调用有一个或多个 fast-math flags (高级结构里面有这个的介绍)，这些 fast-math flags 是用于启用不安全的浮点优化的优化提示，fast-math flags 仅对返回浮点标量或向量类型、浮点标量或向量数组(嵌套到任何深度)类型的调用有效
cconv 标记指示调用应该使用哪种调用约定(高级结构里面有调用约定的介绍)，如果没有指定，调用默认使用C调用约定，该调用约定必须匹配目标函数的调用约定，否则行为是未定义的
ret attrs 列出返回值的参数属性(高级结构里面有参数属性的介绍)，这里只有 zeroext、signext 和 inreg 属性有效 addrspace(<num>) 属性可用于指示被调用函数的地址空间，如果未指定，将使用 data layout (高级结构里面有这个的介绍，翻译成了数据布局)字符串中的程序地址空间
<ty> 是调用指令本身的类型，也是返回值的类型，没有返回值的函数被标记为 void
<fnty> 是被调用函数的签名，参数类型必须匹配此签名所隐含的类型，如果函数不是可变参数，则可以省略此类型
fnptrval 是一个 LLVM 值，包含要调用的函数的指针（定义一个函数时的函数名），在大多数情况下，这是一个直接的函数调用，但是间接调用则尽可能调用任意指向函数值的指针
function args 是函数参数列表，有参数类型和参数属性，所有的参数都是 first class type，如果函数签名表明函数接受可变数量的参数，则可以指定额外的参数
fn attrs 函数属性
operand bundles 操作数集

案例：函数调用

#include <stdio.h>
#include <unistd.h>

int main() {
	char name[0x10];
	read(0,name,0x10);
	write(1,name,0x10);
	printf("bye\n");
}

%1 = alloca [16 x i8], align 16
%2 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 0
%3 = call i64 @read(i32 0, i8* %2, i64 16)
%4 = getelementptr inbounds [16 x i8], [16 x i8]* %1, i64 0, i64 0
%5 = call i64 @write(i32 1, i8* %4, i64 16)
%6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))

LLVM PASS

LLVM 的优化和转换工作就是由多个 pass 来一起完成的，类似流水线操作一样，每个 pass 完成特定的优化工作，所有的 pass 大致可以分为两类：

分析 (analysis) 和转换分析类的 pass 以提供信息为主
转换类 (transform) 的 pass 优化中间代码

LLVM 分三个阶段，对于不同的语言它都提供了同一种中间表示：

前端可以使用不同的编译工具对代码文件做词法分析以形成抽象语法树（AST），然后将分析好的代码转换成 LLVM 的中间表示 IR（intermediate representation）
中间部分的优化器只对中间表示 IR 进行操作，通过一系列的 pass 对 IR 做优化
后端负责将优化好的 IR 解释成对应平台的机器码

LLVM 的优点在于：中间表示 IR 代码编写良好，而且不同的前端语言最终都转换成同一种的 IR

计算机科学领域的任何问题都可以通过增加一个间接的中间层来解决

所以 LLVM 就是在执行优化操作之前，先把代码转化为 LLVM 的中间表示 IR，然后对 IR 进行优化，最后交给后端翻译为机器码

#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
using namespace llvm; /* 使用的函数在llvm命名空间中 */
 
namespace { /* 声明匿名空间,使得被声明的内容仅在该文件内部可见 */
  struct Hello : public FunctionPass /* 使用struct声明,表示默认共有继承
  FunctionPass 一次操作一个函数 */
  {
    static char ID; /* 声明了LLVM用于标识传递的传递标识符,这允许LLVM避免使用昂贵的C++运行时信息 */
    Hello() : FunctionPass(ID) {} /* 构造函数,调用父函数FunctionPass的构造函数进行初始化 */
    bool runOnFunction(Function &F) override /* 重写runOnFunction入口函数 */
    {
      errs() << "Hello: ";
      errs().write_escaped(F.getName()) << '\n';
      std::map<std::string, int> opCodeMap;
      int BBsize=0;
      int opsize=0;
      for(Function::iterator bbit=F.begin();bbit!=F.end();bbit++)
      {
        BBsize++;
        for(BasicBlock::iterator opit=bbit->begin();opit!=bbit->end();opit++)
        {
          opsize++;
          std::string opName(opit->getOpcodeName());
          std::map<std::string,int>::iterator itindex=opCodeMap.find(opName);
          if(itindex!=opCodeMap.end())opCodeMap[opName]++;
          else opCodeMap[opName]=1;
        }
      }
      errs().write_escaped(F.getName())<<" has "<<BBsize<<"  BasicBlocks   and "<<opsize<<"   opcode";
      for(auto it : opCodeMap)errs() <<"   function totally use  "<<it.first <<" "<<it.second <<"times \n";
      return false;
    }
  };
}
 
char Hello::ID = 0; /* 初始化pass ID(LLVM使用ID的地址来识别通行证) */
 
// Register for opt
static RegisterPass<Hello> X("hello", "Hello World Pass",false); 
/* 注册类Hello,给它命令行参数hello,和名字Hello World Pass
最后两个参数描述它的行为:
如果一个pass在没有修改它的情况下遍历CFG,那么第三个参数设置为true
如果传递是分析传递(例如:支配树传递),则true作为第四个参数提供 */
 
// Register for clang
static RegisterStandardPasses Y(PassManagerBuilder::EP_EarlyAsPossible,
  [](const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) {
    PM.add(new Hello());
  });

编译代码为：

1	clang `llvm-config --cxxflags` -Wl,-znodelete -fno-rtti -fPIC -shared Hello.cpp -o LLVMHello.so `llvm-config --ldflags`

LLVM Value

在 LLVM 中，Value 类是所有程序计算出的值的类（如：Arguments，Instructions， Functions 等）的基类，Value类的继承树如下：

在 LLVM 中一切皆 Value：

Value 类是 LLVM 中非常重要的基类
输入程序流中的 “变量/常量/表达式/符号” 都可以被视作一个 Value

Value 类包含多个成员，这里先介绍最重要的三个成员：VTy，UseList，SubclassID

VTy：
- 一个 Value 必然具有一个类型 Type
- VTy 用来记录这个 Value 的Type
UseList：
- LLVM 引入了 Use 类并在 Value 中添加一个 UseList 用来跟踪并记录 Value 的使用者
- 虽然名为 UseList 但只是一个 Use 类的指针
SubclassID：
- 这是一个 const 值，用来指示这个 Value 的子类型
- 其用于 isa<> 与 dyn_cast<> 的判断

1个具体的值，在IR中可能会被多处引用（比如：函数的入参会被函数中的多条指令引用）

在 Value 类中保存了一个 Users 列表（UseList）跟踪这种引用关系，这个被称为 def-use chain，可以使用如下的方法获取 Value 的 Uses：

Value::use_iterator // use-list的迭代器
Value::const_use_iterator // use-list的只读迭代器
unsigned use_size() // 返回value的引用次数
bool use_empty() // 如果use-list为NULL则返回true
use_iterator use_begin() // 获取迭代器的开头
use_iterator use_end() // 获取迭代器的末端
User *use_back() // 获取use-list的最后一个元素

使用案例：

bool runOnFunction(Function &Fun) override 
{
    errs() << "============[start]============>>\n";
    Function *F = &Fun;
    for (Value::use_iterator U = F->use_begin(), e = F->use_end(); U != e; ++U) {
        if (Instruction *Inst = dyn_cast<Instruction>(&*(U->getUser()))) {
            errs() << "F is used in instruction:\n";
            errs() << *Inst << "\n";
        }
    }
    errs() << "=============[end]=============>>\n";
    return false;
}

#include <stdio.h>
#include <unistd.h>

void fun1(){
}

void fun2(){
	fun1();
}

void fun3(){
	fun1();
	fun2();
}

void fun4(){
	fun1();
	fun2();
	fun3();
}

int main() {
	fun1();
	fun2();
	fun3();
	fun4();
	return 0;
}

测试结果：

============[start]============>> /* fun1的调用者 */
F is used in instruction:
  call void @fun1() /* fun2中调用fun1 */
F is used in instruction:
  call void @fun1() /* fun3中调用fun1 */
F is used in instruction:
  call void @fun1() /* fun4中调用fun1 */
F is used in instruction:
  call void @fun1() /* main中调用fun1 */
=============[end]=============>>
============[start]============>>
F is used in instruction:
  call void @fun2()
F is used in instruction:
  call void @fun2()
F is used in instruction:
  call void @fun2()
=============[end]=============>>
============[start]============>>
F is used in instruction:
  call void @fun3()
F is used in instruction:
  call void @fun3()
=============[end]=============>>
============[start]============>>
F is used in instruction:
  call void @fun4()
=============[end]=============>>
============[start]============>>
=============[end]=============>>

小结：

最近学编译原理，顺便把 LLVM 捡起来