0%

Js-Go:antlr4+部分语义分析

这是一个基于 Go 的简单 JavaScript 解释器,词法分析和语法分析都使用了 antlr4

词法分析

词法分析使用了 antlr4,脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Lexer for the small JavaScript subset handled by the interpreter.
//
// ANTLR lexers take the longest match and, on a tie, the rule that is declared
// first. Keywords therefore have to stay above IDENTIFIER, while compound
// operators such as '+=' may safely follow their one-character prefixes.
lexer grammar JavaScriptLexer;

// Keywords.
FUNCTION : 'function';
RETURN : 'return';
VAR : 'var';

IF : 'if';
ELSE : 'else';
WHILE : 'while';
FOR : 'for';
BREAK : 'break';
CONTINUE : 'continue';

TRUE: 'true';
FALSE: 'false';

// Brackets.
LPAREN: '(';
RPAREN: ')';
LBRACE: '{';
RBRACE: '}';
LBRACKET: '[';
RBRACKET: ']';

// Literals and names.
NUMBER: [0-9]+('.'[0-9]+)?;
// Underscore is accepted so the token agrees with the identifier pattern the
// interpreter uses when it resolves variable names.
IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
// A backslash always consumes the character after it, and the plain
// alternative excludes the backslash. This keeps "a\\" (escaped backslash
// before the closing quote) from running on to a later quote on the same line.
STRING: '"' ('\\' ~[\r\n] | ~["\\\r\n])* '"';

// Punctuation.
COL : ':';
DOT : '.';
COMMA : ',';
SEMICOLON : ';';

// Arithmetic and assignment operators.
ASSIGN : '=';
ADD: '+';
SUB: '-';
MUL: '*';
DIV: '/';
MOD: '%';

ADD_ASSIGN: '+=';
SUB_ASSIGN: '-=';
MUL_ASSIGN: '*=';
DIV_ASSIGN: '/=';
MOD_ASSIGN: '%=';

// Comparison and logical operators.
NOT: '!';
EQ: '==';
NEQ: '!=';
LT: '<';
GT: '>';
LTE: '<=';
GTE: '>=';

AND: '&&';
OR: '||';

// Whitespace and comments are dropped before parsing.
WS: [ \t\r\n]+ -> skip;
COMMENT: '/*' .*? '*/' -> skip;
LINE_COMMENT: '//' ~[\r\n]* -> skip;

语法分析

语法分析使用了 antlr4,脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// Parser for the small JavaScript subset handled by the interpreter.
parser grammar JavaScriptParser;

options { tokenVocab=JavaScriptLexer; }

// Thin wrappers around literal and name tokens.
num : NUMBER;
id : IDENTIFIER;
str: STRING;
arr: '[' str (',' str)* ']';
key : id ':' expr;
obj: '{' key (',' key)* '}';

// Function definitions and calls.
funcdef: 'function' IDENTIFIER '(' paramlist? ')' block;
paramlist: param (',' param)*;
param: IDENTIFIER ('=' expr)?;

funcall: IDENTIFIER '(' exprlist? ')';
exprlist: expr (',' expr)*;

// A program is a sequence of top-level function definitions and statements.
program : (global)* ;

global: funcdef
| stmg
;

// Expressions. In a left-recursive rule ANTLR 4 assigns precedence by
// alternative order: the alternative written first binds tightest. The
// operators are therefore split into one alternative per precedence level
// (unary, multiplicative, additive, relational, equality, '&&', '||') so that
// 1 + 2 * 3 groups as 1 + (2 * 3) and -1 + 2 groups as (-1) + 2. Each binary
// alternative still produces exactly three children and each unary one two.
expr: '!' expr
| '-' expr
| expr ('*' | '/') expr
| expr ('+' | '-') expr
| expr ('<' | '>' | '<=' | '>=') expr
| expr ('==' | '!=') expr
| expr '&&' expr
| expr '||' expr
| '(' expr ')'
| id '.' funcall
| id '.' id
| funcall '.' funcall
| funcall '.' id
| funcall
| num
| id
| str
| arr
| obj
;

// Statements. stmg is the top-level form; inside a block the optional ';' is
// written directly after each stm.
stmg: stm ';'?;
stm: expr ('=' | '+=' | '-=' | '*=' | '/=') expr
| 'var' IDENTIFIER ('=' expr)?
| 'return' expr
| 'break'
| 'continue'
| if
| while
| block
| expr
;

if: 'if' '(' expr ')' block ('else' block)?;
while: 'while' '(' expr ')' block;
block: '{' (stm ';'?)* '}';

语义分析

语义分析主要使用了 antlr4 中的 listener 模块,主要分析全局的函数和语句:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package listener

import (
"fmt"

Jsp "github.com/klang/js-go/JavaScript"
)

// MyListener is the parse-tree listener that drives the interpreter. It
// embeds Jsp.BaseJavaScriptParserListener (presumably the ANTLR-generated
// no-op base listener) so that only the callbacks defined on MyListener need
// to be written out.
type MyListener struct {
*Jsp.BaseJavaScriptParserListener
}

// EnterProgram is called when the walker enters the program rule. It replaces
// the package-level symbol table vl with a fresh one, so every walk starts
// with no variables or user-defined functions.
func (l *MyListener) EnterProgram(ctx *Jsp.ProgramContext) {
vl = NewSymTable()
}

// EnterFuncdef registers a function definition in the symbol table and then
// prints the function table.
//
// Child 1 of funcdef is the function name. Child 3 is either the paramlist or,
// for a parameterless function, the closing parenthesis (which yields no
// children). Inside paramlist only param nodes have a first child; the comma
// separators do not, and are skipped.
func (l *MyListener) EnterFuncdef(ctx *Jsp.FuncdefContext) {
	var params []string
	for _, node := range ctx.GetChild(3).GetChildren() {
		first := node.GetChild(0)
		if first == nil {
			continue
		}
		params = append(params, fmt.Sprintf("%v", first))
	}

	funcName := fmt.Sprintf("%v", ctx.GetChild(1))
	vl.addFunc(funcName, params, ctx)
	vl.showFuncAll()
}

// EnterStmg evaluates a top-level statement as soon as the walker enters it.
// The first child of stmg is the stm node (the optional ';' follows it); the
// values returned by handStm are discarded here.
func (l *MyListener) EnterStmg(ctx *Jsp.StmgContext) {
handStm(ctx.GetChild(0).(*Jsp.StmContext))
}
  • 对于函数只是简单的将其记录在 SymTable 中
  • 对于语句则需要进行详细的处理

由于 JavaScript 是解释型语言,我这里使用了一个动态变化的 SymTable 来实时记录各个变量的数据,其结构如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// SymTable is the interpreter's runtime state: the variables that currently
// exist, the user-defined functions, and the built-in functions.
type SymTable struct {
// Variants lists the variables in the order they were added; entries are
// appended by addVar and removed from the end by delVar.
Variants []Variant
// Fuctions lists the user-defined functions recorded by addFunc.
Fuctions []Fuction
// InFuctions holds the built-in functions installed by initBuildInFunc.
InFuctions inFunction
}

// Variant is one variable (or one evaluated call argument) together with its
// current value.
type Variant struct {
// name is the identifier; call arguments use the placeholder "unknown".
name string
// value is kept as text; string values keep their surrounding double quotes.
value string
// typ is the token-type constant of the value (for example the NUMBER or
// STRING token type), or -1 when there is no usable value.
typ int
}

// Fuction is a user-defined function recorded while walking the parse tree.
type Fuction struct {
// name is the function's identifier.
name string
// args holds the parameter names in declaration order.
args []string
// fctx is the funcdef parse-tree node; its last child is the body block that
// handFuncdef executes on every call.
fctx *Jsp.FuncdefContext
}

// inFunction groups the functions that are built into the interpreter rather
// than defined by the script.
type inFunction struct {
// log is invoked by callInFunc for the script-level call log(...).
log func(string) int
}

对应的辅助函数如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// NewSymTable returns a symbol table with empty (non-nil) variable and
// function lists and with the built-in functions already installed.
func NewSymTable() *SymTable {
	table := new(SymTable)
	table.Variants = []Variant{}
	table.Fuctions = []Fuction{}
	table.InFuctions = inFunction{}
	table.initBuildInFunc()
	return table
}

// NewVariant allocates a Variant carrying the given name, textual value and
// type code, and returns a pointer to it.
func NewVariant(name string, value string, typ int) *Variant {
	v := Variant{name: name, value: value, typ: typ}
	return &v
}

// showVarAll prints every variable in the table to standard output as a
// framed listing of name, type code and value, followed by a blank line.
func (vl *SymTable) showVarAll() {
	fmt.Println("|-----------------SymTable-----------------------|")
	fmt.Println("|name \t\t type \t\t value \t\t |")
	fmt.Println("|------------------------------------------------|")
	for i := range vl.Variants {
		entry := vl.Variants[i]
		fmt.Printf("|%-15v %-15v %-15v |\n", entry.name, entry.typ, entry.value)
	}
	fmt.Print("|------------------------------------------------|\n\n")
}

// addVar appends a new variable to the end of the table. It never replaces an
// existing entry, so the same name may appear more than once.
func (vl *SymTable) addVar(name string, value string, typ int) {
	entry := Variant{name: name, value: value, typ: typ}
	vl.Variants = append(vl.Variants, entry)
}

// getVarByName returns the value and type code of the variable called name,
// or ("", -1) when no such variable exists.
//
// Variables are appended in the order they come into existence (function
// parameters are pushed on top of the globals by handFuncdef), so the table is
// searched from the newest entry backwards. This lets a parameter or a
// redeclared variable shadow an older entry with the same name instead of
// being hidden by it.
func (vl *SymTable) getVarByName(name string) (string, int) {
	for i := len(vl.Variants) - 1; i >= 0; i-- {
		if v := vl.Variants[i]; v.name == name {
			return v.value, v.typ
		}
	}
	return "", -1
}

// getvarByIndex returns the value and type code of the variable stored at
// position index. An index outside the table yields ("", -1), the same
// "not found" result getVarByName uses, rather than a runtime panic.
func (vl *SymTable) getvarByIndex(index int) (string, int) {
	if index < 0 || index >= len(vl.Variants) {
		return "", -1
	}
	v := vl.Variants[index]
	return v.value, v.typ
}

// getVarlen returns the number of entries currently stored in vl.Variants.
func (vl *SymTable) getVarlen() int {
return len(vl.Variants)
}

// delVar removes the most recently added variable. Calling it on an empty
// table is a no-op; slicing to len-1 there would panic.
func (vl *SymTable) delVar() {
	n := len(vl.Variants)
	if n == 0 {
		return
	}
	vl.Variants = vl.Variants[:n-1]
}

// NewFunction allocates a Fuction record for a user-defined function: its
// name, its parameter names and the funcdef node it was declared by.
func NewFunction(name string, args []string, ctx *Jsp.FuncdefContext) *Fuction {
	fn := Fuction{name: name, args: args, fctx: ctx}
	return &fn
}

// addFunc appends a user-defined function to the table. Existing entries with
// the same name are left in place.
func (vl *SymTable) addFunc(name string, args []string, ctx *Jsp.FuncdefContext) {
	entry := Fuction{name: name, args: args, fctx: ctx}
	vl.Fuctions = append(vl.Fuctions, entry)
}

// getFunc looks up a user-defined function by name and returns its parameter
// names and its funcdef node. The first entry with a matching name wins; when
// there is none, both results are nil.
func (vl *SymTable) getFunc(name string) ([]string, *Jsp.FuncdefContext) {
	for i := 0; i < len(vl.Fuctions); i++ {
		if vl.Fuctions[i].name != name {
			continue
		}
		return vl.Fuctions[i].args, vl.Fuctions[i].fctx
	}
	return nil, nil
}

// showFuncAll prints every user-defined function to standard output, one line
// per function with its name and bracketed parameter list, followed by a
// blank line.
func (vl *SymTable) showFuncAll() {
	fmt.Println("|----------Function--------------|")
	fmt.Println("|name \t\t args \t\t |")
	for i := range vl.Fuctions {
		fn := &vl.Fuctions[i]
		fmt.Printf("|%-15v[ ", fn.name)
		for j := 0; j < len(fn.args); j++ {
			fmt.Printf("%v ", fn.args[j])
		}
		fmt.Println("]")
	}
	fmt.Print("|--------------------------------|\n\n")
}

// initBuildInFunc installs the built-in functions. Currently that is only log,
// which prints its argument with a "[+]log: " prefix and returns 0.
func (vl *SymTable) initBuildInFunc() {
	logFn := func(msg string) int {
		fmt.Printf("[+]log: %v\n", msg)
		return 0
	}
	vl.InFuctions.log = logFn
}

// callInFunc dispatches a call to a built-in function and returns its result
// as text. Only "log" is recognised; any other name is ignored. The result is
// always the empty string.
//
// log prints the value of its first argument. When the script calls log()
// with no arguments an empty message is logged instead of indexing into an
// empty slice, which would panic.
func (vl *SymTable) callInFunc(name string, args []Variant) string {
	if name == "log" {
		msg := ""
		if len(args) > 0 {
			msg = args[0].value
		}
		vl.InFuctions.log(msg)
	}
	return ""
}

负责处理语句的函数为 handStm:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// handStm executes a single statement.
//
// It returns the statement's value and type code plus a flag that is true only
// for a return statement, so the caller can stop executing the enclosing
// function body. Every other statement yields ("", -1, false).
//
// Supported forms:
//   - return expr: evaluates expr and reports it as the return value.
//   - var name [= expr]: adds the variable to the symbol table and prints the
//     table. Without an initializer the variable is stored with the "no value"
//     pair ("", -1); the previous code asserted on the missing fourth child
//     and panicked.
//   - any statement whose first child is an expression: the expression is
//     evaluated for its side effects (for example a function call).
func handStm(stm *Jsp.StmContext) (string, int, bool) {
	switch fmt.Sprintf("%v", stm.GetChild(0)) {
	case "return":
		operand, ok := stm.GetChild(1).(*Jsp.ExprContext)
		if !ok {
			return "", -1, true
		}
		value, typ := handExpr(operand)
		return value, typ, true

	case "var":
		name := fmt.Sprintf("%v", stm.GetChild(1))
		value, typ := "", -1
		// Children are: 'var' IDENTIFIER ['=' expr]. Only look at index 3
		// when the initializer is actually present.
		if stm.GetChildCount() > 3 {
			if initExpr, ok := stm.GetChild(3).(*Jsp.ExprContext); ok {
				value, typ = handExpr(initExpr)
			}
		}
		vl.addVar(name, value, typ)
		vl.showVarAll()
		return "", -1, false
	}

	if exp, ok := stm.GetChild(0).(*Jsp.ExprContext); ok {
		handExpr(exp)
	}

	return "", -1, false
}
  • 目前只支持 return 语句,var 语句和表达式语句

其中最核心的部分就是处理表达式的函数 handExpr:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Patterns handExpr uses to classify the text of a leaf token. They are
// compiled once here rather than on every evaluation.
var (
	exprNumberRe     = regexp.MustCompile(`^\d+$`)
	exprIdentifierRe = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`)
)

// exprStripQuotes removes one pair of surrounding double quotes from s, if
// present. Any other input, including the empty string, is returned as is.
func exprStripQuotes(s string) string {
	if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' {
		return s[1 : len(s)-1]
	}
	return s
}

// exprIntBinary applies the integer operator op to a and b. The boolean is
// false for an unknown operator and for division by zero.
func exprIntBinary(op string, a, b int) (int, bool) {
	switch op {
	case "+":
		return a + b, true
	case "-":
		return a - b, true
	case "*":
		return a * b, true
	case "/":
		if b == 0 {
			return 0, false
		}
		return a / b, true
	}
	return 0, false
}

// handExpr evaluates an expression node and returns its value as text together
// with its type code. ("", -1) means "no usable value": an unsupported
// construct, an unknown variable, or an operation that cannot be carried out.
//
// Supported forms, selected by the number of children of the node:
//   - 3 children, expr op expr: integer + - * / when both operands are
//     numbers; + concatenates (and re-quotes) the operands otherwise.
//   - 3 children, '(' expr ')': evaluates the inner expression.
//   - 2 children, '-' expr: integer negation. The operand of '!' is evaluated
//     but the operator itself is not implemented.
//   - 1 child: a function call, a string or integer literal, or a variable.
//
// Member access (a.b, a.f()), array/object literals and comparison/logical
// operators are parsed but not evaluated; they yield ("", -1). Type assertions
// are checked so these forms no longer panic, and an empty operand, division
// by zero or an integer that does not fit in an int yields ("", -1) as well.
func handExpr(exp *Jsp.ExprContext) (value string, typ int) {
	switch exp.GetChildCount() {
	case 3:
		lhs, lok := exp.GetChild(0).(*Jsp.ExprContext)
		rhs, rok := exp.GetChild(2).(*Jsp.ExprContext)
		if !lok || !rok {
			// Not a binary expression. The only other three-child form
			// with an expression in the middle is '(' expr ')'.
			if inner, ok := exp.GetChild(1).(*Jsp.ExprContext); ok {
				return handExpr(inner)
			}
			return "", -1
		}

		op := fmt.Sprintf("%v", exp.GetChild(1))
		exp1, typ1 := handExpr(lhs)
		exp2, typ2 := handExpr(rhs)
		bothNumbers := typ1 == Jsp.JavaScriptParserNUMBER && typ2 == Jsp.JavaScriptParserNUMBER

		if op == "+" && !bothNumbers {
			return "\"" + exprStripQuotes(exp1) + exprStripQuotes(exp2) + "\"", Jsp.JavaScriptLexerSTRING
		}
		if !bothNumbers {
			return "", -1
		}
		num1, err1 := strconv.Atoi(exp1)
		num2, err2 := strconv.Atoi(exp2)
		if err1 != nil || err2 != nil {
			return "", -1
		}
		result, ok := exprIntBinary(op, num1, num2)
		if !ok {
			return "", -1
		}
		return strconv.Itoa(result), Jsp.JavaScriptParserNUMBER

	case 2:
		operand, ok := exp.GetChild(1).(*Jsp.ExprContext)
		if !ok {
			return "", -1
		}
		op := fmt.Sprintf("%v", exp.GetChild(0))
		exp1, typ1 := handExpr(operand)
		if op != "-" || typ1 != Jsp.JavaScriptParserNUMBER {
			return "", -1
		}
		num1, err := strconv.Atoi(exp1)
		if err != nil {
			return "", -1
		}
		return strconv.Itoa(-num1), Jsp.JavaScriptParserNUMBER

	case 1:
		child := exp.GetChild(0)
		if call, ok := child.(*Jsp.FuncallContext); ok {
			return handExpr_funcall(call)
		}
		// num, id and str wrap exactly one token; arr and obj have more
		// children and are not evaluated.
		if child.GetChildCount() != 1 {
			return "", -1
		}
		text := fmt.Sprintf("%v", child.GetChild(0))
		switch {
		case text == "":
			return "", -1
		case text[0] == '"':
			return text, Jsp.JavaScriptLexerSTRING
		case exprNumberRe.MatchString(text):
			return text, Jsp.JavaScriptLexerNUMBER
		case exprIdentifierRe.MatchString(text):
			return vl.getVarByName(text)
		}
	}

	return "", -1
}
  • 其中包括对二元表达式和一元表达式的处理

在表达式语句中有一个特殊的部分需要单独处理,那就是函数调用:

1
2
3
4
5
6
7
8
9
10
11
12
13
// handExpr_funcall evaluates a function call. It evaluates each argument
// expression from left to right, wraps the results in Variants named
// "unknown", and hands them to handFuncdef together with the callee's name.
//
// Child 0 of funcall is the function name. Child 2 is either the exprlist or,
// for a call without arguments, the closing parenthesis (which yields no
// children). Inside exprlist only expr nodes have a first child; the comma
// separators do not, and are skipped.
func handExpr_funcall(call *Jsp.FuncallContext) (string, int) {
	funcName := fmt.Sprintf("%v", call.GetChild(0))

	actuals := []Variant{}
	for _, node := range call.GetChild(2).GetChildren() {
		if node.GetChild(0) == nil {
			continue
		}
		v, t := handExpr(node.(*Jsp.ExprContext))
		actuals = append(actuals, Variant{name: "unknown", value: v, typ: t})
	}
	return handFuncdef(funcName, actuals)
}
  • 该函数会先记录函数调用的参数,然后再进入函数定义中进行进一步的处理

处理函数定义的代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// handFuncdef calls the function called name with the already evaluated
// arguments args and returns the function's result as (value, type code).
//
// A name that is not a user-defined function is forwarded to the built-ins via
// callInFunc and reported with type code -1.
//
// For a user-defined function the parameters are pushed onto the symbol table,
// the statements of the body block (the last child of the funcdef node) are
// executed in order until one of them is a return, and the result of the last
// executed statement is returned. A function that never returns, or has an
// empty body, yields ("", -1).
//
// The size of the symbol table is recorded before the parameters are bound and
// restored afterwards. This removes the parameters and any variables declared
// inside the body, and removes nothing when the argument count did not match
// and no parameter was bound. Popping a fixed len(params) entries used to drop
// the wrong variables in both of those situations.
func handFuncdef(name string, args []Variant) (string, int) {
	params, ctx := vl.getFunc(name)
	if ctx == nil {
		return vl.callInFunc(name, args), -1
	}

	base := vl.getVarlen()

	if len(params) != len(args) {
		fmt.Println("errorrrr")
	} else {
		for i, param := range params {
			vl.addVar(param, args[i].value, args[i].typ)
		}
	}

	revalue, retyp := "", -1
	body := ctx.GetChild(ctx.GetChildCount() - 1)
	for _, c := range body.GetChildren() {
		// The block also contains the braces and optional ';' tokens.
		stm, ok := c.(*Jsp.StmContext)
		if !ok {
			continue
		}
		value, typ, returned := handStm(stm)
		revalue, retyp = value, typ
		if returned {
			break
		}
	}

	// Leave the table exactly as large as it was before the call.
	for vl.getVarlen() > base {
		vl.delVar()
	}

	return revalue, retyp
}
  • 先查找记录在 SymTable 中的函数,如果没有则查找内置函数