以识别 a = 10.01;
为例,阐述语法分析的构造过程
由a = 10.01得知,语法构成为:
SentenceList->Sentence->Exp SEMI->ID ASSIGNOP FLOAT SEMI
因此,需要储存结构如下
typedef struct ASTNode {int kind; //保存结点类型union {char type_id[33]; //由标识符生成的叶结点int type_int; //由整常数生成的叶结点float type_float; //由浮点常数生成的叶结点};union {//第1指针域struct ASTNode *Sentence; //句子struct ASTNode *Exp1; //表达式1struct ASTNode *ID; //标识符};union {//第2指针域struct ASTNode *SentenceList; //句子集struct ASTNode *Exp2; //表达式2};int type; //用以标识表达式结点的类型int pos; //语法单位所在位置行号
} ASTNode;
%union {int type_int;float type_float;char type_id[32];struct ASTNode *ptr;
};
在Program ->SentenceList->Sentence->Exp SEMI->ID ASSIGNOP FLOAT SEMI
中得知
%type program SentenceList Sentence Exp
%token ID /*指定ID的语义值是type_id*/
%token FLOAT /*指定FLOAT的语义值是type_float*/
符号标识符有SEMI ASSIGNOP
,
识别标识符(用于识别该文法类型)有SentenceList_ Sentence_
%token SentenceList_ Sentence_ SEMI ASSIGNOP
%option yylineno. //行号自动加1
定义段:
id [A-Za-z][A-Za-z0-9]* floatconst [0-9]*\.?[0-9]*
规则段:
%%"=" {strcpy(yylval.type_id, yytext);return ASSIGNOP;}
{id} {strcpy(yylval.type_id, yytext); return ID;}
";" {return SEMI;}
{floatconst} {yylval.type_float=atof(yytext); return FLOAT;}[ \r\t] {}
[\n] {}
. {printf("Error type A :Mysterious character \"%s\"\n\t at Line %d\n",yytext,yylineno);}
%%
注:program SentenceList Sentence Exp
的类型为struct ASTNode *
。
在使用时,可用$$=(ASTNode *)malloc(sizeof(ASTNode))
;创建内存。
使用$$->kind,$$->Sentence
创造节点属性。
program: SentenceList { display($1,0);}; //显示语法树,语义分析SentenceList: {$$=NULL;} | Sentence SentenceList {$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = SentenceList_;$$->pos=yylineno;$$->Sentence=$1;$$->SentenceList=$2;};
Sentence: Exp SEMI{//表达式$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = Sentence_;$$->pos=yylineno;$$->Exp1=$1;};Exp : Exp ASSIGNOP Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = ASSIGNOP;$$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"=");}| ID{$$=(ASTNode *)malloc(sizeof(ASTNode)); $$->kind=ID;
$$->pos=yylineno; strcpy($$->type_id,$1);}| FLOAT{$$=(ASTNode *)malloc(sizeof(ASTNode)); $$->kind=FLOAT;
$$->pos=yylineno; $$->type=FLOAT; $$->type_float=$1;};
int main(int argc, char *argv[]){yyin=fopen(argv[1],"r");if (!yyin) return 0;yylineno=1;yyparse();return 0;}
#include
void yyerror(const char* fmt, ...)
{va_list ap;va_start(ap, fmt);fprintf(stderr, "Grammar Error at Line %d Column %d: ", yylloc.first_line,yylloc.first_column);vfprintf(stderr, fmt, ap);fprintf(stderr, ".\n");
}
void display(struct ASTNode *T,int indent)
{//对抽象语法树的先根遍历int i=1;struct ASTNode *T0;if (T)
{switch (T->kind) {case SentenceList_: display(T->Sentence, indent);display(T->SentenceList, indent);break;case Sentence_:printf_(indent);printf("表达式语句:");display(T->Exp1, indent + 4);break;case ID:printf_(indent);printf("变量:%s",T->type_id);break;case FLOAT:printf_(indent);printf("浮点常量:%.6f",T->type_float);break;case ASSIGNOP:printf_(indent);printf("%s",T->type_id);display(T->Exp1,indent+4);display(T->Exp2,indent+4);break;}}
}
IF LP Exp RP Sentence ELSE Sentence
WHILE LP Exp RP LC SentenceList RC
SCAN ID SEMI
PRINT Exp SEMI
等
使用bat语言进行命令行输入,代码文件名为lex.l
、parser.y
,生成文件parser.exe
。
@echo on
flex lex.l
bison -d parser.y
gcc -o parser lex.yy.c parser.tab.c ast.c
实现编译,编译文件为test.c
.
parser test.c
lex.l
%option yylineno
%{
#include "parser.tab.h"
#include "string.h"
#include "def.h"int yycolumn=1;int lastToken;%}id [A-Za-z][A-Za-z0-9]*
intconst [0-9]+
floatconst [0-9]*\.?[0-9]*([eE][-+]?[0-9]+)?%%
"int" {strcpy(yylval.type_id, yytext);return TYPE;}
"float" {strcpy(yylval.type_id, yytext);return TYPE;}"return" {return RETURN;}
"if" {return IF;}
"else" {return ELSE;}
"while" {return WHILE;}
"for" {return FOR;}
"scan" {return SCAN;}
"print" {return PRINT;}{id} {strcpy(yylval.type_id, yytext); return ID;}
";" {return SEMI;}
"," {return COMMA;}
">"|"<"|">="|"<="|"=="|"!=" {strcpy(yylval.type_id, yytext);return RELOP;}
"=" {strcpy(yylval.type_id, yytext);return ASSIGNOP;}
"++" {strcpy(yylval.type_id, yytext);return DPLUS;}
"+" {strcpy(yylval.type_id, yytext);return PLUS;}
"-" {strcpy(yylval.type_id, yytext);return MINUS;}
"*" {strcpy(yylval.type_id, yytext);return STAR;}
"/" {strcpy(yylval.type_id, yytext);return DIV;}
"&&" {return AND;}
"||" {return OR;}
"!" {return NOT;}
"(" {return LP;}
")" {return RP;}
"{" {return LC;}
"}" {return RC;}
{intconst} {yylval.type_int=atoi(yytext); return INT;}
{floatconst} {yylval.type_float=atof(yytext); return FLOAT;}
[ \r\t] {} [\n] {yycolumn=1;}
. {printf("Error type A :Mysterious character \"%s\"\n\t at Line %d\n",yytext,yylineno);}%%int yywrap()
{
return 1;
}
parser.y
%error-verbose
%locations
%{
#include "stdio.h"
#include "math.h"
#include "string.h"
#include "def.h"
extern int yylineno;
extern char *yytext;
extern FILE *yyin;
void yyerror(const char* fmt, ...);
void display(struct ASTNode *,int);
int yylex();
%}%union {int type_int;float type_float;char type_id[32];struct ASTNode *ptr;
};// %type 定义非终结符的语义值类型
%type program SentenceList Sentence Exp //% token 定义终结符的语义值类型
%token INT /*指定INT的语义值是type_int,有词法分析得到的数值*/
%token ID RELOP TYPE /*指定ID,RELOP 的语义值是type_id,有词法分析得到的标识符字符串*/%token FLOAT /*指定ID的语义值是type_id,有词法分析得到的标识符字符串*///GE GT LE LT NE LB RB
%token DPLUS LP RP LC RC SEMI COMMA /*用bison对该文件编译时,带参数-d,生成的exp.tab.h中给这些单词进行编码,可在lex.l中包含parser.tab.h使用这些单词种类码*/
%token SCAN PRINT PLUS MINUS STAR DIV ASSIGNOP AND OR NOT IF ELSE WHILE RETURN STRUCT FOR SWITCH CASE COLON DEFAULT
/*以下为接在上述token后依次编码的枚举常量,作为AST结点类型标记*/
%token SentenceList_ Sentence_ IF_ELSE UMINUS
//%token FUNC_CALL ARGS FUNCTION PARAM ARG CALL LABEL GOTO JLT JLE JGT JGE EQ NEQ%left ASSIGNOP
%left OR
%left AND
%left RELOP
%left PLUS MINUS
%left STAR DIV
%right UMINUS NOT DPLUS%nonassoc ELSE%%program: SentenceList { display($1,0);}; //显示语法树,语义分析SentenceList: {$$=NULL;} | Sentence SentenceList {$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = SentenceList_;$$->pos=yylineno;$$->Sentence=$1;$$->SentenceList=$2;//printf("SentenceList: ->Sentence SentenceList\n");};
Sentence: Exp SEMI{//表达式$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = Sentence_;$$->pos=yylineno;$$->Exp1=$1;//printf("Sentence: ->Exp SEMI\n");}| SCAN ID SEMI{//scan输入$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = SCAN;$$->pos=yylineno;strcpy($$->type_id,$2);//printf("Sentence:->SCAN ID\n");}| PRINT Exp SEMI{//print输出$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = PRINT;$$->pos=yylineno;$$->ID = $2;//printf("Sentence:->PRINT ID\n");}| IF LP Exp RP Sentence ELSE Sentence{//if-else语句$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = IF_ELSE;$$->pos=yylineno;$$->Exp1 = $3;$$->SentenceIF = $5;$$->SentenceElse = $7;//printf("Sentence:->IF LP Exp RP SentenceList ELSE SentenceList\n");}| WHILE LP Exp RP LC SentenceList RC{//While语句$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = WHILE;$$->pos=yylineno;$$->Exp1 = $3;$$->SentenceListWhile= $6;};
Exp : Exp ASSIGNOP Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = ASSIGNOP;$$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"=");//printf("Exp = Exp \n");}| MINUS Exp %prec UMINUS{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = UMINUS;$$->pos=yylineno;$$->Exp1=$2;strcpy($$->type_id,"-");//printf("-Exp \n");}| LP Exp RP{$$=$2;//printf("(Exp) \n");}| Exp RELOP Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = RELOP;$$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,$2);//printf("Exp %s Exp\n",$2);}| Exp MINUS Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = MINUS; $$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"-");//printf("Exp - Exp\n");}| Exp PLUS Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = PLUS; $$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"+");//printf("Exp + Exp\n");}| Exp STAR Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = STAR; $$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"*");//printf("Exp * Exp\n");}| Exp DIV Exp{$$=(ASTNode *)malloc(sizeof(ASTNode));$$->kind = DIV; $$->pos=yylineno;$$->Exp1=$1;$$->Exp2=$3;strcpy($$->type_id,"/");//printf("Exp / Exp\n");}| ID{$$=(ASTNode *)malloc(sizeof(ASTNode)); $$->kind=ID;$$->pos=yylineno; strcpy($$->type_id,$1);//printf("Exp :->ID,ID = %s\n",$1);}| FLOAT{$$=(ASTNode *)malloc(sizeof(ASTNode)); $$->kind=FLOAT;$$->pos=yylineno; $$->type=FLOAT; $$->type_float=$1;//printf("FLOAT = %f\n",$1);}| INT{$$=(ASTNode *)malloc(sizeof(ASTNode)); $$->kind=INT;$$->pos=yylineno; $$->type=INT; $$->type_int=$1;//printf("INT = %d\n",$1);};
%%int main(int argc, char *argv[]){yyin=fopen(argv[1],"r");if (!yyin) return 0;yylineno=1;yyparse();return 0;}
#include
void yyerror(const char* fmt, ...)
{va_list ap;va_start(ap, fmt);fprintf(stderr, "Grammar Error at Line %d Column %d: ", yylloc.first_line,yylloc.first_column);vfprintf(stderr, fmt, ap);fprintf(stderr, ".\n");
}
def.h
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "stdarg.h"
#include "parser.tab.h"typedef struct ASTNode {//以下对结点属性定义没有考虑存储效率,只是简单地列出要用到的一些属性int kind;union {char type_id[33]; //由标识符生成的叶结点int type_int; //由整常数生成的叶结点float type_float; //由浮点常数生成的叶结点};union {//第1指针域struct ASTNode *Sentence;struct ASTNode *Exp1;struct ASTNode *ID; //标识符};union {//第2指针域struct ASTNode *SentenceList;struct ASTNode *Exp2; //表达式2struct ASTNode *SentenceIF;struct ASTNode *SentenceListWhile;};union {//第3指针域struct ASTNode* SentenceElse;};int type; //用以标识表达式结点的类型int pos; //语法单位所在位置行号} ASTNode;
ast.c
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "def.h"
#include "parser.tab.h"int Enter = 0;void printf_(int indent) {if (Enter == 1) {printf("\n");}else {Enter = 1;}if (indent != 0) {printf("%*c", indent, ' ');}
}void display(struct ASTNode *T,int indent)
{//对抽象语法树的先根遍历int i=1;struct ASTNode *T0;if (T){switch (T->kind) {case SentenceList_: display(T->Sentence, indent);display(T->SentenceList, indent);break;case Sentence_:printf_(indent);printf("表达式语句:");display(T->Exp1, indent + 4);break;case ID:printf_(indent);printf("变量:%s",T->type_id);break;case INT:printf_(indent);printf("整型常量:%d",T->type_int);break;case FLOAT:printf_(indent);printf("浮点常量:%.6f",T->type_float);break;case ASSIGNOP:case AND:case OR:case RELOP:case PLUS:case MINUS:case STAR:case DIV:printf_(indent);printf("%s",T->type_id);display(T->Exp1,indent+4);display(T->Exp2,indent+4);break;case UMINUS:printf_(indent);printf("单目%s", T->type_id);display(T->Exp1, indent + 4);break;case SCAN:printf_(indent);printf("输入变量:%s", T->type_id);break;case PRINT:printf_(indent);printf("输出表达式:");display(T->ID, indent + 4);break;case IF_ELSE:printf_(indent);printf("条件语句(if_then_else):");printf_(indent+4);printf("条件:");display(T->Exp1, indent + 8);printf_(indent+4);printf("if子句:");display(T->SentenceIF, indent + 8);printf_(indent+4);printf("else子句:");display(T->SentenceElse, indent + 8);break;case WHILE:printf_(indent);printf("循环语句:");printf_(indent + 4);printf("条件:");display(T->Exp1, indent + 8);printf_(indent + 4);printf("循环体:");display(T->SentenceListWhile, indent + 8);break;}}
}
测试文件test.c
a = 10.1;
scan b;
if (a > b)
max = a;
else max = b;
print max;i = 1;
sum = 0;
while (i <= 10)
{sum = sum + i;i = i + 1;
}
print sum;a = (1 + 3) * 2 / -(13.56 - 5) > 100 == 1;
test.bat
@echo on
flex lex.l
bison -d parser.y
gcc -o parser lex.yy.c parser.tab.c ast.c
编译成功
运行成功
熟悉了flex、bison的使用,联合使用,以及和其他文件(.c)的联合使用,学会了创建语法树,实现语法树的显示输出。