自己动手开发编译器(十一)语义分析

自己动手开发编译器(十一)语义分析 上回我们已经用VBF的Parsers.Combinators库生成了miniSharp的语法分析器并且能够将miniSharp的源代码翻译成抽象语法树AST。这一回我们要继续进行下一步——语义分析。就目前大家接触的编程语言如C#、VB、C来说语义分析是编译器前端最复杂的部分。因为这些编程语言的语义都非常复杂。语义分析不像之前词法分析、语法分析那样有一些特定的工具来帮助。这一部分通常都是要纯手工写代码来完成。我们的miniSharp语义因为已经高度简化它的语义分析可以说比C#要容易一个数量级。我们只会在选定方法重载的时候见识一下C#复杂语义的冰山一角。所谓编程语言语义就是这段代码实际的含义。编程语言的代码必须有绝对明确的含义这样人们才能让程序做自己想做的事情。比如最简单的一行代码a 1; 它的语义是“将32位整型常量存储到变量a中”。首先我们对“1”有明确的定义它是32位有符号整型字面量这里“32位有符号整型”就是表达式“1”的类型。其次这句话成为合法的编程语言32位整型常量必须能够隐式转换为a的类型。假设a就是int型变量那么这条语句就直接将1存储到a所在内存里。如果a是浮点数类型的那么这句话就隐含着将整型常量1转换为浮点类型的步骤。在语义分析中类型检查是贯穿始终的一个步骤。像miniSharp这样的静态类型语言类型检查通常要做到判定每一个表达式的声明类型判定每一个字段、形式参数、变量声明的类型判断每一次赋值、传参数时是否存在合法的隐式类型转换判断一元和二元运算符左右两侧的类型是否合法比如不就不能在bool和int之间进行将所有要发生的隐式类型转换明确化要进行以上操作需要一个表存储所有已知的类型。如果引用了外部程序集则也需要将外部程序集中的类型信息放到表中。类型信息包括类型的名字、父类如果有的话、成员以及相互隐式转换的规则。我们用如下的类来表示一个miniSharp自定义类型span stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afCodeClassType /span: span stylecolor:#2b91afTypeBase /span{ span stylecolor:#0000ffpublic bool /spanIsStatic { span stylecolor:#0000ffget/span; span stylecolor:#0000ffset/span; } span stylecolor:#0000ffpublic /spanspan stylecolor:#2b91afCodeClassType /spanBaseType { span stylecolor:#0000ffget/span; span stylecolor:#0000ffset/span; } span stylecolor:#0000ffpublic /spanspan stylecolor:#2b91afCollection/spanspan stylecolor:#2b91afMethod/span Methods { span stylecolor:#0000ffget/span; span stylecolor:#0000ffprivate set/span; } span stylecolor:#0000ffpublic /spanspan stylecolor:#2b91afCollection/spanspan stylecolor:#2b91afMethod/span StaticMethods { span stylecolor:#0000ffget/span; span stylecolor:#0000ffprivate set/span; } span stylecolor:#0000ffpublic /spanspan stylecolor:#2b91afVariableCollection/spanspan stylecolor:#2b91afField/span Fields { span stylecolor:#0000ffget/span; span stylecolor:#0000ffprivate set/span; } span stylecolor:#0000ffpublic /spanCodeClassType() { Methods span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afCollection/spanspan stylecolor:#2b91afMethod/span(); StaticMethods span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afCollection/spanspan stylecolor:#2b91afMethod/span(); Fields span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afVariableCollection/spanspan stylecolor:#2b91afField/span(); } span stylecolor:#0000ffpublic override bool /spanIsAssignableFrom(span stylecolor:#2b91afTypeBase /spantype) { span stylecolor:#2b91afCodeClassType /spanotherClassType type span stylecolor:#0000ffas /spanspan stylecolor:#2b91afCodeClassType/span; span stylecolor:#0000ffif /span(otherClassType span stylecolor:#0000ffnull/span) { span stylecolor:#0000ffreturn false/span; } span stylecolor:#0000ffif /span(otherClassType span stylecolor:#0000ffthis/span) { span stylecolor:#0000ffreturn true/span; } span stylecolor:#0000ffelse /span{ span stylecolor:#0000ffreturn /spanIsAssignableFrom(otherClassType.BaseType); } } } /spanminiSharp不支持显式类型转换而唯一支持的隐式类型转换是子类引用到父类引用的转换。除了自定义类型之外我们还需要表示数组类型和基元类型int和bool简陋地如下处理span stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afPrimaryType /span: span stylecolor:#2b91afTypeBase /span{ span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afPrimaryType /spanInt span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afPrimaryType/span() { Name span stylecolor:#a31515int /span}; span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afPrimaryType /spanBoolean span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afPrimaryType/span() { Name span stylecolor:#a31515bool /span}; span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afPrimaryType /spanString span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afPrimaryType/span() { Name span stylecolor:#a31515string /span}; span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afPrimaryType /spanVoid span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afPrimaryType/span() { Name span stylecolor:#a31515void /span}; span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afPrimaryType /spanUnknown span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afPrimaryType/span() { Name span stylecolor:#0000ffnull /span}; span stylecolor:#0000ffpublic override bool /spanIsAssignableFrom(span stylecolor:#2b91afTypeBase /spantype) { span stylecolor:#0000ffif /span(span stylecolor:#0000ffthis /span type) { span stylecolor:#0000ffreturn true/span; } span stylecolor:#0000ffelse /span{ span stylecolor:#0000ffreturn false/span; } } } /spanspan stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afArrayType /span: span stylecolor:#2b91afTypeBase /span{ span stylecolor:#0000ffpublic /spanspan stylecolor:#2b91afTypeBase /spanElementType { span stylecolor:#0000ffget/span; span stylecolor:#0000ffset/span; } span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afArrayType /spanIntArray span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afArrayType/span() { Name span stylecolor:#a31515int[]/span, ElementType span stylecolor:#2b91afPrimaryType/span.Int }; span stylecolor:#0000ffpublic static readonly /spanspan stylecolor:#2b91afArrayType /spanStrArray span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afArrayType/span() { Name span stylecolor:#a31515string[]/span, ElementType span stylecolor:#2b91afPrimaryType/span.String }; span stylecolor:#0000ffpublic override bool /spanIsAssignableFrom(span stylecolor:#2b91afTypeBase /spantype) { span stylecolor:#2b91afCodeClassType /spanelementClassType ElementType span stylecolor:#0000ffas /spanspan stylecolor:#2b91afCodeClassType/span; span stylecolor:#2b91afArrayType /spanarrayType type span stylecolor:#0000ffas /spanspan stylecolor:#2b91afArrayType/span; span stylecolor:#0000ffif /span(elementClassType ! span stylecolor:#0000ffnull /span arrayType ! span stylecolor:#0000ffnull/span) { span stylecolor:#0000ffreturn /spanelementClassType.IsAssignableFrom(arrayType.ElementType); } span stylecolor:#0000ffreturn false/span; } } /span实际上C#会将int和bool直接映射到System.Int32以及System.Boolean结构体。我们的miniSharp不仅仅要翻译成托管代码所以并没有采用这个规定但在生成IL的时候仍然做这样的特殊处理。最后因为miniSharp并不支持引用外部程序集所以我也没有将类型表独立出来而是将类型信息存储在每个表示class的语法树节点上以方便语义分析时访问。语义分析的第二个主要任务是找到所有标识符的定义。标识符在miniSharp里主要有类名、字段名、方法名、参数名和本地变量名。遇到每个名称我们必须解析出标识符表示的类、方法或字段的定义。比如下面这段代码span stylecolor:#000000span stylecolor:#0000ffclass /spanspan stylecolor:#2b91afMyClass /span{ span stylecolor:#0000ffint /spana; span stylecolor:#0000ffpublic int /spanFoo() { span stylecolor:#0000ffint /spana; a 1; span stylecolor:#0000ffif /span(span stylecolor:#0000ffthis/span.a 0) { span stylecolor:#0000ffreturn this/span.a; } span stylecolor:#0000ffelse /span{ span stylecolor:#0000ffreturn /spana; } } } /span有一个字段叫a在过程Foo中又定义了一个同名局部变量a。那么过程内的局部变量a就会覆盖字段的a这句话的意思是标识符“a”在Foo中将表示局部变量而不是同名字段。在语义分析里我们遇到每一个可能代表变量的标识符时都要按照一套预先设定的规则来寻找其定义。比如按照如下顺序搜索当前的本地符号表其中包括当前作用域中定义的本地变量和方法参数搜索当前类的字段如果类的字段不仅仅是private的话如果类还允许定义属性的话这里的规则还要多好几条。所幸miniSharp只用以上两条就够了。我们看看怎么表示本地符号表。span stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afVariableCollection/spanT : span stylecolor:#2b91afKeyedCollection/spanspan stylecolor:#0000ffstring/span, T span stylecolor:#0000ffwhere /spanT : span stylecolor:#2b91afVariableInfo /span{ span stylecolor:#0000ffprivate /spanspan stylecolor:#2b91afStack/spanspan stylecolor:#2b91afHashSet/spanspan stylecolor:#0000ffstring/span m_levelStack; span stylecolor:#0000ffpublic int /spanm_Levels; span stylecolor:#0000ffpublic /spanVariableCollection() { m_Levels 0; m_levelStack span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afStack/spanspan stylecolor:#2b91afHashSet/spanspan stylecolor:#0000ffstring/span(); } span stylecolor:#0000ffprotected override string /spanGetKeyForItem(T item) { span stylecolor:#0000ffreturn /spanitem.Name; } span stylecolor:#0000ffpublic void /spanPushLevel() { m_levelStack.Push(span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afHashSet/spanspan stylecolor:#0000ffstring/span()); m_Levels; } span stylecolor:#0000ffpublic void /spanPopLevel() { span stylecolor:#0000ffif /span(m_Levels 0) { span stylecolor:#0000ffthrow new /spanspan stylecolor:#2b91afInvalidOperationException/span(); } span stylecolor:#0000ffvar /spankeysInLevel m_levelStack.Pop(); m_Levels--; span stylecolor:#0000ffforeach /span(span stylecolor:#0000ffvar /spankey span stylecolor:#0000ffin /spankeysInLevel) { Remove(key); } } span stylecolor:#0000ffprotected override void /spanInsertItem(span stylecolor:#0000ffint /spanindex, T item) { span stylecolor:#0000ffbase/span.InsertItem(index, item); span stylecolor:#0000ffif /span(m_Levels 0) { span stylecolor:#0000ffvar /spankeysInLevel m_levelStack.Peek(); keysInLevel.Add(GetKeyForItem(item)); } } } /span为了简便处理这里所用的数据结构都比较粗糙。但基本思想是使用一个Stack在进入一个新的作用域大括号包围的语句块时压入一个新的HashSet储存这一作用域内声明的变量。当作用域结束时弹出一个HashSet这个作用域内的变量就从表里删除了。所以miniSharp允许两个不互相嵌套的语句块内定义同名变量但不允许在同一个方法内的语句块内覆盖语句块外定义的变量或形式参数。接下来我们要讨论方法重载选取的问题。这是miniSharp中唯一一个稍微有些复杂性的语义。miniSharp允许同一个类多个方法具有相同的方法名只要他们的形式参数表的类型不完全一样即可。而判断一个方法调用表达式到底调用的是哪个方法一共分为以下几个步骤。第一步找到当前类中所有签名相符的方法。miniSharp和C#一样当前类中的方法具有比父类更高的优先级。而VB则采取当前类和父类相同优先级使用Overloads关键字时。所以miniSharp也要先在当前类中搜索合适的候选。第二个条件是签名相符它的定义是方法调用的表达式与候选方法的名称相同参数列表长度一致并且方法调用的表达式列表中的每一个表达式的类型都能隐式转换成候选方法参数表中对应位置参数的类型。稍微形式化一下就是方法F(T1, T2, T3,…,Tn)是调用表达式C(E1, E2, E3,… Em)的签名相符候选方法的条件是F.Name Cm n并且对所有i从1到n满足Ti.IsAssignableFrom(typeof(Ei))。第二步所有签名相符的候选方法中找到一个最佳候选。如果有两个候选方法P(P1, P2,…,Pn)和Q(Q1, Q2,…,Qn)那么我们说P比Q更佳当且仅当P的每一个参数类型都比Q的相应参数类型更好或至少一样好同时Q的每一个参数类型都不比P的相应参数类型更好。如果P和Q各自有一些参数类型比对方更好那么就视为P和Q条件一致无法做出判断有歧义。调用表达式列表项E所对应的候选方法参数类型TP比TQ更好意味着TP与typeof(E)相等但TQ与typeof(E)不相等或者TQ.IsAssignableFrom(TP)这意味着TP比TQ更“具体”一些。如果TP和TQ之间无法相互隐式转换或者两者是相同的类型则视为无法区分。如果在当前类中没有符合条件的候选则对父类重复以上步骤。真正C#的方法重载判断大体上也是这个步骤但还要更加复杂得多。因为C#还有param数组型参数可选参数命名参数泛型方法等语法。这里C#的Spec整整写了好几页纸来描述完整的规则。初看起来这段规则转换成代码很难写所以我采用了一种取巧的方法定义一个比较两个候选参数好坏的Comparer类然后用Order By的方式对候选参数进行排序。Comparer类如下span stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afMethodOverloadingComparer /span: span stylecolor:#2b91afIComparer/spanspan stylecolor:#2b91afMethod/span { span stylecolor:#0000ffprivate /spanspan stylecolor:#2b91afIList/spanspan stylecolor:#2b91afExpression/span m_expressionList; span stylecolor:#0000ffpublic /spanMethodOverloadingComparer(span stylecolor:#2b91afIList/spanspan stylecolor:#2b91afExpression/span expressions) { span stylecolor:#2b91afDebug/span.Assert(expressions ! span stylecolor:#0000ffnull/span); m_expressionList expressions; } span stylecolor:#0000ffpublic int /spanCompare(span stylecolor:#2b91afMethod /spanx, span stylecolor:#2b91afMethod /spany) { span stylecolor:#008000//step 1. find one with better conversion. /spanspan stylecolor:#0000ffint /spanlastComparisonResult 0; span stylecolor:#0000fffor /span(span stylecolor:#0000ffint /spani 0; i m_expressionList.Count; i) { span stylecolor:#0000ffint /spanresult CompareConversion(x.Parameters[i].Type, y.Parameters[i].Type, m_expressionList[i]); span stylecolor:#0000ffif /span(lastComparisonResult 0 result 0 || lastComparisonResult 0 result 0) { span stylecolor:#008000//none is better /spanspan stylecolor:#0000ffreturn /span0; } span stylecolor:#0000ffelse if /span(result ! 0) { lastComparisonResult result; } } span stylecolor:#0000ffreturn /spanlastComparisonResult; } span stylecolor:#0000ffprivate int /spanCompareConversion(span stylecolor:#2b91afTypeBase /spanleftTarget, span stylecolor:#2b91afTypeBase /spanrightTarget, span stylecolor:#2b91afExpression /spansource) { span stylecolor:#0000ffif /span(leftTarget rightTarget) { span stylecolor:#008000//same type, no better one /spanspan stylecolor:#0000ffreturn /span0; } span stylecolor:#0000ffelse if /span(leftTarget source.ExpressionType rightTarget ! source.ExpressionType) { span stylecolor:#008000//left is better; /spanspan stylecolor:#0000ffreturn /span-1; } span stylecolor:#0000ffelse if /span(leftTarget ! source.ExpressionType rightTarget source.ExpressionType) { span stylecolor:#008000//right is better; /spanspan stylecolor:#0000ffreturn /span1; } span stylecolor:#0000ffelse /span{ span stylecolor:#0000ffif /span(leftTarget.IsAssignableFrom(rightTarget)) { span stylecolor:#008000//right is more specific /spanspan stylecolor:#0000ffreturn /span1; } span stylecolor:#0000ffelse if/span(rightTarget.IsAssignableFrom(leftTarget)) { span stylecolor:#008000//left is more specific /spanspan stylecolor:#0000ffreturn /span-1; } span stylecolor:#0000ffelse /span{ span stylecolor:#008000//both are bad /spanspan stylecolor:#0000ffreturn /span0; } } } } /span最后我们要将这一系列步骤组合到一起。由于miniSharp的类可以以任何顺序定义一个类中的方法也可以以任何顺序定义调用时并不受任何限制。所以我们无法只用一次抽象语法树的遍历来完成语义分析。我采用的做法是分成三次遍历前两次分别对类的生命和成员的声明进行解析并构建符号表类型和成员第三次再对方法体进行解析。这样就可以方便地处理不同顺序定义的问题。总的来说三次遍历的任务是第一遍扫描所有class定义检查有无重名的情况。第二遍检查类的基类是否存在检测是否循环继承检查所有字段的类型以及是否重名检查所有方法参数和返回值的类型以及是否重复定义签名完全一致的情况。第三遍检查所有方法体中语句和表达式的语义。因为上一次抽象语法树的设计已经采用了Visitor模式所以以上三个阶段的语义分析可以分别写成三个Visitor来进行处理。语义分析模块同时还要报告所有语义错误。下面我给出第一阶段的Visitor实现供大家参考span stylecolor:#000000span stylecolor:#0000ffpublic class /spanspan stylecolor:#2b91afTypeDeclResolver /span: span stylecolor:#2b91afAstVisitor /span{ span stylecolor:#0000ffprivate /spanspan stylecolor:#2b91afTypeCollection /spanm_types; span stylecolor:#0000ffprivate /spanspan stylecolor:#2b91afCompilationErrorManager /spanm_errorManager; span stylecolor:#0000ffprivate const int /spanc_SE_TypeNameDuplicates 301; span stylecolor:#0000ffpublic void /spanDefineErrors() { m_errorManager.DefineError(c_SE_TypeNameDuplicates, 0, span stylecolor:#2b91afCompilationStage/span.SemanticAnalysis, span stylecolor:#a31515The program has already defined a type named {0}./span); } span stylecolor:#0000ffpublic /spanTypeDeclResolver(span stylecolor:#2b91afCompilationErrorManager /spanerrorManager) { m_errorManager errorManager; m_types span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afTypeCollection/span(); } span stylecolor:#0000ffpublic override /spanspan stylecolor:#2b91afAstNode /spanVisitProgram(span stylecolor:#2b91afProgram /spanast) { Visit(ast.MainClass); span stylecolor:#0000ffforeach /span(span stylecolor:#0000ffvar /spancd span stylecolor:#0000ffin /spanast.Classes) { Visit(cd); } span stylecolor:#0000ffreturn /spanast; } span stylecolor:#0000ffpublic override /spanspan stylecolor:#2b91afAstNode /spanVisitMainClass(span stylecolor:#2b91afMainClass /spanast) { span stylecolor:#008000//main class must be the first class. /spanspan stylecolor:#2b91afDebug/span.Assert(m_types.Count 0); span stylecolor:#0000ffvar /spanname ast.Name.Value; span stylecolor:#0000ffvar /spanmainclassType span stylecolor:#0000ffnew /spanspan stylecolor:#2b91afCodeClassType/span() { Name name, IsStatic span stylecolor:#0000fftrue /span}; m_types.Add(mainclassType); ast.Type mainclassType; span stylecolor:#0000ffreturn /spanast; } span stylecolor:#0000ffpublic override /spanspan stylecolor:#2b91afAstNode /spanVisitClassDecl(span stylecolor:#2b91afClassDecl /spanast) { span stylecolor:#0000ffvar /spanname ast.Name.Value; span stylecolor:#0000ffif /span(m_types.Contains(name)) { m_errorManager.AddError(c_SE_TypeNameDuplicates, ast.Name.Span, name); span stylecolor:#0000ffreturn /spanast; }/span