Add initial typechecker for AST #2

Merged
mrab merged 121 commits from typedAST into master 2024-06-14 07:53:30 +00:00
15 changed files with 1187 additions and 214 deletions
Showing only changes of commit a7b4c7e58e - Show all commits

6
.gitignore vendored
View File

@ -8,6 +8,12 @@ cabal-dev
*.chs.h *.chs.h
*.dyn_o *.dyn_o
*.dyn_hi *.dyn_hi
*.java
*.class
*.local~*
src/Parser/JavaParser.hs
src/Parser/Parser.hs
src/Parser/Lexer.hs
.hpc .hpc
.hsenv .hsenv
.cabal-sandbox/ .cabal-sandbox/

View File

@ -0,0 +1,116 @@
module TestByteCodeGenerator where
import Test.HUnit
import ByteCode.ClassFile.Generator
import ByteCode.ClassFile
import ByteCode.Constants
import Ast
nakedClass = Class "Testklasse" [] []
expectedClass = ClassFile {
constantPool = [
ClassInfo 4,
MethodRefInfo 1 3,
NameAndTypeInfo 5 6,
Utf8Info "java/lang/Object",
Utf8Info "<init>",
Utf8Info "()V",
Utf8Info "Code",
ClassInfo 9,
Utf8Info "Testklasse"
],
accessFlags = accessPublic,
thisClass = 8,
superClass = 1,
fields = [],
methods = [],
attributes = []
}
classWithFields = Class "Testklasse" [] [VariableDeclaration "int" "testvariable" Nothing]
expectedClassWithFields = ClassFile {
constantPool = [
ClassInfo 4,
MethodRefInfo 1 3,
NameAndTypeInfo 5 6,
Utf8Info "java/lang/Object",
Utf8Info "<init>",
Utf8Info "()V",
Utf8Info "Code",
ClassInfo 9,
Utf8Info "Testklasse",
FieldRefInfo 8 11,
NameAndTypeInfo 12 13,
Utf8Info "testvariable",
Utf8Info "I"
],
accessFlags = accessPublic,
thisClass = 8,
superClass = 1,
fields = [
MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = 12,
memberDescriptorIndex = 13,
memberAttributes = []
}
],
methods = [],
attributes = []
}
method = MethodDeclaration "int" "add_two_numbers" [
ParameterDeclaration "int" "a",
ParameterDeclaration "int" "b" ]
(Block [Return (Just (BinaryOperation Addition (Reference "a") (Reference "b")))])
classWithMethod = Class "Testklasse" [method] []
expectedClassWithMethod = ClassFile {
constantPool = [
ClassInfo 4,
MethodRefInfo 1 3,
NameAndTypeInfo 5 6,
Utf8Info "java/lang/Object",
Utf8Info "<init>",
Utf8Info "()V",
Utf8Info "Code",
ClassInfo 9,
Utf8Info "Testklasse",
FieldRefInfo 8 11,
NameAndTypeInfo 12 13,
Utf8Info "add_two_numbers",
Utf8Info "(II)I"
],
accessFlags = accessPublic,
thisClass = 8,
superClass = 1,
fields = [],
methods = [
MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = 12,
memberDescriptorIndex = 13,
memberAttributes = [
CodeAttribute {
attributeMaxStack = 420,
attributeMaxLocals = 420,
attributeCode = [Opiadd]
}
]
}
],
attributes = []
}
testBasicConstantPool = TestCase $ assertEqual "basic constant pool" expectedClass $ classBuilder nakedClass emptyClassFile
testFields = TestCase $ assertEqual "fields in constant pool" expectedClassWithFields $ classBuilder classWithFields emptyClassFile
testMethodDescriptor = TestCase $ assertEqual "method descriptor" "(II)I" (methodDescriptor method)
testMethodAssembly = TestCase $ assertEqual "method assembly" expectedClassWithMethod (classBuilder classWithMethod emptyClassFile)
tests = TestList [
TestLabel "Basic constant pool" testBasicConstantPool,
TestLabel "Fields constant pool" testFields,
TestLabel "Method descriptor building" testMethodDescriptor,
TestLabel "Method assembly" testMethodAssembly
]

View File

@ -4,14 +4,46 @@ import Test.HUnit
import Parser.Lexer import Parser.Lexer
testCommentSomething = TestCase $ assertEqual "scan /*Something*/" [Comment "/*Something*/"] $ alexScanTokens "/*Something*/" emptyTokenList :: [Token]
testEmptyComment = TestCase $ assertEqual "scan /*x*/" [Comment "/**/"] $ alexScanTokens "/**/" emptyTokenList = []
testLineComment = TestCase $ assertEqual "scan // comment" [Comment "// comment"] $ alexScanTokens "// comment" testCommentSomething = TestCase $ assertEqual "scan '/*Something*/'" emptyTokenList $ alexScanTokens "/*Something*/"
testEmptyComment = TestCase $ assertEqual "scan '/*x*/'" emptyTokenList $ alexScanTokens "/**/"
testLineComment = TestCase $ assertEqual "scan '// comment'" emptyTokenList $ alexScanTokens "// comment"
testLineCommentEnds = TestCase $ assertEqual "scan '// com\\n'" emptyTokenList $ alexScanTokens "// com\n"
testIdentifier = TestCase $ assertEqual "scan 'identifier'" [IDENTIFIER "identifier"] $ alexScanTokens "identifier"
testShortIdentifier = TestCase $ assertEqual "scan 'i'" [IDENTIFIER "i"] $ alexScanTokens "i"
testIdentifierWithNumber = TestCase $ assertEqual "scan 'i2'" [IDENTIFIER "i2"] $ alexScanTokens "i2"
testKeywordBreak = TestCase $ assertEqual "scan 'break'" [BREAK] $ alexScanTokens "break"
testKeywordInt = TestCase $ assertEqual "scan 'int'" [INT] $ alexScanTokens "int"
testIntLiteral = TestCase $ assertEqual "scan '234'" [INTEGERLITERAL 234] $ alexScanTokens "234"
testIntLiteral2 = TestCase $ assertEqual "scan '54_2'" [INTEGERLITERAL 542] $ alexScanTokens "54_2"
testCharLiteral = TestCase $ assertEqual "scan ''f''" [CHARLITERAL 'f'] $ alexScanTokens "'f'"
testBoolLiteralTrue = TestCase $ assertEqual "scan 'true'" [BOOLLITERAL True] $ alexScanTokens "true"
testBoolLiteralFalse = TestCase $ assertEqual "scan 'false'" [BOOLLITERAL False] $ alexScanTokens "false"
testLBrace = TestCase $ assertEqual "scan '('" [LBRACE] $ alexScanTokens "("
testAnd = TestCase $ assertEqual "scan '&&'" [AND] $ alexScanTokens "&&"
tests = TestList [ tests = TestList [
TestLabel "TestCommentSomething" testCommentSomething, TestLabel "TestCommentSomething" testCommentSomething,
TestLabel "TestEmptyComment" testEmptyComment, TestLabel "TestEmptyComment" testEmptyComment,
TestLabel "TestLineComment" testLineComment TestLabel "TestLineComment" testLineComment,
TestLabel "TestLineCommentEnds" testLineCommentEnds,
TestLabel "TestIdentifier" testIdentifier,
TestLabel "TestShortIdentifier" testShortIdentifier,
TestLabel "TestIdentifierWithNumber" testIdentifierWithNumber,
TestLabel "TestKeywordBreak" testKeywordBreak,
TestLabel "TestKeywordInt" testKeywordInt,
TestLabel "TestIntLiteral" testIntLiteral,
TestLabel "TestIntLiteral2" testIntLiteral2,
TestLabel "TestCharLiteral" testCharLiteral,
TestLabel "TestBoolLiteralTrue" testBoolLiteralTrue,
TestLabel "TestBoolLiteralFalse" testBoolLiteralFalse,
TestLabel "TestLBrace" testLBrace,
TestLabel "TestAnd" testAnd
] ]

189
Test/TestParser.hs Normal file
View File

@ -0,0 +1,189 @@
module TestParser(tests) where
import Test.HUnit
import Parser.Lexer
import Parser.JavaParser
import Ast
testSingleEmptyClass = TestCase $
assertEqual "expect single empty class hello" [Class "Hello" [] []] $
parse [CLASS, IDENTIFIER "Hello", LBRACKET, RBRACKET]
testTwoEmptyClasses = TestCase $
assertEqual "expect two empty classes" [Class "Class1" [] [], Class "Class2" [] []] $
parse [CLASS,IDENTIFIER "Class1",LBRACKET,RBRACKET,CLASS,IDENTIFIER "Class2",LBRACKET,RBRACKET]
testBooleanField = TestCase $
assertEqual "expect class with boolean field" [Class "WithBool" [] [VariableDeclaration "boolean" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithBool",LBRACKET,BOOLEAN,IDENTIFIER "value",SEMICOLON,RBRACKET]
testIntField = TestCase $
assertEqual "expect class with int field" [Class "WithInt" [] [VariableDeclaration "int" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithInt",LBRACKET,INT,IDENTIFIER "value",SEMICOLON,RBRACKET]
testCustomTypeField = TestCase $
assertEqual "expect class with foo field" [Class "WithFoo" [] [VariableDeclaration "Foo" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithFoo",LBRACKET,IDENTIFIER "Foo",IDENTIFIER "value",SEMICOLON,RBRACKET]
testMultipleDeclarationSameLine = TestCase $
assertEqual "expect class with two int fields" [Class "TwoInts" [] [VariableDeclaration "int" "num1" Nothing, VariableDeclaration "int" "num2" Nothing]] $
parse [CLASS,IDENTIFIER "TwoInts",LBRACKET,INT,IDENTIFIER "num1",COMMA,IDENTIFIER "num2",SEMICOLON,RBRACKET]
testMultipleDeclarations = TestCase $
assertEqual "expect class with int and char field" [Class "Multiple" [] [VariableDeclaration "int" "value" Nothing, VariableDeclaration "char" "letter" Nothing]] $
parse [CLASS,IDENTIFIER "Multiple",LBRACKET,INT,IDENTIFIER "value",SEMICOLON,CHAR,IDENTIFIER "letter",SEMICOLON,RBRACKET]
testWithModifier = TestCase $
assertEqual "expect class with int field" [Class "WithInt" [] [VariableDeclaration "int" "value" Nothing]] $
parse [ABSTRACT,CLASS,IDENTIFIER "WithInt",LBRACKET,PUBLIC,INT,IDENTIFIER "value",SEMICOLON,RBRACKET]
testEmptyMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "int" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,INT,IDENTIFIER "foo",LBRACE,RBRACE,SEMICOLON,RBRACKET]
testEmptyPrivateMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "int" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,PRIVATE,INT,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testEmptyVoidMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "void" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testEmptyMethodWithParam = TestCase $
assertEqual "expect class with method with param" [Class "WithParam" [MethodDeclaration "void" "foo" [ParameterDeclaration "int" "param"] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithParam",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,INT,IDENTIFIER "param",RBRACE,SEMICOLON,RBRACKET]
testEmptyMethodWithParams = TestCase $
assertEqual "expect class with multiple params" [Class "WithParams" [MethodDeclaration "void" "foo" [ParameterDeclaration "int" "p1",ParameterDeclaration "Custom" "p2"] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithParams",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,INT,IDENTIFIER "p1",COMMA,IDENTIFIER "Custom",IDENTIFIER "p2",RBRACE,SEMICOLON,RBRACKET]
testClassWithMethodAndField = TestCase $
assertEqual "expect class with method and field" [Class "WithMethodAndField" [MethodDeclaration "void" "foo" [] (Block []), MethodDeclaration "int" "bar" [] (Block [])] [VariableDeclaration "int" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithMethodAndField",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,INT,IDENTIFIER "value",SEMICOLON,INT,IDENTIFIER "bar",LBRACE,RBRACE,SEMICOLON,RBRACKET]
testClassWithConstructor = TestCase $
assertEqual "expect class with constructor" [Class "WithConstructor" [MethodDeclaration "void" "<init>" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithConstructor",LBRACKET,IDENTIFIER "WithConstructor",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testEmptyBlock = TestCase $ assertEqual "expect empty block" [Block []] $ parseStatement [LBRACKET,RBRACKET]
testBlockWithLocalVarDecl = TestCase $
assertEqual "expect block with local var delcaration" [Block [LocalVariableDeclaration $ VariableDeclaration "int" "localvar" Nothing]] $
parseStatement [LBRACKET,INT,IDENTIFIER "localvar",SEMICOLON,RBRACKET]
testBlockWithMultipleLocalVarDecls = TestCase $
assertEqual "expect block with multiple local var declarations" [Block [LocalVariableDeclaration $ VariableDeclaration "int" "var1" Nothing, LocalVariableDeclaration $ VariableDeclaration "boolean" "var2" Nothing]] $
parseStatement [LBRACKET,INT,IDENTIFIER "var1",SEMICOLON,BOOLEAN,IDENTIFIER "var2",SEMICOLON,RBRACKET]
testNestedBlocks = TestCase $
assertEqual "expect block with block inside" [Block [Block []]] $
parseStatement [LBRACKET,LBRACKET,RBRACKET,RBRACKET]
testBlockWithEmptyStatement = TestCase $
assertEqual "expect empty block" [Block []] $
parseStatement [LBRACKET,SEMICOLON,SEMICOLON,RBRACKET]
testExpressionIntLiteral = TestCase $
assertEqual "expect IntLiteral" (IntegerLiteral 3) $
parseExpression [INTEGERLITERAL 3]
testFieldWithInitialization = TestCase $
assertEqual "expect Class with initialized field" [Class "WithInitField" [] [VariableDeclaration "int" "number" $ Just $ IntegerLiteral 3]] $
parse [CLASS,IDENTIFIER "WithInitField",LBRACKET,INT,IDENTIFIER "number",ASSIGN,INTEGERLITERAL 3,SEMICOLON,RBRACKET]
testLocalBoolWithInitialization = TestCase $
assertEqual "expect block with with initialized local var" [Block [LocalVariableDeclaration $ VariableDeclaration "boolean" "b" $ Just $ BooleanLiteral False]] $
parseStatement [LBRACKET,BOOLEAN,IDENTIFIER "b",ASSIGN,BOOLLITERAL False,SEMICOLON,RBRACKET]
testFieldNullWithInitialization = TestCase $
assertEqual "expect Class with initialized field" [Class "WithInitField" [] [VariableDeclaration "Object" "bar" $ Just NullLiteral]] $
parse [CLASS,IDENTIFIER "WithInitField",LBRACKET,IDENTIFIER "Object",IDENTIFIER "bar",ASSIGN,NULLLITERAL,SEMICOLON,RBRACKET]
testReturnVoid = TestCase $
assertEqual "expect block with return nothing" [Block [Return Nothing]] $
parseStatement [LBRACKET,RETURN,SEMICOLON,RBRACKET]
testExpressionNot = TestCase $
assertEqual "expect expression not" (UnaryOperation Not (Reference "boar")) $
parseExpression [NOT,IDENTIFIER "boar"]
testExpressionMinus = TestCase $
assertEqual "expect expression minus" (UnaryOperation Minus (Reference "boo")) $
parseExpression [MINUS,IDENTIFIER "boo"]
testExpressionMultiplication = TestCase $
assertEqual "expect multiplication" (BinaryOperation Multiplication (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",TIMES,INTEGERLITERAL 3]
testExpressionDivision = TestCase $
assertEqual "expect division" (BinaryOperation Division (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",DIV,INTEGERLITERAL 3]
testExpressionModulo = TestCase $
assertEqual "expect modulo operation" (BinaryOperation Modulo (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",MODULO,INTEGERLITERAL 3]
testExpressionAddition = TestCase $
assertEqual "expect addition" (BinaryOperation Addition (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",PLUS,INTEGERLITERAL 3]
testExpressionSubtraction = TestCase $
assertEqual "expect subtraction" (BinaryOperation Subtraction (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",MINUS,INTEGERLITERAL 3]
testExpressionLessThan = TestCase $
assertEqual "expect comparision less than" (BinaryOperation CompareLessThan (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",LESS,INTEGERLITERAL 3]
testExpressionGreaterThan = TestCase $
assertEqual "expect comparision greater than" (BinaryOperation CompareGreaterThan (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",GREATER,INTEGERLITERAL 3]
testExpressionLessThanEqual = TestCase $
assertEqual "expect comparision less than or equal" (BinaryOperation CompareLessOrEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",LESSEQUAL,INTEGERLITERAL 3]
testExpressionGreaterThanOrEqual = TestCase $
assertEqual "expect comparision greater than or equal" (BinaryOperation CompareGreaterOrEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",GREATEREQUAL,INTEGERLITERAL 3]
testExpressionEqual = TestCase $
assertEqual "expect comparison equal" (BinaryOperation CompareEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",EQUAL,INTEGERLITERAL 3]
testExpressionNotEqual = TestCase $
assertEqual "expect comparison equal" (BinaryOperation CompareNotEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",NOTEQUAL,INTEGERLITERAL 3]
testExpressionAnd = TestCase $
assertEqual "expect and expression" (BinaryOperation And (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",AND,IDENTIFIER "baz"]
testExpressionXor = TestCase $
assertEqual "expect xor expression" (BinaryOperation BitwiseXor (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",XOR,IDENTIFIER "baz"]
testExpressionOr = TestCase $
assertEqual "expect or expression" (BinaryOperation Or (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",OR,IDENTIFIER "baz"]
testExpressionPostIncrement = TestCase $
assertEqual "expect PostIncrement" (UnaryOperation PostIncrement (Reference "a")) $
parseExpression [IDENTIFIER "a",INCREMENT]
testExpressionPostDecrement = TestCase $
assertEqual "expect PostDecrement" (UnaryOperation PostDecrement (Reference "a")) $
parseExpression [IDENTIFIER "a",DECREMENT]
testExpressionPreIncrement = TestCase $
assertEqual "expect PreIncrement" (UnaryOperation PreIncrement (Reference "a")) $
parseExpression [INCREMENT,IDENTIFIER "a"]
testExpressionPreDecrement = TestCase $
assertEqual "expect PreIncrement" (UnaryOperation PreDecrement (Reference "a")) $
parseExpression [DECREMENT,IDENTIFIER "a"]
tests = TestList [
testSingleEmptyClass,
testTwoEmptyClasses,
testBooleanField,
testIntField,
testCustomTypeField,
testMultipleDeclarations,
testWithModifier,
testEmptyMethod,
testEmptyPrivateMethod,
testEmptyVoidMethod,
testEmptyMethodWithParam,
testEmptyMethodWithParams,
testClassWithMethodAndField,
testClassWithConstructor,
testEmptyBlock,
testBlockWithLocalVarDecl,
testBlockWithMultipleLocalVarDecls,
testNestedBlocks,
testBlockWithEmptyStatement,
testExpressionIntLiteral,
testFieldWithInitialization,
testLocalBoolWithInitialization,
testFieldNullWithInitialization,
testReturnVoid,
testExpressionNot,
testExpressionMinus,
testExpressionLessThan,
testExpressionGreaterThan,
testExpressionLessThanEqual,
testExpressionGreaterThanOrEqual,
testExpressionEqual,
testExpressionNotEqual,
testExpressionAnd,
testExpressionXor,
testExpressionOr,
testExpressionPostIncrement,
testExpressionPostDecrement,
testExpressionPreIncrement,
testExpressionPreDecrement
]

View File

@ -1,11 +1,14 @@
module Main where module Main where
import Test.HUnit import Test.HUnit
import Parser.Lexer
import TestLexer import TestLexer
import TestByteCodeGenerator
import TestParser
otherTest = TestCase $ assertEqual "math" (4+3) 7
tests = TestList [TestLabel "TestLexer" TestLexer.tests, TestLabel "mathTest" otherTest] tests = TestList [
TestLabel "TestLexer" TestLexer.tests,
TestLabel "TestParser" TestParser.tests,
TestLabel "TestByteCodeGenerator" TestByteCodeGenerator.tests]
main = do runTestTTAndExit Main.tests main = do runTestTTAndExit Main.tests

View File

@ -8,11 +8,23 @@ executable compiler
main-is: Main.hs main-is: Main.hs
build-depends: base, build-depends: base,
array, array,
HUnit HUnit,
utf8-string,
bytestring
default-language: Haskell2010 default-language: Haskell2010
hs-source-dirs: src hs-source-dirs: src,
src/ByteCode,
src/ByteCode/ClassFile
build-tool-depends: alex:alex, happy:happy build-tool-depends: alex:alex, happy:happy
other-modules: Parser.Lexer, Ast, Example, Typecheck other-modules: Parser.Lexer,
Parser.JavaParser
Ast,
Example,
Typecheck,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.ClassFile.Generator,
ByteCode.Constants
test-suite tests test-suite tests
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
@ -20,6 +32,17 @@ test-suite tests
hs-source-dirs: src,Test hs-source-dirs: src,Test
build-depends: base, build-depends: base,
array, array,
HUnit HUnit,
utf8-string,
bytestring
build-tool-depends: alex:alex, happy:happy build-tool-depends: alex:alex, happy:happy
other-modules: Parser.Lexer, TestLexer other-modules: Parser.Lexer,
Parser.JavaParser,
Ast,
TestLexer,
TestParser,
TestByteCodeGenerator,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.ClassFile.Generator,
ByteCode.Constants

2
questions.md Normal file
View File

@ -0,0 +1,2 @@
# Questions
- Enum?

View File

@ -3,10 +3,11 @@ module Ast where
type CompilationUnit = [Class] type CompilationUnit = [Class]
type DataType = String type DataType = String
type Identifier = String type Identifier = String
data ParameterDeclaration = ParameterDeclaration DataType Identifier deriving (Show)
data VariableDeclaration = VariableDeclaration DataType Identifier (Maybe Expression) deriving (Show) data ParameterDeclaration = ParameterDeclaration DataType Identifier deriving (Show, Eq)
data Class = Class DataType [MethodDeclaration] [VariableDeclaration] deriving (Show) data VariableDeclaration = VariableDeclaration DataType Identifier (Maybe Expression) deriving (Show, Eq)
data MethodDeclaration = MethodDeclaration DataType Identifier [ParameterDeclaration] Statement deriving (Show) data Class = Class DataType [MethodDeclaration] [VariableDeclaration] deriving (Show, Eq)
data MethodDeclaration = MethodDeclaration DataType Identifier [ParameterDeclaration] Statement deriving (Show, Eq)
data Statement data Statement
= If Expression Statement (Maybe Statement) = If Expression Statement (Maybe Statement)
@ -16,14 +17,14 @@ data Statement
| Return (Maybe Expression) | Return (Maybe Expression)
| StatementExpressionStatement StatementExpression | StatementExpressionStatement StatementExpression
| TypedStatement DataType Statement | TypedStatement DataType Statement
deriving (Show) deriving (Show, Eq)
data StatementExpression data StatementExpression
= Assignment Expression Expression = Assignment Expression Expression
| ConstructorCall DataType [Expression] | ConstructorCall DataType [Expression]
| MethodCall Expression Identifier [Expression] | MethodCall Expression Identifier [Expression]
| TypedStatementExpression DataType StatementExpression | TypedStatementExpression DataType StatementExpression
deriving (Show) deriving (Show, Eq)
data BinaryOperator data BinaryOperator
= Addition = Addition
@ -43,7 +44,7 @@ data BinaryOperator
| And | And
| Or | Or
| NameResolution | NameResolution
deriving (Show) deriving (Show, Eq)
data UnaryOperator data UnaryOperator
= Not = Not
@ -52,7 +53,7 @@ data UnaryOperator
| PostDecrement | PostDecrement
| PreIncrement | PreIncrement
| PreDecrement | PreDecrement
deriving (Show) deriving (Show, Eq)
data Expression data Expression
= IntegerLiteral Int = IntegerLiteral Int
@ -66,4 +67,4 @@ data Expression
| UnaryOperation UnaryOperator Expression | UnaryOperation UnaryOperator Expression
| StatementExpressionExpression StatementExpression | StatementExpressionExpression StatementExpression
| TypedExpression DataType Expression | TypedExpression DataType Expression
deriving (Show) deriving (Show, Eq)

19
src/ByteCode/ByteUtil.hs Normal file
View File

@ -0,0 +1,19 @@
module ByteCode.ByteUtil(unpackWord16, unpackWord32) where
import Data.Word ( Word8, Word16, Word32 )
import Data.Int
import Data.Bits
unpackWord16 :: Word16 -> [Word8]
unpackWord16 v = [
fromIntegral (shiftR ((.&.) v 0xFF00) 8),
fromIntegral (shiftR ((.&.) v 0x00FF) 0)
]
unpackWord32 :: Word32 -> [Word8]
unpackWord32 v = [
fromIntegral (shiftR ((.&.) v 0xFF000000) 24),
fromIntegral (shiftR ((.&.) v 0x00FF0000) 16),
fromIntegral (shiftR ((.&.) v 0x0000FF00) 8),
fromIntegral (shiftR ((.&.) v 0x000000FF) 0)
]

168
src/ByteCode/ClassFile.hs Normal file
View File

@ -0,0 +1,168 @@
module ByteCode.ClassFile(
ConstantInfo(..),
Attribute(..),
MemberInfo(..),
ClassFile(..),
Operation(..),
serialize,
emptyClassFile
) where
import Data.Word
import Data.Int
import Data.ByteString (unpack)
import Data.ByteString.UTF8 (fromString)
import ByteCode.ByteUtil
import ByteCode.Constants
data ConstantInfo = ClassInfo Word16
| FieldRefInfo Word16 Word16
| MethodRefInfo Word16 Word16
| NameAndTypeInfo Word16 Word16
| IntegerInfo Int32
| Utf8Info [Char]
deriving (Show, Eq)
data Operation = Opiadd
| Opisub
| Opimul
| Opidiv
| Opiand
| Opior
| Opixor
| Opineg
| Opif_icmplt Word16
| Opif_icmple Word16
| Opif_icmpgt Word16
| Opif_icmpge Word16
| Opif_icmpeq Word16
| Opif_icmpne Word16
| Opaconst_null
| Opreturn
| Opireturn
| Opareturn
| Opsipush Word16
| Opldc_w Word16
| Opaload Word16
| Opiload Word16
| Opastore Word16
| Opistore Word16
| Opputfield Word16
| OpgetField Word16
deriving (Show, Eq)
data Attribute = CodeAttribute {
attributeMaxStack :: Word16,
attributeMaxLocals :: Word16,
attributeCode :: [Operation]
} deriving (Show, Eq)
data MemberInfo = MemberInfo {
memberAccessFlags :: Word16,
memberNameIndex :: Word16,
memberDescriptorIndex :: Word16,
memberAttributes :: [Attribute]
} deriving (Show, Eq)
data ClassFile = ClassFile {
constantPool :: [ConstantInfo],
accessFlags :: Word16,
thisClass :: Word16,
superClass :: Word16,
fields :: [MemberInfo],
methods :: [MemberInfo],
attributes :: [Attribute]
} deriving (Show, Eq)
emptyClassFile :: ClassFile
emptyClassFile = ClassFile {
constantPool = [],
accessFlags = accessPublic,
thisClass = 0,
superClass = 0,
fields = [],
methods = [],
attributes = []
}
class Serializable a where
serialize :: a -> [Word8]
instance Serializable ConstantInfo where
serialize (ClassInfo nameIndex) = tagClass : unpackWord16 nameIndex
serialize (FieldRefInfo classIndex nameAndTypeIndex) = tagFieldref : (unpackWord16 classIndex ++ unpackWord16 nameAndTypeIndex)
serialize (MethodRefInfo classIndex nameAndTypeIndex) = tagMethodref : (unpackWord16 classIndex ++ unpackWord16 nameAndTypeIndex)
serialize (NameAndTypeInfo classIndex descriptorIndex) = tagNameandtype : (unpackWord16 classIndex ++ unpackWord16 descriptorIndex)
serialize (IntegerInfo value) = tagInteger : unpackWord32 (fromIntegral value)
serialize (Utf8Info string) = tagUtf8 : unpackWord16 num_bytes ++ bytes where
bytes = unpack (fromString string)
num_bytes = fromIntegral $ length bytes
instance Serializable MemberInfo where
serialize member = unpackWord16 (memberAccessFlags member)
++ unpackWord16 (memberNameIndex member)
++ unpackWord16 (memberDescriptorIndex member)
++ unpackWord16 (fromIntegral (length (memberAttributes member)))
++ concatMap serialize (memberAttributes member)
instance Serializable Operation where
serialize Opiadd = [0x60]
serialize Opisub = [0x64]
serialize Opimul = [0x68]
serialize Opidiv = [0x6C]
serialize Opiand = [0x7E]
serialize Opior = [0x80]
serialize Opixor = [0x82]
serialize Opineg = [0x74]
serialize (Opif_icmplt branch) = 0xA1 : unpackWord16 branch
serialize (Opif_icmple branch) = 0xA4 : unpackWord16 branch
serialize (Opif_icmpgt branch) = 0xA3 : unpackWord16 branch
serialize (Opif_icmpge branch) = 0xA2 : unpackWord16 branch
serialize (Opif_icmpeq branch) = 0x9F : unpackWord16 branch
serialize (Opif_icmpne branch) = 0xA0 : unpackWord16 branch
serialize Opaconst_null = [0x01]
serialize Opreturn = [0xB1]
serialize Opireturn = [0xAC]
serialize Opareturn = [0xB0]
serialize (Opsipush index) = 0x11 : unpackWord16 index
serialize (Opldc_w index) = 0x13 : unpackWord16 index
serialize (Opaload index) = [0xC4, 0x19] ++ unpackWord16 index
serialize (Opiload index) = [0xC4, 0x15] ++ unpackWord16 index
serialize (Opastore index) = [0xC4, 0x3A] ++ unpackWord16 index
serialize (Opistore index) = [0xC4, 0x36] ++ unpackWord16 index
serialize (Opputfield index) = 0xB5 : unpackWord16 index
serialize (OpgetField index) = 0xB4 : unpackWord16 index
instance Serializable Attribute where
serialize (CodeAttribute { attributeMaxStack = maxStack,
attributeMaxLocals = maxLocals,
attributeCode = code }) = let
assembledCode = concat (map serialize code)
in
unpackWord16 7 -- attribute_name_index
++ unpackWord32 (12 + (fromIntegral (length assembledCode))) -- attribute_length
++ unpackWord16 maxStack -- max_stack
++ unpackWord16 maxLocals -- max_locals
++ unpackWord32 (fromIntegral (length assembledCode)) -- code_length
++ assembledCode -- code
++ unpackWord16 0 -- exception_table_length
++ unpackWord16 0 -- attributes_count
instance Serializable ClassFile where
serialize classfile = unpackWord32 0xC0FEBABE -- magic
++ unpackWord16 0 -- minor version
++ unpackWord16 49 -- major version
++ unpackWord16 (fromIntegral (1 + length (constantPool classfile))) -- constant pool count
++ concatMap serialize (constantPool classfile) -- constant pool
++ unpackWord16 (accessFlags classfile) -- access flags
++ unpackWord16 (thisClass classfile) -- this class
++ unpackWord16 (superClass classfile) -- super class
++ unpackWord16 0 -- interface count
++ unpackWord16 (fromIntegral (length (fields classfile))) -- fields count
++ concatMap serialize (fields classfile) -- fields info
++ unpackWord16 (fromIntegral (length (methods classfile))) -- methods count
++ concatMap serialize (methods classfile) -- methods info
++ unpackWord16 (fromIntegral (length (attributes classfile))) -- attributes count
++ concatMap serialize (attributes classfile) -- attributes info

View File

@ -0,0 +1,169 @@
module ByteCode.ClassFile.Generator(
classBuilder,
datatypeDescriptor,
methodParameterDescriptor,
methodDescriptor,
) where
import ByteCode.Constants
import ByteCode.ClassFile (ClassFile (..), ConstantInfo (..), MemberInfo(..), Operation(..), Attribute(..))
import Ast
import Data.Char
type ClassFileBuilder a = a -> ClassFile -> ClassFile
datatypeDescriptor :: String -> String
datatypeDescriptor "void" = "V"
datatypeDescriptor "int" = "I"
datatypeDescriptor "char" = "C"
datatypeDescriptor "boolean" = "B"
datatypeDescriptor x = "L" ++ x
methodParameterDescriptor :: String -> String
methodParameterDescriptor "void" = "V"
methodParameterDescriptor "int" = "I"
methodParameterDescriptor "char" = "C"
methodParameterDescriptor "boolean" = "B"
methodParameterDescriptor x = "L" ++ x ++ ";"
methodDescriptor :: MethodDeclaration -> String
methodDescriptor (MethodDeclaration returntype _ parameters _) = let
parameter_types = [datatype | ParameterDeclaration datatype _ <- parameters]
in
"("
++ (concat (map methodParameterDescriptor parameter_types))
++ ")"
++ datatypeDescriptor returntype
classBuilder :: ClassFileBuilder Class
classBuilder (Class name methods fields) _ = let
baseConstants = [
ClassInfo 4,
MethodRefInfo 1 3,
NameAndTypeInfo 5 6,
Utf8Info "java/lang/Object",
Utf8Info "<init>",
Utf8Info "()V",
Utf8Info "Code"
]
nameConstants = [ClassInfo 9, Utf8Info name]
nakedClassFile = ClassFile {
constantPool = baseConstants ++ nameConstants,
accessFlags = accessPublic,
thisClass = 8,
superClass = 1,
fields = [],
methods = [],
attributes = []
}
in
foldr methodBuilder (foldr fieldBuilder nakedClassFile fields) methods
fieldBuilder :: ClassFileBuilder VariableDeclaration
fieldBuilder (VariableDeclaration datatype name _) input = let
baseIndex = 1 + length (constantPool input)
constants = [
FieldRefInfo (fromIntegral (thisClass input)) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info name,
Utf8Info (datatypeDescriptor datatype)
]
field = MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = (fromIntegral (baseIndex + 2)),
memberDescriptorIndex = (fromIntegral (baseIndex + 3)),
memberAttributes = []
}
in
input {
constantPool = (constantPool input) ++ constants,
fields = (fields input) ++ [field]
}
methodBuilder :: ClassFileBuilder MethodDeclaration
methodBuilder (MethodDeclaration returntype name parameters statement) input = let
baseIndex = 1 + length (constantPool input)
constants = [
FieldRefInfo (fromIntegral (thisClass input)) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info name,
Utf8Info (methodDescriptor (MethodDeclaration returntype name parameters (Block [])))
]
--code = assembleByteCode statement
method = MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = (fromIntegral (baseIndex + 2)),
memberDescriptorIndex = (fromIntegral (baseIndex + 3)),
memberAttributes = [
CodeAttribute {
attributeMaxStack = 420,
attributeMaxLocals = 420,
attributeCode = [Opiadd]
}
]
}
in
input {
constantPool = (constantPool input) ++ constants,
methods = (fields input) ++ [method]
}
type Assembler a = a -> ([ConstantInfo], [Operation]) -> ([ConstantInfo], [Operation])
returnOperation :: DataType -> Operation
returnOperation dtype
| elem dtype ["int", "char", "boolean"] = Opireturn
| otherwise = Opareturn
binaryOperation :: BinaryOperator -> Operation
binaryOperation Addition = Opiadd
binaryOperation Subtraction = Opisub
binaryOperation Multiplication = Opimul
binaryOperation Division = Opidiv
binaryOperation BitwiseAnd = Opiand
binaryOperation BitwiseOr = Opior
binaryOperation BitwiseXor = Opixor
assembleMethod :: Assembler MethodDeclaration
assembleMethod (MethodDeclaration _ _ _ (Block statements)) (constants, ops) =
foldr assembleStatement (constants, ops) statements
assembleStatement :: Assembler Statement
assembleStatement (TypedStatement stype (Return expr)) (constants, ops) = case expr of
Nothing -> (constants, ops ++ [Opreturn])
Just expr -> let
(expr_constants, expr_ops) = assembleExpression expr (constants, ops)
in
(expr_constants, expr_ops ++ [returnOperation stype])
assembleExpression :: Assembler Expression
assembleExpression (TypedExpression _ (BinaryOperation op a b)) (constants, ops)
| elem op [Addition, Subtraction, Multiplication, Division, BitwiseAnd, BitwiseOr, BitwiseXor] = let
(aConstants, aOps) = assembleExpression a (constants, ops)
(bConstants, bOps) = assembleExpression b (aConstants, aOps)
in
(bConstants, bOps ++ [binaryOperation op])
assembleExpression (TypedExpression _ (CharacterLiteral literal)) (constants, ops) =
(constants, ops ++ [Opsipush (fromIntegral (ord literal))])
assembleExpression (TypedExpression _ (BooleanLiteral literal)) (constants, ops) =
(constants, ops ++ [Opsipush (if literal then 1 else 0)])
assembleExpression (TypedExpression _ (IntegerLiteral literal)) (constants, ops)
| literal <= 32767 && literal >= -32768 = (constants, ops ++ [Opsipush (fromIntegral literal)])
| otherwise = (constants ++ [IntegerInfo (fromIntegral literal)], ops ++ [Opldc_w (fromIntegral (1 + length constants))])
assembleExpression (TypedExpression _ NullLiteral) (constants, ops) =
(constants, ops ++ [Opaconst_null])
assembleExpression (TypedExpression etype (UnaryOperation Not expr)) (constants, ops) = let
(exprConstants, exprOps) = assembleExpression expr (constants, ops)
newConstant = fromIntegral (1 + length exprConstants)
in case etype of
"int" -> (exprConstants ++ [IntegerInfo 0x7FFFFFFF], exprOps ++ [Opldc_w newConstant, Opixor])
"char" -> (exprConstants, exprOps ++ [Opsipush 0xFFFF, Opixor])
"boolean" -> (exprConstants, exprOps ++ [Opsipush 0x01, Opixor])
assembleExpression (TypedExpression _ (UnaryOperation Minus expr)) (constants, ops) = let
(exprConstants, exprOps) = assembleExpression expr (constants, ops)
in
(exprConstants, exprOps ++ [Opineg])

25
src/ByteCode/Constants.hs Normal file
View File

@ -0,0 +1,25 @@
module ByteCode.Constants where
import Data.Word
tagClass :: Word8
tagFieldref :: Word8
tagMethodref :: Word8
tagNameandtype :: Word8
tagInteger :: Word8
tagUtf8 :: Word8
accessPublic :: Word16
accessPrivate :: Word16
accessProtected :: Word16
tagClass = 0x07
tagFieldref = 0x09
tagMethodref = 0x0A
tagNameandtype = 0x0C
tagInteger = 0x03
tagUtf8 = 0x01
accessPublic = 0x01
accessPrivate = 0x02
accessProtected = 0x04

View File

@ -1,12 +1,15 @@
{ {
module Parser.JavaParser (parse) where module Parser.JavaParser (parse, parseStatement, parseExpression) where
--import AbsSyn import Ast
import Parser.Lexer import Parser.Lexer
} }
%name parse %name parse
%name parseStatement statement
%name parseExpression expression
%tokentype { Token } %tokentype { Token }
%error { parseError } %error { parseError }
%errorhandlertype explist
%token %token
BOOLEAN { BOOLEAN } BOOLEAN { BOOLEAN }
@ -15,9 +18,9 @@ import Parser.Lexer
CHAR { CHAR } CHAR { CHAR }
CLASS { CLASS} CLASS { CLASS}
IDENTIFIER { IDENTIFIER $$ } IDENTIFIER { IDENTIFIER $$ }
INTLITERAL { INTLITERAL $$} INTLITERAL { INTEGERLITERAL $$}
DOT { DOT } DOT { DOT }
MOD { MOD } MOD { MODULO }
TIMESEQUAL { TIMESEQUAL } TIMESEQUAL { TIMESEQUAL }
GREATEREQUAL { GREATEREQUAL } GREATEREQUAL { GREATEREQUAL }
WHILE { WHILE } WHILE { WHILE }
@ -29,30 +32,28 @@ import Parser.Lexer
THIS { THIS } THIS { THIS }
STATIC { STATIC } STATIC { STATIC }
PROTECTED { PROTECTED } PROTECTED { PROTECTED }
TILDE { TILDE } TILDE { BITWISENOT }
MUL { MUL } MUL { TIMES }
MINUS { MINUS } MINUS { MINUS }
EXCLMARK { EXCLMARK } EXCLMARK { NOT }
IF { IF } IF { IF }
ELSE { ELSE } ELSE { ELSE }
DIVIDEEQUAL { DIVIDEEQUAL } DIVIDEEQUAL { DIVEQUAL }
NEW { NEW } NEW { NEW }
LBRACKET { LBRACKET } LBRACKET { LBRACKET }
JNULL { JNULL } JNULL { NULLLITERAL }
BOOLLITERAL { BOOLLITERAL } BOOLLITERAL { BOOLLITERAL $$ }
DIV { DIV } DIV { DIV }
LOGICALOR { LOGICALOR }
NOTEQUAL { NOTEQUAL } NOTEQUAL { NOTEQUAL }
INSTANCEOF { INSTANCEOF } INSTANCEOF { INSTANCEOF }
ANDEQUAL { ANDEQUAL } ANDEQUAL { ANDEQUAL }
ASSIGN { ASSIGN } ASSIGN { ASSIGN }
DECREMENT { DECREMENT } DECREMENT { DECREMENT }
STRINGLITERAL { STRINGLITERAL } CHARLITERAL { CHARLITERAL $$ }
CHARLITERAL { CHARLITERAL }
AND { AND } AND { AND }
XOREQUAL { XOREQUAL } XOREQUAL { XOREQUAL }
RETURN { RETURN } RETURN { RETURN }
QUESMARK { QUESMARK } QUESMARK { QUESTIONMARK }
SHIFTLEFTEQUAL { SHIFTLEFTEQUAL } SHIFTLEFTEQUAL { SHIFTLEFTEQUAL }
RBRACKET { RBRACKET } RBRACKET { RBRACKET }
COMMA { COMMA } COMMA { COMMA }
@ -68,7 +69,7 @@ import Parser.Lexer
INT { INT } INT { INT }
ABSTRACT { ABSTRACT } ABSTRACT { ABSTRACT }
SEMICOLON { SEMICOLON } SEMICOLON { SEMICOLON }
SIGNEDSHIFTRIGHTEQUAL { SIGNEDSHIFTRIGHTEQUAL } SIGNEDSHIFTRIGHTEQUAL { SHIFTRIGHTEQUAL }
UNSIGNEDSHIFTRIGHTEQUAL { UNSIGNEDSHIFTRIGHTEQUAL } UNSIGNEDSHIFTRIGHTEQUAL { UNSIGNEDSHIFTRIGHTEQUAL }
PLUSEQUAL { PLUSEQUAL } PLUSEQUAL { PLUSEQUAL }
OREQUAL { OREQUAL } OREQUAL { OREQUAL }
@ -76,31 +77,39 @@ import Parser.Lexer
LESS { LESS } LESS { LESS }
%% %%
compilationunit : typedeclarations { } compilationunit : typedeclarations { $1 }
typedeclarations : typedeclaration { } typedeclarations : typedeclaration { [$1] }
| typedeclarations typedeclaration { } | typedeclarations typedeclaration { $1 ++ [$2] }
name : qualifiedname { } name : simplename { $1 }
| simplename { } -- | qualifiedname { }
typedeclaration : classdeclaration { } typedeclaration : classdeclaration { $1 }
qualifiedname : name DOT IDENTIFIER { } qualifiedname : name DOT IDENTIFIER { }
simplename : IDENTIFIER { } simplename : IDENTIFIER { $1 }
classdeclaration : CLASS IDENTIFIER classbody { } classdeclaration : CLASS IDENTIFIER classbody { case $3 of (methods, fields) -> Class $2 methods fields }
| modifiers CLASS IDENTIFIER classbody { } | modifiers CLASS IDENTIFIER classbody { case $4 of (methods, fields) -> Class $3 methods fields }
classbody : LBRACKET RBRACKET { ([], []) } classbody : LBRACKET RBRACKET { ([], []) }
| LBRACKET classbodydeclarations RBRACKET { } | LBRACKET classbodydeclarations RBRACKET { $2 }
modifiers : modifier { } modifiers : modifier { }
| modifiers modifier { } | modifiers modifier { }
classbodydeclarations : classbodydeclaration { } classbodydeclarations : classbodydeclaration {
| classbodydeclarations classbodydeclaration{ } case $1 of
MethodDecl method -> ([method], [])
FieldDecls fields -> ([], fields)
}
| classbodydeclarations classbodydeclaration {
case ($1, $2) of
((methods, fields), MethodDecl method) -> ((methods ++ [method]), fields)
((methods, fields), FieldDecls newFields) -> (methods, (fields ++ newFields))
}
modifier : PUBLIC { } modifier : PUBLIC { }
| PROTECTED { } | PROTECTED { }
@ -110,52 +119,52 @@ modifier : PUBLIC { }
classtype : classorinterfacetype{ } classtype : classorinterfacetype{ }
classbodydeclaration : classmemberdeclaration { } classbodydeclaration : classmemberdeclaration { $1 }
| constructordeclaration { } | constructordeclaration { $1 }
classorinterfacetype : name{ } classorinterfacetype : name { $1 }
classmemberdeclaration : fielddeclaration { } classmemberdeclaration : fielddeclaration { $1 }
| methoddeclaration { } | methoddeclaration { $1 }
constructordeclaration : constructordeclarator constructorbody { } constructordeclaration : constructordeclarator constructorbody { case $1 of (classname, parameters) -> MethodDecl $ MethodDeclaration "void" "<init>" parameters $2 }
| modifiers constructordeclarator constructorbody { } | modifiers constructordeclarator constructorbody { case $2 of (classname, parameters) -> MethodDecl $ MethodDeclaration "void" "<init>" parameters $3 }
fielddeclaration : type variabledeclarators SEMICOLON { } fielddeclaration : type variabledeclarators SEMICOLON { FieldDecls $ map (convertDeclarator $1) $2 }
| modifiers type variabledeclarators SEMICOLON { } | modifiers type variabledeclarators SEMICOLON { FieldDecls $ map (convertDeclarator $2) $3 }
methoddeclaration : methodheader methodbody { } methoddeclaration : methodheader methodbody { case $1 of (returnType, (name, parameters)) -> MethodDecl (MethodDeclaration returnType name parameters $2) }
block : LBRACKET RBRACKET { } block : LBRACKET RBRACKET { Block [] }
| LBRACKET blockstatements RBRACKET { } | LBRACKET blockstatements RBRACKET { Block $2 }
constructordeclarator : simplename LBRACE RBRACE { } constructordeclarator : simplename LBRACE RBRACE { ($1, []) }
| simplename LBRACE formalparameterlist RBRACE { } | simplename LBRACE formalparameterlist RBRACE { ($1, $3) }
constructorbody : LBRACKET RBRACKET { } constructorbody : LBRACKET RBRACKET { Block [] }
| LBRACKET explicitconstructorinvocation RBRACKET { } -- | LBRACKET explicitconstructorinvocation RBRACKET { }
| LBRACKET blockstatements RBRACKET { } -- | LBRACKET blockstatements RBRACKET { }
| LBRACKET explicitconstructorinvocation blockstatements RBRACKET { } -- | LBRACKET explicitconstructorinvocation blockstatements RBRACKET { }
methodheader : type methoddeclarator { } methodheader : type methoddeclarator { ($1, $2) }
| modifiers type methoddeclarator { } | modifiers type methoddeclarator { ($2, $3) }
| VOID methoddeclarator { } | VOID methoddeclarator { ("void", $2) }
| modifiers VOID methoddeclarator { } | modifiers VOID methoddeclarator { ("void", $3)}
type : primitivetype { } type : primitivetype { $1 }
| referencetype { } | referencetype { $1 }
variabledeclarators : variabledeclarator { } variabledeclarators : variabledeclarator { [$1] }
| variabledeclarators COMMA variabledeclarator { } | variabledeclarators COMMA variabledeclarator { $1 ++ [$3] }
methodbody : block { } methodbody : block { $1 }
| SEMICOLON { } | SEMICOLON { Block [] }
blockstatements : blockstatement { } blockstatements : blockstatement { $1 }
| blockstatements blockstatement { } | blockstatements blockstatement { $1 ++ $2}
formalparameterlist : formalparameter { } formalparameterlist : formalparameter { [$1] }
| formalparameterlist COMMA formalparameter{ } | formalparameterlist COMMA formalparameter { $1 ++ [$3] }
explicitconstructorinvocation : THIS LBRACE RBRACE SEMICOLON { } explicitconstructorinvocation : THIS LBRACE RBRACE SEMICOLON { }
| THIS LBRACE argumentlist RBRACE SEMICOLON { } | THIS LBRACE argumentlist RBRACE SEMICOLON { }
@ -163,51 +172,51 @@ explicitconstructorinvocation : THIS LBRACE RBRACE SEMICOLON { }
classtypelist : classtype { } classtypelist : classtype { }
| classtypelist COMMA classtype { } | classtypelist COMMA classtype { }
methoddeclarator : IDENTIFIER LBRACE RBRACE { } methoddeclarator : IDENTIFIER LBRACE RBRACE { ($1, []) }
| IDENTIFIER LBRACE formalparameterlist RBRACE { } | IDENTIFIER LBRACE formalparameterlist RBRACE { ($1, $3) }
primitivetype : BOOLEAN { } primitivetype : BOOLEAN { "boolean" }
| numerictype { } | numerictype { $1 }
referencetype : classorinterfacetype { } referencetype : classorinterfacetype { $1 }
variabledeclarator : variabledeclaratorid { } variabledeclarator : variabledeclaratorid { Declarator $1 Nothing }
| variabledeclaratorid ASSIGN variableinitializer { } | variabledeclaratorid ASSIGN variableinitializer { Declarator $1 (Just $3) }
blockstatement : localvariabledeclarationstatement { } blockstatement : localvariabledeclarationstatement { $1 }
| statement { } | statement { $1 }
formalparameter : type variabledeclaratorid { } formalparameter : type variabledeclaratorid { ParameterDeclaration $1 $2 }
argumentlist : expression { } argumentlist : expression { }
| argumentlist COMMA expression { } | argumentlist COMMA expression { }
numerictype : integraltype { } numerictype : integraltype { $1 }
variabledeclaratorid : IDENTIFIER { } variabledeclaratorid : IDENTIFIER { $1 }
variableinitializer : expression { } variableinitializer : expression { $1 }
localvariabledeclarationstatement : localvariabledeclaration SEMICOLON { } localvariabledeclarationstatement : localvariabledeclaration SEMICOLON { $1 }
statement : statementwithouttrailingsubstatement{ } statement : statementwithouttrailingsubstatement{ $1 }
| ifthenstatement { } -- | ifthenstatement { }
| ifthenelsestatement { } -- | ifthenelsestatement { }
| whilestatement { } -- | whilestatement { }
expression : assignmentexpression { } expression : assignmentexpression { $1 }
integraltype : INT { } integraltype : INT { "int" }
| CHAR { } | CHAR { "char" }
localvariabledeclaration : type variabledeclarators { } localvariabledeclaration : type variabledeclarators { map LocalVariableDeclaration $ map (convertDeclarator $1) $2 }
statementwithouttrailingsubstatement : block { } statementwithouttrailingsubstatement : block { [$1] }
| emptystatement { } | emptystatement { [] }
| expressionstatement { } -- | expressionstatement { }
| returnstatement { } | returnstatement { [$1] }
ifthenstatement : IF LBRACE expression RBRACE statement { } ifthenstatement : IF LBRACE expression RBRACE statement { }
@ -215,22 +224,22 @@ ifthenelsestatement : IF LBRACE expression RBRACE statementnoshortif ELSE state
whilestatement : WHILE LBRACE expression RBRACE statement { } whilestatement : WHILE LBRACE expression RBRACE statement { }
assignmentexpression : conditionalexpression { } assignmentexpression : conditionalexpression { $1 }
| assignment{ } -- | assignment { }
emptystatement : SEMICOLON { } emptystatement : SEMICOLON { Block [] }
expressionstatement : statementexpression SEMICOLON { } expressionstatement : statementexpression SEMICOLON { }
returnstatement : RETURN SEMICOLON { } returnstatement : RETURN SEMICOLON { Return Nothing }
| RETURN expression SEMICOLON { } | RETURN expression SEMICOLON { Return $ Just $2 }
statementnoshortif : statementwithouttrailingsubstatement { } statementnoshortif : statementwithouttrailingsubstatement { }
| ifthenelsestatementnoshortif { } | ifthenelsestatementnoshortif { }
| whilestatementnoshortif { } | whilestatementnoshortif { }
conditionalexpression : conditionalorexpression { } conditionalexpression : conditionalorexpression { $1 }
| conditionalorexpression QUESMARK expression COLON conditionalexpression { } -- | conditionalorexpression QUESMARK expression COLON conditionalexpression { }
assignment : lefthandside assignmentoperator assignmentexpression { } assignment : lefthandside assignmentoperator assignmentexpression { }
@ -248,31 +257,31 @@ ifthenelsestatementnoshortif :IF LBRACE expression RBRACE statementnoshortif
whilestatementnoshortif : WHILE LBRACE expression RBRACE statementnoshortif { } whilestatementnoshortif : WHILE LBRACE expression RBRACE statementnoshortif { }
conditionalorexpression : conditionalandexpression { } conditionalorexpression : conditionalandexpression { $1 }
| conditionalorexpression LOGICALOR conditionalandexpression{ } -- | conditionalorexpression LOGICALOR conditionalandexpression{ }
lefthandside : name { } lefthandside : name { $1 }
assignmentoperator : ASSIGN{ } assignmentoperator : ASSIGN{ }
| TIMESEQUAL { } -- | TIMESEQUAL { }
| DIVIDEEQUAL { } -- | DIVIDEEQUAL { }
| MODULOEQUAL { } -- | MODULOEQUAL { }
| PLUSEQUAL { } -- | PLUSEQUAL { }
| MINUSEQUAL { } -- | MINUSEQUAL { }
| SHIFTLEFTEQUAL { } -- | SHIFTLEFTEQUAL { }
| SIGNEDSHIFTRIGHTEQUAL { } -- | SIGNEDSHIFTRIGHTEQUAL { }
| UNSIGNEDSHIFTRIGHTEQUAL { } -- | UNSIGNEDSHIFTRIGHTEQUAL { }
| ANDEQUAL { } -- | ANDEQUAL { }
| XOREQUAL { } -- | XOREQUAL { }
| OREQUAL{ } -- | OREQUAL{ }
preincrementexpression : INCREMENT unaryexpression { } preincrementexpression : INCREMENT unaryexpression { UnaryOperation PreIncrement $2 }
predecrementexpression : DECREMENT unaryexpression { } predecrementexpression : DECREMENT unaryexpression { UnaryOperation PreDecrement $2 }
postincrementexpression : postfixexpression INCREMENT { } postincrementexpression : postfixexpression INCREMENT { UnaryOperation PostIncrement $1 }
postdecrementexpression : postfixexpression DECREMENT { } postdecrementexpression : postfixexpression DECREMENT { UnaryOperation PostDecrement $1 }
methodinvocation : name LBRACE RBRACE { } methodinvocation : name LBRACE RBRACE { }
| name LBRACE argumentlist RBRACE { } | name LBRACE argumentlist RBRACE { }
@ -282,79 +291,86 @@ methodinvocation : name LBRACE RBRACE { }
classinstancecreationexpression : NEW classtype LBRACE RBRACE { } classinstancecreationexpression : NEW classtype LBRACE RBRACE { }
| NEW classtype LBRACE argumentlist RBRACE { } | NEW classtype LBRACE argumentlist RBRACE { }
conditionalandexpression : inclusiveorexpression { } conditionalandexpression : inclusiveorexpression { $1 }
fieldaccess : primary DOT IDENTIFIER { } fieldaccess : primary DOT IDENTIFIER { }
unaryexpression : preincrementexpression { } unaryexpression : unaryexpressionnotplusminus { $1 }
| predecrementexpression { } | predecrementexpression { $1 }
| PLUS unaryexpression { } | PLUS unaryexpression { $2 }
| MINUS unaryexpression { } | MINUS unaryexpression { UnaryOperation Minus $2 }
| unaryexpressionnotplusminus { } | preincrementexpression { $1 }
postfixexpression : primary { } postfixexpression : primary { $1 }
| name { } | name { Reference $1 }
| postincrementexpression { } | postincrementexpression { $1 }
| postdecrementexpression{ } | postdecrementexpression{ $1 }
primary : primarynonewarray { } primary : primarynonewarray { $1 }
inclusiveorexpression : exclusiveorexpression { } inclusiveorexpression : exclusiveorexpression { $1 }
| inclusiveorexpression OR exclusiveorexpression { } | inclusiveorexpression OR exclusiveorexpression { BinaryOperation Or $1 $3 }
primarynonewarray : literal { } primarynonewarray : literal { $1 }
| THIS { } -- | THIS { }
| LBRACE expression RBRACE { } -- | LBRACE expression RBRACE { }
| classinstancecreationexpression { } -- | classinstancecreationexpression { }
| fieldaccess { } -- | fieldaccess { }
| methodinvocation { } -- | methodinvocation { }
unaryexpressionnotplusminus : postfixexpression { } unaryexpressionnotplusminus : postfixexpression { $1 }
| TILDE unaryexpression { } -- | TILDE unaryexpression { }
| EXCLMARK unaryexpression { } | EXCLMARK unaryexpression { UnaryOperation Not $2 }
| castexpression{ } -- | castexpression{ }
exclusiveorexpression : andexpression { } exclusiveorexpression : andexpression { $1 }
| exclusiveorexpression XOR andexpression { } | exclusiveorexpression XOR andexpression { BinaryOperation BitwiseXor $1 $3 }
literal : INTLITERAL { } literal : INTLITERAL { IntegerLiteral $1 }
| BOOLLITERAL { } | BOOLLITERAL { BooleanLiteral $1 }
| CHARLITERAL { } | CHARLITERAL { CharacterLiteral $1 }
| STRINGLITERAL { } | JNULL { NullLiteral }
| JNULL { }
castexpression : LBRACE primitivetype RBRACE unaryexpression { } castexpression : LBRACE primitivetype RBRACE unaryexpression { }
| LBRACE expression RBRACE unaryexpressionnotplusminus{ } | LBRACE expression RBRACE unaryexpressionnotplusminus{ }
andexpression : equalityexpression { } andexpression : equalityexpression { $1 }
| andexpression AND equalityexpression { } | andexpression AND equalityexpression { BinaryOperation And $1 $3 }
equalityexpression : relationalexpression { } equalityexpression : relationalexpression { $1 }
| equalityexpression EQUAL relationalexpression { } | equalityexpression EQUAL relationalexpression { BinaryOperation CompareEqual $1 $3 }
| equalityexpression NOTEQUAL relationalexpression { } | equalityexpression NOTEQUAL relationalexpression { BinaryOperation CompareNotEqual $1 $3 }
relationalexpression : shiftexpression { } relationalexpression : shiftexpression { $1 }
| relationalexpression LESS shiftexpression { } | relationalexpression LESS shiftexpression { BinaryOperation CompareLessThan $1 $3 }
| relationalexpression GREATER shiftexpression { } | relationalexpression GREATER shiftexpression { BinaryOperation CompareGreaterThan $1 $3 }
| relationalexpression LESSEQUAL shiftexpression { } | relationalexpression LESSEQUAL shiftexpression { BinaryOperation CompareLessOrEqual $1 $3 }
| relationalexpression GREATEREQUAL shiftexpression { } | relationalexpression GREATEREQUAL shiftexpression { BinaryOperation CompareGreaterOrEqual $1 $3 }
| relationalexpression INSTANCEOF referencetype { } -- | relationalexpression INSTANCEOF referencetype { }
shiftexpression : additiveexpression { } shiftexpression : additiveexpression { $1 }
additiveexpression : multiplicativeexpression { } additiveexpression : multiplicativeexpression { $1 }
| additiveexpression PLUS multiplicativeexpression { } | additiveexpression PLUS multiplicativeexpression { BinaryOperation Addition $1 $3 }
| additiveexpression MINUS multiplicativeexpression { } | additiveexpression MINUS multiplicativeexpression { BinaryOperation Subtraction $1 $3 }
multiplicativeexpression : unaryexpression { } multiplicativeexpression : unaryexpression { $1 }
| multiplicativeexpression MUL unaryexpression { } | multiplicativeexpression MUL unaryexpression { BinaryOperation Multiplication $1 $3 }
| multiplicativeexpression DIV unaryexpression { } | multiplicativeexpression DIV unaryexpression { BinaryOperation Division $1 $3 }
| multiplicativeexpression MOD unaryexpression { } | multiplicativeexpression MOD unaryexpression { BinaryOperation Modulo $1 $3 }
{ {
parseError :: [Token] -> a data MethodOrFieldDeclaration = MethodDecl MethodDeclaration
parseError _ = error "Parse error" | FieldDecls [VariableDeclaration]
data Declarator = Declarator Identifier (Maybe Expression)
convertDeclarator :: DataType -> Declarator -> VariableDeclaration
convertDeclarator dataType (Declarator id assigment) = VariableDeclaration dataType id assigment
parseError :: ([Token], [String]) -> a
parseError (errortoken, expected) = error ("parse error on token: " ++ show errortoken ++ "\nexpected one of: " ++ show expected)
} }

View File

@ -1,5 +1,6 @@
{ {
module Parser.Lexer(Token(..), alexScanTokens) where module Parser.Lexer(Token(..), alexScanTokens) where
import Text.Read
} }
%wrapper "basic" %wrapper "basic"
@ -7,18 +8,221 @@
$digit = 0-9 $digit = 0-9
$alpha = [a-zA-Z] $alpha = [a-zA-Z]
$alphanum = [a-zA-Z0-9] $alphanum = [a-zA-Z0-9]
$JavaLetter = [A-Za-z\_\$]
$JavaLetterOrDigit = [A-Za-z\_\$0-9]
tokens :- tokens :-
$white ; $white ;
"/*"(.|\n)*"*/" { \s -> Comment s } "/*"(.|\n)*"*/" ;
"//".* {\s -> Comment s} "//".* ;
-- keywords
"abstract" { \_ -> ABSTRACT }
"assert" { \_ -> BOOLEAN }
"boolean" { \_ -> BOOLEAN}
"break" { \_ -> BREAK}
"byte" { \_ -> BYTE}
"case" { \_ -> CASE}
"catch" { \_ -> CATCH}
"char" { \_ -> CHAR}
"class" { \_ -> CLASS}
"const" { \_ -> CONST}
"continue" { \_ -> CONTINUE}
"default" { \_ -> DEFAULT}
"do" { \_ -> DO}
"double" { \_ -> DOUBLE}
("else"|"ifn't") { \_ -> ELSE}
"enum" { \_ -> ENUM}
"extends" { \_ -> EXTENDS}
"final" { \_ -> FINAL}
"finally" { \_ -> FINALLY}
"float" { \_ -> FLOAT}
"for" { \_ -> FOR}
"if" { \_ -> IF}
"goto" { \_ -> GOTO}
"implements" { \_ -> IMPLEMENTS}
"import" { \_ -> IMPORT}
"instanceof" { \_ -> INSTANCEOF}
"int" { \_ -> INT}
"long" { \_ -> LONG}
"native" { \_ -> NATIVE}
"new" { \_ -> NEW}
"package" { \_ -> PACKAGE}
"private" { \_ -> PRIVATE}
"protected" { \_ -> PROTECTED}
"public" { \_ -> PUBLIC}
"return" { \_ -> RETURN}
"short" { \_ -> SHORT}
"static" { \_ -> STATIC}
"strictfp" { \_ -> STRICTFP}
"super" { \_ -> SUPER}
"switch" { \_ -> SWITCH}
"synchronized" { \_ -> SYNCHRONIZED}
"this" { \_ -> THIS}
"throw" { \_ -> THROW}
"throws" { \_ -> THROWS}
"transient" { \_ -> TRANSIENT}
"try" { \_ -> TRY}
"void" { \_ -> VOID}
"volatile" { \_ -> VOLATILE}
"while" { \_ -> WHILE}
-- Literals
"true" { \_ -> BOOLLITERAL True }
"false" { \_ -> BOOLLITERAL False }
"null" { \_ -> NULLLITERAL }
-- end keywords
$JavaLetter$JavaLetterOrDigit* { \s -> IDENTIFIER s }
-- Literals
[1-9]([0-9\_]*[0-9])* { \s -> case readMaybe $ filter ((/=) '_') s of Just a -> INTEGERLITERAL a; Nothing -> error ("failed to parse INTLITERAL " ++ s) }
"'"."'" { \s -> case (s) of _ : c : _ -> CHARLITERAL c; _ -> error ("failed to parse CHARLITERAL " ++ s) }
-- separators
"(" { \_ -> LBRACE }
")" { \_ -> RBRACE }
"{" { \_ -> LBRACKET }
"}" { \_ -> RBRACKET }
";" { \_ -> SEMICOLON }
"," { \_ -> COMMA}
"." { \_ -> DOT }
-- operators
"=" { \_ -> ASSIGN }
"==" { \_ -> EQUAL }
"+" { \_ -> PLUS }
"+=" { \_ -> PLUSEQUAL }
">" { \_ -> GREATER }
">=" { \_ -> GREATEREQUAL }
"-" { \_ -> MINUS }
"-=" { \_ -> MINUSEQUAL }
"<" { \_ -> LESS }
"<=" { \_ -> LESSEQUAL }
"*" { \_ -> TIMES }
"*=" { \_ -> TIMESEQUAL }
"!" { \_ -> NOT }
"!=" { \_ -> NOTEQUAL }
"/" { \_ -> DIV }
"/=" { \_ -> DIVEQUAL }
"~" { \_ -> BITWISENOT }
"&&" { \_ -> AND }
"&" { \_ -> BITWISEAND }
"&=" { \_ -> ANDEQUAL }
"?" { \_ -> QUESTIONMARK }
"||" { \_ -> OR }
"|" { \_ -> BITWISEOR }
"|=" { \_ -> OREQUAL }
":" { \_ -> COLON }
"++" { \_ -> INCREMENT }
"^" { \_ -> XOR }
"^=" { \_ -> XOREQUAL }
"--" { \_ -> DECREMENT }
"%" { \_ -> MODULO }
"%=" { \_ -> MODULOEQUAL }
"<<" { \_ -> SHIFTLEFT }
"<<=" { \_ -> SHIFTLEFTEQUAL }
">>" { \_ -> SHIFTRIGHT }
">>=" { \_ -> SHIFTRIGHTEQUAL }
">>>" { \_ -> UNSIGNEDSHIFTRIGHT }
">>>=" { \_ -> UNSIGNEDSHIFTRIGHTEQUAL }
{ {
data Token data Token
= Comment String = ABSTRACT
| Different | ASSERT
| BOOLEAN
| BREAK
| BYTE
| CASE
| CATCH
| CHAR
| CLASS
| CONST
| CONTINUE
| DEFAULT
| DO
| DOUBLE
| ELSE
| ENUM
| EXTENDS
| FINAL
| FINALLY
| FLOAT
| FOR
| IF
| GOTO
| IMPLEMENTS
| IMPORT
| INSTANCEOF
| INT
| INTERFACE
| LONG
| NATIVE
| NEW
| PACKAGE
| PRIVATE
| PROTECTED
| PUBLIC
| RETURN
| SHORT
| STATIC
| STRICTFP
| SUPER
| SWITCH
| SYNCHRONIZED
| THIS
| THROW
| THROWS
| TRANSIENT
| TRY
| VOID
| VOLATILE
| WHILE
| IDENTIFIER String
| INTEGERLITERAL Int
| CHARLITERAL Char
| BOOLLITERAL Bool
| NULLLITERAL
| LBRACE
| RBRACE
| LBRACKET
| RBRACKET
| SEMICOLON
| COMMA
| DOT
| ASSIGN
| EQUAL
| PLUS
| PLUSEQUAL
| GREATER
| GREATEREQUAL
| MINUS
| MINUSEQUAL
| LESS
| LESSEQUAL
| TIMES
| TIMESEQUAL
| NOT
| NOTEQUAL
| DIV
| DIVEQUAL
| BITWISENOT
| AND
| BITWISEAND
| ANDEQUAL
| QUESTIONMARK
| OR
| BITWISEOR
| OREQUAL
| COLON
| INCREMENT
| XOR
| XOREQUAL
| DECREMENT
| MODULO
| MODULOEQUAL
| SHIFTLEFT
| SHIFTLEFTEQUAL
| SHIFTRIGHT
| SHIFTRIGHTEQUAL
| UNSIGNEDSHIFTRIGHT
| UNSIGNEDSHIFTRIGHTEQUAL
deriving(Eq,Show) deriving(Eq,Show)
} }