Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode

This commit is contained in:
mrab 2024-05-08 11:04:43 +02:00
commit 7cbf8aad08
6 changed files with 485 additions and 184 deletions

View File

@ -4,14 +4,46 @@ import Test.HUnit
import Parser.Lexer
testCommentSomething = TestCase $ assertEqual "scan /*Something*/" [Comment "/*Something*/"] $ alexScanTokens "/*Something*/"
testEmptyComment = TestCase $ assertEqual "scan /*x*/" [Comment "/**/"] $ alexScanTokens "/**/"
testLineComment = TestCase $ assertEqual "scan // comment" [Comment "// comment"] $ alexScanTokens "// comment"
emptyTokenList :: [Token]
emptyTokenList = []
testCommentSomething = TestCase $ assertEqual "scan '/*Something*/'" emptyTokenList $ alexScanTokens "/*Something*/"
testEmptyComment = TestCase $ assertEqual "scan '/*x*/'" emptyTokenList $ alexScanTokens "/**/"
testLineComment = TestCase $ assertEqual "scan '// comment'" emptyTokenList $ alexScanTokens "// comment"
testLineCommentEnds = TestCase $ assertEqual "scan '// com\\n'" emptyTokenList $ alexScanTokens "// com\n"
testIdentifier = TestCase $ assertEqual "scan 'identifier'" [IDENTIFIER "identifier"] $ alexScanTokens "identifier"
testShortIdentifier = TestCase $ assertEqual "scan 'i'" [IDENTIFIER "i"] $ alexScanTokens "i"
testIdentifierWithNumber = TestCase $ assertEqual "scan 'i2'" [IDENTIFIER "i2"] $ alexScanTokens "i2"
testKeywordBreak = TestCase $ assertEqual "scan 'break'" [BREAK] $ alexScanTokens "break"
testKeywordInt = TestCase $ assertEqual "scan 'int'" [INT] $ alexScanTokens "int"
testIntLiteral = TestCase $ assertEqual "scan '234'" [INTEGERLITERAL 234] $ alexScanTokens "234"
testIntLiteral2 = TestCase $ assertEqual "scan '54_2'" [INTEGERLITERAL 542] $ alexScanTokens "54_2"
testCharLiteral = TestCase $ assertEqual "scan ''f''" [CHARLITERAL 'f'] $ alexScanTokens "'f'"
testBoolLiteralTrue = TestCase $ assertEqual "scan 'true'" [BOOLLITERAL True] $ alexScanTokens "true"
testBoolLiteralFalse = TestCase $ assertEqual "scan 'false'" [BOOLLITERAL False] $ alexScanTokens "false"
testLBrace = TestCase $ assertEqual "scan '('" [LBRACE] $ alexScanTokens "("
testAnd = TestCase $ assertEqual "scan '&&'" [AND] $ alexScanTokens "&&"
tests = TestList [
TestLabel "TestCommentSomething" testCommentSomething,
TestLabel "TestEmptyComment" testEmptyComment,
TestLabel "TestLineComment" testLineComment
TestLabel "TestLineComment" testLineComment,
TestLabel "TestLineCommentEnds" testLineCommentEnds,
TestLabel "TestIdentifier" testIdentifier,
TestLabel "TestShortIdentifier" testShortIdentifier,
TestLabel "TestIdentifierWithNumber" testIdentifierWithNumber,
TestLabel "TestKeywordBreak" testKeywordBreak,
TestLabel "TestKeywordInt" testKeywordInt,
TestLabel "TestIntLiteral" testIntLiteral,
TestLabel "TestIntLiteral2" testIntLiteral2,
TestLabel "TestCharLiteral" testCharLiteral,
TestLabel "TestBoolLiteralTrue" testBoolLiteralTrue,
TestLabel "TestBoolLiteralFalse" testBoolLiteralFalse,
TestLabel "TestLBrace" testLBrace,
TestLabel "TestAnd" testAnd
]

28
Test/TestParser.hs Normal file
View File

@ -0,0 +1,28 @@
module TestParser(tests) where
import Test.HUnit
import Parser.Lexer
import Parser.JavaParser
import Ast
testSingleEmptyClass = TestCase $
assertEqual "expect single empty class hello" [Class "Hello" [] []] $
parse [CLASS, IDENTIFIER "Hello", LBRACKET, RBRACKET]
testTwoEmptyClasses = TestCase $
assertEqual "expect two empty classes" [Class "Class1" [] [], Class "Class2" [] []] $
parse [CLASS,IDENTIFIER "Class1",LBRACKET,RBRACKET,CLASS,IDENTIFIER "Class2",LBRACKET,RBRACKET]
testBooleanField = TestCase $
assertEqual "expect class with boolean field" [Class "WithBool" [] [VariableDeclaration "boolean" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithBool",LBRACKET,BOOLEAN,IDENTIFIER "value",SEMICOLON,RBRACKET]
testIntField = TestCase $
assertEqual "expect class with int field" [Class "WithInt" [] [VariableDeclaration "int" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithInt",LBRACKET,INT,IDENTIFIER "value",SEMICOLON,RBRACKET]
tests = TestList [
testSingleEmptyClass,
testTwoEmptyClasses,
testBooleanField,
testIntField
]

View File

@ -3,9 +3,12 @@ module Main where
import Test.HUnit
import TestLexer
import TestByteCodeGenerator
import TestParser
otherTest = TestCase $ assertEqual "math" (4+3) 7
tests = TestList [TestLabel "TestLexer" TestLexer.tests, TestLabel "mathTest" otherTest, TestLabel "bytecodeTest" TestByteCodeGenerator.tests]
tests = TestList [
TestLabel "TestLexer" TestLexer.tests,
TestLabel "TestParser" TestParser.tests,
TestLabel "TestByteCodeGenerator" TestByteCodeGenerator.tests]
main = do runTestTTAndExit Main.tests

View File

@ -16,7 +16,15 @@ executable compiler
src/ByteCode,
src/ByteCode/ClassFile
build-tool-depends: alex:alex, happy:happy
other-modules: Ast, Example, Typecheck, ByteCode.ByteUtil, ByteCode.ClassFile, ByteCode.ClassFile.Generator, ByteCode.Constants
other-modules: Parser.Lexer,
Parser.JavaParser
Ast,
Example,
Typecheck,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.ClassFile.Generator,
ByteCode.Constants
test-suite tests
type: exitcode-stdio-1.0
@ -28,4 +36,13 @@ test-suite tests
utf8-string,
bytestring
build-tool-depends: alex:alex, happy:happy
other-modules: TestLexer, TestByteCodeGenerator
other-modules: Parser.Lexer,
Parser.JavaParser,
Ast,
TestLexer,
TestParser,
TestByteCodeGenerator,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.ClassFile.Generator,
ByteCode.Constants

View File

@ -1,6 +1,6 @@
{
module Parser.JavaParser (parse) where
--import AbsSyn
import Ast
import Parser.Lexer
}
@ -14,10 +14,10 @@ import Parser.Lexer
CASE { CASE }
CHAR { CHAR }
CLASS { CLASS}
IDENTIFIER { IDENTIFIER $$}
INTLITERAL { INTLITERAL $$}
IDENTIFIER { IDENTIFIER $$ }
INTLITERAL { INTEGERLITERAL $$}
DOT { DOT }
MOD { MOD }
MOD { MODULO }
TIMESEQUAL { TIMESEQUAL }
GREATEREQUAL { GREATEREQUAL }
WHILE { WHILE }
@ -29,30 +29,29 @@ import Parser.Lexer
THIS { THIS }
STATIC { STATIC }
PROTECTED { PROTECTED }
TILDE { TILDE }
MUL { MUL }
TILDE { BITWISENOT }
MUL { TIMES }
MINUS { MINUS }
EXCLMARK { EXCLMARK }
EXCLMARK { NOT }
IF { IF }
ELSE { ELSE }
DIVIDEEQUAL { DIVIDEEQUAL }
DIVIDEEQUAL { DIVEQUAL }
NEW { NEW }
LBRACKET { LBRACKET }
JNULL { JNULL }
BOOLLITERAL { BOOLLITERAL }
JNULL { NULLLITERAL }
BOOLLITERAL { BOOLLITERAL $$ }
DIV { DIV }
LOGICALOR { LOGICALOR }
LOGICALOR { OR }
NOTEQUAL { NOTEQUAL }
INSTANCEOF { INSTANCEOF }
ANDEQUAL { ANDEQUAL }
ASSIGN { ASSIGN }
DECREMENT { DECREMENT }
STRINGLITERAL { STRINGLITERAL }
CHARLITERAL { CHARLITERAL }
CHARLITERAL { CHARLITERAL $$ }
AND { AND }
XOREQUAL { XOREQUAL }
RETURN { RETURN }
QUESMARK { QUESMARK }
QUESMARK { QUESTIONMARK }
SHIFTLEFTEQUAL { SHIFTLEFTEQUAL }
RBRACKET { RBRACKET }
COMMA { COMMA }
@ -68,7 +67,7 @@ import Parser.Lexer
INT { INT }
ABSTRACT { ABSTRACT }
SEMICOLON { SEMICOLON }
SIGNEDSHIFTRIGHTEQUAL { SIGNEDSHIFTRIGHTEQUAL }
SIGNEDSHIFTRIGHTEQUAL { SHIFTRIGHTEQUAL }
UNSIGNEDSHIFTRIGHTEQUAL { UNSIGNEDSHIFTRIGHTEQUAL }
PLUSEQUAL { PLUSEQUAL }
OREQUAL { OREQUAL }
@ -76,31 +75,39 @@ import Parser.Lexer
LESS { LESS }
%%
compilationunit : typedeclarations { }
compilationunit : typedeclarations { $1 }
typedeclarations : typedeclaration { }
| typedeclarations typedeclaration { }
typedeclarations : typedeclaration { [$1] }
| typedeclarations typedeclaration { $1 ++ [$2] }
name : qualifiedname { }
| simplename { }
typedeclaration : classdeclaration { }
typedeclaration : classdeclaration { $1 }
qualifiedname : name DOT IDENTIFIER { }
simplename : IDENTIFIER { }
classdeclaration : CLASS IDENTIFIER classbody { }
| modifiers CLASS IDENTIFIER classbody { }
classdeclaration : CLASS IDENTIFIER classbody { case $3 of (methods, fields) -> Class $2 methods fields }
-- | modifiers CLASS IDENTIFIER classbody { case $4 of (methods, fields) -> Class $3 methods fields }
classbody : LBRACKET RBRACKET { ([], []) }
| LBRACKET classbodydeclarations RBRACKET { }
| LBRACKET classbodydeclarations RBRACKET { $2 }
modifiers : modifier { }
| modifiers modifier { }
classbodydeclarations : classbodydeclaration { }
| classbodydeclarations classbodydeclaration{ }
classbodydeclarations : classbodydeclaration {
case $1 of
MethodDecl method -> ([method], [])
FieldDecls fields -> ([], fields)
}
-- | classbodydeclarations classbodydeclaration {
-- case ($1, $2) of
-- ((methods, fields), MethodDecl method) -> ((methods ++ [method]), fields)
-- ((methods, fields), FieldDecl field) -> (methods, (fields ++ [field]))
-- }
modifier : PUBLIC { }
| PROTECTED { }
@ -110,19 +117,19 @@ modifier : PUBLIC { }
classtype : classorinterfacetype{ }
classbodydeclaration : classmemberdeclaration { }
| constructordeclaration { }
classbodydeclaration : classmemberdeclaration { $1 }
-- | constructordeclaration { FieldDecl $ VariableDeclaration "int" "a" Nothing } -- TODO
classorinterfacetype : name{ }
classmemberdeclaration : fielddeclaration { }
| methoddeclaration { }
classmemberdeclaration : fielddeclaration { $1 }
-- | methoddeclaration { }
constructordeclaration : constructordeclarator constructorbody { }
| modifiers constructordeclarator constructorbody { }
fielddeclaration : type variabledeclarators SEMICOLON { }
| modifiers type variabledeclarators SEMICOLON { }
fielddeclaration : type variabledeclarators SEMICOLON { FieldDecls $ map (convertDeclarator $1) $2 }
-- | modifiers type variabledeclarators SEMICOLON {}
methoddeclaration : methodheader methodbody { }
@ -142,11 +149,11 @@ methodheader : type methoddeclarator { }
| VOID methoddeclarator { }
| modifiers VOID methoddeclarator { }
type : primitivetype { }
| referencetype { }
type : primitivetype { $1 }
-- | referencetype { }
variabledeclarators : variabledeclarator { }
| variabledeclarators COMMA variabledeclarator { }
variabledeclarators : variabledeclarator { [$1] }
-- | variabledeclarators COMMA variabledeclarator { $1 ++ [$3] }
methodbody : block { }
| SEMICOLON { }
@ -166,14 +173,14 @@ classtypelist : classtype { }
methoddeclarator : IDENTIFIER LBRACE RBRACE { }
| IDENTIFIER LBRACE formalparameterlist RBRACE { }
primitivetype : BOOLEAN { }
| numerictype { }
primitivetype : BOOLEAN { "boolean" }
| numerictype { $1 }
referencetype : classorinterfacetype { }
variabledeclarator : variabledeclaratorid { }
| variabledeclaratorid ASSIGN variableinitializer { }
variabledeclarator : variabledeclaratorid { Declarator $1 Nothing }
-- | variabledeclaratorid ASSIGN variableinitializer { Declarator $1 Nothing } -- TODO
blockstatement : localvariabledeclarationstatement { }
| statement { }
@ -183,9 +190,9 @@ formalparameter : type variabledeclaratorid { }
argumentlist : expression { }
| argumentlist COMMA expression { }
numerictype : integraltype { }
numerictype : integraltype { $1 }
variabledeclaratorid : IDENTIFIER { }
variabledeclaratorid : IDENTIFIER { $1 }
variableinitializer : expression { }
@ -199,8 +206,8 @@ statement : statementwithouttrailingsubstatement{ }
expression : assignmentexpression { }
integraltype : INT { }
| CHAR { }
integraltype : INT { "int" }
| CHAR { "char" }
localvariabledeclaration : type variabledeclarators { }
@ -320,7 +327,6 @@ exclusiveorexpression : andexpression { }
literal : INTLITERAL { }
| BOOLLITERAL { }
| CHARLITERAL { }
| STRINGLITERAL { }
| JNULL { }
castexpression : LBRACE primitivetype RBRACE unaryexpression { }
@ -354,7 +360,18 @@ multiplicativeexpression : unaryexpression { }
{
data MethodOrFieldDeclaration = MethodDecl MethodDeclaration
| FieldDecls [VariableDeclaration]
data Declarator = Declarator Identifier (Maybe Expression)
-- convertDeclaratorList :: [DataType] -> MethodOrFieldDeclaration
-- convertDeclaratorList = FieldDecls $ map
convertDeclarator :: DataType -> Declarator -> VariableDeclaration
convertDeclarator dataType (Declarator id assigment) = VariableDeclaration dataType id assigment
parseError :: [Token] -> a
parseError _ = error "Parse error"
parseError msg = error ("Parse error: " ++ show msg)
}

View File

@ -1,5 +1,6 @@
{
module Parser.Lexer(Token(..), alexScanTokens) where
module Parser.Lexer(Token(..), alexScanTokens) where
import Text.Read
}
%wrapper "basic"
@ -7,18 +8,221 @@
$digit = 0-9
$alpha = [a-zA-Z]
$alphanum = [a-zA-Z0-9]
$JavaLetter = [A-Za-z\_\$]
$JavaLetterOrDigit = [A-Za-z\_\$0-9]
tokens :-
$white ;
"/*"(.|\n)*"*/" { \s -> Comment s }
"//".* {\s -> Comment s}
"/*"(.|\n)*"*/" ;
"//".* ;
-- keywords
"abstract" { \_ -> ABSTRACT }
"assert" { \_ -> BOOLEAN }
"boolean" { \_ -> BOOLEAN}
"break" { \_ -> BREAK}
"byte" { \_ -> BYTE}
"case" { \_ -> CASE}
"catch" { \_ -> CATCH}
"char" { \_ -> CHAR}
"class" { \_ -> CLASS}
"const" { \_ -> CONST}
"continue" { \_ -> CONTINUE}
"default" { \_ -> DEFAULT}
"do" { \_ -> DO}
"double" { \_ -> DOUBLE}
("else"|"ifn't") { \_ -> ELSE}
"enum" { \_ -> ENUM}
"extends" { \_ -> EXTENDS}
"final" { \_ -> FINAL}
"finally" { \_ -> FINALLY}
"float" { \_ -> FLOAT}
"for" { \_ -> FOR}
"if" { \_ -> IF}
"goto" { \_ -> GOTO}
"implements" { \_ -> IMPLEMENTS}
"import" { \_ -> IMPORT}
"instanceof" { \_ -> INSTANCEOF}
"int" { \_ -> INT}
"long" { \_ -> LONG}
"native" { \_ -> NATIVE}
"new" { \_ -> NEW}
"package" { \_ -> PACKAGE}
"private" { \_ -> PRIVATE}
"protected" { \_ -> PROTECTED}
"public" { \_ -> PUBLIC}
"return" { \_ -> RETURN}
"short" { \_ -> SHORT}
"static" { \_ -> STATIC}
"strictfp" { \_ -> STRICTFP}
"super" { \_ -> SUPER}
"switch" { \_ -> SWITCH}
"synchronized" { \_ -> SYNCHRONIZED}
"this" { \_ -> THIS}
"throw" { \_ -> THROW}
"throws" { \_ -> THROWS}
"transient" { \_ -> TRANSIENT}
"try" { \_ -> TRY}
"void" { \_ -> VOID}
"volatile" { \_ -> VOLATILE}
"while" { \_ -> WHILE}
-- Literals
"true" { \_ -> BOOLLITERAL True }
"false" { \_ -> BOOLLITERAL False }
"null" { \_ -> NULLLITERAL }
-- end keywords
$JavaLetter$JavaLetterOrDigit* { \s -> IDENTIFIER s }
-- Literals
[1-9]([0-9\_]*[0-9])* { \s -> case readMaybe $ filter ((/=) '_') s of Just a -> INTEGERLITERAL a; Nothing -> error ("failed to parse INTLITERAL " ++ s) }
"'"."'" { \s -> case (s) of _ : c : _ -> CHARLITERAL c; _ -> error ("failed to parse CHARLITERAL " ++ s) }
-- separators
"(" { \_ -> LBRACE }
")" { \_ -> RBRACE }
"{" { \_ -> LBRACKET }
"}" { \_ -> RBRACKET }
";" { \_ -> SEMICOLON }
"," { \_ -> COMMA}
"." { \_ -> DOT }
-- operators
"=" { \_ -> ASSIGN }
"==" { \_ -> EQUAL }
"+" { \_ -> PLUS }
"+=" { \_ -> PLUSEQUAL }
">" { \_ -> GREATER }
">=" { \_ -> GREATEREQUAL }
"-" { \_ -> MINUS }
"-=" { \_ -> MINUSEQUAL }
"<" { \_ -> LESS }
"<=" { \_ -> LESSEQUAL }
"*" { \_ -> TIMES }
"*=" { \_ -> TIMESEQUAL }
"!" { \_ -> NOT }
"!=" { \_ -> NOTEQUAL }
"/" { \_ -> DIV }
"/=" { \_ -> DIVEQUAL }
"~" { \_ -> BITWISENOT }
"&&" { \_ -> AND }
"&" { \_ -> BITWISEAND }
"&=" { \_ -> ANDEQUAL }
"?" { \_ -> QUESTIONMARK }
"||" { \_ -> OR }
"|" { \_ -> BITWISEOR }
"|=" { \_ -> OREQUAL }
":" { \_ -> COLON }
"++" { \_ -> INCREMENT }
"^" { \_ -> XOR }
"^=" { \_ -> XOREQUAL }
"--" { \_ -> DECREMENT }
"%" { \_ -> MODULO }
"%=" { \_ -> MODULOEQUAL }
"<<" { \_ -> SHIFTLEFT }
"<<=" { \_ -> SHIFTLEFTEQUAL }
">>" { \_ -> SHIFTRIGHT }
">>=" { \_ -> SHIFTRIGHTEQUAL }
">>>" { \_ -> UNSIGNEDSHIFTRIGHT }
">>>=" { \_ -> UNSIGNEDSHIFTRIGHTEQUAL }
{
data Token
= Comment String
| Different
= ABSTRACT
| ASSERT
| BOOLEAN
| BREAK
| BYTE
| CASE
| CATCH
| CHAR
| CLASS
| CONST
| CONTINUE
| DEFAULT
| DO
| DOUBLE
| ELSE
| ENUM
| EXTENDS
| FINAL
| FINALLY
| FLOAT
| FOR
| IF
| GOTO
| IMPLEMENTS
| IMPORT
| INSTANCEOF
| INT
| INTERFACE
| LONG
| NATIVE
| NEW
| PACKAGE
| PRIVATE
| PROTECTED
| PUBLIC
| RETURN
| SHORT
| STATIC
| STRICTFP
| SUPER
| SWITCH
| SYNCHRONIZED
| THIS
| THROW
| THROWS
| TRANSIENT
| TRY
| VOID
| VOLATILE
| WHILE
| IDENTIFIER String
| INTEGERLITERAL Int
| CHARLITERAL Char
| BOOLLITERAL Bool
| NULLLITERAL
| LBRACE
| RBRACE
| LBRACKET
| RBRACKET
| SEMICOLON
| COMMA
| DOT
| ASSIGN
| EQUAL
| PLUS
| PLUSEQUAL
| GREATER
| GREATEREQUAL
| MINUS
| MINUSEQUAL
| LESS
| LESSEQUAL
| TIMES
| TIMESEQUAL
| NOT
| NOTEQUAL
| DIV
| DIVEQUAL
| BITWISENOT
| AND
| BITWISEAND
| ANDEQUAL
| QUESTIONMARK
| OR
| BITWISEOR
| OREQUAL
| COLON
| INCREMENT
| XOR
| XOREQUAL
| DECREMENT
| MODULO
| MODULOEQUAL
| SHIFTLEFT
| SHIFTLEFTEQUAL
| SHIFTRIGHT
| SHIFTRIGHTEQUAL
| UNSIGNEDSHIFTRIGHT
| UNSIGNEDSHIFTRIGHTEQUAL
deriving(Eq,Show)
}