From ee302bb2456306dfe252db51555898871b454746 Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Tue, 18 Jun 2024 07:37:01 +0200 Subject: [PATCH 1/9] maxLocals calculation --- src/ByteCode/Builder.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ByteCode/Builder.hs b/src/ByteCode/Builder.hs index dfebf89..258592a 100644 --- a/src/ByteCode/Builder.hs +++ b/src/ByteCode/Builder.hs @@ -69,18 +69,19 @@ methodAssembler (MethodDeclaration returntype name parameters statement) input = in case (splitAt index (methods input)) of (pre, []) -> input (pre, method : post) -> let - (_, bytecode, _) = assembleMethod (constantPool input, [], paramNames) declaration + (constants, bytecode, aParamNames) = assembleMethod (constantPool input, [], paramNames) declaration assembledMethod = method { memberAttributes = [ CodeAttribute { attributeMaxStack = 420, - attributeMaxLocals = 420, + attributeMaxLocals = fromIntegral $ length aParamNames, attributeCode = bytecode } ] } in input { + constantPool = constants, methods = pre ++ (assembledMethod : post) } -- 2.34.1 From 8a6dca4e3610ee4575d5c3945fca88d809aab1c5 Mon Sep 17 00:00:00 2001 From: MisterChaos96 Date: Thu, 20 Jun 2024 08:38:56 +0200 Subject: [PATCH 2/9] fix null not accepted for object method parameter --- src/Typecheck.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Typecheck.hs b/src/Typecheck.hs index 0be409f..7564de7 100644 --- a/src/Typecheck.hs +++ b/src/Typecheck.hs @@ -171,7 +171,7 @@ typeCheckStatementExpression (MethodCall expr methodName args) symtab classes = let args' = map (\arg -> typeCheckExpression arg symtab classes) args expectedTypes = [dataType | ParameterDeclaration dataType _ <- params] argTypes = map getTypeFromExpr args' - typeMatches = zipWith (\expType argType -> (expType == argType, expType, argType)) expectedTypes argTypes + typeMatches = zipWith (\expType argType -> (expType == argType || (argType == "null" && isObjectType expType), expType, argType)) expectedTypes argTypes mismatches = filter (not . fst3) typeMatches where fst3 (a, _, _) = a in @@ -186,6 +186,7 @@ typeCheckStatementExpression (MethodCall expr methodName args) symtab classes = Nothing -> error $ "Class for object type '" ++ objType ++ "' not found." _ -> error "Invalid object type for method call. Object must have a class type." + typeCheckStatementExpression (PostIncrement expr) symtab classes = let expr' = typeCheckExpression expr symtab classes type' = getTypeFromExpr expr' -- 2.34.1 From bcbec9209a26aea53efd274118bf5adf65430b6c Mon Sep 17 00:00:00 2001 From: MisterChaos96 Date: Thu, 20 Jun 2024 11:26:38 +0200 Subject: [PATCH 3/9] fix nullliterals having the type null instead of their corresponding object types --- src/Typecheck.hs | 62 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/Typecheck.hs b/src/Typecheck.hs index 7564de7..b3e3203 100644 --- a/src/Typecheck.hs +++ b/src/Typecheck.hs @@ -37,14 +37,18 @@ typeCheckVariableDeclaration (VariableDeclaration dataType identifier maybeExpr) -- Type check the initializer expression if it exists checkedExpr = fmap (\expr -> typeCheckExpression expr symtab classes) maybeExpr exprType = fmap getTypeFromExpr checkedExpr + checkedExprWithType = case exprType of + Just "null" | isObjectType dataType -> Just (TypedExpression dataType NullLiteral) + _ -> checkedExpr in case (validType, redefined, exprType) of (False, _, _) -> error $ "Type '" ++ dataType ++ "' is not a valid type for variable '" ++ identifier ++ "'" (_, True, _) -> error $ "Variable '" ++ identifier ++ "' is redefined in the same scope" (_, _, Just t) - | t == "null" && isObjectType dataType -> VariableDeclaration dataType identifier checkedExpr + | t == "null" && isObjectType dataType -> VariableDeclaration dataType identifier checkedExprWithType | t /= dataType -> error $ "Type mismatch in declaration of '" ++ identifier ++ "': expected " ++ dataType ++ ", found " ++ t - | otherwise -> VariableDeclaration dataType identifier checkedExpr - (_, _, Nothing) -> VariableDeclaration dataType identifier checkedExpr + | otherwise -> VariableDeclaration dataType identifier checkedExprWithType + (_, _, Nothing) -> VariableDeclaration dataType identifier checkedExprWithType + -- ********************************** Type Checking: Expressions ********************************** @@ -125,11 +129,14 @@ typeCheckStatementExpression (Assignment ref expr) symtab classes = ref' = typeCheckExpression ref symtab classes type' = getTypeFromExpr expr' type'' = getTypeFromExpr ref' + typeToAssign = if type' == "null" && isObjectType type'' then type'' else type' + exprWithType = if type' == "null" && isObjectType type'' then TypedExpression type'' NullLiteral else expr' in - if type'' == type' || (type' == "null" && isObjectType type'') then - TypedStatementExpression type'' (Assignment ref' expr') + if type'' == typeToAssign then + TypedStatementExpression type'' (Assignment ref' exprWithType) else - error $ "Type mismatch in assignment to variable: expected " ++ type'' ++ ", found " ++ type' + error $ "Type mismatch in assignment to variable: expected " ++ type'' ++ ", found " ++ typeToAssign + typeCheckStatementExpression (ConstructorCall className args) symtab classes = case find (\(Class name _ _) -> name == className) classes of @@ -168,25 +175,30 @@ typeCheckStatementExpression (MethodCall expr methodName args) symtab classes = Just (Class _ methods _) -> case find (\(MethodDeclaration retType name params _) -> name == methodName) methods of Just (MethodDeclaration retType _ params _) -> - let args' = map (\arg -> typeCheckExpression arg symtab classes) args + let args' = zipWith + (\arg (ParameterDeclaration paramType _) -> + let argTyped = typeCheckExpression arg symtab classes + in if getTypeFromExpr argTyped == "null" && isObjectType paramType + then TypedExpression paramType NullLiteral + else argTyped + ) args params expectedTypes = [dataType | ParameterDeclaration dataType _ <- params] argTypes = map getTypeFromExpr args' - typeMatches = zipWith (\expType argType -> (expType == argType || (argType == "null" && isObjectType expType), expType, argType)) expectedTypes argTypes + typeMatches = zipWith + (\expType argType -> (expType == argType || (argType == "null" && isObjectType expType), expType, argType)) + expectedTypes argTypes mismatches = filter (not . fst3) typeMatches - where fst3 (a, _, _) = a - in - if null mismatches && length args == length params then - TypedStatementExpression retType (MethodCall objExprTyped methodName args') - else if not (null mismatches) then - error $ unlines $ ("Argument type mismatches for method '" ++ methodName ++ "':") + fst3 (a, _, _) = a + in if null mismatches && length args == length params + then TypedStatementExpression retType (MethodCall objExprTyped methodName args') + else if not (null mismatches) + then error $ unlines $ ("Argument type mismatches for method '" ++ methodName ++ "':") : [ "Expected: " ++ expType ++ ", Found: " ++ argType | (_, expType, argType) <- mismatches ] - else - error $ "Incorrect number of arguments for method '" ++ methodName ++ "'. Expected " ++ show (length expectedTypes) ++ ", found " ++ show (length args) ++ "." + else error $ "Incorrect number of arguments for method '" ++ methodName ++ "'. Expected " ++ show (length expectedTypes) ++ ", found " ++ show (length args) ++ "." Nothing -> error $ "Method '" ++ methodName ++ "' not found in class '" ++ objType ++ "'." Nothing -> error $ "Class for object type '" ++ objType ++ "' not found." _ -> error "Invalid object type for method call. Object must have a class type." - - + typeCheckStatementExpression (PostIncrement expr) symtab classes = let expr' = typeCheckExpression expr symtab classes type' = getTypeFromExpr expr' @@ -252,14 +264,18 @@ typeCheckStatement (LocalVariableDeclaration (VariableDeclaration dataType ident -- If there's an initializer expression, type check it let checkedExpr = fmap (\expr -> typeCheckExpression expr symtab classes) maybeExpr exprType = fmap getTypeFromExpr checkedExpr + checkedExprWithType = case (exprType, dataType) of + (Just "null", _) | isObjectType dataType -> Just (TypedExpression dataType NullLiteral) + _ -> checkedExpr in case exprType of Just t | t == "null" && isObjectType dataType -> - TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr)) + TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExprWithType)) | t /= dataType -> error $ "Type mismatch in declaration of '" ++ identifier ++ "': expected " ++ dataType ++ ", found " ++ t - | otherwise -> TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr)) + | otherwise -> TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExprWithType)) Nothing -> TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr)) + typeCheckStatement (While cond stmt) symtab classes = let cond' = typeCheckExpression cond symtab classes stmt' = typeCheckStatement stmt symtab classes @@ -306,13 +322,17 @@ typeCheckStatement (Block statements) symtab classes = typeCheckStatement (Return expr) symtab classes = let methodReturnType = fromMaybe (error "Method return type not found in symbol table") (lookup "thisMeth" symtab) expr' = case expr of - Just e -> Just (typeCheckExpression e symtab classes) + Just e -> let eTyped = typeCheckExpression e symtab classes + in if getTypeFromExpr eTyped == "null" && isObjectType methodReturnType + then Just (TypedExpression methodReturnType NullLiteral) + else Just eTyped Nothing -> Nothing returnType = maybe "void" getTypeFromExpr expr' in if returnType == methodReturnType || isSubtype returnType methodReturnType classes then TypedStatement returnType (Return expr') else error $ "Return: Return type mismatch: expected " ++ methodReturnType ++ ", found " ++ returnType + typeCheckStatement (StatementExpressionStatement stmtExpr) symtab classes = let stmtExpr' = typeCheckStatementExpression stmtExpr symtab classes in TypedStatement (getTypeFromStmtExpr stmtExpr') (StatementExpressionStatement stmtExpr') -- 2.34.1 From faf3d1674e375a715383a39ac50d805d1e5b17f4 Mon Sep 17 00:00:00 2001 From: MisterChaos96 Date: Thu, 20 Jun 2024 11:52:35 +0200 Subject: [PATCH 4/9] fix constructor not handling nullliterals correctly --- src/Typecheck.hs | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/Typecheck.hs b/src/Typecheck.hs index b3e3203..5ccc7d3 100644 --- a/src/Typecheck.hs +++ b/src/Typecheck.hs @@ -141,9 +141,9 @@ typeCheckStatementExpression (Assignment ref expr) symtab classes = typeCheckStatementExpression (ConstructorCall className args) symtab classes = case find (\(Class name _ _) -> name == className) classes of Nothing -> error $ "Class '" ++ className ++ "' not found." - Just (Class _ methods fields) -> + Just (Class _ methods _) -> -- Find constructor matching the class name with void return type - case find (\(MethodDeclaration retType name params _) -> name == "" && retType == "void") methods of + case find (\(MethodDeclaration _ name params _) -> name == "") methods of -- If no constructor is found, assume standard constructor with no parameters Nothing -> if null args then @@ -151,21 +151,28 @@ typeCheckStatementExpression (ConstructorCall className args) symtab classes = else error $ "No valid constructor found for class '" ++ className ++ "', but arguments were provided." Just (MethodDeclaration _ _ params _) -> - let - args' = map (\arg -> typeCheckExpression arg symtab classes) args - -- Extract expected parameter types from the constructor's parameters - expectedTypes = [dataType | ParameterDeclaration dataType _ <- params] - argTypes = map getTypeFromExpr args' - -- Check if the types of the provided arguments match the expected types - typeMatches = zipWith (\expected actual -> if expected == actual then Nothing else Just (expected, actual)) expectedTypes argTypes - mismatchErrors = map (\(exp, act) -> "Expected type '" ++ exp ++ "', found '" ++ act ++ "'.") (catMaybes typeMatches) + let args' = zipWith + (\arg (ParameterDeclaration paramType _) -> + let argTyped = typeCheckExpression arg symtab classes + in if getTypeFromExpr argTyped == "null" && isObjectType paramType + then TypedExpression paramType NullLiteral + else argTyped + ) args params + expectedTypes = [dataType | ParameterDeclaration dataType _ <- params] + argTypes = map getTypeFromExpr args' + typeMatches = zipWith + (\expType argType -> (expType == argType || (argType == "null" && isObjectType expType), expType, argType)) + expectedTypes argTypes + mismatches = filter (not . fst3) typeMatches + fst3 (a, _, _) = a in - if length args /= length params then - error $ "Constructor for class '" ++ className ++ "' expects " ++ show (length params) ++ " arguments, but got " ++ show (length args) ++ "." - else if not (null mismatchErrors) then - error $ unlines $ ("Type mismatch in constructor arguments for class '" ++ className ++ "':") : mismatchErrors - else + if null mismatches && length args == length params then TypedStatementExpression className (ConstructorCall className args') + else if not (null mismatches) then + error $ unlines $ ("Type mismatch in constructor arguments for class '" ++ className ++ "':") + : [ "Expected: " ++ expType ++ ", Found: " ++ argType | (_, expType, argType) <- mismatches ] + else + error $ "Incorrect number of arguments for constructor of class '" ++ className ++ "'. Expected " ++ show (length expectedTypes) ++ ", found " ++ show (length args) ++ "." typeCheckStatementExpression (MethodCall expr methodName args) symtab classes = let objExprTyped = typeCheckExpression expr symtab classes -- 2.34.1 From 29faab5112631f5e14098907df031f31cb78ea20 Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Thu, 20 Jun 2024 15:07:02 +0200 Subject: [PATCH 5/9] maxStack calculation --- Test/JavaSources/Main.java | 2 +- src/ByteCode/Builder.hs | 5 +- src/ByteCode/Util.hs | 114 +++++++++++++++++++++++++++++++++---- 3 files changed, 106 insertions(+), 15 deletions(-) diff --git a/Test/JavaSources/Main.java b/Test/JavaSources/Main.java index ca1eb22..aa84965 100644 --- a/Test/JavaSources/Main.java +++ b/Test/JavaSources/Main.java @@ -1,7 +1,7 @@ // compile all test files using: // ls Test/JavaSources/*.java | grep -v ".*Main.java" | xargs -I {} cabal run compiler {} // compile (in project root) using: -// javac -g:none -sourcepath Test/JavaSources/ Test/JavaSources/Main.java +// javac -g:none -sourcepath Test/JavaSources/ Test/JavaSources/Main.java // afterwards, run using // java -ea -cp Test/JavaSources/ Main diff --git a/src/ByteCode/Builder.hs b/src/ByteCode/Builder.hs index 258592a..f7e9e00 100644 --- a/src/ByteCode/Builder.hs +++ b/src/ByteCode/Builder.hs @@ -50,14 +50,13 @@ methodBuilder (MethodDeclaration returntype name parameters statement) input = l memberDescriptorIndex = (fromIntegral (baseIndex + 3)), memberAttributes = [] } - in + in input { constantPool = (constantPool input) ++ constants, methods = (methods input) ++ [method] } - methodAssembler :: ClassFileBuilder MethodDeclaration methodAssembler (MethodDeclaration returntype name parameters statement) input = let methodConstantIndex = findMethodIndex input name @@ -73,7 +72,7 @@ methodAssembler (MethodDeclaration returntype name parameters statement) input = assembledMethod = method { memberAttributes = [ CodeAttribute { - attributeMaxStack = 420, + attributeMaxStack = fromIntegral $ maxStackDepth constants bytecode, attributeMaxLocals = fromIntegral $ length aParamNames, attributeCode = bytecode } diff --git a/src/ByteCode/Util.hs b/src/ByteCode/Util.hs index 45e8816..4271e11 100644 --- a/src/ByteCode/Util.hs +++ b/src/ByteCode/Util.hs @@ -4,7 +4,7 @@ import Data.Int import Ast import ByteCode.ClassFile import Data.List -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, isJust) import Data.Word (Word8, Word16, Word32) -- walks the name resolution chain. returns the innermost Just LocalVariable/FieldVariable or Nothing. @@ -12,13 +12,13 @@ resolveNameChain :: Expression -> Expression resolveNameChain (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b resolveNameChain (TypedExpression dtype (LocalVariable name)) = (TypedExpression dtype (LocalVariable name)) resolveNameChain (TypedExpression dtype (FieldVariable name)) = (TypedExpression dtype (FieldVariable name)) -resolveNameChain invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show(invalidExpression)) +resolveNameChain invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show (invalidExpression)) -- walks the name resolution chain. returns the second-to-last item of the namechain. resolveNameChainOwner :: Expression -> Expression resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a (TypedExpression dtype (FieldVariable name)))) = a resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b -resolveNameChainOwner invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show(invalidExpression)) +resolveNameChainOwner invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show (invalidExpression)) methodDescriptor :: MethodDeclaration -> String @@ -39,10 +39,39 @@ methodDescriptorFromParamlist parameters returntype = let ++ ")" ++ datatypeDescriptor returntype +parseMethodType :: ([String], String) -> String -> ([String], String) +parseMethodType (params, returnType) ('(' : descriptor) = parseMethodType (params, returnType) descriptor +parseMethodType (params, returnType) ('I' : descriptor) = parseMethodType (params ++ ["I"], returnType) descriptor +parseMethodType (params, returnType) ('C' : descriptor) = parseMethodType (params ++ ["C"], returnType) descriptor +parseMethodType (params, returnType) ('B' : descriptor) = parseMethodType (params ++ ["B"], returnType) descriptor +parseMethodType (params, returnType) ('L' : descriptor) = let + typeLength = elemIndex ';' descriptor + in case typeLength of + Just length -> let + (typeName, semicolon : restOfDescriptor) = splitAt length descriptor + in + parseMethodType (params ++ [typeName], returnType) restOfDescriptor + Nothing -> error $ "unterminated class type in function signature: " ++ (show descriptor) +parseMethodType (params, _) (')' : descriptor) = (params, descriptor) +parseMethodType _ descriptor = error $ "expected start of type name (L, I, C, B) but got: " ++ descriptor + +-- given a method index (constant pool index), +-- returns the full type of the method. (i.e (LSomething;II)V) +methodTypeFromIndex :: [ConstantInfo] -> Int -> String +methodTypeFromIndex constants index = case constants!!(fromIntegral (index - 1)) of + MethodRefInfo _ nameAndTypeIndex -> case constants!!(fromIntegral (nameAndTypeIndex - 1)) of + NameAndTypeInfo _ typeIndex -> case constants!!(fromIntegral (typeIndex - 1)) of + Utf8Info typeLiteral -> typeLiteral + unexpectedElement -> error "Expected Utf8Info but got: " ++ show unexpectedElement + unexpectedElement -> error "Expected NameAndTypeInfo but got: " ++ show unexpectedElement + unexpectedElement -> error "Expected MethodRefInfo but got: " ++ show unexpectedElement + +methodParametersFromIndex :: [ConstantInfo] -> Int -> ([String], String) +methodParametersFromIndex constants index = parseMethodType ([], "V") (methodTypeFromIndex constants index) + memberInfoIsMethod :: [ConstantInfo] -> MemberInfo -> Bool memberInfoIsMethod constants info = elem '(' (memberInfoDescriptor constants info) - datatypeDescriptor :: String -> String datatypeDescriptor "void" = "V" datatypeDescriptor "int" = "I" @@ -100,6 +129,18 @@ comparisonOperation CompareLessOrEqual branchLocation = Opif_icmple branchLoc comparisonOperation CompareGreaterThan branchLocation = Opif_icmpgt branchLocation comparisonOperation CompareGreaterOrEqual branchLocation = Opif_icmpge branchLocation +comparisonOffset :: Operation -> Maybe Int +comparisonOffset (Opif_icmpeq offset) = Just $ fromIntegral offset +comparisonOffset (Opif_icmpne offset) = Just $ fromIntegral offset +comparisonOffset (Opif_icmplt offset) = Just $ fromIntegral offset +comparisonOffset (Opif_icmple offset) = Just $ fromIntegral offset +comparisonOffset (Opif_icmpgt offset) = Just $ fromIntegral offset +comparisonOffset (Opif_icmpge offset) = Just $ fromIntegral offset +comparisonOffset anything_else = Nothing + +isComparisonOperation :: Operation -> Bool +isComparisonOperation op = isJust (comparisonOffset op) + findFieldIndex :: [ConstantInfo] -> String -> Maybe Int findFieldIndex constants name = let fieldRefNameInfos = [ @@ -143,10 +184,10 @@ findMethodIndex classFile name = let findClassIndex :: [ConstantInfo] -> String -> Maybe Int findClassIndex constants name = let - classNameIndices = [(index, constants!!(fromIntegral nameIndex - 1)) | (index, ClassInfo nameIndex) <- (zip[1..] constants)] + classNameIndices = [(index, constants!!(fromIntegral nameIndex - 1)) | (index, ClassInfo nameIndex) <- (zip [1..] constants)] classNames = map (\(index, nameInfo) -> case nameInfo of Utf8Info className -> (index, className) - something_else -> error("Expected UTF8Info but got " ++ show something_else)) + something_else -> error ("Expected UTF8Info but got " ++ show something_else)) classNameIndices desiredClassIndex = find (\(index, className) -> className == name) classNames in case desiredClassIndex of @@ -162,7 +203,7 @@ getKnownMembers constants = let (index, constants!!(fromIntegral classIndex - 1), constants!!(fromIntegral nameTypeIndex - 1)) | (index, MethodRefInfo classIndex nameTypeIndex) <- (zip [1..] constants) ] - + fieldsClassNameType = map (\(index, nameInfo, nameTypeInfo) -> case (nameInfo, nameTypeInfo) of (ClassInfo nameIndex, NameAndTypeInfo fnameIndex ftypeIndex) -> (index, (constants!!(fromIntegral nameIndex - 1), constants!!(fromIntegral fnameIndex - 1), constants!!(fromIntegral ftypeIndex - 1))) something_else -> error ("Expected Class and NameType info, but got: " ++ show nameInfo ++ " and " ++ show nameTypeInfo)) @@ -170,8 +211,8 @@ getKnownMembers constants = let fieldsResolved = map (\(index, (nameInfo, fnameInfo, ftypeInfo)) -> case (nameInfo, fnameInfo, ftypeInfo) of (Utf8Info cname, Utf8Info fname, Utf8Info ftype) -> (index, (cname, fname, ftype)) - something_else -> error("Expected UTF8Infos but got " ++ show something_else)) - fieldsClassNameType + something_else -> error ("Expected UTF8Infos but got " ++ show something_else)) + fieldsClassNameType in fieldsResolved @@ -223,7 +264,7 @@ injectDefaultConstructor pre | otherwise = pre ++ [MethodDeclaration "void" "" [] (TypedStatement "void" (Block []))] injectFieldInitializers :: String -> [VariableDeclaration] -> [MethodDeclaration] -> [MethodDeclaration] -injectFieldInitializers classname vars pre = let +injectFieldInitializers classname vars pre = let initializers = mapMaybe (\(variable) -> case variable of VariableDeclaration dtype name (Just initializer) -> Just ( TypedStatement dtype ( @@ -242,4 +283,55 @@ injectFieldInitializers classname vars pre = let map (\(method) -> case method of MethodDeclaration "void" "" params (TypedStatement "void" (Block statements)) -> MethodDeclaration "void" "" params (TypedStatement "void" (Block (initializers ++ statements))) otherwise -> method - ) pre \ No newline at end of file + ) pre + +-- effect of one instruction/operation on the stack +operationStackCost :: [ConstantInfo] -> Operation -> Int +operationStackCost constants Opiadd = -1 +operationStackCost constants Opisub = -1 +operationStackCost constants Opimul = -1 +operationStackCost constants Opidiv = -1 +operationStackCost constants Opirem = -1 +operationStackCost constants Opiand = -1 +operationStackCost constants Opior = -1 +operationStackCost constants Opixor = -1 +operationStackCost constants Opineg = 0 +operationStackCost constants Opdup = 1 +operationStackCost constants (Opnew _) = 1 +operationStackCost constants (Opif_icmplt _) = -2 +operationStackCost constants (Opif_icmple _) = -2 +operationStackCost constants (Opif_icmpgt _) = -2 +operationStackCost constants (Opif_icmpge _) = -2 +operationStackCost constants (Opif_icmpeq _) = -2 +operationStackCost constants (Opif_icmpne _) = -2 +operationStackCost constants Opaconst_null = 1 +operationStackCost constants Opreturn = 0 +operationStackCost constants Opireturn = -1 +operationStackCost constants Opareturn = -1 +operationStackCost constants Opdup_x1 = 1 +operationStackCost constants Oppop = -1 +operationStackCost constants (Opinvokespecial idx) = let + (params, returnType) = methodParametersFromIndex constants (fromIntegral idx) + in (length params + 1) - (fromEnum (returnType /= "V")) +operationStackCost constants (Opinvokevirtual idx) = let + (params, returnType) = methodParametersFromIndex constants (fromIntegral idx) + in (length params + 1) - (fromEnum (returnType /= "V")) +operationStackCost constants (Opgoto _) = 0 +operationStackCost constants (Opsipush _) = 1 +operationStackCost constants (Opldc_w _) = 1 +operationStackCost constants (Opaload _) = 1 +operationStackCost constants (Opiload _) = 1 +operationStackCost constants (Opastore _) = -1 +operationStackCost constants (Opistore _) = -1 +operationStackCost constants (Opputfield _) = -2 +operationStackCost constants (Opgetfield _) = -1 + +simulateStackOperation :: [ConstantInfo] -> Operation -> (Int, Int) -> (Int, Int) +simulateStackOperation constants op (cd, md) = let + depth = cd + operationStackCost constants op + in if depth < 0 + then error ("Consuming value off of empty stack: " ++ show op) + else (depth, max depth md) + +maxStackDepth :: [ConstantInfo] -> [Operation] -> Int +maxStackDepth constants ops = snd $ foldr (simulateStackOperation constants) (0, 0) (reverse ops) -- 2.34.1 From 79ddafbf9a15f0fc51ae9aa2de5794ce5f0e2728 Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Fri, 21 Jun 2024 07:37:25 +0200 Subject: [PATCH 6/9] Fixed class offset in constant pool when newly inserted --- Test/JavaSources/Main.java | 2 +- src/ByteCode/Util.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Test/JavaSources/Main.java b/Test/JavaSources/Main.java index aa84965..92c2929 100644 --- a/Test/JavaSources/Main.java +++ b/Test/JavaSources/Main.java @@ -1,7 +1,7 @@ // compile all test files using: // ls Test/JavaSources/*.java | grep -v ".*Main.java" | xargs -I {} cabal run compiler {} // compile (in project root) using: -// javac -g:none -sourcepath Test/JavaSources/ Test/JavaSources/Main.java +// pushd Test/JavaSources; javac -g:none Main.java; popd // afterwards, run using // java -ea -cp Test/JavaSources/ Main diff --git a/src/ByteCode/Util.hs b/src/ByteCode/Util.hs index 4271e11..1e28397 100644 --- a/src/ByteCode/Util.hs +++ b/src/ByteCode/Util.hs @@ -220,7 +220,7 @@ getKnownMembers constants = let getClassIndex :: [ConstantInfo] -> String -> ([ConstantInfo], Int) getClassIndex constants name = case findClassIndex constants name of Just index -> (constants, index) - Nothing -> (constants ++ [ClassInfo (fromIntegral (length constants)), Utf8Info name], fromIntegral (length constants)) + Nothing -> (constants ++ [ClassInfo (fromIntegral (length constants) + 2), Utf8Info name], fromIntegral (length constants) + 1) -- get the index for a field within a class, creating it if it does not exist. getFieldIndex :: [ConstantInfo] -> (String, String, String) -> ([ConstantInfo], Int) -- 2.34.1 From 4435f7aac86a1052941913d2d0fbc4b8c3d7ac5b Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Fri, 21 Jun 2024 08:07:20 +0200 Subject: [PATCH 7/9] Changed internal type of boolean, because it's obviously Z not B haha --- Test/JavaSources/Main.java | 5 +++++ Test/JavaSources/TestArithmetic.java | 11 +++++++++++ Test/JavaSources/TestRecursion.java | 9 ++++++++- src/ByteCode/Builder.hs | 2 +- src/ByteCode/Util.hs | 6 +++--- 5 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 Test/JavaSources/TestArithmetic.java diff --git a/Test/JavaSources/Main.java b/Test/JavaSources/Main.java index 92c2929..d33fabb 100644 --- a/Test/JavaSources/Main.java +++ b/Test/JavaSources/Main.java @@ -11,6 +11,7 @@ public class Main { TestEmpty empty = new TestEmpty(); TestFields fields = new TestFields(); TestConstructor constructor = new TestConstructor(42); + TestArithmetic arithmetic = new TestArithmetic(); TestMultipleClasses multipleClasses = new TestMultipleClasses(); TestRecursion recursion = new TestRecursion(10); TestMalicious malicious = new TestMalicious(); @@ -21,6 +22,10 @@ public class Main { assert fields.a == 0 && fields.b == 42; // constructor parameters override initializers assert constructor.a == 42; + // basic arithmetics + assert arithmetic.basic(1, 2, 3) == 2; + // we have boolean logic as well + assert arithmetic.logic(true, false, true) == true; // multiple classes within one file work. Referencing another classes fields/methods works. assert multipleClasses.a.a == 42; // self-referencing classes work. diff --git a/Test/JavaSources/TestArithmetic.java b/Test/JavaSources/TestArithmetic.java new file mode 100644 index 0000000..2806fd6 --- /dev/null +++ b/Test/JavaSources/TestArithmetic.java @@ -0,0 +1,11 @@ +public class TestArithmetic { + public int basic(int a, int b, int c) + { + return a + b - c * a / b % c; + } + + public boolean logic(boolean a, boolean b, boolean c) + { + return a && (c || b); + } +} diff --git a/Test/JavaSources/TestRecursion.java b/Test/JavaSources/TestRecursion.java index bcebce1..178f156 100644 --- a/Test/JavaSources/TestRecursion.java +++ b/Test/JavaSources/TestRecursion.java @@ -23,5 +23,12 @@ public class TestRecursion { { return fibonacci(n - 1) + this.fibonacci(n - 2); } - } + } + + public int ackermann(int m, int n) + { + if (m == 0) return n + 1; + if (n == 0) return ackermann(m - 1, 1); + return ackermann(m - 1, ackermann(m, n - 1)); + } } diff --git a/src/ByteCode/Builder.hs b/src/ByteCode/Builder.hs index f7e9e00..cf9d8c4 100644 --- a/src/ByteCode/Builder.hs +++ b/src/ByteCode/Builder.hs @@ -65,7 +65,7 @@ methodAssembler (MethodDeclaration returntype name parameters statement) input = Just index -> let declaration = MethodDeclaration returntype name parameters statement paramNames = "this" : [name | ParameterDeclaration _ name <- parameters] - in case (splitAt index (methods input)) of + in case splitAt index (methods input) of (pre, []) -> input (pre, method : post) -> let (constants, bytecode, aParamNames) = assembleMethod (constantPool input, [], paramNames) declaration diff --git a/src/ByteCode/Util.hs b/src/ByteCode/Util.hs index 1e28397..7517c6b 100644 --- a/src/ByteCode/Util.hs +++ b/src/ByteCode/Util.hs @@ -43,7 +43,7 @@ parseMethodType :: ([String], String) -> String -> ([String], String) parseMethodType (params, returnType) ('(' : descriptor) = parseMethodType (params, returnType) descriptor parseMethodType (params, returnType) ('I' : descriptor) = parseMethodType (params ++ ["I"], returnType) descriptor parseMethodType (params, returnType) ('C' : descriptor) = parseMethodType (params ++ ["C"], returnType) descriptor -parseMethodType (params, returnType) ('B' : descriptor) = parseMethodType (params ++ ["B"], returnType) descriptor +parseMethodType (params, returnType) ('Z' : descriptor) = parseMethodType (params ++ ["Z"], returnType) descriptor parseMethodType (params, returnType) ('L' : descriptor) = let typeLength = elemIndex ';' descriptor in case typeLength of @@ -53,7 +53,7 @@ parseMethodType (params, returnType) ('L' : descriptor) = let parseMethodType (params ++ [typeName], returnType) restOfDescriptor Nothing -> error $ "unterminated class type in function signature: " ++ (show descriptor) parseMethodType (params, _) (')' : descriptor) = (params, descriptor) -parseMethodType _ descriptor = error $ "expected start of type name (L, I, C, B) but got: " ++ descriptor +parseMethodType _ descriptor = error $ "expected start of type name (L, I, C, Z) but got: " ++ descriptor -- given a method index (constant pool index), -- returns the full type of the method. (i.e (LSomething;II)V) @@ -76,7 +76,7 @@ datatypeDescriptor :: String -> String datatypeDescriptor "void" = "V" datatypeDescriptor "int" = "I" datatypeDescriptor "char" = "C" -datatypeDescriptor "boolean" = "B" +datatypeDescriptor "boolean" = "Z" datatypeDescriptor x = "L" ++ x ++ ";" -- 2.34.1 From 8eb9c16c7a351802d1a5cdbe623a48f613edc908 Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Fri, 21 Jun 2024 08:49:55 +0200 Subject: [PATCH 8/9] typos, formatting, comments --- Test/JavaSources/Main.java | 2 +- Test/JavaSources/TestArithmetic.java | 2 +- src/Ast.hs | 2 +- src/ByteCode/Assembler.hs | 63 ++++++++-------- src/ByteCode/Builder.hs | 29 ++++---- src/ByteCode/ClassFile.hs | 22 ++---- src/ByteCode/Util.hs | 103 ++++++++------------------- 7 files changed, 86 insertions(+), 137 deletions(-) diff --git a/Test/JavaSources/Main.java b/Test/JavaSources/Main.java index d33fabb..b912e91 100644 --- a/Test/JavaSources/Main.java +++ b/Test/JavaSources/Main.java @@ -25,7 +25,7 @@ public class Main { // basic arithmetics assert arithmetic.basic(1, 2, 3) == 2; // we have boolean logic as well - assert arithmetic.logic(true, false, true) == true; + assert arithmetic.logic(false, false, true) == true; // multiple classes within one file work. Referencing another classes fields/methods works. assert multipleClasses.a.a == 42; // self-referencing classes work. diff --git a/Test/JavaSources/TestArithmetic.java b/Test/JavaSources/TestArithmetic.java index 2806fd6..410332a 100644 --- a/Test/JavaSources/TestArithmetic.java +++ b/Test/JavaSources/TestArithmetic.java @@ -6,6 +6,6 @@ public class TestArithmetic { public boolean logic(boolean a, boolean b, boolean c) { - return a && (c || b); + return !a && (c || b); } } diff --git a/src/Ast.hs b/src/Ast.hs index a20b8e8..fdf088b 100644 --- a/src/Ast.hs +++ b/src/Ast.hs @@ -2,7 +2,7 @@ module Ast where type CompilationUnit = [Class] type DataType = String -type Identifier = String +type Identifier = String data ParameterDeclaration = ParameterDeclaration DataType Identifier deriving (Show, Eq) data VariableDeclaration = VariableDeclaration DataType Identifier (Maybe Expression) deriving (Show, Eq) diff --git a/src/ByteCode/Assembler.hs b/src/ByteCode/Assembler.hs index aaa8542..101d7aa 100644 --- a/src/ByteCode/Assembler.hs +++ b/src/ByteCode/Assembler.hs @@ -12,12 +12,12 @@ type Assembler a = ([ConstantInfo], [Operation], [String]) -> a -> ([ConstantInf assembleExpression :: Assembler Expression assembleExpression (constants, ops, lvars) (TypedExpression _ (BinaryOperation op a b)) - | elem op [Addition, Subtraction, Multiplication, Division, Modulo, BitwiseAnd, BitwiseOr, BitwiseXor, And, Or] = let + | op `elem` [Addition, Subtraction, Multiplication, Division, Modulo, BitwiseAnd, BitwiseOr, BitwiseXor, And, Or] = let (aConstants, aOps, _) = assembleExpression (constants, ops, lvars) a (bConstants, bOps, _) = assembleExpression (aConstants, aOps, lvars) b in (bConstants, bOps ++ [binaryOperation op], lvars) - | elem op [CompareEqual, CompareNotEqual, CompareLessThan, CompareLessOrEqual, CompareGreaterThan, CompareGreaterOrEqual] = let + | op `elem` [CompareEqual, CompareNotEqual, CompareLessThan, CompareLessOrEqual, CompareGreaterThan, CompareGreaterOrEqual] = let (aConstants, aOps, _) = assembleExpression (constants, ops, lvars) a (bConstants, bOps, _) = assembleExpression (aConstants, aOps, lvars) b cmp_op = comparisonOperation op 9 @@ -60,7 +60,7 @@ assembleExpression (constants, ops, lvars) (TypedExpression _ (UnaryOperation Mi assembleExpression (constants, ops, lvars) (TypedExpression dtype (LocalVariable name)) | name == "this" = (constants, ops ++ [Opaload 0], lvars) | otherwise = let - localIndex = findIndex ((==) name) lvars + localIndex = elemIndex name lvars isPrimitive = elem dtype ["char", "boolean", "int"] in case localIndex of Just index -> (constants, ops ++ if isPrimitive then [Opiload (fromIntegral index)] else [Opaload (fromIntegral index)], lvars) @@ -69,7 +69,7 @@ assembleExpression (constants, ops, lvars) (TypedExpression dtype (LocalVariable assembleExpression (constants, ops, lvars) (TypedExpression dtype (StatementExpressionExpression stmtexp)) = assembleStatementExpression (constants, ops, lvars) stmtexp -assembleExpression _ expr = error ("unimplemented: " ++ show expr) +assembleExpression _ expr = error ("Unknown expression: " ++ show expr) assembleNameChain :: Assembler Expression assembleNameChain input (TypedExpression _ (BinaryOperation NameResolution (TypedExpression atype a) (TypedExpression _ (FieldVariable _)))) = @@ -84,7 +84,7 @@ assembleStatementExpression target = resolveNameChain (TypedExpression dtype receiver) in case target of (TypedExpression dtype (LocalVariable name)) -> let - localIndex = findIndex ((==) name) lvars + localIndex = elemIndex name lvars (constants_a, ops_a, _) = assembleExpression (constants, ops, lvars) expr isPrimitive = elem dtype ["char", "boolean", "int"] in case localIndex of @@ -99,20 +99,20 @@ assembleStatementExpression (constants_a, ops_a, _) = assembleExpression (constants_r, ops_r, lvars) expr in (constants_a, ops_a ++ [Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars) - something_else -> error ("expected TypedExpression, but got: " ++ show something_else) + something_else -> error ("Expected TypedExpression, but got: " ++ show something_else) assembleStatementExpression (constants, ops, lvars) (TypedStatementExpression _ (PreIncrement (TypedExpression dtype receiver))) = let target = resolveNameChain (TypedExpression dtype receiver) in case target of - (TypedExpression dtype (LocalVariable name)) -> let - localIndex = findIndex ((==) name) lvars - expr = (TypedExpression dtype (LocalVariable name)) + (TypedExpression dtype (LocalVariable name)) -> let + localIndex = elemIndex name lvars + expr = TypedExpression dtype (LocalVariable name) (exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr in case localIndex of Just index -> (exprConstants, exprOps ++ [Opsipush 1, Opiadd, Opdup, Opistore (fromIntegral index)], lvars) - Nothing -> error("No such local variable found in local variable pool: " ++ name) + Nothing -> error ("No such local variable found in local variable pool: " ++ name) (TypedExpression dtype (FieldVariable name)) -> let owner = resolveNameChainOwner (TypedExpression dtype receiver) in case owner of @@ -121,20 +121,20 @@ assembleStatementExpression (constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver) in (constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opsipush 1, Opiadd, Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars) - something_else -> error ("expected TypedExpression, but got: " ++ show something_else) + something_else -> error ("Expected TypedExpression, but got: " ++ show something_else) assembleStatementExpression (constants, ops, lvars) (TypedStatementExpression _ (PreDecrement (TypedExpression dtype receiver))) = let target = resolveNameChain (TypedExpression dtype receiver) in case target of - (TypedExpression dtype (LocalVariable name)) -> let - localIndex = findIndex ((==) name) lvars - expr = (TypedExpression dtype (LocalVariable name)) + (TypedExpression dtype (LocalVariable name)) -> let + localIndex = elemIndex name lvars + expr = TypedExpression dtype (LocalVariable name) (exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr in case localIndex of Just index -> (exprConstants, exprOps ++ [Opsipush 1, Opisub, Opdup, Opistore (fromIntegral index)], lvars) - Nothing -> error("No such local variable found in local variable pool: " ++ name) + Nothing -> error ("No such local variable found in local variable pool: " ++ name) (TypedExpression dtype (FieldVariable name)) -> let owner = resolveNameChainOwner (TypedExpression dtype receiver) in case owner of @@ -143,20 +143,20 @@ assembleStatementExpression (constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver) in (constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opsipush 1, Opisub, Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars) - something_else -> error ("expected TypedExpression, but got: " ++ show something_else) + something_else -> error ("Expected TypedExpression, but got: " ++ show something_else) assembleStatementExpression (constants, ops, lvars) (TypedStatementExpression _ (PostIncrement (TypedExpression dtype receiver))) = let target = resolveNameChain (TypedExpression dtype receiver) in case target of - (TypedExpression dtype (LocalVariable name)) -> let - localIndex = findIndex ((==) name) lvars - expr = (TypedExpression dtype (LocalVariable name)) + (TypedExpression dtype (LocalVariable name)) -> let + localIndex = elemIndex name lvars + expr = TypedExpression dtype (LocalVariable name) (exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr in case localIndex of Just index -> (exprConstants, exprOps ++ [Opdup, Opsipush 1, Opiadd, Opistore (fromIntegral index)], lvars) - Nothing -> error("No such local variable found in local variable pool: " ++ name) + Nothing -> error ("No such local variable found in local variable pool: " ++ name) (TypedExpression dtype (FieldVariable name)) -> let owner = resolveNameChainOwner (TypedExpression dtype receiver) in case owner of @@ -165,20 +165,20 @@ assembleStatementExpression (constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver) in (constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opdup_x1, Opsipush 1, Opiadd, Opputfield (fromIntegral fieldIndex)], lvars) - something_else -> error ("expected TypedExpression, but got: " ++ show something_else) + something_else -> error ("Expected TypedExpression, but got: " ++ show something_else) assembleStatementExpression (constants, ops, lvars) (TypedStatementExpression _ (PostDecrement (TypedExpression dtype receiver))) = let target = resolveNameChain (TypedExpression dtype receiver) in case target of - (TypedExpression dtype (LocalVariable name)) -> let - localIndex = findIndex ((==) name) lvars - expr = (TypedExpression dtype (LocalVariable name)) + (TypedExpression dtype (LocalVariable name)) -> let + localIndex = elemIndex name lvars + expr = TypedExpression dtype (LocalVariable name) (exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr in case localIndex of Just index -> (exprConstants, exprOps ++ [Opdup, Opsipush 1, Opisub, Opistore (fromIntegral index)], lvars) - Nothing -> error("No such local variable found in local variable pool: " ++ name) + Nothing -> error ("No such local variable found in local variable pool: " ++ name) (TypedExpression dtype (FieldVariable name)) -> let owner = resolveNameChainOwner (TypedExpression dtype receiver) in case owner of @@ -187,7 +187,7 @@ assembleStatementExpression (constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver) in (constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opdup_x1, Opsipush 1, Opisub, Opputfield (fromIntegral fieldIndex)], lvars) - something_else -> error ("expected TypedExpression, but got: " ++ show something_else) + something_else -> error ("Expected TypedExpression, but got: " ++ show something_else) assembleStatementExpression (constants, ops, lvars) @@ -231,7 +231,7 @@ assembleStatement (constants, ops, lvars) (TypedStatement dtype (If expr if_stmt else_length = sum (map opcodeEncodingLength ops_elsea) in case dtype of "void" -> (constants_ifa, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq (if_length + 6)] ++ ops_ifa ++ [Opgoto (else_length + 3)] ++ ops_elsea, lvars) - otherwise -> (constants_ifa, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq (if_length + 3)] ++ ops_ifa ++ ops_elsea, lvars) + _ -> (constants_ifa, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq (if_length + 3)] ++ ops_ifa ++ ops_elsea, lvars) assembleStatement (constants, ops, lvars) (TypedStatement _ (While expr stmt)) = let (constants_cmp, ops_cmp, _) = assembleExpression (constants, [], lvars) expr @@ -257,20 +257,19 @@ assembleStatement (constants, ops, lvars) (TypedStatement _ (StatementExpression in (constants_e, ops_e ++ [Oppop], lvars_e) -assembleStatement _ stmt = error ("Not yet implemented: " ++ show stmt) +assembleStatement _ stmt = error ("Unknown statement: " ++ show stmt) assembleMethod :: Assembler MethodDeclaration assembleMethod (constants, ops, lvars) (MethodDeclaration returntype name _ (TypedStatement _ (Block statements))) | name == "" = let (constants_a, ops_a, lvars_a) = foldl assembleStatement (constants, ops, lvars) statements - init_ops = [Opaload 0, Opinvokespecial 2] in - (constants_a, init_ops ++ ops_a ++ [Opreturn], lvars_a) + (constants_a, [Opaload 0, Opinvokespecial 2] ++ ops_a ++ [Opreturn], lvars_a) | otherwise = case returntype of "void" -> let (constants_a, ops_a, lvars_a) = foldl assembleStatement (constants, ops, lvars) statements in (constants_a, ops_a ++ [Opreturn], lvars_a) - otherwise -> foldl assembleStatement (constants, ops, lvars) statements -assembleMethod _ (MethodDeclaration _ _ _ stmt) = error ("Typed block expected for method body, got: " ++ show stmt) + _ -> foldl assembleStatement (constants, ops, lvars) statements +assembleMethod _ (MethodDeclaration _ _ _ stmt) = error ("Typed block expected for method body, got: " ++ show stmt) diff --git a/src/ByteCode/Builder.hs b/src/ByteCode/Builder.hs index cf9d8c4..b717247 100644 --- a/src/ByteCode/Builder.hs +++ b/src/ByteCode/Builder.hs @@ -22,14 +22,14 @@ fieldBuilder (VariableDeclaration datatype name _) input = let ] field = MemberInfo { memberAccessFlags = accessPublic, - memberNameIndex = (fromIntegral (baseIndex + 2)), - memberDescriptorIndex = (fromIntegral (baseIndex + 3)), + memberNameIndex = fromIntegral (baseIndex + 2), + memberDescriptorIndex = fromIntegral (baseIndex + 3), memberAttributes = [] } in input { - constantPool = (constantPool input) ++ constants, - fields = (fields input) ++ [field] + constantPool = constantPool input ++ constants, + fields = fields input ++ [field] } @@ -46,16 +46,16 @@ methodBuilder (MethodDeclaration returntype name parameters statement) input = l method = MemberInfo { memberAccessFlags = accessPublic, - memberNameIndex = (fromIntegral (baseIndex + 2)), - memberDescriptorIndex = (fromIntegral (baseIndex + 3)), + memberNameIndex = fromIntegral (baseIndex + 2), + memberDescriptorIndex = fromIntegral (baseIndex + 3), memberAttributes = [] } in input { - constantPool = (constantPool input) ++ constants, - methods = (methods input) ++ [method] + constantPool = constantPool input ++ constants, + methods = methods input ++ [method] } - + methodAssembler :: ClassFileBuilder MethodDeclaration methodAssembler (MethodDeclaration returntype name parameters statement) input = let @@ -94,11 +94,12 @@ classBuilder (Class name methods fields) _ = let Utf8Info "java/lang/Object", Utf8Info "", Utf8Info "()V", - Utf8Info "Code" + Utf8Info "Code", + ClassInfo 9, + Utf8Info name ] - nameConstants = [ClassInfo 9, Utf8Info name] nakedClassFile = ClassFile { - constantPool = baseConstants ++ nameConstants, + constantPool = baseConstants, accessFlags = accessPublic, thisClass = 8, superClass = 1, @@ -107,9 +108,13 @@ classBuilder (Class name methods fields) _ = let attributes = [] } + -- if a class has no constructor, inject an empty one. methodsWithInjectedConstructor = injectDefaultConstructor methods + -- for every constructor, prepend all initialization assignments for fields. methodsWithInjectedInitializers = injectFieldInitializers name fields methodsWithInjectedConstructor + -- add fields, then method bodies to the classfile. After all referable names are known, + -- assemble the methods into bytecode. classFileWithFields = foldr fieldBuilder nakedClassFile fields classFileWithMethods = foldr methodBuilder classFileWithFields methodsWithInjectedInitializers classFileWithAssembledMethods = foldr methodAssembler classFileWithMethods methodsWithInjectedInitializers diff --git a/src/ByteCode/ClassFile.hs b/src/ByteCode/ClassFile.hs index 1fc15c9..7a20e97 100644 --- a/src/ByteCode/ClassFile.hs +++ b/src/ByteCode/ClassFile.hs @@ -1,14 +1,4 @@ -module ByteCode.ClassFile( - ConstantInfo(..), - Attribute(..), - MemberInfo(..), - ClassFile(..), - Operation(..), - serialize, - emptyClassFile, - opcodeEncodingLength, - className -) where +module ByteCode.ClassFile where import Data.Word import Data.Int @@ -99,11 +89,11 @@ emptyClassFile = ClassFile { className :: ClassFile -> String className classFile = let - classInfo = (constantPool classFile)!!(fromIntegral (thisClass classFile)) + classInfo = constantPool classFile !! fromIntegral (thisClass classFile) in case classInfo of Utf8Info className -> className - otherwise -> error ("expected Utf8Info but got: " ++ show otherwise) - + unexpected_element -> error ("expected Utf8Info but got: " ++ show unexpected_element) + opcodeEncodingLength :: Operation -> Word16 opcodeEncodingLength Opiadd = 1 @@ -201,10 +191,10 @@ instance Serializable Attribute where serialize (CodeAttribute { attributeMaxStack = maxStack, attributeMaxLocals = maxLocals, attributeCode = code }) = let - assembledCode = concat (map serialize code) + assembledCode = concatMap serialize code in unpackWord16 7 -- attribute_name_index - ++ unpackWord32 (12 + (fromIntegral (length assembledCode))) -- attribute_length + ++ unpackWord32 (12 + fromIntegral (length assembledCode)) -- attribute_length ++ unpackWord16 maxStack -- max_stack ++ unpackWord16 maxLocals -- max_locals ++ unpackWord32 (fromIntegral (length assembledCode)) -- code_length diff --git a/src/ByteCode/Util.hs b/src/ByteCode/Util.hs index 7517c6b..f177342 100644 --- a/src/ByteCode/Util.hs +++ b/src/ByteCode/Util.hs @@ -10,23 +10,22 @@ import Data.Word (Word8, Word16, Word32) -- walks the name resolution chain. returns the innermost Just LocalVariable/FieldVariable or Nothing. resolveNameChain :: Expression -> Expression resolveNameChain (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b -resolveNameChain (TypedExpression dtype (LocalVariable name)) = (TypedExpression dtype (LocalVariable name)) -resolveNameChain (TypedExpression dtype (FieldVariable name)) = (TypedExpression dtype (FieldVariable name)) -resolveNameChain invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show (invalidExpression)) +resolveNameChain (TypedExpression dtype (LocalVariable name)) = TypedExpression dtype (LocalVariable name) +resolveNameChain (TypedExpression dtype (FieldVariable name)) = TypedExpression dtype (FieldVariable name) +resolveNameChain invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show invalidExpression) -- walks the name resolution chain. returns the second-to-last item of the namechain. resolveNameChainOwner :: Expression -> Expression resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a (TypedExpression dtype (FieldVariable name)))) = a resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b -resolveNameChainOwner invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show (invalidExpression)) - +resolveNameChainOwner invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show invalidExpression) methodDescriptor :: MethodDeclaration -> String methodDescriptor (MethodDeclaration returntype _ parameters _) = let parameter_types = [datatype | ParameterDeclaration datatype _ <- parameters] in "(" - ++ (concat (map datatypeDescriptor parameter_types)) + ++ concatMap datatypeDescriptor parameter_types ++ ")" ++ datatypeDescriptor returntype @@ -35,10 +34,12 @@ methodDescriptorFromParamlist parameters returntype = let parameter_types = [datatype | TypedExpression datatype _ <- parameters] in "(" - ++ (concat (map datatypeDescriptor parameter_types)) + ++ concatMap datatypeDescriptor parameter_types ++ ")" ++ datatypeDescriptor returntype +-- recursively parses a given type signature into a list of parameter types and the method return type. +-- As an initial parameter, you can supply ([], "void"). parseMethodType :: ([String], String) -> String -> ([String], String) parseMethodType (params, returnType) ('(' : descriptor) = parseMethodType (params, returnType) descriptor parseMethodType (params, returnType) ('I' : descriptor) = parseMethodType (params ++ ["I"], returnType) descriptor @@ -51,16 +52,16 @@ parseMethodType (params, returnType) ('L' : descriptor) = let (typeName, semicolon : restOfDescriptor) = splitAt length descriptor in parseMethodType (params ++ [typeName], returnType) restOfDescriptor - Nothing -> error $ "unterminated class type in function signature: " ++ (show descriptor) + Nothing -> error $ "unterminated class type in function signature: " ++ show descriptor parseMethodType (params, _) (')' : descriptor) = (params, descriptor) parseMethodType _ descriptor = error $ "expected start of type name (L, I, C, Z) but got: " ++ descriptor -- given a method index (constant pool index), -- returns the full type of the method. (i.e (LSomething;II)V) methodTypeFromIndex :: [ConstantInfo] -> Int -> String -methodTypeFromIndex constants index = case constants!!(fromIntegral (index - 1)) of - MethodRefInfo _ nameAndTypeIndex -> case constants!!(fromIntegral (nameAndTypeIndex - 1)) of - NameAndTypeInfo _ typeIndex -> case constants!!(fromIntegral (typeIndex - 1)) of +methodTypeFromIndex constants index = case constants !! fromIntegral (index - 1) of + MethodRefInfo _ nameAndTypeIndex -> case constants !! fromIntegral (nameAndTypeIndex - 1) of + NameAndTypeInfo _ typeIndex -> case constants !! fromIntegral (typeIndex - 1) of Utf8Info typeLiteral -> typeLiteral unexpectedElement -> error "Expected Utf8Info but got: " ++ show unexpectedElement unexpectedElement -> error "Expected NameAndTypeInfo but got: " ++ show unexpectedElement @@ -70,7 +71,7 @@ methodParametersFromIndex :: [ConstantInfo] -> Int -> ([String], String) methodParametersFromIndex constants index = parseMethodType ([], "V") (methodTypeFromIndex constants index) memberInfoIsMethod :: [ConstantInfo] -> MemberInfo -> Bool -memberInfoIsMethod constants info = elem '(' (memberInfoDescriptor constants info) +memberInfoIsMethod constants info = '(' `elem` memberInfoDescriptor constants info datatypeDescriptor :: String -> String datatypeDescriptor "void" = "V" @@ -79,35 +80,24 @@ datatypeDescriptor "char" = "C" datatypeDescriptor "boolean" = "Z" datatypeDescriptor x = "L" ++ x ++ ";" - memberInfoDescriptor :: [ConstantInfo] -> MemberInfo -> String -memberInfoDescriptor constants MemberInfo { - memberAccessFlags = _, - memberNameIndex = _, - memberDescriptorIndex = descriptorIndex, - memberAttributes = _ } = let - descriptor = constants!!((fromIntegral descriptorIndex) - 1) +memberInfoDescriptor constants MemberInfo { memberDescriptorIndex = descriptorIndex } = let + descriptor = constants !! (fromIntegral descriptorIndex - 1) in case descriptor of Utf8Info descriptorText -> descriptorText - _ -> ("Invalid Item at Constant pool index " ++ show descriptorIndex) - + _ -> "Invalid Item at Constant pool index " ++ show descriptorIndex memberInfoName :: [ConstantInfo] -> MemberInfo -> String -memberInfoName constants MemberInfo { - memberAccessFlags = _, - memberNameIndex = nameIndex, - memberDescriptorIndex = _, - memberAttributes = _ } = let - name = constants!!((fromIntegral nameIndex) - 1) +memberInfoName constants MemberInfo { memberNameIndex = nameIndex } = let + name = constants !! (fromIntegral nameIndex - 1) in case name of Utf8Info nameText -> nameText - _ -> ("Invalid Item at Constant pool index " ++ show nameIndex) - + _ -> "Invalid Item at Constant pool index " ++ show nameIndex returnOperation :: DataType -> Operation returnOperation dtype - | elem dtype ["int", "char", "boolean"] = Opireturn - | otherwise = Opareturn + | dtype `elem` ["int", "char", "boolean"] = Opireturn + | otherwise = Opareturn binaryOperation :: BinaryOperator -> Operation binaryOperation Addition = Opiadd @@ -141,50 +131,15 @@ comparisonOffset anything_else = Nothing isComparisonOperation :: Operation -> Bool isComparisonOperation op = isJust (comparisonOffset op) -findFieldIndex :: [ConstantInfo] -> String -> Maybe Int -findFieldIndex constants name = let - fieldRefNameInfos = [ - -- we only skip one entry to get the name since the Java constant pool - -- is 1-indexed (why) - (index, constants!!(fromIntegral index + 1)) - | (index, FieldRefInfo classIndex _) <- (zip [1..] constants) - ] - fieldRefNames = map (\(index, nameInfo) -> case nameInfo of - Utf8Info fieldName -> (index, fieldName) - something_else -> error ("Expected UTF8Info but got" ++ show something_else)) - fieldRefNameInfos - fieldIndex = find (\(index, fieldName) -> fieldName == name) fieldRefNames - in case fieldIndex of - Just (index, _) -> Just index - Nothing -> Nothing - -findMethodRefIndex :: [ConstantInfo] -> String -> Maybe Int -findMethodRefIndex constants name = let - methodRefNameInfos = [ - -- we only skip one entry to get the name since the Java constant pool - -- is 1-indexed (why) - (index, constants!!(fromIntegral index + 1)) - | (index, MethodRefInfo _ _) <- (zip [1..] constants) - ] - methodRefNames = map (\(index, nameInfo) -> case nameInfo of - Utf8Info methodName -> (index, methodName) - something_else -> error ("Expected UTF8Info but got " ++ show something_else)) - methodRefNameInfos - methodIndex = find (\(index, methodName) -> methodName == name) methodRefNames - in case methodIndex of - Just (index, _) -> Just index - Nothing -> Nothing - - findMethodIndex :: ClassFile -> String -> Maybe Int findMethodIndex classFile name = let constants = constantPool classFile in - findIndex (\method -> ((memberInfoIsMethod constants method) && (memberInfoName constants method) == name)) (methods classFile) + findIndex (\method -> memberInfoIsMethod constants method && memberInfoName constants method == name) (methods classFile) findClassIndex :: [ConstantInfo] -> String -> Maybe Int findClassIndex constants name = let - classNameIndices = [(index, constants!!(fromIntegral nameIndex - 1)) | (index, ClassInfo nameIndex) <- (zip [1..] constants)] + classNameIndices = [(index, constants!!(fromIntegral nameIndex - 1)) | (index, ClassInfo nameIndex) <- zip [1..] constants] classNames = map (\(index, nameInfo) -> case nameInfo of Utf8Info className -> (index, className) something_else -> error ("Expected UTF8Info but got " ++ show something_else)) @@ -198,10 +153,10 @@ getKnownMembers :: [ConstantInfo] -> [(Int, (String, String, String))] getKnownMembers constants = let fieldsClassAndNT = [ (index, constants!!(fromIntegral classIndex - 1), constants!!(fromIntegral nameTypeIndex - 1)) - | (index, FieldRefInfo classIndex nameTypeIndex) <- (zip [1..] constants) + | (index, FieldRefInfo classIndex nameTypeIndex) <- zip [1..] constants ] ++ [ (index, constants!!(fromIntegral classIndex - 1), constants!!(fromIntegral nameTypeIndex - 1)) - | (index, MethodRefInfo classIndex nameTypeIndex) <- (zip [1..] constants) + | (index, MethodRefInfo classIndex nameTypeIndex) <- zip [1..] constants ] fieldsClassNameType = map (\(index, nameInfo, nameTypeInfo) -> case (nameInfo, nameTypeInfo) of @@ -280,9 +235,9 @@ injectFieldInitializers classname vars pre = let otherwise -> Nothing ) vars in - map (\(method) -> case method of + map (\method -> case method of MethodDeclaration "void" "" params (TypedStatement "void" (Block statements)) -> MethodDeclaration "void" "" params (TypedStatement "void" (Block (initializers ++ statements))) - otherwise -> method + _ -> method ) pre -- effect of one instruction/operation on the stack @@ -312,10 +267,10 @@ operationStackCost constants Opdup_x1 = 1 operationStackCost constants Oppop = -1 operationStackCost constants (Opinvokespecial idx) = let (params, returnType) = methodParametersFromIndex constants (fromIntegral idx) - in (length params + 1) - (fromEnum (returnType /= "V")) + in (length params + 1) - fromEnum (returnType /= "V") operationStackCost constants (Opinvokevirtual idx) = let (params, returnType) = methodParametersFromIndex constants (fromIntegral idx) - in (length params + 1) - (fromEnum (returnType /= "V")) + in (length params + 1) - fromEnum (returnType /= "V") operationStackCost constants (Opgoto _) = 0 operationStackCost constants (Opsipush _) = 1 operationStackCost constants (Opldc_w _) = 1 -- 2.34.1 From 98735fd6ba430bc8e4ef32a59e5db9a30eda8711 Mon Sep 17 00:00:00 2001 From: Matthias Raba Date: Fri, 21 Jun 2024 09:03:47 +0200 Subject: [PATCH 9/9] updated bytecode.md --- doc/bytecode.md | 44 +++++++++++++++++++++++++++----------------- src/Main.hs | 15 +++++++-------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/doc/bytecode.md b/doc/bytecode.md index b980830..1002122 100644 --- a/doc/bytecode.md +++ b/doc/bytecode.md @@ -4,20 +4,7 @@ Die Bytecodegenerierung ist letztendlich eine zweistufige Transformation: `Getypter AST -> [ClassFile] -> [[Word8]]` -Vom AST, der bereits den Typcheck durchlaufen hat, wird zunächst eine Abbildung in die einzelnen ClassFiles vorgenommen. Diese ClassFiles werden anschließend in deren Byte-Repräsentation serialisiert. - -## Serialisierung - -Damit Bytecode generiert werden kann, braucht es Strukturen, die die Daten halten, die letztendlich serialisiert werden. Die JVM erwartet den kompilierten Code in handliche Pakete verpackt. Die Struktur dieser Pakete ist [so definiert](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html). - -Jede Struktur, die in dieser übergreifenden Class File vorkommt, haben wir in Haskell abgebildet. Es gibt z.B die Struktur "ClassFile", die wiederum weitere Strukturen wie z.B Informationen über Felder oder Methoden der Klasse. Alle diese Strukturen implementieren folgendes TypeClass: - -``` -class Serializable a where - serialize :: a -> [Word8] -``` - -Die Struktur ClassFile ruft für deren Kinder rekursiv diese `serialize` Funktion auf. Am Ende bleibt eine flache Word8-Liste übrig, die Serialisierung ist damit abgeschlossen. +Vom AST, der bereits den Typcheck durchlaufen hat, wird zunächst eine Abbildung in die einzelnen ClassFiles vorgenommen. Diese ClassFiles werden anschließend in deren Byte-Repräsentation serialisiert. Dieser Teil der Aufgabenstellung wurde gemeinsam von Christian Brier und Matthias Raba umgesetzt. ## Codegenerierung @@ -32,7 +19,6 @@ Die Idee hinter beiden ist, dass sie jeweils zwei Inputs haben, wobei der Rückg Der Nutzer ruft beispielsweise die Funktion `classBuilder` auf. Diese wendet nach und nach folgende Transformationen an: ``` - methodsWithInjectedConstructor = injectDefaultConstructor methods methodsWithInjectedInitializers = injectFieldInitializers name fields methodsWithInjectedConstructor @@ -47,7 +33,7 @@ Zuerst wird (falls notwendig) ein leerer Defaultkonstruktor in die Classfile ein 2. Hinzufügen aller Methoden (nur Prototypen) 3. Hinzufügen des Bytecodes in allen Methoden -Die Unterteilung von Schritt 2 und 3 ist deswegen notwendig, weil der Code einer Methode auch eine andere, erst nachher deklarierte Methode aufrufen kann. Nach Schritt 2 sind alle Methoden der Klasse bekannt. Wie beschrieben wird auch hier der Zustand über alle Faltungen mitgenommen. Jeder Schritt hat Zugriff auf alle Daten, die aus dem vorherigen Schritt bleiben. Sukkzessive wird eine korrekte ClassFile aufgebaut. +Die Unterteilung von Schritt 2 und 3 ist deswegen notwendig, weil der Code einer Methode auch eine andere, erst nachher deklarierte Methode aufrufen kann. Nach Schritt 2 sind alle Methoden der Klasse bekannt. Wie beschrieben wird auch hier der Zustand über alle Faltungen mitgenommen. Jeder Schritt hat Zugriff auf alle Daten, die aus dem vorherigen Schritt bleiben. Sukzessive wird eine korrekte ClassFile aufgebaut. Besonders interessant ist hierbei Schritt 3. Dort wird das Verhalten jeder einzelnen Methode in Bytecode übersetzt. In diesem Schritt werden zusätzlich zu den `Buildern` noch die `Assembler` verwendet (Definition siehe oben.) Die Assembler funktionieren ähnlich wie die Builder, arbeiten allerdings nicht auf einer ClassFile, sondern auf dem Inhalt einer Methode: Sie verarbeiten jeweils ein Tupel: @@ -66,4 +52,28 @@ assembleExpression (constants, ops, lvars) (TypedExpression _ NullLiteral) = Hier werden die Konstanten und lokalen Variablen des Inputs nicht berührt, dem Bytecode wird lediglich die Operation `aconst_null` hinzugefügt. Damit ist das Verhalten des gematchten Inputs - eines Nullliterals - abgebildet. -Die Assembler rufen sich teilweise rekursiv selbst auf, da ja auch der AST verschachteltes Verhalten abbilden kann. Der Startpunkt für die Assembly einer Methode ist der Builder `methodAssembler`. Dieser entspricht Schritt 3 in der obigen Übersicht. \ No newline at end of file +Die Assembler rufen sich teilweise rekursiv selbst auf, da ja auch der AST verschachteltes Verhalten abbilden kann. Der Startpunkt für die Assembly einer Methode ist der Builder `methodAssembler`. Dieser entspricht Schritt 3 in der obigen Übersicht. + +## Serialisierung + +Damit Bytecode generiert werden kann, braucht es Strukturen, die die Daten halten, die letztendlich serialisiert werden. Die JVM erwartet den kompilierten Code in handliche Pakete verpackt. Die Struktur dieser Pakete ist [so definiert](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html). + +Jede Struktur, die in dieser übergreifenden Class File vorkommt, haben wir in Haskell abgebildet. Es gibt z.B die Struktur "ClassFile", die wiederum weitere Strukturen wie z.B Informationen über Felder oder Methoden der Klasse beinhaltet. Alle diese Strukturen implementieren folgende TypeClass: + +``` +class Serializable a where + serialize :: a -> [Word8] +``` + +Hier ist ein Beispiel anhand der Serialisierung der einzelnen Operationen: + +``` +instance Serializable Operation where + serialize Opiadd = [0x60] + serialize Opisub = [0x64] + serialize Opimul = [0x68] + ... + serialize (Opgetfield index) = 0xB4 : unpackWord16 index +``` + +Die Struktur ClassFile ruft für deren Kinder rekursiv diese `serialize` Funktion auf und konkateniert die Ergebnisse. Am Ende bleibt eine flache Word8-Liste übrig, die Serialisierung ist damit abgeschlossen. Da der Typecheck sicherstellt, dass alle referenzierten Methoden/Felder gültig sind, kann die Übersetzung der einzelnen Klassen voneinander unabhängig geschehen. \ No newline at end of file diff --git a/src/Main.hs b/src/Main.hs index 3f4c329..bbb43b6 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,6 +1,5 @@ module Main where -import Example import Typecheck import Parser.Lexer (alexScanTokens) import Parser.JavaParser @@ -14,20 +13,20 @@ main = do args <- getArgs let filename = if null args then error "Missing filename, I need to know what to compile" - else args!!0 + else head args let outputDirectory = takeDirectory filename print ("Compiling " ++ filename) file <- readFile filename let untypedAST = parse $ alexScanTokens file - let typedAST = (typeCheckCompilationUnit untypedAST) - let assembledClasses = map (\(typedClass) -> classBuilder typedClass emptyClassFile) typedAST + let typedAST = typeCheckCompilationUnit untypedAST + let assembledClasses = map (`classBuilder` emptyClassFile) typedAST - mapM_ (\(classFile) -> let + mapM_ (\classFile -> let fileContent = pack (serialize classFile) - fileName = outputDirectory ++ "/" ++ (className classFile) ++ ".class" + fileName = outputDirectory ++ "/" ++ className classFile ++ ".class" in Data.ByteString.writeFile fileName fileContent - ) assembledClasses + ) assembledClasses + - -- 2.34.1