198 Commits

Author SHA1 Message Date
Matthias Raba
6e400ebb9d Merge branch 'master' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into documentation 2024-06-14 09:57:48 +02:00
dc2f845049 Merge pull request 'bytecode' (#3) from bytecode into master
Reviewed-on: #3
2024-06-14 07:56:36 +00:00
36c48d013a add typecheck documentation 2024-06-14 09:56:33 +02:00
Matthias Raba
07fcda5827 Merge branch 'master' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-14 09:54:47 +02:00
b1735c6300 Merge pull request 'Add initial typechecker for AST' (#2) from typedAST into master
Reviewed-on: #2
2024-06-14 07:53:29 +00:00
Matthias Raba
fa9f8c3425 added Bytecode documentation 2024-06-14 09:49:27 +02:00
Matthias Raba
2c928ad69b renamed Generator -> Builder 2024-06-14 08:54:12 +02:00
Matthias Raba
b47da4633d refactored assemblers & builders to individual files 2024-06-14 08:47:45 +02:00
Matthias Raba
3fc804e899 malicious tests 2024-06-14 08:11:58 +02:00
Matthias Raba
807aea112e added Test classes, fixed assignment dup missing 2024-06-14 07:57:38 +02:00
mrab
9e43b015b7 injecting initializers into all constructors, multiple classes per file supported 2024-06-13 22:25:35 +02:00
mrab
79a989eecf Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-13 22:22:52 +02:00
f02226bca8 add missing typeCheckVariableDeclaration 2024-06-13 22:06:10 +02:00
mrab
3acbce8afc fixed invalid dup depth for postinc/dec 2024-06-13 21:02:00 +02:00
mrab
44c6d74afb Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-13 20:51:19 +02:00
mrab
3f6eb68e91 implemented arbitrarily nested increment operators 2024-06-13 20:51:10 +02:00
7e13b3fac3 fix variable redefinition in scope not working 2024-06-13 20:35:00 +02:00
mrab
613a280079 Method & constructor calls fully working 2024-06-13 20:17:23 +02:00
mrab
fbd76deca3 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-13 17:36:09 +02:00
2139e7832c fix missing Typed Expression for local and field variable 2024-06-13 17:33:30 +02:00
mrab
9a9c508fc7 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-13 17:16:29 +02:00
baf9362634 make implicit this to explicit this for field variables 2024-06-13 15:49:59 +02:00
Matthias Raba
4def6e5804 name resolution for fields 2024-06-13 15:35:42 +02:00
Matthias Raba
3f78cdaa2d Merge branch 'typedAST' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-13 09:27:19 +02:00
710ec43959 add using standard constructor for constructor call 2024-06-13 09:26:05 +02:00
mrab
b41a77ba33 boolean AND/OR, if/else goto fixed 2024-06-12 19:57:09 +02:00
mrab
7317895800 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-06-12 16:55:31 +02:00
mrab
06dad4d7f9 resolved circular imports 2024-06-12 16:55:19 +02:00
b525d14192 add typechecking for returns, fix finding of constructors, fix if statement 2024-06-11 20:04:59 +02:00
a62fe50a0d Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-06-11 18:50:39 +02:00
7c52084bbe fix test 2024-06-10 17:19:56 +02:00
fanoll
98b02446ba remove unused thisMeth type. Returns are combined and already checked against return Type 2024-06-10 12:53:59 +02:00
82b2b4a6e1 fix intliteral 0 2024-05-31 17:39:56 +02:00
3d351ee02b fix false error message 2024-05-31 17:11:46 +02:00
05b599b8ff fix if typecheck 2024-05-31 17:10:50 +02:00
060321f323 Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-05-31 16:26:14 +02:00
fc96eba52e Merge remote-tracking branch 'origin/bytecode' into typedAST 2024-05-31 16:26:08 +02:00
af093fa3bb parser add increment statement 2024-05-31 12:10:00 +02:00
666856b33a parser add constructor call 2024-05-31 12:06:14 +02:00
mrab
578f959d7c Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 11:44:59 +02:00
mrab
bbe0d86670 partial revert 2024-05-31 11:44:53 +02:00
30365d76bd Revert "parser add preincrement and decrement conversion"
This reverts commit 2acba0f283.
2024-05-31 11:44:10 +02:00
mrab
c0caa7ce01 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 11:24:26 +02:00
mrab
2b7d217e8a moved expression statement and expressionstatement 2024-05-31 11:24:11 +02:00
2acba0f283 parser add preincrement and decrement conversion 2024-05-31 11:23:25 +02:00
84613fabe0 parser add field subaccess 2024-05-31 11:07:19 +02:00
408111df51 parser implement field access 2024-05-31 11:04:23 +02:00
mrab
6abb9ae8ba Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 10:37:07 +02:00
45114caffb Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-05-31 10:36:18 +02:00
a62d4c35e1 Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-05-31 10:35:28 +02:00
4c82f5bfdd Revert "make UnaryOperation a statementexpression"
This reverts commit 25c0c33109.
2024-05-31 10:34:52 +02:00
8cf022e6e0 Revert "remove expression from unary operators"
This reverts commit 56cc1a9374.
2024-05-31 10:34:48 +02:00
d4f474ba54 fix increment 2024-05-31 10:34:47 +02:00
761244df74 Revert "change back UnaryOperation and move it to StatementExpression"
This reverts commit c690b01396.
2024-05-31 10:34:42 +02:00
c690b01396 change back UnaryOperation and move it to StatementExpression 2024-05-31 10:07:01 +02:00
56cc1a9374 remove expression from unary operators 2024-05-31 10:02:53 +02:00
25c0c33109 make UnaryOperation a statementexpression 2024-05-31 10:00:12 +02:00
mrab
5aa193bc08 Merge branch 'bytecode' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 09:44:45 +02:00
mrab
2e7c28812b Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 09:42:16 +02:00
mrab
2ac3b60e79 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-31 09:41:37 +02:00
1e59ba9e27 parser implement constructor with statements 2024-05-28 23:47:34 +02:00
f4d31a85cc parser add dot method call 2024-05-28 23:27:23 +02:00
0694dfb77d Merge branch 'typedAST' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into typedAST 2024-05-26 21:45:33 +02:00
6ab64371b5 feat: Add support for block statements in type checking 2024-05-26 21:45:11 +02:00
067bf8d796 refactor: move incremental types to statementsexpressions 2024-05-19 09:20:43 +02:00
24c2920c9c parser implement methodcall with params 2024-05-16 11:58:27 +02:00
3c70f9f1f6 parser add methodcall no params 2024-05-16 11:49:52 +02:00
mrab
6f4143a60a increment/decrement 2024-05-16 11:42:48 +02:00
c347e6c630 add test 2024-05-16 11:31:58 +02:00
mrab
fadbdf1035 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-16 11:19:46 +02:00
f81a812f59 parser add assigment statement 2024-05-16 11:15:41 +02:00
mrab
d0d2cbd081 restructured bytecode code 2024-05-16 10:39:27 +02:00
e95377ee72 Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-05-16 10:09:26 +02:00
mrab
ddab04f063 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-16 09:55:03 +02:00
mrab
09f70ca789 While assembly 2024-05-16 09:53:36 +02:00
2a502a6c67 fix return null for object return type not possible 2024-05-16 09:36:35 +02:00
mrab
d13e24c216 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-16 09:33:16 +02:00
mrab
53e1afc9e4 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-16 09:28:18 +02:00
mrab
535a6891ad Local variable assignment 2024-05-16 09:28:00 +02:00
07837f7d5f null is now a valid declaration value for a object 2024-05-15 13:04:06 +02:00
a80dc1d34b null is now a valid declaration value for a object 2024-05-15 12:51:14 +02:00
e572975bda add: inject default constructor 2024-05-15 11:45:41 +02:00
mrab
207fb5c5f3 local variables working 2024-05-15 11:23:40 +02:00
b11adcf907 parser add this and braced expressions 2024-05-15 10:32:03 +02:00
b095678769 add: local variable assembler 2024-05-15 10:01:01 +02:00
a4d41b9ef7 parser add timesequal, divideequal, moduloequal, minusequal, andequal, xorequal, orequal 2024-05-14 23:35:56 +02:00
9f658397de parser implement assign 2024-05-14 23:14:43 +02:00
524c667c43 parser add while loop 2024-05-14 16:48:45 +02:00
mrab
a4fff37b07 Bugfix: else assembly 2024-05-14 14:09:24 +02:00
mrab
c02de8f9b2 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-14 13:57:25 +02:00
mrab
a179dec3ea else Assembly 2024-05-14 13:57:20 +02:00
5723f6c662 parser add ifthenelse 2024-05-14 13:57:01 +02:00
mrab
95178366a2 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-14 13:21:12 +02:00
fef619ac03 parser add if then 2024-05-14 13:20:46 +02:00
mrab
f9f984568f block and if assembly (no else) 2024-05-14 13:20:37 +02:00
a7b4c7e58e Merge remote-tracking branch 'origin/create-parser' into typedAST 2024-05-14 12:34:32 +02:00
mrab
5284d6ecba Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-14 12:34:11 +02:00
aa3b196ab5 set constructor return type void 2024-05-14 12:33:42 +02:00
mrab
d1d9a5d6e1 constructor and method call using this 2024-05-14 12:29:55 +02:00
7ba9743b0a parser add increment decrement 2024-05-14 11:34:57 +02:00
83e6964d71 Merge remote-tracking branch 'origin/typedAST' into create-parser 2024-05-14 11:28:15 +02:00
f8b0b59c5d refactor unused function 2024-05-14 11:26:27 +02:00
fae3498bd9 Refactor BinaryOperations and add char operations 2024-05-14 11:21:06 +02:00
3fa9736172 add post and pre operations 2024-05-14 11:19:26 +02:00
mrab
86e15b5856 method assembly is actually used (yay) 2024-05-14 11:12:12 +02:00
mrab
393253c9bb Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-14 10:29:27 +02:00
mrab
09469e0e45 added boolean expression assembly 2024-05-14 10:28:57 +02:00
64829c2086 replace parseBlock with parseStatement 2024-05-14 10:16:56 +02:00
666fb4ee1a Assignment now rakes expression instead of identifier 2024-05-14 10:04:00 +02:00
e350c23db1 fix spelling error 2024-05-14 09:36:18 +02:00
20184c5e26 add modulus to AST and Typecheck 2024-05-13 13:13:24 +02:00
c29aa13d69 parser add and, or, xor 2024-05-09 00:09:08 +02:00
de078639fc parser add multiplication, division, modulo, addition, subtraction 2024-05-08 23:37:18 +02:00
1d5463582f add differentiation between local and field variable 2024-05-08 17:39:43 +02:00
4f61431c79 parser add unary not, plus and minus 2024-05-08 16:59:10 +02:00
f82776e636 parser add return 2024-05-08 15:52:44 +02:00
8de0309107 Merge branch 'create-parser' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into create-parser 2024-05-08 15:51:40 +02:00
90fa658c8f parser implement varibale initialization 2024-05-08 15:36:05 +02:00
ebf54bf4cb parser implement int initializiation 2024-05-08 15:36:05 +02:00
b957f512ad parser add emptystatement 2024-05-08 15:36:05 +02:00
b82e205bcd parser implement nested blocks 2024-05-08 15:36:04 +02:00
98321fa162 parser implement multiple blockstatements 2024-05-08 15:36:04 +02:00
58367a95e6 parser implement localvardeclaration 2024-05-08 15:36:04 +02:00
ea18431b77 parser add constructor 2024-05-08 15:36:04 +02:00
9e6f31479f parser improve error messages 2024-05-08 15:36:04 +02:00
f183b8b183 parser implement methods with params 2024-05-08 15:36:04 +02:00
3130f7f7d4 add void methods 2024-05-08 15:36:04 +02:00
f57f360abf parser add empty methods 2024-05-08 15:36:04 +02:00
5e90f0d0ee parser add modifier 2024-05-08 15:36:04 +02:00
301b87c9ac parser implement multiple declarations 2024-05-08 15:36:04 +02:00
decc909c23 parser add custom type fields 2024-05-08 15:36:04 +02:00
mrab
d54c7cd7e6 Working pipeline for whole compiler 2024-05-08 15:23:18 +02:00
mrab
9d7af6effb Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-08 14:35:45 +02:00
c49b7f556c parser implement varibale initialization 2024-05-08 13:29:10 +02:00
c8ce7f4b43 parser implement int initializiation 2024-05-08 13:16:00 +02:00
ebfb912183 parser add emptystatement 2024-05-08 11:05:02 +02:00
mrab
7cbf8aad08 Merge branch 'create-parser' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-08 11:04:43 +02:00
mrab
93702071fb Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-08 10:57:21 +02:00
e8151ad2f0 parser implement nested blocks 2024-05-08 10:37:40 +02:00
a4b933d659 parser implement multiple blockstatements 2024-05-08 10:30:50 +02:00
7bfdeed620 add local and field var 2024-05-08 10:26:19 +02:00
2f81f82661 parser implement localvardeclaration 2024-05-08 10:20:55 +02:00
fecc7eceaa parser add constructor 2024-05-08 00:02:38 +02:00
5e8a2a90ed parser improve error messages 2024-05-08 00:00:18 +02:00
mrab
176b98d659 expression assembler 2024-05-07 21:18:01 +02:00
ced5d1df9c add program tests 2024-05-07 20:10:39 +02:00
acab0add95 added methodcall and assigment test 2024-05-07 16:42:01 +02:00
mrab
c0d48b5274 Merge branch 'bytecode' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-07 16:38:28 +02:00
b040130569 removed unneccesary comments 2024-05-07 16:36:42 +02:00
e723e9cb18 Merge branch 'bytecode' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-07 16:35:49 +02:00
d47c7f5a45 added tests 2024-05-07 16:34:43 +02:00
8c3c3625b9 fixed method and name resolution typecheck 2024-05-07 16:34:34 +02:00
f9df24f456 Merge branch 'bytecode' of https://gitea.hb.dhbw-stuttgart.de/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-07 16:34:22 +02:00
b7d8f19433 add: MethodBuilder 2024-05-07 16:32:36 +02:00
mrab
8b5650dd61 code serialization, minimal opcodes 2024-05-07 15:46:08 +02:00
158197b440 parser implement methods with params 2024-05-07 15:03:04 +02:00
068b97e0e7 add: constantPoolBuilding 2024-05-07 14:27:20 +02:00
c920a3dfb6 add: Classbuilder & ClassFileBuilder 2024-05-07 11:54:54 +02:00
358293d4d4 add typecheck method and constructor call 2024-05-07 11:28:35 +02:00
7422510267 add expression to methodcall 2024-05-07 11:09:10 +02:00
40c7cab0e3 apply pull and add tests 2024-05-07 11:04:40 +02:00
mrab
f988c80b83 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-07 09:59:01 +02:00
mrab
ff03be2671 moved type check to its own file 2024-05-07 09:53:16 +02:00
mrab
c248dc3676 Merge branch 'typedAST' of ssh://gitea.hb.dhbw-stuttgart.de:2222/MisterChaos69/MiniJavaCompiler into bytecode 2024-05-07 09:49:32 +02:00
75272498c4 fixed typechecker trying to find a field of a class in the symboltable 2024-05-07 08:20:10 +02:00
e3fa3efd95 add void methods 2024-05-06 23:51:33 +02:00
d3ea0d6d7b parser add empty methods 2024-05-06 23:34:03 +02:00
8d9d28cb1f added means of testing 2024-05-06 23:15:22 +02:00
f14470d8cc add name resolution, fix symbol table key, value switchup 2024-05-06 23:15:10 +02:00
ae6232380e Fix type mismatch and redefinition issues in typeCheckStatement function 2024-05-06 19:52:06 +02:00
0debfe995d add full block type checking 2024-05-06 19:13:46 +02:00
bea9a039a8 parser add modifier 2024-05-06 11:03:26 +02:00
32cce1f137 parser implement multiple declarations 2024-05-06 10:59:41 +02:00
e04c475c55 parser add custom type fields 2024-05-06 10:56:56 +02:00
62bd191314 update block and local variable typecheck 2024-05-06 10:49:33 +02:00
af7162ea54 parser implement fields 2024-05-06 09:53:14 +02:00
dbdd2f85bd inital commit with first version 2024-05-05 10:00:11 +02:00
mrab
f93a1ba58c working export of binary class files 2024-05-04 19:08:41 +02:00
mrab
967c3a4b41 Moved bytecode stuff into separate folder 2024-05-04 16:28:42 +02:00
mrab
e2f978ca93 serialization of class files 2024-05-04 15:41:05 +02:00
f4c70d4d30 add:Serialization Framework 2024-05-04 13:02:35 +02:00
mrab
580e56ebbb Class file data structure 2024-05-04 11:39:44 +02:00
fa9cb761d5 parser implement class 2024-05-03 15:58:21 +02:00
fa2dc24aa0 indent with spaces 2024-05-02 22:38:58 +02:00
fe4091da99 start implementing parser 2024-05-02 22:33:46 +02:00
db2b4a142c lexer adjustments 2024-05-02 22:21:53 +02:00
95063ac64f lexer implement operators 2024-05-02 21:15:59 +02:00
4108eb58c1 lexer implement separators and fix literals 2024-05-02 21:15:59 +02:00
ecd778cc70 lexer implement literals 2024-05-02 21:15:59 +02:00
e5baa701b2 rename Token Identifier to IDENTIFIER 2024-05-02 21:15:59 +02:00
5d49da69a6 lexer implement keywords 2024-05-02 21:15:59 +02:00
b8e566a2a0 lexer implement identifiers 2024-05-02 21:15:59 +02:00
6d39110794 lexer implement comments 2024-05-02 21:15:59 +02:00
mrab
88604bf544 Typed AST elements 2024-05-02 13:56:40 +02:00
mrab
8d8bc06034 AST 2024-05-02 13:44:02 +02:00
53b123b997 Merge pull request 'project-structure' (#1) from project-structure into master
Reviewed-on: #1
2024-05-02 08:28:04 +00:00
d59594ab74 create cabal project structure 2024-05-01 21:20:38 +02:00
76be568b57 add happy file 2024-04-24 17:26:56 +02:00
29 changed files with 3014 additions and 0 deletions

28
.gitignore vendored Normal file
View File

@@ -0,0 +1,28 @@
dist
dist-*
cabal-dev
*.o
*.hi
*.hie
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
*.class
*.local~*
src/Parser/JavaParser.hs
src/Parser/Parser.hs
src/Parser/Lexer.hs
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
cabal.project.local~
.HTF/
.ghc.environment.*

View File

@@ -1,3 +1,14 @@
# MiniJavaCompiler
It's a compiler for a minimal version of Java with watered down syntax.
Written in Haskell.
# Cabal Commands
run main
```
cabal run
```
run tests
```
cabal test
```

View File

@@ -0,0 +1,38 @@
// compile all test files using:
// ls Test/JavaSources/*.java | grep -v ".*Main.java" | xargs -I {} cabal run compiler {}
// compile (in project root) using:
// javac -g:none -sourcepath Test/JavaSources/ Test/JavaSources/Main.java
// afterwards, run using
// java -ea -cp Test/JavaSources/ Main
public class Main {
public static void main(String[] args)
{
TestEmpty empty = new TestEmpty();
TestFields fields = new TestFields();
TestConstructor constructor = new TestConstructor(42);
TestMultipleClasses multipleClasses = new TestMultipleClasses();
TestRecursion recursion = new TestRecursion(10);
TestMalicious malicious = new TestMalicious();
// constructing a basic class works
assert empty != null;
// initializers (and default initializers to 0/null) work
assert fields.a == 0 && fields.b == 42;
// constructor parameters override initializers
assert constructor.a == 42;
// multiple classes within one file work. Referencing another classes fields/methods works.
assert multipleClasses.a.a == 42;
// self-referencing classes work.
assert recursion.child.child.child.child.child.value == 5;
// self-referencing methods work.
assert recursion.fibonacci(15) == 610;
// intentionally dodgy expressions work
assert malicious.assignNegativeIncrement(42) == -42;
assert malicious.tripleAddition(1, 2, 3) == 6;
for(int i = 0; i < 3; i++)
{
assert malicious.cursedFormatting(i) == i;
}
}
}

View File

@@ -0,0 +1,9 @@
public class TestConstructor
{
public int a = -1;
public TestConstructor(int initial_value)
{
a = initial_value;
}
}

View File

@@ -0,0 +1,4 @@
public class TestEmpty
{
}

View File

@@ -0,0 +1,5 @@
public class TestFields
{
public int a;
public int b = 42;
}

View File

@@ -0,0 +1,41 @@
public class TestMalicious {
public int assignNegativeIncrement(int n)
{
return n=-++n+1;
}
public int tripleAddition(int a, int b, int c)
{
return a+++b+++c++;
}
public int cursedFormatting(int n)
{
if
(n == 0)
{
return ((((0))));
}
else
if(n ==
1)
{
return
1;
}else {
return
2
;
}
}
}

View File

@@ -0,0 +1,9 @@
public class TestMultipleClasses
{
public AnotherTestClass a = new AnotherTestClass();
}
class AnotherTestClass
{
public int a = 42;
}

View File

@@ -0,0 +1,27 @@
public class TestRecursion {
public int value = 0;
public TestRecursion child = null;
public TestRecursion(int n)
{
value = n;
if(n > 0)
{
child = new TestRecursion(n - 1);
}
}
public int fibonacci(int n)
{
if(n < 2)
{
return n;
}
else
{
return fibonacci(n - 1) + this.fibonacci(n - 2);
}
}
}

49
Test/TestLexer.hs Normal file
View File

@@ -0,0 +1,49 @@
module TestLexer(tests) where
import Test.HUnit
import Parser.Lexer
emptyTokenList :: [Token]
emptyTokenList = []
testCommentSomething = TestCase $ assertEqual "scan '/*Something*/'" emptyTokenList $ alexScanTokens "/*Something*/"
testEmptyComment = TestCase $ assertEqual "scan '/*x*/'" emptyTokenList $ alexScanTokens "/**/"
testLineComment = TestCase $ assertEqual "scan '// comment'" emptyTokenList $ alexScanTokens "// comment"
testLineCommentEnds = TestCase $ assertEqual "scan '// com\\n'" emptyTokenList $ alexScanTokens "// com\n"
testIdentifier = TestCase $ assertEqual "scan 'identifier'" [IDENTIFIER "identifier"] $ alexScanTokens "identifier"
testShortIdentifier = TestCase $ assertEqual "scan 'i'" [IDENTIFIER "i"] $ alexScanTokens "i"
testIdentifierWithNumber = TestCase $ assertEqual "scan 'i2'" [IDENTIFIER "i2"] $ alexScanTokens "i2"
testKeywordBreak = TestCase $ assertEqual "scan 'break'" [BREAK] $ alexScanTokens "break"
testKeywordInt = TestCase $ assertEqual "scan 'int'" [INT] $ alexScanTokens "int"
testIntLiteral = TestCase $ assertEqual "scan '234'" [INTEGERLITERAL 234] $ alexScanTokens "234"
testIntLiteral2 = TestCase $ assertEqual "scan '54_2'" [INTEGERLITERAL 542] $ alexScanTokens "54_2"
testCharLiteral = TestCase $ assertEqual "scan ''f''" [CHARLITERAL 'f'] $ alexScanTokens "'f'"
testBoolLiteralTrue = TestCase $ assertEqual "scan 'true'" [BOOLLITERAL True] $ alexScanTokens "true"
testBoolLiteralFalse = TestCase $ assertEqual "scan 'false'" [BOOLLITERAL False] $ alexScanTokens "false"
testLBrace = TestCase $ assertEqual "scan '('" [LBRACE] $ alexScanTokens "("
testAnd = TestCase $ assertEqual "scan '&&'" [AND] $ alexScanTokens "&&"
tests = TestList [
TestLabel "TestCommentSomething" testCommentSomething,
TestLabel "TestEmptyComment" testEmptyComment,
TestLabel "TestLineComment" testLineComment,
TestLabel "TestLineCommentEnds" testLineCommentEnds,
TestLabel "TestIdentifier" testIdentifier,
TestLabel "TestShortIdentifier" testShortIdentifier,
TestLabel "TestIdentifierWithNumber" testIdentifierWithNumber,
TestLabel "TestKeywordBreak" testKeywordBreak,
TestLabel "TestKeywordInt" testKeywordInt,
TestLabel "TestIntLiteral" testIntLiteral,
TestLabel "TestIntLiteral2" testIntLiteral2,
TestLabel "TestCharLiteral" testCharLiteral,
TestLabel "TestBoolLiteralTrue" testBoolLiteralTrue,
TestLabel "TestBoolLiteralFalse" testBoolLiteralFalse,
TestLabel "TestLBrace" testLBrace,
TestLabel "TestAnd" testAnd
]

303
Test/TestParser.hs Normal file
View File

@@ -0,0 +1,303 @@
module TestParser(tests) where
import Test.HUnit
import Parser.Lexer
import Parser.JavaParser
import Ast
testSingleEmptyClass = TestCase $
assertEqual "expect single empty class hello" [Class "Hello" [] []] $
parse [CLASS, IDENTIFIER "Hello", LBRACKET, RBRACKET]
testTwoEmptyClasses = TestCase $
assertEqual "expect two empty classes" [Class "Class1" [] [], Class "Class2" [] []] $
parse [CLASS,IDENTIFIER "Class1",LBRACKET,RBRACKET,CLASS,IDENTIFIER "Class2",LBRACKET,RBRACKET]
testBooleanField = TestCase $
assertEqual "expect class with boolean field" [Class "WithBool" [] [VariableDeclaration "boolean" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithBool",LBRACKET,BOOLEAN,IDENTIFIER "value",SEMICOLON,RBRACKET]
testIntField = TestCase $
assertEqual "expect class with int field" [Class "WithInt" [] [VariableDeclaration "int" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithInt",LBRACKET,INT,IDENTIFIER "value",SEMICOLON,RBRACKET]
testCustomTypeField = TestCase $
assertEqual "expect class with foo field" [Class "WithFoo" [] [VariableDeclaration "Foo" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithFoo",LBRACKET,IDENTIFIER "Foo",IDENTIFIER "value",SEMICOLON,RBRACKET]
testMultipleDeclarationSameLine = TestCase $
assertEqual "expect class with two int fields" [Class "TwoInts" [] [VariableDeclaration "int" "num1" Nothing, VariableDeclaration "int" "num2" Nothing]] $
parse [CLASS,IDENTIFIER "TwoInts",LBRACKET,INT,IDENTIFIER "num1",COMMA,IDENTIFIER "num2",SEMICOLON,RBRACKET]
testMultipleDeclarations = TestCase $
assertEqual "expect class with int and char field" [Class "Multiple" [] [VariableDeclaration "int" "value" Nothing, VariableDeclaration "char" "letter" Nothing]] $
parse [CLASS,IDENTIFIER "Multiple",LBRACKET,INT,IDENTIFIER "value",SEMICOLON,CHAR,IDENTIFIER "letter",SEMICOLON,RBRACKET]
testWithModifier = TestCase $
assertEqual "expect class with int field" [Class "WithInt" [] [VariableDeclaration "int" "value" Nothing]] $
parse [ABSTRACT,CLASS,IDENTIFIER "WithInt",LBRACKET,PUBLIC,INT,IDENTIFIER "value",SEMICOLON,RBRACKET]
testEmptyMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "int" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,INT,IDENTIFIER "foo",LBRACE,RBRACE,SEMICOLON,RBRACKET]
testEmptyPrivateMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "int" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,PRIVATE,INT,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testEmptyVoidMethod = TestCase $
assertEqual "expect class with method" [Class "WithMethod" [MethodDeclaration "void" "foo" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithMethod",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testEmptyMethodWithParam = TestCase $
assertEqual "expect class with method with param" [Class "WithParam" [MethodDeclaration "void" "foo" [ParameterDeclaration "int" "param"] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithParam",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,INT,IDENTIFIER "param",RBRACE,SEMICOLON,RBRACKET]
testEmptyMethodWithParams = TestCase $
assertEqual "expect class with multiple params" [Class "WithParams" [MethodDeclaration "void" "foo" [ParameterDeclaration "int" "p1",ParameterDeclaration "Custom" "p2"] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithParams",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,INT,IDENTIFIER "p1",COMMA,IDENTIFIER "Custom",IDENTIFIER "p2",RBRACE,SEMICOLON,RBRACKET]
testClassWithMethodAndField = TestCase $
assertEqual "expect class with method and field" [Class "WithMethodAndField" [MethodDeclaration "void" "foo" [] (Block []), MethodDeclaration "int" "bar" [] (Block [])] [VariableDeclaration "int" "value" Nothing]] $
parse [CLASS,IDENTIFIER "WithMethodAndField",LBRACKET,VOID,IDENTIFIER "foo",LBRACE,RBRACE,LBRACKET,RBRACKET,INT,IDENTIFIER "value",SEMICOLON,INT,IDENTIFIER "bar",LBRACE,RBRACE,SEMICOLON,RBRACKET]
testClassWithConstructor = TestCase $
assertEqual "expect class with constructor" [Class "WithConstructor" [MethodDeclaration "void" "<init>" [] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithConstructor",LBRACKET,IDENTIFIER "WithConstructor",LBRACE,RBRACE,LBRACKET,RBRACKET,RBRACKET]
testConstructorWithParams = TestCase $
assertEqual "expect constructor with params" [Class "WithParams" [MethodDeclaration "void" "<init>" [ParameterDeclaration "int" "p1"] (Block [])] []] $
parse [CLASS,IDENTIFIER "WithParams",LBRACKET,IDENTIFIER "WithParams",LBRACE,INT,IDENTIFIER "p1",RBRACE,LBRACKET,RBRACKET,RBRACKET]
testConstructorWithStatements = TestCase $
assertEqual "expect constructor with statement" [Class "WithConstructor" [MethodDeclaration "void" "<init>" [] (Block [Return Nothing])] []] $
parse [CLASS,IDENTIFIER "WithConstructor",LBRACKET,IDENTIFIER "WithConstructor",LBRACE,RBRACE,LBRACKET,RETURN,SEMICOLON,RBRACKET,RBRACKET]
testEmptyBlock = TestCase $ assertEqual "expect empty block" [Block []] $ parseStatement [LBRACKET,RBRACKET]
testBlockWithLocalVarDecl = TestCase $
assertEqual "expect block with local var delcaration" [Block [LocalVariableDeclaration $ VariableDeclaration "int" "localvar" Nothing]] $
parseStatement [LBRACKET,INT,IDENTIFIER "localvar",SEMICOLON,RBRACKET]
testBlockWithMultipleLocalVarDecls = TestCase $
assertEqual "expect block with multiple local var declarations" [Block [LocalVariableDeclaration $ VariableDeclaration "int" "var1" Nothing, LocalVariableDeclaration $ VariableDeclaration "boolean" "var2" Nothing]] $
parseStatement [LBRACKET,INT,IDENTIFIER "var1",SEMICOLON,BOOLEAN,IDENTIFIER "var2",SEMICOLON,RBRACKET]
testNestedBlocks = TestCase $
assertEqual "expect block with block inside" [Block [Block []]] $
parseStatement [LBRACKET,LBRACKET,RBRACKET,RBRACKET]
testBlockWithEmptyStatement = TestCase $
assertEqual "expect empty block" [Block []] $
parseStatement [LBRACKET,SEMICOLON,SEMICOLON,RBRACKET]
testExpressionIntLiteral = TestCase $
assertEqual "expect IntLiteral" (IntegerLiteral 3) $
parseExpression [INTEGERLITERAL 3]
testFieldWithInitialization = TestCase $
assertEqual "expect Class with initialized field" [Class "WithInitField" [] [VariableDeclaration "int" "number" $ Just $ IntegerLiteral 3]] $
parse [CLASS,IDENTIFIER "WithInitField",LBRACKET,INT,IDENTIFIER "number",ASSIGN,INTEGERLITERAL 3,SEMICOLON,RBRACKET]
testLocalBoolWithInitialization = TestCase $
assertEqual "expect block with with initialized local var" [Block [LocalVariableDeclaration $ VariableDeclaration "boolean" "b" $ Just $ BooleanLiteral False]] $
parseStatement [LBRACKET,BOOLEAN,IDENTIFIER "b",ASSIGN,BOOLLITERAL False,SEMICOLON,RBRACKET]
testFieldNullWithInitialization = TestCase $
assertEqual "expect Class with initialized field" [Class "WithInitField" [] [VariableDeclaration "Object" "bar" $ Just NullLiteral]] $
parse [CLASS,IDENTIFIER "WithInitField",LBRACKET,IDENTIFIER "Object",IDENTIFIER "bar",ASSIGN,NULLLITERAL,SEMICOLON,RBRACKET]
testReturnVoid = TestCase $
assertEqual "expect block with return nothing" [Block [Return Nothing]] $
parseStatement [LBRACKET,RETURN,SEMICOLON,RBRACKET]
testExpressionNot = TestCase $
assertEqual "expect expression not" (UnaryOperation Not (Reference "boar")) $
parseExpression [NOT,IDENTIFIER "boar"]
testExpressionMinus = TestCase $
assertEqual "expect expression minus" (UnaryOperation Minus (Reference "boo")) $
parseExpression [MINUS,IDENTIFIER "boo"]
testExpressionMultiplication = TestCase $
assertEqual "expect multiplication" (BinaryOperation Multiplication (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",TIMES,INTEGERLITERAL 3]
testExpressionDivision = TestCase $
assertEqual "expect division" (BinaryOperation Division (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",DIV,INTEGERLITERAL 3]
testExpressionModulo = TestCase $
assertEqual "expect modulo operation" (BinaryOperation Modulo (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",MODULO,INTEGERLITERAL 3]
testExpressionAddition = TestCase $
assertEqual "expect addition" (BinaryOperation Addition (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",PLUS,INTEGERLITERAL 3]
testExpressionSubtraction = TestCase $
assertEqual "expect subtraction" (BinaryOperation Subtraction (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",MINUS,INTEGERLITERAL 3]
testExpressionLessThan = TestCase $
assertEqual "expect comparision less than" (BinaryOperation CompareLessThan (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",LESS,INTEGERLITERAL 3]
testExpressionGreaterThan = TestCase $
assertEqual "expect comparision greater than" (BinaryOperation CompareGreaterThan (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",GREATER,INTEGERLITERAL 3]
testExpressionLessThanEqual = TestCase $
assertEqual "expect comparision less than or equal" (BinaryOperation CompareLessOrEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",LESSEQUAL,INTEGERLITERAL 3]
testExpressionGreaterThanOrEqual = TestCase $
assertEqual "expect comparision greater than or equal" (BinaryOperation CompareGreaterOrEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",GREATEREQUAL,INTEGERLITERAL 3]
testExpressionEqual = TestCase $
assertEqual "expect comparison equal" (BinaryOperation CompareEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",EQUAL,INTEGERLITERAL 3]
testExpressionNotEqual = TestCase $
assertEqual "expect comparison equal" (BinaryOperation CompareNotEqual (Reference "bar") (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "bar",NOTEQUAL,INTEGERLITERAL 3]
testExpressionAnd = TestCase $
assertEqual "expect and expression" (BinaryOperation And (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",AND,IDENTIFIER "baz"]
testExpressionXor = TestCase $
assertEqual "expect xor expression" (BinaryOperation BitwiseXor (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",XOR,IDENTIFIER "baz"]
testExpressionOr = TestCase $
assertEqual "expect or expression" (BinaryOperation Or (Reference "bar") (Reference "baz")) $
parseExpression [IDENTIFIER "bar",OR,IDENTIFIER "baz"]
testExpressionPostIncrement = TestCase $
assertEqual "expect PostIncrement" (StatementExpressionExpression $ PostIncrement (Reference "a")) $
parseExpression [IDENTIFIER "a",INCREMENT]
testExpressionPostDecrement = TestCase $
assertEqual "expect PostDecrement" (StatementExpressionExpression $ PostDecrement (Reference "a")) $
parseExpression [IDENTIFIER "a",DECREMENT]
testExpressionPreIncrement = TestCase $
assertEqual "expect PreIncrement" (StatementExpressionExpression $ PreIncrement (Reference "a")) $
parseExpression [INCREMENT,IDENTIFIER "a"]
testExpressionPreDecrement = TestCase $
assertEqual "expect PreIncrement" (StatementExpressionExpression $ PreDecrement (Reference "a")) $
parseExpression [DECREMENT,IDENTIFIER "a"]
testExpressionAssign = TestCase $
assertEqual "expect assign 5 to a" (StatementExpressionExpression (Assignment (Reference "a") (IntegerLiteral 5))) $
parseExpression [IDENTIFIER "a",ASSIGN,INTEGERLITERAL 5]
testExpressionTimesEqual = TestCase $
assertEqual "expect assign and multiplication" (StatementExpressionExpression (Assignment (Reference "a") (BinaryOperation Multiplication (Reference "a") (IntegerLiteral 5)))) $
parseExpression [IDENTIFIER "a",TIMESEQUAL,INTEGERLITERAL 5]
testExpressionDivideEqual = TestCase $
assertEqual "expect assign and division" (StatementExpressionExpression (Assignment (Reference "a") (BinaryOperation Division (Reference "a") (IntegerLiteral 5)))) $
parseExpression [IDENTIFIER "a",DIVEQUAL,INTEGERLITERAL 5]
testExpressionPlusEqual = TestCase $
assertEqual "expect assign and addition" (StatementExpressionExpression (Assignment (Reference "a") (BinaryOperation Addition (Reference "a") (IntegerLiteral 5)))) $
parseExpression [IDENTIFIER "a",PLUSEQUAL,INTEGERLITERAL 5]
testExpressionMinusEqual = TestCase $
assertEqual "expect assign and subtraction" (StatementExpressionExpression (Assignment (Reference "a") (BinaryOperation Subtraction (Reference "a") (IntegerLiteral 5)))) $
parseExpression [IDENTIFIER "a",MINUSEQUAL,INTEGERLITERAL 5]
testExpressionThis = TestCase $
assertEqual "expect this" (Reference "this") $
parseExpression [THIS]
testExpressionBraced = TestCase $
assertEqual "expect braced expresssion" (BinaryOperation Multiplication (Reference "b") (BinaryOperation Addition (Reference "a") (IntegerLiteral 3))) $
parseExpression [IDENTIFIER "b",TIMES,LBRACE,IDENTIFIER "a",PLUS,INTEGERLITERAL 3,RBRACE]
testExpressionPrecedence = TestCase $
assertEqual "expect times to be inner expression" (BinaryOperation Addition (BinaryOperation Multiplication (Reference "b") (Reference "a")) (IntegerLiteral 3)) $
parseExpression [IDENTIFIER "b",TIMES,IDENTIFIER "a",PLUS,INTEGERLITERAL 3]
testExpressionMethodCallNoParams = TestCase $
assertEqual "expect methodcall no params" (StatementExpressionExpression (MethodCall (Reference "this") "foo" [])) $
parseExpression [IDENTIFIER "foo",LBRACE,RBRACE]
testExpressionMethodCallOneParam = TestCase $
assertEqual "expect methodcall one param" (StatementExpressionExpression (MethodCall (Reference "this") "foo" [Reference "a"])) $
parseExpression [IDENTIFIER "foo",LBRACE,IDENTIFIER "a",RBRACE]
testExpressionMethodCallTwoParams = TestCase $
assertEqual "expect methocall two params" (StatementExpressionExpression (MethodCall (Reference "this") "foo" [Reference "a", IntegerLiteral 5])) $
parseExpression [IDENTIFIER "foo",LBRACE,IDENTIFIER "a",COMMA,INTEGERLITERAL 5,RBRACE]
testExpressionThisMethodCall = TestCase $
assertEqual "expect this methocall" (StatementExpressionExpression (MethodCall (Reference "this") "foo" [])) $
parseExpression [THIS,DOT,IDENTIFIER "foo",LBRACE,RBRACE]
testExpressionThisMethodCallParam = TestCase $
assertEqual "expect this methocall" (StatementExpressionExpression (MethodCall (Reference "this") "foo" [Reference "x"])) $
parseExpression [THIS,DOT,IDENTIFIER "foo",LBRACE,IDENTIFIER "x",RBRACE]
testExpressionFieldAccess = TestCase $
assertEqual "expect NameResolution" (BinaryOperation NameResolution (Reference "this") (Reference "b")) $
parseExpression [THIS,DOT,IDENTIFIER "b"]
testExpressionSimpleFieldAccess = TestCase $
assertEqual "expect Reference" (Reference "a") $
parseExpression [IDENTIFIER "a"]
testExpressionFieldSubAccess = TestCase $
assertEqual "expect NameResolution without this" (BinaryOperation NameResolution (Reference "a") (Reference "b")) $
parseExpression [IDENTIFIER "a",DOT,IDENTIFIER "b"]
testExpressionConstructorCall = TestCase $
assertEqual "expect constructor call" (StatementExpressionExpression (ConstructorCall "Foo" [])) $
parseExpression [NEW,IDENTIFIER "Foo",LBRACE,RBRACE]
testStatementIfThen = TestCase $
assertEqual "expect empty ifthen" [If (Reference "a") (Block [Block []]) Nothing] $
parseStatement [IF,LBRACE,IDENTIFIER "a",RBRACE,LBRACKET,RBRACKET]
testStatementIfThenElse = TestCase $
assertEqual "expect empty ifthen" [If (Reference "a") (Block [Block []]) (Just (Block [Block []]))] $
parseStatement [IF,LBRACE,IDENTIFIER "a",RBRACE,LBRACKET,RBRACKET,ELSE,LBRACKET,RBRACKET]
testStatementWhile = TestCase $
assertEqual "expect while" [While (Reference "a") (Block [Block []])] $
parseStatement [WHILE,LBRACE,IDENTIFIER "a",RBRACE,LBRACKET,RBRACKET]
testStatementAssign = TestCase $
assertEqual "expect assign 5" [StatementExpressionStatement (Assignment (Reference "a") (IntegerLiteral 5))] $
parseStatement [IDENTIFIER "a",ASSIGN,INTEGERLITERAL 5,SEMICOLON]
testStatementMethodCallNoParams = TestCase $
assertEqual "expect methodcall statement no params" [StatementExpressionStatement (MethodCall (Reference "this") "foo" [])] $
parseStatement [IDENTIFIER "foo",LBRACE,RBRACE,SEMICOLON]
testStatementConstructorCall = TestCase $
assertEqual "expect constructor call" [StatementExpressionStatement (ConstructorCall "Foo" [])] $
parseStatement [NEW,IDENTIFIER "Foo",LBRACE,RBRACE,SEMICOLON]
testStatementConstructorCallWithArgs = TestCase $
assertEqual "expect constructor call" [StatementExpressionStatement (ConstructorCall "Foo" [Reference "b"])] $
parseStatement [NEW,IDENTIFIER "Foo",LBRACE,IDENTIFIER "b",RBRACE,SEMICOLON]
testStatementPreIncrement = TestCase $
assertEqual "expect increment" [StatementExpressionStatement $ PostIncrement $ Reference "a"] $
parseStatement [IDENTIFIER "a",INCREMENT,SEMICOLON]
tests = TestList [
testSingleEmptyClass,
testTwoEmptyClasses,
testBooleanField,
testIntField,
testCustomTypeField,
testMultipleDeclarations,
testWithModifier,
testEmptyMethod,
testEmptyPrivateMethod,
testEmptyVoidMethod,
testEmptyMethodWithParam,
testEmptyMethodWithParams,
testClassWithMethodAndField,
testClassWithConstructor,
testConstructorWithParams,
testConstructorWithStatements,
testEmptyBlock,
testBlockWithLocalVarDecl,
testBlockWithMultipleLocalVarDecls,
testNestedBlocks,
testBlockWithEmptyStatement,
testExpressionIntLiteral,
testFieldWithInitialization,
testLocalBoolWithInitialization,
testFieldNullWithInitialization,
testReturnVoid,
testExpressionNot,
testExpressionMinus,
testExpressionLessThan,
testExpressionGreaterThan,
testExpressionLessThanEqual,
testExpressionGreaterThanOrEqual,
testExpressionEqual,
testExpressionNotEqual,
testExpressionAnd,
testExpressionXor,
testExpressionOr,
testExpressionPostIncrement,
testExpressionPostDecrement,
testExpressionPreIncrement,
testExpressionPreDecrement,
testExpressionAssign,
testExpressionTimesEqual,
testExpressionTimesEqual,
testExpressionDivideEqual,
testExpressionPlusEqual,
testExpressionMinusEqual,
testExpressionBraced,
testExpressionThis,
testExpressionPrecedence,
testExpressionMethodCallNoParams,
testExpressionMethodCallOneParam,
testExpressionMethodCallTwoParams,
testExpressionThisMethodCall,
testExpressionThisMethodCallParam,
testExpressionFieldAccess,
testExpressionSimpleFieldAccess,
testExpressionFieldSubAccess,
testExpressionConstructorCall,
testStatementIfThen,
testStatementIfThenElse,
testStatementWhile,
testStatementAssign,
testStatementMethodCallNoParams,
testStatementConstructorCall,
testStatementConstructorCallWithArgs,
testStatementPreIncrement
]

12
Test/TestSuite.hs Normal file
View File

@@ -0,0 +1,12 @@
module Main where
import Test.HUnit
import TestLexer
import TestParser
tests = TestList [
TestLabel "TestLexer" TestLexer.tests,
TestLabel "TestParser" TestParser.tests
]
main = do runTestTTAndExit Main.tests

69
doc/bytecode.md Normal file
View File

@@ -0,0 +1,69 @@
# Bytecodegenerierung
Die Bytecodegenerierung ist letztendlich eine zweistufige Transformation:
`Getypter AST -> [ClassFile] -> [[Word8]]`
Vom AST, der bereits den Typcheck durchlaufen hat, wird zunächst eine Abbildung in die einzelnen ClassFiles vorgenommen. Diese ClassFiles werden anschließend in deren Byte-Repräsentation serialisiert.
## Serialisierung
Damit Bytecode generiert werden kann, braucht es Strukturen, die die Daten halten, die letztendlich serialisiert werden. Die JVM erwartet den kompilierten Code in handliche Pakete verpackt. Die Struktur dieser Pakete ist [so definiert](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html).
Jede Struktur, die in dieser übergreifenden Class File vorkommt, haben wir in Haskell abgebildet. Es gibt z.B die Struktur "ClassFile", die wiederum weitere Strukturen wie z.B Informationen über Felder oder Methoden der Klasse. Alle diese Strukturen implementieren folgendes TypeClass:
```
class Serializable a where
serialize :: a -> [Word8]
```
Die Struktur ClassFile ruft für deren Kinder rekursiv diese `serialize` Funktion auf. Am Ende bleibt eine flache Word8-Liste übrig, die Serialisierung ist damit abgeschlossen.
## Codegenerierung
Für die erste der beiden Transformationen (`Getypter AST -> [ClassFile]`) werden die Konzepte der "Builder" und "Assembler" eingeführt. Sie sind wie folgt definiert:
```
type ClassFileBuilder a = a -> ClassFile -> ClassFile
type Assembler a = ([ConstantInfo], [Operation], [String]) -> a -> ([ConstantInfo], [Operation], [String])
```
Die Idee hinter beiden ist, dass sie jeweils zwei Inputs haben, wobei der Rückgabewert immer den gleichen Typ hat wie einer der inputs. Das erlaubt es, eine Faltung durchzuführen. Ein ClassFileBuilder z.B bekommt als ersten Parameter den AST, und als zweiten Parameter (und Rückgabewert) eine ClassFile. Soll nun eine Klasse gebaut werden, wird der ClassFileBuilder mit dem AST und einer leeren ClassFile aufgerufen. Der Zustand dieser anfangs leeren ClassFile wird durch alle folgenden Builder/Assembler durchgeschleift, was es erlaubt, nach und nach kleinere Transformationen auf sie anzuwenden.
Der Nutzer ruft beispielsweise die Funktion `classBuilder` auf. Diese wendet nach und nach folgende Transformationen an:
```
methodsWithInjectedConstructor = injectDefaultConstructor methods
methodsWithInjectedInitializers = injectFieldInitializers name fields methodsWithInjectedConstructor
classFileWithFields = foldr fieldBuilder nakedClassFile fields
classFileWithMethods = foldr methodBuilder classFileWithFields methodsWithInjectedInitializers
classFileWithAssembledMethods = foldr methodAssembler classFileWithMethods methodsWithInjectedInitializers
```
Zuerst wird (falls notwendig) ein leerer Defaultkonstruktor in die Classfile eingefügt. Anschließend wird der AST so modifiziert, dass die Initialisierungen für alle Klassenfelder in allen Konstruktoren stattfinden. Nun beginnen die Faltungen:
1. Hinzufügen aller Klassenfelder
2. Hinzufügen aller Methoden (nur Prototypen)
3. Hinzufügen des Bytecodes in allen Methoden
Die Unterteilung von Schritt 2 und 3 ist deswegen notwendig, weil der Code einer Methode auch eine andere, erst nachher deklarierte Methode aufrufen kann. Nach Schritt 2 sind alle Methoden der Klasse bekannt. Wie beschrieben wird auch hier der Zustand über alle Faltungen mitgenommen. Jeder Schritt hat Zugriff auf alle Daten, die aus dem vorherigen Schritt bleiben. Sukkzessive wird eine korrekte ClassFile aufgebaut.
Besonders interessant ist hierbei Schritt 3. Dort wird das Verhalten jeder einzelnen Methode in Bytecode übersetzt. In diesem Schritt werden zusätzlich zu den `Buildern` noch die `Assembler` verwendet (Definition siehe oben.) Die Assembler funktionieren ähnlich wie die Builder, arbeiten allerdings nicht auf einer ClassFile, sondern auf dem Inhalt einer Methode: Sie verarbeiten jeweils ein Tupel:
`([ConstantInfo], [Operation], [String])`
Dieses repräsentiert:
`(Konstantenpool, Bytecode, Lokale Variablen)`
In der Praxis werden oft nur Bytecode und Konstanten hinzugefügt. Prinzipiell können Assembler auch Code/Konstanten entfernen oder modifizieren. Als Beispiel dient hier der Assembler `assembleExpression`:
```
assembleExpression (constants, ops, lvars) (TypedExpression _ NullLiteral) =
(constants, ops ++ [Opaconst_null], lvars)
```
Hier werden die Konstanten und lokalen Variablen des Inputs nicht berührt, dem Bytecode wird lediglich die Operation `aconst_null` hinzugefügt. Damit ist das Verhalten des gematchten Inputs - eines Nullliterals - abgebildet.
Die Assembler rufen sich teilweise rekursiv selbst auf, da ja auch der AST verschachteltes Verhalten abbilden kann. Der Startpunkt für die Assembly einer Methode ist der Builder `methodAssembler`. Dieser entspricht Schritt 3 in der obigen Übersicht.

4
doc/generate.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/sh
pandoc bytecode.md -o bytecode.docx
pandoc bytecode.md -o bytecode.pdf

55
doc/typecheck.md Normal file
View File

@@ -0,0 +1,55 @@
# Typcheck (Fabian Noll)
## Überblick und Struktur
Die Typprüfung beginnt mit der Funktion `typeCheckCompilationUnit`, die eine Kompilationseinheit als Eingabe erhält. Diese Kompilationseinheit besteht aus einer Liste von Klassen. Jede Klasse wird einzeln durch die Funktion `typeCheckClass` überprüft. Innerhalb dieser Funktion wird eine Symboltabelle erstellt, die den Namen der Klasse als Typ und `this` als Identifier enthält. Diese Symboltabelle wird verwendet, um Typinformationen nach dem Lokalitätsprinzip während der Typprüfung zugänglich zu machen und zu verwalten.
Die Typprüfung einer Klasse umfasst die Überprüfung aller Methoden und Felder. Die Methode `typeCheckMethodDeclaration` ist für die Typprüfung einzelner Methodendeklarationen verantwortlich. Sie überprüft den Rückgabetyp der Methode, die Parameter und den Methodenrumpf. Der Methodenrumpf wird durch rekursive Aufrufe von `typeCheckStatement` überprüft, die verschiedene Arten von Anweisungen wie If-Anweisungen, While-Schleifen, Rückgabeanweisungen und Blockanweisungen behandelt.
## Ablauf und Symboltabellen
Eine zentrale Komponente des Typecheckers ist die Symboltabelle (symtab), die Informationen über die Bezeichner und ihre zugehörigen Datentypen speichert. Die Symboltabelle wird kontinuierlich angepasst, während der Typechecker die verschiedenen Teile des Programms durchläuft.
### Anpassung der Symboltabelle
- **Klassenkontext**:
Beim Typcheck einer Klasse wird eine initiale Symboltabelle erstellt, die die `this`-Referenz enthält. Dies geschieht in der Funktion `typeCheckClass`.
- **Methodenkontext**:
Innerhalb einer Methode wird die Symboltabelle um die Parameter der Methode erweitert sowie den Rückgabetyp der Methode, um die einzelnen Returns dagegen zu prüfen. Dies geschieht in `typeCheckMethodDeclaration`.
- **Blockkontext**:
Bei der Überprüfung eines Blocks (`typeCheckStatement` für Block) wird die Symboltabelle für jede Anweisung innerhalb des Blocks aktualisiert. Lokale Variablen, die innerhalb des Blocks deklariert werden, werden zur Symboltabelle hinzugefügt. Das bedeutet, dass automatisch, sobald der Block zu Ende ist, alle dort deklarierten Variablen danach nicht mehr zugänglich sind.
### Unterscheidung zwischen lokalen und Feldvariablen
Bei der Typprüfung von Referenzen (`typeCheckExpression` für Reference) wird zuerst in der Symboltabelle nach dem Bezeichner gesucht. Sollte dieser gefunden werden, handelt es sich um eine lokale Variable. Wenn der Bezeichner nicht gefunden wird, wird angenommen, dass es sich um eine Feldvariable handelt. In diesem Fall wird die Klasse, zu der die `this`-Referenz gehört, durchsucht, um die Feldvariable zu finden. Dies ermöglicht die Unterscheidung zwischen lokalen Variablen und Feldvariablen. Dies ist auch nur möglich, da wir die Feldvariablen und Methoden nicht in die Symboltabelle gelegt haben und stattdessen nur die `this`-Referenz.
## Fehlerbehandlung
Ein zentraler Aspekt des Typecheckers ist die Fehlerbehandlung. Bei Typinkonsistenzen oder ungültigen Operationen werden aussagekräftige Fehlermeldungen generiert. Beispiele für solche Fehlermeldungen sind:
- **Typinkonsistenzen**:
Wenn der Rückgabetyp einer Methode nicht mit dem deklarierten Rückgabetyp übereinstimmt. Oder aber auch die Anzahl der Parameter nicht übereinstimmt.
- **Ungültige Operationen**:
Wenn eine arithmetische Operation auf inkompatiblen Typen durchgeführt wird.
- **Nicht gefundene Bezeichner**:
Wenn eine Referenz auf eine nicht definierte Variable verweist.
Diese Fehlermeldungen helfen Entwicklern, die Ursachen von Typfehlern schnell zu identifizieren und zu beheben. Generell sind diese oftmals sehr spezifisch, was das Problem recht schnell identifizieren sollte. Z.B. falsche Reihenfolge / falsche Typen der Parameter beim Methodenaufruf sind direkt erkennbar.
## Typprüfung von Kontrollstrukturen und Blöcken
### If-Anweisungen
Bei der Typprüfung einer If-Anweisung (`typeCheckStatement` für If) wird zuerst der Typ der Bedingung überprüft, um sicherzustellen, dass es sich um einen booleschen Ausdruck handelt. Anschließend werden die Then- und Else-Zweige geprüft. Der Typ der If-Anweisung selbst wird durch die Vereinheitlichung der Typen der Then- und Else-Zweige bestimmt. Falls einer der Zweige keinen Rückgabewert hat, wird angenommen, dass der Rückgabewert `void` ist. Dies wurde so gelöst, um im Typchecker feststellen zu können, ob beide Zweige einen Return haben. Wenn nur einer der Zweige ein Return hat, wird im umliegenden Block ein weiteres benötigt, was durch den Typ `void` erzwungen wird. Dadurch weiß der Typchecker Bescheid.
### Block-Anweisungen
Die Typprüfung eines Blocks erfolgt in `typeCheckStatement` für Block. Jede Anweisung im Block wird nacheinander überprüft und die Symboltabelle wird entsprechend aktualisiert. Der Typ des Blocks wird durch die Vereinheitlichung der Typen aller Anweisungen im Block bestimmt. Wenn der Block keine Anweisungen hat, wird der Typ `void` angenommen.
### Rückgabeanweisungen
Die Typprüfung einer Rückgabeanweisung (`typeCheckStatement` für Return) überprüft, ob der Rückgabewert der Anweisung mit dem deklarierten Rückgabetyp der Methode übereinstimmt. Dafür wurde zu Beginn der Methodentypprüfung der Rückgabetyp der Methode in die Symboltabelle eingetragen. Wenn der Rückgabewert `null` ist, wird überprüft, ob der deklarierte Rückgabetyp ein Objekttyp ist. Dies stellt sicher, dass Methoden immer den korrekten Typ zurückgeben. Generell wird bei der Prüfung nach dem UpperBound geschaut und ebenfalls wird nachgeschaut, ob, wenn der Rückgabetyp `Object` ist, der Return-Wert auch eine tatsächlich existierende Klasse ist, indem in die Klassentabelle geschaut wird.

51
project.cabal Normal file
View File

@@ -0,0 +1,51 @@
name: MiniJavaCompiler
version: 0.1.0.0
build-type: Simple
cabal-version: >= 1.10
Synopsis: A compiler for a minimal version of Java with watered down syntax.
executable compiler
main-is: Main.hs
build-depends: base,
array,
HUnit,
utf8-string,
bytestring,
filepath
default-language: Haskell2010
hs-source-dirs: src
build-tool-depends: alex:alex, happy:happy
other-modules: Parser.Lexer,
Parser.JavaParser,
Ast,
Example,
Typecheck,
ByteCode.Util,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.Assembler,
ByteCode.Builder,
ByteCode.Constants
test-suite tests
type: exitcode-stdio-1.0
main-is: TestSuite.hs
hs-source-dirs: src,Test
build-depends: base,
array,
HUnit,
utf8-string,
bytestring,
filepath
build-tool-depends: alex:alex, happy:happy
other-modules: Parser.Lexer,
Parser.JavaParser,
Ast,
TestLexer,
TestParser,
ByteCode.Util,
ByteCode.ByteUtil,
ByteCode.ClassFile,
ByteCode.Assembler,
ByteCode.Builder,
ByteCode.Constants

2
questions.md Normal file
View File

@@ -0,0 +1,2 @@
# Questions
- Enum?

70
src/Ast.hs Normal file
View File

@@ -0,0 +1,70 @@
module Ast where
type CompilationUnit = [Class]
type DataType = String
type Identifier = String
data ParameterDeclaration = ParameterDeclaration DataType Identifier deriving (Show, Eq)
data VariableDeclaration = VariableDeclaration DataType Identifier (Maybe Expression) deriving (Show, Eq)
data Class = Class DataType [MethodDeclaration] [VariableDeclaration] deriving (Show, Eq)
data MethodDeclaration = MethodDeclaration DataType Identifier [ParameterDeclaration] Statement deriving (Show, Eq)
data Statement
= If Expression Statement (Maybe Statement)
| LocalVariableDeclaration VariableDeclaration
| While Expression Statement
| Block [Statement]
| Return (Maybe Expression)
| StatementExpressionStatement StatementExpression
| TypedStatement DataType Statement
deriving (Show, Eq)
data StatementExpression
= Assignment Expression Expression
| ConstructorCall DataType [Expression]
| MethodCall Expression Identifier [Expression]
| TypedStatementExpression DataType StatementExpression
| PostIncrement Expression
| PostDecrement Expression
| PreIncrement Expression
| PreDecrement Expression
deriving (Show, Eq)
data BinaryOperator
= Addition
| Subtraction
| Multiplication
| Division
| Modulo
| BitwiseAnd
| BitwiseOr
| BitwiseXor
| CompareLessThan
| CompareLessOrEqual
| CompareGreaterThan
| CompareGreaterOrEqual
| CompareEqual
| CompareNotEqual
| And
| Or
| NameResolution
deriving (Show, Eq)
data UnaryOperator
= Not
| Minus
deriving (Show, Eq)
data Expression
= IntegerLiteral Int
| CharacterLiteral Char
| BooleanLiteral Bool
| NullLiteral
| Reference Identifier
| LocalVariable Identifier
| FieldVariable Identifier
| BinaryOperation BinaryOperator Expression Expression
| UnaryOperation UnaryOperator Expression
| StatementExpressionExpression StatementExpression
| TypedExpression DataType Expression
deriving (Show, Eq)

276
src/ByteCode/Assembler.hs Normal file
View File

@@ -0,0 +1,276 @@
module ByteCode.Assembler where
import ByteCode.Constants
import ByteCode.ClassFile (ClassFile (..), ConstantInfo (..), MemberInfo(..), Operation(..), Attribute(..), opcodeEncodingLength)
import ByteCode.Util
import Ast
import Data.Char
import Data.List
import Data.Word
type Assembler a = ([ConstantInfo], [Operation], [String]) -> a -> ([ConstantInfo], [Operation], [String])
assembleExpression :: Assembler Expression
assembleExpression (constants, ops, lvars) (TypedExpression _ (BinaryOperation op a b))
| elem op [Addition, Subtraction, Multiplication, Division, Modulo, BitwiseAnd, BitwiseOr, BitwiseXor, And, Or] = let
(aConstants, aOps, _) = assembleExpression (constants, ops, lvars) a
(bConstants, bOps, _) = assembleExpression (aConstants, aOps, lvars) b
in
(bConstants, bOps ++ [binaryOperation op], lvars)
| elem op [CompareEqual, CompareNotEqual, CompareLessThan, CompareLessOrEqual, CompareGreaterThan, CompareGreaterOrEqual] = let
(aConstants, aOps, _) = assembleExpression (constants, ops, lvars) a
(bConstants, bOps, _) = assembleExpression (aConstants, aOps, lvars) b
cmp_op = comparisonOperation op 9
cmp_ops = [cmp_op, Opsipush 0, Opgoto 6, Opsipush 1]
in
(bConstants, bOps ++ cmp_ops, lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ (BinaryOperation NameResolution (TypedExpression atype a) (TypedExpression btype (FieldVariable b)))) = let
(fConstants, fieldIndex) = getFieldIndex constants (atype, b, datatypeDescriptor btype)
(aConstants, aOps, _) = assembleExpression (fConstants, ops, lvars) (TypedExpression atype a)
in
(aConstants, aOps ++ [Opgetfield (fromIntegral fieldIndex)], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ (CharacterLiteral literal)) =
(constants, ops ++ [Opsipush (fromIntegral (ord literal))], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ (BooleanLiteral literal)) =
(constants, ops ++ [Opsipush (if literal then 1 else 0)], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ (IntegerLiteral literal))
| literal <= 32767 && literal >= -32768 = (constants, ops ++ [Opsipush (fromIntegral literal)], lvars)
| otherwise = (constants ++ [IntegerInfo (fromIntegral literal)], ops ++ [Opldc_w (fromIntegral (1 + length constants))], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ NullLiteral) =
(constants, ops ++ [Opaconst_null], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression etype (UnaryOperation Not expr)) = let
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
newConstant = fromIntegral (1 + length exprConstants)
in case etype of
"int" -> (exprConstants ++ [IntegerInfo 0x7FFFFFFF], exprOps ++ [Opldc_w newConstant, Opixor], lvars)
"char" -> (exprConstants, exprOps ++ [Opsipush 0xFFFF, Opixor], lvars)
"boolean" -> (exprConstants, exprOps ++ [Opsipush 0x01, Opixor], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression _ (UnaryOperation Minus expr)) = let
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
in
(exprConstants, exprOps ++ [Opineg], lvars)
assembleExpression (constants, ops, lvars) (TypedExpression dtype (LocalVariable name))
| name == "this" = (constants, ops ++ [Opaload 0], lvars)
| otherwise = let
localIndex = findIndex ((==) name) lvars
isPrimitive = elem dtype ["char", "boolean", "int"]
in case localIndex of
Just index -> (constants, ops ++ if isPrimitive then [Opiload (fromIntegral index)] else [Opaload (fromIntegral index)], lvars)
Nothing -> error ("No such local variable found in local variable pool: " ++ name)
assembleExpression (constants, ops, lvars) (TypedExpression dtype (StatementExpressionExpression stmtexp)) =
assembleStatementExpression (constants, ops, lvars) stmtexp
assembleExpression _ expr = error ("unimplemented: " ++ show expr)
assembleNameChain :: Assembler Expression
assembleNameChain input (TypedExpression _ (BinaryOperation NameResolution (TypedExpression atype a) (TypedExpression _ (FieldVariable _)))) =
assembleExpression input (TypedExpression atype a)
assembleNameChain input expr = assembleExpression input expr
assembleStatementExpression :: Assembler StatementExpression
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression _ (Assignment (TypedExpression dtype receiver) expr)) = let
target = resolveNameChain (TypedExpression dtype receiver)
in case target of
(TypedExpression dtype (LocalVariable name)) -> let
localIndex = findIndex ((==) name) lvars
(constants_a, ops_a, _) = assembleExpression (constants, ops, lvars) expr
isPrimitive = elem dtype ["char", "boolean", "int"]
in case localIndex of
Just index -> (constants_a, ops_a ++ if isPrimitive then [Opdup, Opistore (fromIntegral index)] else [Opastore (fromIntegral index)], lvars)
Nothing -> error ("No such local variable found in local variable pool: " ++ name)
(TypedExpression dtype (FieldVariable name)) -> let
owner = resolveNameChainOwner (TypedExpression dtype receiver)
in case owner of
(TypedExpression otype _) -> let
(constants_f, fieldIndex) = getFieldIndex constants (otype, name, datatypeDescriptor dtype)
(constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver)
(constants_a, ops_a, _) = assembleExpression (constants_r, ops_r, lvars) expr
in
(constants_a, ops_a ++ [Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars)
something_else -> error ("expected TypedExpression, but got: " ++ show something_else)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression _ (PreIncrement (TypedExpression dtype receiver))) = let
target = resolveNameChain (TypedExpression dtype receiver)
in case target of
(TypedExpression dtype (LocalVariable name)) -> let
localIndex = findIndex ((==) name) lvars
expr = (TypedExpression dtype (LocalVariable name))
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
in case localIndex of
Just index -> (exprConstants, exprOps ++ [Opsipush 1, Opiadd, Opdup, Opistore (fromIntegral index)], lvars)
Nothing -> error("No such local variable found in local variable pool: " ++ name)
(TypedExpression dtype (FieldVariable name)) -> let
owner = resolveNameChainOwner (TypedExpression dtype receiver)
in case owner of
(TypedExpression otype _) -> let
(constants_f, fieldIndex) = getFieldIndex constants (otype, name, datatypeDescriptor dtype)
(constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver)
in
(constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opsipush 1, Opiadd, Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars)
something_else -> error ("expected TypedExpression, but got: " ++ show something_else)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression _ (PreDecrement (TypedExpression dtype receiver))) = let
target = resolveNameChain (TypedExpression dtype receiver)
in case target of
(TypedExpression dtype (LocalVariable name)) -> let
localIndex = findIndex ((==) name) lvars
expr = (TypedExpression dtype (LocalVariable name))
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
in case localIndex of
Just index -> (exprConstants, exprOps ++ [Opsipush 1, Opisub, Opdup, Opistore (fromIntegral index)], lvars)
Nothing -> error("No such local variable found in local variable pool: " ++ name)
(TypedExpression dtype (FieldVariable name)) -> let
owner = resolveNameChainOwner (TypedExpression dtype receiver)
in case owner of
(TypedExpression otype _) -> let
(constants_f, fieldIndex) = getFieldIndex constants (otype, name, datatypeDescriptor dtype)
(constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver)
in
(constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opsipush 1, Opisub, Opdup_x1, Opputfield (fromIntegral fieldIndex)], lvars)
something_else -> error ("expected TypedExpression, but got: " ++ show something_else)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression _ (PostIncrement (TypedExpression dtype receiver))) = let
target = resolveNameChain (TypedExpression dtype receiver)
in case target of
(TypedExpression dtype (LocalVariable name)) -> let
localIndex = findIndex ((==) name) lvars
expr = (TypedExpression dtype (LocalVariable name))
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
in case localIndex of
Just index -> (exprConstants, exprOps ++ [Opdup, Opsipush 1, Opiadd, Opistore (fromIntegral index)], lvars)
Nothing -> error("No such local variable found in local variable pool: " ++ name)
(TypedExpression dtype (FieldVariable name)) -> let
owner = resolveNameChainOwner (TypedExpression dtype receiver)
in case owner of
(TypedExpression otype _) -> let
(constants_f, fieldIndex) = getFieldIndex constants (otype, name, datatypeDescriptor dtype)
(constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver)
in
(constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opdup_x1, Opsipush 1, Opiadd, Opputfield (fromIntegral fieldIndex)], lvars)
something_else -> error ("expected TypedExpression, but got: " ++ show something_else)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression _ (PostDecrement (TypedExpression dtype receiver))) = let
target = resolveNameChain (TypedExpression dtype receiver)
in case target of
(TypedExpression dtype (LocalVariable name)) -> let
localIndex = findIndex ((==) name) lvars
expr = (TypedExpression dtype (LocalVariable name))
(exprConstants, exprOps, _) = assembleExpression (constants, ops, lvars) expr
in case localIndex of
Just index -> (exprConstants, exprOps ++ [Opdup, Opsipush 1, Opisub, Opistore (fromIntegral index)], lvars)
Nothing -> error("No such local variable found in local variable pool: " ++ name)
(TypedExpression dtype (FieldVariable name)) -> let
owner = resolveNameChainOwner (TypedExpression dtype receiver)
in case owner of
(TypedExpression otype _) -> let
(constants_f, fieldIndex) = getFieldIndex constants (otype, name, datatypeDescriptor dtype)
(constants_r, ops_r, _) = assembleNameChain (constants_f, ops, lvars) (TypedExpression dtype receiver)
in
(constants_r, ops_r ++ [Opdup, Opgetfield (fromIntegral fieldIndex), Opdup_x1, Opsipush 1, Opisub, Opputfield (fromIntegral fieldIndex)], lvars)
something_else -> error ("expected TypedExpression, but got: " ++ show something_else)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression rtype (MethodCall (TypedExpression otype receiver) name params)) = let
(constants_r, ops_r, lvars_r) = assembleExpression (constants, ops, lvars) (TypedExpression otype receiver)
(constants_p, ops_p, lvars_p) = foldl assembleExpression (constants_r, ops_r, lvars_r) params
(constants_m, methodIndex) = getMethodIndex constants_p (otype, name, methodDescriptorFromParamlist params rtype)
in
(constants_m, ops_p ++ [Opinvokevirtual (fromIntegral methodIndex)], lvars_p)
assembleStatementExpression
(constants, ops, lvars)
(TypedStatementExpression rtype (ConstructorCall name params)) = let
(constants_c, classIndex) = getClassIndex constants name
(constants_p, ops_p, lvars_p) = foldl assembleExpression (constants_c, ops ++ [Opnew (fromIntegral classIndex), Opdup], lvars) params
(constants_m, methodIndex) = getMethodIndex constants_p (name, "<init>", methodDescriptorFromParamlist params "void")
in
(constants_m, ops_p ++ [Opinvokespecial (fromIntegral methodIndex)], lvars_p)
assembleStatement :: Assembler Statement
assembleStatement (constants, ops, lvars) (TypedStatement stype (Return expr)) = case expr of
Nothing -> (constants, ops ++ [Opreturn], lvars)
Just expr -> let
(expr_constants, expr_ops, _) = assembleExpression (constants, ops, lvars) expr
in
(expr_constants, expr_ops ++ [returnOperation stype], lvars)
assembleStatement (constants, ops, lvars) (TypedStatement _ (Block statements)) =
foldl assembleStatement (constants, ops, lvars) statements
assembleStatement (constants, ops, lvars) (TypedStatement dtype (If expr if_stmt else_stmt)) = let
(constants_cmp, ops_cmp, _) = assembleExpression (constants, [], lvars) expr
(constants_ifa, ops_ifa, _) = assembleStatement (constants_cmp, [], lvars) if_stmt
(constants_elsea, ops_elsea, _) = case else_stmt of
Nothing -> (constants_ifa, [], lvars)
Just stmt -> assembleStatement (constants_ifa, [], lvars) stmt
-- +6 because we insert 2 gotos, one for if, one for else
if_length = sum (map opcodeEncodingLength ops_ifa)
-- +3 because we need to account for the goto in the if statement.
else_length = sum (map opcodeEncodingLength ops_elsea)
in case dtype of
"void" -> (constants_ifa, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq (if_length + 6)] ++ ops_ifa ++ [Opgoto (else_length + 3)] ++ ops_elsea, lvars)
otherwise -> (constants_ifa, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq (if_length + 3)] ++ ops_ifa ++ ops_elsea, lvars)
assembleStatement (constants, ops, lvars) (TypedStatement _ (While expr stmt)) = let
(constants_cmp, ops_cmp, _) = assembleExpression (constants, [], lvars) expr
(constants_stmta, ops_stmta, _) = assembleStatement (constants_cmp, [], lvars) stmt
-- +3 because we insert 2 gotos, one for the comparison, one for the goto back to the comparison
stmt_length = sum (map opcodeEncodingLength ops_stmta) + 6
entire_length = stmt_length + sum (map opcodeEncodingLength ops_cmp)
in
(constants_stmta, ops ++ ops_cmp ++ [Opsipush 0, Opif_icmpeq stmt_length] ++ ops_stmta ++ [Opgoto (-entire_length)], lvars)
assembleStatement (constants, ops, lvars) (TypedStatement _ (LocalVariableDeclaration (VariableDeclaration dtype name expr))) = let
isPrimitive = elem dtype ["char", "boolean", "int"]
(constants_init, ops_init, _) = case expr of
Just exp -> assembleExpression (constants, ops, lvars) exp
Nothing -> (constants, ops ++ if isPrimitive then [Opsipush 0] else [Opaconst_null], lvars)
localIndex = fromIntegral (length lvars)
storeLocal = if isPrimitive then [Opistore localIndex] else [Opastore localIndex]
in
(constants_init, ops_init ++ storeLocal, lvars ++ [name])
assembleStatement (constants, ops, lvars) (TypedStatement _ (StatementExpressionStatement expr)) = let
(constants_e, ops_e, lvars_e) = assembleStatementExpression (constants, ops, lvars) expr
in
(constants_e, ops_e ++ [Oppop], lvars_e)
assembleStatement _ stmt = error ("Not yet implemented: " ++ show stmt)
assembleMethod :: Assembler MethodDeclaration
assembleMethod (constants, ops, lvars) (MethodDeclaration returntype name _ (TypedStatement _ (Block statements)))
| name == "<init>" = let
(constants_a, ops_a, lvars_a) = foldl assembleStatement (constants, ops, lvars) statements
init_ops = [Opaload 0, Opinvokespecial 2]
in
(constants_a, init_ops ++ ops_a ++ [Opreturn], lvars_a)
| otherwise = case returntype of
"void" -> let
(constants_a, ops_a, lvars_a) = foldl assembleStatement (constants, ops, lvars) statements
in
(constants_a, ops_a ++ [Opreturn], lvars_a)
otherwise -> foldl assembleStatement (constants, ops, lvars) statements
assembleMethod _ (MethodDeclaration _ _ _ stmt) = error ("Typed block expected for method body, got: " ++ show stmt)

117
src/ByteCode/Builder.hs Normal file
View File

@@ -0,0 +1,117 @@
module ByteCode.Builder where
import ByteCode.Constants
import ByteCode.ClassFile (ClassFile (..), ConstantInfo (..), MemberInfo(..), Operation(..), Attribute(..), opcodeEncodingLength)
import ByteCode.Assembler
import ByteCode.Util
import Ast
import Data.Char
import Data.List
import Data.Word
type ClassFileBuilder a = a -> ClassFile -> ClassFile
fieldBuilder :: ClassFileBuilder VariableDeclaration
fieldBuilder (VariableDeclaration datatype name _) input = let
baseIndex = 1 + length (constantPool input)
constants = [
FieldRefInfo (fromIntegral (thisClass input)) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info name,
Utf8Info (datatypeDescriptor datatype)
]
field = MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = (fromIntegral (baseIndex + 2)),
memberDescriptorIndex = (fromIntegral (baseIndex + 3)),
memberAttributes = []
}
in
input {
constantPool = (constantPool input) ++ constants,
fields = (fields input) ++ [field]
}
methodBuilder :: ClassFileBuilder MethodDeclaration
methodBuilder (MethodDeclaration returntype name parameters statement) input = let
baseIndex = 1 + length (constantPool input)
constants = [
MethodRefInfo (fromIntegral (thisClass input)) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info name,
Utf8Info (methodDescriptor (MethodDeclaration returntype name parameters (Block [])))
]
method = MemberInfo {
memberAccessFlags = accessPublic,
memberNameIndex = (fromIntegral (baseIndex + 2)),
memberDescriptorIndex = (fromIntegral (baseIndex + 3)),
memberAttributes = []
}
in
input {
constantPool = (constantPool input) ++ constants,
methods = (methods input) ++ [method]
}
methodAssembler :: ClassFileBuilder MethodDeclaration
methodAssembler (MethodDeclaration returntype name parameters statement) input = let
methodConstantIndex = findMethodIndex input name
in case methodConstantIndex of
Nothing -> error ("Cannot find method entry in method pool for method: " ++ name)
Just index -> let
declaration = MethodDeclaration returntype name parameters statement
paramNames = "this" : [name | ParameterDeclaration _ name <- parameters]
in case (splitAt index (methods input)) of
(pre, []) -> input
(pre, method : post) -> let
(_, bytecode, _) = assembleMethod (constantPool input, [], paramNames) declaration
assembledMethod = method {
memberAttributes = [
CodeAttribute {
attributeMaxStack = 420,
attributeMaxLocals = 420,
attributeCode = bytecode
}
]
}
in
input {
methods = pre ++ (assembledMethod : post)
}
classBuilder :: ClassFileBuilder Class
classBuilder (Class name methods fields) _ = let
baseConstants = [
ClassInfo 4,
MethodRefInfo 1 3,
NameAndTypeInfo 5 6,
Utf8Info "java/lang/Object",
Utf8Info "<init>",
Utf8Info "()V",
Utf8Info "Code"
]
nameConstants = [ClassInfo 9, Utf8Info name]
nakedClassFile = ClassFile {
constantPool = baseConstants ++ nameConstants,
accessFlags = accessPublic,
thisClass = 8,
superClass = 1,
fields = [],
methods = [],
attributes = []
}
methodsWithInjectedConstructor = injectDefaultConstructor methods
methodsWithInjectedInitializers = injectFieldInitializers name fields methodsWithInjectedConstructor
classFileWithFields = foldr fieldBuilder nakedClassFile fields
classFileWithMethods = foldr methodBuilder classFileWithFields methodsWithInjectedInitializers
classFileWithAssembledMethods = foldr methodAssembler classFileWithMethods methodsWithInjectedInitializers
in
classFileWithAssembledMethods

18
src/ByteCode/ByteUtil.hs Normal file
View File

@@ -0,0 +1,18 @@
module ByteCode.ByteUtil where
import Data.Word ( Word8, Word16, Word32 )
import Data.Bits
unpackWord16 :: Word16 -> [Word8]
unpackWord16 v = [
fromIntegral (shiftR ((.&.) v 0xFF00) 8),
fromIntegral (shiftR ((.&.) v 0x00FF) 0)
]
unpackWord32 :: Word32 -> [Word8]
unpackWord32 v = [
fromIntegral (shiftR ((.&.) v 0xFF000000) 24),
fromIntegral (shiftR ((.&.) v 0x00FF0000) 16),
fromIntegral (shiftR ((.&.) v 0x0000FF00) 8),
fromIntegral (shiftR ((.&.) v 0x000000FF) 0)
]

230
src/ByteCode/ClassFile.hs Normal file
View File

@@ -0,0 +1,230 @@
module ByteCode.ClassFile(
ConstantInfo(..),
Attribute(..),
MemberInfo(..),
ClassFile(..),
Operation(..),
serialize,
emptyClassFile,
opcodeEncodingLength,
className
) where
import Data.Word
import Data.Int
import Data.ByteString (unpack)
import Data.ByteString.UTF8 (fromString)
import ByteCode.Constants
import ByteCode.ByteUtil
data ConstantInfo = ClassInfo Word16
| FieldRefInfo Word16 Word16
| MethodRefInfo Word16 Word16
| NameAndTypeInfo Word16 Word16
| IntegerInfo Int32
| Utf8Info [Char]
deriving (Show, Eq)
data Operation = Opiadd
| Opisub
| Opimul
| Opidiv
| Opirem
| Opiand
| Opior
| Opixor
| Opineg
| Opdup
| Opnew Word16
| Opif_icmplt Word16
| Opif_icmple Word16
| Opif_icmpgt Word16
| Opif_icmpge Word16
| Opif_icmpeq Word16
| Opif_icmpne Word16
| Opaconst_null
| Opreturn
| Opireturn
| Opareturn
| Opdup_x1
| Oppop
| Opinvokespecial Word16
| Opinvokevirtual Word16
| Opgoto Word16
| Opsipush Word16
| Opldc_w Word16
| Opaload Word16
| Opiload Word16
| Opastore Word16
| Opistore Word16
| Opputfield Word16
| Opgetfield Word16
deriving (Show, Eq)
data Attribute = CodeAttribute {
attributeMaxStack :: Word16,
attributeMaxLocals :: Word16,
attributeCode :: [Operation]
} deriving (Show, Eq)
data MemberInfo = MemberInfo {
memberAccessFlags :: Word16,
memberNameIndex :: Word16,
memberDescriptorIndex :: Word16,
memberAttributes :: [Attribute]
} deriving (Show, Eq)
data ClassFile = ClassFile {
constantPool :: [ConstantInfo],
accessFlags :: Word16,
thisClass :: Word16,
superClass :: Word16,
fields :: [MemberInfo],
methods :: [MemberInfo],
attributes :: [Attribute]
} deriving (Show, Eq)
emptyClassFile :: ClassFile
emptyClassFile = ClassFile {
constantPool = [],
accessFlags = accessPublic,
thisClass = 0,
superClass = 0,
fields = [],
methods = [],
attributes = []
}
className :: ClassFile -> String
className classFile = let
classInfo = (constantPool classFile)!!(fromIntegral (thisClass classFile))
in case classInfo of
Utf8Info className -> className
otherwise -> error ("expected Utf8Info but got: " ++ show otherwise)
opcodeEncodingLength :: Operation -> Word16
opcodeEncodingLength Opiadd = 1
opcodeEncodingLength Opisub = 1
opcodeEncodingLength Opimul = 1
opcodeEncodingLength Opidiv = 1
opcodeEncodingLength Opirem = 1
opcodeEncodingLength Opiand = 1
opcodeEncodingLength Opior = 1
opcodeEncodingLength Opixor = 1
opcodeEncodingLength Opineg = 1
opcodeEncodingLength Opdup = 1
opcodeEncodingLength (Opnew _) = 3
opcodeEncodingLength (Opif_icmplt _) = 3
opcodeEncodingLength (Opif_icmple _) = 3
opcodeEncodingLength (Opif_icmpgt _) = 3
opcodeEncodingLength (Opif_icmpge _) = 3
opcodeEncodingLength (Opif_icmpeq _) = 3
opcodeEncodingLength (Opif_icmpne _) = 3
opcodeEncodingLength Opaconst_null = 1
opcodeEncodingLength Opreturn = 1
opcodeEncodingLength Opireturn = 1
opcodeEncodingLength Opareturn = 1
opcodeEncodingLength Opdup_x1 = 1
opcodeEncodingLength Oppop = 1
opcodeEncodingLength (Opinvokespecial _) = 3
opcodeEncodingLength (Opinvokevirtual _) = 3
opcodeEncodingLength (Opgoto _) = 3
opcodeEncodingLength (Opsipush _) = 3
opcodeEncodingLength (Opldc_w _) = 3
opcodeEncodingLength (Opaload _) = 4
opcodeEncodingLength (Opiload _) = 4
opcodeEncodingLength (Opastore _) = 4
opcodeEncodingLength (Opistore _) = 4
opcodeEncodingLength (Opputfield _) = 3
opcodeEncodingLength (Opgetfield _) = 3
class Serializable a where
serialize :: a -> [Word8]
instance Serializable ConstantInfo where
serialize (ClassInfo nameIndex) = tagClass : unpackWord16 nameIndex
serialize (FieldRefInfo classIndex nameAndTypeIndex) = tagFieldref : (unpackWord16 classIndex ++ unpackWord16 nameAndTypeIndex)
serialize (MethodRefInfo classIndex nameAndTypeIndex) = tagMethodref : (unpackWord16 classIndex ++ unpackWord16 nameAndTypeIndex)
serialize (NameAndTypeInfo classIndex descriptorIndex) = tagNameandtype : (unpackWord16 classIndex ++ unpackWord16 descriptorIndex)
serialize (IntegerInfo value) = tagInteger : unpackWord32 (fromIntegral value)
serialize (Utf8Info string) = tagUtf8 : unpackWord16 num_bytes ++ bytes where
bytes = unpack (fromString string)
num_bytes = fromIntegral $ length bytes
instance Serializable MemberInfo where
serialize member = unpackWord16 (memberAccessFlags member)
++ unpackWord16 (memberNameIndex member)
++ unpackWord16 (memberDescriptorIndex member)
++ unpackWord16 (fromIntegral (length (memberAttributes member)))
++ concatMap serialize (memberAttributes member)
instance Serializable Operation where
serialize Opiadd = [0x60]
serialize Opisub = [0x64]
serialize Opimul = [0x68]
serialize Opidiv = [0x6C]
serialize Opirem = [0x70]
serialize Opiand = [0x7E]
serialize Opior = [0x80]
serialize Opixor = [0x82]
serialize Opineg = [0x74]
serialize Opdup = [0x59]
serialize (Opnew index) = 0xBB : unpackWord16 index
serialize (Opif_icmplt branch) = 0xA1 : unpackWord16 branch
serialize (Opif_icmple branch) = 0xA4 : unpackWord16 branch
serialize (Opif_icmpgt branch) = 0xA3 : unpackWord16 branch
serialize (Opif_icmpge branch) = 0xA2 : unpackWord16 branch
serialize (Opif_icmpeq branch) = 0x9F : unpackWord16 branch
serialize (Opif_icmpne branch) = 0xA0 : unpackWord16 branch
serialize Opaconst_null = [0x01]
serialize Opreturn = [0xB1]
serialize Opireturn = [0xAC]
serialize Opareturn = [0xB0]
serialize Opdup_x1 = [0x5A]
serialize Oppop = [0x57]
serialize (Opinvokespecial index) = 0xB7 : unpackWord16 index
serialize (Opinvokevirtual index) = 0xB6 : unpackWord16 index
serialize (Opgoto index) = 0xA7 : unpackWord16 index
serialize (Opsipush index) = 0x11 : unpackWord16 index
serialize (Opldc_w index) = 0x13 : unpackWord16 index
serialize (Opaload index) = [0xC4, 0x19] ++ unpackWord16 index
serialize (Opiload index) = [0xC4, 0x15] ++ unpackWord16 index
serialize (Opastore index) = [0xC4, 0x3A] ++ unpackWord16 index
serialize (Opistore index) = [0xC4, 0x36] ++ unpackWord16 index
serialize (Opputfield index) = 0xB5 : unpackWord16 index
serialize (Opgetfield index) = 0xB4 : unpackWord16 index
instance Serializable Attribute where
serialize (CodeAttribute { attributeMaxStack = maxStack,
attributeMaxLocals = maxLocals,
attributeCode = code }) = let
assembledCode = concat (map serialize code)
in
unpackWord16 7 -- attribute_name_index
++ unpackWord32 (12 + (fromIntegral (length assembledCode))) -- attribute_length
++ unpackWord16 maxStack -- max_stack
++ unpackWord16 maxLocals -- max_locals
++ unpackWord32 (fromIntegral (length assembledCode)) -- code_length
++ assembledCode -- code
++ unpackWord16 0 -- exception_table_length
++ unpackWord16 0 -- attributes_count
instance Serializable ClassFile where
serialize classfile = unpackWord32 0xCAFEBABE -- magic
++ unpackWord16 0 -- minor version
++ unpackWord16 49 -- major version
++ unpackWord16 (fromIntegral (1 + length (constantPool classfile))) -- constant pool count
++ concatMap serialize (constantPool classfile) -- constant pool
++ unpackWord16 (accessFlags classfile) -- access flags
++ unpackWord16 (thisClass classfile) -- this class
++ unpackWord16 (superClass classfile) -- super class
++ unpackWord16 0 -- interface count
++ unpackWord16 (fromIntegral (length (fields classfile))) -- fields count
++ concatMap serialize (fields classfile) -- fields info
++ unpackWord16 (fromIntegral (length (methods classfile))) -- methods count
++ concatMap serialize (methods classfile) -- methods info
++ unpackWord16 (fromIntegral (length (attributes classfile))) -- attributes count
++ concatMap serialize (attributes classfile) -- attributes info

25
src/ByteCode/Constants.hs Normal file
View File

@@ -0,0 +1,25 @@
module ByteCode.Constants where
import Data.Word
tagClass :: Word8
tagFieldref :: Word8
tagMethodref :: Word8
tagNameandtype :: Word8
tagInteger :: Word8
tagUtf8 :: Word8
accessPublic :: Word16
accessPrivate :: Word16
accessProtected :: Word16
tagClass = 0x07
tagFieldref = 0x09
tagMethodref = 0x0A
tagNameandtype = 0x0C
tagInteger = 0x03
tagUtf8 = 0x01
accessPublic = 0x01
accessPrivate = 0x02
accessProtected = 0x04

245
src/ByteCode/Util.hs Normal file
View File

@@ -0,0 +1,245 @@
module ByteCode.Util where
import Data.Int
import Ast
import ByteCode.ClassFile
import Data.List
import Data.Maybe (mapMaybe)
import Data.Word (Word8, Word16, Word32)
-- walks the name resolution chain. returns the innermost Just LocalVariable/FieldVariable or Nothing.
resolveNameChain :: Expression -> Expression
resolveNameChain (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b
resolveNameChain (TypedExpression dtype (LocalVariable name)) = (TypedExpression dtype (LocalVariable name))
resolveNameChain (TypedExpression dtype (FieldVariable name)) = (TypedExpression dtype (FieldVariable name))
resolveNameChain invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show(invalidExpression))
-- walks the name resolution chain. returns the second-to-last item of the namechain.
resolveNameChainOwner :: Expression -> Expression
resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a (TypedExpression dtype (FieldVariable name)))) = a
resolveNameChainOwner (TypedExpression _ (BinaryOperation NameResolution a b)) = resolveNameChain b
resolveNameChainOwner invalidExpression = error ("expected a NameResolution or Local/Field Variable but got: " ++ show(invalidExpression))
methodDescriptor :: MethodDeclaration -> String
methodDescriptor (MethodDeclaration returntype _ parameters _) = let
parameter_types = [datatype | ParameterDeclaration datatype _ <- parameters]
in
"("
++ (concat (map datatypeDescriptor parameter_types))
++ ")"
++ datatypeDescriptor returntype
methodDescriptorFromParamlist :: [Expression] -> String -> String
methodDescriptorFromParamlist parameters returntype = let
parameter_types = [datatype | TypedExpression datatype _ <- parameters]
in
"("
++ (concat (map datatypeDescriptor parameter_types))
++ ")"
++ datatypeDescriptor returntype
memberInfoIsMethod :: [ConstantInfo] -> MemberInfo -> Bool
memberInfoIsMethod constants info = elem '(' (memberInfoDescriptor constants info)
datatypeDescriptor :: String -> String
datatypeDescriptor "void" = "V"
datatypeDescriptor "int" = "I"
datatypeDescriptor "char" = "C"
datatypeDescriptor "boolean" = "B"
datatypeDescriptor x = "L" ++ x ++ ";"
memberInfoDescriptor :: [ConstantInfo] -> MemberInfo -> String
memberInfoDescriptor constants MemberInfo {
memberAccessFlags = _,
memberNameIndex = _,
memberDescriptorIndex = descriptorIndex,
memberAttributes = _ } = let
descriptor = constants!!((fromIntegral descriptorIndex) - 1)
in case descriptor of
Utf8Info descriptorText -> descriptorText
_ -> ("Invalid Item at Constant pool index " ++ show descriptorIndex)
memberInfoName :: [ConstantInfo] -> MemberInfo -> String
memberInfoName constants MemberInfo {
memberAccessFlags = _,
memberNameIndex = nameIndex,
memberDescriptorIndex = _,
memberAttributes = _ } = let
name = constants!!((fromIntegral nameIndex) - 1)
in case name of
Utf8Info nameText -> nameText
_ -> ("Invalid Item at Constant pool index " ++ show nameIndex)
returnOperation :: DataType -> Operation
returnOperation dtype
| elem dtype ["int", "char", "boolean"] = Opireturn
| otherwise = Opareturn
binaryOperation :: BinaryOperator -> Operation
binaryOperation Addition = Opiadd
binaryOperation Subtraction = Opisub
binaryOperation Multiplication = Opimul
binaryOperation Division = Opidiv
binaryOperation Modulo = Opirem
binaryOperation BitwiseAnd = Opiand
binaryOperation BitwiseOr = Opior
binaryOperation BitwiseXor = Opixor
binaryOperation And = Opiand
binaryOperation Or = Opior
comparisonOperation :: BinaryOperator -> Word16 -> Operation
comparisonOperation CompareEqual branchLocation = Opif_icmpeq branchLocation
comparisonOperation CompareNotEqual branchLocation = Opif_icmpne branchLocation
comparisonOperation CompareLessThan branchLocation = Opif_icmplt branchLocation
comparisonOperation CompareLessOrEqual branchLocation = Opif_icmple branchLocation
comparisonOperation CompareGreaterThan branchLocation = Opif_icmpgt branchLocation
comparisonOperation CompareGreaterOrEqual branchLocation = Opif_icmpge branchLocation
findFieldIndex :: [ConstantInfo] -> String -> Maybe Int
findFieldIndex constants name = let
fieldRefNameInfos = [
-- we only skip one entry to get the name since the Java constant pool
-- is 1-indexed (why)
(index, constants!!(fromIntegral index + 1))
| (index, FieldRefInfo classIndex _) <- (zip [1..] constants)
]
fieldRefNames = map (\(index, nameInfo) -> case nameInfo of
Utf8Info fieldName -> (index, fieldName)
something_else -> error ("Expected UTF8Info but got" ++ show something_else))
fieldRefNameInfos
fieldIndex = find (\(index, fieldName) -> fieldName == name) fieldRefNames
in case fieldIndex of
Just (index, _) -> Just index
Nothing -> Nothing
findMethodRefIndex :: [ConstantInfo] -> String -> Maybe Int
findMethodRefIndex constants name = let
methodRefNameInfos = [
-- we only skip one entry to get the name since the Java constant pool
-- is 1-indexed (why)
(index, constants!!(fromIntegral index + 1))
| (index, MethodRefInfo _ _) <- (zip [1..] constants)
]
methodRefNames = map (\(index, nameInfo) -> case nameInfo of
Utf8Info methodName -> (index, methodName)
something_else -> error ("Expected UTF8Info but got " ++ show something_else))
methodRefNameInfos
methodIndex = find (\(index, methodName) -> methodName == name) methodRefNames
in case methodIndex of
Just (index, _) -> Just index
Nothing -> Nothing
findMethodIndex :: ClassFile -> String -> Maybe Int
findMethodIndex classFile name = let
constants = constantPool classFile
in
findIndex (\method -> ((memberInfoIsMethod constants method) && (memberInfoName constants method) == name)) (methods classFile)
findClassIndex :: [ConstantInfo] -> String -> Maybe Int
findClassIndex constants name = let
classNameIndices = [(index, constants!!(fromIntegral nameIndex - 1)) | (index, ClassInfo nameIndex) <- (zip[1..] constants)]
classNames = map (\(index, nameInfo) -> case nameInfo of
Utf8Info className -> (index, className)
something_else -> error("Expected UTF8Info but got " ++ show something_else))
classNameIndices
desiredClassIndex = find (\(index, className) -> className == name) classNames
in case desiredClassIndex of
Just (index, _) -> Just index
Nothing -> Nothing
getKnownMembers :: [ConstantInfo] -> [(Int, (String, String, String))]
getKnownMembers constants = let
fieldsClassAndNT = [
(index, constants!!(fromIntegral classIndex - 1), constants!!(fromIntegral nameTypeIndex - 1))
| (index, FieldRefInfo classIndex nameTypeIndex) <- (zip [1..] constants)
] ++ [
(index, constants!!(fromIntegral classIndex - 1), constants!!(fromIntegral nameTypeIndex - 1))
| (index, MethodRefInfo classIndex nameTypeIndex) <- (zip [1..] constants)
]
fieldsClassNameType = map (\(index, nameInfo, nameTypeInfo) -> case (nameInfo, nameTypeInfo) of
(ClassInfo nameIndex, NameAndTypeInfo fnameIndex ftypeIndex) -> (index, (constants!!(fromIntegral nameIndex - 1), constants!!(fromIntegral fnameIndex - 1), constants!!(fromIntegral ftypeIndex - 1)))
something_else -> error ("Expected Class and NameType info, but got: " ++ show nameInfo ++ " and " ++ show nameTypeInfo))
fieldsClassAndNT
fieldsResolved = map (\(index, (nameInfo, fnameInfo, ftypeInfo)) -> case (nameInfo, fnameInfo, ftypeInfo) of
(Utf8Info cname, Utf8Info fname, Utf8Info ftype) -> (index, (cname, fname, ftype))
something_else -> error("Expected UTF8Infos but got " ++ show something_else))
fieldsClassNameType
in
fieldsResolved
-- same as findClassIndex, but inserts a new entry into constant pool if not existing
getClassIndex :: [ConstantInfo] -> String -> ([ConstantInfo], Int)
getClassIndex constants name = case findClassIndex constants name of
Just index -> (constants, index)
Nothing -> (constants ++ [ClassInfo (fromIntegral (length constants)), Utf8Info name], fromIntegral (length constants))
-- get the index for a field within a class, creating it if it does not exist.
getFieldIndex :: [ConstantInfo] -> (String, String, String) -> ([ConstantInfo], Int)
getFieldIndex constants (cname, fname, ftype) = case findMemberIndex constants (cname, fname, ftype) of
Just index -> (constants, index)
Nothing -> let
(constantsWithClass, classIndex) = getClassIndex constants cname
baseIndex = 1 + length constantsWithClass
in
(constantsWithClass ++ [
FieldRefInfo (fromIntegral classIndex) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info fname,
Utf8Info (datatypeDescriptor ftype)
], baseIndex)
getMethodIndex :: [ConstantInfo] -> (String, String, String) -> ([ConstantInfo], Int)
getMethodIndex constants (cname, mname, mtype) = case findMemberIndex constants (cname, mname, mtype) of
Just index -> (constants, index)
Nothing -> let
(constantsWithClass, classIndex) = getClassIndex constants cname
baseIndex = 1 + length constantsWithClass
in
(constantsWithClass ++ [
MethodRefInfo (fromIntegral classIndex) (fromIntegral (baseIndex + 1)),
NameAndTypeInfo (fromIntegral (baseIndex + 2)) (fromIntegral (baseIndex + 3)),
Utf8Info mname,
Utf8Info mtype
], baseIndex)
findMemberIndex :: [ConstantInfo] -> (String, String, String) -> Maybe Int
findMemberIndex constants (cname, fname, ftype) = let
allMembers = getKnownMembers constants
desiredMember = find (\(index, (c, f, ft)) -> (c, f, ft) == (cname, fname, ftype)) allMembers
in
fmap (\(index, _) -> index) desiredMember
injectDefaultConstructor :: [MethodDeclaration] -> [MethodDeclaration]
injectDefaultConstructor pre
| any (\(MethodDeclaration _ name _ _) -> name == "<init>") pre = pre
| otherwise = pre ++ [MethodDeclaration "void" "<init>" [] (TypedStatement "void" (Block []))]
injectFieldInitializers :: String -> [VariableDeclaration] -> [MethodDeclaration] -> [MethodDeclaration]
injectFieldInitializers classname vars pre = let
initializers = mapMaybe (\(variable) -> case variable of
VariableDeclaration dtype name (Just initializer) -> Just (
TypedStatement dtype (
StatementExpressionStatement (
TypedStatementExpression dtype (
Assignment
(TypedExpression dtype (BinaryOperation NameResolution (TypedExpression classname (LocalVariable "this")) (TypedExpression dtype (FieldVariable name))))
initializer
)
)
)
)
otherwise -> Nothing
) vars
in
map (\(method) -> case method of
MethodDeclaration "void" "<init>" params (TypedStatement "void" (Block statements)) -> MethodDeclaration "void" "<init>" params (TypedStatement "void" (Block (initializers ++ statements)))
otherwise -> method
) pre

267
src/Example.hs Normal file
View File

@@ -0,0 +1,267 @@
module Example where
import Ast
import Typecheck
import Control.Exception (catch, evaluate, SomeException, displayException)
import Control.Exception.Base
import System.IO (stderr, hPutStrLn)
import Data.Maybe
import Data.List
green, red, yellow, blue, magenta, cyan, white :: String -> String
green str = "\x1b[32m" ++ str ++ "\x1b[0m"
red str = "\x1b[31m" ++ str ++ "\x1b[0m"
yellow str = "\x1b[33m" ++ str ++ "\x1b[0m"
blue str = "\x1b[34m" ++ str ++ "\x1b[0m"
magenta str = "\x1b[35m" ++ str ++ "\x1b[0m"
cyan str = "\x1b[36m" ++ str ++ "\x1b[0m"
white str = "\x1b[37m" ++ str ++ "\x1b[0m"
printSuccess :: String -> IO ()
printSuccess msg = putStrLn $ green "Success:" ++ white msg
handleError :: SomeException -> IO ()
handleError e = hPutStrLn stderr $ red ("Error: " ++ displayException e)
printResult :: Show a => String -> a -> IO ()
printResult title result = do
putStrLn $ green title
print result
sampleClasses :: [Class]
sampleClasses = [
Class "Person" [
MethodDeclaration "void" "setAge" [ParameterDeclaration "int" "newAge"]
(Block [
LocalVariableDeclaration (VariableDeclaration "int" "age" (Just (Reference "newAge")))
]),
MethodDeclaration "int" "getAge" [] (Return (Just (Reference "age"))),
MethodDeclaration "Person" "Person" [ParameterDeclaration "int" "initialAge"] (Block [])
] [
VariableDeclaration "int" "age" (Just (IntegerLiteral 25))
]
]
initialSymtab :: [(DataType, Identifier)]
initialSymtab = []
exampleExpression :: Expression
exampleExpression = BinaryOperation NameResolution (Reference "bob") (Reference "age")
exampleAssignment :: Expression
exampleAssignment = StatementExpressionExpression (Assignment (Reference "a") (IntegerLiteral 30))
exampleMethodCall :: Statement
exampleMethodCall = StatementExpressionStatement (MethodCall (Reference "this") "setAge" [IntegerLiteral 30])
exampleConstructorCall :: Statement
exampleConstructorCall = LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30]))))
exampleNameResolution :: Expression
exampleNameResolution = BinaryOperation NameResolution (Reference "bob2") (Reference "age")
exampleBlockResolution :: Statement
exampleBlockResolution = Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30])))),
LocalVariableDeclaration (VariableDeclaration "int" "age" (Just (StatementExpressionExpression (MethodCall (Reference "bob") "getAge" [])))),
StatementExpressionStatement (MethodCall (Reference "bob") "setAge" [IntegerLiteral 30])
]
exampleBlockResolutionFail :: Statement
exampleBlockResolutionFail = Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30])))),
LocalVariableDeclaration (VariableDeclaration "bool" "age" (Just (StatementExpressionExpression (MethodCall (Reference "bob") "getAge" [])))),
StatementExpressionStatement (MethodCall (Reference "bob") "setAge" [IntegerLiteral 30])
]
exampleMethodCallAndAssignment :: Statement
exampleMethodCallAndAssignment = Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30])))),
LocalVariableDeclaration (VariableDeclaration "int" "age" (Just (StatementExpressionExpression (MethodCall (Reference "bob") "getAge" [])))),
StatementExpressionStatement (MethodCall (Reference "bob") "setAge" [IntegerLiteral 30]),
LocalVariableDeclaration (VariableDeclaration "int" "a" Nothing),
StatementExpressionStatement (Assignment (Reference "a") (Reference "age"))
]
exampleMethodCallAndAssignmentFail :: Statement
exampleMethodCallAndAssignmentFail = Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30])))),
LocalVariableDeclaration (VariableDeclaration "int" "age" (Just (StatementExpressionExpression (MethodCall (Reference "bob") "getAge" [])))),
StatementExpressionStatement (MethodCall (Reference "bob") "setAge" [IntegerLiteral 30]),
StatementExpressionStatement (Assignment (Reference "a") (Reference "age"))
]
exampleNameResolutionAssignment :: Statement
exampleNameResolutionAssignment = Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 30])))),
StatementExpressionStatement (Assignment (BinaryOperation NameResolution (Reference "bob") (Reference "age")) (IntegerLiteral 30))
]
exampleCharIntOperation :: Expression
exampleCharIntOperation = BinaryOperation Addition (CharacterLiteral 'a') (IntegerLiteral 1)
exampleNullDeclaration :: Statement
exampleNullDeclaration = LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just NullLiteral))
exampleNullDeclarationFail :: Statement
exampleNullDeclarationFail = LocalVariableDeclaration (VariableDeclaration "int" "a" (Just NullLiteral))
exampleNullAssignment :: Statement
exampleNullAssignment = StatementExpressionStatement (Assignment (Reference "a") NullLiteral)
exampleIncrement :: Statement
exampleIncrement = StatementExpressionStatement (PostIncrement (Reference "a"))
testClasses :: [Class]
testClasses = [
Class "Person" [
MethodDeclaration "Person" "Person" [ParameterDeclaration "int" "initialAge"]
(Block [
Return (Just (Reference "this"))
]),
MethodDeclaration "void" "setAge" [ParameterDeclaration "int" "newAge"]
(Block [
LocalVariableDeclaration (VariableDeclaration "int" "age" (Just (Reference "newAge")))
]),
MethodDeclaration "int" "getAge" []
(Return (Just (Reference "age")))
] [
VariableDeclaration "int" "age" Nothing -- initially unassigned
],
Class "Main" [
MethodDeclaration "int" "main" []
(Block [
LocalVariableDeclaration (VariableDeclaration "Person" "bob" (Just (StatementExpressionExpression (ConstructorCall "Person" [IntegerLiteral 25])))),
StatementExpressionStatement (MethodCall (Reference "bob") "setAge" [IntegerLiteral 30]),
LocalVariableDeclaration (VariableDeclaration "int" "bobAge" (Just (StatementExpressionExpression (MethodCall (Reference "bob2") "getAge" [])))),
Return (Just (Reference "bobAge"))
])
] [
VariableDeclaration "Person" "bob2" Nothing
]
]
runTypeCheck :: IO ()
runTypeCheck = do
catch (do
print "====================================================================================="
evaluatedExpression <- evaluate (typeCheckExpression exampleExpression [("bob", "Person")] sampleClasses)
printSuccess "Type checking of expression completed successfully"
printResult "Result Expression:" evaluatedExpression
) handleError
catch (do
print "====================================================================================="
evaluatedAssignment <- evaluate (typeCheckExpression exampleAssignment [("a", "int")] sampleClasses)
printSuccess "Type checking of assignment completed successfully"
printResult "Result Assignment:" evaluatedAssignment
) handleError
catch (do
print "====================================================================================="
evaluatedMethodCall <- evaluate (typeCheckStatement exampleMethodCall [("this", "Person"), ("setAge", "Person"), ("getAge", "Person")] sampleClasses)
printSuccess "Type checking of method call this completed successfully"
printResult "Result MethodCall:" evaluatedMethodCall
) handleError
catch (do
print "====================================================================================="
evaluatedConstructorCall <- evaluate (typeCheckStatement exampleConstructorCall [] sampleClasses)
printSuccess "Type checking of constructor call completed successfully"
printResult "Result Constructor Call:" evaluatedConstructorCall
) handleError
catch (do
print "====================================================================================="
evaluatedNameResolution <- evaluate (typeCheckExpression exampleNameResolution [("this", "Main")] testClasses)
printSuccess "Type checking of name resolution completed successfully"
printResult "Result Name Resolution:" evaluatedNameResolution
) handleError
catch (do
print "====================================================================================="
evaluatedBlockResolution <- evaluate (typeCheckStatement exampleBlockResolution [] sampleClasses)
printSuccess "Type checking of block resolution completed successfully"
printResult "Result Block Resolution:" evaluatedBlockResolution
) handleError
catch (do
print "====================================================================================="
evaluatedBlockResolutionFail <- evaluate (typeCheckStatement exampleBlockResolutionFail [] sampleClasses)
printSuccess "Type checking of block resolution failed"
printResult "Result Block Resolution:" evaluatedBlockResolutionFail
) handleError
catch (do
print "====================================================================================="
evaluatedMethodCallAndAssignment <- evaluate (typeCheckStatement exampleMethodCallAndAssignment [] sampleClasses)
printSuccess "Type checking of method call and assignment completed successfully"
printResult "Result Method Call and Assignment:" evaluatedMethodCallAndAssignment
) handleError
catch (do
print "====================================================================================="
evaluatedMethodCallAndAssignmentFail <- evaluate (typeCheckStatement exampleMethodCallAndAssignmentFail [] sampleClasses)
printSuccess "Type checking of method call and assignment failed"
printResult "Result Method Call and Assignment:" evaluatedMethodCallAndAssignmentFail
) handleError
catch (do
print "====================================================================================="
let mainClass = fromJust $ find (\(Class className _ _) -> className == "Main") testClasses
case mainClass of
Class _ [mainMethod] _ -> do
let result = typeCheckMethodDeclaration mainMethod [("this", "Main")] testClasses
printSuccess "Full program type checking completed successfully."
printResult "Main method result:" result
) handleError
catch (do
print "====================================================================================="
let typedProgram = typeCheckCompilationUnit testClasses
printSuccess "Type checking of Program completed successfully"
printResult "Typed Program:" typedProgram
) handleError
catch (do
print "====================================================================================="
typedAssignment <- evaluate (typeCheckStatement exampleNameResolutionAssignment [] sampleClasses)
printSuccess "Type checking of name resolution assignment completed successfully"
printResult "Result Name Resolution Assignment:" typedAssignment
) handleError
catch (do
print "====================================================================================="
evaluatedCharIntOperation <- evaluate (typeCheckExpression exampleCharIntOperation [] sampleClasses)
printSuccess "Type checking of char int operation completed successfully"
printResult "Result Char Int Operation:" evaluatedCharIntOperation
) handleError
catch (do
print "====================================================================================="
evaluatedNullDeclaration <- evaluate (typeCheckStatement exampleNullDeclaration [] sampleClasses)
printSuccess "Type checking of null declaration completed successfully"
printResult "Result Null Declaration:" evaluatedNullDeclaration
) handleError
catch (do
print "====================================================================================="
evaluatedNullDeclarationFail <- evaluate (typeCheckStatement exampleNullDeclarationFail [] sampleClasses)
printSuccess "Type checking of null declaration failed"
printResult "Result Null Declaration:" evaluatedNullDeclarationFail
) handleError
catch (do
print "====================================================================================="
evaluatedNullAssignment <- evaluate (typeCheckStatement exampleNullAssignment [("a", "Person")] sampleClasses)
printSuccess "Type checking of null assignment completed successfully"
printResult "Result Null Assignment:" evaluatedNullAssignment
) handleError
catch (do
print "====================================================================================="
evaluatedIncrement <- evaluate (typeCheckStatement exampleIncrement [("a", "int")] sampleClasses)
printSuccess "Type checking of increment completed successfully"
printResult "Result Increment:" evaluatedIncrement
) handleError

33
src/Main.hs Normal file
View File

@@ -0,0 +1,33 @@
module Main where
import Example
import Typecheck
import Parser.Lexer (alexScanTokens)
import Parser.JavaParser
import ByteCode.Builder
import ByteCode.ClassFile
import Data.ByteString (pack, writeFile)
import System.Environment
import System.FilePath.Posix (takeDirectory)
main = do
args <- getArgs
let filename = if null args
then error "Missing filename, I need to know what to compile"
else args!!0
let outputDirectory = takeDirectory filename
print ("Compiling " ++ filename)
file <- readFile filename
let untypedAST = parse $ alexScanTokens file
let typedAST = (typeCheckCompilationUnit untypedAST)
let assembledClasses = map (\(typedClass) -> classBuilder typedClass emptyClassFile) typedAST
mapM_ (\(classFile) -> let
fileContent = pack (serialize classFile)
fileName = outputDirectory ++ "/" ++ (className classFile) ++ ".class"
in Data.ByteString.writeFile fileName fileContent
) assembledClasses

383
src/Parser/JavaParser.y Normal file
View File

@@ -0,0 +1,383 @@
{
module Parser.JavaParser (parse, parseStatement, parseExpression) where
import Ast
import Parser.Lexer
}
%name parse
%name parseStatement statement
%name parseExpression expression
%tokentype { Token }
%error { parseError }
%errorhandlertype explist
%token
BOOLEAN { BOOLEAN }
BREAK { BREAK }
CASE { CASE }
CHAR { CHAR }
CLASS { CLASS}
IDENTIFIER { IDENTIFIER $$ }
INTLITERAL { INTEGERLITERAL $$}
DOT { DOT }
MOD { MODULO }
TIMESEQUAL { TIMESEQUAL }
GREATEREQUAL { GREATEREQUAL }
WHILE { WHILE }
PUBLIC { PUBLIC }
VOID { VOID }
EQUAL { EQUAL }
XOR { XOR }
RBRACE { RBRACE }
THIS { THIS }
STATIC { STATIC }
PROTECTED { PROTECTED }
TILDE { BITWISENOT }
MUL { TIMES }
MINUS { MINUS }
EXCLMARK { NOT }
IF { IF }
ELSE { ELSE }
DIVIDEEQUAL { DIVEQUAL }
NEW { NEW }
LBRACKET { LBRACKET }
JNULL { NULLLITERAL }
BOOLLITERAL { BOOLLITERAL $$ }
DIV { DIV }
NOTEQUAL { NOTEQUAL }
INSTANCEOF { INSTANCEOF }
ANDEQUAL { ANDEQUAL }
ASSIGN { ASSIGN }
DECREMENT { DECREMENT }
CHARLITERAL { CHARLITERAL $$ }
AND { AND }
XOREQUAL { XOREQUAL }
RETURN { RETURN }
QUESMARK { QUESTIONMARK }
SHIFTLEFTEQUAL { SHIFTLEFTEQUAL }
RBRACKET { RBRACKET }
COMMA { COMMA }
MINUSEQUAL { MINUSEQUAL }
INCREMENT { INCREMENT }
LBRACE { LBRACE }
LESSEQUAL { LESSEQUAL }
PLUS { PLUS }
PRIVATE { PRIVATE }
MODULOEQUAL { MODULOEQUAL }
GREATER { GREATER }
OR { OR }
INT { INT }
ABSTRACT { ABSTRACT }
SEMICOLON { SEMICOLON }
SIGNEDSHIFTRIGHTEQUAL { SHIFTRIGHTEQUAL }
UNSIGNEDSHIFTRIGHTEQUAL { UNSIGNEDSHIFTRIGHTEQUAL }
PLUSEQUAL { PLUSEQUAL }
OREQUAL { OREQUAL }
COLON { COLON }
LESS { LESS }
%%
compilationunit : typedeclarations { $1 }
typedeclarations : typedeclaration { [$1] }
| typedeclarations typedeclaration { $1 ++ [$2] }
name : simplename { Reference $1 }
| qualifiedname { $1 }
typedeclaration : classdeclaration { $1 }
qualifiedname : name DOT IDENTIFIER { BinaryOperation NameResolution $1 (Reference $3) }
simplename : IDENTIFIER { $1 }
classdeclaration : CLASS IDENTIFIER classbody { case $3 of (methods, fields) -> Class $2 methods fields }
| modifiers CLASS IDENTIFIER classbody { case $4 of (methods, fields) -> Class $3 methods fields }
classbody : LBRACKET RBRACKET { ([], []) }
| LBRACKET classbodydeclarations RBRACKET { $2 }
modifiers : modifier { }
| modifiers modifier { }
classbodydeclarations : classbodydeclaration {
case $1 of
MethodDecl method -> ([method], [])
FieldDecls fields -> ([], fields)
}
| classbodydeclarations classbodydeclaration {
case ($1, $2) of
((methods, fields), MethodDecl method) -> ((methods ++ [method]), fields)
((methods, fields), FieldDecls newFields) -> (methods, (fields ++ newFields))
}
modifier : PUBLIC { }
| PROTECTED { }
| PRIVATE { }
| STATIC { }
| ABSTRACT { }
classtype : classorinterfacetype { $1 }
classbodydeclaration : classmemberdeclaration { $1 }
| constructordeclaration { $1 }
classorinterfacetype : simplename { $1 }
classmemberdeclaration : fielddeclaration { $1 }
| methoddeclaration { $1 }
constructordeclaration : constructordeclarator constructorbody { MethodDecl $ MethodDeclaration "void" "<init>" $1 $2 }
| modifiers constructordeclarator constructorbody { MethodDecl $ MethodDeclaration "void" "<init>" $2 $3 }
fielddeclaration : type variabledeclarators SEMICOLON { FieldDecls $ map (convertDeclarator $1) $2 }
| modifiers type variabledeclarators SEMICOLON { FieldDecls $ map (convertDeclarator $2) $3 }
methoddeclaration : methodheader methodbody { case $1 of (returnType, (name, parameters)) -> MethodDecl (MethodDeclaration returnType name parameters $2) }
block : LBRACKET RBRACKET { Block [] }
| LBRACKET blockstatements RBRACKET { Block $2 }
constructordeclarator : simplename LBRACE RBRACE { [] }
| simplename LBRACE formalparameterlist RBRACE { $3 }
constructorbody : LBRACKET RBRACKET { Block [] }
-- | LBRACKET explicitconstructorinvocation RBRACKET { }
| LBRACKET blockstatements RBRACKET { Block $2 }
-- | LBRACKET explicitconstructorinvocation blockstatements RBRACKET { }
methodheader : type methoddeclarator { ($1, $2) }
| modifiers type methoddeclarator { ($2, $3) }
| VOID methoddeclarator { ("void", $2) }
| modifiers VOID methoddeclarator { ("void", $3)}
type : primitivetype { $1 }
| referencetype { $1 }
variabledeclarators : variabledeclarator { [$1] }
| variabledeclarators COMMA variabledeclarator { $1 ++ [$3] }
methodbody : block { $1 }
| SEMICOLON { Block [] }
blockstatements : blockstatement { $1 }
| blockstatements blockstatement { $1 ++ $2}
formalparameterlist : formalparameter { [$1] }
| formalparameterlist COMMA formalparameter { $1 ++ [$3] }
explicitconstructorinvocation : THIS LBRACE RBRACE SEMICOLON { }
| THIS LBRACE argumentlist RBRACE SEMICOLON { }
classtypelist : classtype { }
| classtypelist COMMA classtype { }
methoddeclarator : IDENTIFIER LBRACE RBRACE { ($1, []) }
| IDENTIFIER LBRACE formalparameterlist RBRACE { ($1, $3) }
primitivetype : BOOLEAN { "boolean" }
| numerictype { $1 }
referencetype : classorinterfacetype { $1 }
variabledeclarator : variabledeclaratorid { Declarator $1 Nothing }
| variabledeclaratorid ASSIGN variableinitializer { Declarator $1 (Just $3) }
blockstatement : localvariabledeclarationstatement { $1 } -- expected type statement
| statement { $1 }
formalparameter : type variabledeclaratorid { ParameterDeclaration $1 $2 }
argumentlist : expression { [$1] }
| argumentlist COMMA expression { $1 ++ [$3] }
numerictype : integraltype { $1 }
variabledeclaratorid : IDENTIFIER { $1 }
variableinitializer : expression { $1 }
localvariabledeclarationstatement : localvariabledeclaration SEMICOLON { $1 }
statement : statementwithouttrailingsubstatement{ $1 } -- statement returns a list of statements
| ifthenstatement { [$1] }
| ifthenelsestatement { [$1] }
| whilestatement { [$1] }
expression : assignmentexpression { $1 }
integraltype : INT { "int" }
| CHAR { "char" }
localvariabledeclaration : type variabledeclarators { map LocalVariableDeclaration $ map (convertDeclarator $1) $2 }
statementwithouttrailingsubstatement : block { [$1] }
| emptystatement { [] }
| expressionstatement { [$1] }
| returnstatement { [$1] }
ifthenstatement : IF LBRACE expression RBRACE statement { If $3 (Block $5) Nothing }
ifthenelsestatement : IF LBRACE expression RBRACE statementnoshortif ELSE statement { If $3 (Block $5) (Just (Block $7)) }
whilestatement : WHILE LBRACE expression RBRACE statement { While $3 (Block $5) }
assignmentexpression : conditionalexpression { $1 }
| assignment { StatementExpressionExpression $1 }
emptystatement : SEMICOLON { Block [] }
expressionstatement : statementexpression SEMICOLON { StatementExpressionStatement $1 }
returnstatement : RETURN SEMICOLON { Return Nothing }
| RETURN expression SEMICOLON { Return $ Just $2 }
statementnoshortif : statementwithouttrailingsubstatement { $1 }
-- | ifthenelsestatementnoshortif { }
-- | whilestatementnoshortif { }
conditionalexpression : conditionalorexpression { $1 }
-- | conditionalorexpression QUESMARK expression COLON conditionalexpression { }
assignment : lefthandside assignmentoperator assignmentexpression {
case $2 of
Nothing -> Assignment $1 $3
Just operator -> Assignment $1 (BinaryOperation operator $1 $3)
}
statementexpression : assignment { $1 }
| preincrementexpression { $1 }
| predecrementexpression { $1 }
| postincrementexpression { $1 }
| postdecrementexpression { $1 }
| methodinvocation { $1 }
| classinstancecreationexpression { $1 }
ifthenelsestatementnoshortif :IF LBRACE expression RBRACE statementnoshortif
ELSE statementnoshortif { }
whilestatementnoshortif : WHILE LBRACE expression RBRACE statementnoshortif { }
conditionalorexpression : conditionalandexpression { $1 }
-- | conditionalorexpression LOGICALOR conditionalandexpression{ }
lefthandside : name { $1 }
assignmentoperator : ASSIGN { Nothing }
| TIMESEQUAL { Just Multiplication }
| DIVIDEEQUAL { Just Division }
| MODULOEQUAL { Just Modulo }
| PLUSEQUAL { Just Addition }
| MINUSEQUAL { Just Subtraction }
-- | SHIFTLEFTEQUAL { }
-- | SIGNEDSHIFTRIGHTEQUAL { }
-- | UNSIGNEDSHIFTRIGHTEQUAL { }
| ANDEQUAL { Just BitwiseAnd }
| XOREQUAL { Just BitwiseXor }
| OREQUAL{ Just BitwiseOr }
preincrementexpression : INCREMENT unaryexpression { PreIncrement $2 }
predecrementexpression : DECREMENT unaryexpression { PreDecrement $2 }
postincrementexpression : postfixexpression INCREMENT { PostIncrement $1 }
postdecrementexpression : postfixexpression DECREMENT { PostDecrement $1 }
methodinvocation : simplename LBRACE RBRACE { MethodCall (Reference "this") $1 [] }
| simplename LBRACE argumentlist RBRACE { MethodCall (Reference "this") $1 $3 }
| primary DOT IDENTIFIER LBRACE RBRACE { MethodCall $1 $3 [] }
| primary DOT IDENTIFIER LBRACE argumentlist RBRACE { MethodCall $1 $3 $5 }
classinstancecreationexpression : NEW classtype LBRACE RBRACE { ConstructorCall $2 [] }
| NEW classtype LBRACE argumentlist RBRACE { ConstructorCall $2 $4 }
conditionalandexpression : inclusiveorexpression { $1 }
fieldaccess : primary DOT IDENTIFIER { BinaryOperation NameResolution $1 (Reference $3) }
unaryexpression : unaryexpressionnotplusminus { $1 }
| predecrementexpression { StatementExpressionExpression $1 }
| PLUS unaryexpression { $2 }
| MINUS unaryexpression { UnaryOperation Minus $2 }
| preincrementexpression { StatementExpressionExpression $1 }
postfixexpression : primary { $1 }
| name { $1 }
| postincrementexpression { StatementExpressionExpression $1 }
| postdecrementexpression { StatementExpressionExpression $1 }
primary : primarynonewarray { $1 }
inclusiveorexpression : exclusiveorexpression { $1 }
| inclusiveorexpression OR exclusiveorexpression { BinaryOperation Or $1 $3 }
primarynonewarray : literal { $1 }
| THIS { Reference "this" }
| LBRACE expression RBRACE { $2 }
| classinstancecreationexpression { StatementExpressionExpression $1 }
| fieldaccess { $1 }
| methodinvocation { StatementExpressionExpression $1 }
unaryexpressionnotplusminus : postfixexpression { $1 }
-- | TILDE unaryexpression { }
| EXCLMARK unaryexpression { UnaryOperation Not $2 }
-- | castexpression{ }
exclusiveorexpression : andexpression { $1 }
| exclusiveorexpression XOR andexpression { BinaryOperation BitwiseXor $1 $3 }
literal : INTLITERAL { IntegerLiteral $1 }
| BOOLLITERAL { BooleanLiteral $1 }
| CHARLITERAL { CharacterLiteral $1 }
| JNULL { NullLiteral }
castexpression : LBRACE primitivetype RBRACE unaryexpression { }
| LBRACE expression RBRACE unaryexpressionnotplusminus{ }
andexpression : equalityexpression { $1 }
| andexpression AND equalityexpression { BinaryOperation And $1 $3 }
equalityexpression : relationalexpression { $1 }
| equalityexpression EQUAL relationalexpression { BinaryOperation CompareEqual $1 $3 }
| equalityexpression NOTEQUAL relationalexpression { BinaryOperation CompareNotEqual $1 $3 }
relationalexpression : shiftexpression { $1 }
| relationalexpression LESS shiftexpression { BinaryOperation CompareLessThan $1 $3 }
| relationalexpression GREATER shiftexpression { BinaryOperation CompareGreaterThan $1 $3 }
| relationalexpression LESSEQUAL shiftexpression { BinaryOperation CompareLessOrEqual $1 $3 }
| relationalexpression GREATEREQUAL shiftexpression { BinaryOperation CompareGreaterOrEqual $1 $3 }
-- | relationalexpression INSTANCEOF referencetype { }
shiftexpression : additiveexpression { $1 }
additiveexpression : multiplicativeexpression { $1 }
| additiveexpression PLUS multiplicativeexpression { BinaryOperation Addition $1 $3 }
| additiveexpression MINUS multiplicativeexpression { BinaryOperation Subtraction $1 $3 }
multiplicativeexpression : unaryexpression { $1 }
| multiplicativeexpression MUL unaryexpression { BinaryOperation Multiplication $1 $3 }
| multiplicativeexpression DIV unaryexpression { BinaryOperation Division $1 $3 }
| multiplicativeexpression MOD unaryexpression { BinaryOperation Modulo $1 $3 }
{
data MethodOrFieldDeclaration = MethodDecl MethodDeclaration
| FieldDecls [VariableDeclaration]
data Declarator = Declarator Identifier (Maybe Expression)
convertDeclarator :: DataType -> Declarator -> VariableDeclaration
convertDeclarator dataType (Declarator id assigment) = VariableDeclaration dataType id assigment
data StatementWithoutSub = Statement
parseError :: ([Token], [String]) -> a
parseError (errortoken, expected) = error ("parse error on token: " ++ show errortoken ++ "\nexpected one of: " ++ show expected)
}

228
src/Parser/Lexer.x Normal file
View File

@@ -0,0 +1,228 @@
{
module Parser.Lexer(Token(..), alexScanTokens) where
import Text.Read
}
%wrapper "basic"
$digit = 0-9
$alpha = [a-zA-Z]
$alphanum = [a-zA-Z0-9]
$JavaLetter = [A-Za-z\_\$]
$JavaLetterOrDigit = [A-Za-z\_\$0-9]
tokens :-
$white ;
"/*"(.|\n)*"*/" ;
"//".* ;
-- keywords
"abstract" { \_ -> ABSTRACT }
"assert" { \_ -> BOOLEAN }
"boolean" { \_ -> BOOLEAN}
"break" { \_ -> BREAK}
"byte" { \_ -> BYTE}
"case" { \_ -> CASE}
"catch" { \_ -> CATCH}
"char" { \_ -> CHAR}
"class" { \_ -> CLASS}
"const" { \_ -> CONST}
"continue" { \_ -> CONTINUE}
"default" { \_ -> DEFAULT}
"do" { \_ -> DO}
"double" { \_ -> DOUBLE}
("else"|"ifn't") { \_ -> ELSE}
"enum" { \_ -> ENUM}
"extends" { \_ -> EXTENDS}
"final" { \_ -> FINAL}
"finally" { \_ -> FINALLY}
"float" { \_ -> FLOAT}
"for" { \_ -> FOR}
"if" { \_ -> IF}
"goto" { \_ -> GOTO}
"implements" { \_ -> IMPLEMENTS}
"import" { \_ -> IMPORT}
"instanceof" { \_ -> INSTANCEOF}
"int" { \_ -> INT}
"long" { \_ -> LONG}
"native" { \_ -> NATIVE}
"new" { \_ -> NEW}
"package" { \_ -> PACKAGE}
"private" { \_ -> PRIVATE}
"protected" { \_ -> PROTECTED}
"public" { \_ -> PUBLIC}
"return" { \_ -> RETURN}
"short" { \_ -> SHORT}
"static" { \_ -> STATIC}
"strictfp" { \_ -> STRICTFP}
"super" { \_ -> SUPER}
"switch" { \_ -> SWITCH}
"synchronized" { \_ -> SYNCHRONIZED}
"this" { \_ -> THIS}
"throw" { \_ -> THROW}
"throws" { \_ -> THROWS}
"transient" { \_ -> TRANSIENT}
"try" { \_ -> TRY}
"void" { \_ -> VOID}
"volatile" { \_ -> VOLATILE}
"while" { \_ -> WHILE}
-- Literals
"true" { \_ -> BOOLLITERAL True }
"false" { \_ -> BOOLLITERAL False }
"null" { \_ -> NULLLITERAL }
-- end keywords
$JavaLetter$JavaLetterOrDigit* { \s -> IDENTIFIER s }
-- Literals
[0-9]([0-9\_]*[0-9])* { \s -> case readMaybe $ filter ((/=) '_') s of Just a -> INTEGERLITERAL a; Nothing -> error ("failed to parse INTLITERAL " ++ s) }
"'"."'" { \s -> case (s) of _ : c : _ -> CHARLITERAL c; _ -> error ("failed to parse CHARLITERAL " ++ s) }
-- separators
"(" { \_ -> LBRACE }
")" { \_ -> RBRACE }
"{" { \_ -> LBRACKET }
"}" { \_ -> RBRACKET }
";" { \_ -> SEMICOLON }
"," { \_ -> COMMA}
"." { \_ -> DOT }
-- operators
"=" { \_ -> ASSIGN }
"==" { \_ -> EQUAL }
"+" { \_ -> PLUS }
"+=" { \_ -> PLUSEQUAL }
">" { \_ -> GREATER }
">=" { \_ -> GREATEREQUAL }
"-" { \_ -> MINUS }
"-=" { \_ -> MINUSEQUAL }
"<" { \_ -> LESS }
"<=" { \_ -> LESSEQUAL }
"*" { \_ -> TIMES }
"*=" { \_ -> TIMESEQUAL }
"!" { \_ -> NOT }
"!=" { \_ -> NOTEQUAL }
"/" { \_ -> DIV }
"/=" { \_ -> DIVEQUAL }
"~" { \_ -> BITWISENOT }
"&&" { \_ -> AND }
"&" { \_ -> BITWISEAND }
"&=" { \_ -> ANDEQUAL }
"?" { \_ -> QUESTIONMARK }
"||" { \_ -> OR }
"|" { \_ -> BITWISEOR }
"|=" { \_ -> OREQUAL }
":" { \_ -> COLON }
"++" { \_ -> INCREMENT }
"^" { \_ -> XOR }
"^=" { \_ -> XOREQUAL }
"--" { \_ -> DECREMENT }
"%" { \_ -> MODULO }
"%=" { \_ -> MODULOEQUAL }
"<<" { \_ -> SHIFTLEFT }
"<<=" { \_ -> SHIFTLEFTEQUAL }
">>" { \_ -> SHIFTRIGHT }
">>=" { \_ -> SHIFTRIGHTEQUAL }
">>>" { \_ -> UNSIGNEDSHIFTRIGHT }
">>>=" { \_ -> UNSIGNEDSHIFTRIGHTEQUAL }
{
data Token
= ABSTRACT
| ASSERT
| BOOLEAN
| BREAK
| BYTE
| CASE
| CATCH
| CHAR
| CLASS
| CONST
| CONTINUE
| DEFAULT
| DO
| DOUBLE
| ELSE
| ENUM
| EXTENDS
| FINAL
| FINALLY
| FLOAT
| FOR
| IF
| GOTO
| IMPLEMENTS
| IMPORT
| INSTANCEOF
| INT
| INTERFACE
| LONG
| NATIVE
| NEW
| PACKAGE
| PRIVATE
| PROTECTED
| PUBLIC
| RETURN
| SHORT
| STATIC
| STRICTFP
| SUPER
| SWITCH
| SYNCHRONIZED
| THIS
| THROW
| THROWS
| TRANSIENT
| TRY
| VOID
| VOLATILE
| WHILE
| IDENTIFIER String
| INTEGERLITERAL Int
| CHARLITERAL Char
| BOOLLITERAL Bool
| NULLLITERAL
| LBRACE
| RBRACE
| LBRACKET
| RBRACKET
| SEMICOLON
| COMMA
| DOT
| ASSIGN
| EQUAL
| PLUS
| PLUSEQUAL
| GREATER
| GREATEREQUAL
| MINUS
| MINUSEQUAL
| LESS
| LESSEQUAL
| TIMES
| TIMESEQUAL
| NOT
| NOTEQUAL
| DIV
| DIVEQUAL
| BITWISENOT
| AND
| BITWISEAND
| ANDEQUAL
| QUESTIONMARK
| OR
| BITWISEOR
| OREQUAL
| COLON
| INCREMENT
| XOR
| XOREQUAL
| DECREMENT
| MODULO
| MODULOEQUAL
| SHIFTLEFT
| SHIFTLEFTEQUAL
| SHIFTRIGHT
| SHIFTRIGHTEQUAL
| UNSIGNEDSHIFTRIGHT
| UNSIGNEDSHIFTRIGHTEQUAL
deriving(Eq,Show)
}

405
src/Typecheck.hs Normal file
View File

@@ -0,0 +1,405 @@
module Typecheck where
import Data.List (find)
import Data.Maybe
import Ast
typeCheckCompilationUnit :: CompilationUnit -> CompilationUnit
typeCheckCompilationUnit classes = map (`typeCheckClass` classes) classes
typeCheckClass :: Class -> [Class] -> Class
typeCheckClass (Class className methods fields) classes =
let
-- Fields and methods dont need to be added to the symtab because they are looked upon automatically under "this"
-- if its not a declared local variable. Also shadowing wouldnt be possible then.
initalSymTab = [("this", className)]
checkedMethods = map (\method -> typeCheckMethodDeclaration method initalSymTab classes) methods
checkedFields = map (\field -> typeCheckVariableDeclaration field initalSymTab classes) fields
in Class className checkedMethods checkedFields
typeCheckMethodDeclaration :: MethodDeclaration -> [(Identifier, DataType)] -> [Class] -> MethodDeclaration
typeCheckMethodDeclaration (MethodDeclaration retType name params body) symtab classes =
let
methodParams = [(identifier, dataType) | ParameterDeclaration dataType identifier <- params]
initialSymtab = ("thisMeth", retType) : symtab ++ methodParams
checkedBody = typeCheckStatement body initialSymtab classes
bodyType = getTypeFromStmt checkedBody
in if bodyType == retType || (bodyType == "void" && retType == "void") || (bodyType == "null" && isObjectType retType) || isSubtype bodyType retType classes
then MethodDeclaration retType name params checkedBody
else error $ "Method Declaration: Return type mismatch in method " ++ name ++ ": expected " ++ retType ++ ", found " ++ bodyType
typeCheckVariableDeclaration :: VariableDeclaration -> [(Identifier, DataType)] -> [Class] -> VariableDeclaration
typeCheckVariableDeclaration (VariableDeclaration dataType identifier maybeExpr) symtab classes =
let
-- Ensure the type is valid (either a primitive type or a valid class name)
validType = dataType `elem` ["int", "boolean", "char"] || isUserDefinedClass dataType classes
-- Ensure no redefinition in the same scope
redefined = any ((== identifier) . snd) symtab
-- Type check the initializer expression if it exists
checkedExpr = fmap (\expr -> typeCheckExpression expr symtab classes) maybeExpr
exprType = fmap getTypeFromExpr checkedExpr
in case (validType, redefined, exprType) of
(False, _, _) -> error $ "Type '" ++ dataType ++ "' is not a valid type for variable '" ++ identifier ++ "'"
(_, True, _) -> error $ "Variable '" ++ identifier ++ "' is redefined in the same scope"
(_, _, Just t)
| t == "null" && isObjectType dataType -> VariableDeclaration dataType identifier checkedExpr
| t /= dataType -> error $ "Type mismatch in declaration of '" ++ identifier ++ "': expected " ++ dataType ++ ", found " ++ t
| otherwise -> VariableDeclaration dataType identifier checkedExpr
(_, _, Nothing) -> VariableDeclaration dataType identifier checkedExpr
-- ********************************** Type Checking: Expressions **********************************
typeCheckExpression :: Expression -> [(Identifier, DataType)] -> [Class] -> Expression
typeCheckExpression (IntegerLiteral i) _ _ = TypedExpression "int" (IntegerLiteral i)
typeCheckExpression (CharacterLiteral c) _ _ = TypedExpression "char" (CharacterLiteral c)
typeCheckExpression (BooleanLiteral b) _ _ = TypedExpression "boolean" (BooleanLiteral b)
typeCheckExpression NullLiteral _ _ = TypedExpression "null" NullLiteral
typeCheckExpression (Reference id) symtab classes =
case lookup id symtab of
Just t -> TypedExpression t (LocalVariable id)
Nothing ->
case lookup "this" symtab of
Just className ->
let classDetails = find (\(Class name _ _) -> name == className) classes
in case classDetails of
Just (Class _ _ fields) ->
let fieldTypes = [dt | VariableDeclaration dt fieldId _ <- fields, fieldId == id]
-- this case only happens when its a field of its own class so the implicit this will be converted to explicit this
in case fieldTypes of
[fieldType] -> TypedExpression fieldType (BinaryOperation NameResolution (TypedExpression className (LocalVariable "this")) (TypedExpression fieldType (FieldVariable id)))
[] -> error $ "Field '" ++ id ++ "' not found in class '" ++ className ++ "'"
_ -> error $ "Ambiguous reference to field '" ++ id ++ "' in class '" ++ className ++ "'"
Nothing -> error $ "Class '" ++ className ++ "' not found for 'this'"
Nothing -> error $ "Context for 'this' not found in symbol table, unable to resolve '" ++ id ++ "'"
typeCheckExpression (BinaryOperation op expr1 expr2) symtab classes =
let expr1' = typeCheckExpression expr1 symtab classes
expr2' = typeCheckExpression expr2 symtab classes
type1 = getTypeFromExpr expr1'
type2 = getTypeFromExpr expr2'
resultType = resolveResultType type1 type2
in case op of
Addition -> checkArithmeticOperation op expr1' expr2' type1 type2 resultType
Subtraction -> checkArithmeticOperation op expr1' expr2' type1 type2 resultType
Multiplication -> checkArithmeticOperation op expr1' expr2' type1 type2 resultType
Division -> checkArithmeticOperation op expr1' expr2' type1 type2 resultType
Modulo -> checkArithmeticOperation op expr1' expr2' type1 type2 resultType
BitwiseAnd -> checkBitwiseOperation op expr1' expr2' type1 type2
BitwiseOr -> checkBitwiseOperation op expr1' expr2' type1 type2
BitwiseXor -> checkBitwiseOperation op expr1' expr2' type1 type2
CompareLessThan -> checkComparisonOperation op expr1' expr2' type1 type2
CompareLessOrEqual -> checkComparisonOperation op expr1' expr2' type1 type2
CompareGreaterThan -> checkComparisonOperation op expr1' expr2' type1 type2
CompareGreaterOrEqual -> checkComparisonOperation op expr1' expr2' type1 type2
CompareEqual -> checkEqualityOperation op expr1' expr2' type1 type2
CompareNotEqual -> checkEqualityOperation op expr1' expr2' type1 type2
And -> checkLogicalOperation op expr1' expr2' type1 type2
Or -> checkLogicalOperation op expr1' expr2' type1 type2
NameResolution -> resolveNameResolution expr1' expr2 symtab classes
typeCheckExpression (UnaryOperation op expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
type' = getTypeFromExpr expr'
in case op of
Not ->
if type' == "boolean"
then
TypedExpression "boolean" (UnaryOperation op expr')
else
error "Logical NOT operation requires an operand of type boolean"
Minus ->
if type' == "int" || type' == "char"
then
TypedExpression type' (UnaryOperation op expr')
else
error "Unary minus operation requires an operand of type int or char"
typeCheckExpression (StatementExpressionExpression stmtExpr) symtab classes =
let stmtExpr' = typeCheckStatementExpression stmtExpr symtab classes
in TypedExpression (getTypeFromStmtExpr stmtExpr') (StatementExpressionExpression stmtExpr')
-- ********************************** Type Checking: StatementExpressions **********************************
typeCheckStatementExpression :: StatementExpression -> [(Identifier, DataType)] -> [Class] -> StatementExpression
typeCheckStatementExpression (Assignment ref expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
ref' = typeCheckExpression ref symtab classes
type' = getTypeFromExpr expr'
type'' = getTypeFromExpr ref'
in
if type'' == type' || (type' == "null" && isObjectType type'') then
TypedStatementExpression type'' (Assignment ref' expr')
else
error $ "Type mismatch in assignment to variable: expected " ++ type'' ++ ", found " ++ type'
typeCheckStatementExpression (ConstructorCall className args) symtab classes =
case find (\(Class name _ _) -> name == className) classes of
Nothing -> error $ "Class '" ++ className ++ "' not found."
Just (Class _ methods fields) ->
-- Find constructor matching the class name with void return type
case find (\(MethodDeclaration retType name params _) -> name == "<init>" && retType == "void") methods of
-- If no constructor is found, assume standard constructor with no parameters
Nothing ->
if null args then
TypedStatementExpression className (ConstructorCall className args)
else
error $ "No valid constructor found for class '" ++ className ++ "', but arguments were provided."
Just (MethodDeclaration _ _ params _) ->
let
args' = map (\arg -> typeCheckExpression arg symtab classes) args
-- Extract expected parameter types from the constructor's parameters
expectedTypes = [dataType | ParameterDeclaration dataType _ <- params]
argTypes = map getTypeFromExpr args'
-- Check if the types of the provided arguments match the expected types
typeMatches = zipWith (\expected actual -> if expected == actual then Nothing else Just (expected, actual)) expectedTypes argTypes
mismatchErrors = map (\(exp, act) -> "Expected type '" ++ exp ++ "', found '" ++ act ++ "'.") (catMaybes typeMatches)
in
if length args /= length params then
error $ "Constructor for class '" ++ className ++ "' expects " ++ show (length params) ++ " arguments, but got " ++ show (length args) ++ "."
else if not (null mismatchErrors) then
error $ unlines $ ("Type mismatch in constructor arguments for class '" ++ className ++ "':") : mismatchErrors
else
TypedStatementExpression className (ConstructorCall className args')
typeCheckStatementExpression (MethodCall expr methodName args) symtab classes =
let objExprTyped = typeCheckExpression expr symtab classes
in case objExprTyped of
TypedExpression objType _ ->
case find (\(Class className _ _) -> className == objType) classes of
Just (Class _ methods _) ->
case find (\(MethodDeclaration retType name params _) -> name == methodName) methods of
Just (MethodDeclaration retType _ params _) ->
let args' = map (\arg -> typeCheckExpression arg symtab classes) args
expectedTypes = [dataType | ParameterDeclaration dataType _ <- params]
argTypes = map getTypeFromExpr args'
typeMatches = zipWith (\expType argType -> (expType == argType, expType, argType)) expectedTypes argTypes
mismatches = filter (not . fst3) typeMatches
where fst3 (a, _, _) = a
in
if null mismatches && length args == length params then
TypedStatementExpression retType (MethodCall objExprTyped methodName args')
else if not (null mismatches) then
error $ unlines $ ("Argument type mismatches for method '" ++ methodName ++ "':")
: [ "Expected: " ++ expType ++ ", Found: " ++ argType | (_, expType, argType) <- mismatches ]
else
error $ "Incorrect number of arguments for method '" ++ methodName ++ "'. Expected " ++ show (length expectedTypes) ++ ", found " ++ show (length args) ++ "."
Nothing -> error $ "Method '" ++ methodName ++ "' not found in class '" ++ objType ++ "'."
Nothing -> error $ "Class for object type '" ++ objType ++ "' not found."
_ -> error "Invalid object type for method call. Object must have a class type."
typeCheckStatementExpression (PostIncrement expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
type' = getTypeFromExpr expr'
in if type' == "int" || type' == "char"
then
TypedStatementExpression type' (PostIncrement expr')
else
error "Post-increment operation requires an operand of type int or char"
typeCheckStatementExpression (PostDecrement expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
type' = getTypeFromExpr expr'
in if type' == "int" || type' == "char"
then
TypedStatementExpression type' (PostDecrement expr')
else
error "Post-decrement operation requires an operand of type int or char"
typeCheckStatementExpression (PreIncrement expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
type' = getTypeFromExpr expr'
in if type' == "int" || type' == "char"
then
TypedStatementExpression type' (PreIncrement expr')
else
error "Pre-increment operation requires an operand of type int or char"
typeCheckStatementExpression (PreDecrement expr) symtab classes =
let expr' = typeCheckExpression expr symtab classes
type' = getTypeFromExpr expr'
in if type' == "int" || type' == "char"
then
TypedStatementExpression type' (PreDecrement expr')
else
error "Pre-decrement operation requires an operand of type int or char"
-- ********************************** Type Checking: Statements **********************************
typeCheckStatement :: Statement -> [(Identifier, DataType)] -> [Class] -> Statement
typeCheckStatement (If cond thenStmt elseStmt) symtab classes =
let
cond' = typeCheckExpression cond symtab classes
thenStmt' = typeCheckStatement thenStmt symtab classes
elseStmt' = fmap (\stmt -> typeCheckStatement stmt symtab classes) elseStmt
thenType = getTypeFromStmt thenStmt'
elseType = maybe "void" getTypeFromStmt elseStmt'
ifType = if thenType == "void" || elseType == "void"
then "void"
else unifyReturnTypes thenType elseType
in if getTypeFromExpr cond' == "boolean"
then TypedStatement ifType (If cond' thenStmt' elseStmt')
else error "If condition must be of type boolean"
typeCheckStatement (LocalVariableDeclaration (VariableDeclaration dataType identifier maybeExpr)) symtab classes =
-- Check for redefinition in the current scope
if any ((== identifier) . snd) symtab
then error $ "Variable '" ++ identifier ++ "' is redefined in the same scope"
else
-- If there's an initializer expression, type check it
let checkedExpr = fmap (\expr -> typeCheckExpression expr symtab classes) maybeExpr
exprType = fmap getTypeFromExpr checkedExpr
in case exprType of
Just t
| t == "null" && isObjectType dataType ->
TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr))
| t /= dataType -> error $ "Type mismatch in declaration of '" ++ identifier ++ "': expected " ++ dataType ++ ", found " ++ t
| otherwise -> TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr))
Nothing -> TypedStatement dataType (LocalVariableDeclaration (VariableDeclaration dataType identifier checkedExpr))
typeCheckStatement (While cond stmt) symtab classes =
let cond' = typeCheckExpression cond symtab classes
stmt' = typeCheckStatement stmt symtab classes
in if getTypeFromExpr cond' == "boolean"
then
TypedStatement (getTypeFromStmt stmt') (While cond' stmt')
else
error "While condition must be of type boolean"
typeCheckStatement (Block statements) symtab classes =
let
processStatements (accSts, currentSymtab, types) stmt =
case stmt of
LocalVariableDeclaration (VariableDeclaration dataType identifier maybeExpr) ->
let
alreadyDefined = any (\(id, _) -> id == identifier) currentSymtab
newSymtab = if alreadyDefined
then error ("Variable " ++ identifier ++ " already defined in this scope.")
else (identifier, dataType) : currentSymtab
checkedExpr = fmap (\expr -> typeCheckExpression expr currentSymtab classes) maybeExpr
checkedStmt = typeCheckStatement stmt newSymtab classes
in (accSts ++ [checkedStmt], newSymtab, types)
_ ->
let
checkedStmt = typeCheckStatement stmt currentSymtab classes
stmtType = getTypeFromStmt checkedStmt
in case stmt of
If {} -> (accSts ++ [checkedStmt], currentSymtab, if stmtType /= "void" then types ++ [stmtType] else types)
While _ _ -> (accSts ++ [checkedStmt], currentSymtab, if stmtType /= "void" then types ++ [stmtType] else types)
Return _ -> (accSts ++ [checkedStmt], currentSymtab, if stmtType /= "void" then types ++ [stmtType] else types)
Block _ -> (accSts ++ [checkedStmt], currentSymtab, if stmtType /= "void" then types ++ [stmtType] else types)
_ -> (accSts ++ [checkedStmt], currentSymtab, types)
-- Initial accumulator: empty statements list, initial symbol table, empty types list
(checkedStatements, finalSymtab, collectedTypes) = foldl processStatements ([], symtab, []) statements
-- Determine the block's type: unify all collected types, default to "void" if none (UpperBound)
blockType = if null collectedTypes then "void" else foldl1 unifyReturnTypes collectedTypes
in TypedStatement blockType (Block checkedStatements)
typeCheckStatement (Return expr) symtab classes =
let methodReturnType = fromMaybe (error "Method return type not found in symbol table") (lookup "thisMeth" symtab)
expr' = case expr of
Just e -> Just (typeCheckExpression e symtab classes)
Nothing -> Nothing
returnType = maybe "void" getTypeFromExpr expr'
in if returnType == methodReturnType || isSubtype returnType methodReturnType classes
then TypedStatement returnType (Return expr')
else error $ "Return: Return type mismatch: expected " ++ methodReturnType ++ ", found " ++ returnType
typeCheckStatement (StatementExpressionStatement stmtExpr) symtab classes =
let stmtExpr' = typeCheckStatementExpression stmtExpr symtab classes
in TypedStatement (getTypeFromStmtExpr stmtExpr') (StatementExpressionStatement stmtExpr')
-- ********************************** Type Checking: Helpers **********************************
isSubtype :: DataType -> DataType -> [Class] -> Bool
isSubtype subType superType classes
| subType == superType = True
| subType == "null" && isObjectType superType = True
| superType == "Object" && isObjectType subType = True
| superType == "Object" && isUserDefinedClass subType classes = True
| otherwise = False
isUserDefinedClass :: DataType -> [Class] -> Bool
isUserDefinedClass dt classes = dt `elem` map (\(Class name _ _) -> name) classes
isObjectType :: DataType -> Bool
isObjectType dt = dt /= "int" && dt /= "boolean" && dt /= "char"
getTypeFromExpr :: Expression -> DataType
getTypeFromExpr (TypedExpression t _) = t
getTypeFromExpr _ = error "Untyped expression found where typed was expected"
getTypeFromStmt :: Statement -> DataType
getTypeFromStmt (TypedStatement t _) = t
getTypeFromStmt _ = error "Untyped statement found where typed was expected"
getTypeFromStmtExpr :: StatementExpression -> DataType
getTypeFromStmtExpr (TypedStatementExpression t _) = t
getTypeFromStmtExpr _ = error "Untyped statement expression found where typed was expected"
unifyReturnTypes :: DataType -> DataType -> DataType
unifyReturnTypes dt1 dt2
| dt1 == dt2 = dt1
| dt1 == "null" = dt2
| dt2 == "null" = dt1
| otherwise = "Object"
resolveResultType :: DataType -> DataType -> DataType
resolveResultType "char" "char" = "char"
resolveResultType "int" "int" = "int"
resolveResultType "char" "int" = "int"
resolveResultType "int" "char" = "int"
resolveResultType t1 t2
| t1 == t2 = t1
| otherwise = error $ "Incompatible types: " ++ t1 ++ " and " ++ t2
checkArithmeticOperation :: BinaryOperator -> Expression -> Expression -> DataType -> DataType -> DataType -> Expression
checkArithmeticOperation op expr1' expr2' type1 type2 resultType
| (type1 == "int" || type1 == "char") && (type2 == "int" || type2 == "char") =
TypedExpression resultType (BinaryOperation op expr1' expr2')
| otherwise = error $ "Arithmetic operation " ++ show op ++ " requires operands of type int or char"
checkBitwiseOperation :: BinaryOperator -> Expression -> Expression -> DataType -> DataType -> Expression
checkBitwiseOperation op expr1' expr2' type1 type2
| type1 == "int" && type2 == "int" =
TypedExpression "int" (BinaryOperation op expr1' expr2')
| otherwise = error $ "Bitwise operation " ++ show op ++ " requires operands of type int or char"
checkComparisonOperation :: BinaryOperator -> Expression -> Expression -> DataType -> DataType -> Expression
checkComparisonOperation op expr1' expr2' type1 type2
| (type1 == "int" || type1 == "char") && (type2 == "int" || type2 == "char") =
TypedExpression "boolean" (BinaryOperation op expr1' expr2')
| otherwise = error $ "Comparison operation " ++ show op ++ " requires operands of type int or char"
checkEqualityOperation :: BinaryOperator -> Expression -> Expression -> DataType -> DataType -> Expression
checkEqualityOperation op expr1' expr2' type1 type2
| type1 == type2 =
TypedExpression "boolean" (BinaryOperation op expr1' expr2')
| otherwise = error $ "Equality operation " ++ show op ++ " requires operands of the same type"
checkLogicalOperation :: BinaryOperator -> Expression -> Expression -> DataType -> DataType -> Expression
checkLogicalOperation op expr1' expr2' type1 type2
| type1 == "boolean" && type2 == "boolean" =
TypedExpression "boolean" (BinaryOperation op expr1' expr2')
| otherwise = error $ "Logical operation " ++ show op ++ " requires operands of type boolean"
resolveNameResolution :: Expression -> Expression -> [(Identifier, DataType)] -> [Class] -> Expression
resolveNameResolution expr1' (Reference ident2) symtab classes =
case getTypeFromExpr expr1' of
objType ->
case find (\(Class className _ _) -> className == objType) classes of
Just (Class _ _ fields) ->
let fieldTypes = [dt | VariableDeclaration dt id _ <- fields, id == ident2]
in case fieldTypes of
[resolvedType] -> TypedExpression resolvedType (BinaryOperation NameResolution expr1' (TypedExpression resolvedType (FieldVariable ident2)))
[] -> error $ "Field '" ++ ident2 ++ "' not found in class '" ++ objType ++ "'"
_ -> error $ "Ambiguous reference to field '" ++ ident2 ++ "' in class '" ++ objType ++ "'"
Nothing -> error $ "Class '" ++ objType ++ "' not found"
resolveNameResolution _ _ _ _ = error "Name resolution requires object reference and field name"