lexer implement separators and fix literals

This commit is contained in:
Marvin Schlegel 2024-05-02 17:45:04 +02:00
parent ecd778cc70
commit 4108eb58c1
2 changed files with 25 additions and 7 deletions

View File

@ -26,6 +26,8 @@ testCharLiteral = TestCase $ assertEqual "scan ''f''" [CHARLITERAL 'f'] $ alexSc
testBoolLiteralTrue = TestCase $ assertEqual "scan 'true'" [BOOLLITERAL True] $ alexScanTokens "true"
testBoolLiteralFalse = TestCase $ assertEqual "scan 'false'" [BOOLLITERAL False] $ alexScanTokens "false"
testLBrace = TestCase $ assertEqual "scan '('" [LBRACE] $ alexScanTokens "("
tests = TestList [
TestLabel "TestCommentSomething" testCommentSomething,
TestLabel "TestEmptyComment" testEmptyComment,
@ -40,5 +42,6 @@ tests = TestList [
TestLabel "TestIntLiteral2" testIntLiteral2,
TestLabel "TestCharLiteral" testCharLiteral,
TestLabel "TestBoolLiteralTrue" testBoolLiteralTrue,
TestLabel "TestBoolLiteralFalse" testBoolLiteralFalse
TestLabel "TestBoolLiteralFalse" testBoolLiteralFalse,
TestLabel "TestLBrace" testLBrace
]

View File

@ -1,6 +1,6 @@
{
module Parser.Lexer(Token(..), alexScanTokens) where
import Data.Char
import Text.Read
}
%wrapper "basic"
@ -65,14 +65,23 @@ tokens :-
"void" { \_ -> VOID}
"volatile" { \_ -> VOLATILE}
"while" { \_ -> WHILE}
-- end keywords
$JavaLetter$JavaLetterOrDigit* { \s -> IDENTIFIER s }
-- Literals
[1-9]([0-9\_]*[0-9])* { \s -> INTEGERLITERAL $ read $ filter ((/=) '_') s }
"'"."'" { \s -> CHARLITERAL $ read $ filter ((/=) '\'') s }
"true" { \_ -> BOOLLITERAL True }
"false" { \_ -> BOOLLITERAL False }
"null" { \_ -> NULLLITERAL }
-- end keywords
$JavaLetter$JavaLetterOrDigit* { \s -> IDENTIFIER s }
-- Literals
[1-9]([0-9\_]*[0-9])* { \s -> case readMaybe $ filter ((/=) '_') s of Just a -> INTEGERLITERAL a; Nothing -> error ("failed to parse INTLITERAL " ++ s) }
"'"."'" { \s -> case (s) of _ : c : _ -> CHARLITERAL c; _ -> error ("failed to parse CHARLITERAL " ++ s) }
-- separators
"(" { \_ -> LBRACE }
")" { \_ -> RBRACE }
"{" { \_ -> LBRACKET }
"}" { \_ -> RBRACKET }
";" { \_ -> SEMICOLON }
"," { \_ -> COMMA}
"." { \_ -> DOT }
{
data Token
@ -131,7 +140,13 @@ data Token
| CHARLITERAL Char
| BOOLLITERAL Bool
| NULLLITERAL
| LBRACE
| RBRACE
| LBRACKET
| RBRACKET
| SEMICOLON
| COMMA
| DOT
deriving(Eq,Show)
}