0 | module Text.Lexer.Tokenizer
4 | import Text.Lexer.Core
6 | import Text.PrettyPrint.Prettyprinter
8 | import public Control.Delayed
9 | import public Text.Bounded
15 | data Tokenizer : (tokenType : Type) -> Type where
16 | Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
17 | Compose : (begin : Lexer) ->
18 | (mapBegin : String -> tokenType) ->
19 | (tagger : String -> tag) ->
20 | (middle : Inf (tag -> Tokenizer tokenType)) ->
21 | (end : tag -> Lexer) ->
22 | (mapEnd : String -> tokenType) ->
24 | Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
27 | Functor Tokenizer where
28 | map f (Match lex fn) = Match lex (f . fn)
29 | map f (Compose {begin, mapBegin, tagger, middle, end, mapEnd})
31 | mapBegin = f . mapBegin,
32 | middle = map f . middle,
33 | mapEnd = f . mapEnd,
36 | map f (Alt t1 t2) = map f t1 `Alt` map f t2
40 | (<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
45 | match : Lexer -> (String -> a) -> Tokenizer a
52 | compose : (begin : Lexer) ->
53 | (mapBegin : String -> a) ->
54 | (tagger : String -> tag) ->
55 | (middle : Inf (tag -> Tokenizer a)) ->
56 | (end : tag -> Lexer) ->
57 | (mapEnd : String -> a) ->
65 | data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
68 | Show StopReason where
69 | show EndInput = "EndInput"
70 | show NoRuleApply = "NoRuleApply"
71 | show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
74 | Pretty StopReason where
75 | pretty EndInput = pretty "EndInput"
76 | pretty NoRuleApply = pretty "NoRuleApply"
77 | pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
81 | (line, col : Int) -> List (WithBounds a) ->
83 | (List (WithBounds a), (StopReason, Int, Int, List Char))
84 | tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
85 | tokenise reject tokenizer line col acc str
86 | = case scan reject [] str of
87 | Just _ => (reverse acc, (EndInput, line, col, str))
88 | Nothing => case getFirstMatch tokenizer str of
89 | Right (toks, line', col', rest) =>
91 | assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
92 | Left reason => (reverse acc, reason, (line, col, str))
94 | countNLs : List Char -> Nat
95 | countNLs str = List.length (filter (== '\n') str)
97 | getCols : List Char -> Int -> Int
99 | = case span (/= '\n') x of
100 | (incol, []) => c + cast (length incol)
101 | (incol, _) => cast (length incol)
106 | getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
107 | getNext lexer line col str =
108 | let Just (token, rest) = scan lexer [] str
110 | line' = line + cast (countNLs token)
111 | col' = getCols token col
112 | tokenStr = pack $
reverse token
113 | in pure (tokenStr, line', col', rest)
115 | getFirstMatch : Tokenizer a -> List Char ->
116 | Either StopReason (List (WithBounds a), Int, Int, List Char)
117 | getFirstMatch (Match lex fn) str
118 | = let Just (tok, line', col', rest) = getNext lex line col str
119 | | _ => Left NoRuleApply
120 | tok' = MkBounded (fn tok) False (MkBounds line col line' col')
121 | in Right ([tok'], line', col', rest)
122 | getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
123 | = let Just (beginTok', line', col' , rest) = getNext begin line col str
124 | | Nothing => Left NoRuleApply
125 | tag = tagger beginTok'
126 | middle = middleFn tag
128 | beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
129 | (midToks, (reason, line'', col'', rest'')) =
130 | assert_total (tokenise end middle line' col' [] rest)
132 | (ComposeNotClosing _ _) => Left reason
133 | _ => let Just (endTok', lineEnd, colEnd, restEnd) =
134 | getNext end line'' col'' rest''
135 | | _ => Left $
ComposeNotClosing (line, col) (line', col')
136 | endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
137 | in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
138 | getFirstMatch (Alt t1 t2) str
139 | = case getFirstMatch t1 str of
140 | Right result => Right result
141 | Left reason@(ComposeNotClosing _ _) => Left reason
142 | Left _ => getFirstMatch t2 str
148 | (List (WithBounds a), (StopReason, Int, Int, String))
149 | lexTo reject tokenizer str
150 | = let (ts, reason, (l, c, str')) =
151 | tokenise reject tokenizer 0 0 [] (unpack str) in
152 | (ts, reason, (l, c, pack str'))
158 | lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
159 | lex tokenizer str = lexTo (pred $
const False) tokenizer str