6 | import public Text.Lexer.Core
7 | import public Text.Quantity
8 | import public Text.Token
13 | toTokenMap : List (Lexer, k) -> TokenMap (Token k)
14 | toTokenMap = map $
\(l, kind) => (l, Tok kind)
20 | any = pred (const True)
26 | opt : (l : Lexer) -> Recognise False
32 | non : (l : Lexer) -> Lexer
33 | non l = reject l <+> any
39 | choiceMap : {c : Bool} ->
40 | Foldable t => (a -> Recognise c) -> t a -> Recognise c
41 | choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
49 | Foldable t => t (Recognise c) -> Recognise c
50 | choice = choiceMap id
56 | (xs : List (Recognise c)) -> Recognise (isCons xs && c)
57 | concat = Lexer.Core.concatMap id
62 | is : (x : Char) -> Lexer
68 | isNot : (x : Char) -> Lexer
69 | isNot x = pred (/=x)
74 | like : (x : Char) -> Lexer
75 | like x = pred (\y => toUpper x == toUpper y)
80 | notLike : (x : Char) -> Lexer
81 | notLike x = pred (\y => toUpper x /= toUpper y)
87 | exact : (str : String) -> Lexer
88 | exact str = case unpack str of
90 | (x :: xs) => concatMap is (x :: xs)
96 | approx : (str : String) -> Lexer
97 | approx str = case unpack str of
99 | (x :: xs) => concatMap like (x :: xs)
104 | oneOf : (chars : String) -> Lexer
105 | oneOf chars = pred (\x => x `elem` unpack chars)
110 | range : (start : Char) -> (end : Char) -> Lexer
111 | range start end = pred (\x => (x >= min start end)
112 | && (x <= max start end))
118 | some : Lexer -> Lexer
119 | some l = l <+> many l
125 | many : Lexer -> Recognise False
126 | many l = opt (some l)
132 | someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
133 | someUntil stopBefore l = some (reject stopBefore <+> l)
140 | manyUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Recognise False
141 | manyUntil stopBefore l = many (reject stopBefore <+> l)
148 | manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
149 | manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
155 | count : (q : Quantity) -> (l : Lexer) -> Recognise (isSucc (min q))
156 | count (Qty Z Nothing) l = many l
157 | count (Qty Z (Just Z)) _ = empty
158 | count (Qty Z (Just (S max))) l = opt $
l <+> count (atMost max) l
159 | count (Qty (S min) Nothing) l = l <+> count (atLeast min) l
160 | count (Qty (S min) (Just Z)) _ = fail
161 | count (Qty (S min) (Just (S max))) l = l <+> count (between min max) l
167 | digit = pred isDigit
173 | digits = some digit
179 | binDigit = pred (\c => c == '0' || c == '1')
185 | binDigits = some binDigit
191 | hexDigit = pred isHexDigit
197 | hexDigits = some hexDigit
203 | octDigit = pred isOctDigit
209 | octDigits = some octDigit
215 | alpha = pred isAlpha
221 | alphas = some alpha
227 | lower = pred isLower
233 | lowers = some lower
239 | upper = pred isUpper
245 | uppers = some upper
251 | alphaNum = pred isAlphaNum
257 | alphaNums = some alphaNum
263 | space = pred isSpace
269 | spaces = some space
275 | newline = let crlf = "\r\n" in
276 | exact crlf <|> oneOf crlf
282 | newlines = some newline
288 | symbol = pred (\x => not (isSpace x || isAlphaNum x))
294 | symbols = some symbol
300 | control = pred isControl
306 | controls = some control
312 | surround : (start : Lexer) -> (end : Lexer) -> (l : Lexer) -> Lexer
313 | surround start end l = start <+> manyThen end l
319 | quote : (q : Lexer) -> (l : Lexer) -> Lexer
320 | quote q l = surround q q l
326 | escape : (esc : Lexer) -> Lexer -> Lexer
327 | escape esc l = esc <+> l
334 | stringLit = quote (is '"') (escape (is '\\') any <|> any)
341 | charLit = let q = '\'' in
342 | is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
344 | lexStr : List String -> Lexer
346 | lexStr (t :: ts) = exact t <|> lexStr ts
349 | control = lexStr ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
350 | "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
351 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
352 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
354 | <|> (is 'x' <+> hexDigits)
355 | <|> (is 'o' <+> octDigits)
362 | intLit = opt (is '-') <+> digits
368 | binLit = exact "0b" <+> binDigits
374 | hexLit = approx "0x" <+> hexDigits
380 | octLit = exact "0o" <+> octDigits
383 | digitsUnderscoredLit : Lexer
384 | digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
387 | binUnderscoredLit : Lexer
388 | binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
391 | hexUnderscoredLit : Lexer
392 | hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
395 | octUnderscoredLit : Lexer
396 | octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
403 | lineComment : (start : Lexer) -> Lexer
404 | lineComment start = start <+> manyUntil newline any <+> opt newline
412 | blockComment : (start : Lexer) -> (end : Lexer) -> Lexer
413 | blockComment start end = start <+> middle <+> end
415 | middle : Recognise False
416 | middle = manyUntil end (blockComment start end <|> any)