Data.String.Parser


   0 | ||| A simple parser combinator library for String. Inspired by attoparsec zepto.
   1 | module Data.String.Parser
   2 | import public Control.Monad.Identity
   3 | import Control.Monad.Trans
   4 | 
   5 | import Data.String
   6 | import Data.String.Extra
   7 | import Data.Fin
   8 | import Data.List
   9 | import Data.List.Alternating
  10 | import Data.List1
  11 | import Data.SnocList
  12 | import Data.Vect
  13 | 
  14 | %default total
  15 | 
  16 | ||| The input state, pos is position in the string and maxPos is the length of the input string.
  17 | public export
  18 | record State where
  19 |     constructor S
  20 |     input : String
  21 |     pos : Int
  22 |     maxPos : Int
  23 | 
  24 | Show State where
  25 |     show s = "(" ++ show s.input ++ ", " ++ show s.pos ++ ", " ++ show s.maxPos ++ ")"
  26 | 
  27 | ||| Result of applying a parser
  28 | public export
  29 | data Result a = Fail Int String | OK a State
  30 | 
  31 | Functor Result where
  32 |   map f (Fail i err) = Fail i err
  33 |   map f (OK r s)     = OK (f r) s
  34 | 
  35 | public export
  36 | record ParseT (m : Type -> Type) (a : Type) where
  37 |     constructor P
  38 |     runParser : State -> m (Result a)
  39 | 
  40 | public export
  41 | Parser : Type -> Type
  42 | Parser = ParseT Identity
  43 | 
  44 | public export
  45 | Functor m => Functor (ParseT m) where
  46 |     map f p = P $ \s => map (map f) (p.runParser s)
  47 | 
  48 | public export
  49 | Monad m => Applicative (ParseT m) where
  50 |     pure x = P $ \s => pure $ OK x s
  51 |     f <*> x = P $ \s => case !(f.runParser s) of
  52 |                             OK f' s' => map (map f') (x.runParser s')
  53 |                             Fail i err => pure $ Fail i err
  54 | 
  55 | public export
  56 | Monad m => Alternative (ParseT m) where
  57 |     empty = P $ \s => pure $ Fail s.pos "no alternative left"
  58 |     a <|> b = P $ \s => case !(a.runParser s) of
  59 |                             OK r s' => pure $ OK r s'
  60 |                             Fail _ _ => b.runParser s
  61 | 
  62 | public export
  63 | Monad m => Monad (ParseT m) where
  64 |     m >>= k = P $ \s => case !(m.runParser s) of
  65 |                              OK a s' => (k a).runParser s'
  66 |                              Fail i err => pure $ Fail i err
  67 | 
  68 | public export
  69 | MonadTrans ParseT where
  70 |     lift x = P $ \s => map (flip OK s) x
  71 | 
  72 | ||| Run a parser in a monad
  73 | ||| Returns a tuple of the result and final position on success.
  74 | ||| Returns an error message on failure.
  75 | export
  76 | parseT : Functor m => ParseT m a -> String -> m (Either String (a, Int))
  77 | parseT p str = map (\case
  78 |                        OK r s => Right (r, s.pos)
  79 |                        Fail i err => Left $ let (line, col) = computePos i str in "Parse failed at position \{show line}-\{show col}: \{err}")
  80 |                    (p.runParser (S str 0 (strLength str)))
  81 |   where
  82 |     computePosAcc : Int -> List Char -> (Int, Int) -> (Int, Int)
  83 |     computePosAcc 0 input acc = acc
  84 |     computePosAcc n [] acc = acc
  85 |     computePosAcc n ('\n' :: is) (line, col) = computePosAcc (n - 1) is (line + 1, 0)
  86 |     computePosAcc n (i :: is) (line, col) = computePosAcc (n - 1) is (line, col + 1)
  87 | 
  88 |     -- compute the position as line:col
  89 |     computePos : Int -> String -> (Int, Int)
  90 |     computePos pos input = computePosAcc pos (fastUnpack input) (0,0)
  91 | 
  92 | ||| Run a parser in a pure function
  93 | ||| Returns a tuple of the result and final position on success.
  94 | ||| Returns an error message on failure.
  95 | export
  96 | parse : Parser a -> String -> Either String (a, Int)
  97 | parse p str = runIdentity $ parseT p str
  98 | 
  99 | ||| Combinator that replaces the error message on failure.
 100 | ||| This allows combinators to output relevant errors
 101 | export
 102 | (<?>) : Functor m => ParseT m a -> String -> ParseT m a
 103 | (<?>) p msg = P $ \s => map (\case
 104 |                                 OK r s' => OK r s'
 105 |                                 Fail i _ => Fail i msg)
 106 |                             (p.runParser s)
 107 | 
 108 | export infixl 0 <?>
 109 | 
 110 | ||| Discards the result of a parser
 111 | export
 112 | skip : Functor m => ParseT m a -> ParseT m ()
 113 | skip = ignore
 114 | 
 115 | ||| Maps the result of the parser `p` or returns `def` if it fails.
 116 | export
 117 | optionMap : Functor m => b -> (a -> b) -> ParseT m a -> ParseT m b
 118 | optionMap def f p = P $ \s => map (\case
 119 |                                      OK r s'  => OK (f r) s'
 120 |                                      Fail _ _ => OK def s)
 121 |                                   (p.runParser s)
 122 | 
 123 | ||| Runs the result of the parser `p` or returns `def` if it fails.
 124 | export
 125 | option : Functor m => a -> ParseT m a -> ParseT m a
 126 | option def = optionMap def id
 127 | 
 128 | ||| Returns a Bool indicating whether `p` succeeded
 129 | export
 130 | succeeds : Functor m => ParseT m a -> ParseT m Bool
 131 | succeeds = optionMap False (const True)
 132 | 
 133 | ||| Returns a Maybe that contains the result of `p` if it succeeds or `Nothing` if it fails.
 134 | export
 135 | optional : Functor m => ParseT m a -> ParseT m (Maybe a)
 136 | optional = optionMap Nothing Just
 137 | 
 138 | ||| Succeeds if and only if the argument parser fails.
 139 | |||
 140 | ||| In Parsec, this combinator is called `notFollowedBy`.
 141 | export
 142 | requireFailure : Functor m => ParseT m a -> ParseT m ()
 143 | requireFailure (P runParser) = P $ \s => reverseResult s <$> runParser s
 144 | where
 145 |   reverseResult : State -> Result a -> Result ()
 146 |   reverseResult s (Fail _ _) = OK () s
 147 |   reverseResult s (OK _ _)   = Fail (pos s) "Purposefully changed OK to Fail"
 148 | 
 149 | ||| Fail with some error message
 150 | export
 151 | fail : Applicative m => String -> ParseT m a
 152 | fail x = P $ \s => pure $ Fail s.pos x
 153 | 
 154 | ||| Succeeds if the next char satisfies the predicate `f`
 155 | export
 156 | satisfy : Applicative m => (Char -> Bool) -> ParseT m Char
 157 | satisfy f = P $ \s => pure $ if s.pos < s.maxPos
 158 |                                   then let ch = assert_total $ strIndex s.input s.pos in
 159 |                                        if f ch
 160 |                                            then OK ch (S s.input (s.pos + 1) s.maxPos)
 161 |                                            else Fail s.pos "could not satisfy predicate"
 162 |                                   else Fail s.pos "could not satisfy predicate"
 163 | 
 164 | ||| Succeeds if the string `str` follows.
 165 | export
 166 | string : Applicative m => String -> ParseT m String
 167 | string str = P $ \s => pure $ let len = strLength str in
 168 |                               if s.pos+len <= s.maxPos
 169 |                                   then let head = strSubstr s.pos len s.input in
 170 |                                        if head == str
 171 |                                          then OK str (S s.input (s.pos + len) s.maxPos)
 172 |                                          else Fail s.pos ("string " ++ show str)
 173 |                                   else Fail s.pos ("string " ++ show str)
 174 | 
 175 | ||| Succeeds if the end of the string is reached.
 176 | export
 177 | eos : Applicative m => ParseT m ()
 178 | eos = P $ \s => pure $ if s.pos == s.maxPos
 179 |                            then OK () s
 180 |                            else Fail s.pos "expected the end of the string"
 181 | 
 182 | ||| Succeeds if the next char is `c`
 183 | export
 184 | char : Applicative m => Char -> ParseT m Char
 185 | char c = satisfy (== c) <?> "expected \{show c}"
 186 | 
 187 | ||| Parses a space character
 188 | export
 189 | space : Applicative m => ParseT m Char
 190 | space = satisfy isSpace <?> "expected space"
 191 | 
 192 | ||| Parses a letter or digit (a character between \'0\' and \'9\').
 193 | ||| Returns the parsed character.
 194 | export
 195 | alphaNum : Applicative m => ParseT m Char
 196 | alphaNum = satisfy isAlphaNum <?> "expected letter or digit"
 197 | 
 198 | ||| Parses a letter (an upper case or lower case character). Returns the
 199 | ||| parsed character.
 200 | export
 201 | letter : Applicative m => ParseT m Char
 202 | letter = satisfy isAlpha <?> "expected letter"
 203 | 
 204 | mutual
 205 |     ||| Succeeds if `p` succeeds, will continue to match `p` until it fails
 206 |     ||| and accumulate the results in a list
 207 |     export
 208 |     covering
 209 |     some : Monad m => ParseT m a -> ParseT m (List a)
 210 |     some p = do r <- p
 211 |                 rs <- many p
 212 |                 pure $ r :: rs
 213 | 
 214 |     ||| Always succeeds, will accumulate the results of `p` in a list until it fails.
 215 |     export
 216 |     covering
 217 |     many : Monad m => ParseT m a -> ParseT m (List a)
 218 |     many p = some p <|> pure []
 219 | 
 220 | ||| Parse left-nested lists of the form `((init op arg) op arg) op arg`
 221 | export
 222 | covering
 223 | hchainl : Monad m => ParseT m init -> ParseT m (init -> arg -> init) -> ParseT m arg -> ParseT m init
 224 | hchainl pini pop parg = pini >>= go
 225 |   where
 226 |   covering
 227 |   go : init -> ParseT m init
 228 |   go x = (do op <- pop
 229 |              arg <- parg
 230 |              go $ op x arg) <|> pure x
 231 | 
 232 | ||| Parse right-nested lists of the form `arg op (arg op (arg op end))`
 233 | export
 234 | covering
 235 | hchainr : Monad m => ParseT m arg -> ParseT m (arg -> end -> end) -> ParseT m end -> ParseT m end
 236 | hchainr parg pop pend = go id <*> pend
 237 |   where
 238 |   covering
 239 |   go : (end -> end) -> ParseT m (end -> end)
 240 |   go f = (do arg <- parg
 241 |              op <- pop
 242 |              go $ f . op arg) <|> pure f
 243 | 
 244 | ||| Always succeeds, applies the predicate `f` on chars until it fails and creates a string
 245 | ||| from the results.
 246 | export
 247 | covering
 248 | takeWhile : Monad m => (Char -> Bool) -> ParseT m String
 249 | takeWhile f = pack <$> many (satisfy f)
 250 | 
 251 | ||| Similar to `takeWhile` but fails if the resulting string is empty.
 252 | export
 253 | covering
 254 | takeWhile1 : Monad m => (Char -> Bool) -> ParseT m String
 255 | takeWhile1 f = pack <$> some (satisfy f)
 256 | 
 257 | ||| Takes from the input until the `stop` string is found.
 258 | ||| Fails if the `stop` string cannot be found.
 259 | export
 260 | covering
 261 | takeUntil : Monad m => (stop : String) -> ParseT m String
 262 | takeUntil stop = do
 263 |     let StrCons s top = strM stop
 264 |       | StrNil => pure ""
 265 |     takeUntil' s top [<]
 266 |   where
 267 |     takeUntil' : Monad m' => (s : Char) -> (top : String) -> (acc : SnocList String) -> ParseT m' String
 268 |     takeUntil' s top acc = do
 269 |         init <- takeWhile (/= s)
 270 |         skip $ char s <?> "end of string reached - \{show stop} not found"
 271 |         case !(succeeds $ string top) of
 272 |              False => takeUntil' s top $ acc :< (init +> s)
 273 |              True => pure $ concat $ acc :< init
 274 | 
 275 | ||| Parses zero or more space characters
 276 | export
 277 | covering
 278 | spaces : Monad m => ParseT m ()
 279 | spaces = skip (many space)
 280 | 
 281 | ||| Parses one or more space characters
 282 | export
 283 | covering
 284 | spaces1 : Monad m => ParseT m ()
 285 | spaces1 = skip (some space) <?> "whitespaces"
 286 | 
 287 | 
 288 | ||| Run the parser p between the opening parser `o` and the closing parser `c`,
 289 | ||| returning the result of p.
 290 | export
 291 | between : Monad m => ParseT m o -> ParseT m c -> ParseT m p -> ParseT m p
 292 | between o c p = id <$ o <*> p <* c
 293 | 
 294 | ||| Discards brackets around a matching parser
 295 | export
 296 | parens : Monad m => ParseT m a -> ParseT m a
 297 | parens p = between (char '(') (char ')') p
 298 | 
 299 | ||| Discards whitespace after a matching parser
 300 | export
 301 | covering
 302 | lexeme : Monad m => ParseT m a -> ParseT m a
 303 | lexeme p = p <* spaces
 304 | 
 305 | ||| Matches a specific string, then skips following whitespace
 306 | export
 307 | covering
 308 | token : Monad m => String -> ParseT m ()
 309 | token s = lexeme (skip $ string s) <?> "expected token " ++ show s
 310 | 
 311 | ||| Matches a single digit
 312 | export
 313 | digit : Monad m => ParseT m (Fin 10)
 314 | digit = do x <- satisfy isDigit
 315 |            case lookup x digits of
 316 |                 Nothing => fail "not a digit"
 317 |                 Just y => pure y
 318 |   where
 319 |     digits : List (Char, Fin 10)
 320 |     digits = [ ('0', 0)
 321 |              , ('1', 1)
 322 |              , ('2', 2)
 323 |              , ('3', 3)
 324 |              , ('4', 4)
 325 |              , ('5', 5)
 326 |              , ('6', 6)
 327 |              , ('7', 7)
 328 |              , ('8', 8)
 329 |              , ('9', 9)
 330 |              ]
 331 | 
 332 | fromDigits : Num a => (Fin 10 -> a) -> List (Fin 10) -> a
 333 | fromDigits f xs = foldl addDigit 0 xs
 334 | where
 335 |   addDigit : a -> Fin 10 -> a
 336 |   addDigit num d = 10*num + f d
 337 | 
 338 | intFromDigits : List (Fin 10) -> Integer
 339 | intFromDigits = fromDigits finToInteger
 340 | 
 341 | natFromDigits : List (Fin 10) -> Nat
 342 | natFromDigits = fromDigits finToNat
 343 | 
 344 | ||| Matches a natural number
 345 | export
 346 | covering
 347 | natural : Monad m => ParseT m Nat
 348 | natural = natFromDigits <$> some digit
 349 | 
 350 | ||| Matches an integer, eg. "12", "-4"
 351 | export
 352 | covering
 353 | integer : Monad m => ParseT m Integer
 354 | integer = do minus <- succeeds (char '-')
 355 |              x <- some digit
 356 |              pure $ if minus then (intFromDigits x)*(-1) else intFromDigits x
 357 | 
 358 | 
 359 | ||| Parse repeated instances of at least one `p`, separated by `s`,
 360 | ||| returning a list of successes.
 361 | |||
 362 | ||| @ p the parser for items
 363 | ||| @ s the parser for separators
 364 | export
 365 | covering
 366 | sepBy1 : Monad m => (p : ParseT m a)
 367 |                  -> (s : ParseT m b)
 368 |                  -> ParseT m (List1 a)
 369 | sepBy1 p s = [| p ::: many (s *> p) |]
 370 | 
 371 | ||| Parse zero or more `p`s, separated by `s`s, returning a list of
 372 | ||| successes.
 373 | |||
 374 | ||| @ p the parser for items
 375 | ||| @ s the parser for separators
 376 | export
 377 | covering
 378 | sepBy : Monad m => (p : ParseT m a)
 379 |                 -> (s : ParseT m b)
 380 |                 -> ParseT m (List a)
 381 | sepBy p s = optionMap [] forget (p `sepBy1` s)
 382 | 
 383 | ||| Parses /one/ or more occurrences of `p` separated by `comma`.
 384 | export
 385 | covering
 386 | commaSep1 : Monad m => ParseT m a -> ParseT m (List1 a)
 387 | commaSep1 p = p `sepBy1` (char ',')
 388 | 
 389 | ||| Parses /zero/ or more occurrences of `p` separated by `comma`.
 390 | export
 391 | covering
 392 | commaSep : Monad m => ParseT m a -> ParseT m (List a)
 393 | commaSep p = p `sepBy` (char ',')
 394 | 
 395 | ||| Parses alternating occurrences of `a`s and `b`s.
 396 | ||| Can be thought of as parsing:
 397 | ||| - a list of `b`s, separated, and surrounded, by `a`s
 398 | ||| - a non-empty list of `a`s, separated by `b`s
 399 | ||| where we care about the separators
 400 | export
 401 | covering
 402 | alternating : Monad m
 403 |            => ParseT m a
 404 |            -> ParseT m b
 405 |            -> ParseT m (Odd a b)
 406 | alternating x y = do vx <- x
 407 |                      (vx ::) <$> [| y :: alternating x y |] <|> pure [vx]
 408 | 
 409 | ||| Run the specified parser precisely `n` times, returning a vector
 410 | ||| of successes.
 411 | export
 412 | ntimes : Monad m => (n : Nat) -> ParseT m a -> ParseT m (Vect n a)
 413 | ntimes    Z  p = pure Vect.Nil
 414 | ntimes (S n) p = [| p :: (ntimes n p) |]
 415 | 
 416 |