Monadic parser combinators… in F#
Posted by Brian on February 16, 2008
I’ll soon be starting a new job where it will be important to know F#. I have read a bit about the language so that I feel pretty comfortable reading F# code, but up until now I had written very little of it. So I decided I should write something non-trivial to practice my skills, and of course I settled on my old favorite: monadic parser combinators.
Back in part four, I had written enough code to do "parsers with decent error-handling" in C#. The code today aims to do the same thing in F#. I am leveraging the nice F# "computation expression" syntax for monadic comprehensions. This is my first shot at writing F# code, so it may be ugly and non-idiomatic, but hopefully given my prior discussion, the code will be reasonably straightforward for the diligent reader to follow. This is just me coding for fun and learning; I expect there are better F# parsing libraries out there already. (I note that the code below is only about 180 lines, and a lot of it is whitespace; the equivalent C# code is about 270 lines.) I present the code below without further commentary.
As a final aside, at this point, I have now implemented a monadic parser combinator library from scratch in three different languages (C++, C#, and F#), and I would bet even money that there’s no one else on the planet who has implemented parser combinators in at least three different languages – with none of those languages being Haskell. :) (If there is such a person, I bet he and I would make fast friends!)
Anyway, here is some F# code. Enjoy!
open System type Position = int type Input = char list * Position let pos i = snd i type Error = { Position : int; Expectations : string list; Message : string } let mkError pos = { Position = pos; Expectations = []; Message = "" } let mergeError e1 e2 = { Position = e2.Position; Expectations = e1.Expectations @ e2.Expectations; Message = e2.Message } let setExpectation s e = { e with Expectations = [s] } type ParseResult<'a> = | Succ of 'a * Input * Error | Fail of Error let succeeded res = match res with | Succ _ -> true | Fail _ -> false let err res = match res with | Succ(_,_,e) -> e | Fail e -> e let mergeErrorRes res1 res2 = match res2 with | Succ (x2,i2,e2) -> Succ(x2,i2,mergeError (err res1) e2) | Fail e2 -> Fail (mergeError (err res1) e2) let setExpectationRes s res = match res with | Succ(x,i,e) -> Succ(x,i,setExpectation s e) | Fail e -> Fail (setExpectation s e) type Consumed<'a> = Consumed of bool * ParseResult<'a> let tagConsumed s (Consumed(b,res) as c)= if b then c else Consumed(b,setExpectationRes s res) type P<'a> = Input -> Consumed<'a> let fail s = fun (i,p) -> Consumed(false,Fail { Position = p; Expectations = []; Message = s }) type ParseMonad() = class member p.ReturnFrom x = x member p.Return x = fun i -> Consumed(false,Succ(x,i,mkError (pos i))) member p.Bind(m,f) = fun i -> match m i with | Consumed(b,res) -> match res with | Succ(x,i2,e) -> let (Consumed(b2,res2)) = f x i2 Consumed(b || b2, if b2 then res2 else mergeErrorRes res res2) | Fail e -> Consumed(b,Fail e) end let (<|>) p1 p2 = fun i -> let Consumed(b,res) as c = p1 i if b then c else match res with | Succ _ -> c | Fail e -> let Consumed(b2,res2) as c2 = p2 i if b2 then c2 else Consumed(b2,mergeErrorRes res res2) let attempt p = fun i -> let Consumed(b,res) as c = p i if b && not (succeeded res) then Consumed(false,res) else c let tag s p = fun i -> p i |> tagConsumed s let sat pred = fun (i,pos) -> match i with | [] -> Consumed(false,Fail { Position = pos; Expectations = []; Message = "unexpected end of input" }) | c :: cs -> if pred c then Consumed(true, Succ(c, (cs,pos+1), mkError pos)) else Consumed(false, Fail { Position = pos; Expectations = []; Message = sprintf "unexpected character '%c'" c }) let item : P<char> = sat (fun _ -> true) |> tag "any character" let parse = ParseMonad() let letter : P<char> = sat Char.IsLetter |> tag "letter" let digit : P<char> = sat Char.IsDigit |> tag "digit" let test p (s : string) = let i = s.ToCharArray() |> List.ofArray let ans = p (i,0) printfn "%A" ans test letter "abc" test letter "123" test (letter <|> digit) "123" test (letter <|> digit) ";23" let literal (s : string) r = let rec help l = match l with | [] -> parse { return r } | c :: cs -> parse { let! _ = sat (fun x -> x = c) return! help cs } s.ToCharArray() |> List.ofArray |> help test (literal "ab" 0) "abc" test (literal "12" 0) "abc" test ((attempt (literal "abc" 1)) <|> (literal "aba" 2)) "aba" test (((literal "abc" 1)) <|> (literal "aba" 2)) "aba" let rec many p = (many1 p) <|> parse { return [] } and many1 p = parse { let! x = p let! xs = many p return x :: xs } test (many letter) "hello world" let chainl1 p op = let rec help x = parse { let! f = op let! y = p return! help (f x y) } <|> parse { return x } parse { let! x = p return! help x } let digitVal = parse { let! c = digit return int c - int '0' } let nat = chainl1 digitVal (parse { return fun x y -> 10*x + y }) |> tag "natural number" test nat "123!" test nat "hello" let rec chainr1 p op = parse { let! x = p return! parse { let! f = op let! y = chainr1 p op return f x y } <|> parse { return x } } let notFollowedBy p1 p2 (label : string) = parse { let! r = p1 return! (attempt ((parse { let! x = p2 return! fail (String.Concat("unexpected ", label)) }) <|> parse { let! _ = parse { return () } return r })) } let endOfInput = notFollowedBy (parse { return () }) item "character" |> tag "end of input" test endOfInput "" test endOfInput "z" let addOp = literal "+" (fun x y -> x + y) <|> literal "-" (fun x y -> x - y) |> tag "add/sub op" let mulOp = literal "*" (fun x y -> x * y) <|> literal "/" (fun x y -> x / y) |> tag "mul/div op" let expOp = literal "^" (fun (x:int) (y:int) -> Math.Pow(float x,float y) |> int) |> tag "exp op" let rec expr = chainl1 term addOp and term = chainl1 factor mulOp and factor = chainr1 part expOp and part = nat <|> paren and paren = parse { let! _ = literal "(" 0 let! e = expr let! _ = literal ")" 0 return e } test expr "(611-2^3^2+1)/10-5*2!" test expr "(611-2^-3^2+1)/10-5*2!" test expr "(611-2^3^2.2+1)/10-5*2!" let fullExpr = parse { let! e = expr let! _ = endOfInput return e } test fullExpr "(611-2^3^2+1)/10-5*2" test fullExpr "(611-2^3^2+1)/10-5*2!"