1

So, I have a basic parser for my language, and according to Is possible to parse "off-side" (indentation-based) languages with fparsec?

I want to incorporate indentation-based syntax for it, like in python.

However, I'm struggling to see how incorporate the indentationParser with mine, they collide with type mismatch.

In my language, anything after a "do" is new scope, and requiere identation:

type ExprC =
  | BoolC of bool
  | DecC of decimal
  | MathC of MathOp * array<ExprC>
  | VarC of string
  | BlockC of BlockCodeC
  | IfC of LogiOpC * BlockCodeC * BlockCodeC
  | LoopC of ExprC * BlockCodeC

module Parser2

open System
open System.Collections.Generic

open FParsec
open TablaM

let tabStopDistance = 8 // must be a power of 2

module IndentationParser =
    type LastParsedIndentation() =
        [<DefaultValue>]
        val mutable Value: int32
        [<DefaultValue>]
        val mutable EndIndex: int64

    type UserState = 
        {Indentation: int
         // We put LastParsedIndentation into the UserState so that we 
         // can conveniently use a separate instance for each stream.
         // The members of the LastParsedIndentation instance will be mutated
         // directly and hence won't be affected by any stream backtracking. 
         LastParsedIndentation: LastParsedIndentation}
        with
           static member Create() = {Indentation = -1
                                     LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}

    type CharStream = CharStream<UserState>
    type Parser<'t> = Parser<'t, UserState>

    // If this function is called at the same index in the stream
    // where the function previously stopped, then the previously
    // returned indentation will be returned again. 
    // This way we can avoid backtracking at the end of indented blocks.
    let skipIndentation (stream: CharStream) =    
        let lastParsedIndentation = stream.UserState.LastParsedIndentation
        if lastParsedIndentation.EndIndex = stream.Index then
            lastParsedIndentation.Value
        else
            let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
            while stream.Peek() = '#' do
                stream.SkipRestOfLine(false) // skip comment
                indentation <- stream.SkipNewlineThenWhitespace(tabStopDistance, false)
            lastParsedIndentation.EndIndex <- stream.Index
            lastParsedIndentation.Value <- indentation
            indentation

    let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
        fun stream ->
            let oldIndentation = stream.UserState.Indentation
            let indentation = skipIndentation stream
            if indentation <= oldIndentation then 
                Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
            else
                stream.UserState <- {stream.UserState with Indentation = indentation}            
                let results = ResizeArray()
                let mutable stateTag = stream.StateTag
                let mutable reply = p stream // parse the first element
                let mutable newIndentation = 0
                while reply.Status = Ok 
                      && (results.Add(reply.Result)
                          newIndentation <- skipIndentation stream
                          newIndentation = indentation)
                   do
                     stateTag <- stream.StateTag
                     reply <- p stream
                if reply.Status = Ok 
                   || (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag) 
                then
                    if newIndentation < indentation || stream.IsEndOfStream then
                        stream.UserState <- {stream.UserState with Indentation = oldIndentation}
                        Reply(List.ofSeq results)
                    else
                        Reply(Error, messageError "wrong indentation")
                else // p failed
                    Reply(reply.Status, reply.Error) 


open IndentationParser

let reserved = ["for";"do"; "while";"if";"case";"type"]

let DecimalParser : Parser<_, unit> =
    // note: doesn't parse a float exponent suffix
    numberLiteral NumberLiteralOptions.AllowFraction "number" 
    |>> fun num -> 
            decimal num.String

let isBlank = fun c -> c = ' ' || c = '\t'

let ws = skipMany1SatisfyL isBlank "whitespace"
let str_ws s = pstring s .>> ws
let comment = pstring "#" >>. skipRestOfLine false
let wsBeforeEOL = skipManySatisfy isBlank >>. optional comment

let pidentifierraw =
    let isIdentifierFirstChar c = isLetter c || c = '_'
    let isIdentifierChar c = isLetter c || isDigit c || c = '_'
    many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier"
let pidentifier =
    pidentifierraw 
    >>= fun s -> 
        if reserved |> List.exists ((=) s) then fail "keyword" 
        else preturn s

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
let pvar = pidentifier |>> VarC

let booleans = choice[stringReturn "true" true <|> stringReturn "false" false] .>> spaces |>> BoolC
let decimals = DecimalParser |>> DecC

let pliteral = decimals <|> booleans //<|> pstringliteral
let expression1 = spaces >>? choice[pliteral;pvar] 

let between a b p = pstring a >>. p .>> pstring b

let expr, exprImpl = createParserForwardedToRef()

let parens = expr |> between "(" ")"

let lhExpression = choice[pliteral; parens; pvar]

do exprImpl := spaces >>. choice[attempt parens; 
                                   expression1]

(* Rules of associations *)
type Assoc = Associativity

let opp = new OperatorPrecedenceParser<ExprC,unit,unit>()
let parithmetic = opp.ExpressionParser

let terma = (lhExpression .>> spaces) <|> parithmetic .>> parens

opp.TermParser <- terma
let mathOps = [
    "+", Add, 1;
    "-", Sub, 1;
    "*", Mul, 2;
    "/", Div, 2
]

for str:String, op:MathOp, precedence:int in mathOps do
    opp.AddOperator(InfixOperator(str, spaces, precedence, Assoc.Left, fun x y -> MathC(op, [| x; y |])))


let indentedStatements, indentedStatementsRef = createParserForwardedToRef()

let doBlock = keyword "do" >>. (pipe2 (ws .>> wsBeforeEOL) 
                                        indentedStatements
                               (fun a stmts -> 
                                   let lines = stmts |> List.toArray
                                   BlockC(lines)))

let ifBlock = keyword "if" >>. doBlock
let forBlock = keyword "for" >>. doBlock
let funBlock = keyword "fun" >>. doBlock

let statement = ifBlock <|> forBlock <|> funBlock <|> lhExpression <|> parithmetic

do indentedStatementsRef := indentedMany1 statement "statement"

let document = indentedStatements .>> spaces .>> eof

let parse str =
    runParserOnString document (UserState.Create()) "" str

The error is in the line:

let statement = ifBlock <|> lhExpression <|> parithmetic

Error FS0001: Type mismatch. Expecting a    Parser<ExprC list,unit>    but given a    Parser<'a list>    The type 'unit' does not match the type 'UserState' (FS0001) (TablaM)
mamcx
  • 15,916
  • 26
  • 101
  • 189
  • I guess, it's because of the *stateless* `DecimalParser`, hence the types of `pliteral` and `lhExpression` automatically generalized as `Parser<_, unit>`. – Be Brave Be Like Ukraine Aug 18 '15 at 04:06
  • Ok, what is driving me nuts is why pipe2 make it work but not for literals. Also, if is possible to do the indentation other way that fit the regular approach of FParsec... – mamcx Aug 18 '15 at 16:22

0 Answers0