0

I'm building a parser with Sedlex and Menhir, where I have a function definition as follows:



(* Lexer *)

/* Character class covering '\160' through '\255'.
   NOTE(review): presumably the escapes are decimal (as in OCaml char
   literals), i.e. code points 160-255, the upper Latin-1 range - confirm. */
let non_ascii = [%sedlex.regexp? '\160' .. '\255'];

/* An escape sequence: either `unicode`, or a backslash followed by any single
   character that is NOT '\r', '\n', '\012' or a `hex_digit`.
   `unicode` and `hex_digit` are defined outside this excerpt.
   NOTE(review): '\012' is presumably form feed (decimal 12) - confirm. */
let escape = [%sedlex.regexp?
  unicode | ('\\', Compl('\r' | '\n' | '\012' | hex_digit))
];

/* Characters allowed as the first character of an identifier: underscore,
   ASCII letters, '$', anything in `non_ascii`, or an `escape` sequence.
   Digits and '-' are not part of this class. */
let ident_start = [%sedlex.regexp?
  '_' | 'a' .. 'z' | 'A' .. 'Z' | '$' | non_ascii | escape
];

/* Characters allowed in an identifier: underscore, ASCII letters, ASCII
   digits, '-', anything in `non_ascii`, or an `escape` sequence.
   Unlike `ident_start`, this class accepts digits and '-' but not '$'. */
let ident_char = [%sedlex.regexp?
  '_' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | non_ascii | escape];

/* Reads one token from the sedlex buffer `buf`.

   - `white_space` produces no token: the function calls itself again, so the
     caller never sees whitespace.
   - `eof` yields EOF; each listed punctuation character yields its own
     constant token.
   - `ident` yields IDENT carrying the result of `Sedlexing.latin1(buf)`
     (presumably the matched text - confirm).
   - `number` delegates to `get_dimension`, passing that same result and `buf`.
   - Any other input reaches `assert(false)`.

   `white_space`, `ident`, `number` and `get_dimension` are defined outside
   this excerpt.

   NOTE(review): because whitespace is dropped here, the token stream for
   `f(` and `f (` is identical (IDENT then LEFT_PAREN). A grammar that must
   tell them apart cannot do so from these tokens alone; that distinction
   would have to be made in the lexer (e.g. a dedicated token for an ident
   immediately followed by '('). */
let rec get_next_token = buf => {
  switch%sedlex (buf) {
  /* Skip whitespace and keep scanning. */
  | white_space => get_next_token(buf)
  | eof => EOF
  | ';' => SEMI_COLON
  | '}' => RIGHT_BRACE
  | '{' => LEFT_BRACE
  | ':' => COLON
  | '(' => LEFT_PAREN
  | ')' => RIGHT_PAREN
  | '[' => LEFT_BRACKET
  | ']' => RIGHT_BRACKET
  | '%' => PERCENTAGE
  | '&' => AMPERSAND
  | ident => IDENT(Sedlexing.latin1(buf))
  | number => get_dimension(Sedlexing.latin1(buf), buf)
  /* NOTE(review): unrecognized input fails an assertion here rather than
     raising a dedicated lexing error - confirm this is intended. */
  | _ => assert(false)
  };
}

/* Runs the Menhir entry point `parser` over the sedlex buffer `buf`.

   Tokens are pulled on demand through `get_next_token_with_location`, and the
   most recently pulled (token, start, end) triple is remembered so it can be
   attached to a failure report.

   Raises:
   - `LexingError`: re-raised untouched when the run raises it.
   - `ParseError`: raised for every other exception, carrying the last token
     triple that was handed to the parser (or the initial EOF/dummy-position
     placeholder if none was pulled yet). */
let parse = (buf, parser) => {
  /* Placeholder until the first real token is read. */
  let most_recent = ref((Parser.EOF, Lexing.dummy_pos, Lexing.dummy_pos));

  /* Token supplier in the (token, startpos, endpos) form expected by the
     converted parser; records each triple before returning it. */
  let supply = () => {
    let triple = get_next_token_with_location(buf);
    most_recent := triple;
    triple;
  };

  switch (MenhirLib.Convert.Simplified.traditional2revised(parser, supply)) {
  | result => result
  | exception (LexingError(_) as lex_err) => raise(lex_err)
  | exception _ => raise(ParseError(most_recent^))
  };
};



(* Parser *)

(* Token declarations used by the rule below.
   IDENT carries a string payload; the two parenthesis tokens carry none. *)
%token <string> IDENT
%token LEFT_PAREN
%token RIGHT_PAREN

(* A function call: IDENT LEFT_PAREN <zero or more exprs> RIGHT_PAREN.
   Builds Texp_function from two (value, location) pairs: the identifier with
   the span of the IDENT token, and the argument list with the span of the
   list(exprs) symbol. Locations come from Lex_buffer.make_loc applied to the
   Menhir start/end positions of each bound symbol.
   `exprs` and `Lex_buffer.make_loc` are defined outside this excerpt.
   NOTE(review): the rule only sees the token sequence, so it matches IDENT
   followed by LEFT_PAREN regardless of any source text the lexer discarded
   between them. *)
function_expr:
  | i = IDENT; LEFT_PAREN; xs = list(exprs); RIGHT_PAREN {
    Texp_function (
      (i, Lex_buffer.make_loc $startpos(i) $endpos(i)),
      (xs, Lex_buffer.make_loc $startpos(xs) $endpos(xs))
    )
  }

And I have a simple ident definition:


| i = IDENT {Texp_ident i, Lex_buffer.make_loc $startpos(i) $endpos(i) } 

Functions can't have a space between the ident and the LEFT_PAREN; how can I define that?

I want `and func(1, 2, 3)` to produce a list of expressions like `[Texp_ident "and"; Texp_function("func", [...])]`, but it is actually producing `[Texp_function("and", ["func"; ...])]`, since it doesn't care about the space between the ident and the LEFT_PAREN. How can I fix that?

udduu
  • 69
  • 4
  • 1
    How did you define `IDENT`? If it considered `and func` as a function you would have `Texp_function "and func"` so the problem doesn't come from here. Your example is not complete and won't allow us to help you – Lhooq May 25 '22 at 08:49
  • Hi, i added the definitions in the post, do you need any more info? but afaict, `and func` is not a valid ident, since the token after `and` is a whitespace. – udduu May 25 '22 at 17:06
  • I don't see the lexing rule for `IDENT`, can you show it? – Lhooq May 26 '22 at 19:53
  • You showed the parsing use of `IDENT` but not its lexing definition. You should have something in your lexer.mll file that produces an `IDENT` containing a string, you still didn't show it – Lhooq May 27 '22 at 08:45
  • Hi, i added what you ask, it is using sedlex instead of ocamllex and its also using reason syntax. basically what im search for is a function definition like in javascript, where the parenthesis must be together with the ident. – udduu May 27 '22 at 18:21

0 Answers0