Lexer (lexer.pl
):
:- module(lexer, []).
:- use_module(library(debug)).
:- use_module(library(dcgs)).
:- use_module(library(lists), [append/2, append/3, member/2]).
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Tokens.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
term(Ts) --> tokens(Ts).
% read_term(Ts) --> term(Ts0), end, !, { append(Ts0, [end], Ts) }.
read_term_([end]) --> end, !.
read_term_([T|Ts]) --> token(T), read_term_(Ts).
% Greedy.
tokens([T|Ts]) --> token(T), tokens(Ts).
tokens([]) --> [].
token(name(Cs)) --> name(Cs), !.
token(variable(Cs)) --> variable(Cs), !.
token(float_number(Cs)) --> float_number(Cs), !. % 3
token(integer(Cs)) --> integer(Cs), !. % 2
token(double_quoted_list(Cs)) --> double_quoted_list(Cs), !.
token(open) --> open, !.
token(open_ct) --> open_ct, !.
token(close) --> close_, !.
token(open_list) --> open_list, !.
token(close_list) --> close_list, !.
token(open_curly) --> open_curly, !.
token(close_curly) --> close_curly, !.
token(ht_sep) --> ht_sep, !.
token(comma) --> comma, !.
name(Cs) --> (layout_text_sequence '|' []), !, name_token(Cs).
variable(Cs) --> (layout_text_sequence '|' []), !, variable_token(Cs).
integer(Cs) --> (layout_text_sequence '|' []), !, integer_token(Cs).
float_number(Cs) --> (layout_text_sequence '|' []), !, float_number_token(Cs).
double_quoted_list(Cs) -->
(layout_text_sequence '|' []), !, double_quoted_list_token(Cs).
open --> layout_text_sequence, open_token.
open_ct --> open_token.
close_ --> (layout_text_sequence '|' []), !, close_token.
open_list --> (layout_text_sequence '|' []), !, open_list_token.
close_list --> (layout_text_sequence '|' []), !, close_list_token.
open_curly --> (layout_text_sequence '|' []), !, open_curly_token.
close_curly --> (layout_text_sequence '|' []), !, close_curly_token.
ht_sep --> (layout_text_sequence '|' []), !, head_tail_separator_token.
comma --> (layout_text_sequence '|' []), !, comma_token.
end --> (layout_text_sequence '|' []), !, end_token.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Layout text.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
layout_text_sequence --> layout_text, layout_texts.
% Greedy.
layout_texts --> layout_text, layout_texts.
layout_texts --> [].
layout_text --> layout_char(_), !.
layout_text --> comment, !.
comment --> single_line_comment, !.
comment --> bracketed_comment, !.
single_line_comment --> end_line_comment_char(_), comment_text, new_line_char(_).
% Greedy. The order is important.
% single_line_comment -->
% end_line_comment_char(_), comment_text, new_line_char(_), !.
% single_line_comment -->
% end_line_comment_char(_), comment_text, [_], !, { false }.
% single_line_comment --> end_line_comment_char(_), comment_text.
bracketed_comment --> comment_open, comment_text, comment_close.
comment_open --> comment_1_char, comment_2_char.
comment_close --> comment_2_char, comment_1_char.
comment_text --> chars(_).
comment_1_char --> "/".
comment_2_char --> "*".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Names.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
name_token(Cs) --> letter_digit_token(Cs), !.
name_token(Cs) --> graphic_token(Cs), !.
name_token(Cs) --> quoted_token(Cs), !.
name_token(Cs) --> semicolon_token(Cs), !.
name_token(Cs) --> cut_token(Cs), !.
letter_digit_token([C|Cs]) --> small_letter_char(C), alphanumeric_chars(Cs).
graphic_token(_) --> ".", layout_char(_), !, { false }.
graphic_token(_) --> ".", end_line_comment_char(_), !, { false }.
graphic_token([C|Cs]) --> graphic_token_char(C), graphic_token_chars(Cs).
% Greedy.
graphic_token_chars([C|Cs]) --> graphic_token_char(C), graphic_token_chars(Cs).
graphic_token_chars([]) --> [].
graphic_token_char(C) --> graphic_char(C), !.
graphic_token_char(C) --> backslash_char(C), !.
quoted_token(Cs) -->
single_quote_char(_),
single_quoted_items(Cs),
single_quote_char(_).
% Greedy.
single_quoted_items(Cs) -->
single_quoted_item(Cs0),
single_quoted_items(Cs1),
{ append(Cs0, Cs1, Cs) }.
single_quoted_items([]) --> [].
single_quoted_item([C]) --> single_quoted_character(C), !.
single_quoted_item([]) --> continuation_escape_sequence, !.
continuation_escape_sequence --> backslash_char(_), new_line_char(_).
semicolon_token([C]) --> semicolon_char(C).
cut_token([C]) --> cut_char(C).
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Quoted characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
single_quoted_character(C) --> non_quote_char(C), !.
single_quoted_character(C) --> single_quote_char(C), single_quote_char(C), !.
single_quoted_character(C) --> double_quote_char(C), !.
single_quoted_character(C) --> back_quote_char(C), !.
double_quoted_character(C) --> non_quote_char(C), !.
double_quoted_character(C) --> single_quote_char(C), !.
double_quoted_character(C) --> double_quote_char(C), double_quote_char(C), !.
double_quoted_character(C) --> back_quote_char(C), !.
back_quoted_character(C) --> non_quote_char(C), !.
back_quoted_character(C) --> single_quote_char(C), !.
back_quoted_character(C) --> double_quote_char(C), !.
back_quoted_character(C) --> back_quote_char(C), back_quote_char(C), !.
non_quote_char(C) --> graphic_char(C), !.
non_quote_char(C) --> alphanumeric_char(C), !.
non_quote_char(C) --> solo_char(C), !.
non_quote_char(C) --> space_char(C), !.
non_quote_char(C) --> meta_escape_sequence(C), !.
non_quote_char(C) --> control_escape_sequence(C), !.
non_quote_char(C) --> octal_escape_sequence(C), !.
non_quote_char(C) --> hexadecimal_escape_sequence(C), !.
meta_escape_sequence(C) --> backslash_char(_), meta_char(C0),
{ member(C0-C, [('\\')-('\\'), ''''-'''', '"'-'"', '`'-'`']), ! }.
control_escape_sequence(C) --> backslash_char(_), symbolic_control_char(C0),
{ member(
C0-C,
[
'a'-'\a',
'b'-'\b',
'r'-'\r',
'f'-'\f',
't'-'\t',
'n'-'\n',
'v'-'\v'
]
), !
}.
symbolic_control_char(C) --> symbolic_alert_char(C), !.
symbolic_control_char(C) --> symbolic_backspace_char(C), !.
symbolic_control_char(C) --> symbolic_carriage_return_char(C), !.
symbolic_control_char(C) --> symbolic_form_feed_char(C), !.
symbolic_control_char(C) --> symbolic_horizontal_tab_char(C), !.
symbolic_control_char(C) --> symbolic_new_line_char(C), !.
symbolic_control_char(C) --> symbolic_vertical_tab_char(C), !.
symbolic_alert_char('a') --> "a".
symbolic_backspace_char('b') --> "b".
symbolic_carriage_return_char('r') --> "r".
symbolic_form_feed_char('f') --> "f".
symbolic_horizontal_tab_char('t') --> "t".
symbolic_new_line_char('n') --> "n".
symbolic_vertical_tab_char('v') --> "v".
octal_escape_sequence(C) -->
backslash_char(_),
octal_digit_char(C0),
octal_digit_chars(Cs),
backslash_char(_),
{ number_chars(N, ['0', 'o', C0|Cs]),
char_code(C, N)
}.
hexadecimal_escape_sequence(C) -->
backslash_char(_),
symbolic_hexadecimal_char(C0),
hexadecimal_digit_char(C1),
hexadecimal_digit_chars(Cs),
backslash_char(_),
{ number_chars(N, ['0', C0, C1|Cs]),
char_code(C, N)
}.
symbolic_hexadecimal_char('x') --> "x".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Variables.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
variable_token(Cs) --> named_variable(Cs), !. % 1
variable_token(Cs) --> anonymous_variable(Cs), !. % 0
anonymous_variable([C]) --> variable_indicator_char(C).
named_variable([C0, C1|Cs]) -->
variable_indicator_char(C0), !,
alphanumeric_char(C1),
alphanumeric_chars(Cs).
named_variable([C|Cs]) -->
capital_letter_char(C), !,
alphanumeric_chars(Cs).
variable_indicator_char(C) --> underscore_char(C).
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Integer numbers.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
integer_token(Cs) --> character_code_constant(Cs), !.
integer_token(Cs) --> binary_constant(Cs), !.
integer_token(Cs) --> octal_constant(Cs), !.
integer_token(Cs) --> hexadecimal_constant(Cs), !.
integer_token(Cs) --> integer_constant(Cs), !.
integer_constant([C|Cs]) --> decimal_digit_char(C), decimal_digit_chars(Cs).
character_code_constant(['0', C0, C]) -->
"0", single_quote_char(C0), single_quoted_character(C).
binary_constant(Cs) -->
binary_constant_indicator(Cs0),
binary_digit_char(C),
binary_digit_chars(Cs1),
{ append(Cs0, [C|Cs1], Cs) }.
binary_constant_indicator("0b") --> "0b".
octal_constant(Cs) -->
octal_constant_indicator(Cs0),
octal_digit_char(C),
octal_digit_chars(Cs1),
{ append(Cs0, [C|Cs1], Cs) }.
octal_constant_indicator("0o") --> "0o".
hexadecimal_constant(Cs) -->
hexadecimal_constant_indicator(Cs0),
hexadecimal_digit_char(C),
hexadecimal_digit_chars(Cs1),
{ append(Cs0, [C|Cs1], Cs) }.
hexadecimal_constant_indicator("0x") --> "0x".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Floating point numbers.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
float_number_token(Cs) -->
integer_constant(Cs0),
fraction(Cs1),
exponent(Cs2),
{ append([Cs0, Cs1, Cs2], Cs) }.
fraction([C0, C1|Cs]) -->
decimal_point_char(C0),
decimal_digit_char(C1),
decimal_digit_chars(Cs).
% Greedy.
exponent([C|Cs]) --> exponent_char(C), sign(Cs0), integer_constant(Cs1), !,
{ append(Cs0, Cs1, Cs) }.
exponent([]) --> [].
% Greedy.
sign([C]) --> negative_sign_char(C), !.
sign([C]) --> positive_sign_char(C), !.
sign([]) --> [].
positive_sign_char('+') --> "+".
negative_sign_char('-') --> "-".
decimal_point_char('.') --> ".".
exponent_char(C) --> [C], { member(C, "eE"), ! }.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Double quoted lists.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
double_quoted_string(Cs) -->
double_quote_char(_),
double_quoted_item(Cs0),
double_quoted_char(_),
append(["""", Cs0, """"], Cs).
% Greedy.
double_quoted_items(Cs) -->
double_quoted_item(Cs0), double_quoted_items(Cs1),
{ append(Cs0, Cs1, Cs) }.
double_quoted_items([]) --> [].
double_quoted_item([C]) --> double_quoted_character(C), !.
double_quoted_item([]) --> continuation_escape_sequence, !.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Double quoted lists.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
double_quoted_list_token(Cs) -->
double_quote_char(C),
double_quoted_items(Cs),
double_quote_char(C).
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Back quoted strings.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
back_quoted_string -->
back_quote_char,
back_quoted_items,
back_quoted_char.
% Greedy.
back_quoted_items --> back_quoted_item, back_quoted_items.
back_quoted_items --> [].
back_quoted_item --> back_quoted_character, !.
back_quoted_item --> continuation_escape_sequence, !.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Other tokens.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
open_token --> open_char(_).
close_token --> close_char(_).
open_list_token --> open_list_char(_).
close_list_token --> close_list_char(_).
open_curly_token --> open_curly_char(_).
close_curly_token --> close_curly_char(_).
head_tail_separator_token --> head_tail_separator_char(_).
comma_token --> comma_char(_).
% The order is important.
% Greedy. TODO: Find better.
end_token, [C] --> end_char, layout_char(C), !.
end_token, "%" --> end_char, end_line_comment_char(_), !.
end_token --> end_char, [_], !, { false }.
end_token --> end_char.
end_char --> ".".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Processor character set.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
% Not greedy.
chars([]) --> [].
chars([C|Cs]) --> char(C), chars(Cs).
char(C) --> graphic_char(C), !.
char(C) --> alphanumeric_char(C), !.
char(C) --> solo_char(C), !.
char(C) --> layout_char(C), !.
char(C) --> meta_char(C), !.
char(C) --> [C], { write('Accepting: \''), write(C), write(''''), nl }.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Graphic characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
graphic_char(C) --> [C], { member(C, "#$&*+-./:<=>?@^~"), ! }.
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Alphanumeric characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
% Greedy.
alphanumeric_chars([C|Cs]) -->
alphanumeric_char(C),
alphanumeric_chars(Cs).
alphanumeric_chars([]) --> [].
alphanumeric_char(C) --> alpha_char(C), !.
alphanumeric_char(C) --> decimal_digit_char(C), !.
alpha_char(C) --> underscore_char(C), !.
alpha_char(C) --> letter_char(C), !.
letter_char(C) --> capital_letter_char(C), !.
letter_char(C) --> small_letter_char(C), !.
% Greedy.
decimal_digit_chars([C|Cs]) --> decimal_digit_char(C), decimal_digit_chars(Cs).
decimal_digit_chars([]) --> [].
% Greedy.
binary_digit_chars([C|Cs]) --> binary_digit_char(C), binary_digit_chars(Cs).
binary_digit_chars([]) --> [].
% Greedy.
octal_digit_chars([C|Cs]) --> octal_digit_char(C), octal_digit_chars(Cs).
octal_digit_chars([]) --> [].
% Greedy.
hexadecimal_digit_chars([C|Cs]) -->
hexadecimal_digit_char(C), hexadecimal_digit_chars(Cs).
hexadecimal_digit_chars([]) --> [].
small_letter_char(C) --> [C], { member(C, "abcdefghijklmnopqrstuvwxyz"), ! }.
capital_letter_char(C) --> [C], { member(C, "ABCDEFGHIJKLMNOPQRSTUVWXYZ"), ! }.
decimal_digit_char(C) --> [C], { member(C, "0123456789"), ! }.
binary_digit_char(C) --> [C], { member(C, "01"), ! }.
octal_digit_char(C) --> [C], { member(C, "01234567"), ! }.
hexadecimal_digit_char(C) --> [C], { member(C, "0123456789AaBbCcDdEeFf"), ! }.
underscore_char('_') --> "_".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Solo characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
solo_char(C) --> cut_char(C), !.
solo_char(C) --> open_char(C), !.
solo_char(C) --> close_char(C), !.
solo_char(C) --> comma_char(C), !.
solo_char(C) --> semicolon_char(C), !.
solo_char(C) --> open_list_char(C), !.
solo_char(C) --> close_list_char(C), !.
solo_char(C) --> open_curly_char(C), !.
solo_char(C) --> close_curly_char(C), !.
solo_char(C) --> head_tail_separator_char(C), !.
solo_char(C) --> end_line_comment_char(C), !.
cut_char('!') --> "!".
open_char('(') --> "(".
close_char(')') --> ")".
comma_char((',')) --> ",".
semicolon_char(';') --> ";".
open_list_char('[') --> "[".
close_list_char(']') --> "]".
open_curly_char('{') --> "{".
close_curly_char('}') --> "}".
head_tail_separator_char('|') --> "|".
end_line_comment_char('%') --> "%".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Layout characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
layout_char(C) --> space_char(C), !.
layout_char(C) --> horizontal_tab_char(C), !.
layout_char(C) --> new_line_char(C), !.
space_char(' ') --> " ".
horizontal_tab_char('\t') --> "\t".
new_line_char('\n') --> "\n".
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Meta characters.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
meta_char(C) --> backslash_char(C), !.
meta_char(C) --> single_quote_char(C), !.
meta_char(C) --> double_quote_char(C), !.
meta_char(C) --> back_quote_char(C), !.
backslash_char('\\') --> "\\".
single_quote_char('''') --> "'".
double_quote_char('"') --> """".
back_quote_char('`') --> "`".
Parser (parser.pl
):
:- module(parser, []).
:- use_module(library(debug)).
:- use_module(library(dcgs)).
:- use_module(library(error)).
:- use_module(library(format)).
:- use_module(library(lists)).
state(S), [S] --> [S].
state(S0, S), [S] --> [S0].
print --> state(s(Ts, Ss, CsVs)), { format("~q ~q ~q\n", [Ts, Ss, CsVs]) }.
print(Cs) -->
state(s(Ts, Ss, CsVs)),
{ format("~s ~q ~q ~q\n", [Cs, Ts, Ss, CsVs]) }.
lookahead(T) --> state(s([T|_], _, _)).
shift --> state(s([name(Cs)|Ts], Ss, CsVs), s(Ts, [term(atom,A)|Ss], CsVs)),
{ atom_chars(A, Cs) }.
shift -->
state(
s([open_list,close_list|Ts], Ss, CsVs),
s(Ts, [term(atom,[])|Ss], CsVs)
).
shift -->
state(
s([open_curly,close_curly|Ts], Ss, CsVs),
s(Ts, [term(atom,{})|Ss], CsVs)
).
shift -->
state(s([variable(Cs)|Ts], Ss, CsVs0), s(Ts, [term(variable,V)|Ss], CsVs)),
{ variable(Cs, CsVs0, V, CsVs) }.
shift -->
state(
s([integer(Cs)|Ts], [term(atom,-)|Ss], CsVs),
s(Ts, [term(integer,N)|Ss], CsVs)
), !,
{ number_chars(N, [-|Cs]) }.
shift -->
state(
s([float_number(Cs)|Ts], [term(atom,-)|Ss], CsVs),
s(Ts, [term(float,N)|Ss], CsVs)
), !,
{ number_chars(N, [-|Cs]) }.
shift -->
state(
s([integer(Cs)|Ts], Ss, CsVs),
s(Ts, [term(integer,N)|Ss], CsVs)
),
{ number_chars(N, Cs) }.
shift -->
state(
s([float_number(Cs)|Ts], Ss, CsVs),
s(Ts, [term(float,N)|Ss], CsVs)
),
{ number_chars(N, Cs) }.
shift -->
state(
s([open_ct|Ts], [term(atom,A)|Ss], CsVs),
s(Ts, [compound(A,As,As)|Ss], CsVs)
), !.
shift --> state(s([open|Ts], Ss, CsVs), s(Ts, [open|Ss], CsVs)).
shift --> state(s([open_ct|Ts], Ss, CsVs), s(Ts, [open_ct|Ss], CsVs)).
% /*
shift -->
state(
s([close|Ts], [term(_,T),dot(0,Cs0)|Ss], CsVs),
s(Ts, [term(chars, Cs)|Ss], CsVs)
),
{ append(Cs0, T, Cs) }.
shift -->
state(
s([close|Ts], [term(_,T),dot(N0,Cs0)|Ss], CsVs),
s(Ts, [dot(N,Cs)|Ss], CsVs)
),
{ succ(N, N0), append(Cs0, T, Cs) }.
shift -->
state(
s([close|Ts], [dot(0,Cs)|Ss], CsVs),
s(Ts, [term(chars,Cs)|Ss], CsVs)
).
shift -->
state(
s([close|Ts], [dot(N0,Cs)|Ss], CsVs),
s(Ts, [dot(N,Cs)|Ss], CsVs)
),
{ succ(N, N0) }.
% */
shift -->
state(
s([close|Ts], [term(_,T0),compound(A,As,[T0])|Ss], CsVs),
s(Ts, [term(compound,T)|Ss], CsVs)
),
{ T =.. [A|As] }.
shift -->
state(
s([close|Ts], [term(_,T),open|Ss], CsVs),
s(Ts, [term(compound,T)|Ss], CsVs)
).
shift -->
state(
s([close|Ts], [term(_,T),open_ct|Ss], CsVs),
s(Ts, [term(compound,T)|Ss], CsVs)
).
% /*
shift -->
state(
% s([comma|Ts], [term(atom,A),compound('.',As,As))|Ss], CsVs),
s([comma|Ts], [term(atom,A),compound('.',As,_)|Ss], CsVs),
s(Ts, [dot(0,[A])|Ss], CsVs)
),
% { acyclic_term(As), atom_length(A, 1) },
{ var(As), atom_length(A, 1) },
reduce, !.
shift -->
state(
s([comma|Ts], [term(_,T),dot(0,[A])|Ss], CsVs),
s(Ts, [compound('.',[A,T|As],As)|Ss], CsVs)
).
shift -->
state(
s([comma|Ts], [term(_,T),dot(N0,Cs0)|Ss], CsVs),
s(Ts, [compound('.',[A,T|As],As),dot(N,Cs)|Ss], CsVs)
),
{ succ(N, N0), append(Cs, [A], Cs0) }.
shift -->
state(
s([comma|Ts], [dot(0,[A|Cs])|Ss], CsVs),
s(Ts, [compound('.',[A,Cs|As],As)|Ss], CsVs)
).
shift -->
state(
s([comma|Ts], [dot(N0,Cs0)|Ss], CsVs),
s(Ts, [compound('.',[A,Cs1|As],As),dot(N,Cs)|Ss], CsVs)
),
{ succ(N, N0),
length(Cs, N0),
append(Cs, [A|Cs1], Cs0)
}.
% */
shift -->
state(
s([comma|Ts], [term(_,T),compound(A,As0,[T|As])|Ss], CsVs),
s(Ts, [compound(A,As0,As)|Ss], CsVs)
).
reduce -->
state(
s(Ts, [dot(0,[A]),dot(N0,Cs0)|Ss], CsVs),
s(Ts, [dot(N,Cs)|Ss], CsVs)
),
{ succ(N0, N), append(Cs0, [A], Cs) }, !.
reduce --> [].
read_term_ -->
% print,
lookahead(end), !,
accept.
read_term_ -->
shift, !,
read_term_.
accept --> state(s([end], [term(_,_)], _)), !.
accept -->
print("End"), { false }.
% { throw(error(reduction(imcomplete), read_term//0)) }.
variable("_", CsVs, _, CsVs) :- !.
variable(Cs, CsVs, V, CsVs) :-
member(Cs-V, CsVs), !.
variable(Cs, CsVs, V, [Cs-V|CsVs]).
succ(X, S) :-
can_be(not_less_than_zero,X),
can_be(not_less_than_zero,S),
( nonvar(X) -> S is X+1 ; X is S-1, X >= 0 ).
sml(S, M, Xs0, Xs) :-
'$skip_max_list'(S, M, Xs0, Xs).
The initialization file init.pl
:
:- use_module(library(debug)).
:- use_module(library(charsio)).
:- use_module(library(dcgs)).
:- use_module(library(dif)).
:- use_module(library(lists)).
:- use_module(library(iso_ext)).
:- use_module(library(pairs)).
:- use_module(library(pio)).
:- use_module(library(format)).
:- use_module(lexer).
:- use_module(parser).
parse(Cs, Ts, Ss0, S) :-
phrase(lexer:read_term_(Ts), Cs),
Ss0 = [s(Ts, [], [])|_],
phrase(parser:read_term_, Ss0, [S]).
sample(a) --> [a].
sample([]) --> [[]].
sample('.'(E)) --> ['.'], sample(E).
sample('.'(E,Es)) --> ['.'], sample(E), sample(Es).
sample('.'(E,F,G)) --> ['.'], sample(E), sample(F), sample(G).
sample('.'(E,F,G,H)) --> ['.'], sample(E), sample(F), sample(G), sample(H).
generate(T, N) :-
length(Cs, N),
phrase(sample(T), Cs).
test :-
once(generate('.'(a,'.'(a,'.'(a,'.'(a,[]))),a), _)),
% N = 72849,
call_nth(user:generate(T, _), N),
( N mod 2^10 =:= 0, writeq(N), nl, false ; true ),
% T = '.'('.'(a,[]),[]),
% T = '.'('.'(aa,'.'(b,'.'(c,[]))),[]),
write_term_to_chars(T, [quoted(true),ignore_ops(true)], Cs0),
append(Cs0, ".", Cs),
( parse(Cs, _, _, _) ->
parse(Cs, Ts, Ss, S),
S = s(_,[term(_,T0)],_),
T0 \== T,
format("N = ~q,\nCs = ~q,\nTs = ~q,\nSs = ~q,\nS = ~q.\n\n", [N,Cs,Ts,Ss,S])
; format("N = ~q,\nCs = ~q,\nT = ~q.\n\n", [N,Cs,T])
),
halt.
test :-
halt.
dif(V, S0, V) :-
dif(V, S0).
ns_ --> "(".
ns_ --> ")".
ns_ --> "'.'".
ns_ --> "a".
ns --> [].
ns --> ns_, ns.
nonsense(Cs) :-
length(Cs, _),
[C0|Cs0] = Cs,
foldl(dif, Cs0, C0, _),
phrase(ns, Cs).
nonsense :-
call_nth(nonsense(Cs0), N),
( N mod 2^10 =:= 0, writeq(N), nl, false ; true ),
append(Cs0, ".", Cs),
( parse(Cs, Ts, Ss, S),
\+ catch(read_from_chars(Cs, _), error(syntax_error(_),_), false),
format("N = ~q,\nCs = ~q,\nTs = ~q,\nSs = ~q,\nS = ~q.\n\n", [N,Cs,Ts,Ss,S])
; catch(read_from_chars(Cs, T), error(syntax_error(_),_), false),
\+ parse(Cs, Ts, Ss, S),
\+ member(N, [161,749,822,3819]),
format("N = ~q,\nCs = ~q,\nT = ~q.\n\n", [N,Cs,T])
),
halt.
nonsense :-
halt.
Using Scryer Prolog with scryer-prolog init -g test
. It can also be queried like ?- parse("'.'(a,[]).", Ts, Ss, S).
. By uncommenting print//0
in read_term_//0
in parser.pl
(or using state(S0, S), [S] --> [S0,S].
), the transition can be visualized.
In the step by step, dot/2
and term(chars,_)
use a compact internal representation.
Some information is lost in this implementation (can't be seeing but present), example when parsing '.'('.'(a,[]),[]).
:
...
[close,comma,open_list,close_list,close,end] [term(atom,[]),dot(0,"a"),compound('.',A,A)] []
[comma,open_list,close_list,close,end] [term(chars,"a"),compound('.',A,A)] []
[open_list,close_list,close,end] [compound('.',["a"|A],A)] [] % "a" still compactly stored.
...
Some grammar rules of shift//0
can be commented to fall back to the unoptimized parser.
The space complexity is O(ln N)
where N
is the size of the list of characters (not token nor input).
In the worst case ([]
, '.'(a,a([]))
, '.'(a,a('.'(a,a([]))))
, etc), the space complexity is O(N ln N)
where N
is the number of tokens.
In the best case ([]
, '.'(a,[])
, '.'(a,'.'(a,[]))
, etc), the space complexity is O(ln N)
where N
is the number of tokens.