UPDATE:
So the issue seems to be with the generator, and not necessarily with the next-token and lookahead functions. I added some display calls around where the set!s were happening, and found that the issue is that after (generate-token) is called the second time, it resumes execution from where it was called the first time.
Here's the full code for the program (I've left the original post below for reference):
(define char-alphanumeric? (lambda (char) (or (char-alphabetic? char) (char-numeric? char))))
(define generate-token #f)
(define filename "input.txt")
(define next-token #f)
(define lookahead #f)
(define status #f)
(let ((f (open-input-file filename)) (yield #f) (token "") (lookahead-token #f) (current-token #f))
(set! generate-token (lambda ()
(letrec ((next-char (lambda (c)
(let ((separators (list #\; #\,)))
(cond ((eof-object? c) (display "last token before eof: ") (display token) (newline) (yield c))
((member c separators)
(begin
(display "token before sep: ") (display token) (newline)
(call-with-current-continuation (lambda (resume)
(set! generate-token (lambda () (resume)))
(yield token)))
(display "back from call") (set! token "")
(call-with-current-continuation (lambda (resume)
(set! generate-token (lambda () (resume)))
(yield (make-string 1 c))))
))
((or (char-alphanumeric? c) (equal? c #\_)) ; c is part of a string token
(begin (display "found char: ") (display c) (display "; added to string: ")
(set! token (string-append token (make-string 1 c)))
(display token) (newline)
(next-char (read-char f))))
((char-whitespace? c)
(begin
(display "token before ws: ") (display token) (newline)
(if (> (string-length token) 0)
(begin (call-with-current-continuation (lambda (resume)
(display "setting generate-token to resume") (newline)
(set! generate-token (lambda ()
((display "calling resume") (newline)
(resume))))
(display "yielding token from cc") (newline)
(yield token)))
(display "continuing...") (newline)
(set! token ""))
;(set! token "")
))))
(next-char (read-char f))
))))
(call-with-current-continuation (lambda (k) ((set! yield k) (k (next-char (read-char f))))))
)))
(set! lookahead (lambda () (begin
(if (not lookahead-token)
(begin (display "no lookahead") (newline)
(display "setting lookahead-token") (newline)
(set! lookahead-token (string-copy (generate-token)))
(display "lookahead set to ") (display lookahead-token) (newline)
))
lookahead-token)))
(set! next-token (lambda () (begin
(if lookahead-token
(begin (display "affirmative") (newline)
(set! current-token (string-copy lookahead-token))
(set! lookahead-token #f))
(begin (display "negative") (newline)
(display "setting current token to next-token") (newline)
(set! current-token (string-copy (generate-token)))
(display "current token = ") (display current-token) (newline)
(set! lookahead-token #f)))
current-token)))
(set! status (lambda () (begin (display current-token) (display " -> ") (display lookahead-token) (newline))))
)
Executing the next-token and lookahead calls as per the first example in the original post below yields:
> (next-token)
negative
setting current token to next-token
found char: t; added to string: t
found char: h; added to string: th
found char: e; added to string: the
found char: s; added to string: thes
found char: e; added to string: these
token before ws: these
setting generate-token to resume
yielding token from cc
current token = these
"these"
> (status)
these -> #f
> (lookahead)
no lookahead
setting lookahead-token
calling resume
continuing...
found char: a; added to string: a
found char: r; added to string: ar
found char: e; added to string: are
token before ws: are
setting generate-token to resume
yielding token from cc ; the problem is right here: the generate token call is
current token = are ; sending control back to next-token instead of lookahead.
"are"
> (status)
are -> #f
I'm at a loss as to why it's behaving this way, but will admit that I am new to continuations and probably don't fully understand the ramifications. Any help, as always, would be greatly appreciated.
Thanks.
Original post follows:
I've created a generator that parses a text file and returns one token at0 a time as strings. So, if I have a file that contains
these are my file contents
Successive calls to (generate-token) return "these" "are" "my" ... respectively. This seems to be working, but what I've written this as part of a parser for a larger assignment. The generator seems to be working smoothly, but as I am building an LR(1) parser to parse the stream of tokens, I need to be able to perform a lookahead. To that end, I've created the following program:
(define generate-token #f)
(define next-token #f)
(define lookahead #f)
(define status #f)
(let ((lookahead-token #f) (current-token #f))
(set! generate-token (lambda () ... ) ; the generator function
(set! lookahead (lambda () (begin
(if (not lookahead-token)
(begin (display "no lookahead") (newline)
(set! lookahead-token (string-copy (generate-token)))))
lookahead-token)))
(set! next-token (lambda () (begin
(if lookahead-token
(begin (display "affirmative") (newline)
(set! current-token (string-copy lookahead-token))
(set! lookahead-token #f))
(begin (display "negative") (newline)
(set! current-token (string-copy (generate-token)))
(set! lookahead-token #f)))
current-token)))
(set! status (lambda () (begin (display current-token) (display " -> ") (display lookahead-token) (newline))))
)
However, these don't work as expected. I am under the impression that scheme (this is written in drRacket, but using #lang r5rs) passes objects by value, so the (string-copy calls are hypothetically unnecessary, but this still doesn't work as expected. It works like so:
> (status)
#f -> #f
> (next-token)
"these"
> (status) ; next-token properly sets current-token
"these" -> #f
> (lookahead) ; generator returns "are" as expected
"are"
> (status) ; notice that the current-token has been replaced instead of the lookahead-token
"are" -> #f
In a different flow, if (lookahead) is called first, it works correctly.
> (status)
#f -> #f
> (lookahead)
"these"
> (status)
#f -> "these"
> (lookahead)
"these"
> (status)
#f -> "these"
> (next-token)
"these"
> (status)
"these" -> #f
> (lookahead)
"are"
> (status)
"these" -> "are"
If anyone has any clue as to what's going on, any insight would be greatly appreciated. Disclosure: this is school work, but I'm not asking you to do it for me >.>.