0

I'm trying to make a lexer in Rust while being relatively new to it but with a background in C/C++. I'm having problems with how Rust allocates memory in the following code, which generates the error "Cannot move out of borrowed content". I've read cargo --explain E0507 which details possible solutions, but I'm struggling to grasp the underlying differences between how Rust and C/C++ manage memory. In essence, I want to understand how to manage dynamic memory in Rust (or a better way to achieve what I'm doing).

The error is:

error[E0507]: cannot move out of borrowed content
  --> <anon>:65:16
   |
65 |         return self.read_tok.unwrap();
   |                ^^^^ cannot move out of borrowed content

error[E0507]: cannot move out of borrowed content
  --> <anon>:73:16
   |
73 |         return self.peek_tok.unwrap();
   |                ^^^^ cannot move out of borrowed content

error: aborting due to 2 previous errors

The code is:

use std::fmt;

/// Category assigned to every token the lexer produces.
#[derive(Debug, PartialEq)]
pub enum TokenType {
    /// Marks the end of the input.
    EndOfFile,
    /// Marks a character the lexer does not recognise.
    Illegal,
}

/// A lexed token: its category together with the text it carries.
pub struct Token {
    /// Category of this token.
    token_type: TokenType,
    /// Text attached to the token.
    value: String,
}

impl Token {
    /// Builds a token from its category and its text.
    pub fn new(token_type: TokenType, value: String) -> Self {
        Token { token_type, value }
    }

    /// Returns `true` when this token's category equals `token_type`.
    pub fn is_token_type(&self, token_type: TokenType) -> bool {
        token_type == self.token_type
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}[{}]", self.token_type, self.value)
    }
}

/// Character-at-a-time lexer with a one-token lookahead slot.
pub struct Lexer {
    /// Owned copy of the text being lexed.
    input: String,
    /// Position of the character most recently read.
    read_pos: usize,
    /// Position of the character that will be read next.
    peek_pos: usize,
    /// Character most recently read.
    ch: char,
    /// Slot for the token most recently produced by `next_token`.
    read_tok: Option<Token>,
    /// Slot for the lookahead token cached by `peek_token`.
    peek_tok: Option<Token>,
}

/// Sentinel character (NUL, U+0000) used to signal the end of the input.
const EOF: char = '\0';

impl Lexer {
    /// Creates a lexer over an owned copy of `input`, with no token cached yet.
    ///
    /// NOTE(review): `peek_pos` starts at 1 and `next_char` reads the character at
    /// `peek_pos`, so the character at index 0 is never returned. Confirm whether
    /// that is intended.
    pub fn new(input: &str) -> Lexer {
        return Lexer {
            input: input.to_string(),
            read_pos: 0,
            peek_pos: 1,
            ch: EOF,
            read_tok: None,
            peek_tok: None
        };
    }

    /// Advances to the next token and stores it in `read_tok`.
    ///
    /// A token previously cached by `peek_token` is reused if present; otherwise a
    /// new token is lexed.
    pub fn next_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.read_tok = Some(self.get_next_token());
        } else {
            // `take()` moves the cached token out and leaves `None` in `peek_tok`.
            self.read_tok = self.peek_tok.take();
        }

        // Compile error E0507 ("cannot move out of borrowed content"):
        // `Option::unwrap` consumes the `Option` by value, which would move
        // `read_tok` out of `self`, but `self` is only borrowed (`&mut self`).
        return self.read_tok.unwrap();
    }

    /// Lexes one token ahead, caching it in `peek_tok` if nothing is cached yet.
    pub fn peek_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.peek_tok = Some(self.get_next_token());
        }

        // Compile error E0507, for the same reason as in `next_token`: `unwrap`
        // would move `peek_tok` out of the borrowed `self`.
        return self.peek_tok.unwrap();
    }

    /// Reads one character and wraps it in a token: `EndOfFile` (with an empty
    /// value) for `EOF`, `Illegal` (carrying the character) for anything else.
    fn get_next_token(&mut self) -> Token {
        let ch = self.next_char();
        let tok: Token;

        match ch {
            EOF => { tok = Token::new(TokenType::EndOfFile, "".to_string()); }
            _   => { tok = Token::new(TokenType::Illegal, ch.to_string()); }
        }

        return tok;
    }

    /// Stores in `ch` the character at `peek_pos`, or `EOF` once `peek_pos` reaches
    /// `input.len()`, then advances both positions and returns `ch`.
    ///
    /// NOTE(review): `input.len()` is a byte count, while `chars().nth()` counts
    /// characters. The two differ for non-ASCII input, where `nth` can return
    /// `None` and the `unwrap` would panic. Confirm that inputs are ASCII-only.
    fn next_char(&mut self) -> char {
        if self.peek_pos >= self.input.len() {
            self.ch = EOF;
        } else {
            self.ch = self.input.chars().nth(self.peek_pos).unwrap();
        }

        // The position just read becomes `read_pos`; `peek_pos` moves one further.
        self.read_pos = self.peek_pos;
        self.peek_pos += 1;

        return self.ch;
    }
}


/// Lexes a fixed sample string and prints every token, stopping after `EndOfFile`.
fn main() {
    let source = "let x = 5;";
    let mut lexer = Lexer::new(source);

    let mut reached_end = false;
    while !reached_end {
        let token = lexer.next_token();
        println!("{:?}", token);

        // Stop once the token just printed is the end-of-file marker.
        reached_end = token.is_token_type(TokenType::EndOfFile);
    }
}

Rust playground links: https://play.rust-lang.org/?gist=bc85fafa35a5cbbd5ac4066aef9e333c&version=stable&backtrace=0 and https://play.rust-lang.org/?gist=21cba64f53488ee0a9389c0191c47134&version=stable&backtrace=0

I've managed to translate a working implementation in C++ which might give some more info on what I'm trying to achieve:

#include <string>
#include <iostream>

// Kinds of token the lexer can produce.
enum TokenType {
    ENDOFFILE = 0, // marks the end of the input
    ILLEGAL = 1    // marks a character the lexer does not recognise
};

// A lexed token: its kind together with the text it carries.
class Token {
private:
    enum TokenType token_type;
    std::string value;

public:
    // Builds a token of kind `token_type` carrying the text `value`.
    // Members are set in the initializer list so that `value` is constructed
    // once instead of being default-constructed and then assigned.
    Token(enum TokenType token_type, std::string value)
        : token_type(token_type), value(value)
    {
    }

    // Returns true when this token's kind equals `token_type`.
    // Declared const so it can be called on a const Token.
    bool is_token_type(enum TokenType token_type) const
    {
        return this->token_type == token_type;
    }

    // Renders the token for display: "EndOfFile" or "Illegal[<value>]".
    // Returns an empty string if the kind matches neither case.
    // Declared const so it can be called on a const Token.
    std::string to_string() const
    {
        switch (this->token_type) {
        case ENDOFFILE:
            return "EndOfFile";
        case ILLEGAL:
            return "Illegal[" + this->value + "]";
        }

        return std::string();
    }
};

class Lexer {
private:
    std::string input;
    int read_pos;
    int peek_pos;
    char ch;
    Token *read_tok;
    Token *peek_tok;

    Token *get_next_token() {
        char c = this->next_char();
        std::string c_str;
        Token *t;

        c_str.push_back(c);

        switch (c) {
        case 0:
            t = new Token(ENDOFFILE, "");
            break;
        default:
            t = new Token(ILLEGAL, c_str);
        }

        return t;
    }

    char next_char()
    {
        if (this->peek_pos >= this->input.length()) {
            this->ch = 0;
        } else {
            this->ch = input.at(this->peek_pos);
        }

        this->read_pos = this->peek_pos;
        this->peek_pos += 1;

        return this->ch;
    }

public:
    Lexer (std::string input)
    {
        this->input = input;
        this->read_pos = -1;
        this->peek_pos = 0;
        this->ch = 0;
        this->read_tok = NULL;
        this->peek_tok = NULL;
    }

    Token *next_token()
    {
        if (this->read_tok != NULL) {
            delete this->read_tok;
        }

        if (this->peek_tok == NULL) {
            this->read_tok = this->get_next_token();
        } else {
            this->read_tok = this->peek_tok;
            this->peek_tok = NULL;
        }

        return this->read_tok;
    }

    Token *peek_token()
    {
        if (this->peek_tok == NULL) {
            this->peek_tok = this->get_next_token();
        }

        return this->peek_tok;
    }
};

int main(int argc, char **argv)
{
    std::string input = "let x = 5;";
    Lexer l = Lexer(input);

    while (1) {
        Token *t = l.next_token();
        std::cout << t->to_string() << std::endl;

        if (t->is_token_type(ENDOFFILE)) {
            break;
        }
    }

    return 0;
}
Onei
  • 177
  • 1
  • 2
  • 11
  • I'm afraid there is a significant number of issues in your question: (1) Please try to narrow the code down to a [MCVE]. (2) You should include the complete, exact error message provided by the compiler, including what line triggered the error. (3) The generic answer of how to manage memory in Rust should hopefully be already answered in The Book. I advise you to read the chapters on [Ownership](https://doc.rust-lang.org/book/ownership.html) and [References and Borrowing](https://doc.rust-lang.org/book/references-and-borrowing.html). – E_net4 May 16 '17 at 10:00
  • 2
    Also note that there are many other [Stack Overflow questions with that title](http://stackoverflow.com/search?tab=votes&q=cannot%20move%20out%20of%20borrowed%20content) than the one you linked, and these may help you understand the problem. – E_net4 May 16 '17 at 10:01
  • @E_net4 (1) This was as small as I could reasonably make it while keeping the basic functionality. (2) Copied the error from the rust playground output. (3) While I've read those chapters and have a grasp of the ideas, the examples seem too generic to be of use to a relative beginner (hence I'm here). – Onei May 16 '17 at 10:19
  • An MCVE does not have to keep all functionality, but just enough to reproduce the particular issue that you are facing. I highly doubt that we have to observe all that code (including the example in C++) to understand the problem. – E_net4 May 16 '17 at 11:01

1 Answer

5

You came very close to getting it right, but there are two problems with your code.

First, as the compiler tells you, the following is prohibited:

self.read_tok = self.peek_tok;
self.peek_tok = None;

The first line attempts to move an Option<Token> object out of self.peek_tok. In Rust, objects can be moved out of variables, but not out of structure fields or slice subscripts. This is because the compiler can check that the variable is not used after the move, as well as arrange that its destructor is not invoked. This is not possible for objects stored in fields of structures or inside slices, at least not without adding overhead to every structure or container.

Moving objects out of structs is possible as long as they are stored in an intermediate container that supports moving. Fortunately, Option is such a container, and its take() method is designed for exactly that purpose:

self.read_tok = self.peek_tok.take()

Option::take() moves the object from the option, replaces it with None, and returns the object.

Second, once the above is fixed, the compiler complains of "moving out of borrowed content" on the return statements of next_token and peek_token, because they attempt to move objects out of the Option. Here you have the choice of cloning the Token, or moving it out of the option using Option::take() as above. The cloning approach requires adding #[derive(Clone)] to TokenType and Token, as well as changing the returns to:

// Use as_ref() to convert Option<Token> to Option<&Token>,
// which is unwrapped and the Token inside cloned
self.read_tok.as_ref().unwrap().clone()

With these changes, the example compiles, although it still flags the input as illegal.

user4815162342
  • 141,790
  • 18
  • 296
  • 355