Interpreter - How closure captures its name?

Question

Consider this code in Haskell:

let factorial n = if n < 2 then 1 else n * factorial (n-1) in factorial 3

I see that interpreter evaluating the program in such order:

It's a binding. Evaluate definitions first and evaluate the part after "in".
It's a definition. Evaluate the body and then associate body with the name.
It's a lambda. Capture the environment, make a closure and return.
Body of the definition is evaluated, write it to the name now.
Definitions are evaluated, evaluate the right part of the expression.
The expression is evaluated, return the result.

I see the following problem with this model: At step 3, when the closure captures environment it doesn't know anything about "factorial" binding.

I am writing an interpreter for ML-like language in JavaScript and I stumbled this problem. For example, the following code in my language:

fac = \x -> if (== x, 0) { 1 } else { fac (- x, 1) } in fac 3

won't work because of the described problem.

How interpreters for other languages solve this problem?

Here's the code of the interpreter for the reference.

"use strict";

const grammar =
`
Expression "expression"
  = Condition
  / Application
  / Lambda
  / Binding
  / Integer
  / String
  / Identifier
  / '(' expr: Expression ')' { return expr; }

_ "whitespace"
  = [ \\t\\n\\r\\n]*

Integer "integer"
  = [0-9]+ {
    return { type: 'literal',
             literalType: 'Integer',
             value: parseInt(text(), 10)
          };
  }

String "string"
 = '\"' value: ([^\"]* { return text(); } ) '\"' {
   return { type: 'literal',
            literalType: 'String',
            value: value
          };
    }

Letter
  = [a-zA-Z]

Identifier
  = (Letter / '+' / '-' / '*' / '/' / '_' / '==' / '>' / '<')+ {
    return {
        type: 'identifier',
        value: text()
    }
  }

Application
  = id: Identifier _ args: ActualArguments {
     return { type: 'application',
            fun: id,
            args: args
          }
  }
  / lambda: ('(' l: Lambda ')' { return l; }) _ args: ActualArguments  {
     return { type: 'application',
            fun: lambda,
            args: args
          }
    }

ActualArguments
 = expr: Expression rest: (',' _ e: Expression { return e })* { return [expr].concat(rest); }

Lambda
  = '\\\\' args: Arguments _ '->' _ body: Expression {
   return { type: 'lambda',
            args: args,
            body: body
          }
    }

Arguments
  = head: Identifier rest: (',' _ i: Identifier { return i; })* { return [head].concat(rest); }

Binding
 = name: Identifier _ '=' _ def: Expression _ 'in' _ expr: Expression {
   return {
     type: 'binding',
     name: name,
     def: def,
     expr: expr
   }
 }

Condition
 = 'if' _ '(' _ cond: Expression _ ')' _ '{' _ expr1: Expression _ '}' expr2: ( _ 'else' _ '{' _ e: Expression _ '}' { return e; })? {
   return {
     type: 'condition',
     condition: cond,
     expr1,
     expr2
   }
 }
`

const parser = peg.generate(grammar);

const std = {
  '+': (arg1, arg2) => arg1 + arg2,
  '-': (arg1, arg2) => arg1 - arg2,
  '*': (arg1, arg2) => arg1 * arg2,
  '/': (arg1, arg2) => arg1 / arg2,
  'str': (arg1, arg2) => [arg1, arg2].join(""),
  '>': (arg1, arg2) => arg1 > arg2,
  '<': (arg1, arg2) => arg1 < arg2,
  '==': (arg1, arg2) => arg1 === arg2,
  'false': false,
  'true': true,
  'and': (arg1, arg2) => arg1 && arg2,
  'or': (arg1, arg2) => arg1 || arg2
}

const makeLambda = (fun, parentEnv) => {
  return (...args) => {
      const env = Object.assign({}, parentEnv);
      fun.args.forEach((el, i) => {
        env[el.value] = args[i];
      });
      return _eval(fun.body, env);
  }
}

const evalLiteral = (literal) => {
  switch (literal.literalType) {
    case 'Integer':
      return parseInt(literal.value);
    case 'String':
      return String(literal.value);
    default:
      console.log('Literal type undefined');
      return literal.value;
  }
}

const _eval = (expr, parentEnv = std) => {
  const env = Object.assign({}, parentEnv);
  switch (expr.type) {
    case 'application':
      const fun = _eval(expr.fun, env);
      const args = expr.args.map(arg => _eval(arg, env));
      return fun.apply(null, args);
      break;
    case 'literal':
      return evalLiteral(expr);
    case 'identifier':
      return env[expr.value];
    case 'lambda':
      return makeLambda(expr, env);
    case 'binding':
      env[expr.name.value] = _eval(expr.def, env);
      return _eval(expr.expr, env);
    case 'condition':
      if (_eval(expr.condition, env)) {
        return _eval(expr.expr1, env);
      } else {
        return _eval(expr.expr2, env);
      }
  }
}

const parseAndEval = (str) => {
  try {
    const parsed = parser.parse(str);
    console.log(parsed);
    return _eval(parsed);
  } catch (e) {
    if (e.name == "SyntaxError" ) {
    return e.toString() +
      " start: " + JSON.stringify(e.location.start) +
      " end: " + JSON.stringify(e.location.end);
    } else {
      return e.toString();
    }
  }
}

FWIW, [in JavaScript](http://www.ecma-international.org/ecma-262/7.0/index.html#sec-let-and-const-declarations-runtime-semantics-evaluation) it creates an uninitialized binding for `factorial`, then evaluates the right-hand side (which doesn't actually *use* the binding because the function is just being defined, not run), and then initializes the binding with the resulting value. So the binding exists (uninitialized) when the initializer is evaluated (so early reference errors could be handled, although JavaScript doesn't do that), and has a value as of when the function is run. FWIW. — T.J. Crowder, Dec 02 '16 at 07:30
In order to evaluate recursive anonymous functions (lambdas) you need something called Y-Combinator. You might find this links helpful, http://kestas.kuliukas.com/YCombinatorExplained/ and http://stackoverflow.com/questions/93526/what-is-a-y-combinator — zeronone, Dec 02 '16 at 07:46
@zeronone To use Y you already need an interpreter. Also, once we do have an interpreter, Y is horribly inefficient compared to just using recursive bindings. — András Kovács, Dec 02 '16 at 10:47
@AndrásKovács I think zeronone was suggesting to add Y as a predefined operator in the predefined environment `const std = { ... , 'Y' : Yimpl }` where `Yimpl` can be defined recursively in JavaScript, and shouldn't be too inefficient, I guess. — chi, Dec 02 '16 at 12:19
@chi I am not sure I can implement in host language because host language is unaware of my language environment. — charlag, Dec 02 '16 at 12:33
@charlag_khan But `Yimpl` needs nothing from the environment. Its implementation should be something like `var Yimpl = (f) => f(() => Yimpl(f))`. — chi, Dec 02 '16 at 12:40
@chi this could work, thanks. I also found this: "We use a reference to the environment for two reasons: First, by storing a reference instead of the actual environment, we save space. Second - and more importantly - the use of references allows for the creation of environments containing closures whose embedded references point to the environment itself. This is important when defining recursive, or mutually recursive functions. " http://www.cs.cornell.edu/courses/cs312/2004fa/lectures/lecture24.htm — charlag, Dec 02 '16 at 12:53

charlag · Answer 1 · 2016-12-03T18:01:38.587

UPDATE

I found one more solution here: http://lisperator.net/pltut/eval1/new-constructs

Evaluator is somewhat different for this language, though. Basically, it boils down to this:

function make_lambda(env, exp) {
    if (exp.name) {                    // these
        env = env.extend();            // lines
        env.def(exp.name, lambda);     // are
    }                                  // new
    function lambda() {
        var names = exp.vars;
        var scope = env.extend();
        for (var i = 0; i < names.length; ++i)
            scope.def(names[i], i < arguments.length ? arguments[i] : false);
        return evaluate(exp.body, scope);
    }
    return lambda;
}

I found the solution in the lecture on implementing ML compiler: http://www.cs.cornell.edu/courses/cs312/2004fa/lectures/lecture24.htm

"A closure consists of the name of the unique argument (see below), the expression (i.e. abstract syntax tree) defining the function body, and a reference to the environment in which the closure was created. We use a reference to the environment for two reasons:

First, by storing a reference instead of the actual environment, we save space.
Second - and more importantly - the use of references allows for the creation of environments containing closures whose embedded references point to the environment itself. This is important when defining recursive, or mutually recursive functions."

So, what I did is passing a reference of the environment to the makeLamba() instead of a copy. Thus, lambda shares the environment at the moment of creation but for the evaluation of its body the copy of the environment is made (so anything inside the lambda won't mutate parent environment).

  // ...
  case 'lambda':
    return makeLambda(expr, parentEnv); // not a copy but a reference
  // ...

  const makeLambda = (fun, parentEnv) => {
    return (...args) => {
        const env = Object.assign({}, parentEnv);
        fun.args.forEach((el, i) => {
          env[el.value] = args[i];
        });
        return _eval(fun.body, env);
    }
  }

Interpreter - How closure captures its name?

1 Answers1