了解Compiler, Tiny compiler

Author:Gao
Created At:2020-08-02

构建一个简单的编译器, 将 List 格式的代码转换成 C 格式的

原始代码

(plus 3 (abstract 9 6))

编译器

一个编译器的前端模型

A Compiler's Frontend

根据编译过程来解决这个问题

  1. 生成tokens
  2. 根据tokens生成ast
  3. 转换astnewAst
  4. newAst生成代码

Tokenizer

将源代码转换为token

const tokenizer = (input) => {
  let pos = 0;
  let tokens = [];
  while (pos < input.length) {
    let char = input[pos];

    const PAREN_MATCH = /[\(\)]/;
    if (PAREN_MATCH.test(char)) {
      tokens.push({type: 'paren', value: char});
      pos++;
      continue;
    }

    const NAME_MATCH = /[a-zA-Z_]/;
    const NAME_MATCH_ = /[a-zA-Z0-9_]/;
    if (NAME_MATCH.test(char)) {
      let verb = char;
      while (NAME_MATCH_.test(input[++pos])) {
        verb += input[pos];
      }
      tokens.push({type: 'name', value: verb});
      continue;
    }

    const NUM_MATCH = /[0-9]/;
    if (NUM_MATCH.test(char)) {
      let verb = char;
      while (NUM_MATCH.test(input[++pos])) {
        verb += input[pos];
      }
      tokens.push({type: 'number', value: verb});
      continue;
    }

    const WHITE_SPACE = /\s/;
    if (WHITE_SPACE.test(char)) {
      pos++;
      continue;
    }
    throw new Error(`Unexpect token at ${pos}`);
  }
  return tokens;
};

module.exports = tokenizer;

Parser

token流转换为AST

const parser = (tokens) => {
  let current = 0;

  const walk = () => {
    let token = tokens[current];

    if (token.type === 'number') {
      current++;

      return {
        type: 'NumberLiteral',
        value: token.value,
      };
    }

    if (token.type === 'paren' && token.value === '(') {
      token = tokens[++current];

      let node = {
        type: 'CallExpression',
        name: token.value,
        params: [],
      };

      token = tokens[++current];

      while (
        token.type !== 'paren' ||
        (token.type === 'paren' && token.value !== ')')
      ) {
        node.params.push(walk());
        token = tokens[current];
      }

      current++;

      return node;
    }
    throw new TypeError(token.type);
  };

  let ast = {
    type: 'Program',
    body: [],
  };

  while (current < tokens.length) {
    ast.body.push(walk());
  }

  return ast;
};

module.exports = parser;

Traverser and Transformer

Traverser 提供了遍历 AST 的方法

Transformer 通过 Traverser 遍历语法树来修改 AST

Traverser

const traverser = (ast, visitor) => {
  const traverseArray = (array, parent) => {
    array.forEach((child) => {
      traverseNode(child, parent);
    });
  };

  const traverseNode = (node, parent) => {
    let methods = visitor[node.type];

    if (methods && methods.enter) {
      methods.enter(node, parent);
    }

    switch (node.type) {
      case 'Program':
        traverseArray(node.body, node);
        break;

      case 'CallExpression':
        traverseArray(node.params, node);
        break;

      case 'NumberLiteral':
        break;

      default:
        throw new TypeError(node.type);
    }

    if (methods && methods.exit) {
      methods.exit(node, parent);
    }
  };

  traverseNode(ast, null);
};

module.exports = traverser;

Transformer

const traverser = require('./traverser');

const transformer = (ast) => {
  let newAst = {
    type: 'Program',
    body: [],
  };

  ast._context = newAst.body;

  traverser(ast, {
    NumberLiteral: {
      enter(node, parent) {
        parent._context.push({
          type: 'NumberLiteral',
          value: node.value,
        });
      },
    },
    CallExpression: {
      enter(node, parent) {
        let expression = {
          type: 'CallExpression',
          callee: {
            type: 'Identifier',
            name: node.name,
          },
          arguments: [],
        };

        node._context = expression.arguments;

        if (parent.type !== 'CallExpression') {
          expression = {
            type: 'ExpressionStatement',
            expression: expression,
          };
        }

        parent._context.push(expression);
      },
    },
  });

  return newAst;
};

module.exports = transformer;

Code Generator

AST重新生成为代码

const codeGenerator = (node) => {
  switch (node.type) {
    case 'Program':
      return node.body.map(codeGenerator).join('\n');

    case 'ExpressionStatement':
      return codeGenerator(node.expression) + ';';

    case 'CallExpression':
      return (
        codeGenerator(node.callee) +
        '(' +
        node.arguments.map(codeGenerator).join(', ') +
        ')'
      );

    case 'Identifier':
      return node.name;

    case 'NumberLiteral':
      return node.value;

    default:
      throw new TypeError(node.type);
  }
};

module.exports = codeGenerator;