CompilerFun.DD

ive always wanted to write a compiler but have found myself to be just a little too retarded

but today that changes!!!!!

its buggy, incomplete and implements a terrible language but it runs and its mine, maybe ill try to make a better one at some point.

and running this program in it

i=0;
n1=0;
n2=1;
x=0;
WHILE (i < 12)
{
    x = n1;
    n1 = n2;
    n2 = n1 + x;
    i = i + 1;
    PRINT x;
};

"successfully" prints out the first 12 fibonacci numbers

from enum import Enum, auto

class Token(Enum):
    LPAR = 0
    RPAR = auto()
    PLUS = auto()
    MINUS = auto()
    STAR = auto()
    SLASH = auto()
    PERCENT = auto()
    NUM = auto()
    NEQ = auto()
    EQ = auto()
    GT = auto()
    GE = auto()
    LT = auto()
    LE = auto()
    BANG = auto()
    BOR = auto()
    BAND = auto()
    XOR = auto()
    SHL = auto()
    SHR = auto()
    SEMICOLON = auto()
    IF = auto()
    ELSE = auto()
    IDENT = auto()
    LBRACE = auto()
    RBRACE = auto()
    ASSIGN = auto()
    PRINT = auto()
    WHILE = auto()

class Opcode(Enum):
    CONSTANT = 99
    JNZ = auto()
    JMP = auto()
    POP = auto()
    SBRK = auto()
    STOR = auto()
    LOAD = auto()

class Tokenizer():
    def __init__(self, source):
        self.source = source
        self.cur = 0

    def tokenize(self):
        r = []
        while not self.end():
            t = self.token()
            if t: r.append(t)
        return r

    def token(self):
        while self.char().isspace():
            if self.advance(): return None

        if self.char().isnumeric():
            x = []
            while self.char().isnumeric():
                x.append(self.char())
                if self.advance(): break
            return (Token.NUM, int("".join(x)))

        if self.char().isalpha():
            x = [self.char()]
            if self.advance(): return (Token.IDENT, "".join(x))
            while self.char().isalnum():
                x.append(self.char())
                if self.advance(): break

            match "".join(x):
                case "IF": return (Token.IF, None)
                case "ELSE": return (Token.ELSE, None)
                case "PRINT": return (Token.PRINT, None)
                case "WHILE": return (Token.WHILE, None)
                case (x): return (Token.IDENT, x)

        match self.char():
            case "+": return (Token.PLUS, self.advance())
            case "-": return (Token.MINUS, self.advance())
            case "*": return (Token.STAR, self.advance())
            case "/": return (Token.SLASH, self.advance())
            case "(": return (Token.LPAR, self.advance())
            case ")": return (Token.RPAR, self.advance())
            case "{": return (Token.LBRACE, self.advance())
            case "}": return (Token.RBRACE, self.advance())
            case "|": return (Token.BOR, self.advance())
            case "&": return (Token.BAND, self.advance())
            case "^": return (Token.XOR, self.advance())
            case "%": return (Token.PERCENT, self.advance())
            case ";": return (Token.SEMICOLON, self.advance())
            case "<":
                if self.advance() or self.char() != "<": return (Token.LT, None)
                return (Token.SHL, self.advance())
            case "=":
                if self.advance() or self.char() != "=": return (Token.ASSIGN, None)
                return (Token.EQ, self.advance())

        raise Exception("err", self.char())

    def char(self):
        return self.source[self.cur]

    def end(self):
        return self.cur >= len(self.source)

    def advance(self):
        self.cur += 1
        return self.end()

class Compiler():
    def __init__(self):
        self.idents = {}

    def compile(self, tokens):
        self.cur = 0
        self.tokens = tokens
        return self.statement()

    def statement(self):
        if self.match(Token.LBRACE):
            stmt = []
            while not self.match(Token.RBRACE):
                stmt = [*stmt, *self.statement()]
                if not self.match(Token.RBRACE): stmt.append(Opcode.POP)
                else: break
            return stmt
        expr = self.expression()
        self.consume(Token.SEMICOLON)
        return expr

    def expression(self):
        return self.ewhile()

    def ewhile(self):
        if self.match(Token.WHILE):
            self.consume(Token.LPAR)
            expr = self.expression()
            self.consume(Token.RPAR)
            stmt = self.statement()
            return [*expr, Opcode.JNZ, len(stmt) + 4, *stmt, Opcode.JMP, -len(stmt) - 2 - len(expr)]
        return self.print()

    def print(self):
        if self.match(Token.PRINT):
            return [*self.expression(), Token.PRINT]
        return self.assign()

    def assign(self):
        if self.match(Token.IDENT):
            if self.peek()[0] != Token.ASSIGN:
                self.cur -= 1
                return self.bor()
            var = self.previous()[1]
            expr = []
            if var not in self.idents:
                self.idents[var] = len(self.idents)
                expr = [Opcode.SBRK]
            while self.match(Token.ASSIGN):
                val = self.bor()
                expr = [*expr, *val, Opcode.STOR, self.idents[var]]
            return expr

        return self.bor()

    def bor(self):
        return self.binary(self.xor, Token.BOR)

    def xor(self):
        return self.binary(self.band, Token.XOR)

    def band(self):
        return self.binary(self.equality, Token.BAND)

    def equality(self):
        return self.binary(self.comparison, Token.NEQ, Token.EQ)

    def comparison(self):
        return self.binary(self.bitshift, Token.GT, Token.GE, Token.LT, Token.LE)

    def bitshift(self):
        return self.binary(self.term, Token.SHL, Token.SHR)

    def term(self):
        return self.binary(self.factor, Token.MINUS, Token.PLUS)

    def factor(self):
        return self.binary(self.unary, Token.SLASH, Token.STAR, Token.PERCENT)

    def unary(self):
        while self.match(Token.BANG, Token.MINUS):
            op = self.previous()
            return [*self.unary(), op[0]]
        return self.eif()

    def eif(self):
        if self.match(Token.IF):
            self.consume(Token.LPAR)
            expr = self.expression()
            self.consume(Token.RPAR)
            stmt = self.statement()
            if self.match(Token.ELSE):
                elsestmt = self.statement()
                return [*expr, Opcode.JNZ, len(stmt) + 4, *stmt, Opcode.JMP, len(elsestmt) + 2, *elsestmt]
            return [*expr, Opcode.JNZ, len(stmt) + 2, *stmt]

        return self.primary()

    def primary(self):
        if self.match(Token.NUM): return [Opcode.CONSTANT, self.previous()[1]]

        if self.match(Token.IDENT): return [Opcode.LOAD, self.idents[self.previous()[1]]]

        if self.match(Token.LPAR):
            expr = self.expression()
            self.consume(Token.RPAR)
            return expr
        raise Exception("err", self.peek())

    def binary(self, last, *match):
        expr = [*last()]
        while self.match(*match):
            op = self.previous()
            right = last()
            expr = [*expr, *right, op[0]]
        return expr

    def match(self, *types):
        for type in types:
            if self.check(type):
                self.advance()
                return True
        return False

    def check(self, type):
        if self.end(): return False
        return self.peek()[0] == type

    def advance(self):
        if not self.end(): self.cur += 1
        return self.previous()

    def end(self):
        return self.cur >= len(self.tokens)

    def peek(self):
        return self.tokens[self.cur]

    def previous(self):
        return self.tokens[self.cur - 1]

    def consume(self, type):
        if self.check(type): return self.advance()
        raise Exception("err", type, self.peek())

class VM():
    def __init__(self):
        self.heap = []

    def run(self, code):
        self.code = code
        self.pc = 0
        self.stack = []
        while self.pc < len(code):
            self.execute()
        return self.stack.pop()

    def execute(self):
        match self.code[self.pc]:
            case Opcode.CONSTANT:
                self.stack.append(self.code[self.pc + 1])
                self.pc += 1
            case Opcode.JNZ:
                if not self.stack.pop(): self.pc += self.code[self.pc + 1] - 1
                else: self.pc += 1
            case Opcode.JMP:
                self.pc += self.code[self.pc + 1] - 1
            case Opcode.POP:
                self.stack.pop()
            case Opcode.SBRK:
                self.heap.append(0)
            case Opcode.STOR:
                self.heap[self.code[self.pc + 1]] = self.stack.pop()
                self.stack.append(self.heap[self.code[self.pc + 1]])
                self.pc += 1
            case Opcode.LOAD:
                self.stack.append(self.heap[self.code[self.pc + 1]])
                self.pc += 1
            case Token.PRINT:
                print(self.stack[-1])
            case Token.PLUS:
                self.stack.append(self.stack.pop() + self.stack.pop())
            case Token.MINUS:
                r = self.stack.pop()
                self.stack.append(self.stack.pop() - r)
            case Token.STAR:
                self.stack.append(self.stack.pop() * self.stack.pop())
            case Token.SLASH:
                r = self.stack.pop()
                self.stack.append(self.stack.pop() // r)
            case Token.PERCENT:
                r = self.stack.pop()
                self.stack.append(self.stack.pop() % r)
            case Token.LT:
                r = self.stack.pop()
                self.stack.append(self.stack.pop() < r)
            case Token.BOR:
                self.stack.append(self.stack.pop() | self.stack.pop())
            case Token.SHL:
                r = self.stack.pop()
                self.stack.append(self.stack.pop() << r)
            case (x):
                raise Exception("err", x)

        self.pc += 1

if __name__ == "__main__":
    c = Compiler()
    v = VM()
    while True:
        s = input("> ")
        a = Tokenizer(s).tokenize()
        print(a)
        b = c.compile(a)
        print(b)
        print(v.run(b))

also CCC is back, would be cool to go since i live quite near (kiel) but currently unable to due to personal circumstances.

__attribute__((noreturn)) int main(int argc, char* argv[])
{
    std::cout << "Goodbye, World!" << std::endl;
    delete world;
}