Example: Logo Compiler

Turtle Graphics

Turtle Graphics was started back in the 1960s by S. Pappert. His Turtle Graphics also came with a programming language called «Logo». Now, a website about the Turtle could not be complete without a little homage to this beginnings and so you will find a very simple Logo-to-Python compiler here.

Since there is no official specifications for Logo, I based the compiler upon the examples in the book «Einführung in die Programming mit LOGO» (Introduction to the Programming in LOGO) by Prof. Juraj Hromkovic. The book itself in turn uses XLOGO.

The Source Code

# A SIMPLE LOGO-COMPILER IN PYTHON
#
# This program contains a very simple compiler/interpreter for a
# programm written in the programming language "LOGO". The given
# Logo-code is first translated to Python-code and then run using
# Python's "exec"-statement.
#
# Error-handling is non-existent. Please make sure your 
# Logo-script is correct before executing it!
#
# The function to be used here is:
#   compileLogo(logo_script)
#        -> Returns the corresponding Python-code.
#
# Supported Logo-Commands
# -----------------------
#   TO name (:arglist) code END
#        -> define a function
#   REPEAT n [code]
#        -> repeat the code n times
#   BREAK
#        -> break out of a loop
#   MAKE "destvar expr
#        -> set a variables value
#   IF expr <=> expr [thencode] ([elsecode])
#        -> execute the thencode only if the comparison returns
#           true. Otherwise execute the elsecode, if provided.
#   WHILE expr <=> expr [code]
#        -> execute the thencode while the comparison is true.
#   PRINT [list]
#        -> print the strings in the list
#
#   For the supported turtle functions see below
#   (LOGO_BUILTINS).
#
# MAY-08-2014
#  
# (c) 2014, Tobias Kohn
# http://jython.tobiaskohn.ch
#
from gturtle import *
from math import *
from time import sleep
import sys

# Enter any Logo-Program here:
#
# Note: the bar | is treated as a simple whitespace character
# and is here solely because of historic reasons.
PROGRAM = """
   | to QUAD :s 
   |   make "x 20 
   |   repeat 50 [
   |     if :x > :s [break]
   |     fd :x
   |     make "x :x + 5
   |     lt 360 / 8
   |   ] 
   | end
   | setPenColor 11
   | QUAD 144
"""

# PYTHON_INIT is put in front of each and every Logo Script. Here
# we initialize the turtle, for example.
PYTHON_INIT = """
makeTurtle()
speed(-1)
"""

### BEGIN FUNCTIONS ###

# Here we define all functions inside Logo. The names must be in 
# uppercase letters and the value contains a tuple with the number
# of arguments and the Python equivalent.

LOGO_BUILTINS = {
    "ABS": (1, "abs"),
    "ARCCOS": (1, "logo_arccos"),
    "ARCSIN": (1, "logo_arcsin"),
    "ARCTAN": (1, "logo_arctan"),
    "BACKWARD": (1, "back"),
    "BK": (1, "back"),
    "COS": (1, "logo_cos"),
    "CS": (0, "clear"),
    "FD": (1, "forward"),
    "FORWARD": (1, "forward"),
    "HOME": (0, "home"),  
    "LT": (1, "left"),
    "LEFT": (1, "left"),
    "PD": (0, "penDown"),
    "PENDOWN": (0, "penDown"),
    "PE": (0, "logo_penErase"),
    "PENERASE": (0, "logo_penErase"),
    "PENPAINT": (0, "logo_penPaint"),
    "PPT": (0, "logo_penPaint"),
    "PU": (0, "penUp"),
    "PENUP": (0, "penUp"),
    "RT": (1, "right"),
    "RIGHT": (1, "right"),
    "SETPC": (1, "logo_setPenColor"),
    "SETPENCOLOR": (1, "logo_setPenColor"),
    "SIN": (1, "logo_sin"),
    "SQRT": (1, "sqrt"),
    "STOP": (0, "sys.exit"),
    "TAN": (1, "logo_tan"),
    "WAIT": (1, "logo_wait"),
    # Not really a function but used by the scanner:
    "REPEAT": (2, None)   
}

CUSTOM_FUNCTIONS = {}

def isFunction(name):
    return (name in LOGO_BUILTINS) or \
           (name in CUSTOM_FUNCTIONS)

def getArgCount(name):
    if name in LOGO_BUILTINS:
        argCount, _ = LOGO_BUILTINS[name]
        return argCount
    elif name in CUSTOM_FUNCTIONS:
        return CUSTOM_FUNCTIONS[name]
    else:
        return 0

def getFunctionName(name):
    if name in LOGO_BUILTINS:
        _, pyname = LOGO_BUILTINS[name]
        if type(pyname) == str:
            return pyname
    return name

def defineFunction(name, argCount):
    CUSTOM_FUNCTIONS[name] = argCount

def isArgument(tkn):
    if type(tkn) == str:
        return (tkn[0] == ':')
    else:
        return False

### END FUNCTIONS ###

### BEGIN HELPER FUNCTIONS ###

# Some of the functions are defined slightly differently
# in Logo than in Python. We therefore provide an interface.

LOGO_COLORS = ["black", "red", "green", "yellow", "blue",
               "magenta", "cyan", "white", "gray",
               "lightgray", "darkred", "darkgreen",
               "darkblue", "orange", "pink", "purple",
               "brown"]

LAST_COLOR = "black"

def logo_setPenColor(c):
    if type(c) == int:
        if c >= 0 and c < len(LOGO_COLORS):
            clr = makeColor(LOGO_COLORS[c])
        else:
            clr = makeColor(LAST_COLOR)
    elif type(c) == list:
        clr = makeColor(c[0], c[1], c[2])
    else:
        clr = makeColor(c)
    setPenColor(clr)
    setColor(clr)

def logo_penErase():
    global LAST_COLOR
    LAST_COLOR = getPenColor()
    penErase()

def logo_penPaint():
    setPenColor(LAST_COLOR)

def logo_wait(t):
    sleep(t/1000)

def logo_sin(arg):
    return sin(radian(arg))

def logo_cos(arg):
    return cos(radian(arg))

def logo_tan(arg):
    return tan(radian(arg))

def logo_arcsin(arg):
    return degrees(asin(arg))

def logo_arccos(arg):
    return degrees(acos(arg))

def logo_arctan(arg):
    return degrees(atan(arg))

### END HELPER FUNCTIONS ###

### BEGIN SCANNER ###

def tokenize(script):
    """
    Split the given script as string into a list of tokens where each 
    token is a string itself.
    """

    CC_WHITESPACE = 0
    CC_LETTER = 1
    CC_DIGIT = 2
    CC_SYMBOL = 3
    CC_COMMENT = 4
    CC_PREFIX = 5

    def getCatCode(ch):
        if (ch <= ' ') or (ch == '|'):
            return CC_WHITESPACE
        if (ch >= 'A' and ch <= 'Z') or \
           (ch >= 'a' and ch <= 'z') or (ch == '_'):
            return CC_LETTER
        if (ch >= '0' and ch <= '9') or (ch == '.'):
            return CC_DIGIT
        if ch == ';':
            return CC_COMMENT
        if ch in ['"', ':']:
            return CC_PREFIX
        return CC_SYMBOL

    def skipComment(start):
        pos = start
        while (pos < len(script)) and (script[pos] not in ['\n', '\r']):
            pos += 1
        return pos

    def getNextToken(start):
        if start >= len(script):
            return ("", start)
        cc = getCatCode(script[start])
        if cc in [CC_LETTER, CC_DIGIT, CC_PREFIX]:
            pos = start+1
            while (pos < len(script)) and \
                  (getCatCode(script[pos]) in [CC_LETTER, CC_DIGIT]):
                pos += 1
            return (script[start: pos], pos)
        if cc == CC_COMMENT:
            return getNextToken(skipComment(start))
        if cc == CC_WHITESPACE:
            return getNextToken(start+1)
        return (script[start], start+1)

    pos = 0
    while pos < len(script):
        (tkn, pos) = getNextToken(pos)
        yield tkn
    if pos < len(script):
        yield getNextToken(pos)

def makeTokenList(script):
    """
    Splits the given LOGO script into tokens. The tokens in the
    list can be strings or lists themselves.
    """
    listStack = []
    curList = []
    for tkn in tokenize(script):
        if tkn != "":
            if tkn == '[':
                listStack.append(curList)
                curList = []
            elif tkn == ']':
                subList = curList
                curList = listStack.pop()
                curList.append(subList)
            elif type(tkn) == str:
                curList.append(tkn.upper())
            else:
                curList.append(tkn)
    return curList

### END SCANNER ###

### BEGIN PARSER ###

def parse(tokenList):
    """
    Requires a list with the tokens from a Logo script and returns
    an abstract syntax tree representing the tokens in the list.
    """
    ast, _ = parseBody(tokenList)
    return ast

def parseBody(tokenList):
    result = []
    while (tokenList != []) and (tokenList[0] != "END"):
        expr, tokenList = getExpr(tokenList)
        result.append(expr)
    return result, tokenList

def getExpr(tokenList):
    if tokenList != []:
        if tokenList[0] == "TO":
            name = tokenList[1]
            rest = tokenList[2:]
            args = []
            while (rest != []) and isArgument(rest[0]):
                args.append(rest[0][1:])
                rest = rest[1:]
            code, rest = parseBody(rest)
            if rest[0] != "END":
                print "ERROR: END EXPECTED!"
            defineFunction(name, len(args))
            return ["TO", name, args, code], rest[1:]
        elif tokenList[0] == "MAKE":
            name = tokenList[1]
            if name[0] == '"':
                name = name[1:]
            value, rest = getExpr(tokenList[2:])
            return ["MAKE", name, value], rest
        elif tokenList[0] in ["PRINT", "PR"]:
            return ["PRINT", tokenList[1]], tokenList[2:]
        else:
            term1, rest = getTerm(tokenList)
            result = ['+', term1]
            while (rest != []) and (rest[0] in ['+', '-']):
                term, rest = getTerm(rest)
                result.append(term)
            if (len(result) == 2):
                return term1, rest
            else:
                return result, rest
    else:
        return "", []

def getTerm(tokenList):
    if tokenList != []:
        result, rest = getFactor(tokenList)
        while (rest != []) and (rest[0] in ['*', '/']):
            op = rest[0]
            factor, rest = getFactor(rest[1:])
            result = [op, result, factor]
        return result, rest
    else:
        return "", []

def getFactor(tokenList):
    if tokenList != []:
        tkn = tokenList[0]
        if type(tkn) == list:
            return parse(tkn), tokenList[1:]
        elif isFunction(tkn):
            result = [tkn]
            argCount = getArgCount(tkn)
            rest = tokenList[1:]
            while argCount > 0:
                arg, rest = getExpr(rest)
                result.append(arg)
                argCount -= 1
            return result, rest
        elif tkn == '+':
            factor, rest = getFactor(tokenList[1:])
            return factor, rest
        elif tkn == '-':
            tkn, rest = getFactor(tokenList[1:])
            return ['-', tkn], rest
        elif tkn == '(':
            tkn, rest = getExpr(tokenList[1:])
            if rest[0] != ')':
                print "ERROR: MISSING CLOSING PARENTHESIS"
            return tkn, rest[1:]
        elif tkn == "IF" or tkn == "WHILE":
            comp, rest = getExpr(tokenList[1:])
            if (rest != []) and \
               (rest[0] in ['=', '>', '<', '>=', '<=', '!=']):
                op = rest[0]
                expr2, rest = getExpr(rest[1:])
                comp = [op, comp, expr2]
            code, rest = getExpr(rest)
            result = [tkn, comp, code]
            if (tkn == "IF") and (rest != []) and \
               (type(rest[0]) == list):
                altCode, rest = getExpr(rest)
                result.append(altCode)
            return result, rest
        elif tkn[0] == ':':
            return tkn[1:], tokenList[1:]
        else:
            return tkn, tokenList[1:]
    else:
        return "", []
        
### END PARSER ###

### BEGIN CODE GENERATOR ###

_rep_counter = 0

def generateCode(ast, indentation = 0):
    """
    The (first) argument must be an abstract syntax tree repesenting
    the Logo program. The optional second argument gives the indentation
    which is required in Python to denote code blocks.
    
    The functions returns an executable Python-script as string.
    """
    global _rep_counter
    if type(ast) == list:
        astHead = ast[0]
        if type(astHead) == list:
            result = ""
            for item in ast:
                result += generateCode(item, indentation) + '\n'
            return result[:-1]
        elif astHead == "TO":
            name = ast[1]
            args = ", ".join(ast[2])
            body = generateCode(ast[3], indentation + 1)
            result = ('\t' * indentation) + \
                     ("def %s(%s):\n" % (name, args)) + \
                     body
            return result
        elif astHead == "MAKE":
            dest = ast[1]
            src = generateCode(ast[2])
            return ('\t' * indentation) + dest + " = " + src
        elif astHead in ["IF", "WHILE"]:
            keyword = astHead.lower()
            comp = generateCode(ast[1])
            body1 = generateCode(ast[2], indentation + 1)
            if len(ast) > 3:
                body2 = '\n' + ('\t' * indentation) + "else:\n" + \
                        generateCode(ast[3], indentation + 1)
            else:
                body2 = ""
            result = ('\t' * indentation) + \
                     ("%s %s:\n" % (keyword, comp)) + \
                     body1 + \
                     body2
            return result
        elif astHead == "REPEAT":
            body = generateCode(ast[2], indentation + 1)
            _rep_counter += 1
            result = ('\t' * indentation) + \
                 ("for _i%d in xrange(int(%s)):\n" % (_rep_counter, ast[1])) + \
                 body
            return result
        elif astHead == "BREAK":
            return ('\t' * indentation) + "break"
        elif astHead == "PRINT":
            def makePrintArg(arg):
                if arg[0] == ':':
                    return arg[1:]
                elif '"' in arg:
                    return repr(arg)
                else:
                    return '"' + arg + '"'

            if type(ast[1]) == list:
                arg = ", ".join([makePrintArg(x) for x in ast[1]])
            else:
                arg = makePrintArg(ast[1])
            return ('\t' * indentation) + "print " + arg
        elif astHead in ['=', '<', '>', '<=', '>=', '!=']:
            leftSide = generateCode(ast[1])
            rightSide = generateCode(ast[2])
            if astHead == '=':
                return leftSide + " == " + rightSide
            else:
                return leftSide + ' ' + astHead + ' ' + rightSide
        elif astHead == '+':
            result = ""
            ast = ast[1:]
            for item in ast:
                if (type(item) == list) and (item[0] == '-'):
                    term = generateCode(item[1], 0)
                    result += '-' + term
                else:
                    term = generateCode(item, 0)
                    result += '+' + term
            return ('\t' * indentation) + '(' + result[1:] + ')'
        elif astHead == '-':
            if type(ast[1]) == list:
                result = '(' + generateCode(ast[1]) + ')'
            else:
                result = generateCode(ast[1])
            result = ('\t' * indentation) + '-' + result
            return result
        elif astHead == '*':
            result = "*".join([generateCode(x) for x in ast[1:]])
            return ('\t' * indentation) + result
        elif astHead == '/':
            result = ('\t' * indentation) + generateCode(ast[1]) + \
                     '/' + generateCode(ast[2])
            return result
        elif isFunction(astHead):
            name = getFunctionName(astHead)
            args = ", ".join([generateCode(x) for x in ast[1:]])
            return ('\t' * indentation) + name + '(' + args + ')'
        else:
            args = ", ".join([generateCode(x) for x in ast])
            return ('\t' * indentation) + '[' + args + ']'
    else:
        return ast

### END CODE GENERATOR ###

### BEGIN EVAL LOGO ###

def compileLogo(script):
    """
    Compile the given Logo script and return the corresponding
    Python code.
    """
    tokenList = makeTokenList(script)
    ast = parse(tokenList)
    code = generateCode(ast)
    return (PYTHON_INIT + code).replace('\t', '    ')

### END EVAL LOGO ###

exec compileLogo(PROGRAM)