##########IMPORT START############
# Cache of files read by add_file(): maps the resolved path to the raw file contents.
_files = {}
# Folder part (up to and including the last '/') of the current source path; None until a source path is set.
_sourcefolder = None
# Path of the source file currently being assembled; None until a source path is set.
_sourcefile = None

def set_sourcepath(path):
    """
    Remember path as the source file which is currently being assembled.
    The folder part of path (everything up to and including the last '/')
    is stored as well; it is the base against which relative paths are resolved.
    """
    global _sourcefolder, _sourcefile
    last_slash = path.rfind('/')  # -1 if there is no '/', which yields an empty folder
    _sourcefile = path
    _sourcefolder = path[:last_slash + 1]

def get_sourcepath():
    """
    Return the stored path of the source file which is currently being assembled
    (the module-level _sourcefile value).
    """
    current = _sourcefile
    return current

def change_sourcepath(path):
    """
    Switch the current source file to path.
    path is resolved relative to the folder of the current source file,
    and the stored source folder is updated to the folder of the new file.
    """
    global _sourcefolder, _sourcefile
    resolved = _abspath(_sourcefolder, path)
    folder_end = resolved.rfind('/') + 1  # 0 when the resolved path has no '/'
    _sourcefile = resolved
    _sourcefolder = resolved[:folder_end]

def _abspath(curpath, path):
    """
    Resolve path as far as possible, taking curpath as the current directory.
    Leading './' components are dropped, each leading '../' removes the last
    folder of curpath, a path starting with '/' is returned unchanged, and
    anything else is appended to curpath. If curpath is empty (or None) the
    remaining path is returned as it is.
    """
    base = curpath
    rest = path
    while True:
        if rest.startswith('./'):
            rest = rest[2:]
            continue
        if not base:
            return rest
        if rest.startswith('../'):
            # cut off the last folder; the trailing '/' of base is ignored by the search
            base = base[:base.rfind('/', 0, -1) + 1]
            rest = rest[3:]
            continue
        if rest.startswith('/'):
            return rest
        return base + rest

def add_file(path):
    """
    Read the file at path, return (size, abspath) or if it fails return (-1, abspath).
    set_sourcepath needs to be called at some point before using this function.
    If the same file has been added before, it is not read again, and the same size is returned.

    path is resolved relative to the folder of the current source file.
    A failure to open or read the file is reported with print() and is not cached.
    """
    abspath = _abspath(_sourcefolder, path)
    if abspath in _files:
        return len(_files[abspath]), abspath
    try:
        # the with block closes the file even if read() raises
        with open(abspath, 'rb') as f:
            s = f.read()
    except Exception as e:
        print('Caught exception in add_file(): %s' % (str(e),))
        return -1, abspath
    _files[abspath] = s
    return len(s), abspath

def filecontents(path):
    """
    Return the cached contents of the file at path, or None if the file is not in the cache.
    add_file needs to be called on the same file before using this.
    """
    key = _abspath(_sourcefolder, path)
    if key in _files:
        return _files[key]
    return None

def filesize(path):
    """
    Return the size of the cached file at path, or -1 if the file is not in the cache.
    add_file needs to be called on the same file before using this.
    A failed lookup is reported with print().
    """
    key = _abspath(_sourcefolder, path)
    try:
        size = len(_files[key])
    except Exception as e:
        print('Caught exception in filesize(): %s' % (str(e),))
        size = -1
    return size

def get_sourcecode():
    """
    Return the lines of the current source file as a list of strings (split at newlines),
    or None if no source file is set or the file cannot be read.
    A read failure is reported with print().
    """
    if not _sourcefile:
        return None
    try:
        with open(_sourcefile, 'r') as f:
            text = f.read()
    except Exception as e:
        print('Caught exception in get_sourcecode(): %s' % (str(e),))
        return None
    return text.split('\n')
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############
def reverse(b):  # needed because no [::-1] available in upy
    """Return a new bytearray holding the values of b in reversed order."""
    values = list(b)
    count = len(values)
    flipped = bytearray(count)
    for pos in range(count):
        flipped[count - 1 - pos] = values[pos]
    return flipped


def int_to_signrotimv(n):
    """
    Find an encoding of n as a signed, rotated 8 bit value.
    Return a tuple (sign, rot, imv) with sign = +-1, 0<=rot<=15, 0<=imv<=255 such that
    sign*ROR(imv,2*rot) == n, or None if no such tuple exists.
    For every rotation the positive value is tried before the negated one.
    """
    for rot in range(16):
        shift = 2 * rot
        positive = rotateleft32(n, shift)
        if positive < 256:
            return (1, rot, positive)
        negated = rotateleft32(-n, shift)
        if negated < 256:
            return (-1, rot, negated)
    return None


def _check_aropexpr(s):
    """
    Return '' if s specifies an aropexpr, otherwise nonempty error string,
    where an aropexpr is an expression of the form arop expr,
    where arop is + or - and expr is num or num aropexpr, where is_valid_imval('#'+num) returns True.

    Surrounding whitespace of s and of every num is ignored.
    """
    s = s.strip()
    if len(s) == 0:
        return 'invalid expression: expected nonempty string'
    if s[0] not in ('+', '-'):
        return 'invalid expression: expected "+" or "-"'
    s = s[1:]
    # the number ends at the next operator, or at the end of the string if there is none
    i = len(s)
    if '+' in s:
        i = s.find('+')
    if '-' in s:
        i = min(i, s.find('-'))
    num = s[:i].strip()
    rest = s[i:].strip()
    if not is_valid_imval('#'+num):
        return 'invalid expression: expected numeric immediate value'
    if len(rest) == 0:
        return ''
    # rest starts with an operator again, so it must itself be an aropexpr
    return _check_aropexpr(rest)


def _split_pcrelative(s):
    """
    Split the pc relative expression s into (label, rest).
    label is the leading run of alphanumeric characters and underscores,
    rest is everything after it. Whitespace around s and around both parts is removed.
    """
    s = s.strip()
    for i in range(len(s)):
        if (not isalnum(s[i])) and s[i] != '_':
            return s[:i].strip(), s[i:].strip()
    return s, ''


def check_pcrelative_expression(s, labeldict):
    """
    Return '' if s specifies a pc relative expression, otherwise nonempty error string.
    A pc relative expression is a label contained in labeldict, optionally followed by an aropexpr.
    """
    label, rest = _split_pcrelative(s)
    if label not in labeldict:
        return 'invalid pc relative expression: undefined label'
    if len(rest) == 0:
        return ''
    return _check_aropexpr(rest)


def _aropexpr_to_int(s):
    """
    s must be a valid aropexpr.
    Return the integer that the expression evaluates to.
    """
    s = s.strip()
    sign = 1
    if s[0] == '-':
        sign = -1
    s = s[1:]
    # the number ends at the next operator, or at the end of the string if there is none
    i = len(s)
    if '+' in s:
        i = s.find('+')
    if '-' in s:
        i = min(i, s.find('-'))
    num = s[:i].strip()
    rest = s[i:].strip()
    numint = imval_to_int('#'+num)
    if len(rest) == 0:
        return sign*numint
    return sign*numint + _aropexpr_to_int(rest)


def pcrelative_expression_to_int(s, address, labeldict):
    """
    s must be a valid pc relative expression.
    Return the offset that the expression evaluates to (with correction for PC==address+8).
    address is the address of the instruction, labeldict maps label names to addresses.
    """
    label, rest = _split_pcrelative(s)
    offset = labeldict[label] - (address + 8)
    if len(rest) == 0:
        return offset
    return offset + _aropexpr_to_int(rest)


def is_coprocreg(s):
    """Return True if s names a coprocessor register (c0 ... c15, case-insensitive), False otherwise."""
    lowered = s.lower()
    names = ['c%d' % n for n in range(16)]
    return lowered in names


def is_coproc(s):
    """Return True if s names a coprocessor (P0 ... P15, case-insensitive), False otherwise."""
    uppered = s.upper()
    names = ['P%d' % n for n in range(16)]
    return uppered in names


def encode_imval(s):
    """
    s must be a syntactically valid, encodable immediate value.
    Return the imval encoded as in psrtrans/dataproc: bits 8-11 hold the rotation (in units of 2 bits),
    bits 0-7 hold the 8 bit value. The smallest suitable rotation is used; 0 is returned if none fits.
    """
    value = imval_to_int(s)
    for rot in range(16):
        rotated = rotateleft32(value, 2 * rot)  # rotated ror 2*rot == value
        if rotated < 256:
            return (rot << 8) | rotated
    return 0


def is_expressable_imval(s):
    """
    s must be a syntactically valid immediate value.
    Return True if s can be expressed as an 8 bit imval and 4 bit shift (like in psrtrans or dataproc),
    i.e. if rotating its value left by some even amount gives a value below 256, False otherwise.
    """
    value = imval_to_int(s)
    return any(rotateleft32(value, shift) < 256 for shift in range(0, 32, 2))


def is_psr(s):
    """Return True if s (case-insensitive) is one of the accepted CPSR/SPSR names, False otherwise."""
    name = s.upper()
    return name in ('CPSR', 'SPSR', 'CPSR_ALL', 'SPSR_ALL', 'SPSR_FLG', 'CPSR_FLG')


def encode_32bit(l):
    """
    Build a 32 bit word from bit fields and return it as a bytearray of 4 bytes, most significant byte first.
    l must be a list of tuples of 3 integers: (offset, length, value). LSB has offset 0.
    Each value is truncated to its length before it is placed at its offset.
    Attention: does not change endianness.
    """
    word = 0
    for field in l:
        offset, length, value = field[0], field[1], field[2]
        mask = (1 << length) - 1
        word |= (value & mask) << offset
    return bytearray([(word >> shift) & 0xFF for shift in (24, 16, 8, 0)])


def encode_16bit(l):
    """
    Build a 16 bit value from bit fields and return it as a bytearray of 2 bytes, most significant byte first.
    l must be a list of tuples of 3 integers: (offset, length, value). LSB has offset 0.
    Each value is truncated to its length before it is placed at its offset.
    Attention: does not change endianness.
    """
    word = 0
    for field in l:
        offset, length, value = field[0], field[1], field[2]
        mask = (1 << length) - 1
        word |= (value & mask) << offset
    return bytearray([(word >> shift) & 0xFF for shift in (8, 0)])


def is_valid_numeric_literal(s):
    """
    Return True if s is a valid numeric literal, False otherwise.
    A numeric literal is valid exactly if '#'+s is a valid immediate value.
    """
    as_imval = '#' + s
    return is_valid_imval(as_imval)


def numeric_literal_to_int(s):
    """
    Return the integer value of the numeric literal s (evaluated as the immediate value '#'+s).
    s must be a syntactically valid numeric literal, or the result is meaningless.
    """
    as_imval = '#' + s
    return imval_to_int(as_imval)


def bigendian_to_littleendian(b):
    """
    Return a new bytearray in which every group of 4 bytes of b is byte-reversed
    (conversion of 32 bit words from big endian to little endian).
    len(b)%4 must be 0; otherwise an empty bytearray is returned.
    """
    if len(b) % 4 != 0:
        return bytearray()
    swapped = bytearray()
    for start in range(0, len(b), 4):
        swapped.extend(reverse(b[start:start + 4]))
    return swapped


def bigendian_to_littleendian_16bit(b):
    """
    Return a new bytearray in which every pair of bytes of b is swapped
    (conversion of 16 bit values from big endian to little endian).
    len(b)%2 must be 0; otherwise an empty bytearray is returned.
    """
    if len(b) % 2 != 0:
        return bytearray()
    swapped = bytearray()
    for start in range(0, len(b), 2):
        swapped.extend(reverse(b[start:start + 2]))
    return swapped


def rotateleft32(n, r):
    """Return n % (2**32) rotated left by r bits, using a word size of 32 bits."""
    n &= 0xFFFFFFFF
    r %= 32
    n <<= r
    carry = (n & (0xFFFFFFFF << 32)) >> 32
    n &= 0xFFFFFFFF
    n += carry
    return n


def isalnum(s):
    """Return True if s is nonempty and consists only of the characters 0...9, A...Z and a...z, False otherwise."""
    allowed = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    if len(s) < 1:
        return False
    return all(c in allowed for c in s)


def isxdigit(s):
    """Return True if s is nonempty and consists only of hexadecimal digits (0...9A...Fa...f), False otherwise."""
    allowed = '0123456789ABCDEFabcdef'
    if len(s) < 1:
        return False
    return all(c in allowed for c in s)


def isoctdigit(s):
    """Return True if s is nonempty and consists only of octal digits (0...7), False otherwise."""
    allowed = '01234567'
    if len(s) < 1:
        return False
    return all(c in allowed for c in s)


def isbindigit(s):
    """Return True if s is nonempty and consists only of the binary digits 0 and 1, False otherwise."""
    allowed = '01'
    if len(s) < 1:
        return False
    return all(c in allowed for c in s)


def is_shiftname(s):
    """Return True if s (case-insensitive) is one of ASL, LSL, LSR, ASR, ROR (RRX is excluded), False otherwise."""
    name = s.upper()
    return name in ('ASL', 'LSL', 'LSR', 'ASR', 'ROR')


def is_valid_imval(s):
    """
    Return True if s is a syntactically valid immediate value, False otherwise.
    Accepted forms are '#' followed by an optional single sign and one of:
    0, a quoted single character ('x'), a hexadecimal number (0x...), an octal number (0...),
    a binary number (0b...) or a decimal number without leading zero.
    """
    if len(s) < 2 or s[0] != '#':
        return False
    if len(s) >= 3 and s[1] in ('-', '+'):
        s = '#' + s[2:]  # drop the sign, the remainder is checked like an unsigned value
    if s == '#0':
        return True
    if len(s) == 4 and s.startswith("#'") and s[-1] == "'" and ord(s[2]) <= 255:
        return True
    return (
        (s.startswith('#0x') and len(s) >= 4 and isxdigit(s[3:]))
        or (s.startswith('#0') and len(s) >= 3 and isoctdigit(s[2:]))
        or (s.startswith('#0b') and len(s) >= 4 and isbindigit(s[3:]))
        or (s[1] != '0' and str.isdigit(s[1:]))
    )


def imval_to_int(s):
    """
    Return the integer value of the immediate value s.
    s must be a syntactically valid immediate value, or the result is meaningless.
    Understands an optional sign followed by 0, a quoted character, hexadecimal (0x),
    binary (0b), octal (leading 0) or decimal notation.
    """
    negative = (s[1] == '-')
    if s[1] in ('-', '+'):
        s = s[0] + s[2:]  # remove the sign character
    if s == '#0':
        magnitude = 0
    elif s.startswith("#'") and s[-1] == "'" and len(s) == 4:
        magnitude = ord(s[2])
    elif s.startswith('#0x'):
        magnitude = int(s[3:], 16)
    elif s.startswith('#0b'):
        magnitude = int(s[3:], 2)
    elif s.startswith('#0'):
        magnitude = int(s[2:], 8)
    else:
        magnitude = int(s[1:])
    if negative:
        return -magnitude
    return magnitude


def is_valid_label(s):
    """
    Return True if s is a syntactically valid label, False otherwise.
    Rules: must start with an alphabetic character, must only contain alphanumeric characters or underscores.
    The empty string is not a valid label.
    """
    # guard against the empty string, which has no first character to inspect
    if len(s) == 0 or not s[0].isalpha():
        return False
    for c in s:
        if not (isalnum(c) or c == '_'):
            return False
    return True


def is_private_label(s):
    """
    Return True if s is a reserved label name, False otherwise.
    Reserved are directives, operation names, register names and the other keywords.
    """
    return bool(is_directive(s) or is_opname(s) or is_reg(s) or is_otherkeyword(s))


def is_otherkeyword(s):
    """
    Return True if s is another keyword than an opname, directive or reg, False otherwise.
    These are (case-insensitive) the shift names including RRX, the coprocessors P0...P15,
    the coprocessor registers C0...C15 and the PSR names.
    """
    word = s.upper()
    shiftnames = ('LSL', 'LSR', 'ASL', 'ASR', 'ROR', 'RRX')
    coprocs = ['P%d' % n for n in range(16)]
    coprocregs = ['C%d' % n for n in range(16)]
    psrs = ('CPSR', 'SPSR', 'CPSR_ALL', 'SPSR_ALL', 'SPSR_FLG', 'CPSR_FLG')
    return word in shiftnames or word in coprocs or word in coprocregs or word in psrs


def get_reg_num(s):
    """
    Return the register number of the register with name s, or -1 if no such register exists.
    Accepted names (case-insensitive) are R0...R15 and the aliases SP (13), LR (14) and PC (15).
    """
    numbers = {'R%d' % n: n for n in range(16)}
    numbers.update({'SP': 13, 'LR': 14, 'PC': 15})
    return numbers.get(s.upper(), -1)


def is_reg(s):
    """Return True if s names a register (get_reg_num knows a number for it), False otherwise."""
    regnum = get_reg_num(s)
    return regnum != -1


def get_condcode_value(s):
    """
    Return the number corresponding to the condition code s (case-insensitive),
    or -1 if s is not a valid condition code.
    """
    values = {'EQ': 0, 'NE': 1, 'HS': 2, 'CS': 2, 'LO': 3, 'CC': 3, 'MI': 4, 'PL': 5, 'VS': 6, 'VC': 7, 'HI': 8,
              'LS': 9, 'GE': 10, 'LT': 11, 'GT': 12, 'LE': 13, 'AL': 14}
    return values.get(s.upper(), -1)


def is_condcode(s):
    """Return True if s is a valid condcode (get_condcode_value knows a number for it), False otherwise."""
    value = get_condcode_value(s)
    return value != -1


def is_preasm_directive(s):
    """Return True if s (case-insensitive) is one of the names GET or INCLUDE, False otherwise."""
    name = s.upper()
    return name in ('GET', 'INCLUDE')


def is_directive(s):
    """Return True if s (case-insensitive) is an implemented directive, False otherwise."""
    name = s.upper()
    return name in ('DCD', 'DCDU', 'DCW', 'DCWU', 'ALIGN', 'DCB', 'INCBIN')


def is_dataproc_fullop(s):
    """Return True if s (case-insensitive) is one of ADC, ADD, RSB, RSC, SBC, SUB, AND, BIC, EOR, ORR, False otherwise."""
    name = s.upper()
    return name in ('ADC', 'ADD', 'RSB', 'RSC', 'SBC', 'SUB', 'AND', 'BIC', 'EOR', 'ORR')


def is_dataproc_testop(s):
    """Return True if s (case-insensitive) is one of CMP, CMN, TEQ, TST, False otherwise."""
    name = s.upper()
    return name in ('CMP', 'CMN', 'TEQ', 'TST')


def is_dataproc_movop(s):
    """Return True if s (case-insensitive) is MOV or MVN, False otherwise."""
    name = s.upper()
    return name in ('MOV', 'MVN')


def get_dataprocop_num(s):
    """
    Return the number assigned to the data processing operation named s (case-insensitive, without flags),
    or -1 if s is not such a name.
    """
    numbers = {'AND': 0, 'EOR': 1, 'SUB': 2, 'RSB': 3, 'ADD': 4, 'ADC': 5, 'SBC': 6, 'RSC': 7,
               'TST': 8, 'TEQ': 9, 'CMP': 10, 'CMN': 11, 'ORR': 12, 'MOV': 13, 'BIC': 14, 'MVN': 15}
    return numbers.get(s.upper(), -1)


def is_dataprocop(s):
    """
    Return True if s (case-insensitive) is a data processing operation name, optionally with S flag, False otherwise.
    The test operations CMP, CMN, TEQ and TST are only accepted without S.
    """
    name = s.upper()
    flaggable = ('ADC', 'ADD', 'RSB', 'RSC', 'SBC', 'SUB', 'AND', 'BIC', 'EOR', 'ORR', 'MOV', 'MVN')
    testops = ('CMP', 'CMN', 'TEQ', 'TST')
    if name in flaggable or name in testops:
        return True
    return name in [op + 'S' for op in flaggable]


def is_branchop(s):
    """Return True if s (case-insensitive) is one of BX, B, BL, False otherwise."""
    name = s.upper()
    return name in ('BX', 'B', 'BL')


def is_psrtransop(s):
    """Return True if s (case-insensitive) is MSR or MRS, False otherwise."""
    name = s.upper()
    return name in ('MSR', 'MRS')


def is_mulop(s):
    """Return True if s (case-insensitive) is MUL or MLA, optionally with S flag, False otherwise."""
    name = s.upper()
    return name in ('MUL', 'MLA', 'MULS', 'MLAS')


def is_longmulop(s):
    """Return True if s (case-insensitive) is UMULL, SMULL, UMLAL or SMLAL, optionally with S flag, False otherwise."""
    name = s.upper()
    bases = ('UMULL', 'SMULL', 'UMLAL', 'SMLAL')
    if name in bases:
        return True
    return name[-1:] == 'S' and name[:-1] in bases


def is_swiop(s):
    """Return True if s (case-insensitive) is SWI or SVC, False otherwise."""
    name = s.upper()
    return name in ('SWI', 'SVC')


def is_singledatatransop(s):
    """
    Return True if s (case-insensitive) is LDR or STR, optionally followed by one of the
    flag combinations B, T or BT, False otherwise.
    """
    name = s.upper()
    return name[:3] in ('LDR', 'STR') and name[3:] in ('', 'B', 'T', 'BT')


def is_halfsigneddatatransop(s):
    """Return True if s (case-insensitive) is one of LDRH, LDRSH, LDRSB, STRH, False otherwise."""
    name = s.upper()
    return name in ('LDRH', 'LDRSH', 'LDRSB', 'STRH')


def is_swapop(s):
    """Return True if s (case-insensitive) is SWP or SWPB, False otherwise."""
    name = s.upper()
    return name in ('SWP', 'SWPB')


def is_blockdatatransop(s):
    """
    Return True if s (case-insensitive) is LDM or STM followed by one of the
    addressing mode suffixes FD, ED, FA, EA, IA, IB, DA, DB, False otherwise.
    """
    name = s.upper()
    suffixes = ('FD', 'ED', 'FA', 'EA', 'IA', 'IB', 'DA', 'DB')
    return name[:3] in ('LDM', 'STM') and name[3:] in suffixes


def is_coprocregtransop(s):
    """Return True if s (case-insensitive) is MRC or MCR, False otherwise."""
    name = s.upper()
    return name in ('MRC', 'MCR')


def is_pseudoinstructionop(s):
    """Return True if s (case-insensitive) is the name ADR, False otherwise."""
    name = s.upper()
    return name in ('ADR',)


def is_miscarithmeticop(s):
    """Return True if s (case-insensitive) is the name CLZ, False otherwise."""
    name = s.upper()
    return name in ('CLZ',)


def is_opname(s):
    """
    Return True if s is a valid operation or directive name (full name, i.e. with flags!), False otherwise.
    s is accepted as soon as one of the name category checks accepts it.
    """
    checks = (is_preasm_directive, is_directive, is_dataprocop, is_branchop, is_psrtransop,
              is_mulop, is_longmulop, is_swiop, is_singledatatransop, is_halfsigneddatatransop,
              is_swapop, is_blockdatatransop, is_coprocregtransop, is_pseudoinstructionop,
              is_miscarithmeticop)
    for check in checks:
        if check(s):
            return True
    return False


def is_conditionable(s):
    """
    Check if s can be conditionally executed. Return True if yes, False if no.
    Every operation name which is not a (pre-assembler) directive is conditionable.
    """
    if is_preasm_directive(s) or is_directive(s):
        return False
    return is_opname(s)


def is_pseudoinstruction(opname, operands):
    """
    Check if opname operands is a pseudoinstruction. Return True if yes, False if no.
    The decision depends on opname only; operands is not inspected.
    """
    result = is_pseudoinstructionop(opname)
    return result
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def check_coprocregtransop(name, operands):
    """
    Assumes valid name, valid name+flags combination, valid condcode.
    Check the operands and return an error string if invalid, empty string otherwise.
    Expected operands: coprocessor, numeric literal (0 to 7), register, two coprocessor registers
    and optionally a second numeric literal (0 to 7).
    """
    def literal_error(text):
        # error string for a numeric literal which must be in range 0 to 7, '' if it is fine
        if not is_valid_numeric_literal(text):
            return 'Expected numeric literal'
        if not 0 <= numeric_literal_to_int(text) <= 7:
            return 'Must be in range 0 to 7'
        return ''

    ops = [part.strip() for part in operands.split(',')]
    count = len(ops)
    if count not in (5, 6):
        return 'Invalid number of operands. Expected 5 or 6, got %i' % count
    if not is_coproc(ops[0]):
        return 'Expected coprocessor (e.g. p15)'
    err = literal_error(ops[1])
    if err:
        return err
    if not is_reg(ops[2]):
        return 'Expected register'
    if not (is_coprocreg(ops[3]) and is_coprocreg(ops[4])):
        return 'Expected coprocessor register (e.g. c0)'
    if count == 5:
        return ''
    return literal_error(ops[5])


def encode_coprocregtransop(name, condcode, operands):
    """
    check_coprocregtransop must be called before this.
    Encode the instruction and return it as a bytearray object (little endian).
    The load flag (bit 20) is set if name is 'MRC'; a missing sixth operand is encoded as 0.
    """
    ops = [part.strip() for part in operands.split(',')]
    cpnum = int(ops[0][1:])  # number after the leading letter of the coprocessor name
    cpopc = numeric_literal_to_int(ops[1])
    rd = get_reg_num(ops[2])
    crn = int(ops[3][1:])
    crm = int(ops[4][1:])
    cp = numeric_literal_to_int(ops[5]) if len(ops) == 6 else 0
    lflag = (name == 'MRC')
    ccval = get_condcode_value(condcode)
    fields = [(28, 4, ccval), (24, 4, 0xE), (21, 3, cpopc),
              (20, 1, lflag), (16, 4, crn), (12, 4, rd),
              (8, 4, cpnum), (5, 3, cp), (4, 1, 0x1), (0, 4, crm)]
    return bigendian_to_littleendian(encode_32bit(fields))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def is_valid_addresspart(hsflag, addresspart, tflag, address, labeldict):
    """
    Check the address part of a data transfer instruction.
    hsflag = True -> selects syntax checking for halfword or signed data transfer instructions, False selects normal unsigned word/byte transfer syntax.
    addresspart is either a label from labeldict or a bracket expression like [Rn], [Rn, offset]{!} or [Rn], offset.
    tflag tells whether the instruction carries the T flag.
    Address must be the address of this instruction.
    Return empty string if addresspart is valid according to the syntax rules for datatransfer address part, error string otherwise.
    """
    if len(addresspart) < 1:
        return 'Address part is missing'
    if addresspart[0] != '[':  # must be an expression (label) if not starting with a bracket
        if addresspart not in labeldict:
            return 'Expected bracket or label'
        # a label is rewritten to a PC relative address (PC == address + 8)
        offset = labeldict[addresspart] - address - 8
        addresspart = '[PC, #'+str(offset)+']'  # range check done below
    writeback = False
    if addresspart[-1] == '!':
        writeback = True
        addresspart = addresspart[:-1].strip()  # strip the trailing !
    if addresspart[-1] == ']':
        preindexed = True
        addresspart = addresspart[:-1].strip()  # strip the trailing ]
    else:
        if writeback:
            return '! is only allowed for preindexed addressing'
        preindexed = False
    addresspart = addresspart[1:].strip()  # strip the leading [
    addresspart = [x.strip() for x in addresspart.split(',')]
    if len(addresspart) < 1 or len(addresspart) > 3 or (hsflag and len(addresspart) > 2):
        return 'Invalid addresspart'
    if not preindexed:
        # post-indexed: the bracket closes directly after the base register
        if addresspart[0][-1:] != ']':
            return 'Expected closing ]'
        addresspart[0] = addresspart[0][:-1].strip()  # strip the trailing ]
    # there should be no syntax differences between pre- and post-indexing left
    if not is_reg(addresspart[0]):
        return 'Expected register as base'
    if writeback and get_reg_num(addresspart[0]) == 15:
        return 'Write-back should not be used when PC is the base register'
    if preindexed and tflag:
        return 'T-flag is not allowed when pre-indexing is used'
    if len(addresspart) == 1:
        return ''
    if is_valid_imval(addresspart[1]):
        # immediate offset: 8 bit for halfword/signed transfers, 12 bit otherwise
        n = imval_to_int(addresspart[1])
        if hsflag:
            limit = 2**8-1
        else:
            limit = 2**12-1
        if n > limit:
            return 'Offset too high (max. %i)' % (limit)
        if n < -limit:
            return 'Offset too low (min. %i)' % (-limit)
        if len(addresspart) > 2:
            return 'Too many operands'
        return ''
    # register offset, optionally signed and (for word/byte transfers) shifted
    if len(addresspart[1]) < 2:
        return 'Invalid offset'
    if addresspart[1][0] in ['+', '-']:
        addresspart[1] = addresspart[1][1:]
    if not is_reg(addresspart[1]):
        return 'Invalid offset: must be register or immediate value'
    if get_reg_num(addresspart[1]) == 15:
        return 'PC is not allowed as offset'
    if not preindexed and get_reg_num(addresspart[0]) == get_reg_num(addresspart[1]):
        return 'Manual says: post-indexed with Rm = Rn should not be used'
    if len(addresspart) == 2:
        return ''
    if hsflag:
        return 'Expected less operands'
    # addresspart[2] should be a shift:
    if len(addresspart[2]) < 3:
        return 'Invalid shift expression'
    shift = addresspart[2]
    if shift.upper() == 'RRX':
        return ''
    shift = shift.split()
    if len(shift) == 1 and '#' in shift[0]:
        # compact form without whitespace, e.g. LSL#2
        shift = shift[0].split('#')
        shift[1] = '#' + shift[1]
    if len(shift) != 2:
        return 'Invalid shift expression'
    if not is_shiftname(shift[0]):
        return 'Invalid shift name'
    if is_reg(shift[1]):
        return 'Register specified shift amount is not allowed in data transfer instructions'
    if not is_valid_imval(shift[1]):
        return 'Invalid shift amount'
    n = imval_to_int(shift[1])
    if n >= 0 and n <= 31:
        return ''
    if n == 32:
        # a shift by 32 is a legal special case, but only for LSR and ASR (any letter case)
        if shift[0].upper() in ['LSR', 'ASR']:
            return ''
        return 'Shift by 32 is only allowed for LSR'
    return 'Invalid immediate shift amount. Must be 0 <= amount <= 31 (or 32 for special LSR, ASR)'


def check_singledatatransop(flags, operands, address, labeldict):
    """
    Assumes valid name, valid name+flags combination, valid condcode.
    Check the operands and return an error string if invalid, empty string otherwise.
    The operands are a register, followed by an address part (checked with the word/byte transfer rules).
    """
    tflag = 'T' in flags
    parts = [part.strip() for part in operands.split(',', 1)]
    if len(parts) != 2:
        return 'Expected more operands'
    register, addresspart = parts
    if not is_reg(register):
        return 'Expected register'
    # the address part check already returns '' if everything is fine
    return is_valid_addresspart(False, addresspart, tflag, address, labeldict)


def check_halfsigneddatatransop(operands, address, labeldict):
    """
    Assumes valid name, valid name+flags combination, valid condcode.
    Check the operands and return an error string if invalid, empty string otherwise.
    The operands are a register, followed by an address part (checked with the halfword/signed transfer rules).
    """
    parts = [part.strip() for part in operands.split(',', 1)]
    if len(parts) != 2:
        return 'Expected more operands'
    register, addresspart = parts
    if not is_reg(register):
        return 'Expected register'
    # the address part check already returns '' if everything is fine
    return is_valid_addresspart(True, addresspart, False, address, labeldict)


def parse_datatrans(name, operands, address, labeldict):
    """
    check_singledatatransop or check_halfsigneddatatransop must be called before this.
    Does the stuff common to halfsigned and normal datatrans encoding.
    Return the tuple (writeback, preindexed, loadflag, upflag, iflag, rd, rn, offset), where
    iflag is True for a register offset and False for an immediate offset,
    upflag is False if the offset is subtracted, and offset is either the absolute immediate value
    or the register number combined with the shift field (shift amount in bits 7-11, shift type in bits 5-6).
    """
    if operands.count('[') == 0:
        # label form: rewrite it to a PC relative address (PC == address + 8)
        label = operands.split(',')[1].strip()
        offset = labeldict[label] - address - 8
        operands = operands.split(',')[0] + ',[PC, #'+str(offset)+']'
    writeback = (operands[-1] == '!')
    if writeback:
        operands = operands[:-1].strip()
    preindexed = (operands[-1] == ']')
    if preindexed:
        operands = operands[:-1].strip()
    loadflag = (name == 'LDR')
    operands = [x.strip() for x in operands.split(',')]
    operands[1] = operands[1][1:].strip()  # strip the leading [
    if operands[1][-1] == ']':  # post-indexed: bracket closes after the base register
        operands[1] = operands[1][:-1].strip()
    rd = get_reg_num(operands[0])
    rn = get_reg_num(operands[1])
    offset = 0
    upflag = True
    iflag = False
    if len(operands) > 2:
        if is_valid_imval(operands[2]):
            iflag = False  # !!!
            offset = imval_to_int(operands[2])
            upflag = (offset >= 0)
            offset = abs(offset)
        else:
            iflag = True
            upflag = True
            if operands[2][0] == '-':
                upflag = False
                operands[2] = operands[2][1:]
            elif operands[2][0] == '+':
                operands[2] = operands[2][1:]
            rm = get_reg_num(operands[2])
            shiftfield = 0
            if len(operands) == 4:
                shift = operands[3].split()
                if len(shift) == 1 and '#' in shift[0]:
                    # compact form without whitespace (e.g. LSL#2), which the check accepts as well;
                    # without this it would be mistaken for RRX below
                    shift = shift[0].split('#')
                    shift[1] = '#' + shift[1]
                if len(shift) == 1:  # RRX
                    shifttype = 'ROR'
                    shiftby = 0
                else:
                    shifttype = shift[0]
                    shiftby = imval_to_int(shift[1])
                    if shiftby == 0:
                        shifttype = 'LSL'
                    if shifttype.upper() in ['LSR', 'ASR'] and shiftby == 32:
                        shiftby = 0  # a shift by 32 is encoded as shift amount 0
                shiftfield = (shiftby << 3) | {'LSL': 0, 'ASL': 0, 'LSR': 1, 'ASR': 2, 'ROR': 3}[shifttype.upper()] << 1
            offset = (shiftfield << 4) | rm
    return (writeback, preindexed, loadflag, upflag, iflag, rd, rn, offset)


def encode_singledatatransop(name, flags, condcode, operands, address, labeldict):
    """
    check_singledatatransop must be called before this.
    Encode the instruction and return it as a bytearray object (little endian).
    The write-back bit is also set if flags contains T; the byte bit is set if flags contains B.
    """
    parsed = parse_datatrans(name, operands, address, labeldict)
    writeback, preindexed, loadflag, upflag, iflag, rd, rn, offset = parsed
    writeback = writeback or ('T' in flags)
    byteflag = ('B' in flags)
    ccval = get_condcode_value(condcode)
    fields = [(28, 4, ccval), (26, 2, 0x1), (25, 1, iflag),
              (24, 1, preindexed), (23, 1, upflag), (22, 1, byteflag),
              (21, 1, writeback), (20, 1, loadflag), (16, 4, rn),
              (12, 4, rd), (0, 12, offset)]
    return bigendian_to_littleendian(encode_32bit(fields))


def encode_halfsigneddatatransop(name, flags, condcode, operands, address, labeldict):
    """
    check_halfsigneddatatransop must be called before this.
    Encode the instruction and return it as a bytearray object (little endian).
    The offset is split into its high nibble (bits 8-11) and low nibble (bits 0-3);
    the H and S bits are set if flags contains H resp. S.
    """
    parsed = parse_datatrans(name, operands, address, labeldict)
    writeback, preindexed, loadflag, upflag, iflag, rd, rn, offset = parsed
    assert not (offset & 0xF00)  # either iflag and only lowest 4 bit used or not iflag and only lowest 8 bit used
    assert (not iflag) or not (offset & 0xFF0)
    sflag = ('S' in flags)
    hflag = ('H' in flags)
    ccval = get_condcode_value(condcode)
    fields = [(28, 4, ccval), (24, 1, preindexed), (23, 1, upflag),
              (22, 1, not iflag), (21, 1, writeback), (20, 1, loadflag),
              (16, 4, rn), (12, 4, rd), (8, 4, offset >> 4), (7, 1, 0x1),
              (6, 1, sflag), (5, 1, hflag), (4, 1, 0x1), (0, 4, offset)]
    return bigendian_to_littleendian(encode_32bit(fields))


def check_swapop(operands):
    """
    Assumes valid name, valid name+flags combination, valid condcode.
    Check the operands and return an error string if invalid, empty string otherwise.
    Expected are three registers other than PC, the third one enclosed in brackets.
    """
    ops = [part.strip() for part in operands.split(',')]
    if len(ops) != 3:
        return 'Expected 3 operands, got %i' % (len(ops))
    third = ops[2]
    if len(third) < 4:
        return 'Invalid syntax'
    if not (third[0] == '[' and third[-1] == ']'):
        return 'Missing brackets around third operand of swap instruction'
    ops[2] = third[1:-1].strip()  # register name inside the brackets
    if not all(is_reg(op) for op in ops):
        return 'Only registers are allowed here'
    if any(get_reg_num(op) == 15 for op in ops):
        return 'PC is not allowed here'
    return ''


def encode_swapop(name, flags, condcode, operands):
    """
    check_swapop must be called before this.
    Encode the instruction and return it as a bytearray object (little endian).
    The byte bit is set if flags equals 'B'.
    """
    parts = [part.strip() for part in operands.split(',')]
    parts[2] = parts[2][1:-1].strip()  # remove the brackets around the third operand
    first, second, third = [get_reg_num(part) for part in parts]
    byteflag = (flags == 'B')
    ccval = get_condcode_value(condcode)
    fields = [(28, 4, ccval), (23, 5, 0x2), (22, 1, byteflag),
              (16, 4, third), (12, 4, first),
              (4, 4, 0x9), (0, 4, second)]
    return bigendian_to_littleendian(encode_32bit(fields))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def check_mulop(name, operands):
    """
    Validate the operands of MUL (3 registers) or MLA (4 registers).
    Assumes valid name, valid name+flags combination, valid condcode.
    Return an error string if the operands are invalid, empty string otherwise.
    """
    parts = [piece.strip() for piece in operands.split(',')]
    expected = {'MUL': 3, 'MLA': 4}.get(name)
    if expected is not None and len(parts) != expected:
        return 'Expected %i operands, got %i' % (expected, len(parts))
    if not all(is_reg(piece) for piece in parts):
        return 'Expected a register'
    regnums = [get_reg_num(piece) for piece in parts]
    if 15 in regnums:
        return 'PC is not allowed here'
    rd, rm = regnums[0], regnums[1]
    if rd == rm:
        return 'Rd must be different from Rm'
    return ''


def encode_mulop(name, flags, condcode, operands):
    """
    check_mulop must be called before this.
    Encode a MUL or MLA instruction and return the result of
    bigendian_to_littleendian. MUL uses 0 in the Rn field and a cleared
    accumulate bit; any other name takes Rn from the fourth operand.
    """
    regnums = [get_reg_num(token.strip()) for token in operands.split(',')]
    set_flags = (flags == 'S')
    rd, rm, rs = regnums[0:3]
    accumulate = (name != 'MUL')
    rn = regnums[3] if accumulate else 0
    layout = [
        (28, 4, get_condcode_value(condcode)),
        (21, 1, accumulate),
        (20, 1, set_flags),
        (16, 4, rd),
        (12, 4, rn),
        (8, 4, rs),
        (4, 4, 0x9),
        (0, 4, rm),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))


def check_longmulop(name, operands):
    """
    Validate the four register operands of a long multiply instruction.
    Assumes valid name, valid name+flags combination, valid condcode.
    Return an error string if the operands are invalid, empty string otherwise.
    """
    parts = [piece.strip() for piece in operands.split(',')]
    count = len(parts)
    if count != 4:
        return 'Expected 4 operands, got %i' % count
    if not all(is_reg(piece) for piece in parts):
        return 'Expected a register'
    regnums = [get_reg_num(piece) for piece in parts]
    if 15 in regnums:
        return 'PC is not allowed here'
    # The first three operands must be pairwise distinct.
    if len(set(regnums[:3])) != 3:
        return 'RdHi, RdLo and Rm must all be different registers'
    return ''


def encode_longmulop(name, flags, condcode, operands):
    """
    check_longmulop must be called before this.
    Encode a long multiply ("RdLo, RdHi, Rm, Rs") and return the result of
    bigendian_to_littleendian. The signed bit is taken from the first letter
    of name and the accumulate bit from its fourth letter.
    """
    regnums = [get_reg_num(token.strip()) for token in operands.split(',')]
    rdlo, rdhi, rm, rs = regnums
    set_flags = (flags == 'S')
    is_signed = (name[0] == 'S')
    accumulate = (name[3] == 'A')
    layout = [
        (28, 4, get_condcode_value(condcode)),
        (23, 5, 0x1),
        (22, 1, is_signed),
        (21, 1, accumulate),
        (20, 1, set_flags),
        (16, 4, rdhi),
        (12, 4, rdlo),
        (8, 4, rs),
        (4, 4, 0x9),
        (0, 4, rm),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def get_size(name, operands, address):
    """
    Return the size in bytes of the directive or instruction called name.
    name must be the name of a directive or instruction in uppercase,
    operands its operand string, address the address where it is placed.
    Return -1 on failure (name is not accepted by is_opname).
    """
    if not is_opname(name):
        return -1
    if is_directive(name):
        return get_directive_size(name, operands, address)
    # Everything that passed is_opname and is not a directive is an instruction.
    return get_instruction_size(name, operands, address)


def get_directive_size(name, operands, address):
    """
    Do not check the syntax or the content, just return how many bytes the
    directive will occupy if it is valid.
    name must be the name of a directive in uppercase, operands the operands,
    address the address where it is.
    DCD and DCW include the padding needed to align address to 4 resp. 2
    bytes; DCDU and DCWU do not pad.
    Return -1 on failure (invalid name).
    """
    if name == 'DCD' or name == 'DCDU':
        padding = 0
        if name == 'DCD':
            padding = (4 - (address % 4)) % 4
        return padding + 4*len(operands.split(','))
    if name == 'DCW' or name == 'DCWU':
        padding = 0
        if name == 'DCW':
            padding = address % 2
        return padding + 2*len(operands.split(','))
    if name == 'ALIGN':
        parts = operands.split(',')
        alignment = 4  # default when no alignment is given
        offset = 0
        if parts[0] != '':
            alignment = imval_to_int('#'+parts[0])
        if len(parts) > 1:
            offset = imval_to_int('#'+parts[1])
        if alignment == 0:
            # Invalid alignment; avoid a ZeroDivisionError here, the operand
            # check is expected to report the problem.
            return 0
        return (alignment - ((address+alignment-offset) % alignment)) % alignment
    if name == 'DCB':
        size = 0
        for item in operands.split(','):
            item = item.strip()
            # startswith() instead of item[0] so that an empty operand does
            # not raise IndexError; it is counted like a numeric operand.
            if item.startswith('"'):
                size += len(item[1:-1])  # one byte per character between the quotes
            else:
                size += 1
        return size
    if name == 'INCBIN':
        return filesize(operands)
    return -1


def get_instruction_size(name, operands, address):
    """
    Return how many bytes the instruction called name occupies.
    No syntax or content check is performed and none of the arguments is
    inspected: the result is always 4.
    """
    instruction_bytes = 4
    return instruction_bytes
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def _check_integer_operands(directive, operands, bits):
    """
    Check the comma separated numeric literals of a DCD/DCW style directive.
    directive is the name used in the error message, bits the width of one
    element. Every value must lie in -2^(bits-1) .. 2^bits-1.
    Return an error string if invalid, empty string otherwise.
    """
    literals = [x.strip() for x in operands.split(',')]
    # str.split never returns an empty list, so "no operands" shows up as a
    # single empty string.
    if literals == ['']:
        return 'Missing operands after %s: expected at least one immediate value' % directive
    for literal in literals:
        if not is_valid_numeric_literal(literal):
            return 'Invalid numeric literal'
        value = numeric_literal_to_int(literal)
        if value > (2**bits)-1:
            return 'Numeric literal outside of %ibit range: greater than 2^%i-1' % (bits, bits)
        if value < -(2**(bits-1)):
            return 'Numeric literal outside of %ibit range: lower than -2^%i' % (bits, bits-1)
    return ''


def check_directive(name, operands):
    """
    Assumes valid name.
    Check the operands of the directive called name (DCD, DCDU, DCW, DCWU,
    ALIGN, DCB or INCBIN).
    Return an error string if invalid, empty string otherwise.
    """
    if name == 'DCD' or name == 'DCDU':
        return _check_integer_operands('DCD', operands, 32)
    if name == 'DCW' or name == 'DCWU':
        return _check_integer_operands('DCW', operands, 16)
    if name == 'ALIGN':
        operands = [x.strip() for x in operands.split(',')]
        if len(operands[0]) == 0:  # implicit alignment to 4 bytes
            return ''
        if len(operands) > 2:
            return 'Only two arguments are allowed: alignment, offset'
        if not is_valid_numeric_literal(operands[0]):
            return 'Invalid numeric literal'
        alignment = numeric_literal_to_int(operands[0])
        # x & (x-1) clears the lowest set bit, so it is 0 only for powers of two.
        if alignment == 0 or (alignment & (alignment-1)) != 0:
            return 'Only powers of two are allowed as alignment boundaries'
        if len(operands) == 1:
            return ''
        if not is_valid_numeric_literal(operands[1]):
            return 'Invalid numeric literal'
        return ''
    if name == 'DCB':
        operands = [x.strip() for x in operands.split(',')]
        if len(operands[0]) == 0:
            return 'Missing operands after DCB: expected at least one numeric or string literal'
        for op in operands:
            if len(op) == 0:
                return 'Unexpected comma'
            if op[0] == '"':
                if len(op) < 3:
                    return 'Invalid string literal: empty'
                if op[-1] != '"':
                    return 'Invalid string literal: not terminated'
                # Every character has to fit into a single byte.
                for c in op[1:-1]:
                    if ord(c) > 255:
                        return 'Invalid character'
            elif is_valid_numeric_literal(op):
                i = numeric_literal_to_int(op)
                if i < -128:
                    return 'Numeric literal outside of 8bit range: lower than -2^7'
                if i > 255:
                    return 'Numeric literal outside of 8bit range: greater than 2^8-1'
            else:
                return 'Expected numeric or string literal'
        return ''
    if name == 'INCBIN':
        if filecontents(operands) is None:
            return 'Could not open file "%s"' % (operands)
        return ''
    return 'Invalid name (failed in check_directive) (report as bug)'


def encode_directive(name, operands, address):
    """
    check_directive must be called before this.
    Address must be the address of the directive.
    Encode the directive and return its bytes. DCD and DCW are preceded by
    zero bytes that align address to 4 resp. 2 bytes; ALIGN consists only of
    such zero padding. For INCBIN the result of filecontents is returned.
    Negative numeric literals accepted by check_directive are stored in two's
    complement.
    """
    if name == 'DCD' or name == 'DCDU':
        chunks = []
        if name == 'DCD':
            chunks.append(b'\x00'*((4 - (address % 4)) % 4))  # align
        for op in [x.strip() for x in operands.split(',')]:
            # Mask so that negative values become their 32 bit two's
            # complement before they reach the encoder.
            i = numeric_literal_to_int(op) & 0xFFFFFFFF
            chunks.append(bigendian_to_littleendian(encode_32bit([(0, 32, i)])))
        return b''.join(chunks)
    if name == 'DCW' or name == 'DCWU':
        chunks = []
        if name == 'DCW':
            chunks.append(b'\x00'*(address % 2))  # align
        for op in [x.strip() for x in operands.split(',')]:
            i = numeric_literal_to_int(op) & 0xFFFF  # 16 bit two's complement
            chunks.append(bigendian_to_littleendian_16bit(encode_16bit([(0, 16, i)])))
        return b''.join(chunks)
    if name == 'ALIGN':
        operands = [x.strip() for x in operands.split(',')]
        if len(operands[0]) == 0:  # implicit alignment to 4 bytes
            alignment = 4
        else:
            alignment = numeric_literal_to_int(operands[0])
        if len(operands) < 2:
            offset = 0
        else:
            offset = numeric_literal_to_int(operands[1])
        padsize = ((alignment - ((address+alignment-offset) % alignment)) % alignment)
        return padsize*b'\x00'
    if name == 'DCB':
        chunks = []
        for op in [x.strip() for x in operands.split(',')]:
            if op[0] == '"':
                # String literal: one byte per character between the quotes.
                chunks.append(bytes([ord(c) for c in op[1:-1]]))
            else:  # is valid numeric literal because has to be checked before
                # check_directive allows -128..255, but bytes() only accepts
                # 0..255, so negative values are wrapped to two's complement.
                chunks.append(bytes([numeric_literal_to_int(op) & 0xFF]))
        return b''.join(chunks)
    if name == 'INCBIN':
        return filecontents(operands)
    return b''  # should never be reached
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def check_blockdatatransop(name, operands):
    """
    Validate the operands of a block data transfer: "Rn[!], {reglist}[^]".
    Assumes valid name, valid name+flags combination, valid condcode.
    The register list may contain single registers and ranges "Ra-Rb" and
    must be in strictly ascending order.
    Return an error string if the operands are invalid, empty string
    otherwise. Note that the writeback-into-loaded-base hint is also
    returned as an error string.
    """
    parts = [x.strip() for x in operands.split(',', 1)]
    if len(parts) < 2:
        return 'Too few operands'
    base_text, list_text = parts

    # Base register with optional writeback marker.
    if len(base_text) < 2:
        return 'Invalid operand'
    writeback = (base_text[-1] == '!')
    if writeback:
        base_text = base_text[:-1].strip()
    if not is_reg(base_text):
        return 'Expected register'
    base = get_reg_num(base_text)
    if base == 15:
        return 'PC is not allowed here'

    # Register list with optional trailing '^'.
    if len(list_text) < 2:
        return 'Invalid operand'
    sbit = (list_text[-1] == '^')
    if sbit:
        list_text = list_text[:-1].strip()
    if list_text[0] != '{' or list_text[-1] != '}':
        return 'Missing {} around register list'
    entries = [x.strip() for x in list_text[1:-1].strip().split(',')]
    # str.split never returns an empty list; an empty "{}" yields [''].
    if entries == ['']:
        return 'Invalid register list'

    reglist = []
    for entry in entries:
        if '-' in entry:
            bounds = [x.strip() for x in entry.split('-')]
            if len(bounds) > 2:
                return 'Invalid syntax'
            if not is_reg(bounds[0]) or not is_reg(bounds[1]):
                return 'Expected register'
            start = get_reg_num(bounds[0])
            end = get_reg_num(bounds[1])
            if start >= end:
                return 'Registers must be specified in ascending order'
            reglist += list(range(start, end+1))  # upy needs explicit conversion
        else:
            if not is_reg(entry):
                return 'Expected register'
            reglist.append(get_reg_num(entry))
    for i in range(0, len(reglist)-1):
        if reglist[i] >= reglist[i+1]:
            return 'Registers must be specified in ascending order'

    if sbit and writeback and (name == 'STM' or (name == 'LDM' and 15 not in reglist)):
        return 'Writeback may not be used combined with user bank transfer'
    if writeback and name == 'LDM' and base in reglist:
        return 'Attention: Writeback is useless here because the loaded value will overwrite it'
    return ''


def encode_blockdatatransop(name, flags, condcode, operands):
    """
    check_blockdatatransop must be called before this.
    Encode a block data transfer ("Rn[!], {reglist}[^]") and return the
    result of bigendian_to_littleendian. flags selects the addressing mode
    (IA, IB, DA, DB, FD, ED, FA or EA).
    """
    tokens = [tok.strip() for tok in operands.split(',')]

    writeback = (tokens[0][-1] == '!')
    if writeback:
        tokens[0] = tokens[0][:-1].strip()
    base = get_reg_num(tokens[0])

    sbit = (tokens[-1][-1] == '^')
    if sbit:
        tokens[-1] = tokens[-1][:-1].strip()

    # Remove the opening brace from the first list entry and the closing
    # brace from the last one (they may be the same token).
    tokens[1] = tokens[1][1:].strip()
    tokens[-1] = tokens[-1][:-1].strip()

    # One bit per register that is part of the list.
    regfield = 0
    for entry in tokens[1:]:
        if '-' in entry:
            (first, last) = [get_reg_num(r.strip()) for r in entry.split('-')]
            for regnum in range(first, last+1):
                regfield |= (1 << regnum)
        else:
            regfield |= (1 << get_reg_num(entry))

    is_load = (name == 'LDM')
    # (up, pre-index) bits per addressing mode; the stack style modes depend
    # on whether this is a load or a store.
    mode_bits = {
        'IA': (1, 0),
        'IB': (1, 1),
        'DA': (0, 0),
        'DB': (0, 1),
        'FD': (is_load, not is_load),
        'ED': (is_load, is_load),
        'FA': (not is_load, not is_load),
        'EA': (not is_load, is_load),
    }
    (up, pre) = mode_bits[flags]

    layout = [
        (28, 4, get_condcode_value(condcode)),
        (25, 3, 0x4),
        (24, 1, pre),
        (23, 1, up),
        (22, 1, sbit),
        (21, 1, writeback),
        (20, 1, is_load),
        (16, 4, base),
        (0, 16, regfield),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############

# branch, psr, swi


def check_branchop(name, operands, address, labeldict):
    """
    Validate the single operand of a branch instruction.
    Assumes valid name, valid name+flags combination, valid condcode.
    BX expects a register other than PC; every other name expects a pc
    relative expression whose offset is a multiple of four and, divided by
    four, fits into 24 bits signed.
    Return an error string if invalid, empty string otherwise.
    """
    parts = [x.strip() for x in operands.split(',')]
    if len(parts) != 1:
        return 'Invalid number of operands: expected 1, got %i' % len(parts)
    target = parts[0].strip()

    if name == 'BX':
        if not is_reg(target):
            return 'Invalid Operand: Expected register'
        if get_reg_num(target) == 15:
            return 'PC not allowed here (causes undefined behaviour)'
        return ''

    err = check_pcrelative_expression(target, labeldict)
    if len(err) > 0:
        return 'Invalid Operand: Expected pc relative expression (%s)' % (err)
    offset = pcrelative_expression_to_int(target, address, labeldict)
    if offset % 4 != 0:
        return 'Offset must be aligned to four bytes'
    words = offset >> 2
    if not (-2**23 <= words <= 2**23-1):
        return 'Branch target too far away'
    return ''


def encode_branchop(name, condcode, operands, address, labeldict):
    """
    check_branchop must be called before this.
    Encode BX (register operand) or a pc relative branch and return the
    result of bigendian_to_littleendian. The link bit is set when name is BL.
    """
    target = operands.strip()
    if name == 'BX':
        rn = get_reg_num(target)
        layout = [(28, 4, get_condcode_value(condcode)), (4, 24, 0x12FFF1), (0, 4, rn)]
        return bigendian_to_littleendian(encode_32bit(layout))

    offset = pcrelative_expression_to_int(target, address, labeldict) >> 2
    if offset < 0:
        offset += (1 << 24)  # represent negative offsets in 24 bit two's complement
    cc = get_condcode_value(condcode)
    link = (name == 'BL')
    layout = [(28, 4, cc), (25, 3, 0x5), (24, 1, link), (0, 24, offset)]
    return bigendian_to_littleendian(encode_32bit(layout))


def check_psrtransop(name, operands):
    """
    Validate the two operands of a PSR transfer.
    Assumes valid name, valid name+flags combination, valid condcode.
    MRS expects "Rd, psr"; every other name expects "psr, Rm", or
    "psr_flg, immediate" (an immediate is only accepted when the psr name
    ends in FLG).
    Return an error string if invalid, empty string otherwise.
    """
    parts = [x.strip() for x in operands.split(',')]
    if len(parts) != 2:
        return 'Invalid number of operands: expected 2, got %i' % len(parts)
    first, second = parts[0], parts[1]

    if name == 'MRS':
        if not is_reg(first):
            return 'Invalid operand: expected register'
        if get_reg_num(first) == 15:
            return 'PC is not allowed here'
        if second.upper() not in ['SPSR', 'SPSR_ALL', 'CPSR', 'CPSR_ALL']:
            return 'Invalid operand: expected psr'
        return ''

    if not is_psr(first):
        return 'Invalid operand: expected psr'
    flags_only = first.upper().endswith('FLG')
    if is_reg(second):
        if get_reg_num(second) == 15:
            return 'PC is not allowed here'
        return ''
    if not flags_only:  # immediate is only allowed for PSR_FLG
        return 'Invalid operand: expected register'
    if not is_valid_imval(second):
        return 'Invalid operand: expected register or immediate value'
    if not is_expressable_imval(second):
        return 'This immediate value cannot be encoded'
    return ''


def encode_psrtransop(name, condcode, operands):
    """
    check_psrtransop must be called before this.
    Encode a PSR transfer and return the result of bigendian_to_littleendian.
    A psr operand starting with 'C' selects CPSR, anything else SPSR.
    """
    parts = [x.strip() for x in operands.split(',')]

    if name == 'MRS':
        rd = get_reg_num(parts[0])
        use_spsr = (parts[1].upper()[0] != 'C')
        layout = [
            (28, 4, get_condcode_value(condcode)),
            (23, 5, 0x2),
            (22, 1, use_spsr),
            (16, 6, 0xF),
            (12, 4, rd),
        ]
        return bigendian_to_littleendian(encode_32bit(layout))

    psr = parts[0].upper()
    use_spsr = (psr[0] != 'C')
    whole_psr = not psr.endswith('FLG')  # cleared when only the flag bits are written
    immediate = not is_reg(parts[1])
    if immediate:
        op2field = encode_imval(parts[1])
    else:
        op2field = get_reg_num(parts[1])
    layout = [
        (28, 4, get_condcode_value(condcode)),
        (25, 1, immediate),
        (23, 2, 0x2),
        (22, 1, use_spsr),
        (17, 5, 0x14),
        (16, 1, whole_psr),
        (12, 4, 0xF),
        (0, 12, op2field),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))


def check_swiop(name, operands):
    """
    Validate the single immediate operand of a software interrupt.
    Assumes valid name, valid name+flags combination, valid condcode.
    The value must lie in -2^23 .. 2^24-1.
    Return an error string if invalid, empty string otherwise.
    """
    parts = [x.strip() for x in operands.split(',')]
    if len(parts) != 1:
        return 'Invalid number of operands: expected 1, got %i' % len(parts)
    comment = parts[0]
    if not is_valid_imval(comment):
        return 'Invalid operand: expected immediate value'
    value = imval_to_int(comment)
    upper_limit = 2**24-1
    lower_limit = -2**23
    if value > upper_limit:
        return 'Operand greater than 2^24-1'
    if value < lower_limit:
        return 'Operand lower than -2^23'
    return ''


def encode_swiop(name, condcode, operands):
    """
    check_swiop must be called before this.
    Encode a software interrupt whose 24 bit field holds the immediate
    operand and return the result of bigendian_to_littleendian.
    """
    text = operands.strip()
    cc = get_condcode_value(condcode)
    layout = [(28, 4, cc), (24, 4, 0xF), (0, 24, imval_to_int(text))]
    return bigendian_to_littleendian(encode_32bit(layout))


def check_miscarithmeticop(name, operands):
    """
    Validate the operands "Rd, Rm" of a miscellaneous arithmetic instruction.
    Assumes valid name, valid name+flags combination, valid condcode.
    Both operands must be registers other than PC.
    Return an error string if invalid, empty string otherwise.
    """
    parts = [x.strip() for x in operands.split(',')]
    if len(parts) != 2:
        return 'Invalid number of operands: expected 2, got %i' % len(parts)
    regnums = []
    for text in parts:
        if not is_reg(text):
            return 'Invalid operand: expected register'
        regnums.append(get_reg_num(text))
    if 15 in regnums:
        return 'PC is not allowed here'
    return ''

def encode_miscarithmeticop(name, condcode, operands):
    """
    check_miscarithmeticop must be called before this.
    Encode the instruction with operands "Rd, Rm" and return the result of
    bigendian_to_littleendian.
    """
    parts = [x.strip() for x in operands.split(',')]
    rd = get_reg_num(parts[0])
    rm = get_reg_num(parts[1])
    layout = [
        (28, 4, get_condcode_value(condcode)),
        (16, 12, 0x16F),
        (12, 4, rd),
        (4, 8, 0xF1),
        (0, 4, rm),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def check_op2(op2):
    """
    Check Op2 of a dataprocop.
    Accepted forms are "reg", "immediate value" and "reg, shift", where shift
    is "RRX", "shiftname reg" or "shiftname immediate" (the space before a
    '#' may be omitted). Immediate shift amounts must be 0..31; 32 is
    additionally accepted for LSR and ASR. Shift names are compared case
    insensitively, matching encode_op2.
    Return an error string if invalid, empty string otherwise.
    """
    invalid = 'Invalid op2 (must be of the form "reg" or "reg, shift" or "immediate value")'
    operands = [x.strip() for x in op2.split(',')]
    if len(operands) == 1:
        if is_reg(operands[0]):  # op2 = reg
            return ''
        if not is_valid_imval(operands[0]):  # op2 = immediate
            return invalid
        # constant must be expressable as "8bit unsigned int" rotated right by 2*n with n an "4 bit unsigned int"
        if not is_expressable_imval(operands[0]):
            return 'This immediate value cannot be encoded as op2'
        return ''
    # ->must be of form "reg, shift"
    if len(operands) != 2 or not is_reg(operands[0]):
        return invalid
    spec = operands[1]
    hashpos = spec.find('#')
    if hashpos != -1:
        # make it legal to omit the space between shiftname and immediate value
        spec = spec[:hashpos] + ' ' + spec[hashpos:]
    shift = spec.split()
    if len(shift) == 1:  # only "RRX" stands on its own
        if shift[0].upper() != 'RRX':
            return invalid
        return ''
    if len(shift) != 2:  # empty shift (e.g. trailing comma) or too many parts
        return invalid
    if not is_shiftname(shift[0]):
        return invalid
    if is_reg(shift[1]):
        if get_reg_num(shift[1]) == 15:
            return 'PC may not be used here'
        return ''
    if not is_valid_imval(shift[1]):
        return invalid
    amount = imval_to_int(shift[1])
    if amount >= 0 and amount <= 31:
        return ''
    if amount == 32:
        # encode_op2 maps a shift by 32 to amount 0 for LSR and ASR only.
        if shift[0].upper() in ['LSR', 'ASR']:
            return ''
        return 'Shift by 32 is only allowed for LSR and ASR'
    return 'Invalid immediate shift amount. Must be 0 <= amount <= 31 (or 32 for special LSR, ASR)'


def check_dataprocop(name, operands):
    """
    Validate the operands of a data processing instruction.
    Assumes valid name, valid name+flags combination, valid condcode.
    The operands are "reg, op2", or "reg, reg, op2" for the three operand
    instructions listed below; op2 itself is validated by check_op2.
    Return an error string if invalid, empty string otherwise.
    """
    three_operand_ops = ['AND', 'EOR', 'SUB', 'RSB', 'ADD', 'ADC', 'SBC', 'RSC', 'ORR', 'BIC']
    parts = [x.strip() for x in operands.split(',')]
    if len(parts) < 2:
        return 'Expected more operands'
    if not is_reg(parts[0]):
        return 'Expected a register as first operand'
    remaining = parts[1:]
    if name in three_operand_ops:
        if not is_reg(remaining[0]):
            return 'Expected a register as second operand'
        remaining = remaining[1:]
    if len(remaining) < 1:
        return 'Expected more operands'
    # Whatever is left forms op2 (which may itself contain a comma).
    return check_op2(','.join(remaining))


def encode_dataprocop(name, flags, condcode, operands):
    """
    check_dataprocop must be called before this.
    Encode a data processing instruction and return the result of
    bigendian_to_littleendian. Three layouts are handled: full operations
    (Rd, Rn, op2), test operations (Rn, op2; the S bit is always set) and
    move operations (Rd, op2).
    """
    parts = [x.strip() for x in operands.split(',')]
    set_flags = (flags == 'S')
    if is_dataproc_fullop(name):
        dest = get_reg_num(parts[0])
        op1 = get_reg_num(parts[1])
        op2_parts = parts[2:]
    elif is_dataproc_testop(name):
        dest = 0
        op1 = get_reg_num(parts[0])
        op2_parts = parts[1:]
        set_flags = True
    else:  # movop
        dest = get_reg_num(parts[0])
        op1 = 0
        op2_parts = parts[1:]
    (iflag, op2) = encode_op2(','.join(op2_parts))
    cc = get_condcode_value(condcode)
    opcode = get_dataprocop_num(name)
    layout = [
        (28, 4, cc),
        (25, 1, iflag),
        (21, 4, opcode),
        (20, 1, set_flags),
        (16, 4, op1),
        (12, 4, dest),
        (0, 12, op2),
    ]
    return bigendian_to_littleendian(encode_32bit(layout))


def encode_op2(op2):
    """
    check_op2 must be called before this.
    Argument op2 must be a string.
    Encode the op2. Return a tuple of I-flag and an integer containing the
    other 12 bits.
    """
    parts = [x.strip() for x in op2.split(',')]

    # A single operand that is not a register is an immediate value.
    if len(parts) == 1 and not is_reg(parts[0]):
        return (True, encode_imval(parts[0]))

    reg = get_reg_num(parts[0])
    # Defaults describe a plain register: LSL by immediate 0.
    shifttype = 'LSL'
    shiftby = 0
    shiftbyreg = False
    if len(parts) > 1:
        spec = parts[1]
        hashpos = spec.find('#')
        if hashpos != -1:
            # make it legal to omit the space between shiftname and immediate value
            spec = spec[:hashpos] + ' ' + spec[hashpos:]
        shift = spec.split()
        if len(shift) == 1:  # RRX is encoded as ROR by immediate 0
            shifttype = 'ROR'
        else:
            shifttype = shift[0]
            if is_reg(shift[1]):
                shiftby = get_reg_num(shift[1])
                shiftbyreg = True
            else:
                shiftby = imval_to_int(shift[1])
                if shiftby == 0:
                    shifttype = 'LSL'
                if shifttype.upper() in ['LSR', 'ASR'] and shiftby == 32:
                    shiftby = 0  # a shift by 32 is stored as amount 0

    type_code = {'LSL': 0, 'ASL': 0, 'LSR': 1, 'ASR': 2, 'ROR': 3}[shifttype.upper()]
    shiftfield = (type_code << 1) | shiftbyreg
    # A register amount sits one bit higher than an immediate amount.
    if shiftbyreg:
        shiftfield |= (shiftby << 4)
    else:
        shiftfield |= (shiftby << 3)
    return (False, (shiftfield << 4) | reg)
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############


def check_pseudoinstruction(name, operands, address, labeldict):
    """
    Assumes valid name, valid name+flags combination, valid condcode.
    Check the operands of the pseudoinstruction called name (only ADR is
    known): "reg, pc relative expression", where the resulting offset must be
    representable according to int_to_signrotimv.
    Return an error string if invalid, empty string otherwise.
    """
    if name == 'ADR':
        parts = [x.strip() for x in operands.split(',', 1)]
        # Without a comma there is only one part; report it instead of
        # failing on the tuple unpacking below.
        if len(parts) != 2:
            return 'Invalid number of operands: expected 2, got %i' % len(parts)
        reg, expr = parts
        if not is_reg(reg):
            return 'Invalid operand: expected register'
        err = check_pcrelative_expression(expr, labeldict)
        if len(err) != 0:
            return err
        offs = pcrelative_expression_to_int(expr, address, labeldict)
        if int_to_signrotimv(offs) is None:
            return 'Invalid offset: cannot be encoded'
        return ''
    return 'Unknown pseudoinstruction (bug)'


def get_replacement(name, operands, address, labeldict):
    """
    check_pseudoinstruction must be called before this, and it must be a pseudoinstruction.
    Return a tuple (replacement opname, replacement operands), or None if
    name is not ADR. ADR becomes an ADD or SUB of an immediate to PC,
    depending on the sign reported by int_to_signrotimv.
    """
    if name != 'ADR':
        return None
    reg, expr = [x.strip() for x in operands.split(',', 1)]
    offs = pcrelative_expression_to_int(expr, address, labeldict)
    sign, _rot, _imv = int_to_signrotimv(offs)  # only the sign is needed here
    newop = 'ADD' if sign == 1 else 'SUB'
    return (newop, '%s, PC, #%i' % (reg, sign*offs))
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############
# Functions return -1 on failure.
# Member functions prefixed with an underscore must not be called from outside the class.


class Sourceline:
    """
    One line of assembly source together with everything parsed from it.

    The processing methods build on each other and are meant to be called in
    this order: parse_comments, parse_labelpart, parse_namepart,
    set_length_and_address, replace_pseudoinstructions, assemble.
    They return 0 on success and -1 on failure; on failure self.errmsg
    describes the problem.
    """
    # line        the full string
    # notcomment  everything except comments
    # label
    # operation   everything after the label
    # opname
    # flags
    # condcode
    # operands
    # address
    # hexcode     the encoded bytes, empty until assemble() succeeded
    # length
    # errmsg      contains a string describing the error if a function
    #             fails and returns -1

    def __init__(self, inst):
        """Initialize all data members to their empty/unprocessed-value except self.line to inst."""
        self.line = inst
        self.notcomment = ''
        self.label = ''
        self.operation = ''
        self.opname = ''
        self.flags = ''
        self.condcode = ''
        self.operands = ''
        self.address = -1
        self.hexcode = b''
        self.length = -1
        self.errmsg = ''

    def _parse_s_suffix(self):
        """If self.opname is an op with an (arithmetic) S-suffix, remove that suffix and set self.flags to S."""
        soplist = ['ADC', 'ADD', 'RSB', 'RSC', 'SBC', 'SUB', 'AND',
                   'BIC', 'EOR', 'ORR', 'MOV', 'MVN', 'MUL', 'MLA',
                   'UMULL', 'SMULL', 'UMLAL', 'SMLAL']
        for op in soplist:
            if self.opname.startswith(op):
                # Accepted spellings are <op><cond>S and <op>S. Only the S is
                # removed here; a condition code stays in self.opname.
                if (self.opname[:-3] == op and is_condcode(self.opname[-3:-1]) and self.opname[-1] == 'S')\
                   or (self.opname == op+'S'):
                    self.opname = self.opname[:-1]
                    self.flags = 'S'
                    break

    def _parse_tbhs_suffixes(self):
        """
        If self.opname is an LDR/STR/SWP op with T, B, H or S-suffixes, remove
        that (those) suffix(es) and set self.flags to those.

        A condition code written directly after the op name (LDRLT, STRCSB,
        ...) is never taken for a suffix: its two letters stay in self.opname
        so that _parse_condition_code can pick them up.
        """
        tbhsoplist = ['LDR', 'STR', 'SWP']
        for op in tbhsoplist:
            if self.opname.startswith(op):
                # keep = number of leading characters that must survive.
                keep = len(op)
                cond = self.opname[keep:keep+2]
                if len(cond) == 2 and is_condcode(cond):
                    keep += 2
                # Suffixes are peeled off from the end, each at most once.
                if len(self.opname) > keep and self.opname[-1] == 'T':
                    self.opname = self.opname[:-1]
                    self.flags = 'T'
                if len(self.opname) > keep and self.opname[-1] == 'B':
                    self.opname = self.opname[:-1]
                    self.flags = 'B' + self.flags
                if len(self.opname) > keep and self.opname[-1] == 'H':
                    self.opname = self.opname[:-1]
                    self.flags = 'H' + self.flags
                if len(self.opname) > keep and self.opname[-1] == 'S':
                    self.opname = self.opname[:-1]
                    self.flags = 'S' + self.flags
                break

    def _parse_addrmode_suffixes(self):
        """If self.opname is LDM or STM with an addressing mode suffix, remove the suffix and set self.flags to it."""
        addrsuffoplist = ['LDM', 'STM']
        addressingmodelist = ['FD', 'ED', 'FA', 'EA', 'IA', 'IB',
                              'DA', 'DB']
        for op in addrsuffoplist:
            if self.opname.startswith(op):
                # Accepted spellings are <op><cond><mode> and <op><mode>.
                if (self.opname[:-4] == op and is_condcode(self.opname[-4:-2]) and self.opname[-2:] in addressingmodelist)\
                   or (self.opname[:-2] == op and self.opname[-2:] in addressingmodelist):
                    self.flags = self.opname[-2:]
                    self.opname = self.opname[:-2]
                    break

    def _parse_condition_code(self):
        """If there is a condition code in self.opname, remove it there and set self.condcode to it."""
        # A name that is already a complete op must not lose its last two
        # letters, even if they happen to look like a condition code.
        if is_opname(self.opname+self.flags):
            return
        if is_condcode(self.opname[-2:]):
            self.condcode = self.opname[-2:]
            self.opname = self.opname[:-2]

    def _check_operation(self):
        """
        Check if self forms a valid operation, fill errmsg if not.
        Return 0 if valid, -1 if invalid.
        """
        if not is_opname(self.opname+self.flags):
            self.errmsg = 'This instruction (name:%s, flags:%s) is unknown' % (self.opname, self.flags)
            return -1
        if is_conditionable(self.opname+self.flags):
            if len(self.condcode) > 0 and not is_condcode(self.condcode):
                self.errmsg = 'Condition code is unknown (condition code: %s)' % (self.condcode)
                return -1
        elif len(self.condcode) > 0:
            self.errmsg = 'Condition codes are not allowed for this instruction (name:%s, flags:%s, cond:%s)' % (self.opname, self.flags, self.condcode)
            return -1
        return 0

    def _check_label(self):
        """
        Check if self.label is allowed (i.e. not private or containing illegal characters), fill errmsg if not.
        Return 0 if correct, -1 if not correct.
        """
        if not is_valid_label(self.label):
            self.errmsg = 'This label contains illegal characters (label:%s)' % (self.label)
            return -1
        if is_private_label(self.label):
            self.errmsg = 'This label name is reserved (label:%s)' % (self.label)
            return -1
        return 0

    def parse_comments(self):
        """
        self must be initialized and not yet parsed.
        Set self.notcomment to self.line stripped from comments.
        A comment starts at the first semicolon that is not inside a
        double-quoted string. Trailing whitespace is removed as well.
        Return 0.
        """
        sci = self.line.find(';')
        dqi = self.line.find('"')
        if sci < dqi or dqi == -1:
            # No quote in front of the semicolon (or no semicolon at all).
            if sci == -1:
                self.notcomment = self.line
            else:
                self.notcomment = self.line[:sci]
        else:  # there is a ; and a " before it
            # TODO: test
            # Replace a C-style escaped " with "", which is the ARM asm way of
            # getting a single " inside a string. Cases outside a string need
            # no care, they are a syntax error later on anyway. The
            # replacement has the same length, so sci is still correct.
            tmpline = self.line.replace('\\"', '""')
            # An odd number of quotes before the semicolon means the
            # semicolon is inside a string: try the next one.
            while tmpline.count('"', 0, sci) % 2 != 0:
                sci = self.line.find(';', sci+1)
                if sci == -1:
                    break
            if sci == -1:
                self.notcomment = self.line
            else:
                self.notcomment = self.line[:sci]
        self.notcomment = self.notcomment.rstrip()  # whitespace in front is relevant!
        return 0

    def parse_labelpart(self):
        """
        self must be processed by parse_comments but nothing after that.
        Set self.label to the label, set self.operation to the rest of the line (stripped from whitespace).
        A line starting with whitespace has no label.
        Return 0 on success, -1 on failure/error.
        """
        if self.notcomment[:1].isspace():
            self.operation = self.notcomment.strip()
            return 0
        splitr = self.notcomment.split(None, 1)
        if len(splitr) == 0:
            # empty line
            return 0
        elif len(splitr) == 1:
            self.label = splitr[0]
        else:  # len(splitr) == 2
            self.label = splitr[0]
            self.operation = splitr[1].strip()
        if self._check_label() == -1:
            return -1
        return 0

    def parse_namepart(self):
        """
        self must be processed by parse_labelpart but nothing after.
        Parse self.operation: the operation name with suffixes and stuff, set self.opname, self.flags, self.condcode (defaults to AL if conditionable) to the computed values. Store the rest in self.operands.
        Return 0 on success, -1 on failure/error.
        """
        splitr = self.operation.split(None, 1)
        if len(splitr) == 0:
            # no operation on this line
            return 0
        elif len(splitr) == 1:
            oppart = splitr[0]
            operators = ''
        else:  # len(splitr) == 2
            (oppart, operators) = splitr
        self.opname = oppart.upper().strip()
        self.operands = operators.strip()
        # Suffixes are removed before the condition code is looked for.
        self._parse_s_suffix()
        self._parse_tbhs_suffixes()
        self._parse_addrmode_suffixes()
        self._parse_condition_code()
        if len(self.condcode) == 0 and is_conditionable(self.opname+self.flags):
            self.condcode = 'AL'
        if self._check_operation() == -1:
            return -1
        return 0

    def is_include(self):
        """
        self must be processed by parse_namepart.
        Return True if this operation is INCLUDE or GET, False otherwise.
        """
        return self.opname in ('INCLUDE', 'GET',)

    def is_incbin(self):
        """
        self must be processed by parse_namepart.
        Return True if this operation is INCBIN, False otherwise.
        """
        return self.opname == 'INCBIN'

    def set_length_and_address(self, address):
        """
        self must be processed by parse_namepart.
        Set self.address to address, calculate and set self.length.
        A line without an operation has length 0.
        Return 0 on success, -1 on failure.
        """
        self.address = address
        if len(self.opname+self.flags) == 0:
            self.length = 0
        else:
            self.length = get_size(self.opname+self.flags, self.operands, address)
        if self.length == -1:
            self.errmsg = 'Could not calculate instruction size'
            return -1
        return 0

    def get_length(self):
        """
        self must be processed by set_length_and_address.
        Return self.length.
        """
        return self.length

    def replace_pseudoinstructions(self, labeldict):
        """
        self must be processed by set_length_and_address.
        Replace self.opname, self.operands of pseudoinstructions.
        Lines that are not pseudoinstructions are left untouched.
        Return 0 on success, -1 on failure.
        """
        if is_pseudoinstruction(self.opname, self.operands):
            err = check_pseudoinstruction(self.opname, self.operands, self.address, labeldict)
            if len(err) != 0:
                self.errmsg = err
                return -1
            self.opname, self.operands = get_replacement(self.opname, self.operands, self.address, labeldict)
        return 0

    def _check_syntax(self, labeldict):
        """
        self must be processed by replace_pseudoinstructions.
        Hand the line to the check function of the instruction class it belongs to.
        Return error message string, empty string if no error.
        """
        fullname = self.opname+self.flags
        if is_directive(fullname):
            return check_directive(self.opname, self.operands)
        elif is_dataprocop(fullname):
            return check_dataprocop(self.opname, self.operands)
        elif is_branchop(fullname):
            return check_branchop(self.opname, self.operands, self.address, labeldict)
        elif is_psrtransop(fullname):
            return check_psrtransop(self.opname, self.operands)
        elif is_swiop(fullname):
            return check_swiop(self.opname, self.operands)
        elif is_mulop(fullname):
            return check_mulop(self.opname, self.operands)
        elif is_longmulop(fullname):
            return check_longmulop(self.opname, self.operands)
        elif is_coprocregtransop(fullname):
            return check_coprocregtransop(self.opname, self.operands)
        elif is_singledatatransop(fullname):
            return check_singledatatransop(self.flags, self.operands, self.address, labeldict)
        elif is_halfsigneddatatransop(fullname):
            return check_halfsigneddatatransop(self.operands, self.address, labeldict)
        elif is_swapop(fullname):
            return check_swapop(self.operands)
        elif is_blockdatatransop(fullname):
            return check_blockdatatransop(self.opname, self.operands)
        elif is_miscarithmeticop(fullname):
            return check_miscarithmeticop(self.opname, self.operands)
        else:
            return 'Unknown or not implemented instruction (failed in _check_syntax)'

    def _encode_line(self, labeldict):
        """
        self must be processed by _check_syntax.
        Hand the line to the encode function of the instruction class it belongs to.
        Return the encoded line as a bytes-like object, b'' if the instruction class is unknown.
        """
        fullname = self.opname+self.flags
        if is_directive(fullname):
            return encode_directive(self.opname, self.operands, self.address)
        elif is_dataprocop(fullname):
            return encode_dataprocop(self.opname, self.flags, self.condcode, self.operands)
        elif is_branchop(fullname):
            return encode_branchop(self.opname, self.condcode, self.operands, self.address, labeldict)
        elif is_psrtransop(fullname):
            return encode_psrtransop(self.opname, self.condcode, self.operands)
        elif is_swiop(fullname):
            return encode_swiop(self.opname, self.condcode, self.operands)
        elif is_mulop(fullname):
            return encode_mulop(self.opname, self.flags, self.condcode, self.operands)
        elif is_longmulop(fullname):
            return encode_longmulop(self.opname, self.flags, self.condcode, self.operands)
        elif is_coprocregtransop(fullname):
            return encode_coprocregtransop(self.opname, self.condcode, self.operands)
        elif is_singledatatransop(fullname):
            return encode_singledatatransop(self.opname, self.flags, self.condcode, self.operands, self.address, labeldict)
        elif is_halfsigneddatatransop(fullname):
            return encode_halfsigneddatatransop(self.opname, self.flags, self.condcode, self.operands, self.address, labeldict)
        elif is_swapop(fullname):
            return encode_swapop(self.opname, self.flags, self.condcode, self.operands)
        elif is_blockdatatransop(fullname):
            return encode_blockdatatransop(self.opname, self.flags, self.condcode, self.operands)
        elif is_miscarithmeticop(fullname):
            return encode_miscarithmeticop(self.opname, self.condcode, self.operands)
        else:
            return b''

    def assemble(self, labeldict):
        """
        self must be processed by replace_pseudoinstructions.
        Set self.hexcode to the binary machine code corresponding to self.
        Lines without an operation are skipped and keep their empty hexcode.
        Return 0 on success, -1 on failure.
        """
        if len(self.opname) == 0:
            return 0
        err = self._check_syntax(labeldict)
        if len(err) > 0:
            self.errmsg = err
            return -1
        self.hexcode = self._encode_line(labeldict)
        # The addresses of all following lines were computed from self.length,
        # so an encoding of a different size would shift them.
        if len(self.hexcode) != self.length:
            self.errmsg = 'Precalculated length (%i bytes) and real length (%i bytes) are not the same' % (self.length, len(self.hexcode))
            return -1
        return 0

    def get_hex(self):
        """
        self must be processed by assemble.
        Return self.hexcode.
        """
        return self.hexcode
##########IMPORT END############

##########IMPORT END############
##########IMPORT START############
# assembly language details:
# of the operations dealing with the coprocessor only MRC and MCR are supported (the others (LDC, STC, CDP) are useless on the nspire)
# labels may not have any whitespace before them, instructions have to have whitespace before them


def printerror(filename, linenum, line, msg):
    """Report an error: first where it happened (file, line number, source line), then msg indented by a tab."""
    location = 'Error in file "%s" on line %i:%s' % (filename, linenum, line)
    print(location)
    detail = '\t%s' % (msg)
    print(detail)


def printmsg(msg):
    """Write msg to the standard output, followed by a newline."""
    text = msg
    print(text)


def read_file_and_stage1_parse(infile, filestack=tuple()):
    """
    Read infile and run stage 1 (comments, labels, operation names) on every line.

    infile is taken relative to the current source path. Files included with
    INCBIN are read via add_file; files included with INCLUDE/GET are parsed
    recursively and their lines take the place of the including line.
    filestack holds the full paths of the files that are currently being
    included and is used to detect files which include themselves.

    Return (numerrs, code): the number of errors found and the list of
    Sourceline objects of this file with all included files spliced in.
    code is empty if there was any error. The source path is set back to its
    previous value before returning.
    """
    numerrs = 0
    prevsrcpath = get_sourcepath()
    change_sourcepath(infile)
    infile = get_sourcepath()  # get the full path
    if infile in filestack:
        printerror(infile, -1, '', 'file includes itself (possibly indirectly): filestack=%s' % str(filestack))
        numerrs += 1
        # The including file is still being parsed: give it its own source
        # path back, otherwise its later INCLUDE/INCBIN paths would be
        # resolved relative to this file.
        set_sourcepath(prevsrcpath)
        return numerrs, []
    # presumably the lines of the current source file; get_sourcecode is defined elsewhere
    text = get_sourcecode()
    # create list of Sourceline objects containing the lines of code
    code = [Sourceline(l) for l in text]
    includedcode = []  # list of (index, code,) tuples

    # stage 1: parse comments, labels and operation names
    # NOTE(review): i is the 0-based index into this file's lines and is
    # reported as the line number - confirm whether 1-based is wanted.
    for i, c in enumerate(code):
        if c.parse_comments() == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in parse_comments')
            numerrs += 1
            continue
        if c.parse_labelpart() == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in parse_labelpart')
            numerrs += 1
            continue
        if c.parse_namepart() == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in parse_namepart')
            numerrs += 1
            continue
        # read binary files included with INCBIN
        if c.is_incbin():
            size, abspath = add_file(c.operands)
            c.operands = abspath  # need a path which is still correct after changing the source file
            if size < 0:
                printerror(infile, i, c.line, "error in add_file for path '%s'" % (abspath,))
                numerrs += 1
                continue
        # recursively add INCLUDEd files
        if c.is_include():
            incnumerrs, inccode = read_file_and_stage1_parse(c.operands, filestack=filestack+(infile,))
            numerrs += incnumerrs
            includedcode.append((i, inccode,))
    set_sourcepath(prevsrcpath)
    if numerrs != 0:
        return numerrs, []

    # concatenate all snippets of the code in this file and the included files;
    # the INCLUDE line itself (index i) is dropped in favour of the included code
    fullcode = []
    currpos = 0
    for i, cd in includedcode:
        fullcode += code[currpos:i]
        fullcode += cd
        currpos = i+1
    fullcode += code[currpos:]
    return numerrs, fullcode


def assembler(infile, outfile):
    """
    Assemble infile and write the binary to outfile.

    The work is done in stages; after each stage all errors found in it are
    printed and the assembler stops if there was at least one. The output
    file consists of the four bytes 'PRG\\x00' followed by the machine code.

    Return -1 on failure, 0 on success.
    """

    # todo set sourcepath to cwd
    set_sourcepath('')  # if infile is not an absolute path, it is interpreted relative to current working directory

    # extended stage 1: parse comments, labels and operation names, read files included with INCBIN,
    #                   and recursively do the same for all INCLUDEs
    numerrs, code = read_file_and_stage1_parse(infile)
    if numerrs > 0:
        printmsg('Stopping assembler: %i Error(s)' % (numerrs))
        return -1

    # NOTE(review): from here on i is the index into the combined code of
    # infile and all included files, and errors are reported against infile.

    # stage 2: calculate length and address of every instruction
    curaddr = 0
    for i, c in enumerate(code):
        if c.set_length_and_address(curaddr) == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in set_length_and_address')
            numerrs += 1
            continue
        curaddr += c.get_length()
    if numerrs != 0:
        printmsg('Stopping assembler: %i Error(s)' % (numerrs))
        return -1

    # stage 3: create a dictionary of labels (label name -> address)
    labeldict = {}
    for i, c in enumerate(code):
        if len(c.label) > 0:
            if c.label in labeldict:
                printerror(infile, i, c.line, 'Label name already used (at offset 0x%x)' % (labeldict[c.label]))
                numerrs += 1
                continue
            labeldict[c.label] = c.address
    if numerrs != 0:
        printmsg('Stopping assembler: %i Error(s)' % (numerrs))
        return -1

    # stage 4: evaluate/replace pseudo-instructions and some directives
    for i, c in enumerate(code):
        if c.replace_pseudoinstructions(labeldict) == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in replace_pseudoinstructions')
            numerrs += 1
            continue
    if numerrs != 0:
        printmsg('Stopping assembler: %i Error(s)' % (numerrs))
        return -1

    # stage 5: check syntax, encode all instructions and directives
    for i, c in enumerate(code):
        if c.assemble(labeldict) == -1:
            if len(c.errmsg) > 0:
                printerror(infile, i, c.line, c.errmsg)
            else:
                printerror(infile, i, c.line, 'unknown error in assemble')
            numerrs += 1
            continue
    if numerrs != 0:
        printmsg('Stopping assembler: %i Error(s)' % (numerrs))
        return -1

    # stage 6: write output to file
    binary = bytearray()
    for c in code:
        binary.extend(c.get_hex())
    # the with-block closes the file even if a write fails
    with open(outfile, 'wb') as f:
        f.write(b'PRG\x00')
        f.write(binary)
    return 0
##########IMPORT END############

##########IMPORT END############


def calc_assemble():
    """Ask the user for the input and the output path, run the assembler on them and return its result."""
    source_path = input('in:')
    target_path = input('out:')
    result = assembler(source_path, target_path)
    return result


# Start the interactive prompt as soon as this file is executed.
calc_assemble()