nrj-oisc

NOR and Reference Jump OISC platform
git clone git://git.luxferre.top/nrj-oisc.git
Log | Files | Refs | README

nrjasm.py (9444B)


      1 #!/usr/bin/env python3
      2 # The reference assembler for NRJ OISC (tested for NRJ16)
      3 # By Luxferre, 2022, public domain
      4 
      5 import sys
      6 import array
      7 from os.path import realpath
      8 from numpy import trim_zeros
      9 
     10 # some constants to redefine more easily in case of major breaking changes
     11 
     12 NRJDEF_BITS = 16 # default word/addr size if the .bit directive is omitted
     13 
     14 # service characters
     15 NRJCHAR_COMMENT = ';' # all comments are after ;
     16 NRJCHAR_VARDEREF = '@' # variable dereferencing is with @
     17 NRJCHAR_CHARDEREF = "'" # character dereferencing is with '
     18 NRJCHAR_SUBST = '%' # var-in-macro substitution is with %
     19 
     20 # macro variables
     21 NRJVAR1 = NRJCHAR_SUBST + 'A' # %A
     22 NRJVAR2 = NRJCHAR_SUBST + 'B' # %B
     23 NRJVAR3 = NRJCHAR_SUBST + 'C' # %C
     24 
     25 # and preprocessor directives
     26 NRJDIR_INC = '.inc'
     27 NRJDIR_BITS = '.bit'
     28 NRJDIR_ORG = '.org'
     29 NRJDIR_DEF = '.def'
     30 NRJDIR_END = '.end'
     31 NRJDIR_VAR = '.var'
     32 NRJDIR_SET = '.set'
     33 NRJDIR_FREE = 'FREE'
     34 NRJDIR_NXT = 'NXT'
     35 NRJDIR_HLT = 'HLT'
     36 
     37 included_files = [] # stash to check the already included files
     38 
     39 def readsrc(fname): # read source file contents, stripping comments, empty lines and trailing/leading whitespace
     40     f = open(fname, 'r')
     41     rawlines = f.readlines()
     42     f.close()
     43     lines = []
     44     global included_files
     45     included_files.append(realpath(fname))
     46     for line in rawlines:
     47         line = line.split(NRJCHAR_COMMENT)[0].strip()
     48         if len(line) > 0:
     49             tokens = line.split() # split on any whitespace, which is what we need
     50             if tokens[0] == NRJDIR_INC: # process include directive immediately
     51                 incfname = realpath(' '.join(tokens[1:])) # because the name may include spaces
     52                 if incfname not in included_files: # cyclic inclusion protection
     53                     incfile = readsrc(incfname) # call itself recursively, trying to include a file
     54                     included_files.append(incfname) # update the list of included files
     55                     lines.extend(incfile) # update the source with the included contents in place
     56                 else:
     57                     print('Attempt to include an already included file %s, ignoring!' % incfname)
     58             else: # otherwise just append the tokenized source line
     59                 lines.append(tokens)
     60     return lines
     61 
     62 def start_assembly(srcfname, dstfname): # main assembly method
     63     wordsize = NRJDEF_BITS # define machine word/address size
     64 
     65     # we're starting with tokenized Stage 1 source: all includes processed, comments and whitespace stripped
     66     stage1src = readsrc(srcfname)
     67 
     68     # Stage 2: scan the source for the first word size set directive
     69     for line in stage1src:
     70         if line[0] == NRJDIR_BITS:
     71             wordsize = int(line[1])
     72             break
     73     print('Building for NRJ%u' % wordsize)
     74 
     75     # Stage 3: expand all macros
     76     stage3src = []
     77     macrobuffers = {}
     78     macrostart = False
     79     macroname = None
     80     for line in stage1src:
     81         if macrostart: # we already are buffering a macro
     82             if line[0] == NRJDIR_END: # macro ended and saved in the buffers
     83                 macrostart = False
     84                 macroname = None
     85             else: # continue buffering
     86                 macrobuffers[macroname].append(line)
     87         else: # usual code
     88             if line[0] == NRJDIR_DEF: # starting a macro
     89                 macroname = line[1]
     90                 macrobuffers[macroname] = [] # prepare the place to buffer the macro into
     91                 macrostart = True
     92             elif line[0] != NRJDIR_BITS: # ignoring word size directive as we already processed it
     93                 if line[0] in macrobuffers: # detected an already compiled macro, substituting the code and parameters
     94                     p1 = NRJDIR_HLT # placeholders for missing parameters
     95                     p2 = NRJDIR_HLT
     96                     p3 = NRJDIR_NXT # assume we're referring to the next address in p3
     97                     if len(line) > 1: # fill the first parameter if present
     98                         p1 = line[1]
     99                     if len(line) > 2: # fill the second parameter if present
    100                         p2 = line[2]
    101                     if len(line) > 3: # fill the third parameter if present
    102                         p3 = line[3]
    103                     for macroline in macrobuffers[line[0]]: # now, perform the macrosubstitution with parameter replacement
    104                         stage3src.append(' '.join(macroline).replace(NRJVAR1, p1).replace(NRJVAR2, p2).replace(NRJVAR3, p3).split())
    105                 else: # append a normal line
    106                     stage3src.append(line)
    107 
    108     # Stage 4: now, process .var directive, FREE directive, @ and ' dereferencing operators
    109     vartable = {} # don't store numeric locations here yet, only string representations (hex or FREE)
    110     stage4src = []
    111     for line in stage3src:
    112         if line[0] == NRJDIR_VAR: # .var directive: no @ or ' operators allowed here
    113             if line[2] == NRJDIR_FREE:
    114                 vartable[line[1]] = 0
    115             else:
    116                 vartable[line[1]] = int(line[2], 16)
    117     # now, fill in the FREE bits
    118     maxvar = 0
    119     for vname in vartable:
    120         if vartable[vname] > maxvar:
    121             maxvar = vartable[vname]
    122     for vname in vartable:
    123         if vartable[vname] == 0:
    124             maxvar += 1
    125             vartable[vname] = maxvar
    126     for line in stage3src:
    127         if line[0] != NRJDIR_VAR: # finally, perform variable substitution
    128             # but first, attempt to perform character substitution
    129             for i, el in enumerate(line):
    130                 if el.startswith(NRJCHAR_CHARDEREF):
    131                     line[i] = hex(ord(el[1]))[2:].upper()
    132             sline = ' '.join(line)
    133             for vname in vartable:
    134                 sline = sline.replace(NRJCHAR_VARDEREF+vname, hex(vartable[vname])[2:].upper())
    135             stage4src.append(sline.split())
    136 
    137     # now, our Stage 4 code is fully flat and we can start allocating memory for it
    138     # directives left to process at this point: .org, .set, NXT, HLT
    139     # (we cannot process .set before because it can also take value of NXT or HLT)
    140 
    141     memsize = 1 << wordsize
    142     haltaddr = memsize - 1 # halting address to be filled in the lookup table
    143     print('Allocating %u %u-bit words of memory...' % (memsize, wordsize))
    144     memmod = 'H'
    145     if wordsize >= 32:
    146         memmod = 'L'
    147     elif wordsize >= 64:
    148         memmod = 'Q'
    149     elif wordsize <= 8:
    150         memmod = 'B'
    151     targetmem = array.array(memmod, [0]*memsize)
    152 
    153     # here is the trickiest part of the whole assembly process - building a lookup table
    154     # as NRJ can't directly jump to the next instruction by itself, we need to tell it to
    155     # the NXT macro will be replaced with a cell in the lookup table that points to the next instruction
    156     # and the lookup table will also take some memory in the machine
    157 
    158     # now, try to detect the optimal offset for our lookup table
    159     if maxvar > 0: # we have some variables defined, so place the lookup table after them
    160         ltoffset = maxvar + 1
    161     else: # in the worst case scenario, the code will take half of all memory and lookup table will take the other half
    162         ltoffset = memsize >> 1 
    163     targetmem[ltoffset] = haltaddr # the first lookup table entry is always the halting address
    164     codepos = 0
    165     ltpos = 1 
    166     # let's iterate over the code 
    167     # pass 1
    168     for line in stage4src:
    169         if line[0] == NRJDIR_ORG: # handle .org
    170             codepos = int(line[1], 16)
    171         elif line[0] == NRJDIR_SET: # handle .set
    172             addr = int(line[1], 16)
    173             val = line[2]
    174             if val == NRJDIR_HLT:
    175                 targetmem[addr] = ltoffset
    176             elif val != NRJDIR_NXT:
    177                 targetmem[addr] = int(val, 16)
    178         else: # 3-value vector where HLT or NXT can be encountered
    179             # save current instruction in the lookup table
    180             targetmem[ltoffset + ltpos] = codepos
    181             ltpos += 1
    182             for v in line:
    183                 if v == NRJDIR_HLT:
    184                     targetmem[codepos] = ltoffset
    185                 elif v == NRJDIR_NXT:
    186                     targetmem[codepos] = ltoffset + ltpos
    187                 else:
    188                     targetmem[codepos] = int(v, 16)
    189                 codepos += 1
    190     # pass 2 - fill in NXT
    191     codepos = 0
    192     ltpos = 1
    193     for line in stage4src:
    194         if line[0] == NRJDIR_ORG: # handle .org
    195             codepos = int(line[1], 16)
    196         elif line[0] == NRJDIR_SET: # handle .set
    197             addr = int(line[1], 16)
    198             val = line[2]
    199             if val == NRJDIR_NXT:
    200                 val = targetmem[ltoffset + ltpos]
    201                 targetmem[addr] = val
    202         else: # 3-value vector where HLT or NXT can be encountered
    203             ltpos += 1
    204             for v in line:
    205                 codepos += 1
    206 
    207     # looking for a more optimal solution than to import numpy just for this:
    208 
    209     targetmem = trim_zeros(targetmem, 'b') # only strip trailing zero values
    210 
    211     # now, we have assembled our target memory snapshot, let's write the output file
    212 
    213     outf = open(dstfname, "wb")
    214     targetmem.tofile(outf)
    215     outf.close()
    216     print('Assembled %s' % dstfname)
    217 
    218 
    219 if __name__ == '__main__': # nrjasm entry point
    220     version = '0.0.1'
    221     print('nrjasm v%s by Luxferre, 2022' % version)
    222     if len(sys.argv) > 2:
    223         print('Assembling %s into %s...' % (sys.argv[1], sys.argv[2]))
    224         start_assembly(sys.argv[1], sys.argv[2])
    225     else:
    226         print('Usage: nrjasm.py [source] [binary]')
    227