nrjasm.py (9444B)
#!/usr/bin/env python3
# The reference assembler for NRJ OISC (tested for NRJ16)
# By Luxferre, 2022, public domain

import sys
import array
from os.path import realpath
from numpy import trim_zeros

# some constants to redefine more easily in case of major breaking changes

NRJDEF_BITS = 16  # default word/addr size if the .bit directive is omitted

# service characters
NRJCHAR_COMMENT = ';'    # all comments are after ;
NRJCHAR_VARDEREF = '@'   # variable dereferencing is with @
NRJCHAR_CHARDEREF = "'"  # character dereferencing is with '
NRJCHAR_SUBST = '%'      # var-in-macro substitution is with %

# macro variables
NRJVAR1 = NRJCHAR_SUBST + 'A'  # %A
NRJVAR2 = NRJCHAR_SUBST + 'B'  # %B
NRJVAR3 = NRJCHAR_SUBST + 'C'  # %C

# and preprocessor directives
NRJDIR_INC = '.inc'
NRJDIR_BITS = '.bit'
NRJDIR_ORG = '.org'
NRJDIR_DEF = '.def'
NRJDIR_END = '.end'
NRJDIR_VAR = '.var'
NRJDIR_SET = '.set'
NRJDIR_FREE = 'FREE'
NRJDIR_NXT = 'NXT'
NRJDIR_HLT = 'HLT'

included_files = []  # stash to check the already included files


def readsrc(fname):
    """Read a source file and return it as a list of token lists.

    Comments (everything after ``;``), surrounding whitespace and empty lines
    are dropped. ``.inc`` directives are expanded in place by reading the
    named file recursively; the rest of the ``.inc`` line is taken as the
    file name (so it may contain spaces) and is resolved with ``realpath``,
    i.e. relative to the current working directory.

    Every file read is recorded in the module-level ``included_files`` list;
    an ``.inc`` of an already recorded file is skipped with a message, which
    also protects against cyclic inclusion.

    Args:
        fname: path of the source file to read.

    Returns:
        A list of lines, each line being a non-empty list of string tokens.

    Raises:
        OSError: if ``fname`` or any included file cannot be opened.
    """
    with open(fname, 'r') as srcfile:
        rawlines = srcfile.readlines()
    fpath = realpath(fname)
    if fpath not in included_files:  # record each file only once
        included_files.append(fpath)
    lines = []
    for rawline in rawlines:
        # drop the comment part, then split on any whitespace
        tokens = rawline.split(NRJCHAR_COMMENT)[0].split()
        if not tokens:
            continue
        if tokens[0] == NRJDIR_INC:  # process include directive immediately
            incfname = realpath(' '.join(tokens[1:]))
            if incfname not in included_files:
                # the recursive call registers incfname by itself
                lines.extend(readsrc(incfname))
            else:
                print('Attempt to include an already included file %s, ignoring!' % incfname)
        else:  # otherwise just append the tokenized source line
            lines.append(tokens)
    return lines


def _expand_macros(src):
    """Stage 3: strip .def/.end macro definitions and expand macro calls.

    A macro call is a line whose first token is the name of a macro defined
    earlier in the source. Up to three arguments replace %A, %B and %C in the
    macro body; missing ones default to HLT, HLT and NXT respectively.
    ``.bit`` lines are dropped here because the word size has already been
    picked up by the caller.
    """
    expanded = []
    macros = {}
    recording = None  # name of the macro currently being buffered, if any
    defaults = [NRJDIR_HLT, NRJDIR_HLT, NRJDIR_NXT]
    for line in src:
        head = line[0]
        if recording is not None:  # we already are buffering a macro
            if head == NRJDIR_END:
                recording = None
            else:
                macros[recording].append(line)
        elif head == NRJDIR_DEF:  # starting a macro
            recording = line[1]
            macros[recording] = []
        elif head == NRJDIR_BITS:  # already processed by the caller
            continue
        elif head in macros:  # macro call: substitute code and parameters
            given = line[1:4]
            p1, p2, p3 = given + defaults[len(given):]
            for macroline in macros[head]:
                text = ' '.join(macroline)
                text = text.replace(NRJVAR1, p1).replace(NRJVAR2, p2).replace(NRJVAR3, p3)
                expanded.append(text.split())
        else:  # a normal line
            expanded.append(line)
    return expanded


def _resolve_symbols(src):
    """Stage 4: process .var, FREE and the @ and ' dereferencing operators.

    Returns a ``(lines, maxvar)`` tuple: the source with ``.var`` lines
    removed and every ``'c`` / ``@name`` replaced by an uppercase hex string,
    and the highest variable address in use (0 when no variable is defined).
    """
    vartable = {}  # name -> address, None while a FREE variable is unassigned
    for line in src:
        if line[0] == NRJDIR_VAR:  # no @ or ' operators allowed here
            name, location = line[1], line[2]
            # None (not 0) marks FREE so that an explicit address 0 is kept
            vartable[name] = None if location == NRJDIR_FREE else int(location, 16)

    # FREE variables get consecutive addresses after the highest explicit one
    maxvar = max([0] + [addr for addr in vartable.values() if addr is not None])
    for name in vartable:
        if vartable[name] is None:
            maxvar += 1
            vartable[name] = maxvar

    # Longest names first: otherwise "@A" would clobber the start of "@AB".
    substitutions = sorted(
        ((NRJCHAR_VARDEREF + name, '%X' % addr) for name, addr in vartable.items()),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    resolved = []
    for line in src:
        if line[0] == NRJDIR_VAR:
            continue
        tokens = []
        for token in line:  # character substitution goes first
            if token.startswith(NRJCHAR_CHARDEREF):
                if len(token) < 2:
                    raise ValueError('Character expected after %s' % NRJCHAR_CHARDEREF)
                token = '%X' % ord(token[1])
            tokens.append(token)
        text = ' '.join(tokens)
        for reference, hexaddr in substitutions:
            text = text.replace(reference, hexaddr)
        resolved.append(text.split())
    return resolved, maxvar


def _word_typecode(wordsize):
    """Return the array typecode used to store ``wordsize``-bit words.

    'L' is kept for sizes up to 32 bits, as before; note that its item size
    is platform dependent.
    """
    if wordsize <= 8:
        return 'B'
    if wordsize <= 16:
        return 'H'
    if wordsize <= 32:
        return 'L'
    return 'Q'


def start_assembly(srcfname, dstfname):
    """Assemble the NRJ source file ``srcfname`` into the binary ``dstfname``.

    The source goes through include/comment processing (``readsrc``), word
    size detection (first ``.bit`` directive, ``NRJDEF_BITS`` otherwise),
    macro expansion and variable/character substitution. The flat result is
    then laid out in a memory image of ``2 ** wordsize`` words, which is
    written to ``dstfname`` in native byte order with trailing zero words
    removed.

    Args:
        srcfname: path of the assembly source file.
        dstfname: path of the binary file to (over)write.

    Raises:
        OSError: if a source file cannot be read or the output not written.
        ValueError: if a numeric field is not valid hexadecimal.
    """
    # start every assembly with a clean include stash, so that a previous
    # run in the same process cannot make this one skip its includes
    included_files.clear()

    wordsize = NRJDEF_BITS  # define machine word/address size

    # Stage 1: all includes processed, comments and whitespace stripped
    stage1src = readsrc(srcfname)

    # Stage 2: scan the source for the first word size set directive
    for line in stage1src:
        if line[0] == NRJDIR_BITS:
            wordsize = int(line[1])
            break
    print('Building for NRJ%u' % wordsize)

    stage3src = _expand_macros(stage1src)
    stage4src, maxvar = _resolve_symbols(stage3src)

    # now, our Stage 4 code is fully flat and we can start allocating memory
    # directives left to process at this point: .org, .set, NXT, HLT
    # (.set cannot be processed earlier as it can take the value NXT or HLT)

    memsize = 1 << wordsize
    haltaddr = memsize - 1  # halting address to be filled in the lookup table
    print('Allocating %u %u-bit words of memory...' % (memsize, wordsize))
    targetmem = array.array(_word_typecode(wordsize), [0]) * memsize

    # The trickiest part of the whole assembly process is the lookup table:
    # as NRJ can't directly jump to the next instruction by itself, NXT is
    # replaced with the address of a lookup table cell that holds the address
    # of the next instruction. The table lives in the machine memory too.
    if maxvar > 0:  # some variables are defined: place the table after them
        ltoffset = maxvar + 1
    else:  # otherwise the table starts at the middle of the memory
        ltoffset = memsize >> 1
    targetmem[ltoffset] = haltaddr  # the first entry is always the halting address

    # pass 1 - emit the code and record every instruction in the lookup table
    codepos = 0
    ltpos = 1
    for line in stage4src:
        head = line[0]
        if head == NRJDIR_ORG:  # handle .org
            codepos = int(line[1], 16)
        elif head == NRJDIR_SET:  # handle .set (NXT is resolved in pass 2)
            addr = int(line[1], 16)
            val = line[2]
            if val == NRJDIR_HLT:
                targetmem[addr] = ltoffset
            elif val != NRJDIR_NXT:
                targetmem[addr] = int(val, 16)
        else:  # instruction vector where HLT or NXT can be encountered
            targetmem[ltoffset + ltpos] = codepos  # this instruction's table entry
            ltpos += 1  # from now on ltpos designates the next instruction's entry
            for value in line:
                if value == NRJDIR_HLT:
                    targetmem[codepos] = ltoffset
                elif value == NRJDIR_NXT:
                    targetmem[codepos] = ltoffset + ltpos
                else:
                    targetmem[codepos] = int(value, 16)
                codepos += 1

    # pass 2 - fill in NXT for .set, now that the lookup table is complete
    ltpos = 1
    for line in stage4src:
        head = line[0]
        if head == NRJDIR_ORG:
            continue
        if head == NRJDIR_SET:
            if line[2] == NRJDIR_NXT:
                # stores the table entry's content, not the entry's address
                targetmem[int(line[1], 16)] = targetmem[ltoffset + ltpos]
        else:
            ltpos += 1

    # looking for a more optimal solution than to import numpy just for this:
    targetmem = trim_zeros(targetmem, 'b')  # only strip trailing zero values

    # now, we have assembled our target memory snapshot, let's write the output file
    with open(dstfname, 'wb') as outf:
        targetmem.tofile(outf)
    print('Assembled %s' % dstfname)


if __name__ == '__main__':  # nrjasm entry point
    version = '0.0.1'
    print('nrjasm v%s by Luxferre, 2022' % version)
    if len(sys.argv) > 2:
        print('Assembling %s into %s...' % (sys.argv[1], sys.argv[2]))
        start_assembly(sys.argv[1], sys.argv[2])
    else:
        print('Usage: nrjasm.py [source] [binary]')