nrj-oisc

NOR and Reference Jump OISC platform
git clone git://git.luxferre.top/nrj-oisc.git
Log | Files | Refs | README

commit d712730bb499a777f41635b72e82953264c37cd7
parent f560cc5cbdcc96a162049e546be89d90cec5264d
Author: Luxferre <lux@ferre>
Date:   Fri,  2 Sep 2022 19:57:59 +0300

First working assembler!

Diffstat:
Aexample.nrjasm | 40++++++++++++++++++++++++++++++++++++++++
Mnrj.c | 4++--
Anrjasm.py | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astdlib.nrjasm | 21+++++++++++++++++++++
4 files changed, 281 insertions(+), 2 deletions(-)

diff --git a/example.nrjasm b/example.nrjasm @@ -0,0 +1,40 @@ +; a proposed assembly syntax for NRJ machines (example for NRJ16) +; semicolons are comments +; preprocessor instructions start with dot (.) +; every non-preprocessor instruction creates an entry in the lookup table +; all addressing is in words +; we usually start at the word 3 (don't pre-fill the I/O buffers) + +.bit 16 ; word/address size: NRJ16 is the default setting +.org 3 ; .org defines the start of further code/data (in words, hex) + +; include the standard library + +.inc stdlib.nrjasm + +.var x 12EF ; .var defines a label for a particular memory location +.var y 12F0 ; define another variable at 0x12F0 +.set @x 33EE ; .set sets a memory location to a particular hex constant at the build time, @ dereferences a label into the address +.set @y 'M ; ' dereferences a character into a whole word with its ASCII code + +; we CANNOT use dereferencing operators with .var, only with .set or directly + +; there can also be an .inc instruction to include a snippet from another file in the same directory + +; now, main elementary macros: +; NXT - address of the next instruction position in the lookup table +; HLT - the last address position in the lookup table (0xFFFF for NRJ16), set by .bit +; FREE - address of the next available (at build time) memory cell, can only be used in .var + +; for the lookup table and CUR/NXT macros to work correctly, the code must start at an address divisible by 3 +; note that FREE doesn't intelligently detect the available cells, it only takes the next one after the maximum address used +; so in our case, the first FREE instance will be substituted with 12F1, the next with 12F2 and so on + + +; now, let's output a character by transferring the y value to the output cell 1 +; in an endless loop + +.lbl myloop +MOV 1 @y @myloop ; output the character and jump to the beginning +; we don't have to explicitly zero out the cell 1 as it is done by the I/O logic + diff --git a/nrj.c 
b/nrj.c @@ -41,7 +41,7 @@ void nrj_run(NRJWORD *mem, NRJWORD pc) { /* Main NRJ engine - just 12 lines of C mem[mem[pc]] = (~(mem[mem[pc]] | mem[mem[pc+1]])) & MAXADDR; /* then perform the NOR operation */ pc = mem[mem[pc+2]]; /* then perform the reference jump operation */ if(mem[1]) { /* then handle output if word 1 is set */ - nrj_out(&mem[2], &mem[mem[1]]); /* output the value from the location specified in word 1 */ + nrj_out(&mem[2], &mem[1]); /* output the value from the word 1 */ mem[1] = (NRJWORD) 0; /* clear word 1 */ } } @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) { /* emulator entry point: nrj program.bin */ fseek(prog, 0, SEEK_END); int flen = ftell(prog); fseek(prog, 0, SEEK_SET); - fread(mem, sizeof(NRJWORD), (flen/sizeof(NRJWORD)) & MAXADDR, prog); + fread(mem, sizeof(NRJWORD), flen/sizeof(NRJWORD), prog); fclose(prog); tcgetattr(0, &tty_opts_backup); atexit(&restore_term); diff --git a/nrjasm.py b/nrjasm.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +# The reference assembler for NRJ OISC (tested for NRJ16) +# By Luxferre, 2022, public domain + +import sys +import array +from os.path import realpath + +# some constants to redefine more easily in case of major breaking changes + +NRJDEF_BITS = 16 # default word/addr size if the .bit directive is omitted + +# service characters +NRJCHAR_COMMENT = ';' # all comments are after ; +NRJCHAR_VARDEREF = '@' # variable dereferencing is with @ +NRJCHAR_CHARDEREF = "'" # character dereferencing is with ' +NRJCHAR_SUBST = '%' # var-in-macro substitution is with % + +# macro variables +NRJVAR1 = NRJCHAR_SUBST + 'A' # %A +NRJVAR2 = NRJCHAR_SUBST + 'B' # %B +NRJVAR3 = NRJCHAR_SUBST + 'C' # %C + +# and preprocessor directives +NRJDIR_INC = '.inc' +NRJDIR_BITS = '.bit' +NRJDIR_ORG = '.org' +NRJDIR_DEF = '.def' +NRJDIR_END = '.end' +NRJDIR_VAR = '.var' +NRJDIR_SET = '.set' +NRJDIR_FREE = 'FREE' +NRJDIR_NXT = 'NXT' +NRJDIR_HLT = 'HLT' + +included_files = [] # stash to check the already included files + +def 
readsrc(fname): # read source file contents, stripping comments, empty lines and trailing/leading whitespace + f = open(fname, 'r') + rawlines = f.readlines() + f.close() + lines = [] + global included_files + included_files.append(realpath(fname)) + for line in rawlines: + line = line.split(NRJCHAR_COMMENT)[0].strip() + if len(line) > 0: + tokens = line.split() # split on any whitespace, which is what we need + if tokens[0] == NRJDIR_INC: # process include directive immediately + incfname = realpath(' '.join(tokens[1:])) # because the name may include spaces + if incfname not in included_files: # cyclic inclusion protection + incfile = readsrc(incfname) # call itself recursively, trying to include a file + included_files.append(incfname) # update the list of included files + lines.extend(incfile) # update the source with the included contents in place + else: + print('Attempt to include an already included file %s, ignoring!' % incfname) + else: # otherwise just append the tokenized source line + lines.append(tokens) + return lines + +def start_assembly(srcfname, dstfname): # main assembly method + wordsize = NRJDEF_BITS # define machine word/address size + + # we're starting with tokenized Stage 1 source: all includes processed, comments and whitespace stripped + stage1src = readsrc(srcfname) + + # Stage 2: scan the source for the first word size set directive + for line in stage1src: + if line[0] == NRJDIR_BITS: + wordsize = int(line[1]) + break + print('Building for NRJ%u' % wordsize) + + # Stage 3: expand all macros + stage3src = [] + macrobuffers = {} + macrostart = False + macroname = None + for line in stage1src: + if macrostart: # we already are buffering a macro + if line[0] == NRJDIR_END: # macro ended and saved in the buffers + macrostart = False + macroname = None + else: # continue buffering + macrobuffers[macroname].append(line) + else: # usual code + if line[0] == NRJDIR_DEF: # starting a macro + macroname = line[1] + macrobuffers[macroname] = [] # 
prepare the place to buffer the macro into + macrostart = True + elif line[0] != NRJDIR_BITS: # ignoring word size directive as we already processed it + if line[0] in macrobuffers: # detected an already compiled macro, substituting the code and parameters + p1 = NRJDIR_HLT # placeholders for missing parameters + p2 = NRJDIR_HLT + p3 = NRJDIR_NXT # assume we're referring to the next address in p3 + if len(line) > 1: # fill the first parameter if present + p1 = line[1] + if len(line) > 2: # fill the second parameter if present + p2 = line[2] + if len(line) > 3: # fill the third parameter if present + p3 = line[3] + for macroline in macrobuffers[line[0]]: # now, perform the macrosubstitution with parameter replacement + stage3src.append(' '.join(macroline).replace(NRJVAR1, p1).replace(NRJVAR2, p2).replace(NRJVAR3, p3).split()) + else: # append a normal line + stage3src.append(line) + + # Stage 4: now, process .var directive, FREE directive, @ and ' dereferencing operators + vartable = {} # don't store numeric locations here yet, only string representations (hex or FREE) + stage4src = [] + for line in stage3src: + if line[0] == NRJDIR_VAR: # .var directive: no @ or ' operators allowed here + if line[2] == NRJDIR_FREE: + vartable[line[1]] = 0 + else: + vartable[line[1]] = int(line[2], 16) + # now, fill in the FREE bits + maxvar = 0 + for vname in vartable: + if vartable[vname] > maxvar: + maxvar = vartable[vname] + for vname in vartable: + if vartable[vname] == 0: + maxvar += 1 + vartable[vname] = maxvar + for line in stage3src: + if line[0] != NRJDIR_VAR: # finally, perform variable substitution + # but first, attempt to perform character substitution + for i, el in enumerate(line): + if el.startswith(NRJCHAR_CHARDEREF): + line[i] = hex(ord(el[1]))[2:].upper() + sline = ' '.join(line) + for vname in vartable: + sline = sline.replace(NRJCHAR_VARDEREF+vname, hex(vartable[vname])[2:].upper()) + stage4src.append(sline.split()) + + # now, our Stage 4 code is fully flat and 
we can start allocating memory for it + # directives left to process at this point: .org, .set, NXT, HLT + # (we cannot process .set before because it can also take value of NXT or HLT) + + memsize = 1 << wordsize + haltaddr = memsize - 1 # halting address to be filled in the lookup table + print('Allocating %u %u-bit words of memory...' % (memsize, wordsize)) + memmod = 'H' + if wordsize >= 32: + memmod = 'L' + elif wordsize >= 64: + memmod = 'Q' + elif wordsize <= 8: + memmod = 'B' + targetmem = array.array(memmod, [0]*memsize) + + # here is the trickiest part of the whole assembly process - building a lookup table + # as NRJ can't directly jump to the next instruction by itself, we need to tell it to + # the NXT macro will be replaced with a cell in the lookup table that points to the next instruction + # and the lookup table will also take some memory in the machine + + ltoffset = memsize >> 1 # in the worst case scenario, the code will take half of all memory and lookup table will take the other half + targetmem[ltoffset] = haltaddr # the first lookup table entry is always the halting address + codepos = 0 + ltpos = 1 + # let's iterate over the code + # pass 1 + for line in stage4src: + if line[0] == NRJDIR_ORG: # handle .org + codepos = int(line[1], 16) + elif line[0] == NRJDIR_SET: # handle .set + addr = int(line[1], 16) + val = line[2] + if val == 'HLT': + targetmem[addr] = ltoffset + elif val != 'NXT': + targetmem[addr] = int(val, 16) + else: # 3-value vector where HLT or NXT can be encountered + # save current instruction in the lookup table + targetmem[ltoffset + ltpos] = codepos + ltpos += 1 + for v in line: + if v == 'HLT': + targetmem[codepos] = ltoffset + elif v == 'NXT': + targetmem[codepos] = ltoffset + ltpos + else: + targetmem[codepos] = int(v, 16) + codepos += 1 + # pass 2 - fill in NXT + codepos = 0 + ltpos = 1 + for line in stage4src: + if line[0] == NRJDIR_ORG: # handle .org + codepos = int(line[1], 16) + elif line[0] == NRJDIR_SET: # handle 
.set + addr = int(line[1], 16) + val = line[2] + if val == 'NXT': + val = targetmem[ltoffset + ltpos] + targetmem[addr] = val + else: # 3-value vector where HLT or NXT can be encountered + ltpos += 1 + for v in line: + codepos += 1 + + # now, we have assembled our target memory snapshot, let's write the output file + + outf = open(dstfname, "wb") + targetmem.tofile(outf) + outf.close() + print('Assembled %s' % dstfname) + + +if __name__ == '__main__': # nrjasm entry point + version = '0.0.1' + print('nrjasm v%s by Luxferre, 2022' % version) + if len(sys.argv) > 2: + print('Assembling %s into %s...' % (sys.argv[1], sys.argv[2])) + start_assembly(sys.argv[1], sys.argv[2]) + else: + print('Usage: nrjasm.py [source] [binary]') + diff --git a/stdlib.nrjasm b/stdlib.nrjasm @@ -0,0 +1,21 @@ +; nrjasm standard library starts here + +; custom macros always take 3 values, usually cell addresses (referred to as %A, %B and %C) and defined between .def and .end (no nesting allowed) +; custom macros are always expanded before the elementary macros +; if %C is not passed, it is replaced with NXT +; if %B and/or %A is not passed, it is replaced with HLT + +.def .lbl ; define labels + .var %A FREE ; allocate a variable with the name in %A, then set it to the next instruction address: + .set @%A NXT ; %A is directly substituted as text, so we can use it after the dereferencing operator +.end + +; define a reusable buffer variable for our following macros +.var setbuf FREE ; we don't care which address it will actually be + +.def MOV ; transfer one cell to another, usage: MOV dst src + @setbuf HLT NXT ; first, zero out the setbuf variable by performing NOR with 0xFFFF + %A HLT NXT ; then, zero out the destination cell by performing NOR with 0xFFFF + @setbuf %B NXT ; then, set setbuf variable to the inverted source value + %A @setbuf %C ; finally, set destination cell to the inverted setbuf value ( = source value) +.end