commit d712730bb499a777f41635b72e82953264c37cd7
parent f560cc5cbdcc96a162049e546be89d90cec5264d
Author: Luxferre <lux@ferre>
Date: Fri, 2 Sep 2022 19:57:59 +0300
First working assembler!
Diffstat:
A | example.nrjasm | | | 40 | ++++++++++++++++++++++++++++++++++++++++ |
M | nrj.c | | | 4 | ++-- |
A | nrjasm.py | | | 218 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | stdlib.nrjasm | | | 21 | +++++++++++++++++++++ |
4 files changed, 281 insertions(+), 2 deletions(-)
diff --git a/example.nrjasm b/example.nrjasm
@@ -0,0 +1,40 @@
+; a proposed assembly syntax for NRJ machines (example for NRJ16)
+; semicolons are comments
+; preprocessor instructions start with dot (.)
+; every non-preprocessor instruction creates an entry in the lookup table
+; all addressing is in words
+; we usually start at the word 3 (don't pre-fill the I/O buffers)
+
+.bit 16 ; word/address size: NRJ16 is the default setting
+.org 3 ; .org defines the start of further code/data (in words, hex)
+
+; include the standard library
+
+.inc stdlib.nrjasm
+
+.var x 12EF ; .var defines a label for a particular memory location
+.var y 12F0 ; define another variable at 0x12F0
+.set @x 33EE ; .set sets a memory location to a particular hex constant at build time, @ dereferences a label into its address
+.set @y 'M ; ' dereferences a character into a whole word with its ASCII code
+
+; we CANNOT use dereferencing operators with .var, only with .set or directly
+
+; the .inc instruction (used above) includes a snippet from another file in the same directory
+
+; now, main elementary macros:
+; NXT - address of the lookup table cell that points to the next instruction
+; HLT - address of the lookup table cell that holds the halting address (0xFFFF for NRJ16, depends on .bit)
+; FREE - address of the next available (at build time) memory cell, can only be used in .var
+
+; for the lookup table and CUR/NXT macros to work correctly, the code must start at an address divisible by 3
+; note that FREE doesn't intelligently detect the available cells, it only takes the next one after the maximum address used
+; so in our case, the first FREE instance will be substituted with 12F1, the next with 12F2 and so on
+
+
+; now, let's output a character by transferring the y value to the output cell 1
+; in an endless loop
+
+.lbl myloop
+MOV 1 @y @myloop ; output the character and jump to the beginning
+; we don't have to explicitly zero out the cell 1 as it is done by the I/O logic
+
diff --git a/nrj.c b/nrj.c
@@ -41,7 +41,7 @@ void nrj_run(NRJWORD *mem, NRJWORD pc) { /* Main NRJ engine - just 12 lines of C
mem[mem[pc]] = (~(mem[mem[pc]] | mem[mem[pc+1]])) & MAXADDR; /* then perform the NOR operation */
pc = mem[mem[pc+2]]; /* then perform the reference jump operation */
if(mem[1]) { /* then handle output if word 1 is set */
- nrj_out(&mem[2], &mem[mem[1]]); /* output the value from the location specified in word 1 */
+ nrj_out(&mem[2], &mem[1]); /* output the value from the word 1 */
mem[1] = (NRJWORD) 0; /* clear word 1 */
}
}
@@ -58,7 +58,7 @@ int main(int argc, char* argv[]) { /* emulator entry point: nrj program.bin */
fseek(prog, 0, SEEK_END);
int flen = ftell(prog);
fseek(prog, 0, SEEK_SET);
- fread(mem, sizeof(NRJWORD), (flen/sizeof(NRJWORD)) & MAXADDR, prog);
+ fread(mem, sizeof(NRJWORD), flen/sizeof(NRJWORD), prog);
fclose(prog);
tcgetattr(0, &tty_opts_backup);
atexit(&restore_term);
diff --git a/nrjasm.py b/nrjasm.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+# The reference assembler for NRJ OISC (tested for NRJ16)
+# By Luxferre, 2022, public domain
+
+import sys
+import array
+from os.path import realpath
+
+# tunable constants, kept in one place so that major breaking syntax changes are easy to make
+
+NRJDEF_BITS = 16 # default word/address size, used when the source has no .bit directive
+
+# service characters
+NRJCHAR_COMMENT = ';' # everything after ; on a source line is a comment
+NRJCHAR_VARDEREF = '@' # @name is replaced with the address of the variable "name"
+NRJCHAR_CHARDEREF = "'" # 'c is replaced with the character code of c
+NRJCHAR_SUBST = '%' # prefix of the parameter placeholders inside macro bodies
+
+# macro parameter placeholders
+NRJVAR1 = NRJCHAR_SUBST + 'A' # %A - first macro parameter
+NRJVAR2 = NRJCHAR_SUBST + 'B' # %B - second macro parameter
+NRJVAR3 = NRJCHAR_SUBST + 'C' # %C - third macro parameter
+
+# preprocessor directives and built-in (elementary) macro names
+NRJDIR_INC = '.inc'
+NRJDIR_BITS = '.bit'
+NRJDIR_ORG = '.org'
+NRJDIR_DEF = '.def'
+NRJDIR_END = '.end'
+NRJDIR_VAR = '.var'
+NRJDIR_SET = '.set'
+NRJDIR_FREE = 'FREE'
+NRJDIR_NXT = 'NXT'
+NRJDIR_HLT = 'HLT'
+
+included_files = [] # real paths of the source files read so far, checked to skip repeated/cyclic includes
+
+def readsrc(fname): # read source file contents, stripping comments, empty lines and trailing/leading whitespace
+ f = open(fname, 'r')
+ rawlines = f.readlines()
+ f.close()
+ lines = []
+ global included_files
+ included_files.append(realpath(fname))
+ for line in rawlines:
+ line = line.split(NRJCHAR_COMMENT)[0].strip()
+ if len(line) > 0:
+ tokens = line.split() # split on any whitespace, which is what we need
+ if tokens[0] == NRJDIR_INC: # process include directive immediately
+ incfname = realpath(' '.join(tokens[1:])) # because the name may include spaces
+ if incfname not in included_files: # cyclic inclusion protection
+ incfile = readsrc(incfname) # call itself recursively, trying to include a file
+ included_files.append(incfname) # update the list of included files
+ lines.extend(incfile) # update the source with the included contents in place
+ else:
+ print('Attempt to include an already included file %s, ignoring!' % incfname)
+ else: # otherwise just append the tokenized source line
+ lines.append(tokens)
+ return lines
+
+def start_assembly(srcfname, dstfname): # main assembly method
+ wordsize = NRJDEF_BITS # define machine word/address size
+
+ # we're starting with tokenized Stage 1 source: all includes processed, comments and whitespace stripped
+ stage1src = readsrc(srcfname)
+
+ # Stage 2: scan the source for the first word size set directive
+ for line in stage1src:
+ if line[0] == NRJDIR_BITS:
+ wordsize = int(line[1])
+ break
+ print('Building for NRJ%u' % wordsize)
+
+ # Stage 3: expand all macros
+ stage3src = []
+ macrobuffers = {}
+ macrostart = False
+ macroname = None
+ for line in stage1src:
+ if macrostart: # we already are buffering a macro
+ if line[0] == NRJDIR_END: # macro ended and saved in the buffers
+ macrostart = False
+ macroname = None
+ else: # continue buffering
+ macrobuffers[macroname].append(line)
+ else: # usual code
+ if line[0] == NRJDIR_DEF: # starting a macro
+ macroname = line[1]
+ macrobuffers[macroname] = [] # prepare the place to buffer the macro into
+ macrostart = True
+ elif line[0] != NRJDIR_BITS: # ignoring word size directive as we already processed it
+ if line[0] in macrobuffers: # detected an already compiled macro, substituting the code and parameters
+ p1 = NRJDIR_HLT # placeholders for missing parameters
+ p2 = NRJDIR_HLT
+ p3 = NRJDIR_NXT # assume we're referring to the next address in p3
+ if len(line) > 1: # fill the first parameter if present
+ p1 = line[1]
+ if len(line) > 2: # fill the second parameter if present
+ p2 = line[2]
+ if len(line) > 3: # fill the third parameter if present
+ p3 = line[3]
+ for macroline in macrobuffers[line[0]]: # now, perform the macrosubstitution with parameter replacement
+ stage3src.append(' '.join(macroline).replace(NRJVAR1, p1).replace(NRJVAR2, p2).replace(NRJVAR3, p3).split())
+ else: # append a normal line
+ stage3src.append(line)
+
+ # Stage 4: now, process .var directive, FREE directive, @ and ' dereferencing operators
+ vartable = {} # don't store numeric locations here yet, only string representations (hex or FREE)
+ stage4src = []
+ for line in stage3src:
+ if line[0] == NRJDIR_VAR: # .var directive: no @ or ' operators allowed here
+ if line[2] == NRJDIR_FREE:
+ vartable[line[1]] = 0
+ else:
+ vartable[line[1]] = int(line[2], 16)
+ # now, fill in the FREE bits
+ maxvar = 0
+ for vname in vartable:
+ if vartable[vname] > maxvar:
+ maxvar = vartable[vname]
+ for vname in vartable:
+ if vartable[vname] == 0:
+ maxvar += 1
+ vartable[vname] = maxvar
+ for line in stage3src:
+ if line[0] != NRJDIR_VAR: # finally, perform variable substitution
+ # but first, attempt to perform character substitution
+ for i, el in enumerate(line):
+ if el.startswith(NRJCHAR_CHARDEREF):
+ line[i] = hex(ord(el[1]))[2:].upper()
+ sline = ' '.join(line)
+ for vname in vartable:
+ sline = sline.replace(NRJCHAR_VARDEREF+vname, hex(vartable[vname])[2:].upper())
+ stage4src.append(sline.split())
+
+ # now, our Stage 4 code is fully flat and we can start allocating memory for it
+ # directives left to process at this point: .org, .set, NXT, HLT
+ # (we cannot process .set before because it can also take value of NXT or HLT)
+
+ memsize = 1 << wordsize
+ haltaddr = memsize - 1 # halting address to be filled in the lookup table
+ print('Allocating %u %u-bit words of memory...' % (memsize, wordsize))
+ memmod = 'H'
+ if wordsize >= 32:
+ memmod = 'L'
+ elif wordsize >= 64:
+ memmod = 'Q'
+ elif wordsize <= 8:
+ memmod = 'B'
+ targetmem = array.array(memmod, [0]*memsize)
+
+ # here is the trickiest part of the whole assembly process - building a lookup table
+ # as NRJ can't directly jump to the next instruction by itself, we need to tell it to
+ # the NXT macro will be replaced with a cell in the lookup table that points to the next instruction
+ # and the lookup table will also take some memory in the machine
+
+ ltoffset = memsize >> 1 # in the worst case scenario, the code will take half of all memory and lookup table will take the other half
+ targetmem[ltoffset] = haltaddr # the first lookup table entry is always the halting address
+ codepos = 0
+ ltpos = 1
+ # let's iterate over the code
+ # pass 1
+ for line in stage4src:
+ if line[0] == NRJDIR_ORG: # handle .org
+ codepos = int(line[1], 16)
+ elif line[0] == NRJDIR_SET: # handle .set
+ addr = int(line[1], 16)
+ val = line[2]
+ if val == 'HLT':
+ targetmem[addr] = ltoffset
+ elif val != 'NXT':
+ targetmem[addr] = int(val, 16)
+ else: # 3-value vector where HLT or NXT can be encountered
+ # save current instruction in the lookup table
+ targetmem[ltoffset + ltpos] = codepos
+ ltpos += 1
+ for v in line:
+ if v == 'HLT':
+ targetmem[codepos] = ltoffset
+ elif v == 'NXT':
+ targetmem[codepos] = ltoffset + ltpos
+ else:
+ targetmem[codepos] = int(v, 16)
+ codepos += 1
+ # pass 2 - fill in NXT
+ codepos = 0
+ ltpos = 1
+ for line in stage4src:
+ if line[0] == NRJDIR_ORG: # handle .org
+ codepos = int(line[1], 16)
+ elif line[0] == NRJDIR_SET: # handle .set
+ addr = int(line[1], 16)
+ val = line[2]
+ if val == 'NXT':
+ val = targetmem[ltoffset + ltpos]
+ targetmem[addr] = val
+ else: # 3-value vector where HLT or NXT can be encountered
+ ltpos += 1
+ for v in line:
+ codepos += 1
+
+ # now, we have assembled our target memory snapshot, let's write the output file
+
+ outf = open(dstfname, "wb")
+ targetmem.tofile(outf)
+ outf.close()
+ print('Assembled %s' % dstfname)
+
+
+if __name__ == '__main__': # nrjasm entry point
+ version = '0.0.1'
+ print('nrjasm v%s by Luxferre, 2022' % version)
+ if len(sys.argv) > 2:
+ print('Assembling %s into %s...' % (sys.argv[1], sys.argv[2]))
+ start_assembly(sys.argv[1], sys.argv[2])
+ else:
+ print('Usage: nrjasm.py [source] [binary]')
+
diff --git a/stdlib.nrjasm b/stdlib.nrjasm
@@ -0,0 +1,21 @@
+; nrjasm standard library starts here
+
+; custom macros always take 3 values, usually cell addresses (referred to as %A, %B and %C) and defined between .def and .end (no nesting allowed)
+; custom macros are always expanded before the elementary macros
+; if %C is not passed, it is replaced with NXT
+; if %B and/or %A is not passed, it is replaced with HLT
+
+.def .lbl ; define labels, usage: .lbl name
+ .var %A FREE ; allocate a variable with the name in %A, then set it to the next instruction address:
+ .set @%A NXT ; %A is directly substituted as text, so we can use it after the dereferencing operator
+.end
+
+; define a reusable buffer variable for our following macros
+.var setbuf FREE ; we don't care which address it will actually be
+
+.def MOV ; transfer one cell to another, usage: MOV dst src [next] (next is the cell holding the address to jump to afterwards, NXT if omitted)
+ @setbuf HLT NXT ; first, zero out the setbuf variable by performing NOR with 0xFFFF
+ %A HLT NXT ; then, zero out the destination cell by performing NOR with 0xFFFF
+ @setbuf %B NXT ; then, set setbuf variable to the inverted source value
+ %A @setbuf %C ; finally, set destination cell to the inverted setbuf value ( = source value)
+.end