equi

A self-descriptive stack-based PC platform
git clone git://git.luxferre.top/equi.git
Log | Files | Refs | README | LICENSE

commit 0a23370e4fbdf8ededd4bf1776c32c13fd8d2f0a
Author: Luxferre <lux@ferre>
Date:   Sat,  6 Aug 2022 00:08:17 +0300

Up

Diffstat:
Aequi.c | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aequi.md | 134+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 259 insertions(+), 0 deletions(-)

diff --git a/equi.c b/equi.c @@ -0,0 +1,125 @@ +/* + * Equi platform reference implementation + * + * Created in 2022 by Luxferre, released into public domain + * + * See equi.md file for the specification and manual + * + * @license Unlicense <https://unlicense.org> + * @author Luxferre + */ + +/* Standard includes */ + +#include <stdlib.h> +#include <stdio.h> + +/* Definitions section */ + +#define ushort unsigned short /* basic 16-bit integer (assumes unsigned short is 16 bits wide on the target) */ +#define uchar unsigned char /* basic 8-bit integer */ +#define WS sizeof(ushort) /* Equi word size in bytes */ +#define CLT_ENTRY_LEN 6 /* Amount of significant compiled word characters */ +#define CLT_ENTRY_SIZE (CLT_ENTRY_LEN + WS) /* Full size in bytes taken by one CLT entry */ + +/* Configuration section (constants overridable at compile-time) */ + +/* Main and return stack size in bytes */ +#ifndef STACK_SIZE +#define STACK_SIZE 256 +#endif + +/* Literal stack size in bytes */ +#ifndef LIT_STACK_SIZE +#define LIT_STACK_SIZE 32 +#endif + +/* GPD (general purpose data) area start address */ +#ifndef GPD_AREA_START +#define GPD_AREA_START 0x2300 +#endif + +/* Command buffer start address */ +#ifndef CMD_BUF_START +#define CMD_BUF_START 0x5000 +#endif + +/* Command buffer size in bytes */ +#ifndef CMD_BUF_SIZE +#define CMD_BUF_SIZE 0x5700 +#endif + +/* Some necessary constants and offsets derived from the above values */ +#define STACK_SIZE_WORDS (STACK_SIZE / WS) /* Main and return stack size in words */ + +#define MAIN_STACK_ADDR 0 /* Main stack address */ +#define RETURN_STACK_ADDR STACK_SIZE /* Return stack address */ +#define LIT_STACK_ADDR (RETURN_STACK_ADDR + STACK_SIZE) /* Literal stack address */ +#define PC_ADDR (LIT_STACK_ADDR + LIT_STACK_SIZE) /* Program counter address */ +#define CBP_ADDR (PC_ADDR + WS) /* Compilation buffer pointer location address */ +#define CLTP_ADDR (CBP_ADDR + WS) /* Compilation lookup table pointer location address */ +#define FLAGS_ADDR (CLTP_ADDR + WS) /* Flags register address 
*/ +#define GPD_START_LOC_ADDR (FLAGS_ADDR + 1) /* Location where GPD_AREA_START value is stored */ +#define CMD_START_LOC_ADDR (GPD_START_LOC_ADDR + WS) /* Location where CMD_BUF_START value is stored */ +#define CMD_SIZE_LOC_ADDR (CMD_START_LOC_ADDR + WS) /* Location where CMD_BUF_SIZE value is stored */ +#define CLT_START (CMD_SIZE_LOC_ADDR + 213u) /* should be 0x300 by default: 2 bytes of the command buffer size word plus 211 reserved bytes */ +#define CLT_SIZE (GPD_AREA_START - CLT_START) /* Compilation lookup table size in bytes */ +#define CLT_ENTRIES_MAX (CLT_SIZE / CLT_ENTRY_SIZE) /* Maximum amount of entries in CLT, must be integer */ +#define GPD_AREA_SIZE (CMD_BUF_START - GPD_AREA_START) /* GPD area size in bytes */ + +/* + * Structures that describe Equi machine RAM using the above configuration + */ + +struct EquiFlags { + uchar II:1; /* instruction ignore mode */ + uchar CM:1; /* compilation mode */ + uchar IM:1; /* interpretation mode */ +}; + +struct CLTEntry { + uchar name[6]; /* compiled word name */ + ushort loc; /* compiled word location */ +}; + +struct EquiRAM { + ushort main_stack[STACK_SIZE_WORDS]; + ushort return_stack[STACK_SIZE_WORDS]; + uchar literal_stack[LIT_STACK_SIZE]; + ushort pc; /* program counter */ + ushort msp; /* main stack pointer; NOTE(review): msp and rsp have no counterpart in the *_ADDR macros (CBP_ADDR is PC_ADDR + WS), so the fields that follow sit 2 * WS bytes later than the macros say -- confirm which layout is intended */ + ushort rsp; /* return stack pointer */ + ushort cbp; /* compilation buffer pointer */ + ushort cltp; /* compilation lookup table pointer */ + struct EquiFlags flags; /* II/CM/IM mode flags; NOTE(review): the compiler may insert padding between this field and gpd_start, shifting later offsets -- verify on each target */ + ushort gpd_start; /* initialized to GPD_AREA_START below */ + ushort cmd_start; /* initialized to CMD_BUF_START below */ + ushort cmd_size; /* initialized to CMD_BUF_SIZE below */ + uchar reserved[207]; /* reserved space; NOTE(review): 207 is 4 less than the 211 bytes implied by CLT_START, which appears to compensate for msp and rsp -- confirm */ + struct CLTEntry clt[CLT_ENTRIES_MAX]; /* compilation lookup table */ + uchar gpd[GPD_AREA_SIZE]; /* general purpose data area */ + uchar cmdbuf[CMD_BUF_SIZE]; /* command buffer */ +}; + + +/* + * Before running the main code, instantiate and initialize the machine RAM (fields not listed are zero-initialized; designated initializers need C99 or later) + */ + +static struct EquiRAM ram = { + .gpd_start = GPD_AREA_START, + .cmd_start = CMD_BUF_START, + .cmd_size = CMD_BUF_SIZE +}; + +/* Also create an alternative view of the same RAM area for direct offset-based access */ +static uchar* flatram = (uchar *)&ram; + +int main(int 
argc, char* argv[]) { + + /* Placeholder: no interpreter logic yet; print the configured command buffer size in hex */ + + printf("0x%X", ram.cmd_size); + + return 0; +} diff --git a/equi.md b/equi.md @@ -0,0 +1,134 @@ +# Equi + +Equi is a general-purpose 16-bit stack-based platform (and a programming language/VM named the same) aimed at low-cost, low-energy computing. It was inspired by Forth, Uxn, VTL-2, SIMPL and some other similar projects. + +The name Equi comes from the fact each source code instruction is **equi**valent to a machine instruction. No, it isn't mapped to one machine instruction. It **is the** machine instruction. All the instructions and data in Equi are represented with printable ASCII characters only. This allows to bootstrap Equi code directly from the keyboard (any standard keyboard/keypad that allows serial input) using a tiny interpreter stored, for instance, in the hardware ROM. + +## Specification + +Main features of an Equi machine: + +- Instruction bus: 8-bit; +- Data bus: 16-bit; +- Address bus: 16-bit; +- Two 256-byte (128-word) stacks, main and return; +- One 32-byte literal stack; +- 16-bit program counter (PC); +- 16-bit compilation buffer pointer (CBP); +- 16-bit compilation lookup table pointer (CLTP); +- Interpretation mode (IM) flag; +- Compilation mode (CM) flag; +- Instruction ignore (II) flag; +- 42752 bytes of main RAM (0x0000-0xa6ff), 41984 bytes of which are available for the program and data and 768 bytes hold the three stacks and necessary service registers listed above; +- Up to 4GB flat persistent storage (tape, disk, flash etc); +- Serial terminal input and output; +- Up to 65535 peripheral extension ports. 
+ +The default Equi RAM layout is: + +Address range|Size (bytes)|Purpose +-------------|------------|--------------------- +0x0000-0x00ff|256 |Main stack +0x0100-0x01ff|256 |Return stack +0x0200-0x021f|32 |Literal stack +0x0220-0x0221|2 |PC +0x0222-0x0223|2 |CBP +0x0224-0x0225|2 |CLTP +0x0226 |1 |Flags (II/CM/IM) +0x0227-0x0228|2 |GPD area start (set to 0x2300 by default) +0x0229-0x022a|2 |Command buffer start (set to 0x5000 by default) +0x022b-0x022c|2 |Command buffer size (set to 0x5700 by default) +0x022d-0x02ff|211 |Reserved for internal usage by the interpreter and future extensions +0x0300-0x22ff|8192 |Compilation lookup table (CLT) - up to 1024 compiled words (6 bytes for literal + 2 bytes for CBP address each) by default +0x2300-0x4fff|11520 |General purpose data (GPD) area +0x5000-0xa6ff|22272 |Command buffer area + +This layout has been carefully chosen to achieve maximum portability: so that a full Equi system could fit inside the user area of Apple II's space between (physical) 0x0800 and 0xbf00 addresses, while still leaving 4096 bytes for the interpreter itself with all necessary peripheral drivers if so required. As such, Apple II equipped with full 48K RAM is supposedly the lowest spec possible target system Equi could run on with the default virtual RAM configuration. + +To preserve runtime integrity, Equi implementations are allowed to (but not required to) restrict all writes to the addresses below the GPD area start (0x2300 by default). On systems with less/more RAM where CLT and/or GPD and/or command buffer areas are smaller/larger, the values at 0x0227, 0x0229 and 0x022b must be populated accordingly. You don't need to fill in the sizes of CLT and GPD areas since they are calculated automatically from the specified offsets; you only need to specify how big your command buffer is to avoid any writes outside the user area. 
+ +Equi is strictly case-sensitive: all uppercase basic Latin letters, as well as a number of special characters, are reserved for machine instructions, and all custom words must be defined in lowercase only (additionally, `_` character is allowed in the identifiers). Within comments (see below), any characters can be used. + +The interpreter can run in one of the four modes: command (default), interpretation (IM), compilation (CM) and instruction ignore (II) mode. An Equi machine always starts in the command mode. The latter three are triggered by certain instructions that set the corresponding flags. The semantics of the compilation mode is similar to that of Forth, and will be covered in detail here later on. + +In the command mode, the interpreter doesn't perform any instruction execution and doesn't manipulate program counter (PC). Instead, it accumulates all characters typed from the standard input into the so-called command buffer. The only instruction Equi must react to in this mode is CR, the carriage return character, that sets PC to the command buffer start, sets the IM flag, **clears the CLT** and starts execution in the interpretation mode. Note that this also means that every Equi program file, even when run in a non-interactive environment, must end with a CR character, and as long as it does and every program has a halting `Q` instruction, you can safely concatenate several Equi programs in a single file to be executed sequentially. + +In the interpretation mode (IM flag set), when the interpreter encounters any of the following characters - `_0-9A-Fa-z` (not including `-`) - it pushes their ASCII values bytewise onto the literal stack (32-byte long, the overflown bytes are discarded). When any other character (except `:`, `"` or `'`) is encountered when the literal stack is not empty, the `#` instruction logic (see below) is performed automatically. If `:` is encountered, compilation mode logic is performed instead. 
If a `Q` instruction or a non-printable character is encountered, Equi returns to the command mode immediately. + +In the compilation mode, all instructions except `;` are skipped while the CM flag is set. When the interpreter encounters the `;` instruction, it performs the finalizing logic to save the compiled word into CLT (see below) and returns to the interpretation mode. + +In the instruction ignore mode (II flag set), all instructions except `)` (which unsets the II flag) are skipped and discarded. PC, however, does increase as usual in this mode. + +Note that II flag has the precedence over IM and CM flags and CM flag has the precedence over IM flag. I.e. you cannot exit the interpretation mode while being in the compilation mode, and you can't exit any other mode while being in the II mode. And surely enough you can't exit the command mode (interpreter shell itself) unless all three mode flags are unset. + +Equi's core instruction set is: + +Op |Stack state |Meaning +---|--------------------------------|---------------------------------------------------------- +`#`|`( -- )` |Literal: pop all characters from the literal stack, discard all `_a-z` characters, leave the top 4 characters and push the 16-bit value from them (in the order they were pushed) onto the main stack +`"`|`( -- lit1 lit2 ... 
)` |Pop all the values from the literal stack and push them onto the main stack as 16-bit values +` `|`( -- )` |No operation: whitespace can be used in the code for clarity and does not affect anything except PC +`(`|`( -- )` |Set the II flag: when it is set, the interpreter must ignore all instructions except `)`, used for writing comments +`)`|`( -- )` |Unset the II flag, returning to the normal interpretation or compilation mode +`:`|`( -- )` |Compilation mode start: set CM flag and set CBP to PC+1 value +`;`|`( -- )` |Compilation mode end: replace this instruction in-memory with `R` instruction, pop all characters from the literal stack, append the lookup table with the first 6 chars and CBP value, unset the CM flag and increase CLTP value by 8 +`'`|`( -- )` |Call the compiled word: pop all characters from the literal stack, leave the first 6, look them up in CLT for a CBP value, error out if not found, otherwise push PC to return stack and set PC to the CBP value +CR |`( -- )` |In the command mode, output a line break and switch to the interpretation mode (see above); in all other modes, identical to whitespace +`R`|`( -- )` |**R**eturn: pop and assign the PC value from the return stack +`]`|`( a -- )` |Pop the value from main stack and push onto return stack +`[`|`( -- a )` |Pop the value from return stack and push onto main stack +`L`|`( addr -- a )` |**L**oad a 16-bit value from `addr` +`S`|`( a addr -- )` |**S**tore a 16-bit value into `addr` +`W`|`( a addr -- )` |**W**rite an 8-bit value into `addr` (note that both value and address still must be 16-bit, the higher byte of the value is discarded) +`!`|`( a -- )` |Drop the top value from the stack +`$`|`( a -- a a )` |Duplicate the top value on the stack +`%`|`( a b -- b a )` |Swap two top values on the stack +`@`|`( a b c -- b c a )` |Rotate three top values on the stack +`\`|`( a b -- a b a )` |Copy over the second value on the stack +`J`|`( rel -- )` |**J**ump: increase or decrease PC according 
to the relative value (treated as signed, from -32768 to 32767) +`I`|`( cond rel -- ) ` |Pop relative value and condition. **I**f the condition value is not zero, `J` to the relative value +`X`|`( -- pc )` |Locate e**X**ecution point: push PC+1 value onto the main stack +`>`|`( a b -- a>b )` |Push 1 onto the stack if the second popped value is greater than the first, 0 otherwise +`<`|`( a b -- a<b )` |Push 1 onto the stack if the second popped value is less than the first, 0 otherwise +`=`|`( a b -- a==b )` |Push 1 onto the stack if the two popped values are equal, 0 otherwise +`+`|`( a b -- a+b )` |Sum +`-`|`( a b -- a-b )` |Difference +`*`|`( a b -- a*b )` |Product +`/`|`( a b -- a/b rem )` |Integer division (with remainder) +`N`|`( a -- -a )` |Single-instruction negation (complement to 65536) +`~`|`( a -- ~a )` |Bitwise NOT +`&`|`( a b -- a&b )` |Bitwise AND +`|`|`( a b -- a|b )` |Bitwise OR +`^`|`( a b -- a^b )` |Bitwise XOR +`.`|`( a -- ) ` |Output a character by the ASCII (or Unicode, if supported) value into the standard terminal +`,`|`( -- a ) ` |Non-blocking key input of an ASCII (or Unicode, if supported) value from the standard terminal +`?`|`( -- a ) ` |Blocking key input of an ASCII (or Unicode, if supported) value from the standard terminal +`P`|`( p1 p2 port -- status r1 r2 )`|**P**ort I/O: pass two 16-bit parameters to the port and read the operation status and results into the words on the stack top +`}`|`( adh adl len maddr -- status)`|Persistent storage write operation. Stack parameters: persistent address high 2 bytes, low 2 bytes, data length, RAM address +`{`|`( adh adl len maddr -- status)`|Persistent storage read operation. 
Stack parameters: persistent address high 2 bytes, low 2 bytes, data length, RAM address +`Q`|`( -- )` |**Q**uit the interpretation mode (unset IM flag if set), or the interpreter shell itself if in command mode (halt the machine when there is nowhere to exit to) + +This would be an example of loading a 15200-byte long Equi program from the persistent storage at address 0x12345678 into the RAM at the next free address and running it (discarding the load status for brevity): + +``` +1234#5678#3b60X0007+{!(CR) +``` + +The logic of this loader snippet is as follows. After the `X` instruction, the top of the stack contains the address occupied by the following byte (`0` in this case). Then we add the amount of bytes needed to shift loading to the end of the snippet, in our case 7 (additionally skipping `0`, `0`, `7`, `+`, `{` and `!`) and use the result as the target loading address in memory. Since this loader snippet itself is 22 bytes long and therefore ends at 0x5015 (with the default configuration, the command buffer starts at 0x5000) and the status is already popped from the stack, the execution will start from 0x5016 immediately. Please also notice how we don't need `#` after `3b60` (15200 in hex) and `0007` since all valid instructions except `:`, `'` and `"` trigger `#` automatically before execution when the literal stack is not empty. + +This should be enough to get started with any program, which also means once a boot ROM with an Equi shell is up, it can also be used as a stand-alone OS, like a Forth system or ROM-BASIC. This way, already loaded Equi programs can also load and call each other as long as the command buffer RAM permits and their exact locations in the persistent storage are known. And until the moment the Equi machine is put back into the command mode, all these programs can share word definitions with each other and reuse them as necessary. 
+ +Note that, due to the dynamic nature of word allocation and ability to reconfigure the runtime environment for different offsets depending on the target, absolute jumps are not directly supported in Equi and generally not recommended, although one can easily do them with `]R` sequence and/or calculate absolute positions using `X` instruction. + +Please also note that Equi doesn't specify any graphical or sound output capabilities. If such support is required, it generally must be implemented, as with any other peripheral, via the port I/O interface (`P`) instruction specific to a particular hardware/software implementation. Same goes for how standard serial terminal input/output is processed: Equi specification doesn't enforce any particular way. On the desktop/laptop PCs, however, it is advised, especially for software-based implementations/VMs, that the terminal I/O should be VT100-compatible, including, for instance, control character support and the output of an audiovisual bell for ASCII 0x07 (`\a` or `^G`). Depending on the target, these features may already be supported by the underlying OS's terminal emulator or may be implemented as a part of the VM itself. + +## Reference implementation + +Being a purely PC-oriented low-level runtime/programming environment, Equi has the reference implementation emulator/VM written in C (ANSI C89 standard), `equi.c`, compilable and runnable on all the systems supporting standard I/O. Note that this emulator: + +- doesn't fully implement `P` instruction but instead outputs its parameters to the standard error stream and puts three 0x0000 values back onto the stack, +- sandboxes the `{` and `}` operations using the file you supply in the command-line parameter. If you don't supply the file, these operations will effectively do nothing except putting 0x0000 (success status) onto the stack. + +The source code file should compile using any mainstream C compiler with C89 support, like GCC/DJGPP, Clang, TCC etc. 
However, it is also being developed to be compilable with CC65 compiler for targets like Apple II or Atari 800. All the machine/target specific configuration is done at compile time, using compiler command-line switches. Here are the instructions to build Equi using different known C compilers: + +