tgl.awk (7889B)
# The Great Library of useful AWK functions
# Fully POSIX-compatible but sometimes depends on other POSIX commands
# Use with your programs like this:
# LANG=C awk -f tgl.awk -f your_prog.awk [args]
#
# Current functionality:
# * single character input: setterm, getchar
# * ASCII and UTF-8 codepoint conversion: ord, wctomb, mbtowc
# * loading binary files as decimal integers into arrays: loadbin
# * saving binary files from arrays with decimal integers: savebin
# * tangent and cotangent functions: tan, cotan
# * signum, floor and ceiling functions: sign, floor, ceil
# * test for native bitwise operation support: bw_native_support
# * reimplementation of most bitwise operations (unsigned 32-bit):
#   - NOT: bw_compl
#   - AND: bw_and
#   - OR: bw_or
#   - XOR: bw_xor
#   - NAND: bw_nand
#   - NOR: bw_nor
#   - >>: bw_rshift
#   - <<: bw_lshift
#
# Created by Luxferre in 2023, released into public domain

# set/restore the terminal input mode using stty
# usage: setterm(0|1|2|3)
# 0 - restore the original terminal input mode
# 1 - blocking single-character input with echo
# 2 - blocking single-character input without echo
# 3 - non-blocking single-character input without echo
# (any value other than 1, 2 or 3 is treated like 0)
# in pipes, this function doesn't do anything
# (but returns 0 since it's not an error)
# otherwise an actual stty exit code is returned
# the original mode is cached in the global TGL_TERMMODE on the first call
function setterm(mode,    cmd) {
  # a bare stty exits with non-zero status when stdin is not a terminal
  if(system("stty >/dev/null 2>&1")) return 0
  if(!TGL_TERMMODE) { # cache the original terminal input mode
    cmd = "stty -g"
    cmd | getline TGL_TERMMODE
    close(cmd)
  }
  # every mode spells out min/time/echo explicitly so that the result
  # does not depend on which mode was selected by a previous call
  if(mode == 1) cmd = "-icanon min 1 time 0 echo"
  else if(mode == 2) cmd = "-icanon min 1 time 0 -echo"
  else if(mode == 3) cmd = "-icanon time 0 min 0 -echo"
  else cmd = TGL_TERMMODE # restore the original mode
  # the space before the redirection keeps a trailing numeric argument
  # from being parsed by the shell as a file descriptor number
  return system("stty " cmd " >/dev/null 2>&1") # execute the stty command
}

# getchar emulation using od
# caches the read command for further usage
# also able to capture null bytes, unlike read/printf approach
# use in conjunction with setterm to achieve different input modes
# setting LANG=C envvar is recommended, for GAWK it is required
# usage: getchar() => integer
# when od produces no output (e.g. no byte available), 0 is returned,
# so this case cannot be told apart from an actual null byte
function getchar(    c) {
  # only POSIX od options are used; with -N1 a single value is printed
  if(!TGL_GCH_CMD) TGL_GCH_CMD = "od -tu1 -N1 -An -v" # first time usage
  TGL_GCH_CMD | getline c
  close(TGL_GCH_CMD) # a fresh od process is needed for every byte
  return int(c)
}

# get the ASCII code of a character
# setting LANG=C envvar is recommended, for GAWK it is required
# usage: ord(c) => integer
# the lookup table is kept in the global array TGL_ORD
# strings that are not in the table (e.g. longer than one character) give 0
function ord(c,    b) {
  # init char-to-ASCII mapping if it's not there yet
  # ("#" maps to 35, so its entry is non-zero once the table is filled)
  if(!TGL_ORD["#"])
    for(b = 0; b < 256; b++) TGL_ORD[sprintf("%c", b)] = b
  return int(TGL_ORD[c])
}

# encode a single integer UTF-8 codepoint into a byte sequence in a string
# setting LANG=C envvar is recommended, for GAWK it is required
# usage: wctomb(code) => string
# we can safely use the string type for all codepoints above 0 as all
# multibyte sequences have a high bit set, so no null byte is there
# for invalid codepoints (negative or above 1114111, i.e. U+10FFFF),
# an empty string will be returned
# note: the surrogate range is not rejected and is encoded like any
# other 3-byte value
function wctomb(code,    s) {
  code = int(code)
  if(code < 0 || code > 1114111) s = "" # outside of the Unicode codespace
  else if(code < 128) s = sprintf("%c", code) # single byte
  else if(code < 2048) # 2-byte sequence
    s = sprintf("%c%c", \
      192 + (int(code/64) % 32), \
      128 + (code % 64))
  else if(code < 65536) # 3-byte sequence
    s = sprintf("%c%c%c", \
      224 + (int(code/4096) % 16), \
      128 + (int(code/64) % 64), \
      128 + (code % 64))
  else # 4-byte sequence
    s = sprintf("%c%c%c%c", \
      240 + (int(code/262144) % 8), \
      128 + (int(code/4096) % 64), \
      128 + (int(code/64) % 64), \
      128 + (code % 64))
  return s
}

# decode a byte string into a UTF-8 codepoint
# setting LANG=C envvar is recommended, for GAWK it is required
# usage: mbtowc(s) => integer
# only the first character of the string is decoded
# decoding stops on the first encountered invalid byte or as soon as
# the number of trailer bytes announced by the header byte is consumed;
# the bits collected up to that point are returned
# an empty string or a trailer byte in the first position gives 0
function mbtowc(s,    len, code, b, pos, need) {
  len = length(s)
  if(len < 1) return 0
  b = ord(substr(s, 1, 1))
  if(b < 128) return b # it resolves into a single byte
  if(b < 192) return 0 # a trailer byte in the header position is invalid
  if(b < 224) {code = b % 32; need = 1} # header byte of 2-byte sequence
  else if(b < 240) {code = b % 16; need = 2} # header of 3-byte sequence
  else {code = b % 8; need = 3} # anything >= 240 is taken as 4-byte header
  for(pos = 2; pos <= len && need > 0; pos++) {
    b = ord(substr(s, pos, 1))
    if(b < 128 || b >= 192) break # not a trailer byte: stop decoding
    # shift only after the byte is validated, so that stopping early
    # never returns a value with an extra 6-bit shift applied
    code = code * 64 + (b % 64)
    need--
  }
  return code
}

# load any binary file into an AWK array (0-indexed), depends on od
# returns the resulting array length
# usage: loadbin(fname, arr, len, wordsize) => integer
# len parameter is optional, specifies how many bytes to read
# (if 0 or unset, read everything)
# wordsize parameter is optional, 1 byte by default
# words are decoded by od itself (-tu<wordsize>), the original notes
# describe multibyte words as little-endian
# the file name is passed to the shell single-quoted, i.e. literally
# the caller's $0 and NF are left untouched
function loadbin(fname, arr, len, wordsize,    cmd, i, line, n, k, f, q) {
  wordsize = int(wordsize)
  if(wordsize < 1) wordsize = 1
  len = int(len)
  # quote the name for the shell: wrap it into single quotes and
  # replace every embedded single quote with '\''
  q = fname
  gsub(/'/, "'\\''", q)
  cmd = "od -An -v -tu" wordsize ((len > 0) ? (" -N" len) : "") " '" q "'"
  i = 0
  # od may print several values per line, so every field is collected
  while((cmd | getline line) > 0) {
    n = split(line, f, " ")
    for(k = 1; k <= n; k++) arr[i++] = int(f[k])
  }
  close(cmd) # close the od process
  return i
}

# save an AWK array (0-indexed) into a binary file
# setting LANG=C envvar is recommended, for GAWK it is required
# returns the amount of written elements
# usage: savebin(fname, arr, len, wordsize) => integer
# wordsize parameter is optional, 1 byte by default
# multibyte words are written least significant byte first (little-endian)
# elements are expected to be non-negative integers
function savebin(fname, arr, len, wordsize,    i, j, v) {
  wordsize = int(wordsize)
  if(wordsize < 1) wordsize = 1
  len = int(len)
  printf("") > fname # truncate the file and open the stream
  for(i = 0; i < len; i++) {
    # force a numeric value: %c applied to a string value would print
    # the first character of the string instead of the encoded byte
    v = int(arr[i])
    for(j = 0; j < wordsize; j++) {
      printf("%c", v % 256) > fname # the stream is already open: appends
      v = int(v / 256)
    }
  }
  close(fname) # close the output file
  return i
}

# the missing tangent/cotangent functions

function tan(x) {return sin(x)/cos(x)}
function cotan(x) {return cos(x)/sin(x)}

# the missing sign/floor/ceil functions

# sign: -1 for negative values, 0 for zero, 1 for positive values
function sign(x) {return x < 0 ? -1 : !!x}
# floor: the largest integer not greater than x
function floor(x,    f) {
  f = int(x) # int() truncates towards zero
  if(x == f) return x
  return x >= 0 ? f : (f - 1)
}
# ceil: the smallest integer not less than x
function ceil(x,    f) {
  f = int(x) # int() truncates towards zero
  if(x == f) return x
  return x >= 0 ? (f + 1) : f
}

# Bitwise operations section

# test if the AWK engine has non-POSIX bitwise operation functions
# (and, or, xor, compl, lshift, rshift) implemented natively:
# if compl is missing, it will be concatenated with 1 and equal to 1
# so the inverse of this condition will be the result
function bw_native_support() {return (compl (1) != 1)}

# now, the implementation of the operations themselves
# note that all complements are 32-bit and all operands must be non-negative
# operands are truncated to integers before processing

function bw_compl(a) {return 4294967295 - int(a)}
# left shift: multiply by 2 for every bit position (no 32-bit wraparound)
function bw_lshift(a, b) {
  a = int(a)
  for(; b > 0; b--) a *= 2
  return int(a)
}
# right shift: halve and truncate for every bit position
function bw_rshift(a, b) {
  a = int(a)
  for(; b > 0; b--) a = int(a/2)
  return a
}
function bw_and(a, b,    v, r) {
  a = int(a); b = int(b)
  v = 1; r = 0
  # once either operand runs out of bits, no more result bits can be set
  while(a > 0 && b > 0) {
    if((a%2) == 1 && (b%2) == 1) r += v
    a = int(a/2)
    b = int(b/2)
    v *= 2
  }
  return int(r)
}
function bw_or(a, b,    v, r) {
  a = int(a); b = int(b)
  v = 1; r = 0
  while(a > 0 || b > 0) {
    if((a%2) == 1 || (b%2) == 1) r += v
    a = int(a/2)
    b = int(b/2)
    v *= 2
  }
  return int(r)
}
function bw_xor(a, b,    v, r) {
  a = int(a); b = int(b)
  v = 1; r = 0
  while(a > 0 || b > 0) {
    if((a%2) != (b%2)) r += v
    a = int(a/2)
    b = int(b/2)
    v *= 2
  }
  return int(r)
}
function bw_nand(a, b) {return bw_compl(bw_and(a,b))}
function bw_nor(a, b) {return bw_compl(bw_or(a,b))}