#!/bin/bash
# gmi2txt.sh - a simple helper tool to create a formatted and CRLF-terminated
# plaintext from the Gemtext passed via standard input
#
# Usage: cat [file] | gmi2txt.sh [page_width] [leading_spaces] [trailing_spaces]
# (pass 0 as the reflow width if you want to pass the other parameters
# but don't want to turn on reflow logic)
#
# Created by Luxferre in 2023, released into public domain

shopt -s extglob # the *( ) patterns used for link parsing need extended globbing

TARGET_WIDTH="${1:-0}" # reflow is off by default
LSPACES="${2:-0}"
TSPACES="${3:-0}"

# The three parameters end up inside arithmetic and inside a printf format
# string, so accept nothing but plain decimal digits.
for param in TARGET_WIDTH LSPACES TSPACES; do
  if [[ ! "${!param}" =~ ^[0-9]+$ ]]; then
    printf 'gmi2txt.sh: %s must be a non-negative integer, got "%s"\n' \
      "$param" "${!param}" >&2
    exit 2
  fi
  # force base 10 so that a value such as "08" is not rejected as bad octal
  printf -v "$param" '%d' "$(( 10#${!param} ))"
done
unset param

SPC=$'\x20'

FORMAT_WIDTH=0 # keep the padding width distinct from the target reflow width
(( TSPACES > 0 )) && FORMAT_WIDTH="$TARGET_WIDTH" # pad the text only if trailing spaces follow it

# params: (empty -> leading pad), text, (empty -> trailing pad)
reflowfmt="%-${LSPACES}s%-${FORMAT_WIDTH}s%-${TSPACES}s\n"

#######################################
# Wrap a single line of text to TARGET_WIDTH columns.
# A break is placed on the last space that keeps the piece within the width
# (that one space is consumed); a run without any usable space is split
# exactly at the width and no character is discarded.
# Globals:   TARGET_WIDTH, SPC, reflowfmt (all read)
# Arguments: $1 - the text to wrap (a single line, no LF inside)
# Outputs:   the wrapped pieces on stdout, each formatted with reflowfmt
#            and terminated by LF
# Returns:   0
#######################################
reflow_line() {
  local line="$1"
  local width="$TARGET_WIDTH"
  local rest head cut

  # reflow is off, or the line already fits: emit it as a single piece
  if (( width == 0 || ${#line} <= width )); then
    # shellcheck disable=SC2059 # the format is built from validated integers
    printf "$reflowfmt" '' "$line" ''
    return 0
  fi

  rest="$line"
  while (( ${#rest} > width )); do
    head="${rest:0:width}"
    if [[ "${rest:width:1}" == "$SPC" ]]; then
      # the space sits right after a full-width piece: keep the piece, drop the space
      rest="${rest:width+1}"
    else
      cut="${head%"$SPC"*}" # everything before the last space of the piece
      if [[ -n "$cut" && "$cut" != "$head" ]]; then
        head="$cut"
        rest="${rest:${#cut}+1}" # continue right after the space we broke on
      else
        # no space at all (or only at column 0): hard break, keep every character
        rest="${rest:width}"
      fi
    fi
    # shellcheck disable=SC2059
    printf "$reflowfmt" '' "$head" ''
  done

  if [[ -n "$rest" ]]; then # output the last unprocessed chunk
    # shellcheck disable=SC2059
    printf "$reflowfmt" '' "$rest" ''
  fi
  return 0
}

#######################################
# Turn a Gemtext link line ("=>[blanks]URL[blanks label]") into "label: URL".
# A link without a label yields ": URL".
# Arguments: $1 - the link line, starting with "=>"
# Outputs:   the reformatted link on stdout, without a line terminator
#######################################
format_link() {
  local rest="${1#'=>'}" # drop the link signature
  local url desc
  rest="${rest##*([[:blank:]])}" # ...and any whitespace that follows it
  url="${rest%%[[:blank:]]*}"    # everything up to the next whitespace (or the end of line)
  desc="${rest#"$url"}"          # quoted: the URL must be taken literally, not as a glob
  desc="${desc##*([[:blank:]])}" # trim the label on the left
  desc="${desc%%*([[:blank:]])}" # and on the right
  printf '%s: %s' "$desc" "$url"
}

#######################################
# Reflow a text and print every resulting piece terminated by CRLF.
# Arguments: $1 - the text to reflow
# Outputs:   CRLF-terminated lines on stdout
#######################################
emit_crlf() {
  local piece
  while IFS= read -r piece; do
    printf '%s\r\n' "$piece"
  done < <(reflow_line "$1")
}

#######################################
# Convert the Gemtext arriving on standard input, line by line.
# Outputs: the formatted CRLF-terminated plaintext on stdout
#######################################
main() {
  local line
  # the "|| [[ -n ... ]]" part keeps a final line that lacks a trailing LF
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}" # remove a trailing CR if it is there
    if [[ "$line" == '=>'* ]]; then # we have a linkable resource
      emit_crlf "$(format_link "$line")"
    elif [[ "$line" == '```'* ]]; then # preformatting toggler: replaced by an empty line
      emit_crlf ''
    else # we have an info line
      emit_crlf "$line"
    fi
  done
}

main