tii

Tcl-based suite for working with ii/idec protocol
git clone git://git.luxferre.top/tii.git
Log | Files | Refs

commit c34fba498a74bebccfcf65c6d13cf26509b8d09e
Author: Luxferre <lux@ferre>
Date:   Mon, 21 Oct 2024 17:09:52 +0300

[WIP] initial upload

Diffstat:
Atiifetch.tcl | 290+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 290 insertions(+), 0 deletions(-)

diff --git a/tiifetch.tcl b/tiifetch.tcl @@ -0,0 +1,290 @@ +#!/usr/bin/env tclsh +# tiifetch: fetch all data from an ii/idec station into the local text db +# (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md) +# Usage: tiifetch station_http_url [echos] [db_dir] +# The echo list should be delimited with slash (/), comma (,) or semicolon (;) +# if no echos are specified (or "" is passed), then list.txt will be fetched +# and then all missing echo content from it will be downloaded +# If db_dir isn't specified, it's fetched and merged into the +# tiidb directory in the program root +# with echoconfs and messages respectively +# This component only fetches the messages, doesn't parse or display them +# Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex +# Depends on Tcllib for URI parsing +# Created by Luxferre in 2024, released into public domain + +package require http +package require uri + +# autodetect TclTLS support and enable HTTPS request support if detected +set tls_support 0 +catch {package require tls; set tls_support 1} +if {$tls_support eq 1} { + ::http::register https 443 [list ::tls::socket -autoservername true] +} + +proc url2dict {inputurl} { + set out [dict create] + if [regexp {^(.*)://} $inputurl _ lscheme] { + dict set out scheme $lscheme + } else { + dict set out handler "render_handler_invalid" + return $out + } + set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]] + set rhost [dict get $rawout host] + set rpath [dict get $rawout path] + set rport [dict get $rawout port] + set secondarydata [dict get $rawout query] + dict set out host $rhost + set selector $rpath + dict set out handler "render_handler_$lscheme" + # protocol-specific request logic + switch "$lscheme" { + gophers - + gopher { + if {$rport eq ""} {set rport 70} + set selector [string cat [string range $rpath 1 end] "\r\n"] + dict set out handler render_handler_gopher + } + finger { + if {$rport eq ""} {set rport 79} + set selector "$selector\r\n" + } + spartan { + if {$rport eq ""} {set rport 300} + if {$rpath eq ""} {set rpath "/"} + if {![string match "/*" $rpath]} { + set rpath "/$rpath" + } + set blen [string length $secondarydata] + set selector "$rhost $rpath $blen\r\n$secondarydata" + } + nex { + if {$rport eq ""} {set rport 1900} + set selector "$selector\r\n" + } + gemini { + if {$rport eq ""} {set rport 1965} + set selector "$inputurl\r\n" + } + default {dict set out handler render_handler_none} + } + dict set out path $rpath + dict set out selector $selector + dict set out port $rport + return $out +} + +proc reqresp {host port reqline is_tls encoding} { + global sock_response + set sock_net_timeout 5000 + set sock 0 + if {$is_tls eq 1} { + catch {set sock [::tls::socket -autoservername true -async $host $port]} + } elseif {$is_tls eq 2} { + catch {set sock [::tls::socket -autoservername true -async $host $port]} + if {$sock eq 0} {catch {set sock [socket -async $host $port]}} + } else { + catch {set sock [socket -async $host $port]} + } + if {$sock eq 0} {set sock_response ""; return} + global rcv_end_$sock + unset -nocomplain rcv_end_$sock + if {$encoding eq ""} {set encoding utf-8} + fconfigure $sock -translation binary -buffering none -encoding $encoding + fileevent $sock writable [list connected $sock $reqline] + proc connected {sock reqline} { + fileevent $sock writable {} + puts -nonewline $sock "$reqline" + flush $sock + fileevent $sock readable [list rdbl $sock] + } + set sock_response "" + proc rdbl {sock} { + global sock_response rcv_end_$sock + while {![eof $sock]} { + append sock_response [read $sock] + } + set rcv_end_$sock 0 + } + after $sock_net_timeout "global rcv_end_$sock; set rcv_end_$sock 1" + vwait rcv_end_$sock + catch {close $sock} + unset -nocomplain rcv_end_$sock +} + + +# file download helper +proc getfile {url} { + set url [regsub -all {([^:])//} $url {\1/}] + set urlparts [url2dict $url] + set scheme [dict get $urlparts scheme] + set host [dict get $urlparts host] + set port [dict get $urlparts port] + set sel [dict get $urlparts selector] + global sock_response tls_support + switch $tls_support { + 0 {set localtls 0} + 1 {set localtls 2} + } + switch $scheme { + gophers - gopher - finger - nex { + if {$scheme eq "gophers"} {set localtls 1} + reqresp $host $port $sel $localtls utf-8 + set body "$sock_response" + set sock_response "" + return $body + } + gemini - spartan { + if {$scheme eq "gemini"} {set localtls 1} + reqresp $host $port $sel $localtls utf-8 + set body "$sock_response" + set sock_response "" + if {[regexp {^([^\n]*)\n} $body _ statusline]} { + set statusline [string trimright $statusline] + set statusparts [split $statusline " "] + set statuscode [lindex $statusparts 0] + set mainstatuscode [string index $statuscode 0] + if {$mainstatuscode eq 2} { + regsub {.*?\n} $body "" body + return $body + } else {return {}} + } else {return {}} + } + https - http { + set hs [::http::geturl $url -binary 1 -keepalive 1 -timeout 5000] + return [::http::data $hs] + } + default {return {}} + } +} + +# file read helper +proc readfile {fname} { + set fp [open $fname r] + fconfigure $fp -encoding utf-8 + set data [read $fp] + close $fp + return $data +} + +# file write helper (leaves a newline at the end) +proc writefileln {fname data} { + set fp [open $fname w] + fconfigure $fp -encoding utf-8 + puts $fp $data + close $fp +} + +# main logic proc +proc fetchiidb {url echos dbdir dolog} { + # trim the parameters + set url [string trim $url] + set echos [string trim $echos] + set dbdir [file normalize [string trim $dbdir]] + set echodir [file join $dbdir "echo"] + set msgdir [file join $dbdir "msg"] + # ensure that the necessary dirs exist + file mkdir $dbdir $echodir $msgdir + # attempt to fetch the echolist if echos are empty + if {$echos eq {}} { + if {$dolog eq 1} {puts "Fetching echolist..."} + set echolist [getfile "$url/list.txt"] + set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}] + } else { + set echos [split $echos "/,;"] + } + if {$dolog eq 1} {puts "Echos to fetch: $echos"} + # pass the echo list and fetch the message IDs + set echos [lmap s $echos {string trim $s}] + set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]] + # iterate over the fetched data and fetch corresponding messages + set curecho "" + set datalines [split $echodata "\n"] + set echomap "" + # build the map of lists of message IDs + foreach line $datalines { + set line [string trim $line] + # detect if the line is related to echo name or message ID + if {[string first "." $line] eq -1} { # message ID + if {$curecho ne ""} { + dict lappend echomap $curecho $line + } + } else { # echo name + set curecho $line + dict set echomap $curecho "" + } + } + if {$dolog eq 1} {puts "Echomap built"} + # detect how many message IDs we can pass in a single query + # (assuming the maximum GET length is 256 chars) + # then we assume 21 character per message ID + set maxids [expr {int(256 / 21)}] + # pass the echo list and fetch the message IDs + # now, process the map we've built + dict for {echoname msgids} $echomap { + # get the existing message IDs in the echo + set echofile [file join $echodir $echoname] + set oldmsgids "" + if [file exists $echofile] { + set oldmsgids [lmap s [split [readfile $echofile] "\n"] {string trim $s}] + } + # pre-filter the new message IDs to fetch + set newmsgids [lmap m [concat $oldmsgids $msgids] { + if {$m in $oldmsgids && $m in $msgids} continue + if {$m eq {}} continue + set m + }] + # save the echo index file with all message IDs + writefileln $echofile [join $msgids "\n"] + if {$dolog eq 1} {puts "Fetching [llength $newmsgids] new messages from $echoname, $maxids messages per request..."} + set idgroups "" + set grcount 0 + set localcount 0 + foreach nmid $newmsgids { # iterate over new messages to group them + if {$nmid ne ""} { + dict lappend idgroups $grcount $nmid + incr localcount + if {$localcount > $maxids} { + incr grcount + set localcount 0 + } + } + } + dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages + # get the message data in the bundle format + set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]] + set bdata [split $msgbundle "\n"] + foreach bline $bdata { + set parts [split $bline ":"] + if {[llength $parts] > 1} { # valid message + set mid [lindex $parts 0] + set mdata [binary decode base64 [lindex $parts 1]] + writefileln [file join $msgdir $mid] [encoding convertfrom utf-8 $mdata] + } + } + } + } +} + +# end of procs, start the entrypoint +if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return} + +set scriptpath [file normalize [info script]] +set appdir [file dirname $scriptpath] +# check if we're running from a starpack +if [string match *app-tiifetch $appdir] { + set appdir [file normalize [file join $appdir ".." ".." ".." ]] +} + +if {$argc > 0} { + set localdbdir [file join $appdir "tiidb"] + if {$argc > 2} { + set localdbdir [lindex $argv 2] + } + puts "Fetching messages, please wait..." + fetchiidb [lindex $argv 0] [lindex $argv 1] $localdbdir 1 + puts "Messages fetched" +} else {puts "No ii/idec station URL specified!"} +