commit c34fba498a74bebccfcf65c6d13cf26509b8d09e
Author: Luxferre <lux@ferre>
Date: Mon, 21 Oct 2024 17:09:52 +0300
[WIP] initial upload
Diffstat:
A | tiifetch.tcl | | | 290 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 290 insertions(+), 0 deletions(-)
diff --git a/tiifetch.tcl b/tiifetch.tcl
@@ -0,0 +1,290 @@
+#!/usr/bin/env tclsh
+# tiifetch: fetch all data from an ii/idec station into the local text db
+# (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md)
+# Usage: tiifetch station_http_url [echos] [db_dir]
+# The echo list should be delimited with slash (/), comma (,) or semicolon (;)
+# if no echos are specified (or "" is passed), then list.txt will be fetched
+# and then all missing echo content from it will be downloaded
+# If db_dir isn't specified, it's fetched and merged into the
+# tiidb directory in the program root
+# with echoconfs and messages respectively
+# This component only fetches the messages, doesn't parse or display them
+# Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex
+# Depends on Tcllib for URI parsing
+# Created by Luxferre in 2024, released into public domain
+
+package require http
+package require uri
+
+# autodetect TclTLS support and enable HTTPS request support if detected
+set tls_support 0
+catch {package require tls; set tls_support 1}
+if {$tls_support eq 1} {
+ ::http::register https 443 [list ::tls::socket -autoservername true]
+}
+
+proc url2dict {inputurl} {
+ set out [dict create]
+ if [regexp {^(.*)://} $inputurl _ lscheme] {
+ dict set out scheme $lscheme
+ } else {
+ dict set out handler "render_handler_invalid"
+ return $out
+ }
+ set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]]
+ set rhost [dict get $rawout host]
+ set rpath [dict get $rawout path]
+ set rport [dict get $rawout port]
+ set secondarydata [dict get $rawout query]
+ dict set out host $rhost
+ set selector $rpath
+ dict set out handler "render_handler_$lscheme"
+ # protocol-specific request logic
+ switch "$lscheme" {
+ gophers -
+ gopher {
+ if {$rport eq ""} {set rport 70}
+ set selector [string cat [string range $rpath 1 end] "\r\n"]
+ dict set out handler render_handler_gopher
+ }
+ finger {
+ if {$rport eq ""} {set rport 79}
+ set selector "$selector\r\n"
+ }
+ spartan {
+ if {$rport eq ""} {set rport 300}
+ if {$rpath eq ""} {set rpath "/"}
+ if {![string match "/*" $rpath]} {
+ set rpath "/$rpath"
+ }
+ set blen [string length $secondarydata]
+ set selector "$rhost $rpath $blen\r\n$secondarydata"
+ }
+ nex {
+ if {$rport eq ""} {set rport 1900}
+ set selector "$selector\r\n"
+ }
+ gemini {
+ if {$rport eq ""} {set rport 1965}
+ set selector "$inputurl\r\n"
+ }
+ default {dict set out handler render_handler_none}
+ }
+ dict set out path $rpath
+ dict set out selector $selector
+ dict set out port $rport
+ return $out
+}
+
+proc reqresp {host port reqline is_tls encoding} {
+ global sock_response
+ set sock_net_timeout 5000
+ set sock 0
+ if {$is_tls eq 1} {
+ catch {set sock [::tls::socket -autoservername true -async $host $port]}
+ } elseif {$is_tls eq 2} {
+ catch {set sock [::tls::socket -autoservername true -async $host $port]}
+ if {$sock eq 0} {catch {set sock [socket -async $host $port]}}
+ } else {
+ catch {set sock [socket -async $host $port]}
+ }
+ if {$sock eq 0} {set sock_response ""; return}
+ global rcv_end_$sock
+ unset -nocomplain rcv_end_$sock
+ if {$encoding eq ""} {set encoding utf-8}
+ fconfigure $sock -translation binary -buffering none -encoding $encoding
+ fileevent $sock writable [list connected $sock $reqline]
+ proc connected {sock reqline} {
+ fileevent $sock writable {}
+ puts -nonewline $sock "$reqline"
+ flush $sock
+ fileevent $sock readable [list rdbl $sock]
+ }
+ set sock_response ""
+ proc rdbl {sock} {
+ global sock_response rcv_end_$sock
+ while {![eof $sock]} {
+ append sock_response [read $sock]
+ }
+ set rcv_end_$sock 0
+ }
+ after $sock_net_timeout "global rcv_end_$sock; set rcv_end_$sock 1"
+ vwait rcv_end_$sock
+ catch {close $sock}
+ unset -nocomplain rcv_end_$sock
+}
+
+
+# file download helper
+proc getfile {url} {
+ set url [regsub -all {([^:])//} $url {\1/}]
+ set urlparts [url2dict $url]
+ set scheme [dict get $urlparts scheme]
+ set host [dict get $urlparts host]
+ set port [dict get $urlparts port]
+ set sel [dict get $urlparts selector]
+ global sock_response tls_support
+ switch $tls_support {
+ 0 {set localtls 0}
+ 1 {set localtls 2}
+ }
+ switch $scheme {
+ gophers - gopher - finger - nex {
+ if {$scheme eq "gophers"} {set localtls 1}
+ reqresp $host $port $sel $localtls utf-8
+ set body "$sock_response"
+ set sock_response ""
+ return $body
+ }
+ gemini - spartan {
+ if {$scheme eq "gemini"} {set localtls 1}
+ reqresp $host $port $sel $localtls utf-8
+ set body "$sock_response"
+ set sock_response ""
+ if {[regexp {^([^\n]*)\n} $body _ statusline]} {
+ set statusline [string trimright $statusline]
+ set statusparts [split $statusline " "]
+ set statuscode [lindex $statusparts 0]
+ set mainstatuscode [string index $statuscode 0]
+ if {$mainstatuscode eq 2} {
+ regsub {.*?\n} $body "" body
+ return $body
+ } else {return {}}
+ } else {return {}}
+ }
+ https - http {
+ set hs [::http::geturl $url -binary 1 -keepalive 1 -timeout 5000]
+ return [::http::data $hs]
+ }
+ default {return {}}
+ }
+}
+
+# file read helper
+proc readfile {fname} {
+ set fp [open $fname r]
+ fconfigure $fp -encoding utf-8
+ set data [read $fp]
+ close $fp
+ return $data
+}
+
+# file write helper (leaves a newline at the end)
+proc writefileln {fname data} {
+ set fp [open $fname w]
+ fconfigure $fp -encoding utf-8
+ puts $fp $data
+ close $fp
+}
+
+# main logic proc
+proc fetchiidb {url echos dbdir dolog} {
+ # trim the parameters
+ set url [string trim $url]
+ set echos [string trim $echos]
+ set dbdir [file normalize [string trim $dbdir]]
+ set echodir [file join $dbdir "echo"]
+ set msgdir [file join $dbdir "msg"]
+ # ensure that the necessary dirs exist
+ file mkdir $dbdir $echodir $msgdir
+ # attempt to fetch the echolist if echos are empty
+ if {$echos eq {}} {
+ if {$dolog eq 1} {puts "Fetching echolist..."}
+ set echolist [getfile "$url/list.txt"]
+ set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}]
+ } else {
+ set echos [split $echos "/,;"]
+ }
+ if {$dolog eq 1} {puts "Echos to fetch: $echos"}
+ # pass the echo list and fetch the message IDs
+ set echos [lmap s $echos {string trim $s}]
+ set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]]
+ # iterate over the fetched data and fetch corresponding messages
+ set curecho ""
+ set datalines [split $echodata "\n"]
+ set echomap ""
+ # build the map of lists of message IDs
+ foreach line $datalines {
+ set line [string trim $line]
+ # detect if the line is related to echo name or message ID
+ if {[string first "." $line] eq -1} { # message ID
+ if {$curecho ne ""} {
+ dict lappend echomap $curecho $line
+ }
+ } else { # echo name
+ set curecho $line
+ dict set echomap $curecho ""
+ }
+ }
+ if {$dolog eq 1} {puts "Echomap built"}
+ # detect how many message IDs we can pass in a single query
+ # (assuming the maximum GET length is 256 chars)
+ # then we assume 21 character per message ID
+ set maxids [expr {int(256 / 21)}]
+ # pass the echo list and fetch the message IDs
+ # now, process the map we've built
+ dict for {echoname msgids} $echomap {
+ # get the existing message IDs in the echo
+ set echofile [file join $echodir $echoname]
+ set oldmsgids ""
+ if [file exists $echofile] {
+ set oldmsgids [lmap s [split [readfile $echofile] "\n"] {string trim $s}]
+ }
+ # pre-filter the new message IDs to fetch
+ set newmsgids [lmap m [concat $oldmsgids $msgids] {
+ if {$m in $oldmsgids && $m in $msgids} continue
+ if {$m eq {}} continue
+ set m
+ }]
+ # save the echo index file with all message IDs
+ writefileln $echofile [join $msgids "\n"]
+ if {$dolog eq 1} {puts "Fetching [llength $newmsgids] new messages from $echoname, $maxids messages per request..."}
+ set idgroups ""
+ set grcount 0
+ set localcount 0
+ foreach nmid $newmsgids { # iterate over new messages to group them
+ if {$nmid ne ""} {
+ dict lappend idgroups $grcount $nmid
+ incr localcount
+ if {$localcount > $maxids} {
+ incr grcount
+ set localcount 0
+ }
+ }
+ }
+ dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages
+ # get the message data in the bundle format
+ set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]]
+ set bdata [split $msgbundle "\n"]
+ foreach bline $bdata {
+ set parts [split $bline ":"]
+ if {[llength $parts] > 1} { # valid message
+ set mid [lindex $parts 0]
+ set mdata [binary decode base64 [lindex $parts 1]]
+ writefileln [file join $msgdir $mid] [encoding convertfrom utf-8 $mdata]
+ }
+ }
+ }
+ }
+}
+
+# end of procs, start the entrypoint
+if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return}
+
+set scriptpath [file normalize [info script]]
+set appdir [file dirname $scriptpath]
+# check if we're running from a starpack
+if [string match *app-tiifetch $appdir] {
+ set appdir [file normalize [file join $appdir ".." ".." ".." ]]
+}
+
+if {$argc > 0} {
+ set localdbdir [file join $appdir "tiidb"]
+ if {$argc > 2} {
+ set localdbdir [lindex $argv 2]
+ }
+ puts "Fetching messages, please wait..."
+ fetchiidb [lindex $argv 0] [lindex $argv 1] $localdbdir 1
+ puts "Messages fetched"
+} else {puts "No ii/idec station URL specified!"}
+