tiifetch.tcl (13549B)
1 #!/usr/bin/env tclsh 2 # tiifetch: fetch all data from an ii/idec station into the local text db 3 # (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md) 4 # Usage: tiifetch.tcl [station_url] [echos] [db_file] 5 # The echo list should be delimited with slash (/), comma (,) or semicolon (;) 6 # if no echos are specified (or "" is passed), then list.txt will be fetched 7 # and then all missing echo content from it will be downloaded 8 # If db_dir isn't specified, it's fetched and merged into the 9 # tiidb directory in the program root with echoconfs and messages respectively 10 # This component only fetches the messages, doesn't parse or display them 11 # Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex 12 # Depends on Tcllib for URI parsing and SQLite3 for data storage 13 # Created by Luxferre in 2024, released into public domain 14 15 package require http 16 package require uri 17 package require sqlite3 18 package require sha256 19 20 # autodetect TclTLS support and enable HTTPS request support if detected 21 set tls_support 0 22 catch {package require tls; set tls_support 1} 23 if {$tls_support eq 1} { 24 ::http::register https 443 [list ::tls::socket -autoservername true] 25 } 26 27 proc url2dict {inputurl} { 28 set out [dict create] 29 if [regexp {^(.*)://} $inputurl _ lscheme] { 30 dict set out scheme $lscheme 31 } else { 32 dict set out handler "render_handler_invalid" 33 return $out 34 } 35 set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]] 36 set rhost [dict get $rawout host] 37 set rpath [dict get $rawout path] 38 set rport [dict get $rawout port] 39 set secondarydata [dict get $rawout query] 40 dict set out host $rhost 41 set selector $rpath 42 dict set out handler "render_handler_$lscheme" 43 # protocol-specific request logic 44 switch "$lscheme" { 45 gophers - 46 gopher { 47 if {$rport eq ""} {set rport 70} 48 set selector [string cat "/" $rpath "\r\n"] 49 dict set out handler render_handler_gopher 50 } 51 finger { 52 if {$rport eq ""} {set rport 79} 53 set selector "/$selector\r\n" 54 } 55 spartan { 56 if {$rport eq ""} {set rport 300} 57 if {$rpath eq ""} {set rpath "/"} 58 if {![string match "/*" $rpath]} { 59 set rpath "/$rpath" 60 } 61 set blen [string length $secondarydata] 62 set selector "$rhost $rpath $blen\r\n$secondarydata" 63 } 64 nex { 65 if {$rport eq ""} {set rport 1900} 66 set selector "/$selector\r\n" 67 } 68 gemini { 69 if {$rport eq ""} {set rport 1965} 70 set selector "$inputurl\r\n" 71 } 72 default {dict set out handler render_handler_none} 73 } 74 dict set out path $rpath 75 dict set out selector $selector 76 dict set out port $rport 77 return $out 78 } 79 80 proc reqresp {host port reqline is_tls encoding} { 81 global sock_response net_timeout 82 set sock 0 83 if {$is_tls eq 1} { 84 catch {set sock [::tls::socket -autoservername true -async $host $port]} 85 } elseif {$is_tls eq 2} { 86 catch {set sock [::tls::socket -autoservername true -async $host $port]} 87 if {$sock eq 0} {catch {set sock [socket -async $host $port]}} 88 } else { 89 catch {set sock [socket -async $host $port]} 90 } 91 if {$sock eq 0} {set sock_response ""; return} 92 global rcv_end_$sock 93 unset -nocomplain rcv_end_$sock 94 if {$encoding eq ""} {set encoding utf-8} 95 fconfigure $sock -translation binary -buffering none -encoding $encoding 96 fileevent $sock writable [list connected $sock $reqline] 97 proc connected {sock reqline} { 98 fileevent $sock writable {} 99 puts -nonewline $sock "$reqline" 100 flush $sock 101 fileevent $sock readable [list rdbl $sock] 102 } 103 set sock_response "" 104 proc rdbl {sock} { 105 global sock_response rcv_end_$sock 106 while {![eof $sock]} { 107 append sock_response [read $sock] 108 } 109 set rcv_end_$sock 0 110 } 111 after $net_timeout "global rcv_end_$sock; set rcv_end_$sock 1" 112 vwait rcv_end_$sock 113 catch {close $sock} 114 unset -nocomplain rcv_end_$sock 115 } 116 117 118 # file download helper 119 proc getfile {url} { 120 global net_timeout 121 set url [regsub -all {([^:])//} $url {\1/}] 122 set urlparts [url2dict $url] 123 set scheme [dict get $urlparts scheme] 124 set host [dict get $urlparts host] 125 set port [dict get $urlparts port] 126 set sel [dict get $urlparts selector] 127 global sock_response tls_support 128 switch $tls_support { 129 0 {set localtls 0} 130 1 {set localtls 2} 131 } 132 switch $scheme { 133 gophers - gopher - finger - nex { 134 if {$scheme eq "gophers"} {set localtls 2} else {set localtls 0} 135 reqresp $host $port $sel $localtls utf-8 136 set body "$sock_response" 137 set sock_response "" 138 return $body 139 } 140 gemini - spartan { 141 if {$scheme eq "gemini"} {set localtls 1} else {set localtls 0} 142 reqresp $host $port $sel $localtls utf-8 143 set body "$sock_response" 144 set sock_response "" 145 if {[regexp {^([^\n]*)\n} $body _ statusline]} { 146 set statusline [string trimright $statusline] 147 set statusparts [split $statusline " "] 148 set statuscode [lindex $statusparts 0] 149 set mainstatuscode [string index $statuscode 0] 150 if {$mainstatuscode eq 2} { 151 regsub {.*?\n} $body "" body 152 return $body 153 } else {return {}} 154 } else {return {}} 155 } 156 https - http { 157 set hs "" 158 while {$hs eq ""} { 159 set hs [::http::geturl $url -binary 1 -keepalive 0 -timeout $net_timeout] 160 } 161 if {[::http::ncode $hs] eq "200"} { 162 return [::http::data $hs] 163 } else {return {}} 164 } 165 default {return {}} 166 } 167 } 168 169 # file read helper 170 proc readfile {fname} { 171 set fp [open $fname r] 172 fconfigure $fp -encoding utf-8 173 set data [read $fp] 174 close $fp 175 return $data 176 } 177 178 # file write helper (leaves a newline at the end) 179 proc writefileln {fname data} { 180 set fp [open $fname w] 181 fconfigure $fp -encoding utf-8 182 puts $fp $data 183 close $fp 184 } 185 186 # list comparison helper (listcomp $new $old) 187 proc listcomp {new old} { 188 foreach i $old { 189 set new [lsearch -all -inline -not -exact $new $i] 190 } 191 return $new 192 } 193 194 # generate ID from the Node-to-Point msg contents 195 # (exactly how it was transferred inside base64) 196 proc n2p_id {binmsg} { 197 set hash [::sha2::sha256 -bin -- $binmsg] 198 set trimbased [string range [binary encode base64 $hash] 0 19] 199 return [string map {+ A - A / z _ z} $trimbased] 200 } 201 202 # ensure database file is created 203 proc createdb {fname} { 204 sqlite3 fdb $fname 205 fdb eval { 206 PRAGMA journal_mode=WAL; 207 CREATE TABLE `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, 208 `msgid` VARCHAR(20) NOT NULL UNIQUE, 209 `timestamp` INT NOT NULL, 210 `echoname` VARCHAR(120) NOT NULL, 211 `repto` VARCHAR(120) NOT NULL, 212 `msgfrom` VARCHAR(120) NOT NULL, 213 `msgfromaddr` VARCHAR(120) NOT NULL, 214 `msgto` VARCHAR(120) NOT NULL, 215 `subj` VARCHAR(120) NOT NULL, 216 `body` TEXT NOT NULL, 217 `content_id` VARCHAR(20) NOT NULL); 218 } 219 fdb close 220 } 221 222 # main logic proc 223 proc fetchiidb {url echos dbfile dolog maxids} { 224 if {$maxids < 12} {set maxids 12} 225 # trim the parameters 226 set url [string trim $url] 227 set echos [string trim $echos] 228 set dbfile [file normalize [string trim $dbfile]] 229 if {![file exists $dbfile]} {createdb $dbfile} 230 sqlite3 msgdb $dbfile 231 # attempt to fetch the echolist if echos are empty 232 if {$echos eq {}} { 233 if {$dolog eq 1} {puts "Fetching echolist..."} 234 set echolist [getfile "$url/list.txt"] 235 set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}] 236 } else { 237 set echos [split $echos "/,;"] 238 } 239 set echos [string trim [lmap s $echos {string trim $s " \t\r\n"}] " \t\r\n"] 240 if {$dolog eq 1} {puts "Echos to fetch: $echos"} 241 if {$dolog eq 1} {puts "Building message indexes..."} 242 set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]] 243 set datalines [split $echodata \n] 244 # iterate over the fetched data and fetch corresponding messages 245 set curecho "" 246 set echomap "" 247 # build the map of lists of message IDs 248 foreach line $datalines { 249 set line [string trim $line " \t\r\n"] 250 if {$line ne ""} { 251 # detect if the line is related to echo name or message ID 252 if {[string first "." $line] eq -1} { # message ID 253 if {[string length $line] == 20} { # filter out invalid IDs 254 if {$curecho ne ""} { 255 dict lappend echomap $curecho $line 256 } 257 } 258 } else { # echo name 259 set curecho $line 260 dict set echomap $curecho "" 261 } 262 } 263 } 264 if {$dolog eq 1} {puts "Echomap built"} 265 # pass the echo list and fetch the message IDs 266 # now, process the map we've built 267 dict for {echoname msgids} $echomap { 268 if {![string match *.* $echoname]} {continue} 269 if {[llength $msgids] eq 0} {continue} 270 # get the existing message IDs in the echo 271 set oldmsgids [msgdb eval {SELECT `msgid` FROM `msg` WHERE `echoname` = $echoname ORDER BY `id` ASC;}] 272 # pre-filter the new message IDs to fetch 273 set newmsgids [listcomp $msgids $oldmsgids] 274 set idgroups "" 275 set grcount 0 276 set localcount 0 277 set globalcount 0 278 foreach nmid $newmsgids { # iterate over new messages to group them 279 if {$nmid ne ""} { 280 set cid [string trim [msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $nmid;}]] 281 if {$nmid ne $cid} { 282 incr globalcount 283 # insert new message ID to the echo mapping 284 dict lappend idgroups $grcount $nmid 285 incr localcount 286 if {$localcount > $maxids} { 287 incr grcount 288 set localcount 0 289 } 290 } 291 } 292 } 293 if {$globalcount > 0} { 294 if {$dolog eq 1} {puts "Fetching $globalcount new messages from $echoname..."} 295 dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages 296 # get the message data in the bundle format 297 set plen 0 298 set retries 4 299 while {$plen < $globalcount} { 300 set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]] 301 set bdata [lmap m [split $msgbundle "\n"] { 302 set m [string trim $m] 303 if {$m eq ""} {continue} 304 set m 305 }] 306 set plen [llength $bdata] 307 incr retries -1 308 if {$retries < 1} {break} 309 } 310 foreach bline $bdata { 311 set parts [split $bline ":"] 312 if {[llength $parts] > 1} { # valid message 313 set mid [string trim [lindex $parts 0]] 314 set bdata [binary decode base64 [lindex $parts 1]] 315 # calculate ii Node-to-Point ID to verify the message integrity 316 set content_id [n2p_id $bdata] 317 set mdata [encoding convertfrom utf-8 $bdata] 318 set msglines [split $mdata "\n"] 319 set replyto "" 320 set tags [split [lindex $msglines 0] "/"] 321 if {[dict exists $tags repto]} { 322 set replyto [dict get $tags repto] 323 } else {set replyto ""} 324 set echoarea [string trim [lindex $msglines 1]] 325 set timestamp [string trim [lindex $msglines 2]] 326 set msgfrom [string trim [lindex $msglines 3]] 327 set msgfromaddr [string trim [lindex $msglines 4]] 328 set msgto [string trim [lindex $msglines 5]] 329 set subj [string trim [lindex $msglines 6]] 330 set msgbody [string trimright [lrange $msglines 8 end]] 331 msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, 332 `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 333 VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);} 334 } 335 } 336 } 337 } 338 } 339 msgdb close 340 } 341 342 proc massfetch {echos db dolog} { 343 global appdir 344 if {$dolog eq 1} {puts "No ii/idec station URL specified, using stations.txt"} 345 set stfile [file join $appdir "stations.txt"] 346 if {[file exists $stfile]} { 347 set stlist [readfile $stfile] 348 dict for {station stmaxids} $stlist { 349 set station [string trim $station] 350 if {$station ne "" && ![string match "#*" $station]} { 351 if {$dolog eq 1} {puts "Fetching from $station"} 352 fetchiidb $station $echos $db $dolog $stmaxids 353 } 354 } 355 } else { 356 if {$dolog eq 1} {puts "No stations.txt found, bailing out!"} 357 } 358 } 359 360 # end of procs, start the entrypoint 361 if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return} 362 363 set scriptpath [file normalize [info script]] 364 set appdir [file dirname $scriptpath] 365 # check if we're running from a starpack 366 if [string match *app-tiifetch $appdir] { 367 set appdir [file normalize [file join $appdir ".." ".." ".." ]] 368 } 369 set localdb [file join $appdir "tii.db"] 370 371 # populate general HTTP configuration 372 set cfgfile [file join $appdir "config.txt"] 373 set net_timeout 5000 374 if {[file exists $cfgfile]} { 375 set cfg [readfile $cfgfile] 376 if {[dict exists $cfg useragent]} { 377 ::http::config -useragent [dict get $cfg useragent] 378 } 379 if {[dict exists $cfg proxyhost]} { 380 ::http::config -proxyhost [dict get $cfg proxyhost] 381 } 382 if {[dict exists $cfg proxyport]} { 383 ::http::config -proxyport [dict get $cfg proxyport] 384 } 385 if {[dict exists $cfg net_timeout]} { 386 set net_timeout [dict get $cfg net_timeout] 387 } 388 } 389 390 if {$argc > 0} { 391 if {$argc > 2} { 392 set localdb [lindex $argv 2] 393 } 394 puts "Fetching messages, please wait..." 395 set sturl [string trim [lindex $argv 0]] 396 if {$sturl eq ""} { 397 massfetch [lindex $argv 1] $localdb 1 398 } else { 399 fetchiidb $sturl [lindex $argv 1] $localdb 1 12 400 } 401 puts "Messages fetched" 402 } else { 403 puts "Fetching messages, please wait..." 404 massfetch "" $localdb 1 405 puts "Messages fetched" 406 } 407