tiifetch.tcl (13491B)
1 #!/usr/bin/env tclsh 2 # tiifetch: fetch all data from an ii/idec station into the local text db 3 # (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md) 4 # Usage: tiifetch.tcl [station_url] [echos] [db_file] 5 # The echo list should be delimited with slash (/), comma (,) or semicolon (;) 6 # if no echos are specified (or "" is passed), then list.txt will be fetched 7 # and then all missing echo content from it will be downloaded 8 # If db_dir isn't specified, it's fetched and merged into the 9 # tiidb directory in the program root with echoconfs and messages respectively 10 # This component only fetches the messages, doesn't parse or display them 11 # Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex 12 # Depends on Tcllib for URI parsing and SQLite3 for data storage 13 # Created by Luxferre in 2024, released into public domain 14 15 package require http 16 package require uri 17 package require sqlite3 18 package require sha256 19 20 # autodetect TclTLS support and enable HTTPS request support if detected 21 set tls_support 0 22 catch {package require tls; set tls_support 1} 23 if {$tls_support eq 1} { 24 ::http::register https 443 [list ::tls::socket -autoservername true] 25 } 26 27 proc url2dict {inputurl} { 28 set out [dict create] 29 if [regexp {^(.*)://} $inputurl _ lscheme] { 30 dict set out scheme $lscheme 31 } else { 32 dict set out handler "render_handler_invalid" 33 return $out 34 } 35 set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]] 36 set rhost [dict get $rawout host] 37 set rpath [dict get $rawout path] 38 set rport [dict get $rawout port] 39 set secondarydata [dict get $rawout query] 40 dict set out host $rhost 41 set selector $rpath 42 dict set out handler "render_handler_$lscheme" 43 # protocol-specific request logic 44 switch "$lscheme" { 45 gophers - 46 gopher { 47 if {$rport eq ""} {set rport 70} 48 set selector [string cat [string range $rpath 1 end] "\r\n"] 49 dict set out handler render_handler_gopher 50 } 51 finger { 52 if {$rport eq ""} {set rport 79} 53 set selector "$selector\r\n" 54 } 55 spartan { 56 if {$rport eq ""} {set rport 300} 57 if {$rpath eq ""} {set rpath "/"} 58 if {![string match "/*" $rpath]} { 59 set rpath "/$rpath" 60 } 61 set blen [string length $secondarydata] 62 set selector "$rhost $rpath $blen\r\n$secondarydata" 63 } 64 nex { 65 if {$rport eq ""} {set rport 1900} 66 set selector "$selector\r\n" 67 } 68 gemini { 69 if {$rport eq ""} {set rport 1965} 70 set selector "$inputurl\r\n" 71 } 72 default {dict set out handler render_handler_none} 73 } 74 dict set out path $rpath 75 dict set out selector $selector 76 dict set out port $rport 77 return $out 78 } 79 80 proc reqresp {host port reqline is_tls encoding} { 81 global sock_response net_timeout 82 set sock 0 83 if {$is_tls eq 1} { 84 catch {set sock [::tls::socket -autoservername true -async $host $port]} 85 } elseif {$is_tls eq 2} { 86 catch {set sock [::tls::socket -autoservername true -async $host $port]} 87 if {$sock eq 0} {catch {set sock [socket -async $host $port]}} 88 } else { 89 catch {set sock [socket -async $host $port]} 90 } 91 if {$sock eq 0} {set sock_response ""; return} 92 global rcv_end_$sock 93 unset -nocomplain rcv_end_$sock 94 if {$encoding eq ""} {set encoding utf-8} 95 fconfigure $sock -translation binary -buffering none -encoding $encoding 96 fileevent $sock writable [list connected $sock $reqline] 97 proc connected {sock reqline} { 98 fileevent $sock writable {} 99 puts -nonewline $sock "$reqline" 100 flush $sock 101 fileevent $sock readable [list rdbl $sock] 102 } 103 set sock_response "" 104 proc rdbl {sock} { 105 global sock_response rcv_end_$sock 106 while {![eof $sock]} { 107 append sock_response [read $sock] 108 } 109 set rcv_end_$sock 0 110 } 111 after $net_timeout "global rcv_end_$sock; set rcv_end_$sock 1" 112 vwait rcv_end_$sock 113 catch {close $sock} 114 unset -nocomplain rcv_end_$sock 115 } 116 117 118 # file download helper 119 proc getfile {url} { 120 global net_timeout 121 set url [regsub -all {([^:])//} $url {\1/}] 122 set urlparts [url2dict $url] 123 set scheme [dict get $urlparts scheme] 124 set host [dict get $urlparts host] 125 set port [dict get $urlparts port] 126 set sel [dict get $urlparts selector] 127 global sock_response tls_support 128 switch $tls_support { 129 0 {set localtls 0} 130 1 {set localtls 2} 131 } 132 switch $scheme { 133 gophers - gopher - finger - nex { 134 if {$scheme eq "gophers"} {set localtls 1} 135 reqresp $host $port $sel $localtls utf-8 136 set body "$sock_response" 137 set sock_response "" 138 return $body 139 } 140 gemini - spartan { 141 if {$scheme eq "gemini"} {set localtls 1} 142 reqresp $host $port $sel $localtls utf-8 143 set body "$sock_response" 144 set sock_response "" 145 if {[regexp {^([^\n]*)\n} $body _ statusline]} { 146 set statusline [string trimright $statusline] 147 set statusparts [split $statusline " "] 148 set statuscode [lindex $statusparts 0] 149 set mainstatuscode [string index $statuscode 0] 150 if {$mainstatuscode eq 2} { 151 regsub {.*?\n} $body "" body 152 return $body 153 } else {return {}} 154 } else {return {}} 155 } 156 https - http { 157 set hs "" 158 while {$hs eq ""} { 159 set hs [::http::geturl $url -binary 1 -keepalive 0 -timeout $net_timeout] 160 } 161 if {[::http::ncode $hs] eq "200"} { 162 return [::http::data $hs] 163 } else {return {}} 164 } 165 default {return {}} 166 } 167 } 168 169 # file read helper 170 proc readfile {fname} { 171 set fp [open $fname r] 172 fconfigure $fp -encoding utf-8 173 set data [read $fp] 174 close $fp 175 return $data 176 } 177 178 # file write helper (leaves a newline at the end) 179 proc writefileln {fname data} { 180 set fp [open $fname w] 181 fconfigure $fp -encoding utf-8 182 puts $fp $data 183 close $fp 184 } 185 186 # list comparison helper (listcomp $new $old) 187 proc listcomp {new old} { 188 foreach i $old { 189 set new [lsearch -all -inline -not -exact $new $i] 190 } 191 return $new 192 } 193 194 # generate ID from the Node-to-Point msg contents 195 # (exactly how it was transferred inside base64) 196 proc n2p_id {binmsg} { 197 set hash [::sha2::sha256 -bin -- $binmsg] 198 set trimbased [string range [binary encode base64 $hash] 0 19] 199 return [string map {+ A - A / z _ z} $trimbased] 200 } 201 202 # ensure database file is created 203 proc createdb {fname} { 204 sqlite3 fdb $fname 205 fdb eval { 206 CREATE TABLE `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, 207 `msgid` VARCHAR(20) NOT NULL UNIQUE, 208 `timestamp` INT NOT NULL, 209 `echoname` VARCHAR(120) NOT NULL, 210 `repto` VARCHAR(120) NOT NULL, 211 `msgfrom` VARCHAR(120) NOT NULL, 212 `msgfromaddr` VARCHAR(120) NOT NULL, 213 `msgto` VARCHAR(120) NOT NULL, 214 `subj` VARCHAR(120) NOT NULL, 215 `body` TEXT NOT NULL, 216 `content_id` VARCHAR(20) NOT NULL); 217 } 218 fdb close 219 } 220 221 # main logic proc 222 proc fetchiidb {url echos dbfile dolog maxids} { 223 if {$maxids < 12} {set maxids 12} 224 # trim the parameters 225 set url [string trim $url] 226 set echos [string trim $echos] 227 set dbfile [file normalize [string trim $dbfile]] 228 if {![file exists $dbfile]} {createdb $dbfile} 229 sqlite3 msgdb $dbfile 230 # attempt to fetch the echolist if echos are empty 231 if {$echos eq {}} { 232 if {$dolog eq 1} {puts "Fetching echolist..."} 233 set echolist [getfile "$url/list.txt"] 234 set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}] 235 } else { 236 set echos [split $echos "/,;"] 237 } 238 set echos [string trim [lmap s $echos {string trim $s " \t\r\n"}] " \t\r\n"] 239 if {$dolog eq 1} {puts "Echos to fetch: $echos"} 240 if {$dolog eq 1} {puts "Building message indexes..."} 241 set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]] 242 set datalines [split $echodata \n] 243 # iterate over the fetched data and fetch corresponding messages 244 set curecho "" 245 set echomap "" 246 # build the map of lists of message IDs 247 foreach line $datalines { 248 set line [string trim $line " \t\r\n"] 249 if {$line ne ""} { 250 # detect if the line is related to echo name or message ID 251 if {[string first "." $line] eq -1} { # message ID 252 if {[string length $line] == 20} { # filter out invalid IDs 253 if {$curecho ne ""} { 254 dict lappend echomap $curecho $line 255 } 256 } 257 } else { # echo name 258 set curecho $line 259 dict set echomap $curecho "" 260 } 261 } 262 } 263 if {$dolog eq 1} {puts "Echomap built"} 264 # pass the echo list and fetch the message IDs 265 # now, process the map we've built 266 dict for {echoname msgids} $echomap { 267 if {![string match *.* $echoname]} {continue} 268 if {[llength $msgids] eq 0} {continue} 269 # get the existing message IDs in the echo 270 set oldmsgids [msgdb eval {SELECT `msgid` FROM `msg` WHERE `echoname` = $echoname ORDER BY `id` ASC;}] 271 # pre-filter the new message IDs to fetch 272 set newmsgids [listcomp $msgids $oldmsgids] 273 set idgroups "" 274 set grcount 0 275 set localcount 0 276 set globalcount 0 277 foreach nmid $newmsgids { # iterate over new messages to group them 278 if {$nmid ne ""} { 279 set cid [string trim [msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $nmid;}]] 280 if {$nmid ne $cid} { 281 incr globalcount 282 # insert new message ID to the echo mapping 283 dict lappend idgroups $grcount $nmid 284 incr localcount 285 if {$localcount > $maxids} { 286 incr grcount 287 set localcount 0 288 } 289 } 290 } 291 } 292 if {$globalcount > 0} { 293 if {$dolog eq 1} {puts "Fetching $globalcount new messages from $echoname..."} 294 dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages 295 # get the message data in the bundle format 296 set plen 0 297 set retries 4 298 while {$plen < $globalcount} { 299 set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]] 300 set bdata [lmap m [split $msgbundle "\n"] { 301 set m [string trim $m] 302 if {$m eq ""} {continue} 303 set m 304 }] 305 set plen [llength $bdata] 306 incr retries -1 307 if {$retries < 1} {break} 308 } 309 foreach bline $bdata { 310 set parts [split $bline ":"] 311 if {[llength $parts] > 1} { # valid message 312 set mid [string trim [lindex $parts 0]] 313 set bdata [binary decode base64 [lindex $parts 1]] 314 # calculate ii Node-to-Point ID to verify the message integrity 315 set content_id [n2p_id $bdata] 316 set mdata [encoding convertfrom utf-8 $bdata] 317 set msglines [split $mdata "\n"] 318 set replyto "" 319 set tags [split [lindex $msglines 0] "/"] 320 if {[dict exists $tags repto]} { 321 set replyto [dict get $tags repto] 322 } else {set replyto ""} 323 set echoarea [string trim [lindex $msglines 1]] 324 set timestamp [string trim [lindex $msglines 2]] 325 set msgfrom [string trim [lindex $msglines 3]] 326 set msgfromaddr [string trim [lindex $msglines 4]] 327 set msgto [string trim [lindex $msglines 5]] 328 set subj [string trim [lindex $msglines 6]] 329 set msgbody [string trimright [lrange $msglines 8 end]] 330 msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, 331 `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 332 VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);} 333 } 334 } 335 } 336 } 337 } 338 msgdb close 339 } 340 341 proc massfetch {echos db dolog} { 342 global appdir 343 if {$dolog eq 1} {puts "No ii/idec station URL specified, using stations.txt"} 344 set stfile [file join $appdir "stations.txt"] 345 if {[file exists $stfile]} { 346 set stlist [readfile $stfile] 347 dict for {station stmaxids} $stlist { 348 set station [string trim $station] 349 if {$station ne "" && ![string match "#*" $station]} { 350 if {$dolog eq 1} {puts "Fetching from $station"} 351 fetchiidb $station $echos $db $dolog $stmaxids 352 } 353 } 354 } else { 355 if {$dolog eq 1} {puts "No stations.txt found, bailing out!"} 356 } 357 } 358 359 # end of procs, start the entrypoint 360 if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return} 361 362 set scriptpath [file normalize [info script]] 363 set appdir [file dirname $scriptpath] 364 # check if we're running from a starpack 365 if [string match *app-tiifetch $appdir] { 366 set appdir [file normalize [file join $appdir ".." ".." ".." ]] 367 } 368 set localdb [file join $appdir "tii.db"] 369 370 # populate general HTTP configuration 371 set cfgfile [file join $appdir "config.txt"] 372 set net_timeout 5000 373 if {[file exists $cfgfile]} { 374 set cfg [readfile $cfgfile] 375 if {[dict exists $cfg useragent]} { 376 ::http::config -useragent [dict get $cfg useragent] 377 } 378 if {[dict exists $cfg proxyhost]} { 379 ::http::config -proxyhost [dict get $cfg proxyhost] 380 } 381 if {[dict exists $cfg proxyport]} { 382 ::http::config -proxyport [dict get $cfg proxyport] 383 } 384 if {[dict exists $cfg net_timeout]} { 385 set net_timeout [dict get $cfg net_timeout] 386 } 387 } 388 389 if {$argc > 0} { 390 if {$argc > 2} { 391 set localdb [lindex $argv 2] 392 } 393 puts "Fetching messages, please wait..." 394 set sturl [string trim [lindex $argv 0]] 395 if {$sturl eq ""} { 396 massfetch [lindex $argv 1] $localdb 1 397 } else { 398 fetchiidb $sturl [lindex $argv 1] $localdb 1 12 399 } 400 puts "Messages fetched" 401 } else { 402 puts "Fetching messages, please wait..." 403 massfetch "" $localdb 1 404 puts "Messages fetched" 405 } 406