tii

Tcl-based suite for working with ii/idec protocol
git clone git://git.luxferre.top/tii.git
Log | Files | Refs | README

tiifetch.tcl (13549B)


      1 #!/usr/bin/env tclsh
      2 # tiifetch: fetch all data from an ii/idec station into the local text db
      3 # (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md)
      4 # Usage: tiifetch.tcl [station_url] [echos] [db_file]
      5 # The echo list should be delimited with slash (/), comma (,) or semicolon (;)
      6 # if no echos are specified (or "" is passed), then list.txt will be fetched
      7 # and then all missing echo content from it will be downloaded
      8 # If db_dir isn't specified, it's fetched and merged into the
      9 # tiidb directory in the program root with echoconfs and messages respectively
     10 # This component only fetches the messages, doesn't parse or display them
     11 # Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex
     12 # Depends on Tcllib for URI parsing and SQLite3 for data storage
     13 # Created by Luxferre in 2024, released into public domain
     14 
     15 package require http
     16 package require uri
     17 package require sqlite3
     18 package require sha256
     19 
     20 # autodetect TclTLS support and enable HTTPS request support if detected
     21 set tls_support 0
     22 catch {package require tls; set tls_support 1}
     23 if {$tls_support eq 1} {
     24   ::http::register https 443 [list ::tls::socket -autoservername true]
     25 }
     26 
     27 proc url2dict {inputurl} {
     28   set out [dict create]
     29   if [regexp {^(.*)://} $inputurl _ lscheme] {
     30     dict set out scheme $lscheme
     31   } else {
     32     dict set out handler "render_handler_invalid"
     33     return $out
     34   }
     35   set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]]
     36   set rhost [dict get $rawout host]
     37   set rpath [dict get $rawout path]
     38   set rport [dict get $rawout port]
     39   set secondarydata [dict get $rawout query]
     40   dict set out host $rhost
     41   set selector $rpath
     42   dict set out handler "render_handler_$lscheme"
     43   # protocol-specific request logic
     44   switch "$lscheme" {
     45     gophers -
     46     gopher {
     47       if {$rport eq ""} {set rport 70}
     48       set selector [string cat "/" $rpath "\r\n"]
     49       dict set out handler render_handler_gopher
     50     }
     51     finger {
     52       if {$rport eq ""} {set rport 79}
     53       set selector "/$selector\r\n"
     54     }
     55     spartan {
     56       if {$rport eq ""} {set rport 300}
     57       if {$rpath eq ""} {set rpath "/"}
     58       if {![string match "/*" $rpath]} {
     59         set rpath "/$rpath"
     60       }
     61       set blen [string length $secondarydata]
     62       set selector "$rhost $rpath $blen\r\n$secondarydata"
     63     }
     64     nex {
     65       if {$rport eq ""} {set rport 1900}
     66       set selector "/$selector\r\n"
     67     }
     68     gemini {
     69       if {$rport eq ""} {set rport 1965}
     70       set selector "$inputurl\r\n"
     71     }
     72     default {dict set out handler render_handler_none}
     73   }
     74   dict set out path $rpath
     75   dict set out selector $selector
     76   dict set out port $rport
     77   return $out
     78 }
     79 
     80 proc reqresp {host port reqline is_tls encoding} {
     81   global sock_response net_timeout
     82   set sock 0
     83   if {$is_tls eq 1} {
     84     catch {set sock [::tls::socket -autoservername true -async $host $port]}
     85   } elseif {$is_tls eq 2} {
     86     catch {set sock [::tls::socket -autoservername true -async $host $port]}
     87     if {$sock eq 0} {catch {set sock [socket -async $host $port]}}
     88   } else {
     89     catch {set sock [socket -async $host $port]}
     90   }
     91   if {$sock eq 0} {set sock_response ""; return}
     92   global rcv_end_$sock
     93   unset -nocomplain rcv_end_$sock
     94   if {$encoding eq ""} {set encoding utf-8}
     95   fconfigure $sock -translation binary -buffering none -encoding $encoding
     96   fileevent $sock writable [list connected $sock $reqline]
     97   proc connected {sock reqline} {
     98     fileevent $sock writable {}
     99     puts -nonewline $sock "$reqline"
    100     flush $sock
    101     fileevent $sock readable [list rdbl $sock]
    102   }
    103   set sock_response ""
    104   proc rdbl {sock} {
    105     global sock_response rcv_end_$sock
    106     while {![eof $sock]} {
    107       append sock_response [read $sock]
    108     }
    109     set rcv_end_$sock 0
    110   }
    111   after $net_timeout "global rcv_end_$sock; set rcv_end_$sock 1"
    112   vwait rcv_end_$sock
    113   catch {close $sock}
    114   unset -nocomplain rcv_end_$sock
    115 }
    116 
    117 
    118 # file download helper
    119 proc getfile {url} {
    120   global net_timeout
    121   set url [regsub -all {([^:])//} $url {\1/}]
    122   set urlparts [url2dict $url]
    123   set scheme [dict get $urlparts scheme]
    124   set host [dict get $urlparts host]
    125   set port [dict get $urlparts port]
    126   set sel [dict get $urlparts selector]
    127   global sock_response tls_support
    128   switch $tls_support {
    129     0 {set localtls 0}
    130     1 {set localtls 2}
    131   }
    132   switch $scheme {
    133     gophers - gopher - finger - nex {
    134       if {$scheme eq "gophers"} {set localtls 2} else {set localtls 0}
    135       reqresp $host $port $sel $localtls utf-8
    136       set body "$sock_response"
    137       set sock_response ""
    138       return $body
    139     }
    140     gemini - spartan {
    141       if {$scheme eq "gemini"} {set localtls 1} else {set localtls 0}
    142       reqresp $host $port $sel $localtls utf-8
    143       set body "$sock_response"
    144       set sock_response ""
    145       if {[regexp {^([^\n]*)\n} $body _ statusline]} {
    146         set statusline [string trimright $statusline]
    147         set statusparts [split $statusline " "]
    148         set statuscode [lindex $statusparts 0]
    149         set mainstatuscode [string index $statuscode 0]
    150         if {$mainstatuscode eq 2} {
    151           regsub {.*?\n} $body "" body
    152           return $body
    153         } else {return {}}
    154       } else {return {}}
    155     }
    156     https - http {
    157       set hs ""
    158       while {$hs eq ""} {
    159         set hs [::http::geturl $url -binary 1 -keepalive 0 -timeout $net_timeout]
    160       }
    161       if {[::http::ncode $hs] eq "200"} {
    162         return [::http::data $hs]
    163       } else {return {}}
    164     }
    165     default {return {}}
    166   }
    167 }
    168 
    169 # file read helper
    170 proc readfile {fname} {
    171   set fp [open $fname r]
    172   fconfigure $fp -encoding utf-8
    173   set data [read $fp]
    174   close $fp
    175   return $data
    176 }
    177 
    178 # file write helper (leaves a newline at the end)
    179 proc writefileln {fname data} {
    180   set fp [open $fname w]
    181   fconfigure $fp -encoding utf-8
    182   puts $fp $data
    183   close $fp
    184 }
    185 
    186 # list comparison helper (listcomp $new $old)
    187 proc listcomp {new old} {
    188   foreach i $old {
    189     set new [lsearch -all -inline -not -exact $new $i]
    190   }
    191   return $new
    192 }
    193 
    194 # generate ID from the Node-to-Point msg contents
    195 # (exactly how it was transferred inside base64)
    196 proc n2p_id {binmsg} {
    197   set hash [::sha2::sha256 -bin -- $binmsg]
    198   set trimbased [string range [binary encode base64 $hash] 0 19]
    199   return [string map {+ A - A / z _ z} $trimbased]
    200 }
    201 
    202 # ensure database file is created
    203 proc createdb {fname} {
    204   sqlite3 fdb $fname
    205   fdb eval {
    206     PRAGMA journal_mode=WAL;
    207     CREATE TABLE `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT,
    208       `msgid` VARCHAR(20) NOT NULL UNIQUE,
    209       `timestamp` INT NOT NULL,
    210       `echoname` VARCHAR(120) NOT NULL,
    211       `repto` VARCHAR(120) NOT NULL,
    212       `msgfrom` VARCHAR(120) NOT NULL,
    213       `msgfromaddr` VARCHAR(120) NOT NULL,
    214       `msgto` VARCHAR(120) NOT NULL,
    215       `subj` VARCHAR(120) NOT NULL,
    216       `body` TEXT NOT NULL,
    217       `content_id` VARCHAR(20) NOT NULL);
    218   }
    219   fdb close
    220 }
    221 
    222 # main logic proc
    223 proc fetchiidb {url echos dbfile dolog maxids} {
    224   if {$maxids < 12} {set maxids 12}
    225   # trim the parameters 
    226   set url [string trim $url]
    227   set echos [string trim $echos]
    228   set dbfile [file normalize [string trim $dbfile]]
    229   if {![file exists $dbfile]} {createdb $dbfile}
    230   sqlite3 msgdb $dbfile
    231   # attempt to fetch the echolist if echos are empty
    232   if {$echos eq {}} {
    233     if {$dolog eq 1} {puts "Fetching echolist..."}
    234     set echolist [getfile "$url/list.txt"]
    235     set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}]
    236   } else {
    237     set echos [split $echos "/,;"]
    238   }
    239   set echos [string trim [lmap s $echos {string trim $s " \t\r\n"}] " \t\r\n"]
    240   if {$dolog eq 1} {puts "Echos to fetch: $echos"}
    241   if {$dolog eq 1} {puts "Building message indexes..."}
    242   set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]]
    243   set datalines [split $echodata \n]
    244   # iterate over the fetched data and fetch corresponding messages
    245   set curecho ""
    246   set echomap ""
    247   # build the map of lists of message IDs
    248   foreach line $datalines {
    249     set line [string trim $line " \t\r\n"]
    250     if {$line ne ""} {
    251       # detect if the line is related to echo name or message ID 
    252       if {[string first "." $line] eq -1} { # message ID
    253         if {[string length $line] == 20} { # filter out invalid IDs
    254           if {$curecho ne ""} {
    255             dict lappend echomap $curecho $line
    256           }
    257         }
    258       } else { # echo name
    259         set curecho $line
    260         dict set echomap $curecho ""
    261       }
    262     }
    263   }
    264   if {$dolog eq 1} {puts "Echomap built"}
    265   # pass the echo list and fetch the message IDs
    266   # now, process the map we've built
    267   dict for {echoname msgids} $echomap {
    268     if {![string match *.* $echoname]} {continue} 
    269     if {[llength $msgids] eq 0} {continue}
    270     # get the existing message IDs in the echo
    271     set oldmsgids [msgdb eval {SELECT `msgid` FROM `msg` WHERE `echoname` = $echoname ORDER BY `id` ASC;}]
    272     # pre-filter the new message IDs to fetch
    273     set newmsgids [listcomp $msgids $oldmsgids] 
    274     set idgroups ""
    275     set grcount 0
    276     set localcount 0
    277     set globalcount 0
    278     foreach nmid $newmsgids { # iterate over new messages to group them
    279       if {$nmid ne ""} {
    280         set cid [string trim [msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $nmid;}]]
    281         if {$nmid ne $cid} {
    282           incr globalcount
    283           # insert new message ID to the echo mapping
    284           dict lappend idgroups $grcount $nmid 
    285           incr localcount
    286           if {$localcount > $maxids} {
    287             incr grcount
    288             set localcount 0
    289           } 
    290         }
    291       }
    292     }
    293     if {$globalcount > 0} {
    294       if {$dolog eq 1} {puts "Fetching $globalcount new messages from $echoname..."}
    295       dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages
    296         # get the message data in the bundle format
    297         set plen 0
    298         set retries 4
    299         while {$plen < $globalcount} {
    300           set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]]
    301           set bdata [lmap m [split $msgbundle "\n"] {
    302             set m [string trim $m]
    303             if {$m eq ""} {continue}
    304             set m
    305           }]
    306           set plen [llength $bdata]
    307           incr retries -1
    308           if {$retries < 1} {break}
    309         }
    310         foreach bline $bdata {
    311           set parts [split $bline ":"]
    312           if {[llength $parts] > 1} { # valid message
    313             set mid [string trim [lindex $parts 0]]
    314             set bdata [binary decode base64 [lindex $parts 1]]
    315             # calculate ii Node-to-Point ID to verify the message integrity
    316             set content_id [n2p_id $bdata]
    317             set mdata [encoding convertfrom utf-8 $bdata]
    318             set msglines [split $mdata "\n"]
    319             set replyto ""
    320             set tags [split [lindex $msglines 0] "/"]
    321             if {[dict exists $tags repto]} {
    322               set replyto [dict get $tags repto]
    323             } else {set replyto ""}
    324             set echoarea [string trim [lindex $msglines 1]]
    325             set timestamp [string trim [lindex $msglines 2]]
    326             set msgfrom [string trim [lindex $msglines 3]]
    327             set msgfromaddr [string trim [lindex $msglines 4]]
    328             set msgto [string trim [lindex $msglines 5]]
    329             set subj [string trim [lindex $msglines 6]]
    330             set msgbody [string trimright [lrange $msglines 8 end]]
    331             msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, 
    332               `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 
    333               VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);}
    334           }
    335         }
    336       }
    337     }
    338   }
    339   msgdb close
    340 }
    341 
    342 proc massfetch {echos db dolog} {
    343   global appdir
    344   if {$dolog eq 1} {puts "No ii/idec station URL specified, using stations.txt"}
    345   set stfile [file join $appdir "stations.txt"]
    346   if {[file exists $stfile]} {
    347     set stlist [readfile $stfile]
    348     dict for {station stmaxids} $stlist {
    349       set station [string trim $station]
    350       if {$station ne "" && ![string match "#*" $station]} {
    351         if {$dolog eq 1} {puts "Fetching from $station"}
    352         fetchiidb $station $echos $db $dolog $stmaxids
    353       }
    354     }
    355   } else {
    356     if {$dolog eq 1} {puts "No stations.txt found, bailing out!"}
    357   }
    358 }
    359 
    360 # end of procs, start the entrypoint
    361 if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return}
    362 
    363 set scriptpath [file normalize [info script]]
    364 set appdir [file dirname $scriptpath]
    365 # check if we're running from a starpack
    366 if [string match *app-tiifetch $appdir] {
    367   set appdir [file normalize [file join $appdir ".." ".." ".." ]]
    368 }
    369 set localdb [file join $appdir "tii.db"]
    370 
    371 # populate general HTTP configuration 
    372 set cfgfile [file join $appdir "config.txt"]
    373 set net_timeout 5000
    374 if {[file exists $cfgfile]} {
    375   set cfg [readfile $cfgfile]
    376   if {[dict exists $cfg useragent]} {
    377     ::http::config -useragent [dict get $cfg useragent] 
    378   }
    379   if {[dict exists $cfg proxyhost]} {
    380     ::http::config -proxyhost [dict get $cfg proxyhost] 
    381   }
    382   if {[dict exists $cfg proxyport]} {
    383     ::http::config -proxyport [dict get $cfg proxyport] 
    384   }
    385   if {[dict exists $cfg net_timeout]} {
    386     set net_timeout [dict get $cfg net_timeout]
    387   }
    388 }
    389 
    390 if {$argc > 0} {
    391   if {$argc > 2} {
    392     set localdb [lindex $argv 2]
    393   }
    394   puts "Fetching messages, please wait..."
    395   set sturl [string trim [lindex $argv 0]]
    396   if {$sturl eq ""} {
    397     massfetch [lindex $argv 1] $localdb 1
    398   } else {
    399     fetchiidb $sturl [lindex $argv 1] $localdb 1 12
    400   }
    401   puts "Messages fetched"
    402 } else {
    403   puts "Fetching messages, please wait..."
    404   massfetch "" $localdb 1
    405   puts "Messages fetched"
    406 }
    407