tii

Tcl-based suite for working with ii/idec protocol
git clone git://git.luxferre.top/tii.git
Log | Files | Refs | README

tiifetch.tcl (13491B)


      1 #!/usr/bin/env tclsh
      2 # tiifetch: fetch all data from an ii/idec station into the local text db
      3 # (see https://github.com/idec-net/new-docs/blob/master/protocol-en.md)
      4 # Usage: tiifetch.tcl [station_url] [echos] [db_file]
      5 # The echo list should be delimited with slash (/), comma (,) or semicolon (;)
      6 # if no echos are specified (or "" is passed), then list.txt will be fetched
      7 # and then all missing echo content from it will be downloaded
      8 # If db_dir isn't specified, it's fetched and merged into the
      9 # tiidb directory in the program root with echoconfs and messages respectively
     10 # This component only fetches the messages, doesn't parse or display them
     11 # Supported protocols: HTTP, HTTPS, Gemini, Spartan, Gopher/Finger/Nex
     12 # Depends on Tcllib for URI parsing and SQLite3 for data storage
     13 # Created by Luxferre in 2024, released into public domain
     14 
     15 package require http
     16 package require uri
     17 package require sqlite3
     18 package require sha256
     19 
     20 # autodetect TclTLS support and enable HTTPS request support if detected
     21 set tls_support 0
     22 catch {package require tls; set tls_support 1}
     23 if {$tls_support eq 1} {
     24   ::http::register https 443 [list ::tls::socket -autoservername true]
     25 }
     26 
     27 proc url2dict {inputurl} {
     28   set out [dict create]
     29   if [regexp {^(.*)://} $inputurl _ lscheme] {
     30     dict set out scheme $lscheme
     31   } else {
     32     dict set out handler "render_handler_invalid"
     33     return $out
     34   }
     35   set rawout [::uri::split [regsub {^.*://} $inputurl "http://"]]
     36   set rhost [dict get $rawout host]
     37   set rpath [dict get $rawout path]
     38   set rport [dict get $rawout port]
     39   set secondarydata [dict get $rawout query]
     40   dict set out host $rhost
     41   set selector $rpath
     42   dict set out handler "render_handler_$lscheme"
     43   # protocol-specific request logic
     44   switch "$lscheme" {
     45     gophers -
     46     gopher {
     47       if {$rport eq ""} {set rport 70}
     48       set selector [string cat [string range $rpath 1 end] "\r\n"]
     49       dict set out handler render_handler_gopher
     50     }
     51     finger {
     52       if {$rport eq ""} {set rport 79}
     53       set selector "$selector\r\n"
     54     }
     55     spartan {
     56       if {$rport eq ""} {set rport 300}
     57       if {$rpath eq ""} {set rpath "/"}
     58       if {![string match "/*" $rpath]} {
     59         set rpath "/$rpath"
     60       }
     61       set blen [string length $secondarydata]
     62       set selector "$rhost $rpath $blen\r\n$secondarydata"
     63     }
     64     nex {
     65       if {$rport eq ""} {set rport 1900}
     66       set selector "$selector\r\n"
     67     }
     68     gemini {
     69       if {$rport eq ""} {set rport 1965}
     70       set selector "$inputurl\r\n"
     71     }
     72     default {dict set out handler render_handler_none}
     73   }
     74   dict set out path $rpath
     75   dict set out selector $selector
     76   dict set out port $rport
     77   return $out
     78 }
     79 
     80 proc reqresp {host port reqline is_tls encoding} {
     81   global sock_response net_timeout
     82   set sock 0
     83   if {$is_tls eq 1} {
     84     catch {set sock [::tls::socket -autoservername true -async $host $port]}
     85   } elseif {$is_tls eq 2} {
     86     catch {set sock [::tls::socket -autoservername true -async $host $port]}
     87     if {$sock eq 0} {catch {set sock [socket -async $host $port]}}
     88   } else {
     89     catch {set sock [socket -async $host $port]}
     90   }
     91   if {$sock eq 0} {set sock_response ""; return}
     92   global rcv_end_$sock
     93   unset -nocomplain rcv_end_$sock
     94   if {$encoding eq ""} {set encoding utf-8}
     95   fconfigure $sock -translation binary -buffering none -encoding $encoding
     96   fileevent $sock writable [list connected $sock $reqline]
     97   proc connected {sock reqline} {
     98     fileevent $sock writable {}
     99     puts -nonewline $sock "$reqline"
    100     flush $sock
    101     fileevent $sock readable [list rdbl $sock]
    102   }
    103   set sock_response ""
    104   proc rdbl {sock} {
    105     global sock_response rcv_end_$sock
    106     while {![eof $sock]} {
    107       append sock_response [read $sock]
    108     }
    109     set rcv_end_$sock 0
    110   }
    111   after $net_timeout "global rcv_end_$sock; set rcv_end_$sock 1"
    112   vwait rcv_end_$sock
    113   catch {close $sock}
    114   unset -nocomplain rcv_end_$sock
    115 }
    116 
    117 
    118 # file download helper
    119 proc getfile {url} {
    120   global net_timeout
    121   set url [regsub -all {([^:])//} $url {\1/}]
    122   set urlparts [url2dict $url]
    123   set scheme [dict get $urlparts scheme]
    124   set host [dict get $urlparts host]
    125   set port [dict get $urlparts port]
    126   set sel [dict get $urlparts selector]
    127   global sock_response tls_support
    128   switch $tls_support {
    129     0 {set localtls 0}
    130     1 {set localtls 2}
    131   }
    132   switch $scheme {
    133     gophers - gopher - finger - nex {
    134       if {$scheme eq "gophers"} {set localtls 1}
    135       reqresp $host $port $sel $localtls utf-8
    136       set body "$sock_response"
    137       set sock_response ""
    138       return $body
    139     }
    140     gemini - spartan {
    141       if {$scheme eq "gemini"} {set localtls 1}
    142       reqresp $host $port $sel $localtls utf-8
    143       set body "$sock_response"
    144       set sock_response ""
    145       if {[regexp {^([^\n]*)\n} $body _ statusline]} {
    146         set statusline [string trimright $statusline]
    147         set statusparts [split $statusline " "]
    148         set statuscode [lindex $statusparts 0]
    149         set mainstatuscode [string index $statuscode 0]
    150         if {$mainstatuscode eq 2} {
    151           regsub {.*?\n} $body "" body
    152           return $body
    153         } else {return {}}
    154       } else {return {}}
    155     }
    156     https - http {
    157       set hs ""
    158       while {$hs eq ""} {
    159         set hs [::http::geturl $url -binary 1 -keepalive 0 -timeout $net_timeout]
    160       }
    161       if {[::http::ncode $hs] eq "200"} {
    162         return [::http::data $hs]
    163       } else {return {}}
    164     }
    165     default {return {}}
    166   }
    167 }
    168 
    169 # file read helper
    170 proc readfile {fname} {
    171   set fp [open $fname r]
    172   fconfigure $fp -encoding utf-8
    173   set data [read $fp]
    174   close $fp
    175   return $data
    176 }
    177 
    178 # file write helper (leaves a newline at the end)
    179 proc writefileln {fname data} {
    180   set fp [open $fname w]
    181   fconfigure $fp -encoding utf-8
    182   puts $fp $data
    183   close $fp
    184 }
    185 
    186 # list comparison helper (listcomp $new $old)
    187 proc listcomp {new old} {
    188   foreach i $old {
    189     set new [lsearch -all -inline -not -exact $new $i]
    190   }
    191   return $new
    192 }
    193 
    194 # generate ID from the Node-to-Point msg contents
    195 # (exactly how it was transferred inside base64)
    196 proc n2p_id {binmsg} {
    197   set hash [::sha2::sha256 -bin -- $binmsg]
    198   set trimbased [string range [binary encode base64 $hash] 0 19]
    199   return [string map {+ A - A / z _ z} $trimbased]
    200 }
    201 
    202 # ensure database file is created
    203 proc createdb {fname} {
    204   sqlite3 fdb $fname
    205   fdb eval {
    206     CREATE TABLE `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT,
    207       `msgid` VARCHAR(20) NOT NULL UNIQUE,
    208       `timestamp` INT NOT NULL,
    209       `echoname` VARCHAR(120) NOT NULL,
    210       `repto` VARCHAR(120) NOT NULL,
    211       `msgfrom` VARCHAR(120) NOT NULL,
    212       `msgfromaddr` VARCHAR(120) NOT NULL,
    213       `msgto` VARCHAR(120) NOT NULL,
    214       `subj` VARCHAR(120) NOT NULL,
    215       `body` TEXT NOT NULL,
    216       `content_id` VARCHAR(20) NOT NULL);
    217   }
    218   fdb close
    219 }
    220 
    221 # main logic proc
    222 proc fetchiidb {url echos dbfile dolog maxids} {
    223   if {$maxids < 12} {set maxids 12}
    224   # trim the parameters 
    225   set url [string trim $url]
    226   set echos [string trim $echos]
    227   set dbfile [file normalize [string trim $dbfile]]
    228   if {![file exists $dbfile]} {createdb $dbfile}
    229   sqlite3 msgdb $dbfile
    230   # attempt to fetch the echolist if echos are empty
    231   if {$echos eq {}} {
    232     if {$dolog eq 1} {puts "Fetching echolist..."}
    233     set echolist [getfile "$url/list.txt"]
    234     set echos [lmap e [split $echolist "\n"] {lindex [split $e ":"] 0}]
    235   } else {
    236     set echos [split $echos "/,;"]
    237   }
    238   set echos [string trim [lmap s $echos {string trim $s " \t\r\n"}] " \t\r\n"]
    239   if {$dolog eq 1} {puts "Echos to fetch: $echos"}
    240   if {$dolog eq 1} {puts "Building message indexes..."}
    241   set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]]
    242   set datalines [split $echodata \n]
    243   # iterate over the fetched data and fetch corresponding messages
    244   set curecho ""
    245   set echomap ""
    246   # build the map of lists of message IDs
    247   foreach line $datalines {
    248     set line [string trim $line " \t\r\n"]
    249     if {$line ne ""} {
    250       # detect if the line is related to echo name or message ID 
    251       if {[string first "." $line] eq -1} { # message ID
    252         if {[string length $line] == 20} { # filter out invalid IDs
    253           if {$curecho ne ""} {
    254             dict lappend echomap $curecho $line
    255           }
    256         }
    257       } else { # echo name
    258         set curecho $line
    259         dict set echomap $curecho ""
    260       }
    261     }
    262   }
    263   if {$dolog eq 1} {puts "Echomap built"}
    264   # pass the echo list and fetch the message IDs
    265   # now, process the map we've built
    266   dict for {echoname msgids} $echomap {
    267     if {![string match *.* $echoname]} {continue} 
    268     if {[llength $msgids] eq 0} {continue}
    269     # get the existing message IDs in the echo
    270     set oldmsgids [msgdb eval {SELECT `msgid` FROM `msg` WHERE `echoname` = $echoname ORDER BY `id` ASC;}]
    271     # pre-filter the new message IDs to fetch
    272     set newmsgids [listcomp $msgids $oldmsgids] 
    273     set idgroups ""
    274     set grcount 0
    275     set localcount 0
    276     set globalcount 0
    277     foreach nmid $newmsgids { # iterate over new messages to group them
    278       if {$nmid ne ""} {
    279         set cid [string trim [msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $nmid;}]]
    280         if {$nmid ne $cid} {
    281           incr globalcount
    282           # insert new message ID to the echo mapping
    283           dict lappend idgroups $grcount $nmid 
    284           incr localcount
    285           if {$localcount > $maxids} {
    286             incr grcount
    287             set localcount 0
    288           } 
    289         }
    290       }
    291     }
    292     if {$globalcount > 0} {
    293       if {$dolog eq 1} {puts "Fetching $globalcount new messages from $echoname..."}
    294       dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages
    295         # get the message data in the bundle format
    296         set plen 0
    297         set retries 4
    298         while {$plen < $globalcount} {
    299           set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]]
    300           set bdata [lmap m [split $msgbundle "\n"] {
    301             set m [string trim $m]
    302             if {$m eq ""} {continue}
    303             set m
    304           }]
    305           set plen [llength $bdata]
    306           incr retries -1
    307           if {$retries < 1} {break}
    308         }
    309         foreach bline $bdata {
    310           set parts [split $bline ":"]
    311           if {[llength $parts] > 1} { # valid message
    312             set mid [string trim [lindex $parts 0]]
    313             set bdata [binary decode base64 [lindex $parts 1]]
    314             # calculate ii Node-to-Point ID to verify the message integrity
    315             set content_id [n2p_id $bdata]
    316             set mdata [encoding convertfrom utf-8 $bdata]
    317             set msglines [split $mdata "\n"]
    318             set replyto ""
    319             set tags [split [lindex $msglines 0] "/"]
    320             if {[dict exists $tags repto]} {
    321               set replyto [dict get $tags repto]
    322             } else {set replyto ""}
    323             set echoarea [string trim [lindex $msglines 1]]
    324             set timestamp [string trim [lindex $msglines 2]]
    325             set msgfrom [string trim [lindex $msglines 3]]
    326             set msgfromaddr [string trim [lindex $msglines 4]]
    327             set msgto [string trim [lindex $msglines 5]]
    328             set subj [string trim [lindex $msglines 6]]
    329             set msgbody [string trimright [lrange $msglines 8 end]]
    330             msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, 
    331               `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 
    332               VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);}
    333           }
    334         }
    335       }
    336     }
    337   }
    338   msgdb close
    339 }
    340 
    341 proc massfetch {echos db dolog} {
    342   global appdir
    343   if {$dolog eq 1} {puts "No ii/idec station URL specified, using stations.txt"}
    344   set stfile [file join $appdir "stations.txt"]
    345   if {[file exists $stfile]} {
    346     set stlist [readfile $stfile]
    347     dict for {station stmaxids} $stlist {
    348       set station [string trim $station]
    349       if {$station ne "" && ![string match "#*" $station]} {
    350         if {$dolog eq 1} {puts "Fetching from $station"}
    351         fetchiidb $station $echos $db $dolog $stmaxids
    352       }
    353     }
    354   } else {
    355     if {$dolog eq 1} {puts "No stations.txt found, bailing out!"}
    356   }
    357 }
    358 
    359 # end of procs, start the entrypoint
    360 if {![info exists argv0] || [file tail [info script]] ne [file tail $argv0]} {return}
    361 
    362 set scriptpath [file normalize [info script]]
    363 set appdir [file dirname $scriptpath]
    364 # check if we're running from a starpack
    365 if [string match *app-tiifetch $appdir] {
    366   set appdir [file normalize [file join $appdir ".." ".." ".." ]]
    367 }
    368 set localdb [file join $appdir "tii.db"]
    369 
    370 # populate general HTTP configuration 
    371 set cfgfile [file join $appdir "config.txt"]
    372 set net_timeout 5000
    373 if {[file exists $cfgfile]} {
    374   set cfg [readfile $cfgfile]
    375   if {[dict exists $cfg useragent]} {
    376     ::http::config -useragent [dict get $cfg useragent] 
    377   }
    378   if {[dict exists $cfg proxyhost]} {
    379     ::http::config -proxyhost [dict get $cfg proxyhost] 
    380   }
    381   if {[dict exists $cfg proxyport]} {
    382     ::http::config -proxyport [dict get $cfg proxyport] 
    383   }
    384   if {[dict exists $cfg net_timeout]} {
    385     set net_timeout [dict get $cfg net_timeout]
    386   }
    387 }
    388 
    389 if {$argc > 0} {
    390   if {$argc > 2} {
    391     set localdb [lindex $argv 2]
    392   }
    393   puts "Fetching messages, please wait..."
    394   set sturl [string trim [lindex $argv 0]]
    395   if {$sturl eq ""} {
    396     massfetch [lindex $argv 1] $localdb 1
    397   } else {
    398     fetchiidb $sturl [lindex $argv 1] $localdb 1 12
    399   }
    400   puts "Messages fetched"
    401 } else {
    402   puts "Fetching messages, please wait..."
    403   massfetch "" $localdb 1
    404   puts "Messages fetched"
    405 }
    406