reverted fetching to a simpler and more robust algo - tii - Tcl-based suite for working with ii/idec protocol

commit 6bc8a2072c20fd9790b0788e79961f8d94098fce
parent d8b658a92b6e6a958a8aabe38147749065826033
Author: Luxferre <lux@ferre>
Date:   Sun, 27 Oct 2024 09:44:54 +0200

reverted fetching to a simpler and more robust algo

Diffstat:
M ii-doc.txt  | 13 -------------
M tiifetch.tcl  | 164 +++++++++++++++++++++++++++++++++++--------------------------------------------

2 files changed, 72 insertions(+), 105 deletions(-)
diff --git a/ii-doc.txt b/ii-doc.txt
@@ -32,8 +32,6 @@ and depend on the message contents hash (see the exact algorithm below).
 Station (node) HTTP API (as implemented in tii)
 -----------------------------------------------
 Every station must implement the following HTTP API calls.
-The tii codebase also implements some IDEC extenstions but they are entirely
-optional.
 In case of multi-line responses, the newline separator must be "\n" character
 (line feed, ASCII 10).
 
@@ -49,8 +47,6 @@ reported by nodes still should not be the basis for any client-side logic.
 - Listing messages in the echo(s) -
 
 Request: GET /u/e/echo.1.name/echo.2.name/...
-IDEC extended syntax: GET /u/e/echo.1.name/echo.2.name/.../offset:count
-(where offset can be negative)
 Response: newline-separated list of echo names and message IDs in the format:
 
 echo.1.name
@@ -82,15 +78,6 @@ Response: in case of success, must start with "msg ok"
 where base64_msgtext is the Base64-encoded Point-to-Node Message (see below).
 The maximum length of the tmsg field must be 87382 bytes.
 
-- Listing IDEC features (IDEC extension) -
-
-Request: GET /x/features
-Response: newline-separated list of supported non-standard URL paths
-(u/e, /list.txt etc)
-
-Note: if the /x/features path is unavailable, the client must assume that no
-IDEC extensions except /list.txt are supported by the server.
-
 Node-to-Point Message format
 ----------------------------
 The encoding must be UTF-8 and the newline separator must be "\n" (ASCII 10).
diff --git a/tiifetch.tcl b/tiifetch.tcl
@@ -181,14 +181,11 @@ proc writefileln {fname data} {
 }
 
 # list comparison helper (listcomp $new $old)
-proc listcomp {a b} {
-  set diff {}
-  foreach i $a {
-    if {[lsearch -exact $b $i]==-1} {
-      lappend diff $i
-    }
+proc listcomp {new old} {
+  foreach i $old {
+    set new [lsearch -all -inline -not -exact $new $i]
   }
-  return $diff
+  return $new
 }
 
 # generate ID from the Node-to-Point msg contents
@@ -199,6 +196,25 @@ proc n2p_id {binmsg} {
   return [string map {+ A - A / z _ z} $trimbased]
 }
 
+# create the SQLite database file $fname with an empty `msg` table (CREATE TABLE has no IF NOT EXISTS guard, so call this only for a new file)
+proc createdb {fname} {
+  sqlite3 fdb $fname
+  fdb eval {
+    CREATE TABLE `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT,
+      `msgid` VARCHAR(20) NOT NULL UNIQUE,
+      `timestamp` INT NOT NULL,
+      `echoname` VARCHAR(120) NOT NULL,
+      `repto` VARCHAR(120) NOT NULL,
+      `msgfrom` VARCHAR(120) NOT NULL,
+      `msgfromaddr` VARCHAR(120) NOT NULL,
+      `msgto` VARCHAR(120) NOT NULL,
+      `subj` VARCHAR(120) NOT NULL,
+      `body` TEXT NOT NULL,
+      `content_id` VARCHAR(20) NOT NULL);
+  }
+  fdb close
+}
+
 # main logic proc
 proc fetchiidb {url echos dbfile dolog maxids} {
   if {$maxids < 12} {set maxids 12}
@@ -206,14 +222,8 @@ proc fetchiidb {url echos dbfile dolog maxids} {
   set url [string trim $url]
   set echos [string trim $echos]
   set dbfile [file normalize [string trim $dbfile]]
-  # prepare starting script
+  if {![file exists $dbfile]} {createdb $dbfile}
   sqlite3 msgdb $dbfile
-  msgdb eval {
-    CREATE TABLE IF NOT EXISTS `msg` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, `msgid` VARCHAR(20) UNIQUE,
-      `timestamp` INT, `echoname` VARCHAR(120), `repto` TEXT, `msgfrom` TEXT, `msgfromaddr` TEXT,
-      `msgto` TEXT, `subj` TEXT, `body` TEXT, `content_id` VARCHAR(20));
-  }
-
   # attempt to fetch the echolist if echos are empty
   if {$echos eq {}} {
     if {$dolog eq 1} {puts "Fetching echolist..."}
@@ -222,55 +232,17 @@ proc fetchiidb {url echos dbfile dolog maxids} {
   } else {
     set echos [split $echos "/,;"]
   }
-  set echos [lmap s $echos {string trim $s}]
+  set echos [string trim [lmap s $echos {string trim $s " \t\r\n"}] " \t\r\n"]
   if {$dolog eq 1} {puts "Echos to fetch: $echos"}
   if {$dolog eq 1} {puts "Building message indexes..."}
-  # get the IDEC extended feature list
-  set featurelist ""
-  catch {set featurelist [getfile [string cat $url "/x/features"]]}
-  set featurelist [lmap s [split $featurelist \n] {string trim $s}]
-  set datalines ""
-  if {[lsearch $featurelist u/e] > -1} {
-    # echoname => localcount map
-    set localcounts [msgdb eval {SELECT `echoname`, COUNT(`id`) FROM `msg` GROUP BY `echoname`;}]
-    foreach ename $echos {
-      if {$ename ne ""} {
-        set localdata ""
-        set localcount 0
-        if {[dict exists $localcounts $ename]} {
-          set localcount [dict get $localcounts $ename]
-        }
-        if {$localcount > 12} {
-          set diff 6
-          set needsync 1
-          while {$needsync eq 1} {
-            incr diff $diff
-            set localdata [getfile [string cat $url "/u/e/" $ename "/-$diff:1"]]
-            # control message id
-            set cmsg [string trim [lindex [split $localdata \n] 1]]
-            msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $cmsg;} row {
-              if {[string trim $row(msgid)] eq $cmsg} {
-                set needsync 0
-              }
-            }
-          }
-          set localdata [getfile [string cat $url "/u/e/" $ename "/-$diff:$diff"]]
-        } else {
-          set localdata [getfile [string cat $url "/u/e/" $ename]]
-        }
-        append datalines [string trim $localdata] \n
-      }
-    }
-  } else { # no extended feature support, pass the echo list and fetch the message IDs
-    set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]]
-    set datalines [split $echodata \n]
-  }
+  set echodata [getfile [string cat $url "/u/e/" [join $echos "/"]]]
+  set datalines [split $echodata \n]
   # iterate over the fetched data and fetch corresponding messages
   set curecho ""
   set echomap ""
   # build the map of lists of message IDs
   foreach line $datalines {
-    set line [string trim $line]
+    set line [string trim $line " \t\r\n"]
     if {$line ne ""} {
       # detect if the line is related to echo name or message ID 
       if {[string first "." $line] eq -1} { # message ID
@@ -290,53 +262,61 @@ proc fetchiidb {url echos dbfile dolog maxids} {
   # now, process the map we've built
   dict for {echoname msgids} $echomap {
     if {![string match *.* $echoname]} {continue} 
-    if {[llength msgids] eq 0} {continue}
+    if {[llength $msgids] eq 0} {continue}
     # get the existing message IDs in the echo
     set oldmsgids [msgdb eval {SELECT `msgid` FROM `msg` WHERE `echoname` = $echoname ORDER BY `id` ASC;}]
     # pre-filter the new message IDs to fetch
     set newmsgids [listcomp $msgids $oldmsgids] 
-    if {$dolog eq 1} {puts "Fetching [llength $newmsgids] new messages from $echoname..."}
     set idgroups ""
     set grcount 0
     set localcount 0
+    set globalcount 0
     foreach nmid $newmsgids { # iterate over new messages to group them
       if {$nmid ne ""} {
-        # insert new message ID to the echo mapping
-        dict lappend idgroups $grcount $nmid 
-        incr localcount
-        if {$localcount > $maxids} {
-          incr grcount
-          set localcount 0
-        } 
+        set cid [string trim [msgdb eval {SELECT `msgid` FROM `msg` WHERE `msgid` = $nmid;}]]
+        if {$nmid ne $cid} {
+          incr globalcount
+          # insert new message ID to the echo mapping
+          dict lappend idgroups $grcount $nmid 
+          incr localcount
+          if {$localcount > $maxids} {
+            incr grcount
+            set localcount 0
+          } 
+        }
       }
     }
-    dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages
-      # get the message data in the bundle format
-      set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]]
-      set bdata [split $msgbundle "\n"]
-      foreach bline $bdata {
-        set parts [split $bline ":"]
-        if {[llength $parts] > 1} { # valid message
-          set mid [lindex $parts 0]
-          set bdata [binary decode base64 [lindex $parts 1]]
-          # calculate ii Node-to-Point ID to verify the message integrity
-          set content_id [n2p_id $bdata]
-          set mdata [encoding convertfrom utf-8 $bdata]
-          set msglines [split $mdata "\n"]
-          set replyto ""
-          set tags [split [lindex $msglines 0] "/"]
-          if {[dict exists $tags repto]} {
-            set replyto [dict get $tags repto]
-          } else {set replyto ""}
-          set echoarea [string trim [lindex $msglines 1]]
-          set timestamp [string trim [lindex $msglines 2]]
-          set msgfrom [string trim [lindex $msglines 3]]
-          set msgfromaddr [string trim [lindex $msglines 4]]
-          set msgto [string trim [lindex $msglines 5]]
-          set subj [string trim [lindex $msglines 6]]
-          set msgbody [string trimright [lrange $msglines 8 end]]
-          msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 
-            VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);}
+    if {$globalcount > 0} {
+      if {$dolog eq 1} {puts "Fetching $globalcount new messages from $echoname..."}
+      dict for {mgrpind mgrp} $idgroups { # iterate over groups to fetch the messages
+        # get the message data in the bundle format
+        set msgbundle [getfile [string cat $url "/u/m/" [join $mgrp "/"]]]
+        set bdata [split $msgbundle "\n"]
+        foreach bline $bdata {
+          set parts [split $bline ":"]
+          if {[llength $parts] > 1} { # valid message
+            set mid [string trim [lindex $parts 0]]
+            set bdata [binary decode base64 [lindex $parts 1]]
+            # calculate ii Node-to-Point ID to verify the message integrity
+            set content_id [n2p_id $bdata]
+            set mdata [encoding convertfrom utf-8 $bdata]
+            set msglines [split $mdata "\n"]
+            set replyto ""
+            set tags [split [lindex $msglines 0] "/"]
+            if {[dict exists $tags repto]} {
+              set replyto [dict get $tags repto]
+            } else {set replyto ""}
+            set echoarea [string trim [lindex $msglines 1]]
+            set timestamp [string trim [lindex $msglines 2]]
+            set msgfrom [string trim [lindex $msglines 3]]
+            set msgfromaddr [string trim [lindex $msglines 4]]
+            set msgto [string trim [lindex $msglines 5]]
+            set subj [string trim [lindex $msglines 6]]
+            set msgbody [string trimright [lrange $msglines 8 end]]
+            msgdb eval {INSERT OR IGNORE INTO `msg` (`msgid`, `timestamp`, `echoname`, `repto`, `msgfrom`, 
+              `msgfromaddr`, `msgto`, `subj`, `body`, `content_id`) 
+              VALUES ($mid, $timestamp, $echoarea, $replyto, $msgfrom, $msgfromaddr, $msgto, $subj, $msgbody, $content_id);}
+          }
         }
       }
     }

	tii - Tcl-based suite for working with ii/idec protocol
	git clone git://git.luxferre.top/tii.git
	Log | Files | Refs | README

M	ii-doc.txt	|	13	-------------
M	tiifetch.tcl	|	164	+++++++++++++++++++++++++++++++++++--------------------------------------------