commit 76143e66b0db5975243d64c6689a44cfb8c2abc4
parent ca880cef641517ee62e0d821116de357f0a991d5
Author: Luxferre <lux@ferre>
Date: Fri, 25 Oct 2024 10:19:12 +0300
Implemented max message ID per request customization, also added ii-doc.txt
Diffstat:
M | README | | | 14 | +++++++++++--- |
A | ii-doc.txt | | | 135 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | tiifetch.tcl | | | 15 | ++++++--------- |
3 files changed, 152 insertions(+), 12 deletions(-)
diff --git a/README b/README
@@ -62,9 +62,17 @@ Fetching is supported for the following station URL schemes and protocols:
If the station_url parameter is empty or no parameters are passed at all,
tiifetch.tcl will look for a file called stations.txt that lists (each on a
-new line) all the station URLs to sync from. Messages from all listed stations
-will be merged into the same echo conference database. You can comment out a
-station to temporarily stop fetching from it by prepending the # sign.
+new line) all the station URLs to sync from, each followed by the number of
+message IDs that can be fetched from that station with a single GET request
+(this number can be 12 or more; smaller values are treated as 12):
+
+https://url1 389
+gemini://url2 24
+http://url3 12
+
+Messages from all listed stations will be merged into the same echo database.
+You can also comment out a station to temporarily stop fetching from it by
+prepending the # sign.
### Viewing the messages from CLI (tiiview.tcl): ###
diff --git a/ii-doc.txt b/ii-doc.txt
@@ -0,0 +1,135 @@
+ii protocol documentation
+=========================
+This document describes the basic ii protocol, as implemented in tii.
+It aims to be as clear and concise as possible.
+
+Network structure
+-----------------
+Clients aka points can:
+* post messages
+* fetch echos (conferences) and their message ID lists
+* fetch messages by their IDs
+
+Nodes aka stations can:
+* accept (or not accept) posted messages from points
+* serve echo lists
+* serve echos and their message ID lists
+* serve message bundles
+* fetch echos and messages from other stations
+
+The main transport protocol is currently HTTP/HTTPS, although the spec doesn't
+theoretically limit the ways of message transfer. E.g. fetching can be easily
+implemented over Gopher/Finger/Nex/Spartan/Gemini etc.
+
+The API spec below is given for the HTTP(S) protocol.
+
+Echo and message naming convention
+----------------------------------
+Within the network, echo names must be from 3 to 120 characters long and have
+at least one dot (.) character. Message IDs must be exactly 20 characters long
+and depend on the message contents hash (see the exact algorithm below).
+
+Station (node) HTTP API (as implemented in tii)
+-----------------------------------------------
+Every station must implement the following HTTP API calls.
+In case of multi-line responses, the newline separator must be "\n" character
+(line feed, ASCII 10).
+
+- Fetching the public echo list -
+
+Request: GET /list.txt
+Response: newline-separated list of echo_name:msg_count:echo_description
+
+- Listing messages in the echo(s) -
+
+Request: GET /u/e/echo.1.name/echo.2.name/...
+Response: newline-separated list of echo names and message IDs in the format:
+
+echo.1.name
+msgid1fromecho1
+msgid2fromecho1
+...
+echo.2.name
+msgid1fromecho2
+msgid2fromecho2
+...
+
+When a new message is posted to the echo, it gets appended to the end of the
+corresponding message ID list for this echo.
+
+- Fetching the message bundles -
+
+Request: GET /u/m/msgid1/msgid2/...
+Response: newline-separated list of msgid:base64_msgtext
+
+where base64_msgtext is a Base64-encoded Node-to-Point Message (see below).
+
+- Posting a message -
+
+Request: POST /u/point
+Content-Type: application/x-www-form-urlencoded
+Data: pauth=auth_string&tmsg=base64_msgtext
+Response: in case of success, must start with "msg ok"
+
+where base64_msgtext is the Base64-encoded Point-to-Node Message (see below).
+The length of the tmsg field must not exceed 87382 bytes.
+
+Node-to-Point Message format
+----------------------------
+The encoding must be UTF-8 and the newline separator must be "\n" (ASCII 10).
+Every Node-to-Point message contains the following fields in this particular
+order; all of them are mandatory and each starts on a new line:
+
+* Line 1: message tags. Must start with "ii/ok". If "ii/ok/repto/id" form is
+ encountered, then the id refers to the message this message replies to.
+* Line 2: echo name where the message was posted.
+* Line 3: message Unix timestamp (integer, in seconds, UTC)
+* Line 4: message sender name
+* Line 5: message sender address (autofilled by the originating station)
+* Line 6: message recipient name (or All if there's no particular recipient)
+* Line 7: message subject
+* Line 8: must be empty
+* Line 9 and further: message body
+
+Point-to-Node Message format
+----------------------------
+The encoding must be UTF-8 and the newline separator must be "\n" (ASCII 10).
+Every Point-to-Node message contains the following fields in this particular
+order; all of them are mandatory and each starts on a new line:
+
+* Line 1: echo name where the message is being posted.
+* Line 2: message recipient name (or All if there's no particular recipient)
+* Line 3: message subject
+* Line 4: must be empty
+* Line 5 and further: message body
+
+If you are replying to a message [msgid], then message body must begin with:
+
+@repto:msgid
+
+and the message text itself must start on the next line.
+
+Message ID generation algorithm
+-------------------------------
+This algorithm must be implemented by every station to generate message IDs:
+
+1. Calculate SHA256 of the message in the Node-to-Point format as binary data.
+2. Calculate Base64 of the resulting binary hash sum.
+3. Truncate to the first 20 characters.
+4. Replace all occurrences of + or - with A, and / or _ with Z.
+5. The result of these operations is your ii message ID.
+
+Implementation notes
+--------------------
+* Most HTTP servers are configured to reject long GET lines, so tii passes a
+ limited number of message IDs to the /u/m endpoint per request. This limit
+ can be configured per station in the stations.txt file.
+* Two of the crucial validations are that message IDs are exactly 20
+ characters long and that all messages (Node-to-Point and Point-to-Node)
+ strictly have LF line endings, not CRLF.
+* The message order in an echo does not always match the timestamp ordering;
+ it is fully up to the client how to sort the messages internally. The
+ messages are only guaranteed to be saved by the server in the order they
+ arrive at the server.
+
+--- Luxferre ---
diff --git a/tiifetch.tcl b/tiifetch.tcl
@@ -189,7 +189,8 @@ proc listcomp {a b} {
}
# main logic proc
-proc fetchiidb {url echos dbfile dolog} {
+proc fetchiidb {url echos dbfile dolog maxids} {
+ if {$maxids < 12} {set maxids 12}
# trim the parameters
set url [string trim $url]
set echos [string trim $echos]
@@ -233,10 +234,6 @@ proc fetchiidb {url echos dbfile dolog} {
}
}
if {$dolog eq 1} {puts "Echomap built"}
- # set how many message IDs we can pass in a single query
- # (assuming the maximum GET length is 256 chars)
- # and we assume 21 character per message ID
- set maxids 12
# pass the echo list and fetch the message IDs
# now, process the map we've built
dict for {echoname msgids} $echomap {
@@ -297,12 +294,12 @@ proc massfetch {echos db dolog} {
if {$dolog eq 1} {puts "No ii/idec station URL specified, using stations.txt"}
set stfile [file join $appdir "stations.txt"]
if {[file exists $stfile]} {
- set stlist [split [readfile $stfile] "\n"]
- foreach station $stlist {
+ set stlist [readfile $stfile]
+ dict for {station stmaxids} $stlist {
set station [string trim $station]
if {$station ne "" && ![string match "#*" $station]} {
if {$dolog eq 1} {puts "Fetching from $station"}
- fetchiidb $station $echos $db $dolog
+ fetchiidb $station $echos $db $dolog $stmaxids
}
}
} else {
@@ -345,7 +342,7 @@ if {$argc > 0} {
if {$sturl eq ""} {
massfetch [lindex $argv 1] $localdb 1
} else {
- fetchiidb $sturl [lindex $argv 1] $localdb 1
+ fetchiidb $sturl [lindex $argv 1] $localdb 1 12
}
puts "Messages fetched"
} else {