This patch implements the Gemini protocol as described in the
specification[^1], with the following pieces missing:

 * Redirects. 3x status codes are not handled any differently from any
   other status code that has no body; the -L option has no effect on
   gemini URLs.

 * Certificate verification. Gemini servers rarely use certificates with a
   trust chain rooted in the certificates in /etc/ssl/certs; self-signed
   certificates are the norm. Option -k should be the default for the
   gemini protocol.

Other than that, things work:

        $ make
        $ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
        20 text/gemini
        # Directory listing

        => /~kaction ..
        => flake-dhall/ flake-dhall/    Nov 22 2020

 [^1]: https://gemini.circumlunar.space/docs/specification.html

Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v...@kaction.cc>
---

Notes:
    I would like to point out the following lines in gemini.c:
    
        err = Curl_read(conn, sockfd, into, more, &amount);
    
        /* XXX: This conditional can probably be eliminated by fixing
        * doing_get_proto function, but I do not know how.
        */
        if(err == CURLE_AGAIN)
          return CURLE_OK;
    
    I read gopher.c and http.c, and there is no check for CURLE_AGAIN there,
    so gemini.c can probably be improved; yet, if I remove this check,
    curl(1) fails with an error saying the socket is not ready for send/recv.
    
    Also, about redirects: I tried to call Curl_follow if (status == '3'),
    but it seems to be specific to HTTP(S) and did not result in the expected
    behaviour with the -L flag.
    
    Review and advice on how to improve the patch are welcome.

 include/curl/curl.h |   1 +
 lib/Makefile.inc    |   4 +-
 lib/gemini.c        | 269 ++++++++++++++++++++++++++++++++++++++++++++
 lib/gemini.h        |  55 +++++++++
 lib/url.c           |   5 +
 lib/urldata.h       |   2 +
 6 files changed, 334 insertions(+), 2 deletions(-)
 create mode 100644 lib/gemini.c
 create mode 100644 lib/gemini.h

diff --git a/include/curl/curl.h b/include/curl/curl.h
index a73418dce..d2b29ab91 100644
--- a/include/curl/curl.h
+++ b/include/curl/curl.h
@@ -1015,6 +1015,7 @@ typedef CURLSTScode (*curl_hstswrite_callback)(CURL *easy,
 #define CURLPROTO_SMB    (1<<26)
 #define CURLPROTO_SMBS   (1<<27)
 #define CURLPROTO_MQTT   (1<<28)
+#define CURLPROTO_GEMINI (1<<29)
 #define CURLPROTO_ALL    (~0) /* enable everything */
 
 /* long may be 32 or 64 bits, but we should never depend on anything else
diff --git a/lib/Makefile.inc b/lib/Makefile.inc
index 6d35704c0..4a9a3145e 100644
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@@ -61,7 +61,7 @@ LIB_CFILES = altsvc.c amigaos.c asyn-ares.c asyn-thread.c base64.c            \
   socks_gssapi.c socks_sspi.c speedcheck.c splay.c strcase.c strdup.c         \
   strerror.c strtok.c strtoofft.c system_win32.c telnet.c tftp.c timeval.c    \
   transfer.c urlapi.c version.c warnless.c wildcard.c x509asn1.c dynbuf.c     \
-  version_win32.c easyoptions.c easygetopt.c hsts.c
+  version_win32.c easyoptions.c easygetopt.c hsts.c gemini.c
 
 LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h    \
   content_encoding.h cookie.h curl_addrinfo.h curl_base64.h curl_ctype.h      \
@@ -80,7 +80,7 @@ LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h    \
   smb.h smtp.h sockaddr.h socketpair.h socks.h speedcheck.h splay.h strcase.h \
   strdup.h strerror.h strtok.h strtoofft.h system_win32.h telnet.h tftp.h     \
   timeval.h transfer.h urlapi-int.h urldata.h warnless.h wildcard.h           \
-  x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h
+  x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h gemini.h
 
 LIB_RCFILES = libcurl.rc
 
diff --git a/lib/gemini.c b/lib/gemini.c
new file mode 100644
index 000000000..b1eb9aeff
--- /dev/null
+++ b/lib/gemini.c
@@ -0,0 +1,269 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <dan...@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+
+#include <ctype.h>
+#include <string.h>
+#include "gemini.h"
+#include "urldata.h"
+#include "vtls/vtls.h"
+#include <curl/curl.h>
+#include "transfer.h"
+#include "sendf.h"
+#include "connect.h"
+#include "multiif.h"
+#include "progress.h"
+#include "gopher.h"
+#include "select.h"
+#include "strdup.h"
+#include "url.h"
+#include "escape.h"
+#include "warnless.h"
+#include "curl_printf.h"
+#include "curl_memory.h"
+/* The last #include file should be: */
+#include "memdebug.h"
+
+/*
+ * gemini_request() formats the request line that is sent to the server:
+ * "gemini://" followed by the URL's hostname and path, then "?" and the
+ * query when the URL has one, terminated by CRLF.
+ *
+ * Returns the string produced by aprintf(); gemini_do_it() treats a NULL
+ * result as an out-of-memory condition.
+ */
+static char *gemini_request(const struct urlpieces *up)
+{
+  const char *query = up->query;
+  const char *sep = query ? "?" : "";
+
+  return aprintf("gemini://%s%s%s%s\r\n", up->hostname, up->path, sep,
+                 query ? query : "");
+}
+
+/*
+ * gemini_setup_connection() is the handler's setup_connection callback.
+ *
+ * It allocates a zero-filled struct GEMINI and stores it in the easy
+ * handle's request state (data->req.p.gemini).
+ *
+ * Returns CURLE_OUT_OF_MEMORY if the allocation fails, CURLE_OK otherwise.
+ */
+static CURLcode gemini_setup_connection(struct connectdata *conn)
+{
+  struct Curl_easy *easy = conn->data;
+  struct GEMINI *state;
+
+  DEBUGASSERT(!easy->req.p.gemini);
+
+  state = calloc(1, sizeof(*state));
+  if(state) {
+    easy->req.p.gemini = state;
+    return CURLE_OK;
+  }
+  return CURLE_OUT_OF_MEMORY;
+}
+
+/*
+ * gemini_connecting() is registered as both the connect_it and the
+ * connecting callback of Curl_handler_gemini. It forwards to
+ * Curl_ssl_connect_nonblocking() for the first socket and returns that
+ * function's result; *done is written by the callee.
+ */
+static CURLcode gemini_connecting(struct connectdata *conn, bool *done)
+{
+  CURLcode result;
+
+  result = Curl_ssl_connect_nonblocking(conn, FIRSTSOCKET, done);
+  return result;
+}
+
+/*
+ * gemini_do_it() is the handler's do_it callback.
+ *
+ * It only prepares the request: the request line is built from the parsed
+ * URL pieces and stored, together with its length and a zeroed "sent"
+ * counter, in the per-request struct GEMINI. Nothing is written to the
+ * socket here and *done is not modified.
+ *
+ * Returns CURLE_OUT_OF_MEMORY if the request line cannot be built.
+ */
+static CURLcode gemini_do_it(struct connectdata *conn, bool *done)
+{
+  struct Curl_easy *easy = conn->data;
+  struct GEMINI *state = easy->req.p.gemini;
+  char *line = gemini_request(&easy->state.up);
+
+  (void)done;
+
+  if(!line)
+    return CURLE_OUT_OF_MEMORY;
+
+  /* Real work happens in gemini_doing, so we can use non-blocking
+   * functions and avoid busy loops.
+   */
+  state->request.amount_sent = 0;
+  state->request.amount_total = strlen(line);
+  state->request.data = line;
+
+  return CURLE_OK;
+}
+
+static CURLcode gemini_doing_finish(struct connectdata *conn, bool *done);
+
+/* Release the request line once it is no longer needed and reset the send
+ * counters so that nothing refers to the freed memory afterwards.
+ */
+static void gemini_request_free(struct GEMINI *gemini)
+{
+  free(gemini->request.data);
+  gemini->request.data = NULL;
+  gemini->request.amount_total = 0;
+  gemini->request.amount_sent = 0;
+}
+
+/*
+ * gemini_doing() is the handler's "doing" callback. Each call performs at
+ * most one non-blocking write and one non-blocking read:
+ *
+ *  stage1: send what is left of the request line prepared by
+ *          gemini_do_it(),
+ *  stage2: read into gemini->block until the header's LF is seen, the
+ *          buffer is full, or a read returns zero bytes.
+ *
+ * Once the header is buffered, control passes to gemini_doing_finish(),
+ * which sets *done. Every earlier CURLE_OK return leaves *done untouched.
+ *
+ * Returns CURLE_GOT_NOTHING if the server sent no data at all,
+ * CURLE_WEIRD_SERVER_REPLY if reading stopped without an LF, or the error
+ * reported by Curl_write() / Curl_read().
+ */
+static CURLcode gemini_doing(struct connectdata *conn, bool *done)
+{
+  struct GEMINI *gemini = conn->data->req.p.gemini;
+  curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
+  CURLcode err;
+  /* NOTE(review): Curl_write() and Curl_read() are expected to report the
+   * byte count through an ssize_t pointer (see sendf.h); passing a size_t
+   * pointer is an incompatible pointer type. Confirm against the tree
+   * this patch targets.
+   */
+  ssize_t nbytes = 0;
+
+  /* stage1: send request */
+  if(gemini->request.amount_sent < gemini->request.amount_total) {
+    const char *from = gemini->request.data + gemini->request.amount_sent;
+    size_t more = gemini->request.amount_total - gemini->request.amount_sent;
+
+    err = Curl_write(conn, sockfd, from, more, &nbytes);
+    if(err) {
+      /* The handler has no done callback that could free this later. */
+      gemini_request_free(gemini);
+      return err;
+    }
+
+    if(nbytes > 0)
+      gemini->request.amount_sent += (size_t)nbytes;
+
+    if(gemini->request.amount_sent < gemini->request.amount_total)
+      return CURLE_OK; /* partial write: send the rest on the next call */
+
+    /* Fully sent: free the request line here instead of leaking it. */
+    gemini_request_free(gemini);
+  }
+
+  /* stage2: read block big enough to contain header */
+  if(!gemini->block.done) {
+    char *into = gemini->block.data + gemini->block.amount;
+    size_t room = GEMINI_RESPONSE_BUFSIZE - gemini->block.amount;
+    size_t got;
+
+    nbytes = 0;
+    err = Curl_read(conn, sockfd, into, room, &nbytes);
+
+    /* XXX: This conditional can probably be eliminated by fixing
+     * doing_get_proto function, but I do not know how.
+     */
+    if(err == CURLE_AGAIN)
+      return CURLE_OK;
+
+    if(err)
+      return err;
+
+    got = (nbytes > 0) ? (size_t)nbytes : 0;
+    gemini->block.amount += got;
+    room -= got;
+
+    /* !room means that we successfully read GEMINI_RESPONSE_BUFSIZE bytes.
+     * !got means that there is no more data. It is quite possible for the
+     * whole response, header + body combined, to be smaller than
+     * GEMINI_RESPONSE_BUFSIZE bytes.
+     */
+    if(!got || !room)
+      gemini->block.done = TRUE;
+
+    /* Optimization: we check for LF and stop reading as soon as it is
+     * found. The curl main engine adds noticeable delays between
+     * invocations of the "doing" function, so it is desirable to get
+     * things done in as few calls as possible, but without busy looping
+     * on a socket that is not yet ready.
+     *
+     * For many servers the first read returns exactly the header, because
+     * that is the natural thing to do on the server side, although we
+     * cannot rely on it. This is why it is not worth optimizing the search
+     * by remembering the old amount and scanning only the new bytes.
+     */
+    gemini->block.lf = memchr(gemini->block.data, '\n', gemini->block.amount);
+    if(gemini->block.lf)
+      gemini->block.done = TRUE;
+    else if(!gemini->block.done)
+      return CURLE_OK;
+    else if(!gemini->block.amount)
+      /* Nothing was received at all: report that rather than a malformed
+       * reply, which is what gemini_doing_finish() intends for this case.
+       */
+      return CURLE_GOT_NOTHING;
+    else
+      return CURLE_WEIRD_SERVER_REPLY;
+  }
+
+  return gemini_doing_finish(conn, done);
+}
+
+/*
+ * gemini_doing_finish() validates the buffered response header and passes
+ * the response on to the client.
+ *
+ * The header ("<2 digits><space><meta>\r\n") is delivered with
+ * CLIENTWRITE_HEADER. Only when the first status digit is '2' are the
+ * body bytes that were read together with the header delivered with
+ * CLIENTWRITE_BODY and the rest of the transfer set up with
+ * Curl_setup_transfer(). *done is set to TRUE on every successful return.
+ *
+ * Returns CURLE_GOT_NOTHING for an empty buffer, CURLE_WEIRD_SERVER_REPLY
+ * for a malformed header, or the error from Curl_client_write().
+ */
+static CURLcode gemini_doing_finish(struct connectdata *conn, bool *done)
+{
+  struct Curl_easy *data = conn->data;
+  struct GEMINI *gemini = data->req.p.gemini;
+  char *block = gemini->block.data;
+  const char *lf = gemini->block.lf;
+  size_t amount = gemini->block.amount;
+  size_t hsize;
+  CURLcode err;
+
+  if(!amount)
+    return CURLE_GOT_NOTHING;
+
+  /* Two digit status, space, empty meta string and \r\n at least. The LF
+   * located by gemini_doing() must exist before it is dereferenced below.
+   */
+  if(amount < 5 || !lf)
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* These bytes come from the network and plain char may be signed:
+   * passing a negative value to isdigit() is undefined behaviour, so cast
+   * to unsigned char first.
+   */
+  if(!isdigit((unsigned char)block[0]) ||
+     !isdigit((unsigned char)block[1]) ||
+     block[2] != ' ')
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* The first byte is a digit, so {lf} cannot point to the first byte of
+   * the buffer and lf[-1] stays inside it.
+   */
+  if(lf[-1] != '\r')
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  hsize = (size_t)(lf - block) + 1;
+  err = Curl_client_write(conn, CLIENTWRITE_HEADER, block, hsize);
+  if(err)
+    return err;
+
+  if(block[0] != '2') { /* TODO: handle redirects */
+    *done = TRUE;
+    return CURLE_OK;
+  }
+
+  /* Skip the write when the buffer ends right after the header.
+   * NOTE(review): some versions of Curl_client_write() treat a zero length
+   * as "use strlen()", and this buffer is not guaranteed to be
+   * NUL-terminated - confirm against the targeted tree.
+   */
+  if(amount > hsize) {
+    err = Curl_client_write(conn, CLIENTWRITE_BODY, block + hsize,
+                            amount - hsize);
+    if(err)
+      return err;
+  }
+
+  *done = TRUE;
+  Curl_setup_transfer(data, FIRSTSOCKET, -1, FALSE, -1);
+  return CURLE_OK;
+}
+
+/*
+ * gemini_doing_getsock() is the handler's doing_getsock callback: it puts
+ * the connection's first socket in socks[0] and returns the event to wait
+ * for before gemini_doing() runs again.
+ *
+ * While request bytes remain unsent the socket is polled for writability,
+ * afterwards for readability only. Asking for writability after the
+ * request has been sent would make the socket look ready all the time, so
+ * gemini_doing() would be re-invoked even when there is nothing to read
+ * (the situation its CURLE_AGAIN check works around).
+ */
+static int gemini_doing_getsock(struct connectdata *conn, curl_socket_t *socks)
+{
+  const struct GEMINI *gemini = conn->data->req.p.gemini;
+
+  socks[0] = conn->sock[FIRSTSOCKET];
+
+  if(gemini->request.amount_sent < gemini->request.amount_total)
+    return GETSOCK_WRITESOCK(0);
+
+  return GETSOCK_READSOCK(0);
+}
+
+const struct Curl_handler Curl_handler_gemini = { /* gemini:// handler */
+  "GEMINI",                             /* scheme */
+  gemini_setup_connection,              /* setup_connection */
+  gemini_do_it,                         /* do_it: only builds the request */
+  ZERO_NULL,                            /* done */
+  ZERO_NULL,                            /* do_more */
+  gemini_connecting,                    /* connect_it */
+  gemini_connecting,                    /* connecting: same as connect_it */
+  gemini_doing,                         /* doing: sends, then reads header */
+  Curl_ssl_getsock,                     /* proto_getsock */
+  gemini_doing_getsock,                 /* doing_getsock */
+  ZERO_NULL,                            /* domore_getsock */
+  ZERO_NULL,                            /* perform_getsock */
+  ZERO_NULL,                            /* disconnect */
+  ZERO_NULL,                            /* readwrite */
+  ZERO_NULL,                            /* connection_check */
+  PORT_GEMINI,                          /* defport */
+  CURLPROTO_GEMINI,                     /* protocol */
+  CURLPROTO_GEMINI,                     /* family */
+  PROTOPT_SSL                           /* flags */
+};
+
+#endif /* !CURL_DISABLE_GEMINI && USE_SSL */
diff --git a/lib/gemini.h b/lib/gemini.h
new file mode 100644
index 000000000..b9f27ee70
--- /dev/null
+++ b/lib/gemini.h
@@ -0,0 +1,55 @@
+#ifndef HEADER_CURL_GEMINI_H
+#define HEADER_CURL_GEMINI_H
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <dan...@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+/* The handler is defined in gemini.c, which is compiled only when gemini
+ * is not disabled and SSL support is available; use the same condition
+ * here so the declaration never outlives the definition.
+ */
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+extern const struct Curl_handler Curl_handler_gemini;
+#endif
+
+/*
+ * According to the specification, the response header has this format:
+ *
+ *     <STATUS><SPACE><META><CR><LF>
+ *
+ * <STATUS> is two digits and <META> is a UTF-8 string up to 1024 bytes
+ * long, so a buffer of size >= (2 + 1 + 1024 + 1 + 1) = 1029 is enough to
+ * read the whole response header into memory. That is more efficient than
+ * reading byte by byte until \n is found.
+ */
+#define GEMINI_RESPONSE_BUFSIZE 1029
+
+struct GEMINI { /* per-request state, allocated in gemini_setup_connection */
+  struct { /* start of the response, filled by gemini_doing() */
+    char data[GEMINI_RESPONSE_BUFSIZE]; /* header, maybe followed by body */
+    size_t amount; /* Count of bytes read */
+    bool done; /* TRUE when no further reads into {data} are needed */
+    char *lf; /* Pointer to linefeed character in {data} */
+  } block;
+  struct { /* request line, set up by gemini_do_it() */
+    char *data; /* Allocated string */
+    size_t amount_total; /* How many bytes in {data} */
+    size_t amount_sent; /* How many bytes of it we already sent */
+  } request;
+};
+
+#endif /* HEADER_CURL_GEMINI_H */
diff --git a/lib/url.c b/lib/url.c
index f8b2a0030..7d4028161 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -115,6 +115,7 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
 #include "http_ntlm.h"
 #include "curl_rtmp.h"
 #include "gopher.h"
+#include "gemini.h"
 #include "mqtt.h"
 #include "http_proxy.h"
 #include "conncache.h"
@@ -253,6 +254,10 @@ static const struct Curl_handler * const protocols[] = {
   &Curl_handler_gopher,
 #endif
 
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+  &Curl_handler_gemini,
+#endif
+
 #ifdef USE_LIBRTMP
   &Curl_handler_rtmp,
   &Curl_handler_rtmpt,
diff --git a/lib/urldata.h b/lib/urldata.h
index f085c093c..76ad00856 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
@@ -49,6 +49,7 @@
 #define PORT_RTMPT PORT_HTTP
 #define PORT_RTMPS PORT_HTTPS
 #define PORT_GOPHER 70
+#define PORT_GEMINI 1965
 #define PORT_MQTT 1883
 
 #define DICT_MATCH "/MATCH:"
@@ -659,6 +660,7 @@ struct SingleRequest {
     struct SMTP *smtp;
     struct SSHPROTO *ssh;
     struct TELNET *telnet;
+    struct GEMINI *gemini;
   } p;
 #ifndef CURL_DISABLE_DOH
   struct dohdata doh; /* DoH specific data for this request */
-- 
If possible, please keep mailing list in CC. It is public.

-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette:   https://curl.se/mail/etiquette.html

Reply via email to