From: Chengyu Zhu <hudson...@tencent.com>

- Add HTTP range downloads for OCI blobs
- Introduce ocierofs_iostream for virtual file I/O
- Add --oci option for OCI image mounting with NBD backend

New mount.erofs -t erofs.nbd option: --oci[=opt1,opt2,...] source-image mountpoint

Supported options (comma-separated):
- platform=os/arch (default: linux/amd64)
- layer=N (extract specific layer, default: all layers)
- username=USER, password=PASS (basic authentication)

e.g.:
./mount/mount.erofs -t erofs.nbd  --oci=platform=linux/amd64 \
quay.io/chengyuzhu6/golang:1.22.8-erofs /tmp/test/

Signed-off-by: Chengyu Zhu <hudson...@tencent.com>
---
 lib/liberofs_oci.h |  19 +++-
 lib/remotes/oci.c  | 242 ++++++++++++++++++++++++++++++++++++++++++++-
 mount/Makefile.am  |   3 +-
 mount/main.c       | 236 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 493 insertions(+), 7 deletions(-)

diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index d119a2b..f35228c 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -7,7 +7,6 @@
 #define __EROFS_OCI_H
 
 #include <stdbool.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -56,7 +55,11 @@ struct ocierofs_ctx {
        } img;
 };
 
-int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config 
*config);
+/* ocierofs_iostream - virtual file whose reads are served from an OCI layer blob */
+struct ocierofs_iostream {
+       /* OCI context used to issue the blob range requests */
+       struct ocierofs_ctx *ctx;
+       /* vfile handle; its payload area stores a pointer back to this stream */
+       struct erofs_vfile vf;
+       /* current position used by the sequential read and lseek operations */
+       u64 offset;
+};
 
 /*
  * ocierofs_build_trees - Build file trees from an OCI container image
@@ -65,8 +68,16 @@ int ocierofs_init(struct ocierofs_ctx *ctx, const struct 
ocierofs_config *config
  *
  * Return: 0 on success, negative errno on failure
  */
-int ocierofs_build_trees(struct erofs_importer *importer,
-                        const struct ocierofs_config *cfg);
+int ocierofs_build_trees(struct erofs_importer *importer, const struct 
ocierofs_config *cfg);
+
+/*
+ * ocierofs_is_erofs_native_image - check the media type of the first layer
+ *
+ * Return: 0 if the first layer carries the EROFS native media type,
+ * -ENOENT otherwise (including when no layer is available)
+ */
+int ocierofs_is_erofs_native_image(struct ocierofs_ctx *ctx);
+
+int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config);
+
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx);
+
+/*
+ * ocierofs_iostream_open - set up @oci_iostream as a vfile reading from @oci_ctx
+ *
+ * Return: 0 on success
+ */
+int ocierofs_iostream_open(struct ocierofs_iostream *oci_iostream, struct ocierofs_ctx *oci_ctx);
+void ocierofs_iostream_close(struct ocierofs_iostream *oci_iostream);
 
 #ifdef __cplusplus
 }
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index 01f1e24..8750772 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -33,6 +33,9 @@
 #define OCI_MEDIATYPE_MANIFEST "application/vnd.oci.image.manifest.v1+json"
 #define OCI_MEDIATYPE_INDEX "application/vnd.oci.image.index.v1+json"
 
+/* Erofs Native Layer Media Type */
+#define EROFS_MEDIATYPE "application/vnd.erofs"
+
 struct ocierofs_request {
        char *url;
        struct curl_slist *headers;
@@ -1133,7 +1136,7 @@ out:
        return ret;
 }
 
-static void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
 {
        if (!ctx)
                return;
@@ -1193,3 +1196,240 @@ int ocierofs_build_trees(struct erofs_importer 
*importer,
        ocierofs_ctx_cleanup(&ctx);
        return ret;
 }
+
+/*
+ * ocierofs_download_blob_range - fetch a byte range of the selected layer blob
+ * @ctx: OCI context; ctx->img.layer_index selects the layer to read
+ * @offset: first byte to fetch, must not be negative
+ * @length: number of bytes wanted, 0 means "up to the end of the blob"
+ * @out_buf: on success, a malloc()ed buffer owned by the caller, or NULL
+ *           if no data is available at @offset
+ * @out_size: on success, number of valid bytes in @out_buf
+ *
+ * A "Range:" request is issued.  If the server answers 200 with the whole
+ * body instead of 206, the requested window is cut out of that body so the
+ * caller never receives more than @length bytes in that case.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int ocierofs_download_blob_range(struct ocierofs_ctx *ctx, off_t offset, size_t length,
+                                       void **out_buf, size_t *out_size)
+{
+       struct ocierofs_request req = {};
+       struct ocierofs_response resp = {};
+       struct curl_slist *hdrs;
+       const char *api_registry;
+       char rangehdr[64];
+       long http_code = 0;
+       int ret;
+       int index = ctx->img.layer_index;
+
+       /* give the caller well-defined outputs on every path */
+       *out_buf = NULL;
+       *out_size = 0;
+
+       if (offset < 0)
+               return -EINVAL;
+
+       api_registry = ocierofs_get_api_registry(ctx->img.registry);
+       if (asprintf(&req.url, "https://%s/v2/%s/blobs/%s",
+            api_registry, ctx->img.repository, ctx->img.layers[index]->digest) == -1)
+               return -ENOMEM;
+
+       if (length)
+               snprintf(rangehdr, sizeof(rangehdr), "Range: bytes=%lld-%lld",
+                        (long long)offset, (long long)(offset + (off_t)length - 1));
+       else
+               snprintf(rangehdr, sizeof(rangehdr), "Range: bytes=%lld-",
+                        (long long)offset);
+
+       if (ctx->net.auth_header && strstr(ctx->net.auth_header, "Bearer")) {
+               hdrs = curl_slist_append(req.headers, ctx->net.auth_header);
+               if (!hdrs) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               req.headers = hdrs;
+       }
+
+       /* without the Range header the whole blob would be transferred */
+       hdrs = curl_slist_append(req.headers, rangehdr);
+       if (!hdrs) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       req.headers = hdrs;
+
+       curl_easy_reset(ctx->net.curl);
+
+       ret = ocierofs_curl_setup_common_options(ctx->net.curl);
+       if (ret)
+               goto out;
+
+       ret = ocierofs_curl_setup_rq(ctx->net.curl, req.url, OCIEROFS_HTTP_GET,
+                                    req.headers,
+                                    ocierofs_write_callback,
+                                    &resp, NULL, NULL);
+       if (ret)
+               goto out;
+
+       ret = ocierofs_curl_perform(ctx->net.curl, &http_code);
+       if (ret)
+               goto out;
+
+       if (http_code == 206) {
+               /* partial content: the body is exactly the requested window */
+               *out_buf = resp.data;
+               *out_size = resp.size;
+               resp.data = NULL;
+       } else if (http_code == 200) {
+               /* the Range header was ignored and the full blob was sent */
+               if ((unsigned long long)offset < resp.size) {
+                       size_t available = resp.size - offset;
+                       size_t copy_size = length ? min_t(size_t, length, available) : available;
+
+                       if (offset == 0 && copy_size == resp.size) {
+                               /* the whole body is wanted: hand the buffer over */
+                               *out_buf = resp.data;
+                               resp.data = NULL;
+                       } else {
+                               *out_buf = malloc(copy_size);
+                               if (!*out_buf) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                               memcpy(*out_buf, resp.data + offset, copy_size);
+                       }
+                       *out_size = copy_size;
+               }
+       } else {
+               erofs_err("HTTP range request failed with code %ld", http_code);
+               ret = -EIO;
+       }
+
+out:
+       if (req.headers)
+               curl_slist_free_all(req.headers);
+       free(req.url);
+       free(resp.data);
+       return ret;
+}
+
+/*
+ * ocierofs_io_pread - read up to @len bytes at @offset from the layer blob
+ * @vf: vfile whose payload holds the owning ocierofs_iostream pointer
+ * @buf: destination buffer of at least @len bytes
+ * @len: maximum number of bytes to read
+ * @offset: absolute position inside the blob
+ *
+ * Return: number of bytes stored in @buf (0 when nothing is available at
+ * @offset), or a negative errno if the download failed
+ */
+static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+{
+       struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+       void *download_buf = NULL;
+       size_t download_size = 0;
+       int err;
+
+       /* a zero length would otherwise request everything up to the blob end */
+       if (!len)
+               return 0;
+
+       err = ocierofs_download_blob_range(oci_iostream->ctx, offset, len,
+                                          &download_buf, &download_size);
+       if (err < 0)
+               return err;
+
+       /* never copy more than the caller's buffer can hold */
+       if (download_size > len)
+               download_size = len;
+       if (download_buf && download_size)
+               memcpy(buf, download_buf, download_size);
+
+       free(download_buf);
+       return download_size;
+}
+
+/* Sequential read: pread at the stream position and advance it by the result. */
+static ssize_t ocierofs_io_read(struct erofs_vfile *vf, void *buf, size_t len)
+{
+       struct ocierofs_iostream *stream = *(struct ocierofs_iostream **)vf->payload;
+       ssize_t nread = ocierofs_io_pread(vf, buf, len, stream->offset);
+
+       if (nread <= 0)
+               return nread;
+
+       stream->offset += nread;
+       return nread;
+}
+
+/*
+ * Reposition the stream offset.  The target must lie within [0, layer size];
+ * -1 is returned for an unknown @whence or an out-of-range target.
+ */
+static off_t ocierofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence)
+{
+       struct ocierofs_iostream *stream = *(struct ocierofs_iostream **)vf->payload;
+       int idx = stream->ctx->img.layer_index;
+       off_t target;
+
+       if (whence == SEEK_SET)
+               target = offset;
+       else if (whence == SEEK_CUR)
+               target = stream->offset + offset;
+       else if (whence == SEEK_END)
+               target = stream->ctx->img.layers[idx]->size + offset;
+       else
+               return -1;
+
+       if (target >= 0 && target <= stream->ctx->img.layers[idx]->size) {
+               stream->offset = target;
+               return target;
+       }
+       return -1;
+}
+
+/*
+ * ocierofs_io_sendfile - copy @count bytes from the OCI stream @vin to @vout
+ * @vout: destination; data is written to vout->fd
+ * @vin: source vfile whose payload holds the ocierofs_iostream pointer
+ * @pos: if non-NULL, read position to use and advance; otherwise the
+ *       stream's own offset is used and advanced
+ * @count: number of bytes to transfer
+ *
+ * Data is moved in chunks of at most 32768 bytes.  A chunk that cannot be
+ * read from the registry is logged and replaced by zeroes.
+ *
+ * Return: number of bytes that were NOT transferred (0 when everything was
+ * written), or a negative errno on allocation or write failure
+ */
+static ssize_t ocierofs_io_sendfile(struct erofs_vfile *vout, struct erofs_vfile *vin,
+                              off_t *pos, size_t count)
+{
+       struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vin->payload;
+       ssize_t ret = 0;
+       char *buf;
+
+       /* nothing to copy; also avoids reporting malloc(0) == NULL as -ENOMEM */
+       if (!count)
+               return 0;
+
+       buf = malloc(min_t(size_t, count, 32768));
+       if (!buf)
+               return -ENOMEM;
+
+       while (count > 0) {
+               size_t to_read = min_t(size_t, count, 32768);
+               u64 read_offset = pos ? *pos : oci_iostream->offset;
+               ssize_t written;
+
+               ret = ocierofs_io_pread(vin, buf, to_read, read_offset);
+               if (ret <= 0) {
+                       erofs_err("OCI I/O sendfile: failed to read from OCI: %s",
+                                 erofs_strerror(ret));
+                       memset(buf, 0, to_read);
+                       ret = to_read;
+               }
+
+               written = write(vout->fd, buf, ret);
+               if (written < 0) {
+                       /* capture errno before logging can clobber it */
+                       ret = -errno;
+                       erofs_err("OCI I/O sendfile: failed to write to output: %s",
+                                 strerror(-ret));
+                       break;
+               }
+
+               if (written != ret) {
+                       erofs_err("OCI I/O sendfile: partial write: %zd != %zd", written, ret);
+                       ret = written;
+               }
+
+               /* no progress at all: stop instead of spinning forever */
+               if (!ret)
+                       break;
+
+               count -= ret;
+               if (pos)
+                       *pos += ret;
+               else
+                       oci_iostream->offset += ret;
+       }
+
+       free(buf);
+       return ret < 0 ? ret : count;
+}
+
+/* vfile operations that serve an ocierofs_iostream from the registry */
+static struct erofs_vfops ocierofs_io_vfops = {
+       .read = ocierofs_io_read,
+       .pread = ocierofs_io_pread,
+       .sendfile = ocierofs_io_sendfile,
+       .lseek = ocierofs_io_lseek,
+};
+
+/*
+ * Initialise @oci_iostream as a vfile reading from @oci_ctx.  The stream is
+ * zeroed, gets no real descriptor (fd is -1), and its address is stored in
+ * the vfile payload so the I/O callbacks can find it again.  Always returns 0.
+ */
+int ocierofs_iostream_open(struct ocierofs_iostream *oci_iostream, struct ocierofs_ctx *oci_ctx)
+{
+       struct erofs_vfile *vf = &oci_iostream->vf;
+
+       memset(oci_iostream, 0, sizeof(*oci_iostream));
+       vf->fd = -1;
+       vf->ops = &ocierofs_io_vfops;
+       oci_iostream->ctx = oci_ctx;
+       *(struct ocierofs_iostream **)vf->payload = oci_iostream;
+
+       return 0;
+}
+
+/*
+ * ocierofs_iostream_close - release the descriptor held by an OCI stream
+ *
+ * ocierofs_iostream_open() leaves vf.fd at -1, so only a real descriptor is
+ * closed; the field is reset afterwards so a second call is harmless.
+ */
+void ocierofs_iostream_close(struct ocierofs_iostream *oci_iostream)
+{
+       if (!oci_iostream || oci_iostream->vf.fd < 0)
+               return;
+
+       close(oci_iostream->vf.fd);
+       oci_iostream->vf.fd = -1;
+}
+
+/*
+ * Return 0 when the first layer's media type equals EROFS_MEDIATYPE,
+ * -ENOENT when it differs or when no first layer / media type is present.
+ */
+int ocierofs_is_erofs_native_image(struct ocierofs_ctx *ctx)
+{
+       const char *media_type;
+
+       if (ctx->img.layer_count <= 0 || !ctx->img.layers[0])
+               return -ENOENT;
+
+       media_type = ctx->img.layers[0]->media_type;
+       if (!media_type || strcmp(media_type, EROFS_MEDIATYPE))
+               return -ENOENT;
+
+       return 0;
+}
diff --git a/mount/Makefile.am b/mount/Makefile.am
index d93f3f4..7b971f5 100644
--- a/mount/Makefile.am
+++ b/mount/Makefile.am
@@ -9,5 +9,4 @@ mount_erofs_SOURCES = main.c
 mount_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 mount_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
        ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
-       ${libzstd_LIBS} ${libqpl_LIBS} ${libxxhash_LIBS} ${libnl3_LIBS}
-endif
+       ${libzstd_LIBS} ${libqpl_LIBS} ${libxxhash_LIBS} ${libnl3_LIBS} 
${openssl_LIBS}
diff --git a/mount/main.c b/mount/main.c
index 139b532..77c16f5 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -15,6 +15,7 @@
 #include "erofs/err.h"
 #include "erofs/io.h"
 #include "../lib/liberofs_nbd.h"
+#include "../lib/liberofs_oci.h"
 #ifdef HAVE_LINUX_LOOP_H
 #include <linux/loop.h>
 #else
@@ -34,6 +35,10 @@ struct loop_info {
 #include <sys/sysmacros.h>
 #endif
 
+#ifdef OCIEROFS_ENABLED
+static struct ocierofs_config ocicfg;
+#endif
+
 enum erofs_backend_drv {
        EROFSAUTO,
        EROFSLOCAL,
@@ -56,6 +61,11 @@ static struct erofsmount_cfg {
        long flags;
        enum erofs_backend_drv backend;
        enum erofsmount_mode mountmode;
+       bool umount;
+#ifdef OCIEROFS_ENABLED
+       char *oci_options;
+       bool use_oci;
+#endif
 } mountcfg = {
        .full_options = "ro",
        .flags = MS_RDONLY,             /* default mountflags */
@@ -128,6 +138,9 @@ static int erofsmount_parse_options(int argc, char **argv)
        static const struct option long_options[] = {
                {"help", no_argument, 0, 'h'},
                {"reattach", no_argument, 0, 512},
+#ifdef OCIEROFS_ENABLED
+               {"oci", optional_argument, 0, 513},
+#endif
                {0, 0, 0, 0},
        };
        char *dot;
@@ -165,6 +178,12 @@ static int erofsmount_parse_options(int argc, char **argv)
                case 512:
                        mountcfg.mountmode = EROFSMOUNT_MODE_REATTACH;
                        break;
+#ifdef OCIEROFS_ENABLED
+               case 513:
+                       mountcfg.oci_options = optarg;
+                       mountcfg.use_oci = true;
+                       break;
+#endif
                default:
                        return -EINVAL;
                }
@@ -198,6 +217,74 @@ static int erofsmount_parse_options(int argc, char **argv)
        return 0;
 }
 
+/* Return the value part of @opt if it starts with @key (e.g. "layer="), else NULL. */
+static char *mount_oci_optval(char *opt, const char *key)
+{
+       size_t keylen = strlen(key);
+
+       return strncmp(opt, key, keylen) ? NULL : opt + keylen;
+}
+
+/*
+ * mount_parse_oci_options - parse the comma-separated value of --oci
+ * @oci_cfg: configuration to fill in; string fields are replaced by
+ *           newly strdup()ed copies
+ * @options_str: option string, modified in place (commas become NULs);
+ *               NULL means no options were given
+ *
+ * Recognised keys are platform=, layer=, username= and password=.  Keys
+ * are matched as prefixes of each token, and layer= must be a complete
+ * non-negative decimal number.
+ *
+ * Return: 0 on success, -EINVAL on an unknown key or bad layer index,
+ * -ENOMEM if a value cannot be duplicated
+ */
+static int mount_parse_oci_options(struct ocierofs_config *oci_cfg, char *options_str)
+{
+       char *opt, *next, *val;
+
+       for (opt = options_str; opt; opt = next) {
+               next = strchr(opt, ',');
+               if (next)
+                       *next++ = '\0';
+
+               if ((val = mount_oci_optval(opt, "platform="))) {
+                       free(oci_cfg->platform);
+                       oci_cfg->platform = strdup(val);
+                       if (!oci_cfg->platform)
+                               return -ENOMEM;
+               } else if ((val = mount_oci_optval(opt, "layer="))) {
+                       char *end;
+                       long idx;
+
+                       errno = 0;
+                       idx = strtol(val, &end, 10);
+                       /* reject empty, trailing garbage, negative and non-int values */
+                       if (errno || end == val || *end != '\0' ||
+                           idx < 0 || idx != (int)idx) {
+                               erofs_err("invalid layer index %s", val);
+                               return -EINVAL;
+                       }
+                       oci_cfg->layer_index = idx;
+               } else if ((val = mount_oci_optval(opt, "username="))) {
+                       free(oci_cfg->username);
+                       oci_cfg->username = strdup(val);
+                       if (!oci_cfg->username)
+                               return -ENOMEM;
+               } else if ((val = mount_oci_optval(opt, "password="))) {
+                       free(oci_cfg->password);
+                       oci_cfg->password = strdup(val);
+                       if (!oci_cfg->password)
+                               return -ENOMEM;
+               } else {
+                       erofs_err("invalid --oci value %s", opt);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
 static int erofsmount_fuse(const char *source, const char *mountpoint,
                           const char *fstype, const char *options)
 {
@@ -750,6 +837,122 @@ err_out:
        return err < 0 ? err : 0;
 }
 
+/**
+ * erofsmount_startnbd_oci - Start NBD server for OCI image
+ * @nbdfd: NBD device file descriptor
+ * @oci_ctx: OCI context (pre-authenticated); img.layer_index selects the
+ *           layer that is served
+ *
+ * Start an NBD server that serves data from an OCI image layer.
+ * This function reuses the existing erofsmount_nbd_loopfn logic
+ * but uses an ocierofs_iostream as the virtual device instead of a local file.
+ * The OCI client should be pre-authenticated to avoid concurrent auth issues.
+ *
+ * The call blocks until erofs_nbd_do_it() returns and the worker thread
+ * has been joined.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int erofsmount_startnbd_oci(int nbdfd, struct ocierofs_ctx *oci_ctx)
+{
+       struct erofsmount_nbd_ctx ctx = {};
+       struct ocierofs_iostream *oci_iostream;
+       uintptr_t retcode;
+       pthread_t th;
+       int err, err2;
+       int blkbits = 12;
+       int index = oci_ctx->img.layer_index;
+       u64 blocks;
+
+       /* layers[] is indexed below; refuse anything outside the layer table */
+       if (index < 0 || index >= oci_ctx->img.layer_count ||
+           !oci_ctx->img.layers[index]) {
+               erofs_err("invalid OCI layer index %d", index);
+               return -EINVAL;
+       }
+
+       /* device size in blocks, rounded up to a whole block */
+       blocks = (oci_ctx->img.layers[index]->size + (1ULL << blkbits) - 1) >> blkbits;
+
+       oci_iostream = malloc(sizeof(*oci_iostream));
+       if (!oci_iostream)
+               return -ENOMEM;
+
+       err = ocierofs_iostream_open(oci_iostream, oci_ctx);
+       if (err)
+               goto out_free;
+
+       ctx.vd = oci_iostream->vf;
+
+       err = erofs_nbd_connect(nbdfd, blkbits, blocks);
+       if (err < 0)
+               goto out_close;
+       ctx.sk.fd = err;
+
+       err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, &ctx);
+       if (err) {
+               close(ctx.sk.fd);
+               goto out_close;
+       }
+
+       err = erofs_nbd_do_it(nbdfd);
+       err2 = -pthread_join(th, (void **)&retcode);
+       if (!err2 && retcode) {
+               erofs_err("NBD worker failed with %s",
+                         erofs_strerror(retcode));
+               err2 = retcode;
+       }
+       /* the first failure wins */
+       if (!err)
+               err = err2;
+
+out_close:
+       ocierofs_iostream_close(oci_iostream);
+out_free:
+       free(oci_iostream);
+       return err;
+}
+
+/*
+ * erofsmount_nbd_oci - mount an OCI image layer through a forked NBD server
+ * @ctx: initialised OCI context handed to the NBD server child
+ * @mountpoint: directory to mount on
+ * @fstype: must be "erofs"
+ * @flags: mount(2) flags
+ * @options: mount(2) data string
+ *
+ * A free NBD device is picked with erofs_nbd_devscan() and a child process
+ * is forked to serve it.  The parent polls erofs_nbd_in_service() every
+ * 50ms until the device is reported in service, checks that the reported
+ * value matches the child's pid (-EBUSY otherwise), then mounts the device.
+ *
+ * Return: 0 on success, negative errno on failure.  In the forked child
+ * the function returns EXIT_SUCCESS or EXIT_FAILURE instead.
+ */
+static int erofsmount_nbd_oci(struct ocierofs_ctx *ctx, const char *mountpoint,
+                             const char *fstype, int flags, const char *options)
+{
+       char nbdpath[32];
+       int num, nbdfd;
+       pid_t pid;
+       long err;
+
+       if (strcmp(fstype, "erofs")) {
+               fprintf(stderr, "unsupported filesystem type `%s`\n", fstype);
+               return -ENODEV;
+       }
+
+       flags |= O_RDONLY;
+
+       num = erofs_nbd_devscan();
+       if (num < 0)
+               return num;
+
+       (void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
+       nbdfd = open(nbdpath, O_RDWR);
+       if (nbdfd < 0)
+               return -errno;
+
+       pid = fork();
+       if (pid < 0) {
+               /* no server will ever show up: fail instead of polling forever */
+               err = -errno;
+               close(nbdfd);
+               return err;
+       }
+       if (pid == 0) {
+               return erofsmount_startnbd_oci(nbdfd, ctx) ?
+                       EXIT_FAILURE : EXIT_SUCCESS;
+       }
+       close(nbdfd);
+
+       while (1) {
+               err = erofs_nbd_in_service(num);
+               if (err == -ENOENT || err == -ENOTCONN) {
+                       usleep(50000);
+                       continue;
+               }
+               if (err >= 0)
+                       err = (err != pid ? -EBUSY : 0);
+               break;
+       }
+       if (!err) {
+               err = mount(nbdpath, mountpoint, fstype, flags, options);
+               if (err < 0)
+                       err = -errno;
+       }
+       return err;
+}
+
 int main(int argc, char *argv[])
 {
        int err;
@@ -785,9 +988,42 @@ int main(int argc, char *argv[])
        }
 
        if (mountcfg.backend == EROFSNBD) {
+#ifdef OCIEROFS_ENABLED
+               if (mountcfg.use_oci) {
+                       struct ocierofs_ctx ctx = {};
+
+                       ocicfg.image_ref = mountcfg.device;
+                       err = mount_parse_oci_options(&ocicfg, 
mountcfg.oci_options);
+                       if (err)
+                               goto exit;
+                       err = ocierofs_init(&ctx, &ocicfg);
+                       if (err) {
+                               ocierofs_ctx_cleanup(&ctx);
+                               goto exit;
+                       }
+
+                       err = ocierofs_is_erofs_native_image(&ctx);
+                       if (err) {
+                               ocierofs_ctx_cleanup(&ctx);
+                               goto exit;
+                       }
+
+                       err = erofsmount_nbd_oci(&ctx, mountcfg.target,
+                                                mountcfg.fstype, 
mountcfg.flags, mountcfg.options);
+                       if (err) {
+                               ocierofs_ctx_cleanup(&ctx);
+                               goto exit;
+                       }
+               } else {
+                       err = erofsmount_nbd(mountcfg.device, mountcfg.target,
+                                            mountcfg.fstype, mountcfg.flags,
+                                            mountcfg.options);
+               }
+#else
                err = erofsmount_nbd(mountcfg.device, mountcfg.target,
                                     mountcfg.fstype, mountcfg.flags,
                                     mountcfg.options);
+#endif
                goto exit;
        }
 
-- 
2.51.0


Reply via email to