This patch adds support for handling OCI registry operations in EROFS. The following functionalities are included:
1. `oci_registry_read`: Reads data from the OCI registry. 2. `oci_registry_pread`: Reads data from a specified offset. 3. `oci_registry_lseek`: Adjusts the file offset. 4. `open_oci_registry`: handle the opening of the OCI registry. Signed-off-by: Changzhi Xie <s...@qq.com> --- lib/oci_registry.c | 595 +++++++++++++++++++++++++++++++++++++++++++++ lib/oci_registry.h | 17 ++ 2 files changed, 612 insertions(+) create mode 100644 lib/oci_registry.c create mode 100644 lib/oci_registry.h diff --git a/lib/oci_registry.c b/lib/oci_registry.c new file mode 100644 index 0000000..fd4f6d5 --- /dev/null +++ b/lib/oci_registry.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 + +#include "oci_registry.h" + +#define erofs_token_mode 1 +#define erofs_image_index_mode 2 +#define erofs_manifest_mode 3 +#define erofs_blob_mode 4 + +#define docker_auth_url \ +"https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/%s:pull" + +struct erofs_oci_registry_memory { + char *memory; + size_t size; +}; + +static CURLM *erofs_oci_registry_multi_handle(void) +{ + static CURLM *multi_handle; + + if (!multi_handle) + multi_handle = curl_multi_init(); + + return multi_handle; +} + +static size_t erofs_oci_registry_callback(void *contents, size_t size, + size_t nmemb, void *userp) +{ + size_t real_size = size * nmemb; + struct erofs_oci_registry_memory *mem = + (struct erofs_oci_registry_memory *)userp; + char *ptr = realloc(mem->memory, mem->size + real_size + 1); + + if (!ptr) { + fprintf(stderr, "realloc failed\n"); + return 0; + } + + mem->memory = ptr; + memcpy(&(mem->memory[mem->size]), contents, real_size); + mem->size += real_size; + mem->memory[mem->size] = 0; + return real_size; +} + +ssize_t erofs_oci_registry_read(struct erofs_vfile *vf, void *buf, size_t len) +{ + struct erofs_oci_registry_memory *memory_struct = + (struct erofs_oci_registry_memory *)(vf->payload); + + if (vf->offset >= memory_struct->size) + return 0; + + if (len > memory_struct->size - vf->offset) + len = memory_struct->size - vf->offset; + + memcpy(buf, memory_struct->memory + vf->offset, len); + vf->offset += len; + + return len; +} + +ssize_t erofs_oci_registry_pread(struct erofs_vfile *vf, void *buf, + u64 offset, size_t len) +{ + struct erofs_oci_registry_memory *memory_struct = + (struct erofs_oci_registry_memory *)(vf->payload); + + if (offset >= memory_struct->size) + return 0; + + if (offset + len > memory_struct->size) + len = memory_struct->size - offset; + + memcpy(buf, memory_struct->memory + offset, len); + + return len; +} + +off_t erofs_oci_registry_lseek(struct erofs_vfile *vf, u64 offset, int whence) +{ + struct erofs_oci_registry_memory *memory_struct = + (struct erofs_oci_registry_memory *)(vf->payload); + u64 new_offset = 0; + + switch (whence) { + case SEEK_SET: + new_offset = offset; + break; + case SEEK_CUR: + new_offset = vf->offset + offset; + break; + case SEEK_END: + new_offset = memory_struct->size + offset; + break; + default: + return -1; + } + + if (new_offset > memory_struct->size) + return -1; + + vf->offset = new_offset; + + return new_offset; +} + +static char *erofs_get_authorization_header(struct erofs_oci_registry_memory *data) +{ + json_object *parsed_json, *token_json; + const char *token; + char *auth_header; + + if (!data->memory) { + fprintf(stderr, "No data received\n"); + return NULL; + } + parsed_json = json_tokener_parse(data->memory); + + if (!parsed_json) { + fprintf(stderr, "Failed to parse JSON\n"); + return NULL; + } + + if (!json_object_object_get_ex(parsed_json, "token", &token_json)) { + fprintf(stderr, "Token not found in JSON\n"); + json_object_put(parsed_json); + return NULL; + } + token = json_object_get_string(token_json); + auth_header = malloc(strlen("Authorization: Bearer ") + strlen(token) + 1); + + if (!auth_header) { + fprintf(stderr, "Failed to allocate memory for authorization header\n"); + json_object_put(parsed_json); + return NULL; + } + + strscpy(auth_header, "Authorization: Bearer ", sizeof(auth_header)); + strcat(auth_header, token); + + json_object_put(parsed_json); + free(data->memory); + + data->memory = NULL; + data->size = 0; + + return auth_header; +} + +static char *erofs_get_manifest_digest(struct erofs_oci_registry_memory *data, + const char *arch, const char *os, char *media_type) +{ + json_object *parsed_json, *manifests_array; + int len; + + if (!data->memory) { + fprintf(stderr, "No data received\n"); + return NULL; + } + + parsed_json = json_tokener_parse(data->memory); + + if (!parsed_json) { + fprintf(stderr, "Failed to parse JSON\n"); + return NULL; + } + + if (!json_object_object_get_ex(parsed_json, + "manifests", &manifests_array)) { + fprintf(stderr, "Cannot find manifests in JSON\n"); + json_object_put(parsed_json); + return NULL; + } + + len = json_object_array_length(manifests_array); + + for (int i = 0; i < len; i++) { + json_object *manifest = json_object_array_get_idx(manifests_array, i); + json_object *platform_json; + + if (json_object_object_get_ex(manifest, "platform", &platform_json)) { + json_object *arch_json, *os_json, *digest_json, *media_type_json; + + if (json_object_object_get_ex(platform_json, + "architecture", &arch_json) && + json_object_object_get_ex(platform_json, "os", &os_json) && + json_object_object_get_ex(manifest, "digest", &digest_json)) { + + const char *manifest_arch = json_object_get_string(arch_json); + const char *manifest_os = json_object_get_string(os_json); + + if (strcmp(manifest_arch, arch) == 0 && + strcmp(manifest_os, os) == 0) { + char *digest = strdup(json_object_get_string(digest_json)); + + if (json_object_object_get_ex(manifest, + "mediaType", &media_type_json)) { + const char *manifest_media_type = + json_object_get_string(media_type_json); + + sprintf(media_type, "Accept: %s", manifest_media_type); + } + + json_object_put(parsed_json); + free(data->memory); + + data->memory = NULL; + data->size = 0; + + return digest; + } + } + } + } + + json_object_put(parsed_json); + free(data->memory); + + data->memory = NULL; + data->size = 0; + + fprintf(stderr, "No matching arch and os found\n"); + return NULL; +} + +static char *erofs_get_layer_digest(struct erofs_oci_registry_memory *data, + char *media_type, int count) +{ + json_object *parsed_json, *layers_array; + json_object *layer, *digest_json, *media_type_json; + int len; + char *digest = NULL; + + parsed_json = json_tokener_parse(data->memory); + + if (!parsed_json) { + fprintf(stderr, "Failed to parse JSON\n"); + return NULL; + } + + if (!json_object_object_get_ex(parsed_json, "layers", &layers_array) || + json_object_get_type(layers_array) != json_type_array) { + fprintf(stderr, "Layers key not found or is not an array in JSON\n"); + json_object_put(parsed_json); + return NULL; + } + + len = json_object_array_length(layers_array); + + if (count < 0 || count >= len) { + fprintf(stderr, "Count %d is out of bounds (0-%d)\n", count, len - 1); + json_object_put(parsed_json); + return NULL; + } + + layer = json_object_array_get_idx(layers_array, count); + + if (!json_object_object_get_ex(layer, "digest", &digest_json)) + fprintf(stderr, "Digest not found in layer #%d\n", count); + else { + digest = strdup(json_object_get_string(digest_json)); + if (json_object_object_get_ex(layer, "mediaType", &media_type_json)) { + const char *manifest_media_type = + json_object_get_string(media_type_json); + + sprintf(media_type, "Accept: %s", manifest_media_type); + } + } + + json_object_put(parsed_json); + return digest; +} + +static void erofs_curl_io(CURLM *multi_handle, int *still_running) +{ + CURLMcode mc; + + do { + mc = curl_multi_perform(multi_handle, still_running); + + if (mc != CURLM_OK) { + fprintf(stderr, "curl_multi_perform() failed: %s\n", + curl_multi_strerror(mc)); + break; + } + + if (*still_running) { + int numfds; + + mc = curl_multi_poll(multi_handle, NULL, 0, 1000, &numfds); + if (mc != CURLM_OK) { + fprintf(stderr, "curl_multi_poll failed: %s\n", + curl_multi_strerror(mc)); + break; + } + } + + } while (*still_running > 0); +} + +static struct erofs_oci_registry_memory *erofs_curl_setopt( + CURLM *multi_handle, CURL *curl, const char *auth_header, + const char *media_type, const char *url, int mode) +{ + struct erofs_oci_registry_memory *data = + malloc(sizeof(struct erofs_oci_registry_memory)); + struct curl_slist *headers = NULL; + + if (!data) { + fprintf(stderr, + "Failed to allocate memory for erofs_oci_registry_memory\n"); + return NULL; + } + data->memory = NULL; + data->size = 0; + + switch (mode) { + case erofs_token_mode: + curl_easy_setopt(curl, + CURLOPT_WRITEFUNCTION, erofs_oci_registry_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data); + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_multi_add_handle(multi_handle, curl); + break; + case erofs_image_index_mode: + curl_easy_setopt(curl, + CURLOPT_WRITEFUNCTION, erofs_oci_registry_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data); + headers = curl_slist_append(headers, auth_header); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_multi_add_handle(multi_handle, curl); + break; + case erofs_manifest_mode: + curl_easy_setopt(curl, + CURLOPT_WRITEFUNCTION, erofs_oci_registry_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data); + headers = curl_slist_append(headers, auth_header); + headers = curl_slist_append(headers, media_type); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_multi_add_handle(multi_handle, curl); + break; + case erofs_blob_mode: + curl_easy_setopt(curl, + CURLOPT_WRITEFUNCTION, erofs_oci_registry_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data); + headers = curl_slist_append(headers, auth_header); + headers = curl_slist_append(headers, media_type); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_multi_add_handle(multi_handle, curl); + break; + default: + break; + } + + return data; +} + +static void erofs_extract_urlfront_repository(const char *url, char *repo_end, + char *repository, char *url_front) +{ + const char *repo_start = strstr(url, "/library/"); + + if (!repo_start) + return; + repo_start += strlen("/library/"); + repo_end = strchr(repo_start, '/'); + + if (!repo_end) + return; + strscpy(repository, repo_start, repo_end - repo_start + 1); + strscpy(url_front, url, repo_start - url + 1); +} + +static void erofs_token_header(const char *repository, + char **token_header, int still_running) +{ + char url_token[512]; + + snprintf(url_token, sizeof(url_token), docker_auth_url, repository); + CURL *curl_token = curl_easy_init(); + struct erofs_oci_registry_memory *data_token = + erofs_curl_setopt(erofs_oci_registry_multi_handle(), + curl_token, NULL, NULL, url_token, erofs_token_mode); + + erofs_curl_io(erofs_oci_registry_multi_handle(), &still_running); + *token_header = erofs_get_authorization_header(data_token); + + curl_multi_remove_handle(erofs_oci_registry_multi_handle(), curl_token); + curl_easy_cleanup(curl_token); + + if (data_token) + free(data_token); +} + +static void erofs_blob_info(char *blob_start, const char *url_front, + const char *repository, char *digest_value, + char *media_type_value, char *url_blob) +{ + const char *digest_start = blob_start + strlen("/blobs/"); + const char *digest_end = strchr(digest_start, '/'); + + if (!digest_end) + digest_end = digest_start + strlen(digest_start); + strscpy(digest_value, digest_start, digest_end - digest_start + 1); + + const char *media_type_start = strstr(digest_end, "Accept: "); + + if (media_type_start) + strscpy(media_type_value, media_type_start, sizeof(media_type_value)); + else + strscpy(media_type_value, "", sizeof(media_type_value)); + + snprintf(url_blob, 512, "%s%s/blobs/%s", + url_front, repository, digest_value); +} + +static void erofs_parse_params(const char *repo_end, + char *arch, char *os, int *digest) +{ + const char *params = repo_end + 1; + + while (params && *params != '\0') { + if (strncmp(params, "arch-", 5) == 0) { + params += 5; + const char *param_end = strchr(params, '/'); + + if (param_end) { + strscpy(arch, params, param_end - params + 1); + params = param_end + 1; + } else { + strscpy(arch, params, sizeof(arch)); + break; + } + } else if (strncmp(params, "os-", 3) == 0) { + params += 3; + const char *param_end = strchr(params, '/'); + + if (param_end) { + strscpy(os, params, param_end - params + 1); + params = param_end + 1; + } else { + strscpy(os, params, sizeof(os)); + break; + } + } else if (strncmp(params, "digest-", 7) == 0) { + params += 7; + *digest = atoi(params) - 1; + break; + } + + params = strchr(params, '/'); + if (params) + params++; + } +} + +static void erofs_manifest(const char *url_front, const char *repository, + const char *token_header, const char *arch, + const char *os, char *media_type_blob, int *digest, + int *still_running, char *media_type, char *url_blob) +{ + char url_image_index[512]; + + snprintf(url_image_index, sizeof(url_image_index), + "%s%s/manifests/latest", url_front, repository); + + CURL *curl_image_index = curl_easy_init(); + struct erofs_oci_registry_memory *data_image_index = + erofs_curl_setopt(erofs_oci_registry_multi_handle(), + curl_image_index, token_header, + NULL, url_image_index, erofs_image_index_mode); + erofs_curl_io(erofs_oci_registry_multi_handle(), still_running); + char *digest_image_index = erofs_get_manifest_digest(data_image_index, + arch, os, media_type); + if (data_image_index) + free(data_image_index); + + curl_multi_remove_handle(erofs_oci_registry_multi_handle(), + curl_image_index); + curl_easy_cleanup(curl_image_index); + + char url_manifest[512]; + + snprintf(url_manifest, sizeof(url_manifest), "%s%s/manifests/%s", + url_front, repository, digest_image_index); + + if (digest_image_index) + free(digest_image_index); + + CURL *curl_manifest = curl_easy_init(); + struct erofs_oci_registry_memory *data_manifest = + erofs_curl_setopt(erofs_oci_registry_multi_handle(), + curl_manifest, token_header, media_type, + url_manifest, erofs_manifest_mode); + erofs_curl_io(erofs_oci_registry_multi_handle(), still_running); + char *digest_manifest = erofs_get_layer_digest(data_manifest, + media_type_blob, *digest); + + if (data_manifest) + free(data_manifest); + + curl_multi_remove_handle(erofs_oci_registry_multi_handle(), curl_manifest); + curl_easy_cleanup(curl_manifest); + snprintf(url_blob, 512, "%s%s/blobs/%s", + url_front, repository, digest_manifest); +} + +struct erofs_vfile *open_oci_registry(const char *url) +{ + char *url_front = (char *)malloc(256 * sizeof(char)); + char *repository = (char *)malloc(256 * sizeof(char)); + char *arch = (char *)malloc(256 * sizeof(char)); + char *os = (char *)malloc(256 * sizeof(char)); + char *media_type_value = (char *)malloc(512 * sizeof(char)); + char *media_type_blob = (char *)malloc(512 * sizeof(char)); + char *url_blob = (char *)malloc(512 * sizeof(char)); + char *repo_end = (char *)malloc(256 * sizeof(char)); + char **token_header = NULL; + char *media_type = (char *)malloc(512 * sizeof(char)); + char *blob_start = NULL; + CURL *curl_blob = NULL; + struct erofs_oci_registry_memory *data_blob = NULL; + struct erofs_vfile *vf = + (struct erofs_vfile *)malloc(sizeof(struct erofs_vfile)); + + int digest = 0; + int still_running = 0; + int mode = 0; + + erofs_extract_urlfront_repository(url, repo_end, repository, url_front); + erofs_token_header(repository, &token_header, still_running); + + blob_start = strstr(repo_end, "/blobs/"); + + if (blob_start) { + char *digest_value = (char *)malloc(128 * sizeof(char)); + + erofs_blob_info(blob_start, url_front, repository, + digest_value, media_type_value, url_blob); + free(blob_start); + + mode = 1; + + goto pull_blob_mode; + } else { + strscpy(arch, "amd64", sizeof(arch)); + strscpy(os, "linux", sizeof(os)); + digest = 0; + + erofs_parse_params(repo_end, arch, os, &digest); + erofs_manifest(url_front, repository, token_header, arch, os, + media_type_blob, &digest, + &still_running, media_type, url_blob); + } + + free(url_front); + free(arch); + free(os); + free(media_type); + +pull_blob_mode: + + curl_blob = curl_easy_init(); + + if (mode == 1) { + data_blob = erofs_curl_setopt(erofs_oci_registry_multi_handle(), + curl_blob, token_header, + media_type_value, + url_blob, erofs_blob_mode); + } else { + data_blob = erofs_curl_setopt(erofs_oci_registry_multi_handle(), + curl_blob, token_header, + media_type_blob, + url_blob, erofs_blob_mode); + } + + erofs_curl_io(erofs_oci_registry_multi_handle(), &still_running); + curl_multi_remove_handle(erofs_oci_registry_multi_handle(), curl_blob); + curl_easy_cleanup(curl_blob); + + vf->ops = malloc(sizeof(struct erofs_vfops)); + vf->ops->read = erofs_oci_registry_read; + vf->ops->pread = erofs_oci_registry_pread; + vf->ops->lseek = erofs_oci_registry_lseek; + *((struct erofs_oci_registry_memory **)(vf->payload)) = data_blob; + + return vf; +} diff --git a/lib/oci_registry.h b/lib/oci_registry.h new file mode 100644 index 0000000..ba6a08b --- /dev/null +++ b/lib/oci_registry.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ + + +#include <stdio.h> +#include <curl/curl.h> +#include <json-c/json.h> +#include <stdlib.h> +#include <string.h> +#include "erofs/io.h" + +struct erofs_vfile *erofs_open_oci_registry(const char *url); +ssize_t erofs_oci_registry_read(struct erofs_vfile *vf, + void *buf, size_t len); +ssize_t erofs_oci_registry_pread(struct erofs_vfile *vf, void *buf, + u64 offset, size_t len); +off_t erofs_oci_registry_lseek(struct erofs_vfile *vf, + u64 offset, int whence); -- 2.44.0.windows.1