This crate can parse an OCI image tarball and extract its rootfs. Layers are unpacked one after another directly into the target directory, with whiteouts and opaque whiteouts applied after each layer; an overlay filesystem is not used yet.
Signed-off-by: Filip Schauer <f.scha...@proxmox.com> --- Changed since v2: * remove reachable unwraps & refactor code * increase hasher buffer size from 4096 to 32768 (matching internal sha2::Digest buffering) * preserve permissions and xattrs during rootfs extraction * handle whiteouts & opaque whiteouts Cargo.toml | 1 + proxmox-oci/Cargo.toml | 22 +++ proxmox-oci/debian/changelog | 5 + proxmox-oci/debian/control | 45 +++++ proxmox-oci/debian/debcargo.toml | 7 + proxmox-oci/src/lib.rs | 283 +++++++++++++++++++++++++++++++ proxmox-oci/src/oci_tar_image.rs | 145 ++++++++++++++++ 7 files changed, 508 insertions(+) create mode 100644 proxmox-oci/Cargo.toml create mode 100644 proxmox-oci/debian/changelog create mode 100644 proxmox-oci/debian/control create mode 100644 proxmox-oci/debian/debcargo.toml create mode 100644 proxmox-oci/src/lib.rs create mode 100644 proxmox-oci/src/oci_tar_image.rs diff --git a/Cargo.toml b/Cargo.toml index 020e7497..4606fc19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "proxmox-metrics", "proxmox-network-api", "proxmox-notify", + "proxmox-oci", "proxmox-openid", "proxmox-product-config", "proxmox-resource-scheduling", diff --git a/proxmox-oci/Cargo.toml b/proxmox-oci/Cargo.toml new file mode 100644 index 00000000..4daff6ab --- /dev/null +++ b/proxmox-oci/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "proxmox-oci" +description = "OCI image parsing and extraction" +version = "0.1.0" + +authors.workspace = true +edition.workspace = true +exclude.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +flate2.workspace = true +oci-spec = "0.8.1" +sha2 = "0.10" +tar.workspace = true +thiserror = "1" +zstd.workspace = true + +proxmox-io.workspace = true diff --git a/proxmox-oci/debian/changelog b/proxmox-oci/debian/changelog new file mode 100644 index 00000000..754d06c1 --- /dev/null +++ b/proxmox-oci/debian/changelog @@ -0,0 
+1,5 @@ +rust-proxmox-oci (0.1.0-1) bookworm; urgency=medium + + * Initial release. + + -- Proxmox Support Team <supp...@proxmox.com> Mon, 28 Apr 2025 12:34:56 +0200 diff --git a/proxmox-oci/debian/control b/proxmox-oci/debian/control new file mode 100644 index 00000000..f33331c5 --- /dev/null +++ b/proxmox-oci/debian/control @@ -0,0 +1,45 @@ +Source: rust-proxmox-oci +Section: rust +Priority: optional +Build-Depends: debhelper-compat (= 13), + dh-sequence-cargo +Build-Depends-Arch: cargo:native <!nocheck>, + rustc:native (>= 1.82) <!nocheck>, + libstd-rust-dev <!nocheck>, + librust-flate2-1+default-dev <!nocheck>, + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~) <!nocheck>, + librust-proxmox-io-1+default-dev (>= 1.2.0-~~) <!nocheck>, + librust-sha2-0.10+default-dev <!nocheck>, + librust-tar-0.4+default-dev <!nocheck>, + librust-thiserror-1+default-dev <!nocheck>, + librust-zstd-0.13+default-dev <!nocheck> +Maintainer: Proxmox Support Team <supp...@proxmox.com> +Standards-Version: 4.7.0 +Vcs-Git: git://git.proxmox.com/git/proxmox.git +Vcs-Browser: https://git.proxmox.com/?p=proxmox.git +Homepage: https://proxmox.com +X-Cargo-Crate: proxmox-oci +Rules-Requires-Root: no + +Package: librust-proxmox-oci-dev +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + librust-flate2-1+default-dev, + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~), + librust-proxmox-io-1+default-dev (>= 1.2.0-~~), + librust-sha2-0.10+default-dev, + librust-tar-0.4+default-dev, + librust-thiserror-1+default-dev, + librust-zstd-0.13+default-dev +Provides: + librust-proxmox-oci+default-dev (= ${binary:Version}), + librust-proxmox-oci-0-dev (= ${binary:Version}), + librust-proxmox-oci-0+default-dev (= ${binary:Version}), + librust-proxmox-oci-0.1-dev (= ${binary:Version}), + librust-proxmox-oci-0.1+default-dev (= ${binary:Version}), + librust-proxmox-oci-0.1.0-dev (= ${binary:Version}), + librust-proxmox-oci-0.1.0+default-dev (= ${binary:Version}) +Description: OCI image parsing and 
extraction - Rust source code + Source code for Debianized Rust crate "proxmox-oci" diff --git a/proxmox-oci/debian/debcargo.toml b/proxmox-oci/debian/debcargo.toml new file mode 100644 index 00000000..b7864cdb --- /dev/null +++ b/proxmox-oci/debian/debcargo.toml @@ -0,0 +1,7 @@ +overlay = "." +crate_src_path = ".." +maintainer = "Proxmox Support Team <supp...@proxmox.com>" + +[source] +vcs_git = "git://git.proxmox.com/git/proxmox.git" +vcs_browser = "https://git.proxmox.com/?p=proxmox.git" diff --git a/proxmox-oci/src/lib.rs b/proxmox-oci/src/lib.rs new file mode 100644 index 00000000..b54d06e1 --- /dev/null +++ b/proxmox-oci/src/lib.rs @@ -0,0 +1,283 @@ +use std::collections::HashMap; +use std::fs::{read_dir, remove_dir_all, remove_file, File}; +use std::io::{Read, Seek}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use flate2::read::GzDecoder; +pub use oci_spec::image::Config; +use oci_spec::image::{Arch, ImageConfiguration, ImageManifest, MediaType}; +use oci_spec::OciSpecError; +use sha2::digest::generic_array::GenericArray; +use sha2::{Digest, Sha256}; +use tar::{Archive, EntryType}; +use thiserror::Error; + +mod oci_tar_image; +use oci_tar_image::OciTarImage; + +fn compute_digest<R: Read, H: Digest>( + mut reader: R, + mut hasher: H, +) -> std::io::Result<GenericArray<u8, H::OutputSize>> { + let mut buf = proxmox_io::boxed::zeroed(32768); + + loop { + let bytes_read = reader.read(&mut buf)?; + if bytes_read == 0 { + break Ok(hasher.finalize()); + } + + hasher.update(&buf[..bytes_read]); + } +} + +fn compute_sha256<R: Read>(reader: R) -> std::io::Result<oci_spec::image::Sha256Digest> { + let digest = compute_digest(reader, Sha256::new())?; + Ok(oci_spec::image::Sha256Digest::from_str(&format!("{digest:x}")).unwrap()) +} + +/// Build a mapping from uncompressed layer digests (as found in the image config's `rootfs.diff_ids`) +/// to their corresponding compressed-layer digests (i.e. 
the filenames under `blobs/<algorithm>/<digest>`) +fn build_layer_map<R: Read + Seek>( + mut oci_tar_image: OciTarImage<R>, + image_manifest: &ImageManifest, +) -> Result< + ( + OciTarImage<R>, + HashMap<oci_spec::image::Digest, oci_spec::image::Descriptor>, + ), + ExtractError, +> { + let mut layer_mapping = HashMap::new(); + + for layer in image_manifest.layers() { + let digest = match layer.media_type() { + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => layer.digest().clone(), + MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => { + let mut compressed_blob = oci_tar_image + .open_blob(layer.digest()) + .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?; + let decoder = GzDecoder::new(&mut compressed_blob); + let hash = compute_sha256(decoder)?.into(); + oci_tar_image = compressed_blob.into_oci_tar_image(); + hash + } + MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => { + let mut compressed_blob = oci_tar_image + .open_blob(layer.digest()) + .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?; + let decoder = zstd::Decoder::new(&mut compressed_blob)?; + let hash = compute_sha256(decoder)?.into(); + oci_tar_image = compressed_blob.into_oci_tar_image(); + hash + } + // Skip any other non-ImageLayer related media types. + // Match explicitly to avoid missing new image layer types when oci-spec updates. 
+ MediaType::Descriptor + | MediaType::LayoutHeader + | MediaType::ImageManifest + | MediaType::ImageIndex + | MediaType::ImageConfig + | MediaType::ArtifactManifest + | MediaType::EmptyJSON + | MediaType::Other(_) => continue, + }; + + layer_mapping.insert(digest, layer.clone()); + } + + Ok((oci_tar_image, layer_mapping)) +} + +#[derive(Debug, Error)] +pub enum ProxmoxOciError { + #[error("Error while parsing OCI image: {0}")] + ParseError(#[from] ParseError), + #[error("Error while extracting OCI image: {0}")] + ExtractError(#[from] ExtractError), +} + +pub fn parse_and_extract_image<P: AsRef<Path>>( + oci_tar_path: P, + rootfs_path: P, +) -> Result<Option<Config>, ProxmoxOciError> { + let (oci_tar_image, image_manifest, image_config) = parse_image(oci_tar_path)?; + + extract_image_rootfs(oci_tar_image, &image_manifest, &image_config, rootfs_path)?; + + Ok(image_config.config().clone()) +} + +#[derive(Debug, Error)] +pub enum ParseError { + #[error("OCI spec error: {0}")] + OciSpec(#[from] OciSpecError), + #[error("Wrong media type")] + WrongMediaType, + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("Unsupported CPU architecture")] + UnsupportedArchitecture, + #[error("Missing image config")] + MissingImageConfig, +} + +fn parse_image<P: AsRef<Path>>( + oci_tar_path: P, +) -> Result<(OciTarImage<File>, ImageManifest, ImageConfiguration), ParseError> { + let oci_tar_file = File::open(oci_tar_path)?; + let mut oci_tar_image = OciTarImage::new(oci_tar_file)?; + + let image_manifest = oci_tar_image + .image_manifest(&Arch::Amd64) + .ok_or(ParseError::UnsupportedArchitecture)??; + + let image_config_descriptor = image_manifest.config(); + + if image_config_descriptor.media_type() != &MediaType::ImageConfig { + return Err(ParseError::WrongMediaType); + } + + let mut image_config_file = oci_tar_image + .open_blob(image_config_descriptor.digest()) + .ok_or(ParseError::MissingImageConfig)?; + let image_config = ImageConfiguration::from_reader(&mut 
image_config_file)?; + + Ok(( + image_config_file.into_oci_tar_image(), + image_manifest, + image_config, + )) +} + +#[derive(Debug, Error)] +pub enum ExtractError { + #[error("Incorrectly formatted digest: \"{0}\"")] + InvalidDigest(String), + #[error("Unknown layer digest {0} found in rootfs.diff_ids")] + UnknownLayerDigest(oci_spec::image::Digest), + #[error("Layer file {0} mentioned in image manifest is missing")] + MissingLayerFile(oci_spec::image::Digest), + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("Layer has wrong media type: {0}")] + WrongMediaType(String), +} + +fn extract_image_rootfs<R: Read + Seek, P: AsRef<Path>>( + oci_tar_image: OciTarImage<R>, + image_manifest: &ImageManifest, + image_config: &ImageConfiguration, + target_path: P, +) -> Result<(), ExtractError> { + let (mut oci_tar_image, layer_map) = build_layer_map(oci_tar_image, image_manifest)?; + + for layer in image_config.rootfs().diff_ids() { + let layer_digest = oci_spec::image::Digest::from_str(layer) + .map_err(|_| ExtractError::InvalidDigest(layer.to_string()))?; + let layer_descriptor = layer_map + .get(&layer_digest) + .ok_or(ExtractError::UnknownLayerDigest(layer_digest.clone()))?; + let mut layer_file = oci_tar_image + .open_blob(layer_descriptor.digest()) + .ok_or(ExtractError::MissingLayerFile(layer_digest))?; + + let (whiteouts, opaque_whiteouts) = match layer_descriptor.media_type() { + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => { + extract_archive(&mut layer_file, &target_path)? + } + MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => { + let mut gz_decoder = GzDecoder::new(&mut layer_file); + extract_archive(&mut gz_decoder, &target_path)? + } + MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => { + let mut zstd_decoder = zstd::Decoder::new(&mut layer_file)?; + extract_archive(&mut zstd_decoder, &target_path)? + } + // Error on any other non-ImageLayer related media types. 
+ // Match explicitly to avoid missing new image layer types when oci-spec updates. + media_type @ (MediaType::Descriptor + | MediaType::LayoutHeader + | MediaType::ImageManifest + | MediaType::ImageIndex + | MediaType::ImageConfig + | MediaType::ArtifactManifest + | MediaType::EmptyJSON + | MediaType::Other(_)) => { + return Err(ExtractError::WrongMediaType(media_type.to_string())) + } + }; + + oci_tar_image = layer_file.into_oci_tar_image(); + + for whiteout in whiteouts { + let wh_abs_path = target_path.as_ref().join(&whiteout); + remove_path(wh_abs_path)?; + } + + for opaque_whiteout in opaque_whiteouts { + let wh_abs_path = target_path.as_ref().join(&opaque_whiteout); + for direntry in read_dir(wh_abs_path)? { + remove_path(direntry?.path())?; + } + } + } + + Ok(()) +} + +fn extract_archive<R: Read, P: AsRef<Path>>( + reader: &mut R, + target_path: P, +) -> std::io::Result<(Vec<PathBuf>, Vec<PathBuf>)> { + const WHITEOUT_PREFIX: &str = ".wh."; + const OPAQUE_WHITEOUT_NAME: &str = ".wh..wh..opq"; + + let mut archive = Archive::new(reader); + archive.set_preserve_ownerships(true); + archive.set_preserve_permissions(true); + archive.set_unpack_xattrs(true); + let mut directories = Vec::new(); + let mut whiteouts = Vec::new(); + let mut opaque_whiteouts = Vec::new(); + + for entry in archive.entries()? 
{ + let mut file = entry?; + if file.header().entry_type() == EntryType::Directory { + directories.push(file); + } else { + let filepath = file.path()?.into_owned(); + if let Some(filename) = filepath.file_name() { + if filename == OPAQUE_WHITEOUT_NAME { + if let Some(parent) = filepath.parent() { + opaque_whiteouts.push(parent.to_path_buf()); + } + continue; + } else if let Some(filename) = filename.to_str() { + if let Some(filename_stripped) = filename.strip_prefix(WHITEOUT_PREFIX) { + whiteouts.push(filepath.with_file_name(filename_stripped)); + continue; + } + } + } + + file.unpack_in(&target_path)?; + } + } + + directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); + for mut dir in directories { + dir.unpack_in(&target_path)?; + } + + Ok((whiteouts, opaque_whiteouts)) +} + +fn remove_path(path: PathBuf) -> std::io::Result<()> { + if path.metadata()?.is_dir() { + remove_dir_all(path) + } else { + remove_file(path) + } +} diff --git a/proxmox-oci/src/oci_tar_image.rs b/proxmox-oci/src/oci_tar_image.rs new file mode 100644 index 00000000..212f6b53 --- /dev/null +++ b/proxmox-oci/src/oci_tar_image.rs @@ -0,0 +1,145 @@ +use std::collections::HashMap; +use std::io::{Read, Seek, SeekFrom}; +use std::ops::Range; +use std::path::{Path, PathBuf}; + +use oci_spec::image::{Arch, Digest, ImageIndex, ImageManifest, MediaType}; +use oci_spec::OciSpecError; +use tar::Archive; + +use proxmox_io::RangeReader; + +#[derive(Clone)] +struct TarEntry { + range: Range<u64>, +} + +impl TarEntry { + fn new(range: Range<u64>) -> Self { + Self { range } + } +} + +pub struct OciTarImage<R: Read + Seek> { + reader: R, + entries: HashMap<PathBuf, TarEntry>, + image_index: ImageIndex, +} + +impl<R: Read + Seek> OciTarImage<R> { + pub fn new(reader: R) -> oci_spec::Result<Self> { + let mut archive = Archive::new(reader); + let entries = archive.entries_with_seek()?; + let mut entries_index = HashMap::new(); + let mut image_index = None; + + for entry in entries { + let mut entry = 
entry?; + let offset = entry.raw_file_position(); + let size = entry.size(); + let path = entry.path()?.into_owned(); + + if path.as_path() == Path::new("index.json") { + image_index = Some(ImageIndex::from_reader(&mut entry)?); + } + + let tar_entry = TarEntry::new(offset..(offset + size)); + entries_index.insert(path, tar_entry); + } + + if let Some(image_index) = image_index { + Ok(Self { + reader: archive.into_inner(), + entries: entries_index, + image_index, + }) + } else { + Err(OciSpecError::Other("Missing index.json file".into())) + } + } + + pub fn image_index(&self) -> &ImageIndex { + &self.image_index + } + + fn get_blob_entry(&self, digest: &Digest) -> Option<TarEntry> { + let path = get_blob_path(digest); + self.entries.get(&path).cloned() + } + + pub fn open_blob(self, digest: &Digest) -> Option<OciTarImageBlob<R>> { + if let Some(entry) = self.get_blob_entry(digest) { + Some(OciTarImageBlob::new(self, entry.range)) + } else { + None + } + } + + pub fn image_manifest( + &mut self, + architecture: &Arch, + ) -> Option<oci_spec::Result<ImageManifest>> { + let digest = match self.image_index.manifests().iter().find(|&x| { + x.media_type() == &MediaType::ImageManifest + && x.platform() + .as_ref() + .is_none_or(|platform| platform.architecture() == architecture) + }) { + Some(descriptor) => descriptor.digest(), + None => return None, + }; + + if let Some(entry) = self.get_blob_entry(digest) { + let mut range_reader = RangeReader::new(&mut self.reader, entry.range); + Some(ImageManifest::from_reader(&mut range_reader)) + } else { + Some(Err(OciSpecError::Other(format!( + "Image manifest with digest {digest} mentioned in image index is missing" + )))) + } + } +} + +fn get_blob_path(digest: &Digest) -> PathBuf { + let algorithm = digest.algorithm(); + let digest = digest.digest(); + format!("blobs/{algorithm}/{digest}").into() +} + +pub struct OciTarImageBlob<R: Read + Seek> { + range_reader: RangeReader<R>, + entries: HashMap<PathBuf, TarEntry>, + 
image_index: ImageIndex, +} + +impl<R: Read + Seek> OciTarImageBlob<R> { + fn new(archive: OciTarImage<R>, range: Range<u64>) -> Self { + let range_reader = RangeReader::new(archive.reader, range); + + Self { + range_reader, + entries: archive.entries, + image_index: archive.image_index, + } + } + + pub fn into_oci_tar_image(self) -> OciTarImage<R> { + OciTarImage { + reader: self.range_reader.into_inner(), + entries: self.entries, + image_index: self.image_index, + } + } +} + +impl<R: Read + Seek> Read for OciTarImageBlob<R> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { + self.range_reader.read(buf) + } +} + +impl<R: Read + Seek> Seek for OciTarImageBlob<R> { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> { + self.range_reader.seek(pos) + } +} -- 2.47.2 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel