On Wed, Jul 09, 2025 at 02:34:19PM +0200, Filip Schauer wrote: > This crate can parse an OCI image tarball and extract its rootfs. Layers > are applied in sequence, but an overlay filesystem is currently not > used. > > Signed-off-by: Filip Schauer <f.scha...@proxmox.com> > --- > Changed since v2: > * remove reachable unwraps & refactor code > * increase hasher buffer size from 4096 to 32768 (matching internal > sha2::Digest buffering) > * preserve permissions and xattrs during rootfs extraction > * handle whiteouts & opaque whiteouts > > Cargo.toml | 1 + > proxmox-oci/Cargo.toml | 22 +++ > proxmox-oci/debian/changelog | 5 + > proxmox-oci/debian/control | 45 +++++ > proxmox-oci/debian/debcargo.toml | 7 + > proxmox-oci/src/lib.rs | 283 +++++++++++++++++++++++++++++++ > proxmox-oci/src/oci_tar_image.rs | 145 ++++++++++++++++ > 7 files changed, 508 insertions(+) > create mode 100644 proxmox-oci/Cargo.toml > create mode 100644 proxmox-oci/debian/changelog > create mode 100644 proxmox-oci/debian/control > create mode 100644 proxmox-oci/debian/debcargo.toml > create mode 100644 proxmox-oci/src/lib.rs > create mode 100644 proxmox-oci/src/oci_tar_image.rs > > diff --git a/Cargo.toml b/Cargo.toml > index 020e7497..4606fc19 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -26,6 +26,7 @@ members = [ > "proxmox-metrics", > "proxmox-network-api", > "proxmox-notify", > + "proxmox-oci", > "proxmox-openid", > "proxmox-product-config", > "proxmox-resource-scheduling", > diff --git a/proxmox-oci/Cargo.toml b/proxmox-oci/Cargo.toml > new file mode 100644 > index 00000000..4daff6ab > --- /dev/null > +++ b/proxmox-oci/Cargo.toml > @@ -0,0 +1,22 @@ > +[package] > +name = "proxmox-oci" > +description = "OCI image parsing and extraction" > +version = "0.1.0" > + > +authors.workspace = true > +edition.workspace = true > +exclude.workspace = true > +homepage.workspace = true > +license.workspace = true > +repository.workspace = true > +rust-version.workspace = true > + > +[dependencies] > 
+flate2.workspace = true > +oci-spec = "0.8.1" > +sha2 = "0.10" > +tar.workspace = true > +thiserror = "1" > +zstd.workspace = true > + > +proxmox-io.workspace = true > diff --git a/proxmox-oci/debian/changelog b/proxmox-oci/debian/changelog > new file mode 100644 > index 00000000..754d06c1 > --- /dev/null > +++ b/proxmox-oci/debian/changelog > @@ -0,0 +1,5 @@ > +rust-proxmox-oci (0.1.0-1) bookworm; urgency=medium > + > + * Initial release. > + > + -- Proxmox Support Team <supp...@proxmox.com> Mon, 28 Apr 2025 12:34:56 > +0200 > diff --git a/proxmox-oci/debian/control b/proxmox-oci/debian/control > new file mode 100644 > index 00000000..f33331c5 > --- /dev/null > +++ b/proxmox-oci/debian/control > @@ -0,0 +1,45 @@ > +Source: rust-proxmox-oci > +Section: rust > +Priority: optional > +Build-Depends: debhelper-compat (= 13), > + dh-sequence-cargo > +Build-Depends-Arch: cargo:native <!nocheck>, > + rustc:native (>= 1.82) <!nocheck>, > + libstd-rust-dev <!nocheck>, > + librust-flate2-1+default-dev <!nocheck>, > + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~) <!nocheck>, > + librust-proxmox-io-1+default-dev (>= 1.2.0-~~) <!nocheck>, > + librust-sha2-0.10+default-dev <!nocheck>, > + librust-tar-0.4+default-dev <!nocheck>, > + librust-thiserror-1+default-dev <!nocheck>, > + librust-zstd-0.13+default-dev <!nocheck> > +Maintainer: Proxmox Support Team <supp...@proxmox.com> > +Standards-Version: 4.7.0 > +Vcs-Git: git://git.proxmox.com/git/proxmox.git > +Vcs-Browser: https://git.proxmox.com/?p=proxmox.git > +Homepage: https://proxmox.com > +X-Cargo-Crate: proxmox-oci > +Rules-Requires-Root: no > + > +Package: librust-proxmox-oci-dev > +Architecture: any > +Multi-Arch: same > +Depends: > + ${misc:Depends}, > + librust-flate2-1+default-dev, > + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~), > + librust-proxmox-io-1+default-dev (>= 1.2.0-~~), > + librust-sha2-0.10+default-dev, > + librust-tar-0.4+default-dev, > + librust-thiserror-1+default-dev, > + 
librust-zstd-0.13+default-dev > +Provides: > + librust-proxmox-oci+default-dev (= ${binary:Version}), > + librust-proxmox-oci-0-dev (= ${binary:Version}), > + librust-proxmox-oci-0+default-dev (= ${binary:Version}), > + librust-proxmox-oci-0.1-dev (= ${binary:Version}), > + librust-proxmox-oci-0.1+default-dev (= ${binary:Version}), > + librust-proxmox-oci-0.1.0-dev (= ${binary:Version}), > + librust-proxmox-oci-0.1.0+default-dev (= ${binary:Version}) > +Description: OCI image parsing and extraction - Rust source code > + Source code for Debianized Rust crate "proxmox-oci" > diff --git a/proxmox-oci/debian/debcargo.toml > b/proxmox-oci/debian/debcargo.toml > new file mode 100644 > index 00000000..b7864cdb > --- /dev/null > +++ b/proxmox-oci/debian/debcargo.toml > @@ -0,0 +1,7 @@ > +overlay = "." > +crate_src_path = ".." > +maintainer = "Proxmox Support Team <supp...@proxmox.com>" > + > +[source] > +vcs_git = "git://git.proxmox.com/git/proxmox.git" > +vcs_browser = "https://git.proxmox.com/?p=proxmox.git" > diff --git a/proxmox-oci/src/lib.rs b/proxmox-oci/src/lib.rs > new file mode 100644 > index 00000000..b54d06e1 > --- /dev/null > +++ b/proxmox-oci/src/lib.rs > @@ -0,0 +1,283 @@ > +use std::collections::HashMap; > +use std::fs::{read_dir, remove_dir_all, remove_file, File}; > +use std::io::{Read, Seek}; > +use std::path::{Path, PathBuf}; > +use std::str::FromStr; > + > +use flate2::read::GzDecoder; > +pub use oci_spec::image::Config; > +use oci_spec::image::{Arch, ImageConfiguration, ImageManifest, MediaType}; > +use oci_spec::OciSpecError; > +use sha2::digest::generic_array::GenericArray; > +use sha2::{Digest, Sha256}; > +use tar::{Archive, EntryType}; > +use thiserror::Error; > + > +mod oci_tar_image; > +use oci_tar_image::OciTarImage; > + > +fn compute_digest<R: Read, H: Digest>( > + mut reader: R, > + mut hasher: H, > +) -> std::io::Result<GenericArray<u8, H::OutputSize>> { > + let mut buf = proxmox_io::boxed::zeroed(32768); > + > + loop { > + let bytes_read = 
reader.read(&mut buf)?; > + if bytes_read == 0 { > + break Ok(hasher.finalize()); > + } > + > + hasher.update(&buf[..bytes_read]); > + } > +} > + > +fn compute_sha256<R: Read>(reader: R) -> > std::io::Result<oci_spec::image::Sha256Digest> { > + let digest = compute_digest(reader, Sha256::new())?; > + > Ok(oci_spec::image::Sha256Digest::from_str(&format!("{digest:x}")).unwrap()) > +} > + > +/// Build a mapping from uncompressed layer digests (as found in the image > config's `rootfs.diff_ids`) > +/// to their corresponding compressed-layer digests (i.e. the filenames > under `blobs/<algorithm>/<digest>`) > +fn build_layer_map<R: Read + Seek>( > + mut oci_tar_image: OciTarImage<R>, > + image_manifest: &ImageManifest, > +) -> Result< > + ( > + OciTarImage<R>, > + HashMap<oci_spec::image::Digest, oci_spec::image::Descriptor>, > + ), > + ExtractError, > +> { > + let mut layer_mapping = HashMap::new(); > + > + for layer in image_manifest.layers() { > + let digest = match layer.media_type() { > + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => > layer.digest().clone(), > + MediaType::ImageLayerGzip | > MediaType::ImageLayerNonDistributableGzip => { > + let mut compressed_blob = oci_tar_image > + .open_blob(layer.digest()) > + > .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?; > + let decoder = GzDecoder::new(&mut compressed_blob); > + let hash = compute_sha256(decoder)?.into(); > + oci_tar_image = compressed_blob.into_oci_tar_image(); > + hash > + } > + MediaType::ImageLayerZstd | > MediaType::ImageLayerNonDistributableZstd => { > + let mut compressed_blob = oci_tar_image > + .open_blob(layer.digest()) > + > .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?; > + let decoder = zstd::Decoder::new(&mut compressed_blob)?; > + let hash = compute_sha256(decoder)?.into(); > + oci_tar_image = compressed_blob.into_oci_tar_image(); > + hash > + } > + // Skip any other non-ImageLayer related media types. 
> + // Match explicitly to avoid missing new image layer types when > oci-spec updates. > + MediaType::Descriptor > + | MediaType::LayoutHeader > + | MediaType::ImageManifest > + | MediaType::ImageIndex > + | MediaType::ImageConfig > + | MediaType::ArtifactManifest > + | MediaType::EmptyJSON > + | MediaType::Other(_) => continue, > + }; > + > + layer_mapping.insert(digest, layer.clone()); > + } > + > + Ok((oci_tar_image, layer_mapping)) > +} > + > +#[derive(Debug, Error)] > +pub enum ProxmoxOciError { > + #[error("Error while parsing OCI image: {0}")] > + ParseError(#[from] ParseError), > + #[error("Error while extracting OCI image: {0}")] > + ExtractError(#[from] ExtractError), > +} > + > +pub fn parse_and_extract_image<P: AsRef<Path>>( > + oci_tar_path: P, > + rootfs_path: P, > +) -> Result<Option<Config>, ProxmoxOciError> { > + let (oci_tar_image, image_manifest, image_config) = > parse_image(oci_tar_path)?; > + > + extract_image_rootfs(oci_tar_image, &image_manifest, &image_config, > rootfs_path)?; > + > + Ok(image_config.config().clone()) > +} > + > +#[derive(Debug, Error)] > +pub enum ParseError { > + #[error("OCI spec error: {0}")] > + OciSpec(#[from] OciSpecError), > + #[error("Wrong media type")] > + WrongMediaType, > + #[error("IO error: {0}")] > + Io(#[from] std::io::Error), > + #[error("Unsupported CPU architecture")] > + UnsupportedArchitecture, > + #[error("Missing image config")] > + MissingImageConfig, > +} > + > +fn parse_image<P: AsRef<Path>>( > + oci_tar_path: P, > +) -> Result<(OciTarImage<File>, ImageManifest, ImageConfiguration), > ParseError> { > + let oci_tar_file = File::open(oci_tar_path)?; > + let mut oci_tar_image = OciTarImage::new(oci_tar_file)?; > + > + let image_manifest = oci_tar_image > + .image_manifest(&Arch::Amd64) > + .ok_or(ParseError::UnsupportedArchitecture)??; > + > + let image_config_descriptor = image_manifest.config(); > + > + if image_config_descriptor.media_type() != &MediaType::ImageConfig { > + return 
Err(ParseError::WrongMediaType); > + } > + > + let mut image_config_file = oci_tar_image > + .open_blob(image_config_descriptor.digest()) > + .ok_or(ParseError::MissingImageConfig)?; > + let image_config = ImageConfiguration::from_reader(&mut > image_config_file)?; > + > + Ok(( > + image_config_file.into_oci_tar_image(), > + image_manifest, > + image_config, > + )) > +} > + > +#[derive(Debug, Error)] > +pub enum ExtractError { > + #[error("Incorrectly formatted digest: \"{0}\"")] > + InvalidDigest(String), > + #[error("Unknown layer digest {0} found in rootfs.diff_ids")] > + UnknownLayerDigest(oci_spec::image::Digest), > + #[error("Layer file {0} mentioned in image manifest is missing")] > + MissingLayerFile(oci_spec::image::Digest), > + #[error("IO error: {0}")] > + Io(#[from] std::io::Error), > + #[error("Layer has wrong media type: {0}")] > + WrongMediaType(String), > +} > + > +fn extract_image_rootfs<R: Read + Seek, P: AsRef<Path>>( > + oci_tar_image: OciTarImage<R>, > + image_manifest: &ImageManifest, > + image_config: &ImageConfiguration, > + target_path: P, > +) -> Result<(), ExtractError> { > + let (mut oci_tar_image, layer_map) = build_layer_map(oci_tar_image, > image_manifest)?; > + > + for layer in image_config.rootfs().diff_ids() { > + let layer_digest = oci_spec::image::Digest::from_str(layer) > + .map_err(|_| ExtractError::InvalidDigest(layer.to_string()))?; > + let layer_descriptor = layer_map > + .get(&layer_digest) > + .ok_or(ExtractError::UnknownLayerDigest(layer_digest.clone()))?; > + let mut layer_file = oci_tar_image > + .open_blob(layer_descriptor.digest()) > + .ok_or(ExtractError::MissingLayerFile(layer_digest))?; > + > + let (whiteouts, opaque_whiteouts) = match > layer_descriptor.media_type() { > + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => > { > + extract_archive(&mut layer_file, &target_path)? 
> + } > + MediaType::ImageLayerGzip | > MediaType::ImageLayerNonDistributableGzip => { > + let mut gz_decoder = GzDecoder::new(&mut layer_file); > + extract_archive(&mut gz_decoder, &target_path)? > + } > + MediaType::ImageLayerZstd | > MediaType::ImageLayerNonDistributableZstd => { > + let mut zstd_decoder = zstd::Decoder::new(&mut layer_file)?; > + extract_archive(&mut zstd_decoder, &target_path)? > + } > + // Error on any other non-ImageLayer related media types. > + // Match explicitly to avoid missing new image layer types when > oci-spec updates. > + media_type @ (MediaType::Descriptor > + | MediaType::LayoutHeader > + | MediaType::ImageManifest > + | MediaType::ImageIndex > + | MediaType::ImageConfig > + | MediaType::ArtifactManifest > + | MediaType::EmptyJSON > + | MediaType::Other(_)) => { > + return > Err(ExtractError::WrongMediaType(media_type.to_string())) > + } > + }; > + > + oci_tar_image = layer_file.into_oci_tar_image(); > + > + for whiteout in whiteouts { > + let wh_abs_path = target_path.as_ref().join(&whiteout); > + remove_path(wh_abs_path)?; > + } > + > + for opaque_whiteout in opaque_whiteouts { > + let wh_abs_path = target_path.as_ref().join(&opaque_whiteout); > + for direntry in read_dir(wh_abs_path)? { > + remove_path(direntry?.path())?; > + } > + } > + } > + > + Ok(()) > +} > + > +fn extract_archive<R: Read, P: AsRef<Path>>( > + reader: &mut R, > + target_path: P, > +) -> std::io::Result<(Vec<PathBuf>, Vec<PathBuf>)> { > + const WHITEOUT_PREFIX: &str = ".wh."; > + const OPAQUE_WHITEOUT_NAME: &str = ".wh..wh..opq"; > + > + let mut archive = Archive::new(reader); > + archive.set_preserve_ownerships(true); > + archive.set_preserve_permissions(true); > + archive.set_unpack_xattrs(true); > + let mut directories = Vec::new(); > + let mut whiteouts = Vec::new(); > + let mut opaque_whiteouts = Vec::new(); > + > + for entry in archive.entries()? 
{ > + let mut file = entry?; > + if file.header().entry_type() == EntryType::Directory { > + directories.push(file); > + } else { > + let filepath = file.path()?.into_owned(); > + if let Some(filename) = filepath.file_name() { > + if filename == OPAQUE_WHITEOUT_NAME { > + if let Some(parent) = filepath.parent() { > + opaque_whiteouts.push(parent.to_path_buf());
A path can *technically* exist in the same layer as both a whiteout and a new entry, so delaying the removal until after extraction may remove files which should still exist. *Technically* the spec allows whiteouts to be ordered "wrong"... The spec states: - Whiteout files MUST only apply to resources in lower/parent layers. - Files that are present in the same layer as a whiteout file can only be hidden by whiteout files in subsequent layers. So in order to be "truly" correct, we'd have to go through the archive twice: once to apply all the whiteouts, and then a second time to extract all the non-whiteouts... I'm not sure this happens in the real world, but I think code-wise it's easy enough to implement. > + } > + continue; > + } else if let Some(filename) = filename.to_str() { > + if let Some(filename_stripped) = > filename.strip_prefix(WHITEOUT_PREFIX) { > + > whiteouts.push(filepath.with_file_name(filename_stripped)); > + continue; > + } > + } > + } > + At this point we also have to remove an already existing destination - potentially recursively. When a layer replaces a directory with a file, the new layer simply contains the file without any preceding whiteouts. 
> + file.unpack_in(&target_path)?; > + } > + } > + > + directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); > + for mut dir in directories { > + dir.unpack_in(&target_path)?; > + } > + > + Ok((whiteouts, opaque_whiteouts)) > +} > + > +fn remove_path(path: PathBuf) -> std::io::Result<()> { > + if path.metadata()?.is_dir() { > + remove_dir_all(path) > + } else { > + remove_file(path) > + } > +} > diff --git a/proxmox-oci/src/oci_tar_image.rs > b/proxmox-oci/src/oci_tar_image.rs > new file mode 100644 > index 00000000..212f6b53 > --- /dev/null > +++ b/proxmox-oci/src/oci_tar_image.rs > @@ -0,0 +1,145 @@ > +use std::collections::HashMap; > +use std::io::{Read, Seek, SeekFrom}; > +use std::ops::Range; > +use std::path::{Path, PathBuf}; > + > +use oci_spec::image::{Arch, Digest, ImageIndex, ImageManifest, MediaType}; > +use oci_spec::OciSpecError; > +use tar::Archive; > + > +use proxmox_io::RangeReader; > + > +#[derive(Clone)] > +struct TarEntry { > + range: Range<u64>, > +} > + > +impl TarEntry { > + fn new(range: Range<u64>) -> Self { > + Self { range } > + } > +} > + > +pub struct OciTarImage<R: Read + Seek> { > + reader: R, > + entries: HashMap<PathBuf, TarEntry>, > + image_index: ImageIndex, > +} > + > +impl<R: Read + Seek> OciTarImage<R> { > + pub fn new(reader: R) -> oci_spec::Result<Self> { > + let mut archive = Archive::new(reader); > + let entries = archive.entries_with_seek()?; > + let mut entries_index = HashMap::new(); > + let mut image_index = None; > + > + for entry in entries { > + let mut entry = entry?; > + let offset = entry.raw_file_position(); > + let size = entry.size(); > + let path = entry.path()?.into_owned(); > + > + if path.as_path() == Path::new("index.json") { > + image_index = Some(ImageIndex::from_reader(&mut entry)?); > + } > + > + let tar_entry = TarEntry::new(offset..(offset + size)); > + entries_index.insert(path, tar_entry); > + } > + > + if let Some(image_index) = image_index { > + Ok(Self { > + reader: 
archive.into_inner(), > + entries: entries_index, > + image_index, > + }) > + } else { > + Err(OciSpecError::Other("Missing index.json file".into())) > + } > + } > + > + pub fn image_index(&self) -> &ImageIndex { > + &self.image_index > + } > + > + fn get_blob_entry(&self, digest: &Digest) -> Option<TarEntry> { > + let path = get_blob_path(digest); > + self.entries.get(&path).cloned() > + } > + > + pub fn open_blob(self, digest: &Digest) -> Option<OciTarImageBlob<R>> { > + if let Some(entry) = self.get_blob_entry(digest) { > + Some(OciTarImageBlob::new(self, entry.range)) > + } else { > + None > + } > + } > + > + pub fn image_manifest( > + &mut self, > + architecture: &Arch, > + ) -> Option<oci_spec::Result<ImageManifest>> { > + let digest = match self.image_index.manifests().iter().find(|&x| { > + x.media_type() == &MediaType::ImageManifest > + && x.platform() > + .as_ref() > + .is_none_or(|platform| platform.architecture() == > architecture) > + }) { > + Some(descriptor) => descriptor.digest(), > + None => return None, > + }; > + > + if let Some(entry) = self.get_blob_entry(digest) { > + let mut range_reader = RangeReader::new(&mut self.reader, > entry.range); > + Some(ImageManifest::from_reader(&mut range_reader)) > + } else { > + Some(Err(OciSpecError::Other(format!( > + "Image manifest with digest {digest} mentioned in image > index is missing" > + )))) > + } > + } > +} > + > +fn get_blob_path(digest: &Digest) -> PathBuf { > + let algorithm = digest.algorithm(); > + let digest = digest.digest(); > + format!("blobs/{algorithm}/{digest}").into() > +} > + > +pub struct OciTarImageBlob<R: Read + Seek> { > + range_reader: RangeReader<R>, > + entries: HashMap<PathBuf, TarEntry>, > + image_index: ImageIndex, > +} > + > +impl<R: Read + Seek> OciTarImageBlob<R> { > + fn new(archive: OciTarImage<R>, range: Range<u64>) -> Self { > + let range_reader = RangeReader::new(archive.reader, range); > + > + Self { > + range_reader, > + entries: archive.entries, > + image_index: 
archive.image_index, > + } > + } > + > + pub fn into_oci_tar_image(self) -> OciTarImage<R> { > + OciTarImage { > + reader: self.range_reader.into_inner(), > + entries: self.entries, > + image_index: self.image_index, > + } > + } > +} > + > +impl<R: Read + Seek> Read for OciTarImageBlob<R> { > + fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { > + self.range_reader.read(buf) > + } > +} > + > +impl<R: Read + Seek> Seek for OciTarImageBlob<R> { > + fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> { > + self.range_reader.seek(pos) > + } > +} > -- > 2.47.2 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel