This is an automated email from the ASF dual-hosted git repository.

ppkarwasz pushed a commit to branch feat/vdr-generation
in repository https://gitbox.apache.org/repos/asf/logging-site.git

commit 74d0e198d8e22b127ba0244ec9deb021847bff02
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Fri Apr 24 12:27:49 2026 +0200

    feat: add `vdr_split` script
    
    The `vdr_split` script deterministically and reproducibly splits our monolithic VDR into one file per vulnerability. The files are stored at:
    
    src/vulnerabilities/CVE-XXXX-YYYY/<bom-ref>.cdx.xml
    
    where `bom-ref` is the slug we used as a BOM reference.
    
    The Conan package for Log4cxx has no identity of its own, so its vulnerabilities are recorded in the same file as the corresponding Log4cxx ones.
    
    A template for new CycloneDX files is stored in `src/vulnerabilities/template.cdx.xml`.
---
 scripts/vdr_split.py                 | 239 +++++++++++++++++++++++++++++++++++
 src/vulnerabilities/template.cdx.xml | 170 +++++++++++++++++++++++++
 2 files changed, 409 insertions(+)

diff --git a/scripts/vdr_split.py b/scripts/vdr_split.py
new file mode 100755
index 00000000..eade77f0
--- /dev/null
+++ b/scripts/vdr_split.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.11"
+# dependencies = ["lxml>=5"]
+# ///
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Split the monolithic ``vdr.xml`` into per-(CVE, component) files.
+
+Reads ``src/site/static/cyclonedx/vdr.xml`` (CycloneDX 1.6) and writes
+``src/vulnerabilities/<CVE-id>/<component>.cdx.xml`` (CycloneDX 1.7).
+
+One output file is produced per affected component, except that
+``log4cxx-conan`` never gets its own file; its vulnerabilities are always
+one-to-one with ``log4cxx`` and are reflected in the log4cxx file via a
+``<components>`` entry plus a ``<dependencies>`` edge.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import uuid
+from pathlib import Path
+
+from lxml import etree
+
+ROOT = Path(__file__).resolve().parent.parent
+SRC = ROOT / "src" / "site" / "static" / "cyclonedx" / "vdr.xml"
+OUT_DIR = ROOT / "src" / "vulnerabilities"
+
+NS_OLD = "http://cyclonedx.org/schema/bom/1.6"
+NS_NEW = "http://cyclonedx.org/schema/bom/1.7"
+NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
+SCHEMA_LOCATION = f"{NS_NEW} https://cyclonedx.org/schema/bom-1.7.xsd"
+
+LICENSE_HEADER = """<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to you under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->"""
+
+
+def qn(tag: str, ns: str = NS_NEW) -> str:
+    """Return the Clark-notation qualified name ``{ns}tag`` for lxml lookups."""
+    return f"{{{ns}}}{tag}"
+
+
+def copy_into_new_ns(source_elem: etree._Element) -> etree._Element:
+    """Deep-copy ``source_elem`` from NS_OLD into NS_NEW, preserving CDATA.
+
+    Serializes, strips the inherited default xmlns, and re-parses under a
+    wrapper declaring NS_NEW as default so the returned element carries no
+    redundant xmlns attribute and can be appended cleanly to the new BOM.
+    """
+    inner = etree.tostring(source_elem, encoding="unicode")
+    inner = inner.replace(f' xmlns="{NS_OLD}"', "")
+    wrapped = f'<wrap xmlns="{NS_NEW}">{inner}</wrap>'
+    parser = etree.XMLParser(strip_cdata=False)
+    wrap = etree.fromstring(wrapped.encode("utf-8"), parser)
+    return wrap[0]
+
+
+def read_existing_serial(path: Path) -> str | None:
+    """Return the ``serialNumber`` of the BOM at ``path`` if it parses, else None.
+
+    Used to keep ``urn:uuid:`` identifiers stable across re-runs so the script
+    is idempotent and re-generation produces no spurious diffs.
+    """
+    if not path.is_file():
+        return None
+    try:
+        existing = etree.parse(str(path)).getroot()
+    except etree.XMLSyntaxError:
+        return None
+    return existing.get("serialNumber")
+
+
+def build_bom(
+    subject_component: etree._Element,
+    vuln_elem: etree._Element,
+    timestamp: str,
+    serial_number: str,
+    extra_components: list[etree._Element] | None = None,
+    dependencies: list[tuple[str, list[str]]] | None = None,
+) -> etree._Element:
+    """Build a CycloneDX 1.7 ``<bom>`` element wrapping a single vulnerability.
+
+    ``subject_component`` becomes ``metadata/component``; ``extra_components``
+    and ``dependencies`` (optional) populate the top-level ``<components>`` and
+    ``<dependencies>`` sections used by log4cxx files to link log4cxx-conan.
+    """
+    bom = etree.Element(qn("bom"), nsmap={None: NS_NEW, "xsi": NS_XSI})
+    bom.set(f"{{{NS_XSI}}}schemaLocation", SCHEMA_LOCATION)
+    bom.set("serialNumber", serial_number)
+    bom.set("version", "1")
+
+    metadata = etree.SubElement(bom, qn("metadata"))
+    etree.SubElement(metadata, qn("timestamp")).text = timestamp
+    metadata.append(copy_into_new_ns(subject_component))
+    manufacturer = etree.SubElement(metadata, qn("manufacturer"))
+    etree.SubElement(manufacturer, qn("name")).text = "Apache Logging Services"
+    etree.SubElement(manufacturer, qn("url")).text = "https://logging.apache.org"
+
+    # Log4cxx dependencies get extra components that are dependants of the main log4cxx component.
+    # This way we express the fact that:
+    #
+    # - Our statement is about log4cxx, not log4cxx-conan.
+    # - log4cxx-conan is derived from log4cxx and thus shares its vulnerabilities.
+    if extra_components:
+        components_elem = etree.SubElement(bom, qn("components"))
+        for c in extra_components:
+            components_elem.append(copy_into_new_ns(c))
+
+    if dependencies:
+        deps_elem = etree.SubElement(bom, qn("dependencies"))
+        for dep_ref, sub_refs in dependencies:
+            d = etree.SubElement(deps_elem, qn("dependency"), ref=dep_ref)
+            for s in sub_refs:
+                etree.SubElement(d, qn("dependency"), ref=s)
+
+    vulns_elem = etree.SubElement(bom, qn("vulnerabilities"))
+    vulns_elem.append(copy_into_new_ns(vuln_elem))
+
+    return bom
+
+
+def fold_bom_attributes(body: bytes) -> bytes:
+    """Hack: fold the ``<bom ...>`` start tag so each attribute past the
+    first sits on its own line, indented to align under the first. lxml's
+    serializer offers no per-attribute wrap option, so we post-process it.
+    """
+    match = re.match(rb'<bom ([^>]*?)(/?>)', body)
+    if not match:
+        return body
+    attrs = re.findall(rb'[\w:-]+="[^"]*"', match.group(1))
+    if len(attrs) <= 1:
+        return body
+    indent = b"\n" + b" " * len(b"<bom ")
+    return b"<bom " + indent.join(attrs) + match.group(2) + body[match.end():]
+
+
+def serialize(bom: etree._Element) -> bytes:
+    """Serialize ``bom`` as a pretty-printed UTF-8 file with the ASF header.
+
+    Drops unused namespace declarations inherited from the source tree,
+    applies 2-space indentation, and prepends the XML declaration and
+    Apache License comment block.
+    """
+    etree.cleanup_namespaces(bom, top_nsmap={None: NS_NEW})
+    etree.indent(bom, space="  ")
+    body = fold_bom_attributes(etree.tostring(bom, xml_declaration=False, encoding="UTF-8"))
+    decl = b'<?xml version="1.0" encoding="UTF-8"?>\n'
+    header = LICENSE_HEADER.encode("utf-8") + b"\n"
+    return decl + header + body + b"\n"
+
+
+def main() -> int:
+    """Parse the source VDR and write one output file per (CVE, component) pair.
+
+    Skips ``log4cxx-conan`` as a standalone subject; instead, when a
+    vulnerability affects both ``log4cxx`` and ``log4cxx-conan``, the
+    log4cxx output file gains the conan component plus a dependency edge.
+    """
+    parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
+    tree = etree.parse(str(SRC), parser)
+    root = tree.getroot()
+
+    # Parses the components
+    components_root = root.find(qn("components", NS_OLD))
+    components_by_ref = {
+        c.get("bom-ref"): c
+        for c in components_root.findall(qn("component", NS_OLD))
+    }
+
+    # Parses the vulnerabilities and writes one file per (CVE, component) pair.
+    vulns_root = root.find(qn("vulnerabilities", NS_OLD))
+    count = 0
+    for vuln in vulns_root.findall(qn("vulnerability", NS_OLD)):
+        cve_id = vuln.findtext(qn("id", NS_OLD))
+        target_refs = [
+            t.findtext(qn("ref", NS_OLD))
+            for t in vuln.findall(f".//{qn('target', NS_OLD)}")
+        ]
+        subject_refs = [r for r in target_refs if r != "log4cxx-conan"]
+        if not subject_refs:
+            print(f"warning: {cve_id} has no non-conan subject; skipping", file=sys.stderr)
+            continue
+        updated = vuln.findtext(qn("updated", NS_OLD))
+        for subject in subject_refs:
+            extras: list[etree._Element] = []
+            deps: list[tuple[str, list[str]]] = []
+            # log4cxx-conan is always an extra component of log4cxx, never a standalone subject.
+            if subject == "log4cxx" and "log4cxx-conan" in target_refs:
+                extras = [components_by_ref["log4cxx-conan"]]
+                deps = [("log4cxx-conan", ["log4cxx"])]
+            out_path = OUT_DIR / cve_id / f"{subject}.cdx.xml"
+            serial_number = read_existing_serial(out_path) or f"urn:uuid:{uuid.uuid4()}"
+            bom = build_bom(
+                subject_component=components_by_ref[subject],
+                vuln_elem=vuln,
+                timestamp=updated,
+                serial_number=serial_number,
+                extra_components=extras,
+                dependencies=deps,
+            )
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_bytes(serialize(bom))
+            print(f"wrote {out_path.relative_to(ROOT)}")
+            count += 1
+    print(f"generated {count} files")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/vulnerabilities/template.cdx.xml b/src/vulnerabilities/template.cdx.xml
new file mode 100644
index 00000000..347fa8ac
--- /dev/null
+++ b/src/vulnerabilities/template.cdx.xml
@@ -0,0 +1,170 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to you under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<!--
+  Template for a per-vulnerability CycloneDX VDR file.
+
+  Copy to: src/vulnerabilities/<CVE-id>/<component>.cdx.xml
+
+  Rules:
+    * <component> is the filename stem and must be one of:
+        log4cxx, log4j-core, log4j-1.2-api, log4j-layout-template-json, log4net
+      (log4cxx-conan never gets its own file; see the log4cxx-only block below.)
+    * Generate a fresh UUID for `serialNumber`, e.g.:
+        python -c 'import uuid; print(uuid.uuid4())'
+    * Start `version` at "1"; bump by one whenever you edit the file.
+    * `metadata/timestamp` MUST equal `vulnerability/updated`.
+    * Range syntax in <affects>:
+        Maven artifacts:  vers:maven/>=X|<Y
+        NuGet packages:   vers:nuget/>=X|<Y
+        SemVer / native:  vers:semver/>=X|<Y
+-->
+<bom xmlns="http://cyclonedx.org/schema/bom/1.7"
+     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+     xsi:schemaLocation="http://cyclonedx.org/schema/bom/1.7 https://cyclonedx.org/schema/bom-1.7.xsd"
+     serialNumber="urn:uuid:00000000-0000-0000-0000-000000000000"
+     version="1">
+
+  <metadata>
+    <!-- Must match <updated> below. ISO-8601, UTC (trailing Z). -->
+    <timestamp>2099-12-31T00:00:00Z</timestamp>
+    <!-- Replace the block below with exactly one of the component
+         definitions from src/site/static/cyclonedx/vdr.xml (the
+         <components> section at the top). The `bom-ref` must equal
+         the filename stem. Example below is log4j-core. -->
+    <component type="library" bom-ref="log4j-core">
+      <group>org.apache.logging.log4j</group>
+      <name>log4j-core</name>
+      <cpe>cpe:2.3:a:apache:log4j:*:*:*:*:*:*:*:*</cpe>
+      <purl>pkg:maven/org.apache.logging.log4j/log4j-core?type=jar</purl>
+    </component>
+    <manufacturer>
+      <name>Apache Logging Services</name>
+      <url>https://logging.apache.org</url>
+    </manufacturer>
+  </metadata>
+
+  <!-- =========================================================
+       Log4cxx-only: uncomment the two blocks below to link the
+       Conan distribution. Also add a second <target> in <affects>
+       with <ref>log4cxx-conan</ref> and the matching version range.
+       Remove this entire comment and the blocks for non-log4cxx files.
+       =========================================================
+  <components>
+    <component type="library" bom-ref="log4cxx-conan">
+      <name>Log4cxx</name>
+      <purl>pkg:conan/log4cxx</purl>
+    </component>
+  </components>
+  <dependencies>
+    <dependency ref="log4cxx-conan">
+      <dependency ref="log4cxx"/>
+    </dependency>
+  </dependencies>
+  -->
+
+  <vulnerabilities>
+    <vulnerability>
+      <id>CVE-YYYY-NNNNN</id>
+      <source>
+        <name>NVD</name>
+        <url>https://nvd.nist.gov/vuln/detail/CVE-YYYY-NNNNN</url>
+      </source>
+      <!-- Optional. Add <references> for Jira tickets, fix commits, PRs, etc.
+      <references>
+        <reference>
+          <id>LOG4J2-XXXX</id>
+          <source>
+            <name>Issue tracker</name>
+            <url>https://issues.apache.org/jira/browse/LOG4J2-XXXX</url>
+          </source>
+        </reference>
+      </references>
+      -->
+      <ratings>
+        <rating>
+          <source>
+            <!-- Use "The Apache Software Foundation" for ASF-assigned ratings,
+                 or "NVD" when carrying over the NVD score. -->
+            <name>The Apache Software Foundation</name>
+            <url><![CDATA[https://www.first.org/cvss/calculator/4-0#CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:N/VI:N/VA:N/SC:N/SI:N/SA:N]]></url>
+          </source>
+          <score>0.0</score>
+          <!-- none | low | medium | high | critical -->
+          <severity>none</severity>
+          <!-- CVSSv2 | CVSSv3 | CVSSv4 -->
+          <method>CVSSv4</method>
+          <vector>AV:N/AC:L/AT:N/PR:N/UI:N/VC:N/VI:N/VA:N/SC:N/SI:N/SA:N</vector>
+        </rating>
+      </ratings>
+      <cwes>
+        <cwe>0</cwe>
+      </cwes>
+      <description><![CDATA[AsciiDoc-formatted description of the vulnerability.]]></description>
+      <recommendation><![CDATA[Upgrade recommendation and workarounds.]]></recommendation>
+      <!-- Optional. Include when the ASF assessment diverges from NVD
+           (e.g., state=not_affected with a justification).
+      <analysis>
+        <state>not_affected</state>
+        <justification>protected_by_mitigating_control</justification>
+        <detail><![CDATA[Why this is not exploitable in practice.]]></detail>
+      </analysis>
+      -->
+      <created>2099-12-31T00:00:00Z</created>
+      <published>2099-12-31T00:00:00Z</published>
+      <!-- Must match metadata/timestamp above. Bump on every edit. -->
+      <updated>2099-12-31T00:00:00Z</updated>
+      <credits>
+        <individuals>
+          <individual>
+            <name>Reporter Name</name>
+          </individual>
+        </individuals>
+        <!-- Or, for organizational reporters:
+        <organizations>
+          <organization>
+            <name>Organization Name</name>
+            <url>https://example.org/</url>
+          </organization>
+        </organizations>
+        -->
+      </credits>
+      <affects>
+        <target>
+          <!-- Must equal metadata/component/@bom-ref. -->
+          <ref>log4j-core</ref>
+          <versions>
+            <version>
+              <range><![CDATA[vers:maven/>=0|<X.Y.Z]]></range>
+            </version>
+          </versions>
+        </target>
+        <!-- Log4cxx-only: include a second target for the Conan distribution.
+        <target>
+          <ref>log4cxx-conan</ref>
+          <versions>
+            <version>
+              <range><![CDATA[vers:semver/>=0|<X.Y.Z]]></range>
+            </version>
+          </versions>
+        </target>
+        -->
+      </affects>
+    </vulnerability>
+  </vulnerabilities>
+
+</bom>

Reply via email to