commit:     af85b2f95937f3d18b127652867c06482d1cbd58
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Sun Jul  6 19:08:36 2025 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon Jul  7 07:33:35 2025 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=af85b2f9

sci-ml/caffe2: fix compilation with aotriton, without composable_kernel

Closes: https://bugs.gentoo.org/959580
Closes: https://bugs.gentoo.org/956674
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/42907
Closes: https://github.com/gentoo/gentoo/pull/42907
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 profiles/features/musl/package.use.mask            |  4 +++
 sci-ml/caffe2/Manifest                             |  1 -
 ...ffe2-2.7.1-r1.ebuild => caffe2-2.7.1-r2.ebuild} | 24 +++++++-------
 sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch   | 38 ++++++++++++++++++++++
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/profiles/features/musl/package.use.mask b/profiles/features/musl/package.use.mask
index 781a9be8b80e..c8fdc17aee36 100644
--- a/profiles/features/musl/package.use.mask
+++ b/profiles/features/musl/package.use.mask
@@ -1,6 +1,10 @@
 # Copyright 1999-2025 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
+# Sv. Lockal <[email protected]> (2025-07-07)
+# sci-libs/aotriton-bin is masked on musl
+sci-ml/caffe2 memefficient
+
 # Alfred Wingate <[email protected]> (2025-05-17)
 # Tests cannot be built on musl due to libc specific ifdefs (bug #836710)
 net-mail/cyrus-imapd test

diff --git a/sci-ml/caffe2/Manifest b/sci-ml/caffe2/Manifest
index 9510577bc397..fda2a4a29e01 100644
--- a/sci-ml/caffe2/Manifest
+++ b/sci-ml/caffe2/Manifest
@@ -1,4 +1,3 @@
-DIST aotriton-0.9.2b-manylinux_2_28_x86_64-rocm6.3-shared.tar.gz 444786966 BLAKE2B 38ebf7edd1686d137bf70022a50ce2b7c060eb8420fe11e406074d3531e84538f4aeb198f1aafe34abf3f5b1262f3edd6e81d204eb84ec320f8ba8180387313b SHA512 b0d6d25ae4be9272d43bd80fa9d0178b27f5feb1b83bfed50e87c7efedfdd66a18981d0f3fabf8087b1c476d9937eaa80d462dea26502b24702134145bd09394
 DIST composable_kernel-50ee4267.tar.gz 4194795 BLAKE2B b3c97d98a0c9e4620fdae3d30006edf55cc60ffa7f8518f6acb8d808647bc4de362c2e2b7e974686503fa2c7f359b6981cfbda74e40cc1bad4d351c5d2ff92e1 SHA512 9fc6f5f15556f020414b4567520329ef762209a82411a246c2bc1240a9fed2669f7fcb982cf773e3e9561bf9a2c557dba82b8b469d2e5844e679e2f5ab7c3e17
 DIST composable_kernel-8086bbe3.tar.gz 4418862 BLAKE2B b710e3d4586899443ec01044dad19fd2f992c351e2f65ba526dfcc47cc65c095beaf8ac21a8f71c02a0eb524d364e817b27241a9198884f2bdae9924b51e24e4 SHA512 8410b5a1c864d71f3034ef0d9d1245078856d09cc191faec59856c229bf11d89ae291036d735cb5cec4f1d72e6e9e8f6921833147f9619d30cfab8722d3a9f63
 DIST flash-attention-2.7.4.gh.tar.gz 5841323 BLAKE2B 432999d763f2b3d732580ddfea5d3e01370351db0656546259a5e500a07516dd03c98828bfb55855dabe4adc651033b5d97ea4725ca46158b9970f0fbc662710 SHA512 05a4afb09e666f7404d6a3f8b5256e7bed6eba60a6f1bde2b7dbb96d318975f0b458c2521c7a38d88e97b6e4c27f29077cf787849daf82586e33f43a3d9a84b3

diff --git a/sci-ml/caffe2/caffe2-2.7.1-r1.ebuild b/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild
similarity index 96%
rename from sci-ml/caffe2/caffe2-2.7.1-r1.ebuild
rename to sci-ml/caffe2/caffe2-2.7.1-r2.ebuild
index f06f1e769a20..4ccb6c07061c 100644
--- a/sci-ml/caffe2/caffe2-2.7.1-r1.ebuild
+++ b/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild
@@ -31,11 +31,6 @@ SRC_URI="
        rocm? (
                
https://github.com/ROCm/composable_kernel/archive/${CK_COMMIT}.tar.gz
                -> ${CK_P}.tar.gz
-               memefficient? (
-                       amd64? (
-                               https://github.com/ROCm/${AOTRITON_PN}/releases/download/${AOTRITON_PV}/${AOTRITON_tar}
-                       )
-               )
        )
        flash? (
                
https://github.com/Dao-AILab/${FLASH_PN}/archive/refs/tags/v${FLASH_PV}.tar.gz
@@ -111,6 +106,7 @@ RDEPEND="
                >=sci-libs/miopen-6.1    <sci-libs/miopen-6.5
                >=sci-libs/rocPRIM-6.1   <sci-libs/rocPRIM-6.5
                >=sci-libs/rocThrust-6.1 <sci-libs/rocThrust-6.5
+               memefficient? ( sci-libs/aotriton-bin:0/0.9 )
        )
        distributed? (
                sci-ml/tensorpipe[cuda?]
@@ -150,6 +146,7 @@ PATCHES=(
        "${FILESDIR}"/${PN}-2.7.0-cmake.patch
        "${FILESDIR}"/${PN}-2.7.0-glog-0.7.1.patch
        "${FILESDIR}"/${PN}-2.7.0-llvm.patch
+       "${FILESDIR}"/${PN}-2.7.1-ck-config.patch
 )
 
 src_prepare() {
@@ -186,6 +183,12 @@ src_prepare() {
                cmake/Dependencies.cmake \
                || die
 
+       # Change libaotriton path
+       sed -i \
+               -e "s|}/lib|}/$(get_libdir)|g" \
+               cmake/External/aotriton.cmake \
+               || die
+
        # Noisy warnings from Logging.h
        sed -i 's/-Wextra-semi//' cmake/public/utils.cmake || die
 
@@ -193,13 +196,6 @@ src_prepare() {
        pushd torch/csrc/jit/serialization || die
        flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die
        popd
-       if use rocm && use memefficient; then
-               mkdir -p "${BUILD_DIR}"/aotriton_external-prefix/src || die
-               rm -rf "${WORKDIR}"/aotriton
-               if use amd64; then
-                       cp "${DISTDIR}"/${AOTRITON_tar} "${BUILD_DIR}"/aotriton_external-prefix/src || die
-               fi
-       fi
 
        # prefixify the hardcoded paths, after all patches are applied
        hprefixify \
@@ -328,6 +324,10 @@ src_configure() {
        elif use rocm; then
                export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
 
+               if use memefficient; then
+                       export AOTRITON_INSTALLED_PREFIX="${ESYSROOT}/usr"
+               fi
+
                mycmakeargs+=(
                        -DUSE_NCCL=ON
                        -DUSE_SYSTEM_NCCL=ON

diff --git a/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch b/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch
new file mode 100644
index 000000000000..8e77ab0ef465
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch
@@ -0,0 +1,38 @@
+Use generated CK config.h rather than system
+
+Upstream commit: https://github.com/pytorch/pytorch/commit/38e81a53324146d445a81eb8f80bccebe623eb35
+--- a/aten/src/ATen/CMakeLists.txt
++++ b/aten/src/ATen/CMakeLists.txt
+@@ -343,9 +343,32 @@ if(USE_CUDA)
+ endif()
+ 
+ if(USE_ROCM)
++  # NOTE: The PyTorch build does not actually add_subdirectory
++  # third_party/composable_kernel or use it as a CMake library. What is used
++  # is header only, so this should be ok, except that the CMake build generates
++  # a ck/config.h. We just do that part here. Without this, the ck.h from the
++  # ROCM SDK may get accidentally used instead.
++  function(_pytorch_rocm_generate_ck_conf)
++    set(CK_ENABLE_INT8 "ON")
++    set(CK_ENABLE_FP16 "ON")
++    set(CK_ENABLE_FP32 "ON")
++    set(CK_ENABLE_FP64 "ON")
++    set(CK_ENABLE_BF16 "ON")
++    set(CK_ENABLE_FP8 "ON")
++    set(CK_ENABLE_BF8 "ON")
++    set(CK_USE_XDL "ON")
++    set(CK_USE_WMMA "ON")
++    configure_file(
++      "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
++      "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
++      )
++  endfunction()
+   list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
+   list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
+   list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
++  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
++  _pytorch_rocm_generate_ck_conf()
++
+   # Next two lines are needed because TunableOp uses third-party/fmt
+   list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
+   list(APPEND ATen_HIP_DEPENDENCY_LIBS fmt::fmt-header-only)

Reply via email to