commit: af85b2f95937f3d18b127652867c06482d1cbd58 Author: Sv. Lockal <lockalsash <AT> gmail <DOT> com> AuthorDate: Sun Jul 6 19:08:36 2025 +0000 Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org> CommitDate: Mon Jul 7 07:33:35 2025 +0000 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=af85b2f9
sci-ml/caffe2: fix compilation with aotriton, without composable_kernel Closes: https://bugs.gentoo.org/959580 Closes: https://bugs.gentoo.org/956674 Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com> Part-of: https://github.com/gentoo/gentoo/pull/42907 Closes: https://github.com/gentoo/gentoo/pull/42907 Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org> profiles/features/musl/package.use.mask | 4 +++ sci-ml/caffe2/Manifest | 1 - ...ffe2-2.7.1-r1.ebuild => caffe2-2.7.1-r2.ebuild} | 24 +++++++------- sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch | 38 ++++++++++++++++++++++ 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/profiles/features/musl/package.use.mask b/profiles/features/musl/package.use.mask index 781a9be8b80e..c8fdc17aee36 100644 --- a/profiles/features/musl/package.use.mask +++ b/profiles/features/musl/package.use.mask @@ -1,6 +1,10 @@ # Copyright 1999-2025 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 +# Sv. Lockal <[email protected]> (2025-07-07) +# sci-libs/aotriton-bin is masked on musl +sci-ml/caffe2 memefficient + # Alfred Wingate <[email protected]> (2025-05-17) # Tests cannot be built on musl due to libc specific ifdefs (bug #836710) net-mail/cyrus-imapd test diff --git a/sci-ml/caffe2/Manifest b/sci-ml/caffe2/Manifest index 9510577bc397..fda2a4a29e01 100644 --- a/sci-ml/caffe2/Manifest +++ b/sci-ml/caffe2/Manifest @@ -1,4 +1,3 @@ -DIST aotriton-0.9.2b-manylinux_2_28_x86_64-rocm6.3-shared.tar.gz 444786966 BLAKE2B 38ebf7edd1686d137bf70022a50ce2b7c060eb8420fe11e406074d3531e84538f4aeb198f1aafe34abf3f5b1262f3edd6e81d204eb84ec320f8ba8180387313b SHA512 b0d6d25ae4be9272d43bd80fa9d0178b27f5feb1b83bfed50e87c7efedfdd66a18981d0f3fabf8087b1c476d9937eaa80d462dea26502b24702134145bd09394 DIST composable_kernel-50ee4267.tar.gz 4194795 BLAKE2B b3c97d98a0c9e4620fdae3d30006edf55cc60ffa7f8518f6acb8d808647bc4de362c2e2b7e974686503fa2c7f359b6981cfbda74e40cc1bad4d351c5d2ff92e1 SHA512 9fc6f5f15556f020414b4567520329ef762209a82411a246c2bc1240a9fed2669f7fcb982cf773e3e9561bf9a2c557dba82b8b469d2e5844e679e2f5ab7c3e17 DIST composable_kernel-8086bbe3.tar.gz 4418862 BLAKE2B b710e3d4586899443ec01044dad19fd2f992c351e2f65ba526dfcc47cc65c095beaf8ac21a8f71c02a0eb524d364e817b27241a9198884f2bdae9924b51e24e4 SHA512 8410b5a1c864d71f3034ef0d9d1245078856d09cc191faec59856c229bf11d89ae291036d735cb5cec4f1d72e6e9e8f6921833147f9619d30cfab8722d3a9f63 DIST flash-attention-2.7.4.gh.tar.gz 5841323 BLAKE2B 432999d763f2b3d732580ddfea5d3e01370351db0656546259a5e500a07516dd03c98828bfb55855dabe4adc651033b5d97ea4725ca46158b9970f0fbc662710 SHA512 05a4afb09e666f7404d6a3f8b5256e7bed6eba60a6f1bde2b7dbb96d318975f0b458c2521c7a38d88e97b6e4c27f29077cf787849daf82586e33f43a3d9a84b3 diff --git a/sci-ml/caffe2/caffe2-2.7.1-r1.ebuild b/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild similarity index 96% rename from sci-ml/caffe2/caffe2-2.7.1-r1.ebuild rename to sci-ml/caffe2/caffe2-2.7.1-r2.ebuild index f06f1e769a20..4ccb6c07061c 100644 --- a/sci-ml/caffe2/caffe2-2.7.1-r1.ebuild +++ b/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild @@ -31,11 +31,6 @@ SRC_URI=" rocm? ( https://github.com/ROCm/composable_kernel/archive/${CK_COMMIT}.tar.gz -> ${CK_P}.tar.gz - memefficient? ( - amd64? ( - https://github.com/ROCm/${AOTRITON_PN}/releases/download/${AOTRITON_PV}/${AOTRITON_tar} - ) - ) ) flash? ( https://github.com/Dao-AILab/${FLASH_PN}/archive/refs/tags/v${FLASH_PV}.tar.gz @@ -111,6 +106,7 @@ RDEPEND=" >=sci-libs/miopen-6.1 <sci-libs/miopen-6.5 >=sci-libs/rocPRIM-6.1 <sci-libs/rocPRIM-6.5 >=sci-libs/rocThrust-6.1 <sci-libs/rocThrust-6.5 + memefficient? ( sci-libs/aotriton-bin:0/0.9 ) ) distributed? ( sci-ml/tensorpipe[cuda?] @@ -150,6 +146,7 @@ PATCHES=( "${FILESDIR}"/${PN}-2.7.0-cmake.patch "${FILESDIR}"/${PN}-2.7.0-glog-0.7.1.patch "${FILESDIR}"/${PN}-2.7.0-llvm.patch + "${FILESDIR}"/${PN}-2.7.1-ck-config.patch ) src_prepare() { @@ -186,6 +183,12 @@ src_prepare() { cmake/Dependencies.cmake \ || die + # Change libaotriton path + sed -i \ + -e "s|}/lib|}/$(get_libdir)|g" \ + cmake/External/aotriton.cmake \ + || die + # Noisy warnings from Logging.h sed -i 's/-Wextra-semi//' cmake/public/utils.cmake || die @@ -193,13 +196,6 @@ src_prepare() { pushd torch/csrc/jit/serialization || die flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die popd - if use rocm && use memefficient; then - mkdir -p "${BUILD_DIR}"/aotriton_external-prefix/src || die - rm -rf "${WORKDIR}"/aotriton - if use amd64; then - cp "${DISTDIR}"/${AOTRITON_tar} "${BUILD_DIR}"/aotriton_external-prefix/src || die - fi - fi # prefixify the hardcoded paths, after all patches are applied hprefixify \ @@ -328,6 +324,10 @@ src_configure() { elif use rocm; then export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)" + if use memefficient; then + export AOTRITON_INSTALLED_PREFIX="${ESYSROOT}/usr" + fi + mycmakeargs+=( -DUSE_NCCL=ON -DUSE_SYSTEM_NCCL=ON diff --git a/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch b/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch new file mode 100644 index 000000000000..8e77ab0ef465 --- /dev/null +++ b/sci-ml/caffe2/files/caffe2-2.7.1-ck-config.patch @@ -0,0 +1,38 @@ +Use generated CK config.h rather than system + +Upstream commit: https://github.com/pytorch/pytorch/commit/38e81a53324146d445a81eb8f80bccebe623eb35 +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -343,9 +343,32 @@ if(USE_CUDA) + endif() + + if(USE_ROCM) ++ # NOTE: The PyTorch build does not actually add_subdirectory ++ # third_party/composable_kernel or use it as a CMake library. What is used ++ # is header only, so this should be ok, except that the CMake build generates ++ # a ck/config.h. We just do that part here. Without this, the ck.h from the ++ # ROCM SDK may get accidentally used instead. ++ function(_pytorch_rocm_generate_ck_conf) ++ set(CK_ENABLE_INT8 "ON") ++ set(CK_ENABLE_FP16 "ON") ++ set(CK_ENABLE_FP32 "ON") ++ set(CK_ENABLE_FP64 "ON") ++ set(CK_ENABLE_BF16 "ON") ++ set(CK_ENABLE_FP8 "ON") ++ set(CK_ENABLE_BF8 "ON") ++ set(CK_USE_XDL "ON") ++ set(CK_USE_WMMA "ON") ++ configure_file( ++ "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" ++ "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" ++ ) ++ endfunction() + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) ++ list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) ++ _pytorch_rocm_generate_ck_conf() ++ + # Next two lines are needed because TunableOp uses third-party/fmt + list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>) + list(APPEND ATen_HIP_DEPENDENCY_LIBS fmt::fmt-header-only)
