summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'sci-libs')
-rw-r--r-- sci-libs/miopen/Manifest 2
-rw-r--r-- sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch 74
-rw-r--r-- sci-libs/miopen/metadata.xml 3
-rw-r--r-- sci-libs/miopen/miopen-5.7.1-r1.ebuild 122
4 files changed, 201 insertions, 0 deletions
diff --git a/sci-libs/miopen/Manifest b/sci-libs/miopen/Manifest
index ababb7dc3105..13af314de19a 100644
--- a/sci-libs/miopen/Manifest
+++ b/sci-libs/miopen/Manifest
@@ -1 +1,3 @@
DIST MIOpen-5.1.3.tar.gz 88118329 BLAKE2B d24722ffc5f5dab6d6a1de2ce34193ad2f25c9a2562e38c52e010a29870f01d9ea1c56970ba0601a088c8286e97958ee95d0da27fc8082126dd2ebe5ccb36b70 SHA512 a14e28cfcb12e5061e0e7b999ef3e67fa0a0e897e31bc50e7288b8a23eb1791312e33d3b697021c2b654ccc065ae1b046c1cfd77ba8e04b0f3e87e9cc0626dcd
+DIST MIOpen-5.7.1.tar.gz 100751593 BLAKE2B c5f847fe4374ab22737c281a65401125012328412d584fc09244b431ea6265d6d5028429115ee15fa8b04cbe0edd020e4e7ac8deb22561183ed76cb8c3d4d9d4 SHA512 3354b3b154f29a6337403abc5a71ec47c0b2558320c5a1b0cbfbbdb370c4fada2db12d4a19a312b5e30ca2e2302ee50ece3390603e84d132b2212a168e9523fa
+DIST gtest-1.11.0_p20210611.tar.gz 887296 BLAKE2B 8f29b7028a6dd8190a113cd93398705b23b61d88bee38beaf9dcc0dfc8a463aed7fcd3719f6f1b131d4363aa57231629aaeffa108f6558efb58416cfface6d6e SHA512 cf9e7f3fd3e31ce6677eac355fb8bfe19c5b56a8ec3af8b9417d0904cdf5da92f99f7411a08131cc9fa4fc7d38e6a71fcfac993648e47b269a74a27de7607f7a
diff --git a/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch b/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch
new file mode 100644
index 000000000000..859667f3da30
--- /dev/null
+++ b/sci-libs/miopen/files/miopen-5.7.1-fix-miopendriver-gemm.patch
@@ -0,0 +1,74 @@
+Fix uninitialized variable in MIOpenDriver gemm and restore gemmfp16 for testing
+Upstream bug: https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2505
+--- a/driver/driver.hpp
++++ b/driver/driver.hpp
+@@ -141,7 +141,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
+ printf("Usage: ./driver *base_arg* *other_args*\n");
+ printf("Supported Base Arguments: conv[fp16|int8|bfp16], CBAInfer[fp16], "
+ "pool[fp16], lrn[fp16], "
+- "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], "
++ "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm[fp16], ctc, dropout[fp16], "
+ "tensorop[fp16], reduce[fp16,fp64]\n");
+ exit(0); // NOLINT (concurrency-mt-unsafe)
+ }
+@@ -160,7 +160,7 @@ inline std::string ParseBaseArg(int argc, char* argv[])
+ arg != "CBAInfer" && arg != "CBAInferfp16" && arg != "pool" && arg != "poolfp16" &&
+ arg != "lrn" && arg != "lrnfp16" && arg != "activ" && arg != "activfp16" &&
+ arg != "softmax" && arg != "softmaxfp16" && arg != "bnorm" && arg != "bnormfp16" &&
+- arg != "rnn" && arg != "rnnfp16" && arg != "gemm" /*&& arg != "gemmfp16"*/ && arg != "ctc" &&
++ arg != "rnn" && arg != "rnnfp16" && arg != "gemm" && arg != "gemmfp16" && arg != "ctc" &&
+ arg != "dropout" && arg != "dropoutfp16" && arg != "tensorop" && arg != "tensoropfp16" &&
+ arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" && arg != "--version")
+ {
+--- a/driver/gemm_driver.hpp
++++ b/driver/gemm_driver.hpp
+@@ -207,6 +207,19 @@ int GemmDriver<T>::GetandSetData()
+ gemm_desc.strideB = gemm_desc.k * gemm_desc.n;
+ gemm_desc.strideC = gemm_desc.m * gemm_desc.n;
+
++ if constexpr (std::is_same_v<T, float>)
++ {
++ gemm_desc.dataType = miopenFloat;
++ }
++ else if constexpr (std::is_same_v<T, float16>)
++ {
++ gemm_desc.dataType = miopenHalf;
++ }
++ else
++ {
++ static_assert(!"unsupported type");
++ }
++
+ return (0);
+ }
+
+@@ -230,9 +243,9 @@ int GemmDriver<T>::AllocateBuffersAndCopy()
+ a = std::vector<T>(a_sz);
+ b = std::vector<T>(b_sz);
+ #if GEMM_DRIVER_DEBUG
+- c = std::vector<T>(c_sz, 1.);
++ c = std::vector<T>(c_sz, static_cast<T>(1.));
+ #else
+- c = std::vector<T>(c_sz, 0.);
++ c = std::vector<T>(c_sz, static_cast<T>(0.));
+ #endif
+ chost = c;
+
+--- a/driver/main.cpp
++++ b/driver/main.cpp
+@@ -125,11 +125,10 @@ int main(int argc, char* argv[])
+ {
+ drv = new GemmDriver<float>();
+ }
+-// TODO half is not supported in gemm
+-// else if(base_arg == "gemmfp16")
+-// {
+-// drv = new GemmDriver<float16>();
+-// }
++ else if(base_arg == "gemmfp16")
++ {
++ drv = new GemmDriver<float16>();
++ }
+ #endif
+ else if(base_arg == "bnorm")
+ {
diff --git a/sci-libs/miopen/metadata.xml b/sci-libs/miopen/metadata.xml
index e74d41fd607c..08c299887077 100644
--- a/sci-libs/miopen/metadata.xml
+++ b/sci-libs/miopen/metadata.xml
@@ -12,4 +12,7 @@
<email>xgreenlandforwyy@gmail.com</email>
<name>Yiyang Wu</name>
</maintainer>
+ <upstream>
+ <remote-id type="github">ROCmSoftwarePlatform/MIOpen</remote-id>
+ </upstream>
</pkgmetadata>
diff --git a/sci-libs/miopen/miopen-5.7.1-r1.ebuild b/sci-libs/miopen/miopen-5.7.1-r1.ebuild
new file mode 100644
index 000000000000..93bcde8190ae
--- /dev/null
+++ b/sci-libs/miopen/miopen-5.7.1-r1.ebuild
@@ -0,0 +1,122 @@
+# Copyright 1999-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+ROCM_VERSION=${PV}
+
+inherit cmake flag-o-matic llvm rocm
+
+GTEST_COMMIT="e2239ee6043f73722e7aa812a459f54a28552929"
+GTEST_FILE="gtest-1.11.0_p20210611.tar.gz"
+
+LLVM_MAX_SLOT=17
+
+DESCRIPTION="AMD's Machine Intelligence Library"
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/MIOpen"
+
+SRC_URI="https://github.com/ROCmSoftwarePlatform/MIOpen/archive/rocm-${PV}.tar.gz -> MIOpen-${PV}.tar.gz
+ test? ( https://github.com/google/googletest/archive/${GTEST_COMMIT}.tar.gz -> ${GTEST_FILE} )"
+
+LICENSE="MIT"
+KEYWORDS="~amd64"
+SLOT="0/$(ver_cut 1-2)"
+
+IUSE="debug test"
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+ dev-util/hip
+ >=dev-db/sqlite-3.17
+ sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}]
+ sci-libs/composable-kernel:${SLOT}[${ROCM_USEDEP}]
+ >=dev-libs/boost-1.72
+ dev-cpp/nlohmann_json
+ dev-cpp/frugally-deep
+"
+
+DEPEND="${RDEPEND}"
+
+BDEPEND="dev-libs/half:0/1
+ dev-build/rocm-cmake
+"
+
+S="${WORKDIR}/MIOpen-rocm-${PV}"
+
+PATCHES=(
+ "${FILESDIR}/${PN}-4.2.0-disable-no-inline-boost.patch"
+ "${FILESDIR}/${PN}-4.2.0-gcc11-numeric_limits.patch"
+ "${FILESDIR}/${PN}-4.3.0-fix-interface-include-in-HIP_COMPILER_FLAGS.patch"
+ "${FILESDIR}/${PN}-4.3.0-enable-test.patch"
+ "${FILESDIR}/${PN}-5.1.3-no-strip.patch"
+ "${FILESDIR}/${PN}-5.1.3-include-array.patch"
+ "${FILESDIR}/${PN}-5.7.1-fix-miopendriver-gemm.patch"
+)
+
+src_prepare() {
+ cmake_src_prepare
+
+ sed -e "s:/opt/rocm/llvm:$(get_llvm_prefix ${LLVM_MAX_SLOT}) NO_DEFAULT_PATH:" \
+ -e "s:/opt/rocm/hip:$(hipconfig -p) NO_DEFAULT_PATH:" \
+ -e '/set( MIOPEN_INSTALL_DIR/s:miopen:${CMAKE_INSTALL_PREFIX}:' \
+ -e '/MIOPEN_TIDY_ERRORS ALL/d' \
+ -e 's:find_program(UNZIPPER lbunzip2 bunzip2):find_program(UNZIPPER NAMES lbunzip2 bunzip2):' \
+ -i CMakeLists.txt || die
+
+ sed -e "/add_test/s:--build \${CMAKE_CURRENT_BINARY_DIR}:--build ${BUILD_DIR}:" \
+ -i test/CMakeLists.txt || die
+
+ sed -e "s:\${PROJECT_BINARY_DIR}/miopen/include:\${PROJECT_BINARY_DIR}/include:" \
+ -i src/CMakeLists.txt || die
+
+ sed -e "s:\${AMD_DEVICE_LIBS_PREFIX}/lib:${EPREFIX}/usr/lib/amdgcn/bitcode:" -i cmake/hip-config.cmake || die
+}
+
+src_configure() {
+	# USE=debug selects the CMake build type; non-debug builds additionally
+	# define NDEBUG for both C and C++ sources.
+	if ! use debug; then
+		append-cflags "-DNDEBUG"
+		append-cxxflags "-DNDEBUG"
+		CMAKE_BUILD_TYPE="Release"
+	else
+		CMAKE_BUILD_TYPE="Debug"
+	fi
+
+	local mycmakeargs=(
+		-DCMAKE_SKIP_RPATH=ON
+		-DAMDGPU_TARGETS="$(get_amdgpu_flags)"
+		-DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
+		-DMIOPEN_BACKEND=HIP
+		-DBoost_USE_STATIC_LIBS=OFF
+		-DMIOPEN_USE_MLIR=OFF
+		-DBUILD_TESTS=$(usex test ON OFF)
+		-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
+		-DROCM_SYMLINK_LIBS=OFF
+	)
+
+	if use test; then
+		mycmakeargs+=(
+			-DMIOPEN_TEST_ALL=ON
+			-DBUILD_TESTING=ON
+			-DMIOPEN_TEST_GDB=OFF
+			-DGOOGLETEST_DIR="${WORKDIR}/googletest-${GTEST_COMMIT}"
+		)
+
+		# One -DMIOPEN_TEST_<TARGET>=ON switch per entry of AMDGPU_TARGETS,
+		# with the target name upper-cased by ${gpu_target^^}.
+		# The loop variable is declared local so it does not leak into the
+		# global ebuild environment.
+		local gpu_target
+		for gpu_target in ${AMDGPU_TARGETS}; do
+			mycmakeargs+=( -DMIOPEN_TEST_${gpu_target^^}=ON )
+		done
+	fi
+
+	# NOTE(review): presumably the configure step probes these GPU device
+	# nodes; addpredict keeps such accesses from raising sandbox violations.
+	addpredict /dev/kfd
+	addpredict /dev/dri/
+
+	# Tell the compiler where the ROCm installation (as reported by
+	# `hipconfig -R`) and the device bitcode libraries are.
+	append-cxxflags "--rocm-path=$(hipconfig -R)"
+	append-cxxflags "--hip-device-lib-path=${EPREFIX}/usr/lib/amdgcn/bitcode"
+
+	# Configure with clang++ from the selected LLVM slot as the C++ compiler.
+	CXX="$(get_llvm_prefix ${LLVM_MAX_SLOT})/bin/clang++" cmake_src_configure
+}
+
+src_test() {
+	# NOTE(review): check_amdgpu comes from an eclass; presumably it verifies
+	# that a usable AMD GPU is accessible before the tests run - confirm.
+	check_amdgpu
+
+	# Expose the freshly built libraries under ${BUILD_DIR}/lib to the test
+	# binaries and pass -j1 through to the test runner.
+	LD_LIBRARY_PATH="${BUILD_DIR}/lib" \
+		cmake_src_test -j1
+}
+
+src_install() {
+	# Nothing package-specific to install: delegate entirely to the cmake
+	# eclass' install phase.
+	cmake_src_install
+}