# Copyright 2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

LLVM_MAX_SLOT=16
ROCM_VERSION=5.5

inherit cmake git-r3 llvm rocm

EGIT_REPO_URI="https://github.com/ggerganov/llama.cpp.git"

DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
HOMEPAGE="https://github.com/ggerganov/llama.cpp"

LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"
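# Mirror llama.cpp's x86 SIMD toggles as CPU_FLAGS_X86 USE flags.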
CPU_FLAGS_X86=( avx avx2 f16c )
IUSE="blas cublas lto tests tools rocm ${CPU_FLAGS_X86[*]/#/cpu_flags_x86_}"

DEPEND="blas? ( sci-libs/openblas:= )
	cublas? ( dev-util/nvidia-cuda-toolkit )
	rocm? ( dev-util/hip sci-libs/rocBLAS sys-devel/clang:${LLVM_MAX_SLOT} )"
RDEPEND="${DEPEND}"
BDEPEND="${DEPEND}"

S="${WORKDIR}/${P}"

src_configure() {
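	# HIP code must be compiled with clang, so point the build at the
	# slotted LLVM toolchain and at the amdgcn bitcode device libraries.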
	if use rocm ; then
		export CC=/usr/lib/llvm/${LLVM_MAX_SLOT}/bin/clang
		export CXX=/usr/lib/llvm/${LLVM_MAX_SLOT}/bin/clang++
		export DEVICE_LIB_PATH=/usr/lib/amdgcn/bitcode
		export HIP_DEVICE_LIB_PATH=/usr/lib/amdgcn/bitcode
	fi
	local mycmakeargs=(
		-DLLAMA_BLAS="$(usex blas)"
		-DLLAMA_BLAS_VENDOR=OpenBLAS
		-DLLAMA_CUBLAS="$(usex cublas)"
		-DLLAMA_LTO="$(usex lto)"
		-DLLAMA_BUILD_TESTS="$(usex tests)"
		-DLLAMA_HIPBLAS="$(usex rocm)"
		-DLLAMA_AVX="$(usex cpu_flags_x86_avx)"
		-DLLAMA_AVX2="$(usex cpu_flags_x86_avx2)"
		-DLLAMA_F16C="$(usex cpu_flags_x86_f16c)"
		-DAMDGPU_TARGETS="$(get_amdgpu_flags)"
		-DLLAMA_BUILD_SERVER=OFF
		# src_install ships libllama.so, so force the shared library on
		-DBUILD_SHARED_LIBS=ON
		-DCMAKE_SKIP_BUILD_RPATH=ON
		-DBUILD_NUMBER="1"
	)
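	# CUDA's compiler checks may probe the NVIDIA device node during
	# configure; predict the access so the sandbox does not abort.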
	if use cublas ; then
		addpredict /dev/nvidiactl
	fi
	cmake_src_configure
}

src_install() {
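	# Install the public C API header so consumers can link against libllama.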
	doheader llama.h

	cd "${BUILD_DIR}" || die

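	# Built only because BUILD_SHARED_LIBS is forced on in src_configure.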
	dolib.so libllama.so

	newbin bin/main llama-cpp

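	# Rename the generically-named upstream binaries (main, quantize, ...)
	# to avoid file collisions in /usr/bin.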
	if use tools ; then
		newbin bin/benchmark llama-cpp-benchmark
		newbin bin/perplexity llama-cpp-perplexity
		newbin bin/q8dot llama-cpp-q8dot
		newbin bin/quantize llama-cpp-quantize
		newbin bin/quantize-stats llama-cpp-quantize-stats
		newbin bin/vdot llama-cpp-vdot
	fi
}

pkg_postinst() {
	elog "The main binary has been installed as \"llama-cpp\""
}