Commit 61b850a
Parent(s): 7a6d9d7
llama.cpp
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +1 -0
- llama.cpp/.clang-format +161 -0
- llama.cpp/.clang-tidy +26 -0
- llama.cpp/.devops/cloud-v-pipeline +22 -0
- llama.cpp/.devops/cpu.Dockerfile +92 -0
- llama.cpp/.devops/cuda.Dockerfile +94 -0
- llama.cpp/.devops/intel.Dockerfile +91 -0
- llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
- llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
- llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
- llama.cpp/.devops/musa.Dockerfile +108 -0
- llama.cpp/.devops/nix/apps.nix +21 -0
- llama.cpp/.devops/nix/devshells.nix +52 -0
- llama.cpp/.devops/nix/docker.nix +37 -0
- llama.cpp/.devops/nix/jetson-support.nix +39 -0
- llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
- llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
- llama.cpp/.devops/nix/package.nix +247 -0
- llama.cpp/.devops/nix/python-scripts.nix +66 -0
- llama.cpp/.devops/nix/scope.nix +41 -0
- llama.cpp/.devops/nix/sif.nix +27 -0
- llama.cpp/.devops/rocm.Dockerfile +113 -0
- llama.cpp/.devops/tools.sh +49 -0
- llama.cpp/.devops/vulkan.Dockerfile +89 -0
- llama.cpp/.dockerignore +20 -0
- llama.cpp/.ecrc +6 -0
- llama.cpp/.editorconfig +50 -0
- llama.cpp/.flake8 +17 -0
- llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
- llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
- llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
- llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
- llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
- llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
- llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
- llama.cpp/.github/labeler.yml +86 -0
- llama.cpp/.github/pull_request_template.md +1 -0
- llama.cpp/.github/workflows/bench.yml.disabled +315 -0
- llama.cpp/.github/workflows/build.yml +1645 -0
- llama.cpp/.github/workflows/close-issue.yml +28 -0
- llama.cpp/.github/workflows/docker.yml +173 -0
- llama.cpp/.github/workflows/editorconfig.yml +29 -0
- llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- llama.cpp/.github/workflows/labeler.yml +17 -0
- llama.cpp/.github/workflows/python-check-requirements.yml +33 -0
- llama.cpp/.github/workflows/python-lint.yml +30 -0
- llama.cpp/.github/workflows/python-type-check.yml +40 -0
- llama.cpp/.github/workflows/server.yml +239 -0
- llama.cpp/.gitignore +145 -0
- llama.cpp/.gitmodules +3 -0
.gitattributes CHANGED
@@ -81,3 +81,4 @@ llama.cpp/build/bin/test-json-schema-to-grammar filter=lfs diff=lfs merge=lfs -text
 llama.cpp/build/bin/test-tokenizer-0 filter=lfs diff=lfs merge=lfs -text
 llama.cpp/build/bin/test-tokenizer-1-bpe filter=lfs diff=lfs merge=lfs -text
 llama.cpp/build/bin/test-tokenizer-1-spm filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/*.gguf filter=lfs diff=lfs merge=lfs -text
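The new pattern routes every *.gguf model under llama.cpp/models/ through Git LFS, matching the test binaries above it. As an illustration (the model path is hypothetical), the attribute resolution can be checked with:

    git check-attr filter diff merge -- llama.cpp/models/example.gguf
    # expected: filter, diff and merge all resolve to "lfs"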
llama.cpp/.clang-format ADDED
@@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
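Since the style file sits at the repository root, clang-format discovers it with --style=file. A minimal usage sketch (the source paths are illustrative):

    # Reformat one file in place under the 120-column, 4-space style above
    clang-format --style=file -i src/llama.cpp

    # CI-style dry run: exit non-zero if any file would be rewritten
    find src -name '*.cpp' -print0 | xargs -0 clang-format --style=file --dry-run --Werror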
llama.cpp/.clang-tidy ADDED
@@ -0,0 +1,26 @@
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    portability-*,
    -portability-simd-intrinsics,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
    -misc-use-anonymous-namespace,
FormatStyle: none
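clang-tidy picks this file up from the source tree but needs a compilation database to resolve includes and flags. A sketch, assuming a CMake build directory named build and an illustrative source path:

    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON   # writes build/compile_commands.json
    clang-tidy -p build src/llama.cpp                   # runs the Checks list above on one file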
llama.cpp/.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
node('x86_runner1'){                       // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
    stage('Cleanup'){
        cleanWs()                          // Cleaning previous CI build in workspace
    }
    stage('checkout repo'){
        retry(5){                          // Retry if the cloning fails due to some reason
            checkout scm                   // Clone the repo on Runner
        }
    }
    stage('Compiling llama.cpp'){
        sh'''#!/bin/bash
        make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
        '''
    }
    stage('Running llama.cpp'){
        sh'''#!/bin/bash
        module load gnu-bin2/0.1           # loading latest versions of vector qemu and vector gcc
        qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
        cat llama_log.txt                  # Printing results
        '''
    }
}
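The two sh stages can be reproduced outside Jenkins. A hedged sketch, assuming a RISC-V cross toolchain and a vector-enabled qemu-riscv64 are installed; the sysroot and model variables stand in for the runner-specific paths hard-coded above:

    make RISCV=1 RISCV_CROSS_COMPILE=1        # cross-compile llama.cpp for RISC-V
    qemu-riscv64 -L "$RISCV_SYSROOT" \
        -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 \
        ./llama-cli -m "$MODEL_GGUF" -p "Anything" -n 9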
llama.cpp/.devops/cpu.Dockerfile ADDED
@@ -0,0 +1,92 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    elif [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3 \
       python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
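The full, light and server stages are selected with docker build --target. A usage sketch, run from the llama.cpp source root (the build COPYs the whole tree); image tags and the model path are illustrative:

    # Server image; its HEALTHCHECK probes http://localhost:8080/health
    docker build --target server -t llama-cpp:cpu-server -f .devops/cpu.Dockerfile .
    docker run -p 8080:8080 -v "$PWD/models:/models" llama-cpp:cpu-server -m /models/model.gguf

    # CLI-only image
    docker build --target light -t llama-cpp:cpu-light -f .devops/cpu.Dockerfile .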
llama.cpp/.devops/cuda.Dockerfile ADDED
@@ -0,0 +1,94 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3 \
       python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete


ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
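The same stage selection applies here; CUDA_DOCKER_ARCH narrows the build to one compute capability instead of all supported ones, and the container needs GPU access at run time (via the NVIDIA container toolkit). A sketch with an example architecture and illustrative tag:

    docker build --build-arg CUDA_DOCKER_ARCH=86 --target server \
        -t llama-cpp:cuda-server -f .devops/cuda.Dockerfile .
    docker run --gpus all -p 8080:8080 -v "$PWD/models:/models" \
        llama-cpp:cuda-server -m /models/model.gguf -ngl 99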
llama.cpp/.devops/intel.Dockerfile ADDED
@@ -0,0 +1,91 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3 \
       python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete


ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
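For the SYCL image, GPU access is typically granted by passing the host's render nodes into the container; /dev/dri is the usual location on a standard Linux Intel GPU stack, though this is host-dependent. A hedged run sketch:

    docker build --build-arg GGML_SYCL_F16=ON --target light \
        -t llama-cpp:sycl -f .devops/intel.Dockerfile .
    docker run --device /dev/dri -v "$PWD/models:/models" llama-cpp:sycl -m /models/model.gguf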
llama.cpp/.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# Find libascend_hal.so via the toolkit stubs, because the driver hasn't been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT [ "/llama-cli" ]
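Because the build links against toolkit stubs only, a container from this image must mount the host's Ascend driver and NPU device nodes. A sketch under that assumption; the device and driver paths follow common Ascend conventions and may differ per host, and the image tag and model path are illustrative:

    docker run --device /dev/davinci0 --device /dev/davinci_manager \
        -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
        -v "$PWD/models:/models" \
        llama-cli-cann -m /models/model.gguf -p "Hello" -n 32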
llama.cpp/.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CUDA Inference of LLaMA model in pure C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA inference for Meta's Llama 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CUDA build.
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
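Building the RPM requires the Source0 tarball in the rpmbuild tree; spectool (from rpmdevtools) can fetch it. A minimal sketch, run on a host with the NVIDIA developer repo enabled per the notes above:

    spectool -g -R .devops/llama-cpp-cuda.srpm.spec   # download master.tar.gz into ~/rpmbuild/SOURCES
    rpmbuild -ba .devops/llama-cpp-cuda.srpm.spec     # produce binary and source RPMs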
llama.cpp/.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Llama 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
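After installation the unit takes its arguments from the EnvironmentFile, so wiring up a model is an edit plus an enable (the model path below is the spec's own placeholder):

    echo 'LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"' | sudo tee /etc/sysconfig/llama
    sudo systemctl enable --now llama.service
    systemctl status llama.service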
llama.cpp/.devops/musa.Dockerfile ADDED
@@ -0,0 +1,108 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        python3 \
        python3-pip \
        git \
        libcurl4-openssl-dev \
        libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3 \
       python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete


ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
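The MUSA image follows the CUDA layout exactly, so the same stage/target workflow applies. A hedged build sketch pinning the SDK release declared above (the tag is illustrative):

    docker build --build-arg MUSA_VERSION=rc3.1.0 --target server \
        -t llama-cpp:musa-server -f .devops/musa.Dockerfile .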
llama.cpp/.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
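Each entry in binaries becomes a flake app, so the tools run straight from the repository without an install step. A usage sketch (the model path is illustrative):

    nix run .#llama-cli -- -m models/model.gguf -p "Hello" -n 32
    nix run .#llama-server -- -m models/model.gguf --port 8080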
llama.cpp/.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
{ inputs, ... }:

{
  perSystem =
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
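The pipe yields one shell per package plus an -extra variant that also carries the Python scripts; for the default package that gives (attribute names assumed from the mapping above):

    nix develop .#default          # build tools for the default package
    nix develop .#default-extra    # same, plus gguf-py scripts and tiktoken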
llama.cpp/.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
llama.cpp/.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
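On an aarch64-linux host the capability-pinned builds are exposed as ordinary flake packages:

    nix build .#jetson-orin      # CUDA build pinned to compute capability 8.7
    nix build .#jetson-xavier    # compute capability 7.2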
llama.cpp/.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
llama.cpp/.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
llama.cpp/.devops/nix/package.nix
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
lib,
|
| 3 |
+
glibc,
|
| 4 |
+
config,
|
| 5 |
+
stdenv,
|
| 6 |
+
runCommand,
|
| 7 |
+
cmake,
|
| 8 |
+
ninja,
|
| 9 |
+
pkg-config,
|
| 10 |
+
git,
|
| 11 |
+
mpi,
|
| 12 |
+
blas,
|
| 13 |
+
cudaPackages,
|
| 14 |
+
autoAddDriverRunpath,
|
| 15 |
+
darwin,
|
| 16 |
+
rocmPackages,
|
| 17 |
+
vulkan-headers,
|
| 18 |
+
vulkan-loader,
|
| 19 |
+
curl,
|
| 20 |
+
shaderc,
|
| 21 |
+
useBlas ?
|
| 22 |
+
builtins.all (x: !x) [
|
| 23 |
+
useCuda
|
| 24 |
+
useMetalKit
|
| 25 |
+
useRocm
|
| 26 |
+
useVulkan
|
| 27 |
+
]
|
| 28 |
+
&& blas.meta.available,
|
| 29 |
+
useCuda ? config.cudaSupport,
|
| 30 |
+
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
|
| 31 |
+
# Increases the runtime closure size by ~700M
|
| 32 |
+
useMpi ? false,
|
| 33 |
+
useRocm ? config.rocmSupport,
|
| 34 |
+
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
|
| 35 |
+
enableCurl ? true,
|
| 36 |
+
useVulkan ? false,
|
| 37 |
+
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
| 38 |
+
|
| 39 |
+
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
| 40 |
+
# otherwise we get libstdc++ errors downstream.
|
| 41 |
+
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
|
| 42 |
+
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
|
| 43 |
+
precompileMetalShaders ? false,
|
| 44 |
+
}:
|
| 45 |
+
|
| 46 |
+
let
|
| 47 |
+
inherit (lib)
|
| 48 |
+
cmakeBool
|
| 49 |
+
cmakeFeature
|
| 50 |
+
optionals
|
| 51 |
+
strings
|
| 52 |
+
;
|
| 53 |
+
|
| 54 |
+
stdenv = throw "Use effectiveStdenv instead";
|
| 55 |
+
|
| 56 |
+
suffices =
|
| 57 |
+
lib.optionals useBlas [ "BLAS" ]
|
| 58 |
+
++ lib.optionals useCuda [ "CUDA" ]
|
| 59 |
+
++ lib.optionals useMetalKit [ "MetalKit" ]
|
| 60 |
+
++ lib.optionals useMpi [ "MPI" ]
|
| 61 |
+
++ lib.optionals useRocm [ "ROCm" ]
|
| 62 |
+
++ lib.optionals useVulkan [ "Vulkan" ];
|
| 63 |
+
|
| 64 |
+
pnameSuffix =
|
| 65 |
+
strings.optionalString (suffices != [ ])
|
| 66 |
+
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
| 67 |
+
descriptionSuffix = strings.optionalString (
|
| 68 |
+
suffices != [ ]
|
| 69 |
+
) ", accelerated with ${strings.concatStringsSep ", " suffices}";
|
| 70 |
+
|
| 71 |
+
xcrunHost = runCommand "xcrunHost" { } ''
|
| 72 |
+
mkdir -p $out/bin
|
| 73 |
+
ln -s /usr/bin/xcrun $out/bin
|
| 74 |
+
'';
|
| 75 |
+
|
| 76 |
+
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
|
| 77 |
+
# separately
|
| 78 |
+
darwinBuildInputs =
|
| 79 |
+
with darwin.apple_sdk.frameworks;
|
| 80 |
+
[
|
| 81 |
+
Accelerate
|
| 82 |
+
CoreVideo
|
| 83 |
+
CoreGraphics
|
| 84 |
+
]
|
| 85 |
+
++ optionals useMetalKit [ MetalKit ];
|
| 86 |
+
|
| 87 |
+
cudaBuildInputs = with cudaPackages; [
|
| 88 |
+
cuda_cudart
|
| 89 |
+
cuda_cccl # <nv/target>
|
| 90 |
+
libcublas
|
| 91 |
+
];
|
| 92 |
+
|
| 93 |
+
rocmBuildInputs = with rocmPackages; [
|
+    clr
+    hipblas
+    rocblas
+  ];
+
+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+    shaderc
+  ];
+in
+
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
+
+  # Note: none of the files discarded here are visible in the sandbox or
+  # affect the output hash. This also means they can be modified without
+  # triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        noneOf = builtins.all (x: !x);
+        baseName = baseNameOf name;
+      in
+      noneOf [
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (baseName == "flake.lock")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+
+  postPatch = ''
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+  '';
+
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with the Metal compiler from Xcode
+  # and we need to escape the sandbox on macOS to access the Metal compiler.
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
+  # and not on $PATH
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc
+
+      autoAddDriverRunpath
+    ]
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useBlas [ blas ]
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIP" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
+    ]
+    ++ optionals useMetalKit [
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+    ];
+
+  # Environment variables needed for ROCm
+  env = optionals useRocm {
+    ROCM_PATH = "${rocmPackages.clr}";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+  };
+
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # if they haven't been added yet.
+  postInstall = ''
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  '';
+
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals useCuda lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama-cli";
+
+    # These people might respond, on a best-effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})
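The feature toggles used above (`useCuda`, `useRocm`, `useVulkan`, `useMetalKit`, `enableCurl`, `enableStatic`, ...) are ordinary function arguments of this Nix expression, so a variant can be built by overriding them. A minimal sketch, assuming the scope's remaining inputs resolve from a recent nixpkgs (the `nix-build` invocation below is illustrative, not part of this commit):

    nix-build --expr "
      with import <nixpkgs> { };
      # flip one feature flag on the default llama-cpp package
      ((callPackage ./.devops/nix/scope.nix { }).llama-cpp).override { useVulkan = true; }
    "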
llama.cpp/.devops/nix/python-scripts.nix
ADDED
@@ -0,0 +1,66 @@
+{
+  lib,
+  stdenv,
+  buildPythonPackage,
+  poetry-core,
+  mkShell,
+  python3Packages,
+  gguf-py,
+}@inputs:
+
+let
+  llama-python-deps = with python3Packages; [
+    numpy
+    sentencepiece
+    transformers
+    protobuf
+    torchWithoutCuda
+    gguf-py
+    tqdm
+
+    # for scripts/compare-llama-bench.py
+    gitpython
+    tabulate
+
+    # for examples/pydantic-models-to-grammar-examples.py
+    docstring-parser
+    pydantic
+
+  ];
+
+  llama-python-test-deps = with python3Packages; [
+    # Server bench
+    matplotlib
+
+    # server tests
+    openai
+    pytest
+    prometheus-client
+  ];
+in
+
+buildPythonPackage ({
+  pname = "llama-scripts";
+  version = "0.0.0";
+  pyproject = true;
+
+  # NOTE: The files filtered out here are not visible in the build sandbox, nor
+  # do they affect the output hash. They can be modified without triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        any = builtins.any (x: x);
+        baseName = builtins.baseNameOf name;
+      in
+      any [
+        (lib.hasSuffix ".py" name)
+        (baseName == "README.md")
+        (baseName == "pyproject.toml")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+  nativeBuildInputs = [ poetry-core ];
+  nativeCheckInputs = llama-python-test-deps;
+  dependencies = llama-python-deps;
+})
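A sketch of building this scripts package through the scope defined in scope.nix below, under the same nixpkgs assumption as the earlier example:

    nix-build --expr "
      with import <nixpkgs> { };
      (callPackage ./.devops/nix/scope.nix { }).python-scripts
    "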
llama.cpp/.devops/nix/scope.nix
ADDED
@@ -0,0 +1,41 @@
+{
+  lib,
+  newScope,
+  python3,
+  llamaVersion ? "0.0.0",
+}:
+
+let
+  pythonPackages = python3.pkgs;
+  buildPythonPackage = pythonPackages.buildPythonPackage;
+  numpy = pythonPackages.numpy;
+  tqdm = pythonPackages.tqdm;
+  sentencepiece = pythonPackages.sentencepiece;
+  pyyaml = pythonPackages.pyyaml;
+  poetry-core = pythonPackages.poetry-core;
+  pytestCheckHook = pythonPackages.pytestCheckHook;
+in
+
+# We're using `makeScope` instead of just writing out an attrset
+# because it allows users to apply overlays later using `overrideScope'`.
+# Cf. https://noogle.dev/f/lib/makeScope
+
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
+    inherit
+      buildPythonPackage
+      numpy
+      tqdm
+      sentencepiece
+      poetry-core
+      pyyaml
+      pytestCheckHook
+      ;
+  };
+  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+  llama-cpp = self.callPackage ./package.nix { };
+  docker = self.callPackage ./docker.nix { };
+  docker-min = self.callPackage ./docker.nix { interactive = false; };
+  sif = self.callPackage ./sif.nix { };
+})
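As the comment above notes, `makeScope` exists so that consumers can overlay the whole package set at once. A minimal sketch of that, again assuming the scope's inputs resolve from nixpkgs:

    nix-build --expr "
      with import <nixpkgs> { };
      let scope = callPackage ./.devops/nix/scope.nix { };
      in (scope.overrideScope' (self: super: {
        llama-cpp = super.llama-cpp.override { enableCurl = false; };
      })).llama-cpp
    "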
llama.cpp/.devops/nix/sif.nix
ADDED
@@ -0,0 +1,27 @@
+{
+  lib,
+  singularity-tools,
+  llama-cpp,
+  bashInteractive,
+  interactive ? false,
+}:
+
+let
+  optionalInt = cond: x: if cond then x else 0;
+in
+singularity-tools.buildImage rec {
+  inherit (llama-cpp) name;
+  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+  # These are excessive (but safe) for most variants. Building singularity
+  # images requires superuser privileges, so we build them inside a VM in a
+  # writable image of pre-determined size.
+  #
+  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+  #
+  # Expected image sizes:
+  # - cpu/blas: 150M,
+  # - cuda, all gencodes: 560M,
+  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+  memSize = diskSize;
+}
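Note the `optionalInt` arithmetic: the build VM's disk is 4096 MiB by default, growing to 4096 + 16384 = 20480 MiB when `llama-cpp.useRocm` is set, since the ROCm closure is far larger. A build sketch under the same scope assumption as above (how you then run the image with singularity/apptainer depends on your site setup):

    nix-build --expr "
      with import <nixpkgs> { };
      (callPackage ./.devops/nix/scope.nix { }).sif
    "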
llama.cpp/.devops/rocm.Dockerfile
ADDED
@@ -0,0 +1,113 @@
+ARG UBUNTU_VERSION=24.04
+
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=6.3
+ARG AMDGPU_VERSION=6.3
+
+# Target the ROCm dev container
+ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+### Build image
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+# Unless otherwise specified, we make a fat build.
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# This is mostly tied to rocBLAS supported archs.
+# gfx803, gfx900, gfx1032, gfx1101, gfx1102: not officially supported
+# gfx906 is deprecated
+# Check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
+
+#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+ARG ROCM_DOCKER_ARCH=gfx1100
+
+# Set the GPU architectures to build for
+ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+# Enable ROCm
+# ENV CC=/opt/rocm/llvm/bin/clang
+# ENV CXX=/opt/rocm/llvm/bin/clang++
+
+RUN apt-get update \
+    && apt-get install -y \
+       build-essential \
+       cmake \
+       git \
+       libcurl4-openssl-dev \
+       curl \
+       libgomp1
+
+WORKDIR /app
+
+COPY . .
+
+RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
+    && cmake --build build --config Release -j$(nproc)
+
+RUN mkdir -p /app/lib \
+    && find build -name "*.so" -exec cp {} /app/lib \;
+
+RUN mkdir -p /app/full \
+    && cp build/bin/* /app/full \
+    && cp *.py /app/full \
+    && cp -r gguf-py /app/full \
+    && cp -r requirements /app/full \
+    && cp requirements.txt /app/full \
+    && cp .devops/tools.sh /app/full/tools.sh
+
+## Base image
+FROM ${BASE_ROCM_DEV_CONTAINER} AS base
+
+RUN apt-get update \
+    && apt-get install -y libgomp1 curl \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app
+
+### Full
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y \
+    git \
+    python3-pip \
+    python3 \
+    python3-wheel \
+    && pip install --break-system-packages --upgrade setuptools \
+    && pip install --break-system-packages -r requirements.txt \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+ENTRYPOINT ["/app/tools.sh"]
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
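A usage sketch for this Dockerfile (the image tag, model path, and gfx target are illustrative; ROCm containers conventionally need the `/dev/kfd` and `/dev/dri` devices passed through):

    docker build -f .devops/rocm.Dockerfile --target server \
        --build-arg ROCM_DOCKER_ARCH=gfx1030 -t llama-rocm-server .
    docker run --device /dev/kfd --device /dev/dri -p 8080:8080 \
        -v /path/to/models:/models llama-rocm-server -m /models/model.gguf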
llama.cpp/.devops/tools.sh
ADDED
@@ -0,0 +1,49 @@
+#!/bin/bash
+set -e
+
+# Read the first argument into a variable
+arg1="$1"
+
+# Shift the arguments to remove the first one
+shift
+
+if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+    exec python3 ./convert_hf_to_gguf.py "$@"
+elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+    exec ./llama-quantize "$@"
+elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+    exec ./llama-cli "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
+elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
+    echo "Converting PTH to GGML..."
+    for i in "$1"/"$2"/ggml-model-f16.bin*; do
+        if [ -f "${i/f16/q4_0}" ]; then
+            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+        else
+            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+            # no exec here: exec would replace the shell and stop the loop after one model
+            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+        fi
+    done
+elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+    exec ./llama-server "$@"
+else
+    echo "Unknown command: $arg1"
+    echo "Available commands: "
+    echo "  --run (-r): Run a model previously converted into ggml"
+    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "              ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "              ex: -m model.gguf -f file.txt"
+    echo "  --convert (-c): Convert a llama model into ggml"
+    echo "              ex: --outtype f16 \"/models/7B/\" "
+    echo "  --quantize (-q): Quantize a ggml model"
+    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+    echo "  --all-in-one (-a): Execute --convert & --quantize"
+    echo "              ex: \"/models/\" 7B"
+    echo "  --server (-s): Run a model on the server"
+    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
+fi
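Since tools.sh is the entrypoint of the "full" images, the first flag selects the tool and everything after it is forwarded via `"$@"`. A usage sketch (image tag and paths are illustrative):

    docker run -v /path/to/models:/models llama-full \
        --run -m /models/7B/ggml-model-q4_0.gguf \
        -p "Building a website can be done in 10 simple steps:" -n 512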
llama.cpp/.devops/vulkan.Dockerfile
ADDED
@@ -0,0 +1,89 @@
+ARG UBUNTU_VERSION=24.04
+
+FROM ubuntu:$UBUNTU_VERSION AS build
+
+# Install build tools
+RUN apt update && apt install -y git build-essential cmake wget
+
+# Install Vulkan SDK and cURL
+RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
+    apt update -y && \
+    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+
+# Build it
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+    cmake --build build --config Release -j$(nproc)
+
+RUN mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+RUN mkdir -p /app/full \
+    && cp build/bin/* /app/full \
+    && cp *.py /app/full \
+    && cp -r gguf-py /app/full \
+    && cp -r requirements /app/full \
+    && cp requirements.txt /app/full \
+    && cp .devops/tools.sh /app/full/tools.sh
+
+## Base image
+FROM ubuntu:$UBUNTU_VERSION AS base
+
+RUN apt-get update \
+    && apt-get install -y libgomp1 curl libvulkan-dev \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app
+
+### Full
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    python3-wheel \
+    && pip install --break-system-packages --upgrade setuptools \
+    && pip install --break-system-packages -r requirements.txt \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+ENTRYPOINT ["/app/tools.sh"]
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
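A build-and-run sketch for the CLI-only stage (tag and paths illustrative; exposing the GPU to a Vulkan container typically means passing through `/dev/dri`, though the exact flags depend on the driver):

    docker build -f .devops/vulkan.Dockerfile --target light -t llama-vulkan-cli .
    docker run --device /dev/dri -v /path/to/models:/models \
        llama-vulkan-cli -m /models/model.gguf -p "Hello" -n 32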
llama.cpp/.dockerignore
ADDED
@@ -0,0 +1,20 @@
+*.o
+*.a
+.cache/
+# Do not ignore .git directory, otherwise the reported build number will always be 0
+.github/
+.gitignore
+.vs/
+.vscode/
+.DS_Store
+
+build*/
+
+models/*
+
+/llama-cli
+/llama-quantize
+
+arm_neon.h
+compile_commands.json
+Dockerfile
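The comment about `.git` refers to the build number embedded in the binaries, which is derived from the commit count at configure time, roughly:

    git rev-list --count HEAD   # without a .git directory this is unavailable and the build number falls back to 0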
llama.cpp/.ecrc
ADDED
@@ -0,0 +1,6 @@
+{
+    "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+    "Disable": {
+        "IndentSize": true
+    }
+}
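.ecrc is the configuration file for editorconfig-checker, which enforces the .editorconfig rules below; run from the repository root it picks the file up automatically (a sketch, assuming the tool is installed):

    editorconfig-checker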
llama.cpp/.editorconfig
ADDED
@@ -0,0 +1,50 @@
+# https://EditorConfig.org
+
+# Top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file, utf-8 charset
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+indent_style = space
+indent_size = 4
+
+[Makefile]
+indent_style = tab
+
+[scripts/*.mk]
+indent_style = tab
+
+[prompts/*.txt]
+insert_final_newline = unset
+
+[examples/server/public/*]
+indent_size = 2
+
+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
+indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset
llama.cpp/.flake8
ADDED
@@ -0,0 +1,17 @@
+[flake8]
+max-line-length = 125
+ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+exclude =
+    # Do not traverse examples
+    examples,
+    # Do not include package initializers
+    __init__.py,
+    # No need to traverse our git directory
+    .git,
+    # There's no value in checking cache directories
+    __pycache__,
+    # No need to include the build path
+    build,
+    # This contains builds that we don't want to check
+    dist  # This is generated with `python build .` for package releases
+# max-complexity = 10
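flake8 discovers this file automatically when invoked from the repository root, so no extra flags are needed (target path illustrative):

    flake8 gguf-py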
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml
ADDED
@@ -0,0 +1,87 @@
+name: Bug (compilation)
+description: Something goes wrong when trying to compile llama.cpp.
+title: "Compile bug: "
+labels: ["bug-unconfirmed", "compilation"]
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thanks for taking the time to fill out this bug report!
+        This issue template is intended for bug reports where the compilation of llama.cpp fails.
+        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
+        If the compilation succeeds with ccache disabled, you should be able to permanently fix the issue
+        by clearing `~/.cache/ccache` (on Linux).
+  - type: textarea
+    id: commit
+    attributes:
+      label: Git commit
+      description: Which commit are you trying to compile?
+      placeholder: |
+        $git rev-parse HEAD
+        84a07a17b1b08cf2b9747c633a2372782848a27f
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating systems
+      description: Which operating systems do you know to be affected?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: true
+  - type: dropdown
+    id: backends
+    attributes:
+      label: GGML backends
+      description: Which GGML backends do you know to be affected?
+      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+      multiple: true
+    validations:
+      required: true
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it.
+        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
+      placeholder: >
+        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
+        Here are the exact commands that I used: ...
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: command
+    attributes:
+      label: Compile command
+      description: >
+        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+        Please copy and paste any relevant log output, including any generated text.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml
ADDED
@@ -0,0 +1,101 @@
+name: Bug (model use)
+description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
+title: "Eval bug: "
+labels: ["bug-unconfirmed", "model evaluation"]
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thanks for taking the time to fill out this bug report!
+        This issue template is intended for bug reports where the model evaluation results
+        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
+        If you encountered the issue while using an external UI (e.g. ollama),
+        please reproduce your issue using one of the examples/binaries in this repository.
+        The `llama-cli` binary can be used for simple and reproducible model inference.
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating systems
+      description: Which operating systems do you know to be affected?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: true
+  - type: dropdown
+    id: backends
+    attributes:
+      label: GGML backends
+      description: Which GGML backends do you know to be affected?
+      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+      multiple: true
+    validations:
+      required: true
+  - type: textarea
+    id: hardware
+    attributes:
+      label: Hardware
+      description: Which CPUs/GPUs are you using?
+      placeholder: >
+        e.g. Ryzen 5950X + 2x RTX 4090
+    validations:
+      required: true
+  - type: textarea
+    id: model
+    attributes:
+      label: Models
+      description: >
+        Which model(s) at which quantization were you using when encountering the bug?
+        If you downloaded a GGUF file off of Huggingface, please provide a link.
+      placeholder: >
+        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
+    validations:
+      required: false
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it.
+        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
+        that information would be very much appreciated by us.
+      placeholder: >
+        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
+        When I use -ngl 0 it works correctly.
+        Here are the exact commands that I used: ...
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+        Please copy and paste any relevant log output, including the command that you entered and any generated text.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml
ADDED
@@ -0,0 +1,91 @@
+name: Bug (misc.)
+description: Something is not working the way it should (and it's not covered by any of the above cases).
+title: "Misc. bug: "
+labels: ["bug-unconfirmed"]
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thanks for taking the time to fill out this bug report!
+        This issue template is intended for miscellaneous bugs that don't fit into any other category.
+        If you encountered the issue while using an external UI (e.g. ollama),
+        please reproduce your issue using one of the examples/binaries in this repository.
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which version of our software is affected? (You can use `--version` to get a version string.)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating systems
+      description: Which operating systems do you know to be affected?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: dropdown
+    id: module
+    attributes:
+      label: Which llama.cpp modules do you know to be affected?
+      multiple: true
+      options:
+        - Documentation/Github
+        - libllama (core library)
+        - llama-cli
+        - llama-server
+        - llama-bench
+        - llama-quantize
+        - Python/Bash scripts
+        - Test code
+        - Other (Please specify in the next section)
+    validations:
+      required: false
+  - type: textarea
+    id: command
+    attributes:
+      label: Command line
+      description: >
+        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: false
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+        If applicable, please copy and paste any relevant log output, including any generated text.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
ADDED
@@ -0,0 +1,51 @@
+name: Enhancement
+description: Used to request enhancements for llama.cpp.
+title: "Feature Request: "
+labels: ["enhancement"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
+
+  - type: checkboxes
+    id: prerequisites
+    attributes:
+      label: Prerequisites
+      description: Please confirm the following before submitting your enhancement request.
+      options:
+        - label: I am running the latest code. Mention the version if possible as well.
+          required: true
+        - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
+          required: true
+        - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
+          required: true
+        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
+          required: true
+
+  - type: textarea
+    id: feature-description
+    attributes:
+      label: Feature Description
+      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
+      placeholder: Detailed description of the enhancement
+    validations:
+      required: true
+
+  - type: textarea
+    id: motivation
+    attributes:
+      label: Motivation
+      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
+      placeholder: Explanation of why this feature is needed and its benefits
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-implementation
+    attributes:
+      label: Possible Implementation
+      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
+      placeholder: Detailed description of potential implementation
+    validations:
+      required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
ADDED
@@ -0,0 +1,52 @@
+name: Research
+description: Track new technical research area.
+title: "Research: "
+labels: ["research 🔬"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
+
+  - type: checkboxes
+    id: research-stage
+    attributes:
+      label: Research Stage
+      description: Track general state of this research ticket
+      options:
+        - label: Background Research (Let's try to avoid reinventing the wheel)
+        - label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
+        - label: Strategy / Implementation Forming
+        - label: Analysis of results
+        - label: Debrief / Documentation (So people in the future can learn from us)
+
+  - type: textarea
+    id: background
+    attributes:
+      label: Previous existing literature and research
+      description: What's the current state of the art, and what's the motivation for this research?
+
+  - type: textarea
+    id: hypothesis
+    attributes:
+      label: Hypothesis
+      description: How do you think this will work, and what will its effect be?
+
+  - type: textarea
+    id: implementation
+    attributes:
+      label: Implementation
+      description: Got an approach? e.g. a PR ready to go?
+
+  - type: textarea
+    id: analysis
+    attributes:
+      label: Analysis
+      description: How does the proposed implementation behave?
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
ADDED
@@ -0,0 +1,28 @@
+name: Refactor (Maintainers)
+description: Used to track refactoring opportunities.
+title: "Refactor: "
+labels: ["refactor"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+        You may also want to check the [pull request refactor label](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
+
+  - type: textarea
+    id: background-description
+    attributes:
+      label: Background Description
+      description: Please provide a detailed written description of the pain points you are trying to solve.
+      placeholder: Detailed description of the motivation behind your refactor request
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-approaches
+    attributes:
+      label: Possible Refactor Approaches
+      description: If you have ideas about possible approaches to solve this problem, describe them here. You may want to make it a todo list.
+      placeholder: Your ideas for possible refactoring approaches
+    validations:
+      required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,11 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Got an idea?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
+    about: Pop it there. It may then become an enhancement ticket.
+  - name: Got a question?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
+    about: Ask a question there!
+  - name: Want to contribute?
+    url: https://github.com/ggerganov/llama.cpp/wiki/contribute
+    about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/labeler.yml
ADDED
@@ -0,0 +1,86 @@
+# https://github.com/actions/labeler
+Kompute:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-kompute.h
+          - ggml/src/ggml-kompute/**
+          - README-kompute.md
+Apple Metal:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-metal.h
+          - ggml/src/ggml-metal/**
+          - README-metal.md
+SYCL:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-sycl.h
+          - ggml/src/ggml-sycl/**
+          - docs/backend/SYCL.md
+          - examples/sycl/**
+Nvidia GPU:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-cuda.h
+          - ggml/src/ggml-cuda/**
+Vulkan:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-vulkan.h
+          - ggml/src/ggml-vulkan/**
+documentation:
+  - changed-files:
+      - any-glob-to-any-file:
+          - docs/**
+          - media/**
+testing:
+  - changed-files:
+      - any-glob-to-any-file:
+          - tests/**
+build:
+  - changed-files:
+      - any-glob-to-any-file:
+          - cmake/**
+          - CMakeLists.txt
+          - CMakePresets.json
+examples:
+  - changed-files:
+      - any-glob-to-any-file: examples/**
+devops:
+  - changed-files:
+      - any-glob-to-any-file:
+          - .devops/**
+          - .github/**
+          - ci/**
+python:
+  - changed-files:
+      - any-glob-to-any-file:
+          - "**/*.py"
+          - requirements/**
+          - gguf-py/**
+          - .flake8
+script:
+  - changed-files:
+      - any-glob-to-any-file:
+          - scripts/**
+android:
+  - changed-files:
+      - any-glob-to-any-file:
+          - examples/llama.android/**
+server:
+  - changed-files:
+      - any-glob-to-any-file:
+          - examples/server/**
+ggml:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/**
+nix:
+  - changed-files:
+      - any-glob-to-any-file:
+          - "**/*.nix"
+          - .github/workflows/nix-*.yml
+          - .devops/nix/nixpkgs-instances.nix
+embedding:
+  - changed-files:
+      - any-glob-to-any-file: examples/embedding/
llama.cpp/.github/pull_request_template.md
ADDED
@@ -0,0 +1 @@
+*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
llama.cpp/.github/workflows/bench.yml.disabled
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TODO: there have been some issues with the workflow, so disabling for now
|
| 2 |
+
# https://github.com/ggerganov/llama.cpp/issues/7893
|
| 3 |
+
#
|
| 4 |
+
# Benchmark
|
| 5 |
+
name: Benchmark
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
inputs:
|
| 10 |
+
gpu-series:
|
| 11 |
+
description: 'Azure GPU series to run with'
|
| 12 |
+
required: true
|
| 13 |
+
type: choice
|
| 14 |
+
options:
|
| 15 |
+
- Standard_NC4as_T4_v3
|
| 16 |
+
- Standard_NC24ads_A100_v4
|
| 17 |
+
- Standard_NC80adis_H100_v5
|
| 18 |
+
sha:
|
| 19 |
+
description: 'Commit SHA1 to build'
|
| 20 |
+
required: false
|
| 21 |
+
type: string
|
| 22 |
+
duration:
|
| 23 |
+
description: 'Duration of the bench'
|
| 24 |
+
type: string
|
| 25 |
+
default: 10m
|
| 26 |
+
|
| 27 |
+
push:
|
| 28 |
+
branches:
|
| 29 |
+
- master
|
| 30 |
+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
| 31 |
+
pull_request_target:
|
| 32 |
+
types: [opened, synchronize, reopened]
|
| 33 |
+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
| 34 |
+
schedule:
|
| 35 |
+
- cron: '04 2 * * *'
|
| 36 |
+
|
| 37 |
+
concurrency:
|
| 38 |
+
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
|
| 39 |
+
cancel-in-progress: true
|
| 40 |
+
|
| 41 |
+
jobs:
|
| 42 |
+
bench-server-baseline:
|
| 43 |
+
runs-on: Standard_NC4as_T4_v3
|
| 44 |
+
env:
|
| 45 |
+
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
|
| 46 |
+
N_USERS: 8
|
| 47 |
+
DURATION: 10m
|
| 48 |
+
|
| 49 |
+
strategy:
|
| 50 |
+
matrix:
|
| 51 |
+
model: [phi-2]
|
| 52 |
+
ftype: [q4_0, q8_0, f16]
|
| 53 |
+
include:
|
| 54 |
+
- model: phi-2
|
| 55 |
+
ftype: q4_0
|
| 56 |
+
pr_comment_enabled: "true"
|
| 57 |
+
|
| 58 |
+
if: |
|
| 59 |
+
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|
| 60 |
+
|| (
|
| 61 |
+
github.event_name == 'schedule'
|
| 62 |
+
&& github.ref_name == 'master'
|
| 63 |
+
&& github.repository_owner == 'ggerganov'
|
| 64 |
+
)
|
| 65 |
+
|| github.event_name == 'pull_request_target'
|
| 66 |
+
|| (
|
| 67 |
+
github.event_name == 'push'
|
| 68 |
+
&& github.event.ref == 'refs/heads/master'
|
| 69 |
+
&& github.repository_owner == 'ggerganov'
|
| 70 |
+
)
|
| 71 |
+
steps:
|
| 72 |
+
- name: Clone
|
| 73 |
+
id: checkout
|
| 74 |
+
uses: actions/checkout@v4
|
| 75 |
+
with:
|
| 76 |
+
fetch-depth: 0
|
| 77 |
+
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
| 78 |
+
|
| 79 |
+
- name: Install python env
|
| 80 |
+
id: pipenv
|
| 81 |
+
run: |
|
| 82 |
+
cd examples/server/bench
|
| 83 |
+
python3 -m venv venv
|
| 84 |
+
source venv/bin/activate
|
| 85 |
+
pip install -r requirements.txt
|
| 86 |
+
|
| 87 |
+
- name: Prometheus
|
| 88 |
+
id: install_prometheus
|
| 89 |
+
run: |
|
| 90 |
+
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
|
| 91 |
+
tar xzf prometheus*.tar.gz --strip-components=1
|
| 92 |
+
./prometheus --config.file=examples/server/bench/prometheus.yml &
|
| 93 |
+
while ! nc -z localhost 9090; do
|
| 94 |
+
sleep 0.1
|
| 95 |
+
done
|
| 96 |
+
|
| 97 |
+
- name: Set up Go
|
| 98 |
+
uses: actions/setup-go@v5
|
| 99 |
+
with:
|
| 100 |
+
go-version: '1.21'
|
| 101 |
+
|
| 102 |
+
- name: Install k6 and xk6-sse
|
| 103 |
+
id: k6_installation
|
| 104 |
+
run: |
|
| 105 |
+
cd examples/server/bench
|
| 106 |
+
go install go.k6.io/xk6/cmd/xk6@latest
|
| 107 |
+
xk6 build master \
|
| 108 |
+
--with github.com/phymbert/xk6-sse
|
| 109 |
+
|
| 110 |
+
- name: Build
|
| 111 |
+
id: cmake_build
|
| 112 |
+
run: |
|
| 113 |
+
set -eux
|
| 114 |
+
cmake -B build \
|
| 115 |
+
-DGGML_NATIVE=OFF \
|
| 116 |
+
-DLLAMA_BUILD_SERVER=ON \
|
| 117 |
+
-DLLAMA_CURL=ON \
|
| 118 |
+
-DLLAMA_CUBLAS=ON \
|
| 119 |
+
-DCUDAToolkit_ROOT=/usr/local/cuda \
|
| 120 |
+
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
|
| 121 |
+
-DCMAKE_CUDA_ARCHITECTURES=75 \
|
| 122 |
+
-DLLAMA_FATAL_WARNINGS=OFF \
|
| 123 |
+
-DLLAMA_ALL_WARNINGS=OFF \
|
| 124 |
+
-DCMAKE_BUILD_TYPE=Release;
|
| 125 |
+
cmake --build build --config Release -j $(nproc) --target llama-server
|
| 126 |
+
|
| 127 |
+
- name: Download the dataset
|
| 128 |
+
id: download_dataset
|
| 129 |
+
run: |
|
| 130 |
+
cd examples/server/bench
|
| 131 |
+
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
|
| 132 |
+
|
| 133 |
+
- name: Server bench
|
| 134 |
+
id: server_bench
|
| 135 |
+
env:
|
| 136 |
+
HEAD_REF: ${{ github.head_ref || github.ref_name }}
|
| 137 |
+
run: |
|
| 138 |
+
set -eux
|
| 139 |
+
|
| 140 |
+
cd examples/server/bench
|
| 141 |
+
source venv/bin/activate
|
| 142 |
+
python bench.py \
|
| 143 |
+
--runner-label ${{ env.RUNNER_LABEL }} \
|
| 144 |
+
--name ${{ github.job }} \
|
| 145 |
+
--branch $HEAD_REF \
|
| 146 |
+
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
|
| 147 |
+
--scenario script.js \
|
| 148 |
+
--duration ${{ github.event.inputs.duration || env.DURATION }} \
|
| 149 |
+
--hf-repo ggml-org/models \
|
| 150 |
+
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
|
| 151 |
+
--model-path-prefix /models \
|
| 152 |
+
--parallel ${{ env.N_USERS }} \
|
| 153 |
+
-ngl 33 \
|
| 154 |
+
--batch-size 2048 \
|
| 155 |
+
--ubatch-size 256 \
|
| 156 |
+
--ctx-size 16384 \
|
| 157 |
+
--n-prompts 1000 \
|
| 158 |
+
--max-prompt-tokens 1024 \
|
| 159 |
+
--max-tokens 2048
|
| 160 |
+
|
| 161 |
+
cat results.github.env >> $GITHUB_ENV
|
| 162 |
+
|
| 163 |
+
# Remove dataset as we do not want it in the artefact
|
| 164 |
+
rm ShareGPT_V3_unfiltered_cleaned_split.json
|
| 165 |
+
|
+      - uses: actions/upload-artifact@v4
+        with:
+          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          compression-level: 9
+          path: |
+            examples/server/bench/*.jpg
+            examples/server/bench/*.json
+            examples/server/bench/*.log
+
+      - name: Commit status
+        uses: Sibz/github-status-action@v1
+        with:
+          authToken: ${{secrets.GITHUB_TOKEN}}
+          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          description: |
+            ${{ env.BENCH_RESULTS }}
+          state: 'success'
+
+      - name: Upload benchmark images
+        uses: devicons/public-upload-to-imgur@v2.2.2
+        continue-on-error: true # Important as it looks unstable: 503
+        id: imgur_step
+        with:
+          client_id: ${{secrets.IMGUR_CLIENT_ID}}
+          path: |
+            examples/server/bench/prompt_tokens_seconds.jpg
+            examples/server/bench/predicted_tokens_seconds.jpg
+            examples/server/bench/kv_cache_usage_ratio.jpg
+            examples/server/bench/requests_processing.jpg
+
+      - name: Extract mermaid
+        id: set_mermaid
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
+          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
+          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
+          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
+          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
+          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
+          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
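The `NAME<<EOF … EOF` lines use GitHub Actions' delimiter syntax for multiline environment values: everything between the two markers becomes the variable's content, which is how the multi-line mermaid charts survive into the PR-comment step below.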
+      - name: Extract image url
+        id: extract_image_url
+        continue-on-error: true
+        run: |
+          set -eux
+
+          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
+          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
+          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
+          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
+
+      - name: Comment PR
+        uses: mshick/add-pr-comment@v2
+        id: comment_pr
+        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
+        with:
+          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          message: |
+            <p align="center">
+
+            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+            </p>
+
+            <details>
+
+            <summary>Expand details for performance related PR only</summary>
+
+            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+            - ${{ env.BENCH_GRAPH_XLABEL }}
+
+
+            <p align="center">
+
+            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PROMPT_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PREDICTED_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            </p>
+
+            <details>
+
+            <summary>Details</summary>
+
+            <p align="center">
+
+            <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.KV_CACHE_USAGE_RATIO }}
+            ```
+
+            </details>
+
+            <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.REQUESTS_PROCESSING }}
+            ```
+
+            </details>
+
+            </p>
+            </details>
+            </details>
llama.cpp/.github/workflows/build.yml
ADDED
@@ -0,0 +1,1645 @@
+name: CI
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      create_release:
+        description: 'Create new release'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+# Fine-grained permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  contents: write # for creating release
+
+env:
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  GGML_NLOOP: 3
+  GGML_N_THREADS: 1
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+
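The concurrency group `${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}` keys pull-request runs by their ref, so a new push to the same branch cancels the run already in flight, while push events (where `head_ref` is empty) fall back to the unique `run_id` and never cancel each other.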
+jobs:
+  macOS-latest-cmake-arm64:
+    runs-on: macos-14
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-arm64
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build \
+            -DCMAKE_BUILD_RPATH="@loader_path" \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=ON \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DGGML_RPC=ON
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L 'main|curl' --verbose --timeout 900
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+          name: llama-bin-macos-arm64.zip
+
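Release artifacts are named by the `Determine tag name` step, which repeats in every packaging job: master builds count commits to form a monotonically increasing build number, while branch builds additionally embed a sanitized branch name and a short hash. A local sketch of the same logic, assuming a checkout with full history (`fetch-depth: 0`); the printed values are hypothetical:

```bash
BUILD_NUMBER="$(git rev-list --count HEAD)"        # e.g. 4567
SHORT_HASH="$(git rev-parse --short=7 HEAD)"       # e.g. 1a2b3c4
SAFE_NAME="$(echo 'feature/foo' | tr '/' '-')"     # slashes are not filename-safe
echo "b${BUILD_NUMBER}"                            # master build -> b4567
echo "${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" # branch build -> feature-foo-b4567-1a2b3c4
```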
+  macOS-latest-cmake-x64:
+    runs-on: macos-13
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-x64
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          # Metal is disabled due to intermittent failures with Github runners not having a GPU:
+          # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
+          cmake -B build \
+            -DCMAKE_BUILD_RPATH="@loader_path" \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=ON \
+            -DGGML_METAL=OFF \
+            -DGGML_RPC=ON
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+          name: llama-bin-macos-x64.zip
+
+  ubuntu-cpu-cmake:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-cpu-cmake
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=ON \
+            -DGGML_RPC=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L 'main|curl' --verbose --timeout 900
+
+      - name: Test llama2c conversion
+        id: llama2c_test
+        run: |
+          cd build
+          echo "Fetch tokenizer"
+          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
+          echo "Fetch llama2c model"
+          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
+          ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
+          ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
+          name: llama-bin-ubuntu-x64.zip
+
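`stories260K` is one of Karpathy's tinyllamas checkpoints, a llama2.c model of only a few hundred thousand parameters, which makes the convert-and-generate round trip above cheap enough to run as a CPU smoke test on every commit.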
+  ubuntu-latest-cmake-sanitizer:
+    runs-on: ubuntu-latest
+
+    continue-on-error: true
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        build_type: [Debug]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        if: ${{ matrix.sanitizer != 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+      - name: Build (no OpenMP)
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DGGML_OPENMP=OFF
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
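Each sanitizer gets its own Debug build via `-DLLAMA_SANITIZE_<NAME>=ON`. The THREAD variant is configured separately with `-DGGML_OPENMP=OFF`: ThreadSanitizer tends to report races inside OpenMP runtimes that were not themselves built with TSan instrumentation, so the OpenMP path is swapped out for ggml's own thread pool.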
+  ubuntu-latest-llguidance:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_LLGUIDANCE=ON
+          cmake --build . --config Release -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
+  ubuntu-latest-cmake-rpc:
+    runs-on: ubuntu-latest
+
+    continue-on-error: true
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-cmake-rpc
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build \
+            -DGGML_RPC=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose
+
+  ubuntu-22-cmake-vulkan:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-vulkan
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt-get update -y
+          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build \
+            -DGGML_VULKAN=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          # This is using llvmpipe and runs slower than other backends
+          ctest -L main --verbose --timeout 1800
+
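Because hosted runners have no GPU, `mesa-vulkan-drivers` supplies llvmpipe, a software Vulkan implementation that executes the shaders on the CPU; that is why this job's ctest timeout is raised to 1800 seconds while the other backends use 900.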
+  ubuntu-22-cmake-hip:
+    runs-on: ubuntu-22.04
+    container: rocm/dev-ubuntu-22.04:6.0.2
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-hip
+          evict-old-files: 1d
+
+      - name: Build with native CMake HIP support
+        id: cmake_build
+        run: |
+          cmake -B build -S . \
+            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+            -DGGML_HIP=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Build with legacy HIP support
+        id: cmake_build_legacy_hip
+        run: |
+          cmake -B build2 -S . \
+            -DCMAKE_C_COMPILER=hipcc \
+            -DCMAKE_CXX_COMPILER=hipcc \
+            -DGGML_HIP=ON
+          cmake --build build2 --config Release -j $(nproc)
+
+  ubuntu-22-cmake-musa:
+    runs-on: ubuntu-22.04
+    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        run: |
+          apt-get update
+          apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-musa
+          evict-old-files: 1d
+
+      - name: Build with native CMake MUSA support
+        id: cmake_build
+        run: |
+          cmake -B build -S . \
+            -DGGML_MUSA=ON
+          cmake --build build --config Release -j $(nproc)
+
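The HIP job builds twice on the ROCm 6.0.2 container: once with CMake's native HIP language support, pointing `CMAKE_HIP_COMPILER` at the clang binary under the llvm path that `hipconfig -l` reports, and once the legacy way with `hipcc` standing in as the C/C++ compiler, so both integration paths stay buildable.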
+  ubuntu-22-cmake-sycl:
+    runs-on: ubuntu-22.04
+
+    continue-on-error: true
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: add oneAPI to apt
+        shell: bash
+        run: |
+          cd /tmp
+          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+      - name: install oneAPI dpcpp compiler
+        shell: bash
+        run: |
+          sudo apt update
+          sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+      - name: install oneAPI MKL library
+        shell: bash
+        run: |
+          sudo apt install intel-oneapi-mkl-devel
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-sycl
+          evict-old-files: 1d
+
+      - name: Build
+        id: cmake_build
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          cmake -B build \
+            -DGGML_SYCL=ON \
+            -DCMAKE_C_COMPILER=icx \
+            -DCMAKE_CXX_COMPILER=icpx
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-22-cmake-sycl-fp16:
+    runs-on: ubuntu-22.04
+
+    continue-on-error: true
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: add oneAPI to apt
+        shell: bash
+        run: |
+          cd /tmp
+          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+      - name: install oneAPI dpcpp compiler
+        shell: bash
+        run: |
+          sudo apt update
+          sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+      - name: install oneAPI MKL library
+        shell: bash
+        run: |
+          sudo apt install intel-oneapi-mkl-devel
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-sycl-fp16
+          evict-old-files: 1d
+
+      - name: Build
+        id: cmake_build
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          cmake -B build \
+            -DGGML_SYCL=ON \
+            -DCMAKE_C_COMPILER=icx \
+            -DCMAKE_CXX_COMPILER=icpx \
+            -DGGML_SYCL_F16=ON
+          cmake --build build --config Release -j $(nproc)
+
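Both SYCL jobs must `source /opt/intel/oneapi/setvars.sh` in the same `run:` block as the configure step, since that script is what puts `icx`/`icpx` on the PATH; a shell that skips it will not find the compilers. A minimal local sketch, assuming a default oneAPI install location:

```bash
source /opt/intel/oneapi/setvars.sh
command -v icx icpx   # both should now resolve inside this shell
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
```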
+  macOS-latest-cmake-ios:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-ios
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=iOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
+  macOS-latest-cmake-tvos:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-tvos
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=tvOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
+  macOS-latest-swift:
+    runs-on: macos-latest
+
+    strategy:
+      matrix:
+        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-swift
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build llama.cpp with CMake
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+          sudo cmake --install build --config Release
+
+      - name: xcodebuild for swift package
+        id: xcodebuild
+        run: |
+          xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
+
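The iOS and tvOS jobs pass `CODE_SIGNING_ALLOWED=NO` through to `xcodebuild` because CI runners have no signing identity; the builds only need to prove that the Xcode projects compile for those platforms. The Swift job then installs the CMake build and checks that the `llama-Package` scheme builds for each destination in the matrix.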
+  windows-msys2:
+    runs-on: windows-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { sys: UCRT64, env: ucrt-x86_64, build: Release }
+          - { sys: CLANG64, env: clang-x86_64, build: Release }
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-msys2
+          variant: sccache
+          evict-old-files: 1d
+
+      - name: Setup ${{ matrix.sys }}
+        uses: msys2/setup-msys2@v2
+        with:
+          update: true
+          msystem: ${{matrix.sys}}
+          install: >-
+            base-devel
+            git
+            mingw-w64-${{matrix.env}}-toolchain
+            mingw-w64-${{matrix.env}}-cmake
+            mingw-w64-${{matrix.env}}-openblas
+
+      - name: Build using CMake
+        shell: msys2 {0}
+        run: |
+          cmake -B build
+          cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+      - name: Clean after building using CMake
+        shell: msys2 {0}
+        run: |
+          rm -rf build
+
+      - name: Build using CMake w/ OpenBLAS
+        shell: msys2 {0}
+        run: |
+          cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+          cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+  windows-latest-cmake:
+    runs-on: windows-latest
+
+    env:
+      OPENBLAS_VERSION: 0.3.23
+      SDE_VERSION: 9.33.0-2024-01-07
+      VULKAN_VERSION: 1.3.261.1
+
+    strategy:
+      matrix:
+        include:
+          - build: 'noavx-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
+          - build: 'avx2-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
+          - build: 'avx-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
+          - build: 'avx512-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
+          - build: 'openblas-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+          - build: 'kompute-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
+          - build: 'vulkan-x64'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
+          - build: 'llvm-arm64'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
+          - build: 'msvc-arm64'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
+          - build: 'llvm-arm64-opencl-adreno'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-${{ matrix.build }}
+          variant: sccache
+          evict-old-files: 1d
+
+      - name: Clone Kompute submodule
+        id: clone_kompute
+        if: ${{ matrix.build == 'kompute-x64' }}
+        run: |
+          git submodule update --init ggml/src/ggml-kompute/kompute
+
+      - name: Download OpenBLAS
+        id: get_openblas
+        if: ${{ matrix.build == 'openblas-x64' }}
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
+          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
+          mkdir $env:RUNNER_TEMP/openblas
+          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
+          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+          $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
+          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
+
+      - name: Install Vulkan SDK
+        id: get_vulkan
+        if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
+          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
+          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
+
+      - name: Install OpenCL Headers and Libs
+        id: install_opencl
+        if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
+        run: |
+          git clone https://github.com/KhronosGroup/OpenCL-Headers
+          cd OpenCL-Headers
+          cmake -B build `
+            -DBUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build build --target install
+          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+          cd OpenCL-ICD-Loader
+          cmake -B build-arm64-release `
+            -A arm64 `
+            -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build build-arm64-release --target install --config release
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -S . -B build ${{ matrix.defines }}
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
+
+      - name: Add libopenblas.dll
+        id: add_libopenblas_dll
+        if: ${{ matrix.build == 'openblas-x64' }}
+        run: |
+          cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
+          cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
+
+      - name: Check AVX512F support
+        id: check_avx512f
+        if: ${{ matrix.build == 'avx512-x64' }}
+        continue-on-error: true
+        run: |
+          cd build
+          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+          $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
+          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
+          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
+          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
+
+      - name: Test
+        id: cmake_test
+        # not all machines have native AVX-512
+        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
+        run: |
+          cd build
+          ctest -L main -C Release --verbose --timeout 900
+
+      - name: Test (Intel SDE)
+        id: cmake_test_sde
+        if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
+          # for some weird reason windows tar doesn't like sde tar.xz
+          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
+          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
+          $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
+          cd build
+          $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
+          & $sde -future -- ctest -L main -C Release --verbose --timeout 900
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+          Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
+          name: llama-bin-win-${{ matrix.build }}.zip
+
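The AVX512F probe above compiles a freestanding program that executes `__cpuid` leaf 7 and returns the negation of EBX bit 16 (`a[1] & 65536`, i.e. 2^16), which is the AVX512F feature flag, so its exit status tells the workflow whether the runner natively supports AVX-512. When it does not, the regular test step is skipped and the suite runs under Intel SDE instead, whose `-future` switch emulates a CPU with the newer instruction sets enabled.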
+  ubuntu-latest-cmake-cuda:
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:12.6.2-devel-ubuntu24.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install dependencies
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          apt update
+          apt install -y cmake build-essential ninja-build libgomp1 git
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-cmake-cuda
+          evict-old-files: 1d
+
+      - name: Build with CMake
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_CUDA_ARCHITECTURES=89-real \
+            -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DGGML_NATIVE=OFF \
+            -DGGML_CUDA=ON
+          cmake --build build
+
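`CMAKE_CUDA_ARCHITECTURES=89-real` emits device code only for compute capability 8.9 (Ada Lovelace) and skips the PTX fallback, which keeps nvcc time down for what is purely a compile check; `--allow-shlib-undefined` is passed presumably because the devel container ships the CUDA toolkit but no driver, so driver-library symbols cannot all be resolved at link time.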
+  windows-2019-cmake-cuda:
+    runs-on: windows-2019
+
+    strategy:
+      matrix:
+        cuda: ['12.4', '11.7']
+        build: ['cuda']
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
+          variant: sccache
+          evict-old-files: 1d
+
+      - name: Install Cuda Toolkit 11.7
+        if: ${{ matrix.cuda == '11.7' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Cuda Toolkit 12.4
+        if: ${{ matrix.cuda == '12.4' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
+
+      - name: Build
+        id: cmake_build
+        shell: cmd
+        run: |
+          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          cmake -S . -B build -G "Ninja Multi-Config" ^
+            -DLLAMA_BUILD_SERVER=ON ^
+            -DGGML_NATIVE=OFF ^
+            -DGGML_CUDA=ON ^
+            -DGGML_RPC=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
+
| 1038 |
+
- name: Determine tag name
|
| 1039 |
+
id: tag
|
| 1040 |
+
shell: bash
|
| 1041 |
+
run: |
|
| 1042 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
| 1043 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
| 1044 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
| 1045 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
| 1046 |
+
else
|
| 1047 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
| 1048 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
| 1049 |
+
fi
|
| 1050 |
+
|
| 1051 |
+
- name: Pack artifacts
|
| 1052 |
+
id: pack_artifacts
|
| 1053 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1054 |
+
run: |
|
| 1055 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
|
| 1056 |
+
|
| 1057 |
+
- name: Upload artifacts
|
| 1058 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1059 |
+
uses: actions/upload-artifact@v4
|
| 1060 |
+
with:
|
| 1061 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
|
| 1062 |
+
name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
| 1063 |
+
|
| 1064 |
+
- name: Copy and pack Cuda runtime
|
| 1065 |
+
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
| 1066 |
+
run: |
|
| 1067 |
+
echo "Cuda install location: ${{ env.CUDA_PATH }}"
|
| 1068 |
+
$dst='.\build\bin\cudart\'
|
| 1069 |
+
robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
|
| 1070 |
+
robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
|
| 1071 |
+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
|
| 1072 |
+
|
| 1073 |
+
- name: Upload Cuda runtime
|
| 1074 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1075 |
+
uses: actions/upload-artifact@v4
|
| 1076 |
+
with:
|
| 1077 |
+
path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
| 1078 |
+
name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
| 1079 |
+
|
| 1080 |
+
windows-latest-cmake-sycl:
|
| 1081 |
+
runs-on: windows-latest
|
| 1082 |
+
|
| 1083 |
+
defaults:
|
| 1084 |
+
run:
|
| 1085 |
+
shell: bash
|
| 1086 |
+
|
| 1087 |
+
env:
|
| 1088 |
+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
|
| 1089 |
+
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
|
| 1090 |
+
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
|
| 1091 |
+
steps:
|
| 1092 |
+
- name: Clone
|
| 1093 |
+
id: checkout
|
| 1094 |
+
uses: actions/checkout@v4
|
| 1095 |
+
with:
|
| 1096 |
+
fetch-depth: 0
|
| 1097 |
+
|
| 1098 |
+
- name: ccache
|
| 1099 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
| 1100 |
+
with:
|
| 1101 |
+
key: windows-latest-cmake-sycl
|
| 1102 |
+
variant: sccache
|
| 1103 |
+
evict-old-files: 1d
|
| 1104 |
+
|
| 1105 |
+
- name: Install
|
| 1106 |
+
run: |
|
| 1107 |
+
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
|
| 1108 |
+
|
| 1109 |
+
- name: Build
|
| 1110 |
+
id: cmake_build
|
| 1111 |
+
run: examples/sycl/win-build-sycl.bat
|
| 1112 |
+
|
| 1113 |
+
- name: Determine tag name
|
| 1114 |
+
id: tag
|
| 1115 |
+
shell: bash
|
| 1116 |
+
run: |
|
| 1117 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
| 1118 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
| 1119 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
| 1120 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
| 1121 |
+
else
|
| 1122 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
| 1123 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
| 1124 |
+
fi
|
| 1125 |
+
|
| 1126 |
+
- name: Build the release package
|
| 1127 |
+
id: pack_artifacts
|
| 1128 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1129 |
+
run: |
|
| 1130 |
+
echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
|
| 1131 |
+
|
| 1132 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
|
| 1133 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
|
| 1134 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
|
| 1135 |
+
|
| 1136 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
|
| 1137 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
|
| 1138 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
|
| 1139 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
|
| 1140 |
+
|
| 1141 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
|
| 1142 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
|
| 1143 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
|
| 1144 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
|
| 1145 |
+
|
| 1146 |
+
cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
|
| 1147 |
+
cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
|
| 1148 |
+
|
| 1149 |
+
echo "cp oneAPI running time dll files to ./build/bin done"
|
| 1150 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
|
| 1151 |
+
|
| 1152 |
+
- name: Upload the release package
|
| 1153 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1154 |
+
uses: actions/upload-artifact@v4
|
| 1155 |
+
with:
|
| 1156 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
|
| 1157 |
+
name: llama-bin-win-sycl-x64.zip
|
| 1158 |
+
|
| 1159 |
+
windows-latest-cmake-hip:
|
| 1160 |
+
if: ${{ github.event.inputs.create_release != 'true' }}
|
| 1161 |
+
runs-on: windows-latest
|
| 1162 |
+
|
| 1163 |
+
steps:
|
| 1164 |
+
- name: Clone
|
| 1165 |
+
id: checkout
|
| 1166 |
+
uses: actions/checkout@v4
|
| 1167 |
+
|
| 1168 |
+
- name: Install
|
| 1169 |
+
id: depends
|
| 1170 |
+
run: |
|
| 1171 |
+
$ErrorActionPreference = "Stop"
|
| 1172 |
+
write-host "Downloading AMD HIP SDK Installer"
|
| 1173 |
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
| 1174 |
+
write-host "Installing AMD HIP SDK"
|
| 1175 |
+
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
| 1176 |
+
write-host "Completed AMD HIP SDK installation"
|
| 1177 |
+
|
| 1178 |
+
- name: Verify ROCm
|
| 1179 |
+
id: verify
|
| 1180 |
+
run: |
|
| 1181 |
+
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
| 1182 |
+
|
| 1183 |
+
- name: Install ccache
|
| 1184 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
| 1185 |
+
with:
|
| 1186 |
+
key: ${{ github.job }}
|
| 1187 |
+
evict-old-files: 1d
|
| 1188 |
+
|
| 1189 |
+
- name: Build
|
| 1190 |
+
id: cmake_build
|
| 1191 |
+
run: |
|
| 1192 |
+
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
| 1193 |
+
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
| 1194 |
+
cmake -G "Unix Makefiles" -B build -S . `
|
| 1195 |
+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
|
| 1196 |
+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
|
| 1197 |
+
-DCMAKE_BUILD_TYPE=Release `
|
| 1198 |
+
-DGGML_HIP=ON `
|
| 1199 |
+
-DGGML_RPC=ON
|
| 1200 |
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
| 1201 |
+
|
| 1202 |
+
windows-latest-cmake-hip-release:
|
| 1203 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1204 |
+
runs-on: windows-latest
|
| 1205 |
+
|
| 1206 |
+
strategy:
|
| 1207 |
+
matrix:
|
| 1208 |
+
gpu_target: [gfx1100, gfx1101, gfx1030]
|
| 1209 |
+
|
| 1210 |
+
steps:
|
| 1211 |
+
- name: Clone
|
| 1212 |
+
id: checkout
|
| 1213 |
+
uses: actions/checkout@v4
|
| 1214 |
+
with:
|
| 1215 |
+
fetch-depth: 0
|
| 1216 |
+
|
| 1217 |
+
- name: ccache
|
| 1218 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
| 1219 |
+
with:
|
| 1220 |
+
key: windows-latest-cmake-hip-release
|
| 1221 |
+
evict-old-files: 1d
|
| 1222 |
+
|
| 1223 |
+
- name: Install
|
| 1224 |
+
id: depends
|
| 1225 |
+
run: |
|
| 1226 |
+
$ErrorActionPreference = "Stop"
|
| 1227 |
+
write-host "Downloading AMD HIP SDK Installer"
|
| 1228 |
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
| 1229 |
+
write-host "Installing AMD HIP SDK"
|
| 1230 |
+
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
| 1231 |
+
write-host "Completed AMD HIP SDK installation"
|
| 1232 |
+
|
| 1233 |
+
- name: Verify ROCm
|
| 1234 |
+
id: verify
|
| 1235 |
+
run: |
|
| 1236 |
+
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
| 1237 |
+
|
| 1238 |
+
- name: Build
|
| 1239 |
+
id: cmake_build
|
| 1240 |
+
run: |
|
| 1241 |
+
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
| 1242 |
+
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
| 1243 |
+
cmake -G "Unix Makefiles" -B build -S . `
|
| 1244 |
+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
|
| 1245 |
+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
|
| 1246 |
+
-DCMAKE_BUILD_TYPE=Release `
|
| 1247 |
+
-DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
|
| 1248 |
+
-DGGML_HIP=ON `
|
| 1249 |
+
-DGGML_RPC=ON
|
| 1250 |
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
| 1251 |
+
md "build\bin\rocblas\library\"
|
| 1252 |
+
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
|
| 1253 |
+
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
|
| 1254 |
+
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
|
| 1255 |
+
|
| 1256 |
+
- name: Determine tag name
|
| 1257 |
+
id: tag
|
| 1258 |
+
shell: bash
|
| 1259 |
+
run: |
|
| 1260 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
| 1261 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
| 1262 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
| 1263 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
| 1264 |
+
else
|
| 1265 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
| 1266 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
| 1267 |
+
fi
|
| 1268 |
+
|
| 1269 |
+
- name: Pack artifacts
|
| 1270 |
+
id: pack_artifacts
|
| 1271 |
+
run: |
|
| 1272 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
|
| 1273 |
+
|
| 1274 |
+
- name: Upload artifacts
|
| 1275 |
+
uses: actions/upload-artifact@v4
|
| 1276 |
+
with:
|
| 1277 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
| 1278 |
+
name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
| 1279 |
+
|
| 1280 |
+
ios-xcode-build:
|
| 1281 |
+
runs-on: macos-latest
|
| 1282 |
+
|
| 1283 |
+
steps:
|
| 1284 |
+
- name: Checkout code
|
| 1285 |
+
uses: actions/checkout@v4
|
| 1286 |
+
|
| 1287 |
+
- name: Build
|
| 1288 |
+
id: cmake_build
|
| 1289 |
+
run: |
|
| 1290 |
+
sysctl -a
|
| 1291 |
+
cmake -B build -G Xcode \
|
| 1292 |
+
-DGGML_METAL_USE_BF16=ON \
|
| 1293 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
| 1294 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
| 1295 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
| 1296 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
| 1297 |
+
-DCMAKE_SYSTEM_NAME=iOS \
|
| 1298 |
+
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
| 1299 |
+
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
| 1300 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
| 1301 |
+
sudo cmake --install build --config Release
|
| 1302 |
+
|
| 1303 |
+
- name: xcodebuild for swift package
|
| 1304 |
+
id: xcodebuild
|
| 1305 |
+
run: |
|
| 1306 |
+
xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
|
| 1307 |
+
|
| 1308 |
+
- name: Build Xcode project
|
| 1309 |
+
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
|
| 1310 |
+
|
| 1311 |
+
android-build:
|
| 1312 |
+
runs-on: ubuntu-latest
|
| 1313 |
+
|
| 1314 |
+
steps:
|
| 1315 |
+
- name: Clone
|
| 1316 |
+
uses: actions/checkout@v4
|
| 1317 |
+
|
| 1318 |
+
- name: ccache
|
| 1319 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
| 1320 |
+
with:
|
| 1321 |
+
key: android-build
|
| 1322 |
+
evict-old-files: 1d
|
| 1323 |
+
|
| 1324 |
+
- name: Set up JDK
|
| 1325 |
+
uses: actions/setup-java@v3
|
| 1326 |
+
with:
|
| 1327 |
+
java-version: 17
|
| 1328 |
+
distribution: zulu
|
| 1329 |
+
|
| 1330 |
+
- name: Setup Android SDK
|
| 1331 |
+
uses: android-actions/setup-android@v3
|
| 1332 |
+
with:
|
| 1333 |
+
log-accepted-android-sdk-licenses: false
|
| 1334 |
+
|
| 1335 |
+
- name: Build
|
| 1336 |
+
run: |
|
| 1337 |
+
cd examples/llama.android
|
| 1338 |
+
|
| 1339 |
+
./gradlew build --no-daemon
|
| 1340 |
+
|
| 1341 |
+
release:
|
| 1342 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
| 1343 |
+
|
| 1344 |
+
runs-on: ubuntu-latest
|
| 1345 |
+
|
| 1346 |
+
needs:
|
| 1347 |
+
- ubuntu-cpu-cmake
|
| 1348 |
+
- windows-latest-cmake
|
| 1349 |
+
- windows-2019-cmake-cuda
|
| 1350 |
+
- windows-latest-cmake-hip-release
|
| 1351 |
+
- macOS-latest-cmake-arm64
|
| 1352 |
+
- macOS-latest-cmake-x64
|
| 1353 |
+
|
| 1354 |
+
steps:
|
| 1355 |
+
- name: Clone
|
| 1356 |
+
id: checkout
|
| 1357 |
+
uses: actions/checkout@v4
|
| 1358 |
+
with:
|
| 1359 |
+
fetch-depth: 0
|
| 1360 |
+
|
| 1361 |
+
- name: ccache
|
| 1362 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
| 1363 |
+
with:
|
| 1364 |
+
key: release
|
| 1365 |
+
evict-old-files: 1d
|
| 1366 |
+
|
| 1367 |
+
- name: Determine tag name
|
| 1368 |
+
id: tag
|
| 1369 |
+
shell: bash
|
| 1370 |
+
run: |
|
| 1371 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
| 1372 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
| 1373 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
| 1374 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
| 1375 |
+
else
|
| 1376 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
| 1377 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
| 1378 |
+
fi
|
| 1379 |
+
|
| 1380 |
+
- name: Download artifacts
|
| 1381 |
+
id: download-artifact
|
| 1382 |
+
uses: actions/download-artifact@v4
|
| 1383 |
+
with:
|
| 1384 |
+
path: ./artifact
|
| 1385 |
+
|
| 1386 |
+
- name: Move artifacts
|
| 1387 |
+
id: move_artifacts
|
| 1388 |
+
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
|
| 1389 |
+
|
| 1390 |
+
- name: Create release
|
| 1391 |
+
id: create_release
|
| 1392 |
+
uses: ggml-org/action-create-release@v1
|
| 1393 |
+
env:
|
| 1394 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 1395 |
+
with:
|
| 1396 |
+
tag_name: ${{ steps.tag.outputs.name }}
|
| 1397 |
+
|
| 1398 |
+
- name: Upload release
|
| 1399 |
+
id: upload_release
|
| 1400 |
+
uses: actions/github-script@v3
|
| 1401 |
+
with:
|
| 1402 |
+
github-token: ${{secrets.GITHUB_TOKEN}}
|
| 1403 |
+
script: |
|
| 1404 |
+
const path = require('path');
|
| 1405 |
+
const fs = require('fs');
|
| 1406 |
+
const release_id = '${{ steps.create_release.outputs.id }}';
|
| 1407 |
+
for (let file of await fs.readdirSync('./artifact/release')) {
|
| 1408 |
+
if (path.extname(file) === '.zip') {
|
| 1409 |
+
console.log('uploadReleaseAsset', file);
|
| 1410 |
+
await github.repos.uploadReleaseAsset({
|
| 1411 |
+
owner: context.repo.owner,
|
| 1412 |
+
repo: context.repo.repo,
|
| 1413 |
+
release_id: release_id,
|
| 1414 |
+
name: file,
|
| 1415 |
+
data: await fs.readFileSync(`./artifact/release/${file}`)
|
| 1416 |
+
});
|
| 1417 |
+
}
|
| 1418 |
+
}
|
| 1419 |
+
|
| 1420 |
+
# ubuntu-latest-gcc:
|
| 1421 |
+
# runs-on: ubuntu-latest
|
| 1422 |
+
#
|
| 1423 |
+
# strategy:
|
| 1424 |
+
# matrix:
|
| 1425 |
+
# build: [Debug, Release]
|
| 1426 |
+
#
|
| 1427 |
+
# steps:
|
| 1428 |
+
# - name: Clone
|
| 1429 |
+
# uses: actions/checkout@v4
|
| 1430 |
+
#
|
| 1431 |
+
# - name: Dependencies
|
| 1432 |
+
# run: |
|
| 1433 |
+
# sudo apt-get update
|
| 1434 |
+
# sudo apt-get install build-essential
|
| 1435 |
+
# sudo apt-get install cmake
|
| 1436 |
+
#
|
| 1437 |
+
# - name: Configure
|
| 1438 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
| 1439 |
+
#
|
| 1440 |
+
# - name: Build
|
| 1441 |
+
# run: |
|
| 1442 |
+
# make
|
| 1443 |
+
#
|
| 1444 |
+
# ubuntu-latest-clang:
|
| 1445 |
+
# runs-on: ubuntu-latest
|
| 1446 |
+
#
|
| 1447 |
+
# strategy:
|
| 1448 |
+
# matrix:
|
| 1449 |
+
# build: [Debug, Release]
|
| 1450 |
+
#
|
| 1451 |
+
# steps:
|
| 1452 |
+
# - name: Clone
|
| 1453 |
+
# uses: actions/checkout@v4
|
| 1454 |
+
#
|
| 1455 |
+
# - name: Dependencies
|
| 1456 |
+
# run: |
|
| 1457 |
+
# sudo apt-get update
|
| 1458 |
+
# sudo apt-get install build-essential
|
| 1459 |
+
# sudo apt-get install cmake
|
| 1460 |
+
#
|
| 1461 |
+
# - name: Configure
|
| 1462 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
| 1463 |
+
#
|
| 1464 |
+
# - name: Build
|
| 1465 |
+
# run: |
|
| 1466 |
+
# make
|
| 1467 |
+
#
|
| 1468 |
+
# ubuntu-latest-gcc-sanitized:
|
| 1469 |
+
# runs-on: ubuntu-latest
|
| 1470 |
+
#
|
| 1471 |
+
# strategy:
|
| 1472 |
+
# matrix:
|
| 1473 |
+
# sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
| 1474 |
+
#
|
| 1475 |
+
# steps:
|
| 1476 |
+
# - name: Clone
|
| 1477 |
+
# uses: actions/checkout@v4
|
| 1478 |
+
#
|
| 1479 |
+
# - name: Dependencies
|
| 1480 |
+
# run: |
|
| 1481 |
+
# sudo apt-get update
|
| 1482 |
+
# sudo apt-get install build-essential
|
| 1483 |
+
# sudo apt-get install cmake
|
| 1484 |
+
#
|
| 1485 |
+
# - name: Configure
|
| 1486 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
|
| 1487 |
+
#
|
| 1488 |
+
# - name: Build
|
| 1489 |
+
# run: |
|
| 1490 |
+
# make
|
| 1491 |
+
#
|
| 1492 |
+
# windows:
|
| 1493 |
+
# runs-on: windows-latest
|
| 1494 |
+
#
|
| 1495 |
+
# strategy:
|
| 1496 |
+
# matrix:
|
| 1497 |
+
# build: [Release]
|
| 1498 |
+
# arch: [Win32, x64]
|
| 1499 |
+
# include:
|
| 1500 |
+
# - arch: Win32
|
| 1501 |
+
# s2arc: x86
|
| 1502 |
+
# - arch: x64
|
| 1503 |
+
# s2arc: x64
|
| 1504 |
+
#
|
| 1505 |
+
# steps:
|
| 1506 |
+
# - name: Clone
|
| 1507 |
+
# uses: actions/checkout@v4
|
| 1508 |
+
#
|
| 1509 |
+
# - name: Add msbuild to PATH
|
| 1510 |
+
# uses: microsoft/setup-msbuild@v1
|
| 1511 |
+
#
|
| 1512 |
+
# - name: Configure
|
| 1513 |
+
# run: >
|
| 1514 |
+
# cmake -S . -B ./build -A ${{ matrix.arch }}
|
| 1515 |
+
# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
| 1516 |
+
#
|
| 1517 |
+
# - name: Build
|
| 1518 |
+
# run: |
|
| 1519 |
+
# cd ./build
|
| 1520 |
+
# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
| 1521 |
+
#
|
| 1522 |
+
# - name: Upload binaries
|
| 1523 |
+
# uses: actions/upload-artifact@v4
|
| 1524 |
+
# with:
|
| 1525 |
+
# name: llama-bin-${{ matrix.arch }}
|
| 1526 |
+
# path: build/bin/${{ matrix.build }}
|
| 1527 |
+
#
|
| 1528 |
+
# windows-blas:
|
| 1529 |
+
# runs-on: windows-latest
|
| 1530 |
+
#
|
| 1531 |
+
# strategy:
|
| 1532 |
+
# matrix:
|
| 1533 |
+
# build: [Release]
|
| 1534 |
+
# arch: [Win32, x64]
|
| 1535 |
+
# blas: [ON]
|
| 1536 |
+
# include:
|
| 1537 |
+
# - arch: Win32
|
| 1538 |
+
# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
|
| 1539 |
+
# s2arc: x86
|
| 1540 |
+
# - arch: x64
|
| 1541 |
+
# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
|
| 1542 |
+
# s2arc: x64
|
| 1543 |
+
#
|
| 1544 |
+
# steps:
|
| 1545 |
+
# - name: Clone
|
| 1546 |
+
# uses: actions/checkout@v4
|
| 1547 |
+
#
|
| 1548 |
+
# - name: Add msbuild to PATH
|
| 1549 |
+
# uses: microsoft/setup-msbuild@v1
|
| 1550 |
+
#
|
| 1551 |
+
# - name: Fetch OpenBLAS
|
| 1552 |
+
# if: matrix.blas == 'ON'
|
| 1553 |
+
# run: |
|
| 1554 |
+
# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
| 1555 |
+
# 7z x blas.zip -oblas -y
|
| 1556 |
+
# copy blas/include/cblas.h .
|
| 1557 |
+
# copy blas/include/openblas_config.h .
|
| 1558 |
+
# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
| 1559 |
+
#
|
| 1560 |
+
# - name: Configure
|
| 1561 |
+
# run: >
|
| 1562 |
+
# cmake -S . -B ./build -A ${{ matrix.arch }}
|
| 1563 |
+
# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
| 1564 |
+
# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
|
| 1565 |
+
# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
|
| 1566 |
+
#
|
| 1567 |
+
# - name: Build
|
| 1568 |
+
# run: |
|
| 1569 |
+
# cd ./build
|
| 1570 |
+
# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
| 1571 |
+
#
|
| 1572 |
+
# - name: Copy libopenblas.dll
|
| 1573 |
+
# if: matrix.blas == 'ON'
|
| 1574 |
+
# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
| 1575 |
+
#
|
| 1576 |
+
# - name: Upload binaries
|
| 1577 |
+
# if: matrix.blas == 'ON'
|
| 1578 |
+
# uses: actions/upload-artifact@v4
|
| 1579 |
+
# with:
|
| 1580 |
+
# name: llama-blas-bin-${{ matrix.arch }}
|
| 1581 |
+
# path: build/bin/${{ matrix.build }}
|
| 1582 |
+
#
|
| 1583 |
+
# emscripten:
|
| 1584 |
+
# runs-on: ubuntu-latest
|
| 1585 |
+
#
|
| 1586 |
+
# strategy:
|
| 1587 |
+
# matrix:
|
| 1588 |
+
# build: [Release]
|
| 1589 |
+
#
|
| 1590 |
+
# steps:
|
| 1591 |
+
# - name: Clone
|
| 1592 |
+
# uses: actions/checkout@v4
|
| 1593 |
+
#
|
| 1594 |
+
# - name: Dependencies
|
| 1595 |
+
# run: |
|
| 1596 |
+
# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
|
| 1597 |
+
# tar -xvf master.tar.gz
|
| 1598 |
+
# emsdk-master/emsdk update
|
| 1599 |
+
# emsdk-master/emsdk install latest
|
| 1600 |
+
# emsdk-master/emsdk activate latest
|
| 1601 |
+
#
|
| 1602 |
+
# - name: Configure
|
| 1603 |
+
# run: echo "tmp"
|
| 1604 |
+
#
|
| 1605 |
+
# - name: Build
|
| 1606 |
+
# run: |
|
| 1607 |
+
# pushd emsdk-master
|
| 1608 |
+
# source ./emsdk_env.sh
|
| 1609 |
+
# popd
|
| 1610 |
+
# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
| 1611 |
+
# make
|
| 1612 |
+
|
| 1613 |
+
openEuler-latest-cmake-cann:
|
| 1614 |
+
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
|
| 1615 |
+
defaults:
|
| 1616 |
+
run:
|
| 1617 |
+
shell: bash -el {0}
|
| 1618 |
+
runs-on: ubuntu-24.04-arm
|
| 1619 |
+
strategy:
|
| 1620 |
+
matrix:
|
| 1621 |
+
cann:
|
| 1622 |
+
- '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
|
| 1623 |
+
device:
|
| 1624 |
+
- 'ascend910b3'
|
| 1625 |
+
build:
|
| 1626 |
+
- 'Release'
|
| 1627 |
+
container: ascendai/cann:${{ matrix.cann }}
|
| 1628 |
+
steps:
|
| 1629 |
+
- name: Checkout
|
| 1630 |
+
uses: actions/checkout@v4
|
| 1631 |
+
|
| 1632 |
+
- name: Dependencies
|
| 1633 |
+
run: |
|
| 1634 |
+
yum update -y
|
| 1635 |
+
yum install -y git gcc gcc-c++ make cmake
|
| 1636 |
+
|
| 1637 |
+
- name: Build
|
| 1638 |
+
run: |
|
| 1639 |
+
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
|
| 1640 |
+
|
| 1641 |
+
cmake -S . -B build \
|
| 1642 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build }} \
|
| 1643 |
+
-DGGML_CANN=on \
|
| 1644 |
+
-DSOC_TYPE=${{ matrix.device }}
|
| 1645 |
+
cmake --build build -j $(nproc)
|
llama.cpp/.github/workflows/close-issue.yml
ADDED
@@ -0,0 +1,28 @@
name: Close inactive issues
on:
  schedule:
    - cron: "42 0 * * *"

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  issues: write

jobs:
  close-issues:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
          days-before-pr-stale: -1
          days-before-pr-close: -1
          operations-per-run: 10000
          repo-token: ${{ secrets.GITHUB_TOKEN }}
llama.cpp/.github/workflows/docker.yml
ADDED
@@ -0,0 +1,173 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.

name: Publish Docker image

on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because it is expensive
    - cron: '12 4 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  packages: write

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub

    runs-on: ubuntu-22.04
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      fail-fast: false
      matrix:
        config:
          # Multi-stage build
          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # preserve git history, so we can determine the build number

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
          REPO_NAME="${{ github.event.repository.name }}"

          # determine tag name postfix (build number, commit hash)
          if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
            TAG_POSTFIX="-b${BUILD_NUMBER}"
          else
            SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
            TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
          fi
          # list all tags possible
          if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
            TYPE=""
          else
            TYPE="-${{ matrix.config.tag }}"
          fi
          PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
          FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
          LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
          SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
          echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
          echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
          echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
          echo "full_output_tags=$FULLTAGS" # print out for debugging
          echo "light_output_tags=$LIGHTTAGS" # print out for debugging
          echo "server_output_tags=$SERVERTAGS" # print out for debugging
        env:
          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

      - name: Free Disk Space (Ubuntu)
        if: ${{ matrix.config.free_disk_space == true }}
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      - name: Build and push Full Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.full_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: full
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache

      - name: Build and push Light Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.light_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: light
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache

      - name: Build and push Server Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.server_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: server
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
llama.cpp/.github/workflows/editorconfig.yml
ADDED
@@ -0,0 +1,29 @@
name: EditorConfig Checker

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  editorconfig:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: editorconfig-checker/action-editorconfig-checker@v2
        with:
          version: v3.0.3
      - run: editorconfig-checker
llama.cpp/.github/workflows/gguf-publish.yml
ADDED
@@ -0,0 +1,44 @@
# This workflow will upload a Python Package using Twine when a GGUF release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# See `gguf-py/README.md` for how to make a release.

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  workflow_dispatch:
  push:
    # Pattern matched against refs/tags
    tags:
      - 'gguf-v*' # Push events to every version tag


jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9.x'
      - name: Install dependencies
        run: |
          cd gguf-py
          python -m pip install poetry
          poetry install

      - name: Build package
        run: cd gguf-py && poetry build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: gguf-py/dist
llama.cpp/.github/workflows/labeler.yml
ADDED
@@ -0,0 +1,17 @@
name: "Pull Request Labeler"
on:
- pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
      with:
        repository: "ggerganov/llama.cpp"
    - uses: actions/labeler@v5
      with:
        configuration-path: '.github/labeler.yml'
llama.cpp/.github/workflows/python-check-requirements.yml
ADDED
@@ -0,0 +1,33 @@
name: Python check requirements.txt

on:
  push:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - '**/requirements*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - '**/requirements*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-check-requirements:
    runs-on: ubuntu-latest
    name: check-requirements
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Run check-requirements.sh script
        run: bash scripts/check-requirements.sh
llama.cpp/.github/workflows/python-lint.yml
ADDED
@@ -0,0 +1,30 @@
name: flake8 Lint

on:
  push:
    branches:
      - master
    paths: ['.github/workflows/python-lint.yml', '**/*.py']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/python-lint.yml', '**/*.py']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
          plugins: "flake8-no-print"
llama.cpp/.github/workflows/python-type-check.yml
ADDED
@@ -0,0 +1,40 @@
name: Python Type-Check

on:
  push:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-type-check:
    runs-on: ubuntu-latest
    name: pyright type-check
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Python dependencies
        # TODO: use a venv
        run: pip install -r requirements/requirements-all.txt
      - name: Type-check with Pyright
        uses: jakebailey/pyright-action@v2
        with:
          version: 1.1.382
          level: warning
          warnings: true
llama.cpp/.github/workflows/server.yml
ADDED
@@ -0,0 +1,239 @@
# Server build and tests
name: Server

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      # Setup nodejs (to be used for verifying bundled index.html)
      - uses: actions/setup-node@v4
        with:
          node-version: '22.11.0'

      - name: WebUI - Install dependencies
        id: webui_lint
        run: |
          cd examples/server/webui
          npm ci

      - name: WebUI - Check code format
        id: webui_format
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server/webui
          git status

          npm run format
          git status
          modified_files="$(git status -s)"
          echo "Modified files: ${modified_files}"
          if [ -n "${modified_files}" ]; then
            echo "Files do not follow coding style. To fix: npm run format"
            echo "${modified_files}"
            exit 1
          fi

      - name: Verify bundled index.html
        id: verify_server_index_html
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server/webui
          git status

          npm run build
          git status
          modified_files="$(git status -s)"
          echo "Modified files: ${modified_files}"
          if [ -n "${modified_files}" ]; then
            echo "Repository is dirty or server/webui is not built as expected"
            echo "Hint: You may need to follow Web UI build guide in server/README.md"
            echo "${modified_files}"
            exit 1
          fi

      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
            -DGGML_OPENMP=OFF ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build (sanitizers)
        id: cmake_build_sanitizers
        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build (sanitizers)
        id: cmake_build
        if: ${{ matrix.sanitizer == '' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Tests
        id: server_integration_tests
        if: ${{ matrix.sanitizer == '' }}
        run: |
          cd examples/server/tests
          ./tests.sh

      - name: Tests (sanitizers)
        id: server_integration_tests_sanitizers
        if: ${{ matrix.sanitizer != '' }}
        run: |
          cd examples/server/tests
          LLAMA_SANITIZE=1 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          SLOW_TESTS=1 ./tests.sh


  server-windows:
    runs-on: windows-2019

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: 8.6.0_6
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      - name: Tests
        id: server_integration_tests
        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
        run: |
          cd examples/server/tests
          $env:PYTHONIOENCODING = ":replace"
          pytest -v -x -m "not slow"

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          $env:SLOW_TESTS = "1"
          pytest -v -x
llama.cpp/.gitignore
ADDED
@@ -0,0 +1,145 @@
+# Extensions
+
+*.a
+*.bat
+*.bin
+*.d
+*.dll
+*.dot
+*.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
+*.lastModified
+*.log
+*.metallib
+*.o
+*.so
+*.swp
+*.tmp
+
+# IDE / OS
+
+.cache/
+.ccls-cache/
+.direnv/
+.DS_Store
+.envrc
+.idea/
+.swiftpm
+.vs/
+.vscode/
+nppBackup
+
+
+# Coverage
+
+gcovr-report/
+lcov-report/
+
+# Build Artifacts
+
+tags
+.build/
+build*
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
+!build.zig
+!docs/build.md
+/libllama.so
+/llama-*
+/vulkan-shaders-gen
+android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
+/rpc-server
+out/
+tmp/
+autogen-*.md
+
+# Deprecated
+
+/main
+/server
+
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
+models/*
+models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
+
+# Zig
+zig-out/
+zig-cache/
+
+# Logs
+
+ppl-*.txt
+qnt-*.txt
+perf-*.txt
+
+# Examples
+
+examples/jeopardy/results.txt
+examples/server/*.css.hpp
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
+
+# Server Web UI temporary files
+node_modules
+examples/server/webui/dist
+
+# Python
+
+/.venv
+__pycache__/
+*/poetry.lock
+poetry.toml
+
+# Nix
+/result
+
+# Test binaries
+/tests/test-backend-ops
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-rope
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-bpe
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/lora-tests
+
+# Local scripts
+/run-vim.sh
+/run-chat.sh
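The Models block above combines a broad models/* ignore with negated exceptions, and precedence between them is easy to misread. git check-ignore -v reports the highest-precedence matching pattern for a path, including negations. A small sketch (the two file names are hypothetical, and the printed output is approximate):

  # Which .gitignore rule applies to a given path? -v prints the matching pattern.
  git check-ignore -v models/7B/model.gguf
  #   .gitignore:79:models/*    models/7B/model.gguf          (ignored)

  git check-ignore -v models/ggml-vocab-llama.gguf
  #   .gitignore:82:!models/ggml-vocab-*.gguf*    ...          (negated match: stays tracked)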
llama.cpp/.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "kompute"]
+	path = ggml/src/ggml-kompute/kompute
+	url = https://github.com/nomic-ai/kompute.git