Upload folder using huggingface_hub
Browse files
Dockerfile
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
|
| 2 |
ENV DEBIAN_FRONTEND=noninteractive
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
RUN apt-get update && apt-get install -y
|
| 4 |
RUN apt-get update && \
|
| 5 |
apt-get upgrade -y
|
|
@@ -72,7 +77,8 @@ RUN pip install git+https://github.com/huggingface/transformers.git --break-syst
|
|
| 72 |
|
| 73 |
|
| 74 |
|
| 75 |
-
|
|
|
|
| 76 |
|
| 77 |
ENV PYTHONPATH=${HOME}/app \
|
| 78 |
PYTHONUNBUFFERED=1 \
|
|
|
|
| 1 |
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
|
| 2 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 3 |
+
|
| 4 |
+
ARG HF_TOKEN
|
| 5 |
+
|
| 6 |
+
ENV HF_TOKEN=$HF_TOKEN
|
| 7 |
+
|
| 8 |
RUN apt-get update && apt-get install -y
|
| 9 |
RUN apt-get update && \
|
| 10 |
apt-get upgrade -y
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
|
| 80 |
+
RUN hf download lainlives/llama.cpp --local-dir /usr/bin/ --token $HF_TOKEN
|
| 81 |
+
RUN chmod +x /usr/bin/llama-*
|
| 82 |
|
| 83 |
ENV PYTHONPATH=${HOME}/app \
|
| 84 |
PYTHONUNBUFFERED=1 \
|
README.md
CHANGED
|
@@ -8,5 +8,3 @@ pinned: false
|
|
| 8 |
suggested_hardware: "a10g-large"
|
| 9 |
disable_embedding: true
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 8 |
suggested_hardware: "a10g-large"
|
| 9 |
disable_embedding: true
|
| 10 |
---
|
|
|
|
|
|
app.py
CHANGED
|
@@ -63,19 +63,14 @@ def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
|
|
| 63 |
logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
|
| 64 |
|
| 65 |
try:
|
| 66 |
-
# 2. Run 'ollama create' via CLI
|
| 67 |
-
# This replaces ollama.create(...)
|
| 68 |
create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
|
| 69 |
subprocess.run(create_cmd, check=True, capture_output=True)
|
| 70 |
|
| 71 |
-
# Clean up the temporary Modelfile
|
| 72 |
if modelfile_path.exists():
|
| 73 |
os.remove(modelfile_path)
|
| 74 |
|
| 75 |
logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
|
| 76 |
|
| 77 |
-
# 3. Run 'ollama push' via CLI
|
| 78 |
-
# This replaces ollama.push(...)
|
| 79 |
push_cmd = ["ollama", "push", ollama_tag]
|
| 80 |
push_result = subprocess.run(push_cmd, capture_output=True, text=True)
|
| 81 |
|
|
@@ -84,7 +79,7 @@ def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
|
|
| 84 |
else:
|
| 85 |
logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
|
| 86 |
|
| 87 |
-
#
|
| 88 |
subprocess.run(["ollama", "rm", ollama_tag], stdout=subprocess.DEVNULL)
|
| 89 |
|
| 90 |
except subprocess.CalledProcessError as e:
|
|
@@ -111,13 +106,16 @@ def start_ollama_daemon(ollama_key):
|
|
| 111 |
process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, env=env)
|
| 112 |
pid = process.pid
|
| 113 |
logs.append(format_log("⏳ Starting Ollama daemon in background..."))
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
def stop_ollama_daemon(pid):
|
| 118 |
print("⏳ Stopping Ollama daemon...")
|
| 119 |
-
|
|
|
|
| 120 |
subprocess.Popen(["pkill", "ollama"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
|
|
@@ -150,7 +148,7 @@ def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
|
|
| 150 |
logs.append(format_log("⚙️ Converting to BF16..."))
|
| 151 |
yield "\n".join(logs)
|
| 152 |
|
| 153 |
-
cmd = [
|
| 154 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 155 |
|
| 156 |
if result.returncode == 0:
|
|
@@ -166,7 +164,7 @@ def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
|
|
| 166 |
logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
|
| 167 |
yield "\n".join(logs)
|
| 168 |
|
| 169 |
-
cmd = [
|
| 170 |
subprocess.run(cmd, check=True, capture_output=True)
|
| 171 |
|
| 172 |
logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
|
|
@@ -209,6 +207,7 @@ def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progres
|
|
| 209 |
# We yield from the generator
|
| 210 |
for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
|
| 211 |
yield update
|
|
|
|
| 212 |
stop_ollama_daemon(pid)
|
| 213 |
|
| 214 |
|
|
|
|
| 63 |
logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
|
| 64 |
|
| 65 |
try:
|
|
|
|
|
|
|
| 66 |
create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
|
| 67 |
subprocess.run(create_cmd, check=True, capture_output=True)
|
| 68 |
|
|
|
|
| 69 |
if modelfile_path.exists():
|
| 70 |
os.remove(modelfile_path)
|
| 71 |
|
| 72 |
logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
|
| 73 |
|
|
|
|
|
|
|
| 74 |
push_cmd = ["ollama", "push", ollama_tag]
|
| 75 |
push_result = subprocess.run(push_cmd, capture_output=True, text=True)
|
| 76 |
|
|
|
|
| 79 |
else:
|
| 80 |
logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
|
| 81 |
|
| 82 |
+
# Remove the local tag to save disk space in the container
|
| 83 |
subprocess.run(["ollama", "rm", ollama_tag], stdout=subprocess.DEVNULL)
|
| 84 |
|
| 85 |
except subprocess.CalledProcessError as e:
|
|
|
|
| 106 |
process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, env=env)
|
| 107 |
pid = process.pid
|
| 108 |
logs.append(format_log("⏳ Starting Ollama daemon in background..."))
|
| 109 |
+
sleep(2)
|
| 110 |
+
return pid, logs
|
| 111 |
|
| 112 |
|
| 113 |
def stop_ollama_daemon(pid):
|
| 114 |
print("⏳ Stopping Ollama daemon...")
|
| 115 |
+
logs.append(format_log("⏳ Stopping Ollama daemon..."))
|
| 116 |
+
os.kill(pid, signal.SIGQUIT)
|
| 117 |
subprocess.Popen(["pkill", "ollama"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 118 |
+
return logs
|
| 119 |
|
| 120 |
|
| 121 |
def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
|
|
|
|
| 148 |
logs.append(format_log("⚙️ Converting to BF16..."))
|
| 149 |
yield "\n".join(logs)
|
| 150 |
|
| 151 |
+
cmd = [str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
|
| 152 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 153 |
|
| 154 |
if result.returncode == 0:
|
|
|
|
| 164 |
logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
|
| 165 |
yield "\n".join(logs)
|
| 166 |
|
| 167 |
+
cmd = [str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
|
| 168 |
subprocess.run(cmd, check=True, capture_output=True)
|
| 169 |
|
| 170 |
logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
|
|
|
|
| 207 |
# We yield from the generator
|
| 208 |
for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
|
| 209 |
yield update
|
| 210 |
+
sleep(10)
|
| 211 |
stop_ollama_daemon(pid)
|
| 212 |
|
| 213 |
|
start.sh
CHANGED
|
@@ -1,24 +1,12 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
-
export CMAKE_CUDA_ARCHITECTURES="all"
|
| 4 |
-
cd /app && \
|
| 5 |
-
git clone --recursive https://github.com/ggerganov/llama.cpp && \
|
| 6 |
-
cd llama.cpp && \
|
| 7 |
-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON && \
|
| 8 |
-
cmake --build build --config Release --parallel 32 && \
|
| 9 |
-
cp ./build/bin/llama-* /usr/bin/ && \
|
| 10 |
-
hf upload --repo-type model lainlives/ztestzz ./build/bin/
|
| 11 |
-
cp convert_hf_to_gguf.py /app/convert_hf_to_gguf && \
|
| 12 |
-
rm -rf build && \
|
| 13 |
-
cd ..
|
| 14 |
|
| 15 |
-
python3 /app/tmp.py
|
| 16 |
|
| 17 |
|
| 18 |
|
| 19 |
-
ollama serve & # >/dev/null 2>&1 &
|
| 20 |
-
PID="$!"
|
| 21 |
-
disown "$PID"
|
| 22 |
|
| 23 |
|
| 24 |
python3 app.py
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
+
# ollama serve & # >/dev/null 2>&1 &
|
| 8 |
+
# PID="$!"
|
| 9 |
+
# disown "$PID"
|
| 10 |
|
| 11 |
|
| 12 |
python3 app.py
|