lainlives committed on
Commit
7d36c43
·
verified ·
1 Parent(s): 2d10560

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. Dockerfile +7 -1
  2. README.md +0 -2
  3. app.py +9 -10
  4. start.sh +3 -15
Dockerfile CHANGED
@@ -1,5 +1,10 @@
1
  FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
2
  ENV DEBIAN_FRONTEND=noninteractive
 
 
 
 
 
3
  RUN apt-get update && apt-get install -y
4
  RUN apt-get update && \
5
  apt-get upgrade -y
@@ -72,7 +77,8 @@ RUN pip install git+https://github.com/huggingface/transformers.git --break-syst
72
 
73
 
74
 
75
-
 
76
 
77
  ENV PYTHONPATH=${HOME}/app \
78
  PYTHONUNBUFFERED=1 \
 
1
  FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
2
  ENV DEBIAN_FRONTEND=noninteractive
3
+
4
+ ARG HF_TOKEN
5
+
6
+ ENV HF_TOKEN=$HF_TOKEN
7
+
8
  RUN apt-get update && apt-get install -y
9
  RUN apt-get update && \
10
  apt-get upgrade -y
 
77
 
78
 
79
 
80
+ RUN hf download lainlives/llama.cpp --local-dir /usr/bin/ --token $HF_TOKEN
81
+ RUN chmod +x /usr/bin/llama-*
82
 
83
  ENV PYTHONPATH=${HOME}/app \
84
  PYTHONUNBUFFERED=1 \
README.md CHANGED
@@ -8,5 +8,3 @@ pinned: false
8
  suggested_hardware: "a10g-large"
9
  disable_embedding: true
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  suggested_hardware: "a10g-large"
9
  disable_embedding: true
10
  ---
 
 
app.py CHANGED
@@ -63,19 +63,14 @@ def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
63
  logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
64
 
65
  try:
66
- # 2. Run 'ollama create' via CLI
67
- # This replaces ollama.create(...)
68
  create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
69
  subprocess.run(create_cmd, check=True, capture_output=True)
70
 
71
- # Clean up the temporary Modelfile
72
  if modelfile_path.exists():
73
  os.remove(modelfile_path)
74
 
75
  logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
76
 
77
- # 3. Run 'ollama push' via CLI
78
- # This replaces ollama.push(...)
79
  push_cmd = ["ollama", "push", ollama_tag]
80
  push_result = subprocess.run(push_cmd, capture_output=True, text=True)
81
 
@@ -84,7 +79,7 @@ def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
84
  else:
85
  logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
86
 
87
- # Optional: Remove the local tag to save disk space in the container
88
  subprocess.run(["ollama", "rm", ollama_tag], stdout=subprocess.DEVNULL)
89
 
90
  except subprocess.CalledProcessError as e:
@@ -111,13 +106,16 @@ def start_ollama_daemon(ollama_key):
111
  process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, env=env)
112
  pid = process.pid
113
  logs.append(format_log("⏳ Starting Ollama daemon in background..."))
114
- return pid
 
115
 
116
 
117
  def stop_ollama_daemon(pid):
118
  print("⏳ Stopping Ollama daemon...")
119
- os.kill(pid, signal.SIGKILL)
 
120
  subprocess.Popen(["pkill", "ollama"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
121
 
122
 
123
  def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
@@ -150,7 +148,7 @@ def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
150
  logs.append(format_log("⚙️ Converting to BF16..."))
151
  yield "\n".join(logs)
152
 
153
- cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
154
  result = subprocess.run(cmd, capture_output=True, text=True)
155
 
156
  if result.returncode == 0:
@@ -166,7 +164,7 @@ def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
166
  logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
167
  yield "\n".join(logs)
168
 
169
- cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
170
  subprocess.run(cmd, check=True, capture_output=True)
171
 
172
  logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
@@ -209,6 +207,7 @@ def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progres
209
  # We yield from the generator
210
  for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
211
  yield update
 
212
  stop_ollama_daemon(pid)
213
 
214
 
 
63
  logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
64
 
65
  try:
 
 
66
  create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
67
  subprocess.run(create_cmd, check=True, capture_output=True)
68
 
 
69
  if modelfile_path.exists():
70
  os.remove(modelfile_path)
71
 
72
  logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
73
 
 
 
74
  push_cmd = ["ollama", "push", ollama_tag]
75
  push_result = subprocess.run(push_cmd, capture_output=True, text=True)
76
 
 
79
  else:
80
  logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
81
 
82
+ # Remove the local tag to save disk space in the container
83
  subprocess.run(["ollama", "rm", ollama_tag], stdout=subprocess.DEVNULL)
84
 
85
  except subprocess.CalledProcessError as e:
 
106
  process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, env=env)
107
  pid = process.pid
108
  logs.append(format_log("⏳ Starting Ollama daemon in background..."))
109
+ sleep(2)
110
+ return pid, logs
111
 
112
 
113
  def stop_ollama_daemon(pid):
114
  print("⏳ Stopping Ollama daemon...")
115
+ logs.append(format_log("⏳ Stopping Ollama daemon..."))
116
+ os.kill(pid, signal.SIGQUIT)
117
  subprocess.Popen(["pkill", "ollama"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
118
+ return logs
119
 
120
 
121
  def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
 
148
  logs.append(format_log("⚙️ Converting to BF16..."))
149
  yield "\n".join(logs)
150
 
151
+ cmd = [str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
152
  result = subprocess.run(cmd, capture_output=True, text=True)
153
 
154
  if result.returncode == 0:
 
164
  logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
165
  yield "\n".join(logs)
166
 
167
+ cmd = [str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
168
  subprocess.run(cmd, check=True, capture_output=True)
169
 
170
  logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
 
207
  # We yield from the generator
208
  for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
209
  yield update
210
+ sleep(10)
211
  stop_ollama_daemon(pid)
212
 
213
 
start.sh CHANGED
@@ -1,24 +1,12 @@
1
  #!/bin/bash
2
 
3
- export CMAKE_CUDA_ARCHITECTURES="all"
4
- cd /app && \
5
- git clone --recursive https://github.com/ggerganov/llama.cpp && \
6
- cd llama.cpp && \
7
- cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON && \
8
- cmake --build build --config Release --parallel 32 && \
9
- cp ./build/bin/llama-* /usr/bin/ && \
10
- hf upload --repo-type model lainlives/ztestzz ./build/bin/
11
- cp convert_hf_to_gguf.py /app/convert_hf_to_gguf && \
12
- rm -rf build && \
13
- cd ..
14
 
15
- python3 /app/tmp.py
16
 
17
 
18
 
19
- ollama serve & # >/dev/null 2>&1 &
20
- PID="$!"
21
- disown "$PID"
22
 
23
 
24
  python3 app.py
 
1
  #!/bin/bash
2
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
4
 
5
 
6
 
7
+ # ollama serve & # >/dev/null 2>&1 &
8
+ # PID="$!"
9
+ # disown "$PID"
10
 
11
 
12
  python3 app.py