Note: in this tutorial I'm using the Mistral model rather than a Llama model; you can use whichever model you like.
pip install llama-stack
mkdir -p ~/local
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git
conda create -n stack python=3.10
conda activate stack
cd llama-stack
$CONDA_PREFIX/bin/pip install -e .
pip install ollama llamastack fastapi uvicorn aiosqlite
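To confirm the packages landed in the active environment, a quick import check helps before moving on. This only verifies that the modules resolve (module names here assume the editable llama-stack install above plus the pip packages just listed):
# Optional sanity check for the installed packages
python -c "import llama_stack, ollama, fastapi, uvicorn, aiosqlite; print('all imports ok')"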
# Start Ollama server
ollama serve
# In another terminal, start the model
ollama run mistral --keepalive 60m
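Before wiring up LlamaStack, it's worth hitting Ollama's HTTP API directly to confirm the model actually responds. This assumes Ollama is listening on its default port 11434:
# Quick smoke test against Ollama's generate endpoint
curl http://localhost:11434/api/generate -d '{"model": "mistral", "prompt": "Say hello", "stream": false}'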
Verify model status:
ollama ps # Should show mistral:latest running
ollama list # Should show available models
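Since the ollama Python package was installed earlier, you can also confirm the server is reachable programmatically (the exact output shape varies between ollama package versions):
# List models through the Python client instead of the CLI
python -c "import ollama; print(ollama.list())"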
Create run.yaml:
version: '2'
image_name: ollama
docker_image: null
conda_env: local
apis:
- inference
providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      host: localhost
      port: 11434
models:
- model_id: mistral:latest
  provider_id: ollama
  metadata:
    llama_model: meta-llama/Llama-2-7b # Required by LlamaStack's model registry
Note: the llama_model field is required because LlamaStack's model registry expects a Llama model for validation, even when you are actually serving a different model such as Mistral through Ollama.
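With run.yaml in place, the stack can be started against the Ollama provider. Treat this as a sketch: the exact flags and default port differ between llama-stack releases (older builds default to port 5000, newer ones to 8321):
# Start the LlamaStack server using the config above
llama stack run ./run.yaml --port 5000
Once the server is up, a client such as the llama-stack-client package (installed separately via pip) can be pointed at http://localhost:5000 to send inference requests to mistral:latest.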