Files
agents/llm.py
2026-03-21 14:24:10 +01:00

27 lines
894 B
Python

import requests
import json

# Start the model locally first:
#   docker exec -it ollama ollama run llama3.2
# Local Ollama API endpoint; alternatives seen in earlier revisions:
#   http://192.168.50.14:11434/api/generate, http://localhost:11434/api/generate
url = "http://192.168.50.215:11434/api/generate"

# Prompt payload for the llama3.2 model.
payload = {
    "model": "llama3.2",
    "prompt": "list all running docker containers",
}

# stream=True tells requests to read the response as a live data stream.
# timeout=(connect, read) prevents hanging forever when the server is
# unreachable or stops producing tokens; the context manager guarantees
# the underlying connection is released.
with requests.post(url, json=payload, stream=True, timeout=(5, 120)) as response:
    # Fail fast with a clear error on HTTP problems (e.g. model not pulled).
    response.raise_for_status()
    # Ollama sends one JSON object per line as it generates text.
    for line in response.iter_lines():
        if not line:
            continue  # skip keep-alive blank lines
        data = json.loads(line.decode("utf-8"))
        # Each chunk has a "response" key containing part of the text.
        if "response" in data:
            print(data["response"], end="", flush=True)