# (paste metadata: 27 lines, 894 B, Python)
"""Stream a completion from a local Ollama server and print it as it arrives.

Start the model first, e.g.:
    docker exec -it ollama ollama run llama3.2
"""

import json

import requests

# Local Ollama API endpoint (generate API on the LAN host running Ollama).
OLLAMA_URL = "http://192.168.50.215:11434/api/generate"

# Model and prompt sent to Ollama's /api/generate endpoint.
DEFAULT_PAYLOAD = {
    "model": "llama3.2",
    "prompt": "list all running docker containers",
}


def stream_generate(url: str = OLLAMA_URL, payload: dict | None = None) -> str:
    """Send *payload* to an Ollama generate endpoint and stream the reply.

    Prints each text fragment as it arrives and returns the full
    concatenated response text.

    Args:
        url: Ollama /api/generate endpoint.
        payload: Request body (``model`` + ``prompt``); defaults to
            ``DEFAULT_PAYLOAD``.

    Raises:
        requests.HTTPError: if the server answers with a non-2xx status.
        requests.RequestException: on connection problems or timeout.
    """
    if payload is None:
        payload = DEFAULT_PAYLOAD

    chunks: list[str] = []
    # stream=True makes requests read the response as a live data stream;
    # the (connect, read) timeout prevents hanging forever on a dead host.
    with requests.post(url, json=payload, stream=True, timeout=(5, 300)) as response:
        # Fail loudly on HTTP errors instead of trying to JSON-parse an error page.
        response.raise_for_status()

        # Ollama sends one JSON object per line as it generates text.
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode("utf-8"))
            # Each chunk has a "response" key containing part of the text.
            if "response" in data:
                fragment = data["response"]
                print(fragment, end="", flush=True)
                chunks.append(fragment)
            # The final chunk is marked with "done": true — stop reading then.
            if data.get("done"):
                break

    return "".join(chunks)


if __name__ == "__main__":
    stream_generate()