"""Stream a completion from a local Ollama server and print it as it arrives.

The target model must already be available on the server, e.g.:
    docker exec -it ollama ollama run llama3.2
"""
import json

import requests

# Local Ollama /api/generate endpoint.
OLLAMA_URL = "http://192.168.50.215:11434/api/generate"


def stream_generate(prompt: str, model: str = "llama3.2", url: str = OLLAMA_URL) -> None:
    """Send *prompt* to the Ollama generate endpoint and print the streamed text.

    Args:
        prompt: The prompt to send to the model.
        model: Ollama model name (default "llama3.2").
        url: The /api/generate endpoint to POST to.

    Raises:
        requests.HTTPError: on a non-2xx response status.
        requests.RequestException: on connection/timeout problems.
    """
    payload = {"model": model, "prompt": prompt}
    # stream=True makes requests yield the body incrementally instead of
    # buffering it all; the `with` block guarantees the connection is released.
    # timeout=(connect, read): fail fast if the server is down, but allow a
    # long read window since generation between chunks can be slow.
    with requests.post(url, json=payload, stream=True, timeout=(5, 120)) as response:
        # Fail fast on 4xx/5xx instead of trying to parse an error body as chunks.
        response.raise_for_status()
        # Ollama emits one JSON object per line as tokens are generated.
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode("utf-8"))
            # Each chunk carries part of the generated text under "response".
            if "response" in data:
                print(data["response"], end="", flush=True)


if __name__ == "__main__":
    stream_generate("list all running docker containers")