# (paste metadata: 27 lines, 894 B, Python)
"""Stream a completion from a local Ollama server and print it as it arrives.

Start the model first, e.g.:
    docker exec -it ollama ollama run llama3.2
"""

import json

import requests

# Local Ollama API endpoint (generate API on the LAN host running Ollama).
OLLAMA_URL = "http://192.168.50.215:11434/api/generate"

# Model and prompt sent to Ollama's /api/generate endpoint.
DEFAULT_PAYLOAD = {
    "model": "llama3.2",
    "prompt": "list all running docker containers",
}


def stream_generate(url: str = OLLAMA_URL, payload: dict | None = None) -> str:
    """Send *payload* to an Ollama generate endpoint and stream the reply.

    Prints each text fragment as it arrives and returns the full
    concatenated response text.

    Args:
        url: Ollama /api/generate endpoint.
        payload: Request body (``model`` + ``prompt``); defaults to
            ``DEFAULT_PAYLOAD``.

    Raises:
        requests.HTTPError: if the server answers with a non-2xx status.
        requests.RequestException: on connection problems or timeout.
    """
    if payload is None:
        payload = DEFAULT_PAYLOAD

    chunks: list[str] = []
    # stream=True makes requests read the response as a live data stream;
    # the (connect, read) timeout prevents hanging forever on a dead host.
    with requests.post(url, json=payload, stream=True, timeout=(5, 300)) as response:
        # Fail loudly on HTTP errors instead of trying to JSON-parse an error page.
        response.raise_for_status()

        # Ollama sends one JSON object per line as it generates text.
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode("utf-8"))
            # Each chunk has a "response" key containing part of the text.
            if "response" in data:
                fragment = data["response"]
                print(fragment, end="", flush=True)
                chunks.append(fragment)
            # The final chunk is marked with "done": true — stop reading then.
            if data.get("done"):
                break

    return "".join(chunks)


if __name__ == "__main__":
    stream_generate()