#!/usr/bin/env python3
# ask_llm.py — send a prompt to an Ollama server and stream the reply to stdout.
import requests, json
|
|
import getopt
|
|
import sys
|
|
from enum import IntEnum
|
|
|
|
#docker exec -it ollama ollama run llama3.2
|
|
|
|
class State(IntEnum):
    """Lifecycle of one streamed LLM request.

    IntEnum so states compare equal to their integer values and order
    naturally (IDLE < THINKING < ... < DONE).
    """

    IDLE = 0        # no request in flight
    THINKING = 1    # model is emitting 'thinking' tokens
    RESPONDING = 2  # model is emitting 'response' tokens
    DONE = 3        # stream finished (normally or with an error)
|
# Module-level stream state; written by update_state() and read wherever the
# current request phase matters.
state = State.IDLE
|
# TODO: add a terminal progress spinner while waiting for the first token
# (reference: https://www.youtube.com/watch?v=vk_s-gDTqkM)
|
def _usage():
    """Print command-line help for the script to stdout.

    Note: the defaults shown here must match the actual defaults in main()
    (IP 192.168.50.14, model "llama3.2").
    """
    print("Usage: ask_llm.py [options] \"your question here\"")
    print("Options:")
    print(" -h, --help Show this help message and exit")
    print(" -p, --print Print the command without executing (dry run)")
    # Fixed: previously advertised 192.168.50.215, but main() defaults to
    # 192.168.50.14 — the help text was lying about the default.
    print(" -i, --ip=IP Specify the IP address of the Ollama server (default: 192.168.50.14)")
    print(" -m, --model=MODEL Specify the model to use (default: llama3.2)")
    print("Example:")
    print('e.g python3 ask_llm.py --ip="192.168.50.14" -m "dolphin3:8b" "summarize this file: $(cat docker_log_pihole.txt)"')
|
def update_state(new_state):
    """Advance the module-level stream state, announcing each transition.

    Prints a markdown-style separator whenever the state actually changes;
    calling it again with the current state is a silent no-op.
    """
    global state

    # Guard clause: nothing to do when the state is unchanged.
    if new_state == state:
        return

    print(f"\n\n---\n\n## State changed from {state.name} to {new_state.name}\n\n")
    state = new_state
|
def ask_llm(url, payload):
    """Stream a generate request to the Ollama HTTP API and print the reply.

    Parameters
    ----------
    url : str
        Full endpoint URL, e.g. "http://host:11434/api/generate".
    payload : dict
        JSON request body ("model", "prompt", ...).

    Side effects: prints thinking/response tokens as they arrive, drives the
    module-level state machine via update_state(), and prints timing stats
    when the stream reports completion.
    """
    # stream=True tells requests to read the response as a live data stream.
    # The `with` block ensures the connection is released even on error
    # (the original never closed the Response).
    with requests.post(url, json=payload, stream=True) as response:
        # Ollama sends one JSON object per line as it generates text.
        for line in response.iter_lines():
            if not line:  # keep-alive / blank lines between chunks
                continue
            data = json.loads(line.decode("utf-8"))

            # An error line terminates this request: report and skip the rest.
            # (The previous try/except around this membership test was dead
            # weight — `'error' in data` on a dict cannot raise.)
            if 'error' in data:
                print("Error from Ollama API: " + data['error'])
                update_state(State.DONE)
                continue

            # Reasoning tokens (only emitted by "thinking" models).
            if data.get('thinking'):
                update_state(State.THINKING)
                print(data['thinking'], end="", flush=True)

            # Answer tokens.
            if data.get('response'):
                update_state(State.RESPONDING)
                print(data['response'], end="", flush=True)

            # Final message carries run statistics. Read them with .get():
            # the stats keys are not guaranteed on every final message, and
            # the original data['...'] lookups could raise KeyError mid-print.
            if data.get('done'):
                update_state(State.DONE)
                print('done_reason: ' + str(data.get('done_reason')))
                print('total_duration: ' + str(data.get('total_duration')))
                print('load_duration: ' + str(data.get('load_duration')))
                print('prompt_eval_count: ' + str(data.get('prompt_eval_count')))
                print('prompt_eval_duration: ' + str(data.get('prompt_eval_duration')))
                print('eval_count: ' + str(data.get('eval_count')))
                print('eval_duration: ' + str(data.get('eval_duration')))
                print('\n')
|
def main():
    """Parse command-line options and send the prompt to the Ollama server.

    Exits with status 2 on bad options or a missing prompt; exits 0 after
    printing help. The prompt is the first positional argument.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "phi:m:", ["ip=", "print", "help", "model="])
    except getopt.GetoptError as err:
        print(err)  # will print something like "option -a not recognized"
        sys.exit(2)

    _ip = "192.168.50.14"
    _model = "llama3.2"  # or "gpt-oss:20b" or "gemma3:4b" or "qwen3:8b"
    _dry_run = False

    ### parse input command line
    for o, a in opts:
        if o in ("-h", "--help"):
            _usage()
            sys.exit()
        elif o in ("-p", "--print"):
            # Fixed: -p/--print was advertised in the help but silently
            # ignored; it now enables the dry run below.
            _dry_run = True
        elif o in ("-i", "--ip"):
            _ip = a
        elif o in ("-m", "--model"):
            _model = a
        else:
            # Unreachable while the getopt spec matches the branches above;
            # raise instead of `assert` so it survives `python -O`.
            raise RuntimeError(f"unhandled option: {o}")

    # Fixed: `args == ""` could never be true — getopt returns a list, so
    # emptiness is the only check needed.
    if not args:
        print("No command provided. Use -h for help.")
        sys.exit(2)

    _url = "http://" + _ip + ":11434/api/generate"

    # Send a prompt to the model (only the first positional arg is used).
    payload = {
        "model": _model,
        "prompt": args[0]
    }

    if _dry_run:
        # Show exactly what would be sent, then exit without contacting
        # the server.
        print("POST " + _url)
        print(json.dumps(payload, indent=2))
        return

    ask_llm(_url, payload)
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()