#!/usr/bin/env python3
# ask_llm.py — send a prompt to an Ollama server and stream the reply to stdout.
import requests, json
|
|
import getopt
|
|
import sys
|
|
from enum import IntEnum
|
|
|
|
#docker exec -it ollama ollama run llama3.2
|
|
|
|
class State(IntEnum):
    """Lifecycle of one streamed LLM request.

    IntEnum so states compare equal to their integer values and order
    naturally (IDLE < THINKING < ... < DONE).
    """

    IDLE = 0        # no request in flight
    THINKING = 1    # model is emitting 'thinking' tokens
    RESPONDING = 2  # model is emitting 'response' tokens
    DONE = 3        # stream finished (normally or with an error)
|
# Module-level stream state; written by update_state() and read wherever the
# current request phase matters.
state = State.IDLE
|
# TODO: add a terminal progress spinner while waiting for the first token
# (reference: https://www.youtube.com/watch?v=vk_s-gDTqkM)
|
def _usage():
    """Print command-line help for the script to stdout.

    Note: the defaults shown here must match the actual defaults in main()
    (IP 192.168.50.14, model "llama3.2").
    """
    print("Usage: ask_llm.py [options] \"your question here\"")
    print("Options:")
    print(" -h, --help Show this help message and exit")
    print(" -p, --print Print the command without executing (dry run)")
    # Fixed: previously advertised 192.168.50.215, but main() defaults to
    # 192.168.50.14 — the help text was lying about the default.
    print(" -i, --ip=IP Specify the IP address of the Ollama server (default: 192.168.50.14)")
    print(" -m, --model=MODEL Specify the model to use (default: llama3.2)")
    print("Example:")
    print('e.g python3 ask_llm.py --ip="192.168.50.14" -m "dolphin3:8b" "summarize this file: $(cat docker_log_pihole.txt)"')
|
def update_state(new_state):
    """Advance the module-level stream state, announcing each transition.

    Prints a markdown-style separator whenever the state actually changes;
    calling it again with the current state is a silent no-op.
    """
    global state

    # Guard clause: nothing to do when the state is unchanged.
    if new_state == state:
        return

    print(f"\n\n---\n\n## State changed from {state.name} to {new_state.name}\n\n")
    state = new_state
|
def ask_llm(url, payload):
    """Stream a generate request to the Ollama HTTP API and print the reply.

    Parameters
    ----------
    url : str
        Full endpoint URL, e.g. "http://host:11434/api/generate".
    payload : dict
        JSON request body ("model", "prompt", ...).

    Side effects: prints thinking/response tokens as they arrive, drives the
    module-level state machine via update_state(), and prints timing stats
    when the stream reports completion.
    """
    # stream=True tells requests to read the response as a live data stream.
    # The `with` block ensures the connection is released even on error
    # (the original never closed the Response).
    with requests.post(url, json=payload, stream=True) as response:
        # Ollama sends one JSON object per line as it generates text.
        for line in response.iter_lines():
            if not line:  # keep-alive / blank lines between chunks
                continue
            data = json.loads(line.decode("utf-8"))

            # An error line terminates this request: report and skip the rest.
            # (The previous try/except around this membership test was dead
            # weight — `'error' in data` on a dict cannot raise.)
            if 'error' in data:
                print("Error from Ollama API: " + data['error'])
                update_state(State.DONE)
                continue

            # Reasoning tokens (only emitted by "thinking" models).
            if data.get('thinking'):
                update_state(State.THINKING)
                print(data['thinking'], end="", flush=True)

            # Answer tokens.
            if data.get('response'):
                update_state(State.RESPONDING)
                print(data['response'], end="", flush=True)

            # Final message carries run statistics. Read them with .get():
            # the stats keys are not guaranteed on every final message, and
            # the original data['...'] lookups could raise KeyError mid-print.
            if data.get('done'):
                update_state(State.DONE)
                print('done_reason: ' + str(data.get('done_reason')))
                print('total_duration: ' + str(data.get('total_duration')))
                print('load_duration: ' + str(data.get('load_duration')))
                print('prompt_eval_count: ' + str(data.get('prompt_eval_count')))
                print('prompt_eval_duration: ' + str(data.get('prompt_eval_duration')))
                print('eval_count: ' + str(data.get('eval_count')))
                print('eval_duration: ' + str(data.get('eval_duration')))
                print('\n')
|
def main():
    """Parse command-line options and send the prompt to the Ollama server.

    Exits with status 2 on bad options or a missing prompt; exits 0 after
    printing help. The prompt is the first positional argument.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "phi:m:", ["ip=", "print", "help", "model="])
    except getopt.GetoptError as err:
        print(err)  # will print something like "option -a not recognized"
        sys.exit(2)

    _ip = "192.168.50.14"
    _model = "llama3.2"  # or "gpt-oss:20b" or "gemma3:4b" or "qwen3:8b"
    _dry_run = False

    ### parse input command line
    for o, a in opts:
        if o in ("-h", "--help"):
            _usage()
            sys.exit()
        elif o in ("-p", "--print"):
            # Fixed: -p/--print was advertised in the help but silently
            # ignored; it now enables the dry run below.
            _dry_run = True
        elif o in ("-i", "--ip"):
            _ip = a
        elif o in ("-m", "--model"):
            _model = a
        else:
            # Unreachable while the getopt spec matches the branches above;
            # raise instead of `assert` so it survives `python -O`.
            raise RuntimeError(f"unhandled option: {o}")

    # Fixed: `args == ""` could never be true — getopt returns a list, so
    # emptiness is the only check needed.
    if not args:
        print("No command provided. Use -h for help.")
        sys.exit(2)

    _url = "http://" + _ip + ":11434/api/generate"

    # Send a prompt to the model (only the first positional arg is used).
    payload = {
        "model": _model,
        "prompt": args[0]
    }

    if _dry_run:
        # Show exactly what would be sent, then exit without contacting
        # the server.
        print("POST " + _url)
        print(json.dumps(payload, indent=2))
        return

    ask_llm(_url, payload)
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()