ready for server deployment

This commit is contained in:
2026-04-19 11:54:46 +02:00
parent 369ff69c8c
commit 490c90b6ca

View File

@@ -6,6 +6,7 @@
from datetime import timedelta from datetime import timedelta
import sys
from typing import TypedDict, List from typing import TypedDict, List
from urllib import response from urllib import response
#from asyncio import tools #from asyncio import tools
@@ -14,7 +15,7 @@ from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode from langgraph.prebuilt import ToolNode
import requests, json, re, string import requests, json, re, string, os
from html.parser import HTMLParser from html.parser import HTMLParser
from langchain_ollama import ChatOllama from langchain_ollama import ChatOllama
import operator import operator
@@ -66,7 +67,12 @@ LINKDING_API_URL = "https://linkding.hal.se/api/bookmarks/"
# SECURITY: this token is committed to the repository in plain text. Supply it
# through the LINKDING_API_TOKEN environment variable on the server and rotate
# the literal below, which is kept only as a backward-compatible fallback.
LINKDING_API_TOKEN = os.environ.get(
    "LINKDING_API_TOKEN",
    "fa54dee2ccbcad80a0c6259bdbbed896581e1423",
)

# Shared chat model used by the summarising and tagging steps.
llm = ChatOllama(
    model=MODEL_NAME,
    base_url=LLM_BASE_URL,
    num_ctx=4096,  # Increase context window to handle longer content
    temperature=0.3,  # Lower temperature for more focused summaries
)
@tool @tool
def todays_date() -> str: def todays_date() -> str:
@@ -184,94 +190,253 @@ def write_to_file(filename: str, content: str) -> str:
except Exception as e: except Exception as e:
return f"Error occurred while writing to file: {e}" return f"Error occurred while writing to file: {e}"
# ----- Helper function to fetch raw bookmark data -----
def fetch_raw_bookmarks(date_added: str) -> List[dict]:
    """Fetch the raw bookmark records added since ``date_added``.

    Args:
        date_added: Value sent to the API as the ``added_since`` query
            parameter.

    Returns:
        The list stored under ``results`` in the JSON response, or an empty
        list when the request fails, the server answers with an error status,
        or the body is not the expected JSON object.
    """
    headers = {"Authorization": "Token " + LINKDING_API_TOKEN}
    try:
        response = requests.get(
            LINKDING_API_URL,
            # Let requests encode the value so characters such as "+" in a
            # timezone offset survive the round trip.
            params={"added_since": date_added},
            headers=headers,
            timeout=30,  # never hang the whole run on an unresponsive server
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching bookmarks: {e}")
        return []

    if not isinstance(data, dict):
        print(f"Error fetching bookmarks: unexpected response type {type(data).__name__}")
        return []
    # NOTE(review): only the first page of results is read; confirm whether the
    # API paginates and whether following pages are needed.
    return data.get('results') or []
# ----- Shared State ----- # ----- Shared State -----
class AgentState(TypedDict):
    """State dictionary passed between the graph nodes."""

    # Log entries. The operator.add reducer concatenates whatever a node
    # returns onto the existing list, so nodes should return only NEW entries.
    messages: Annotated[list, operator.add]
    # Raw bookmark data
    bookmarks: list
    # Index of bookmark being processed
    current_index: int
    # Date used for fetching bookmarks, in ISO 8601 format
    # (e.g., "2026-04-01T00:00:00Z").
    target_date: str
    # Path to the directory where the summaries file will be written
    path_to_file: str
# ----- Agent Nodes ----- # ----- Agent Nodes -----
def initialization_node(state: AgentState):
    """Phase 1: Fetch relevant bookmarks

    Reads ``state['target_date']``, fetches the bookmarks added since that
    date and returns a partial state update.

    Returns:
        A dict with the newly produced log ``messages``, the raw ``bookmarks``
        list and ``current_index`` reset to 0.
    """
    # Collect only the messages produced here. The ``messages`` channel uses an
    # operator.add reducer, so returning the full existing list (or mutating
    # state['messages'] in place) would duplicate every earlier entry.
    new_messages = []

    # Fetch bookmarks using the tool (for logging)
    bookmarks_result = fetch_bookmarks.invoke({'date_added': state['target_date']})
    new_messages.append(HumanMessage(f"[INIT] {bookmarks_result}"))

    # Also fetch raw bookmark data for processing
    raw_bookmarks = fetch_raw_bookmarks(state['target_date'])
    new_messages.append(HumanMessage(f"[INIT] Found {len(raw_bookmarks)} bookmarks to process"))

    if not raw_bookmarks:
        new_messages.append(HumanMessage("[INIT] No bookmarks found. Stopping."))

    return {
        'messages': new_messages,
        'bookmarks': raw_bookmarks,
        'current_index': 0,
    }
def _parse_numbered_summaries(text: str, count: int) -> dict:
    """Map item number -> summary for lines shaped like "N. text", "N) text" or "N- text"."""
    summaries = {}
    for raw_line in text.split('\n'):
        match = re.match(r'^(\d+)[.\)\-]\s+(.*)', raw_line.strip())
        if not match:
            continue
        num = int(match.group(1))
        summary_content = match.group(2).strip()
        # Ignore numbers outside the batch and fragments too short to be a summary.
        if 1 <= num <= count and len(summary_content) > 10:
            summaries[num] = summary_content
    return summaries


def _parse_tag_lines(text: str, count: int) -> dict:
    """Map item number -> list of tags for lines shaped like "N: tag1, tag2"."""
    tags_by_number = {}
    for line in text.split('\n'):
        head, sep, tail = line.partition(':')
        if not sep:
            continue
        try:
            num = int(head.strip())
        except ValueError:
            continue
        if 1 <= num <= count:
            # Drop empty entries (e.g. from a trailing comma) so that no blank
            # tag is ever sent to the tagging tool.
            tags_by_number[num] = [tag.strip() for tag in tail.split(',') if tag.strip()]
    return tags_by_number


def process_bookmarks_node(state: AgentState):
    """Phase 2: Process each bookmark by crawling, writing, and tagging

    Steps: crawl every bookmark URL, ask the LLM once for summaries and once
    for tags, then write one entry per bookmark to the summaries file and add
    the suggested tags to the bookmark.

    Returns:
        A dict with the newly produced log ``messages`` and ``current_index``
        set to the number of bookmarks handled (0 when there were none).
    """
    bookmarks = state['bookmarks']
    # Only new entries are returned: the ``messages`` channel uses an
    # operator.add reducer, so returning the existing list would duplicate it.
    new_messages = []

    if not bookmarks:
        new_messages.append(HumanMessage("[PROCESS] No bookmarks to process."))
        return {
            'messages': new_messages,
            'current_index': 0,
        }

    total = len(bookmarks)

    # Step 1: Crawl all bookmarks
    crawled_data = []
    for i, bookmark in enumerate(bookmarks, 1):
        new_messages.append(HumanMessage(f"\n[PROCESS] Crawling bookmark {i}/{total}: {bookmark['title']}"))
        try:
            content = crawl_homepage.invoke({'url': bookmark['url']})
            new_messages.append(HumanMessage(f" ✓ Crawled: {len(content)} chars"))
        except Exception as e:
            # Best effort: keep the bookmark with empty content so numbering stays aligned.
            new_messages.append(HumanMessage(f" ✗ Error crawling: {e}"))
            content = ''
        crawled_data.append({'bookmark': bookmark, 'content': content})

    # Step 2: Batch-analyze all content in a single LLM call for summaries
    # NOTE(review): every bookmark contributes up to 1000 characters to one
    # prompt; confirm this fits the model's context window for large batches.
    new_messages.append(HumanMessage(f"\n[PROCESS] Generating summaries for all {total} bookmarks..."))
    summaries_by_bookmark = {}
    try:
        summary_text = "".join(
            f"{i}. Title: {item['bookmark']['title']}\nContent: {item['content'][:1000]}\n\n"
            for i, item in enumerate(crawled_data, 1)
        )
        # The item range in the prompt follows the real batch size instead of a fixed "1-10".
        summary_prompt = SystemMessage(f"""For each numbered item (1-{total}), write a 5-10 sentence summary. Output format: "N. summary text"
Example:
1. Travel guides website offering Lonely Planet destination collections and expert travel advice.
2. Cloudflare security page blocking access to the website.""")
        content_message = HumanMessage(f"Summarize these items:\n\n{summary_text}")

        response = llm.invoke([summary_prompt, content_message])
        summary_response = response.content.strip()
        new_messages.append(HumanMessage(f" ✓ Generated summaries for all bookmarks"))

        summaries_by_bookmark = _parse_numbered_summaries(summary_response, total)
        new_messages.append(HumanMessage(f" ✓ Extracted {len(summaries_by_bookmark)} summaries from LLM response"))
    except Exception as e:
        new_messages.append(HumanMessage(f" ✗ Error generating summaries: {e}"))
        summaries_by_bookmark = {}

    # Step 3: Batch-analyze all content in a single LLM call for tags
    new_messages.append(HumanMessage(f"\n[PROCESS] Analyzing all {total} bookmarks for tags..."))
    tags_by_bookmark = {}
    try:
        analysis_text = "".join(
            f"{i}. Title: {item['bookmark']['title']}\nContent snippet: {item['content'][:500]}\n---\n"
            for i, item in enumerate(crawled_data, 1)
        )
        analysis_prompt = SystemMessage("""You are a bookmark tagging assistant. For each bookmark, suggest 1-3 relevant tags.
Output format: For each numbered bookmark, respond with:
N: tag1, tag2, tag3
Tags should be lowercase and hyphenated if multi-word. Be concise.""")
        content_message = HumanMessage(f"Analyze these bookmarks and suggest relevant tags:\n\n{analysis_text}")

        response = llm.invoke([analysis_prompt, content_message])
        tags_response = response.content.strip()
        new_messages.append(HumanMessage(f" ✓ Generated tags for all bookmarks"))

        tags_by_bookmark = _parse_tag_lines(tags_response, total)
    except Exception as e:
        new_messages.append(HumanMessage(f" ✗ Error analyzing bookmarks: {e}"))
        tags_by_bookmark = {}

    # Step 4: Write to file and add tags for each bookmark
    for i, item in enumerate(crawled_data, 1):
        bookmark = item['bookmark']
        suggested_tags = tags_by_bookmark.get(i, [])
        suggested_summary = summaries_by_bookmark.get(i, "No summary available")

        new_messages.append(HumanMessage(f"\n[WRITE] Bookmark {i}/{total}: {bookmark['title']}"))

        # Write to file
        try:
            output_content = f"## {bookmark['title']}\n- URL: {bookmark['url']}\n- Description: {bookmark['description']}\n- Summary: {suggested_summary}\n- Tags: {', '.join(suggested_tags)}\n- Date: {bookmark['date_added']}\n\n"
            filename = os.path.expanduser(state['path_to_file'] + '/bookmark_summaries_' + state['target_date'][:10] + '.md')
            write_result = write_to_file.invoke({'filename': filename, 'content': output_content})
        except Exception as e:
            new_messages.append(HumanMessage(f" ✗ Error writing file: {e}"))
            continue
        # write_to_file reports failures by returning an error string instead
        # of raising, so the return value has to be inspected as well.
        if isinstance(write_result, str) and write_result.startswith("Error occurred while writing to file"):
            new_messages.append(HumanMessage(f" ✗ Error writing file: {write_result}"))
            continue
        new_messages.append(HumanMessage(f" ✓ Wrote to file"))

        # Add tags to bookmark
        try:
            if suggested_tags:
                for tag in suggested_tags:
                    add_tag_to_bookmark.invoke({
                        'bookmark_id': bookmark['id'],
                        'tag': tag,
                    })
                new_messages.append(HumanMessage(f" ✓ Added tags: {', '.join(suggested_tags)}"))
            else:
                new_messages.append(HumanMessage(f" ⊘ No tags generated"))
        except Exception as e:
            new_messages.append(HumanMessage(f" ✗ Error tagging: {e}"))

    new_messages.append(HumanMessage(f"\n[PROCESS] ✓ ALL {total} BOOKMARKS PROCESSED"))

    return {
        'messages': new_messages,
        'current_index': total,
    }
def should_continue(state: AgentState):
    """Decide whether the graph should run tools next or finish.

    Returns:
        "tools" when the last message in ``state['messages']`` carries a
        non-empty ``tool_calls`` attribute, otherwise ``END`` (also when the
        message list is empty).
    """
    messages = state['messages']
    # getattr with a default covers message objects without a tool_calls
    # attribute; the emptiness check avoids an IndexError on an empty log.
    if messages and getattr(messages[-1], 'tool_calls', None):
        return "tools"
    return END
# =============================================================================
# Graph
# =============================================================================
def create_agent():
    """Build and compile the bookmark-processing graph.

    The graph is a fixed pipeline: "initialize" -> "process" -> END.

    Returns:
        The compiled graph returned by ``builder.compile()``.
    """
    builder = StateGraph(AgentState)

    builder.add_node("initialize", initialization_node)
    builder.add_node("process", process_bookmarks_node)

    builder.set_entry_point("initialize")
    # Only edges between registered nodes are declared; the former
    # "tools" -> "agent" edge referred to nodes that are no longer added.
    builder.add_edge("initialize", "process")
    builder.add_edge("process", END)

    return builder.compile()
def main():
    """Run the bookmark pipeline from the command line.

    Usage: ``script [days] [output_dir]``

    ``days`` (default 7) is how far back to look for bookmarks and
    ``output_dir`` (default ``~``) is where the summaries file is written.
    Exits with an error message when ``days`` is not an integer.
    """
    days = 7  # Default to 7 days if not provided
    file_path = '~'

    args = sys.argv[1:]
    if args:
        try:
            days = int(args[0])
        except ValueError:
            raise SystemExit(f"Invalid number of days: {args[0]!r} (expected an integer)")
        if len(args) > 1:
            file_path = args[1]

    agent = create_agent()

    # Get today's date
    today = todays_date.invoke({})

    # Calculate the date `days` days ago
    target_date = calculate_date.invoke({'dat': today, 'days': days})

    human_prompt = HumanMessage(f"Process all bookmarks from the last {days} days: fetch them, summarize their content, write summaries to a file, and add relevant tags.")

    # Every state key the nodes read must be supplied up front.
    result = agent.invoke({
        'messages': [human_prompt],
        'bookmarks': [],
        'current_index': 0,
        'target_date': target_date,
        'path_to_file': file_path,
    })

    print("\n" + "="*80)
    print("EXECUTION COMPLETE")
    print("="*80)
    for msg in result['messages']:
        if isinstance(msg, HumanMessage):
            print(msg.content)
    print("="*80)


if __name__ == "__main__":
    main()
""" """