Main commit

Mahesh Kommareddi 2024-06-09 13:45:19 -04:00
parent b71c728fbd
commit aaca68b74e
34 changed files with 2193 additions and 3 deletions

README.md

@@ -1,3 +1 @@
# agents
docker-compose -f docker/docker-compose.yml up --build
RAG Agents for LLM

app/__init__.py Normal file

@@ -0,0 +1 @@
# This file can be empty or contain initialization code for the app package

app/rag_system_with_agents.py Normal file

@@ -0,0 +1,702 @@
import os
import openai
import uuid
import requests
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from qdrant_client import QdrantClient
from qdrant_client.http import models
from email.mime.text import MIMEText
from airflow_client.client import ApiClient, Configuration
from airflow_client.client.api.dag_run_api import DAGRunApi
import smtplib
from threading import Thread
import time
import json
# Initialize Flask app
app = Flask(__name__, static_folder='./frontend', static_url_path='/')
CORS(app)
# Configure OpenAI API
openai.api_key = os.getenv('OPENAI_API_KEY')
# Configure Qdrant
qdrant = QdrantClient(host=os.getenv('QDRANT_HOST'))
# Dictionary to store the status and progress of tasks
tasks_status = {}
def embed_text(text):
response = openai.Embedding.create(
input=text,
model="text-embedding-ada-002"
)
embedding = response['data'][0]['embedding']
return embedding
def query_qdrant(embedding, top_n=5):
search_result = qdrant.search(
collection_name="rag",
query_vector=embedding,
limit=top_n
)
return search_result
def parse_react_response(response):
steps = []
final_answer = ""
lines = response.split('\n')
for line in lines:
if line.startswith("["):
steps.append(line.strip())
elif line.startswith("Final Answer:"):
final_answer = line.split(":", 1)[1].strip()
return steps, final_answer
def update_task_status(task_id, status, step=None, results=None):
results = results if results is not None else []
if task_id not in tasks_status:
tasks_status[task_id] = {"status": status, "current_step": step, "steps": [], "results": []}
else:
tasks_status[task_id]["status"] = status
if step:
tasks_status[task_id]["current_step"] = step
tasks_status[task_id]["steps"].append(step)
tasks_status[task_id]["results"] = results
def process_steps(steps, task_id, memory, results):
try:
for step in steps:
if "[" in step and "]" in step:
agent = step.split("[")[1].split("]")[0].strip().lower().replace(" ", "_")
task = step.split("]")[1].strip()
result = run_agent(agent, task, task_id, memory)
if isinstance(result, tuple):
result = result[0]
results.append(result["message"])
update_task_status(task_id, "processing", step, results)
memory[agent] = result["message"] # Store the result in memory
update_task_status(task_id, "completed", None, results)
except Exception as e:
update_task_status(task_id, f"failed: {e}")
print(f"Error processing steps: {e}")
@app.route('/upload', methods=['POST'])
def upload_file():
if 'file' not in request.files:
return jsonify({"error": "No file part"}), 400
file = request.files['file']
if file.filename == '':
return jsonify({"error": "No selected file"}), 400
if file and file.filename.endswith('.txt'):
content = file.read().decode('utf-8')
embedding = embed_text(content)
document_id = str(uuid.uuid4()) # Generate a UUID for the document ID
qdrant.upsert(
collection_name='rag',
points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})]
)
return jsonify({"message": "File uploaded and embedded successfully"}), 200
else:
return jsonify({"error": "Invalid file type. Only .txt files are allowed"}), 400
@app.route('/query', methods=['POST'])
def query():
data = request.json
query_text = data['query']
embedding = embed_text(query_text)
results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in results]
return jsonify({"results": sources})
@app.route('/react_query', methods=['POST'])
def react_query():
data = request.json
query_text = data['query']
task_id = str(uuid.uuid4())
update_task_status(task_id, "initialized")
# Create the system prompt with capabilities
system_prompt = f"""
You are a research assistant that can perform the following tasks:
1. Research Paper Finder
2. Citation Generator
3. Data Summarizer
4. Question Answering
5. Statistical Analysis
6. Graph Generator
7. Keyword Extractor
8. Research Outline Generator
9. Hypothesis Generator
10. Methodology Advisor
11. Experimental Design Helper
12. Survey Designer
13. Plagiarism Checker
14. Grammar and Style Checker
15. Literature Review Organizer
16. Data Cleaning Agent
17. Bibliography Manager
18. Thesis Statement Generator
19. Funding Finder
20. Conference Finder
21. Web Scraper
22. API Integrator
23. Email Notifier
24. File Converter
25. Translation Agent
26. OCR Agent
27. Scheduler
28. Weather Information Agent
Using the ReAct (Reason and Act) paradigm, analyze the following query and determine the steps to answer it. Each step should indicate the agent to use and the task to perform in a structured format, clearly separated by new lines. Make sure to include the agent name in square brackets. Example format: [Agent] Task.
Query: {query_text}
"""
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": query_text}
],
max_tokens=500
)
react_response = response['choices'][0]['message']['content'].strip()
steps, final_answer = parse_react_response(react_response)
if not steps:
update_task_status(task_id, "failed")
return jsonify({"error": "No steps generated by the ReAct system"}), 400
update_task_status(task_id, "processing", steps[0])
results = []
memory = {}
Thread(target=process_steps, args=(steps, task_id, memory, results)).start()
return jsonify({"steps": steps, "task_id": task_id})
def run_agent(agent, query_text, task_id, memory):
# Here we call the appropriate agent function based on the agent type
if agent == 'research_paper_finder':
return research_paper_finder(query_text, memory)
elif agent == 'citation_generator':
return citation_generator(query_text, memory)
elif agent == 'data_summarizer':
return data_summarizer(query_text, memory)
elif agent == 'question_answering':
return question_answering(query_text, memory)
elif agent == 'statistical_analysis':
return statistical_analysis(query_text, memory)
elif agent == 'graph_generator':
return graph_generator(query_text, memory)
elif agent == 'keyword_extractor':
return keyword_extractor(query_text, memory)
elif agent == 'research_outline_generator':
return research_outline_generator(query_text, memory)
elif agent == 'hypothesis_generator':
return hypothesis_generator(query_text, memory)
elif agent == 'methodology_advisor':
return methodology_advisor(query_text, memory)
elif agent == 'experimental_design_helper':
return experimental_design_helper(query_text, memory)
elif agent == 'survey_designer':
return survey_designer(query_text, memory)
elif agent == 'plagiarism_checker':
return plagiarism_checker(query_text, memory)
elif agent == 'grammar_and_style_checker':
return grammar_and_style_checker(query_text, memory)
elif agent == 'literature_review_organizer':
return literature_review_organizer(query_text, memory)
elif agent == 'data_cleaning_agent':
return data_cleaning_agent(query_text, memory)
elif agent == 'bibliography_manager':
return bibliography_manager(query_text, memory)
elif agent == 'thesis_statement_generator':
return thesis_statement_generator(query_text, memory)
elif agent == 'funding_finder':
return funding_finder(query_text, memory)
elif agent == 'conference_finder':
return conference_finder(query_text, memory)
elif agent in ('web_scraper_using_scrapyd', 'web_scraper'):
return web_scraper(query_text, memory)
elif agent == 'api_integrator':
return api_integrator(query_text, memory)
elif agent == 'email_notifier':
return email_notifier(query_text, memory)
elif agent == 'file_converter':
return file_converter(query_text, memory)
elif agent == 'translation_agent':
return translation_agent(query_text, memory)
elif agent == 'ocr_agent':
return ocr_agent(query_text, memory)
elif agent == 'scheduler':
return scheduler(query_text, memory)
elif agent == 'weather_information_agent':
return weather_information_agent(query_text, memory)
elif agent == 'currency_converter':
return currency_converter(query_text, memory)
elif agent == 'news_aggregator':
return news_aggregator(query_text, memory)
else:
return {"message": f"Unknown agent: {agent}"}
def research_paper_finder(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Find research papers related to: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def citation_generator(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Generate a citation for: {query_text}"}
],
max_tokens=50
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def data_summarizer(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Summarize the following text: {query_text}"}
],
max_tokens=1000
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def question_answering(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Answer the following question: {query_text}"}
],
max_tokens=100
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def statistical_analysis(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Perform statistical analysis on the following data: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def graph_generator(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Generate a graph for the following data: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def keyword_extractor(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Extract keywords from the following text: {query_text}"}
],
max_tokens=50
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def research_outline_generator(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Generate a research outline for: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def hypothesis_generator(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Generate a hypothesis based on the following topic: {query_text}"}
],
max_tokens=100
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def methodology_advisor(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Suggest a methodology for the following research topic: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def experimental_design_helper(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Help design an experiment for: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def survey_designer(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Design a survey for: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def plagiarism_checker(query_text, memory):
return {"message": "Plagiarism check is not implemented yet.", "query": query_text}
def grammar_and_style_checker(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Check and correct the grammar and style of the following text: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def literature_review_organizer(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Organize the following literature review: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def data_cleaning_agent(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Clean the following data: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def bibliography_manager(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Manage the bibliography for: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def thesis_statement_generator(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Generate a thesis statement for: {query_text}"}
],
max_tokens=100
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def funding_finder(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Find funding opportunities for: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def conference_finder(query_text, memory):
embedding = embed_text(query_text)
rag_results = query_qdrant(embedding)
sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Find conferences related to: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def web_scraper(query_text, memory):
project_name = 'my_project'
spider_name = 'my_spider'
scrapyd_host = os.getenv('SCRAPYD_HOST', 'localhost')
data = {
'project': project_name,
'spider': spider_name,
'start_urls': query_text
}
try:
response = requests.post(f'http://{scrapyd_host}:6800/schedule.json', data=data)
response.raise_for_status()
job_id = response.json().get('jobid')
# Wait for the scheduled job to finish, then fetch the scraped items
time.sleep(15) # Adjust this sleep time as needed
items_response = requests.get(f'http://{scrapyd_host}:6800/items/{project_name}/{spider_name}/{job_id}.jl')
items_response.raise_for_status()
items = [json.loads(line) for line in items_response.text.splitlines()]
# Insert the first scraped item into Qdrant
content = items[0].get('content', '') if items else ''
embedding = embed_text(content)
document_id = str(uuid.uuid4())
qdrant.upsert(
collection_name='rag',
points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})]
)
return {"message": content}
except Exception as e:
print(f"Error scheduling scrapy job: {e}")
return {"message": f"Failed to schedule job - {e}"}, 500
def api_integrator(query_text, memory):
response = requests.post(
'http://node_red:1880/api_integrator',
json={'query': query_text}
)
return {"message": response.json(), "query": query_text}
def email_notifier(query_text, memory):
msg = MIMEText(query_text)
msg['Subject'] = 'Notification'
msg['From'] = 'test@example.com'
msg['To'] = 'mahesh.kommareddi@gmail.com'
with smtplib.SMTP('mailhog', 1025) as server:
server.sendmail(msg['From'], [msg['To']], msg.as_string())
return {"message": "Email sent successfully"}
def file_converter(query_text, memory):
response = requests.post(
'http://libreoffice:8084/convert',
files={'file': query_text}
)
return {"message": "File conversion completed", "data": response.json()}
def translation_agent(query_text, memory):
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"The previous response relating to the query was: {memory}"},
{"role": "user", "content": f"Translate the following text: {query_text}"}
],
max_tokens=150
)
response_message = response['choices'][0]['message']['content'].strip()
return {"message": response_message, "sources": sources}
def ocr_agent(query_text, memory):
response = requests.post(
'http://ocr_service:8081/ocr',
files={'file': query_text}
)
return {"message": response.json(), "query": query_text}
def scheduler(query_text, memory):
configuration = Configuration(
host="http://localhost:8082/api/v1"
)
api_client = ApiClient(configuration)
dag_run_api = DAGRunApi(api_client)
dag_id = 'example_dag'
dag_run = dag_run_api.post_dag_run(
dag_id=dag_id,
dag_run={"conf": {"query_text": query_text}}
)
return {"message": f"Scheduled task for {query_text}", "dag_run_id": dag_run.dag_run_id}
def weather_information_agent(query_text, memory):
api_key = os.getenv('OPENWEATHERMAP_API_KEY')
response = requests.get(
f'http://api.openweathermap.org/data/2.5/weather?q={query_text}&appid={api_key}'
)
return {"message": response.json(), "query": query_text}
@app.route('/ocr', methods=['POST'])
def handle_ocr():
if 'file' not in request.files:
return jsonify({"error": "No file part"}), 400
file = request.files['file']
if file.filename == '':
return jsonify({"error": "No selected file"}), 400
response = requests.post(
'http://ocr_service:8081/ocr',
files={'file': file}
)
return jsonify(response.json())
@app.route('/schedule', methods=['POST'])
def handle_schedule():
data = request.json
query_text = data['query']
return jsonify(scheduler(query_text, {}))
@app.route('/weather', methods=['POST'])
def handle_weather():
data = request.json
query_text = data['query']
return jsonify(weather_information_agent(query_text, {}))
@app.route('/scrape', methods=['POST'])
def handle_scrape():
data = request.json
query_text = data['query']
return web_scraper(query_text, {})
@app.route('/integrate', methods=['POST'])
def handle_integrate():
data = request.json
query_text = data['query']
return jsonify(api_integrator(query_text, {}))
@app.route('/notify', methods=['POST'])
def handle_notify():
data = request.json
query_text = data['query']
return jsonify(email_notifier(query_text, {}))
@app.route('/convert', methods=['POST'])
def handle_convert():
if 'file' not in request.files:
return jsonify({"error": "No file part"}), 400
file = request.files['file']
if file.filename == '':
return jsonify({"error": "No selected file"}), 400
response = requests.post(
'http://libreoffice:8084/convert',
files={'file': file}
)
return jsonify(response.json())
@app.route('/')
def serve_index():
return send_from_directory(app.static_folder, 'index.html')
@app.route('/status/<task_id>', methods=['GET'])
def get_status(task_id):
return jsonify(tasks_status.get(task_id, {"error": "Task ID not found"}))
if __name__ == '__main__':
app.run(host='0.0.0.0', port=1337)
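The upload and scraping paths upsert into a Qdrant collection named "rag", but nothing in this commit creates that collection. A minimal one-off initialization sketch, assuming text-embedding-ada-002's 1536-dimensional vectors and cosine distance (both are assumptions, not part of the commit):

import os
from qdrant_client import QdrantClient
from qdrant_client.http import models

# One-off setup: create the "rag" collection the app upserts into.
# Vector size 1536 matches text-embedding-ada-002; the distance metric is an assumption.
qdrant = QdrantClient(host=os.getenv('QDRANT_HOST', 'localhost'))
qdrant.recreate_collection(
    collection_name='rag',
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
)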

app/requirements.txt Normal file

@@ -0,0 +1,10 @@
flask
flask-cors
openai==0.28
openai[datalib]
pymongo
requests
numpy
qdrant_client
apache-airflow-client
scrapy

docker/Dockerfile Normal file

@@ -0,0 +1,23 @@
# Use the official Python image from the Docker Hub
FROM python:3.10
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container
COPY ./app/requirements.txt .
# Install any dependencies specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code into the container
COPY ./app /app
# Copy the frontend files into the container
COPY ./frontend /app/frontend
# Make port 1337 available to the world outside this container
EXPOSE 1337
# Run the application
CMD ["python", "rag_system_with_agents.py"]

docker/docker-compose.yml Normal file

@@ -0,0 +1,87 @@
version: '3.7'
services:
flask-app:
build:
context: ../
dockerfile: docker/Dockerfile
container_name: flask-app
ports:
- "1337:1337"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- QDRANT_HOST=qdrant
- SCRAPYD_HOST=scrapyd
- OPENWEATHERMAP_API_KEY=${OPENWEATHERMAP_API_KEY}
depends_on:
- qdrant
- ocr_service
- airflow-webserver
- airflow-scheduler
- scrapyd
- node_red
- mailhog
- libreoffice
qdrant:
image: qdrant/qdrant
ports:
- "6333:6333"
ocr_service:
image: jbarlow83/ocrmypdf
ports:
- "8081:8081"
airflow-webserver:
image: apache/airflow:latest
container_name: airflow-webserver
command: webserver
ports:
- "8082:8082"
environment:
- AIRFLOW__CORE__EXECUTOR=LocalExecutor
- AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
- AIRFLOW__CORE__LOAD_EXAMPLES=False
volumes:
- ./dags:/usr/local/airflow/dags
- ./logs:/usr/local/airflow/logs
- ./plugins:/usr/local/airflow/plugins
airflow-scheduler:
image: apache/airflow:latest
container_name: airflow-scheduler
command: scheduler
environment:
- AIRFLOW__CORE__EXECUTOR=LocalExecutor
- AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
- AIRFLOW__CORE__LOAD_EXAMPLES=False
volumes:
- ./dags:/usr/local/airflow/dags
- ./logs:/usr/local/airflow/logs
- ./plugins:/usr/local/airflow/plugins
depends_on:
- airflow-webserver
scrapyd:
image: vimagick/scrapyd
ports:
- "6800:6800"
volumes:
- ./scrapy_project:/scrapy_project
command: ["scrapyd"]
node_red:
image: nodered/node-red
ports:
- "1880:1880"
mailhog:
image: mailhog/mailhog
ports:
- "1025:1025"
libreoffice:
image: linuxserver/libreoffice
ports:
- "8084:8084"

@@ -0,0 +1,12 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class MyProjectItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
pass

@@ -0,0 +1,103 @@
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class MyProjectSpiderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, or item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request or item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info("Spider opened: %s" % spider.name)
class MyProjectDownloaderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info("Spider opened: %s" % spider.name)
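As a concrete instance of the process_request hook documented above, a downloader middleware can stamp a User-Agent header on every outgoing request and return None so processing continues. An illustrative sketch, not part of this commit; it would be enabled by uncommenting DOWNLOADER_MIDDLEWARES in settings.py and pointing it at this class:

class CustomUserAgentMiddleware:
    # Illustrative only: set a User-Agent on each request, then let the
    # request continue through the middleware chain by returning None.
    def process_request(self, request, spider):
        request.headers['User-Agent'] = 'my_project (+http://www.yourdomain.com)'
        return None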

@@ -0,0 +1,13 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
class MyProjectPipeline:
def process_item(self, item, spider):
return item

@@ -0,0 +1,93 @@
# Scrapy settings for my_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = "my_project"
SPIDER_MODULES = ["my_project.spiders"]
NEWSPIDER_MODULE = "my_project.spiders"
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "my_project (+http://www.yourdomain.com)"
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
# "Accept-Language": "en",
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# "my_project.middlewares.MyProjectSpiderMiddleware": 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# "my_project.middlewares.MyProjectDownloaderMiddleware": 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# "scrapy.extensions.telnet.TelnetConsole": None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# "my_project.pipelines.MyProjectPipeline": 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

@@ -0,0 +1,32 @@
import scrapy
import re
class MySpider(scrapy.Spider):
name = 'my_spider'
def __init__(self, start_urls=None, *args, **kwargs):
super(MySpider, self).__init__(*args, **kwargs)
if start_urls:
self.start_urls = self.extract_urls(start_urls)
else:
self.start_urls = []
def extract_urls(self, text):
url_pattern = re.compile(r'(https?://\S+)')
urls = url_pattern.findall(text)
return urls
def parse(self, response):
page_content = response.text
page_urls = response.css('a::attr(href)').getall()
for url in page_urls:
if url.startswith('http'):
yield {'url': url, 'content': page_content}
else:
yield {'url': response.urljoin(url), 'content': page_content}
# for url in page_urls:
# if url.startswith('http'):
# yield scrapy.Request(url, callback=self.parse)
# else:
# yield scrapy.Request(response.urljoin(url), callback=self.parse)
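The spider accepts start_urls as one free-text argument and regex-extracts the URLs from it, which is why web_scraper can pass a query sentence straight through. A quick illustration of what extract_urls returns:

import re

url_pattern = re.compile(r'(https?://\S+)')
text = "Scrape https://example.com/docs and http://news.example.org for background"
print(url_pattern.findall(text))
# -> ['https://example.com/docs', 'http://news.example.org']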

@@ -0,0 +1,12 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class MyProjectItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
pass

@@ -0,0 +1,103 @@
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class MyProjectSpiderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, or item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request or item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info("Spider opened: %s" % spider.name)
class MyProjectDownloaderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info("Spider opened: %s" % spider.name)

@@ -0,0 +1,13 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
class MyProjectPipeline:
def process_item(self, item, spider):
return item

@@ -0,0 +1,93 @@
# Scrapy settings for my_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = "my_project"
SPIDER_MODULES = ["my_project.spiders"]
NEWSPIDER_MODULE = "my_project.spiders"
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "my_project (+http://www.yourdomain.com)"
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
# "Accept-Language": "en",
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# "my_project.middlewares.MyProjectSpiderMiddleware": 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# "my_project.middlewares.MyProjectDownloaderMiddleware": 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# "scrapy.extensions.telnet.TelnetConsole": None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# "my_project.pipelines.MyProjectPipeline": 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

@@ -0,0 +1,32 @@
import scrapy
import re
class MySpider(scrapy.Spider):
name = 'my_spider'
def __init__(self, start_urls=None, *args, **kwargs):
super(MySpider, self).__init__(*args, **kwargs)
if start_urls:
self.start_urls = self.extract_urls(start_urls)
else:
self.start_urls = []
def extract_urls(self, text):
url_pattern = re.compile(r'(https?://\S+)')
urls = url_pattern.findall(text)
return urls
def parse(self, response):
page_content = response.text
page_urls = response.css('a::attr(href)').getall()
for url in page_urls:
if url.startswith('http'):
yield {'url': url, 'content': page_content}
else:
yield {'url': response.urljoin(url), 'content': page_content}
# for url in page_urls:
# if url.startswith('http'):
# yield scrapy.Request(url, callback=self.parse)
# else:
# yield scrapy.Request(response.urljoin(url), callback=self.parse)

project.egg-info/PKG-INFO Normal file

@@ -0,0 +1,3 @@
Metadata-Version: 2.1
Name: project
Version: 1.0

project.egg-info/SOURCES.txt Normal file

@@ -0,0 +1,13 @@
setup.py
my_project/__init__.py
my_project/items.py
my_project/middlewares.py
my_project/pipelines.py
my_project/settings.py
my_project/spiders/__init__.py
my_project/spiders/my_spider.py
project.egg-info/PKG-INFO
project.egg-info/SOURCES.txt
project.egg-info/dependency_links.txt
project.egg-info/entry_points.txt
project.egg-info/top_level.txt

project.egg-info/entry_points.txt Normal file

@@ -0,0 +1,2 @@
[scrapy]
settings = my_project.settings

project.egg-info/top_level.txt Normal file

@@ -0,0 +1 @@
my_project

scrapy.cfg Normal file

@@ -0,0 +1,6 @@
[settings]
default = my_project.settings
[deploy]
url = http://localhost:6800/
project = my_project

setup.py Normal file

@@ -0,0 +1,10 @@
# Automatically created by: scrapyd-deploy
from setuptools import setup, find_packages
setup(
name = 'project',
version = '1.0',
packages = find_packages(),
entry_points = {'scrapy': ['settings = my_project.settings']},
)
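scrapyd-deploy reads the [deploy] target from scrapy.cfg, builds an egg from this setup.py, and uploads it to scrapyd's addversion.json endpoint. The equivalent raw HTTP call, sketched with a hypothetical egg path:

import requests

# Hypothetical egg path; scrapyd-deploy normally builds this from setup.py.
with open('dist/project-1.0-py3.10.egg', 'rb') as egg:
    resp = requests.post(
        'http://localhost:6800/addversion.json',
        data={'project': 'my_project', 'version': '1.0'},
        files={'egg': egg},
    )
print(resp.json())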

frontend/app.js Normal file

@@ -0,0 +1,62 @@
document.getElementById('react-query-form').addEventListener('submit', async (e) => {
e.preventDefault();
const queryText = document.getElementById('react-query-text').value;
const response = await fetch('/react_query', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ query: queryText }),
});
const data = await response.json();
const taskId = data.task_id;
monitorTask(taskId);
});
async function monitorTask(taskId) {
const statusDiv = document.getElementById('results');
statusDiv.innerHTML = `<p>Task ID: ${taskId}</p><p>Status: Monitoring...</p>`;
let completed = false;
while (!completed) {
const response = await fetch(`/status/${taskId}`);
const data = await response.json();
statusDiv.innerHTML = `<p>Task ID: ${taskId}</p><p>Status: ${data.status}</p><p>Current Step: ${data.current_step}</p>`;
if (data.status === 'completed') {
completed = true;
const stepsList = data.steps.map(step => `<li>${step}</li>`).join('');
statusDiv.innerHTML += `<ul>${stepsList}</ul>`;
break;
}
await new Promise(resolve => setTimeout(resolve, 2000));
}
}
document.getElementById('query-form').addEventListener('submit', async (e) => {
e.preventDefault();
const queryText = document.getElementById('query-text').value;
const agent = document.getElementById('agent-select').value;
const response = await fetch('/query', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ query: queryText, agent }),
});
const data = await response.json();
const resultsDiv = document.getElementById('results');
resultsDiv.innerHTML = data.results.map(result => `<p>${result.content}</p>`).join('');
});
document.getElementById('upload-form').addEventListener('submit', async (e) => {
e.preventDefault();
const fileInput = document.getElementById('file-input');
const formData = new FormData();
formData.append('file', fileInput.files[0]);
const response = await fetch('/upload', {
method: 'POST',
body: formData,
});
const data = await response.json();
const resultsDiv = document.getElementById('results');
resultsDiv.innerHTML = `<p>${data.message}</p>`;
});

frontend/index.html Normal file

@@ -0,0 +1,129 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Agentive RAG System</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="container">
<h1>Agentive RAG System</h1>
<form id="query-form">
<textarea id="query-text" placeholder="Enter your query here..." required></textarea>
<button type="submit">Submit</button>
</form>
<form id="upload-form" enctype="multipart/form-data">
<input type="file" id="file-input" accept=".txt" required>
<button type="submit">Upload</button>
</form>
<form id="react-query-form">
<textarea id="react-query-text" placeholder="Enter your query for ReAct..." required></textarea>
<button type="submit">Submit</button>
</form>
<div id="results"></div>
</div>
<script>
const queryForm = document.getElementById('query-form');
const uploadForm = document.getElementById('upload-form');
const reactQueryForm = document.getElementById('react-query-form');
const resultsDiv = document.getElementById('results');
queryForm.addEventListener('submit', async (event) => {
event.preventDefault();
const queryText = document.getElementById('query-text').value;
const response = await fetch('/query', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ query: queryText })
});
const data = await response.json();
displayResults(data.results);
});
uploadForm.addEventListener('submit', async (event) => {
event.preventDefault();
const fileInput = document.getElementById('file-input');
const formData = new FormData();
formData.append('file', fileInput.files[0]);
const response = await fetch('/upload', {
method: 'POST',
body: formData
});
const data = await response.json();
displayResults(data.message);
});
reactQueryForm.addEventListener('submit', async (event) => {
event.preventDefault();
const queryText = document.getElementById('react-query-text').value;
const response = await fetch('/react_query', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ query: queryText })
});
const data = await response.json();
pollTaskStatus(data.task_id);
});
async function pollTaskStatus(taskId) {
const interval = setInterval(async () => {
const response = await fetch(`/status/${taskId}`);
const data = await response.json();
displayTaskStatus(data);
if (data.status === 'completed' || data.status.startsWith('failed')) {
clearInterval(interval);
}
}, 2000);
}
function displayResults(results) {
resultsDiv.innerHTML = '';
if (Array.isArray(results)) {
results.forEach(result => {
const p = document.createElement('p');
p.textContent = result;
resultsDiv.appendChild(p);
});
} else {
const p = document.createElement('p');
p.textContent = results;
resultsDiv.appendChild(p);
}
}
function displayTaskStatus(data) {
resultsDiv.innerHTML = `<h3>Status: ${data.status}</h3>`;
if (data.current_step) {
const step = document.createElement('p');
step.textContent = `Current Step: ${data.current_step}`;
resultsDiv.appendChild(step);
}
if (data.steps) {
const stepsList = document.createElement('ul');
data.steps.forEach(step => {
const li = document.createElement('li');
li.textContent = step;
stepsList.appendChild(li);
});
resultsDiv.appendChild(stepsList);
}
if (data.results) {
const resultsList = document.createElement('ul');
data.results.forEach(result => {
const li = document.createElement('li');
li.textContent = result;
resultsList.appendChild(li);
});
resultsDiv.appendChild(resultsList);
}
}
</script>
</body>
</html>

frontend/styles.css Normal file

@@ -0,0 +1,78 @@
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
background-color: #f5f5f5;
}
.container {
width: 90%;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background-color: #ffffff;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
h1 {
text-align: center;
color: #333;
}
h2 {
color: #555;
}
.form-section {
margin-bottom: 30px;
}
textarea, select, input[type="file"], button {
width: 100%;
padding: 10px;
margin: 10px 0;
border-radius: 5px;
border: 1px solid #ccc;
}
button {
background-color: #007BFF;
color: #fff;
cursor: pointer;
border: none;
}
button:hover {
background-color: #0056b3;
}
#results {
margin-top: 30px;
}
#results-content {
background-color: #f9f9f9;
padding: 20px;
border: 1px solid #ddd;
border-radius: 5px;
white-space: pre-wrap;
}
#task-queue {
margin-top: 30px;
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
border: 1px solid #ddd;
padding: 10px;
text-align: left;
}
th {
background-color: #f2f2f2;
}

samples/FRANCE.txt Normal file

@@ -0,0 +1,125 @@
{{Short description|Country in Western Europe}}
{{Hatnote|For other uses, see [[France (disambiguation)]], [[Lafrance (disambiguation)|Lafrance]], or (for prior French Republics) [[French Republics (disambiguation)|French Republics]].}}
{{pp-vandalism|small=yes}}
{{Use British English|date=July 2022}}
{{Use dmy dates|date=March 2022}}
{{Infobox country
| conventional_long_name = French Republic
| common_name = France
| native_name = {{Native name|fr|République française}}
| image_flag = Flag of France.svg
| image_coat = Arms of the French Republic.svg
| symbol_width = 75px
| symbol_type = [[Coat of arms of France|Coat of arms]]{{Efn-ur|The current [[Constitution of France]] does not specify a national emblem.<ref>{{Cite constitution|article=II|polity=France|date=1958}}</ref> The [[Fasces|lictor's fasces]] is very often used to represent the French Republic, although today it holds no official status.<ref>{{cite web|url=https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|date=15 December 2022|title=THE LICTOR'S FASCES|access-date=18 May 2024|archive-date=7 April 2024|archive-url=https://web.archive.org/web/20240407081203/https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|url-status=live}}</ref> In addition to the coat of arms, France also uses a [[Diplomatic emblem of France|different emblem]] for diplomatic and consular purposes.}}
| other_symbol = [[File:Armoiries république française.svg|90px]]
| other_symbol_type = [[Diplomatic emblem of France|Diplomatic emblem]]
| national_motto = "{{Lang|fr|[[Liberté, égalité, fraternité]]|italics=no}}"
| englishmotto = ("Liberty, Equality, Fraternity")
| national_anthem = "[[La Marseillaise]]"<br /><div style="display:inline-block;margin-top:0.4em">[[File:La Marseillaise.ogg|alt=sound clip of the Marseillaise French national anthem]]</div>
| image_map = {{Switcher|[[File:EU-France (orthographic projection).svg|upright=1.15|frameless]]|France on the globe centred on Europe|[[File:EU-France.svg|upright=1.15|frameless]]|[[Metropolitan France]] (European part of France) in Europe|[[File:France and its region.png|frameless]]|France and its neighbors<!--Map restored per [[WP:CONSENSUS]] in 03:24, 11 July 2023 discussion [[Talk:France#Removal of map]]-->|[[File:Territorial waters - France.svg|upright=1.15|frameless]]|Show France, its overseas territories and [[Exclusive economic zone of France|its exclusive economic zones]]|Labelled map|default=1}}
| map_caption = {{Map caption|location_color=blue or dark green|region=Europe|region_color=dark grey|subregion=the European Union|subregion_color=green|unbulleted list|Location of the territory of the French Republic (red)|[[Adélie Land]] (Antarctic claim; hatched)}}
| capital = [[Paris]]
| coordinates = {{Coord|48|51|N|2|21|E|type:city(2,100,000)_region:FR-75C}}
| largest_city = capital
| languages_type = Official language<br />{{Nobold|and national language}}
| languages = [[French language|French]]{{Efn-ur|name=one|For information about regional languages, see [[Languages of France]].}}{{Infobox|child=yes
| regional_languages = See [[Languages of France]]
| label1 = Nationality {{Nobold|(2021)<ref>{{cite web |title=L'essentiel sur... les immigrés et les étrangers |url=https://www.insee.fr/fr/statistiques/3633212 |website=[[Insee]] |access-date=9 September 2023 |archive-date=26 June 2019 |archive-url=https://web.archive.org/web/20190626142004/https://www.insee.fr/fr/statistiques/3633212 |url-status=live }}</ref>}}
| data1 = {{Unbulleted list|92.2% [[French people|French]]|7.8% [[Demographics of France|other]]}}}}
| religion_ref = <ref name=religion2020>{{cite web|last1=Drouhot|first1=Lucas|last2=Simon|first2=Patrick|last3=Tiberj|first3=Vincent|url=https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|title=La diversité religieuse en France : transmissions intergénérationnelles et pratiques selon les origines|trans-title=Religious diversity in France: Intergenerational transmissions and practices according to the origins|publisher=[[Institut national de la statistique et des études économiques|National Institute of Statistics and Economic Studies]] (INSEE)|type=official statistics|date=30 March 2023|language=fr|archive-url=https://web.archive.org/web/20230330154402/https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|archive-date=30 March 2023}}</ref>
| religion_year = 2023
| religion = Aged 18 - 59 {{ublist|item_style=white-space;|51% [[Irreligion|No Religion]]|38% [[Christianity]]|10% [[Islam]]|0.5% [[Judaism]]|0.5% [[Buddhism]]}}
| demonym = French
| government_type = Unitary [[Semi-presidential system|semi-presidential republic]]
| leader_title1 = [[President of France|President]]
| leader_name1 = [[Emmanuel Macron]]
| leader_title2 = [[Prime Minister of France|Prime Minister]]
| leader_name2 = [[Gabriel Attal]]
| leader_title3 = [[List of presidents of the Senate of France|President of the Senate]]
| leader_name3 = [[Gérard Larcher]]
| leader_title4 = [[List of presidents of the National Assembly of France|President of the National Assembly]]
| leader_name4 = [[Yaël Braun-Pivet]]
| legislature = [[French Parliament|Parliament]]
| upper_house = [[Senate (France)|Senate]]
| lower_house = [[National Assembly (France)|National Assembly]]
| sovereignty_type = [[History of France|Establishment]]
| established_event1 = [[West Francia|Kingdom of the West Franks]] [[Treaty of Verdun]]
| established_date1 = 10 August 843
| established_event2 = [[Kingdom of France]] [[List of French monarchs#House of Capet (987–1792)|Capetian rulers of France]]
| established_date2 = 3 July 987
| established_event3 = [[Proclamation of the abolition of the monarchy|French Republic]] [[French First Republic]]
| established_date3 = 22 September 1792
| established_event4 = [[Enlargement of the European Union#Founding members|Founded]] the [[European Economic Community|EEC]]{{Efn-ur|[[European Union]] since 1993}}
| established_date4 = 1 January 1958
| established_event5 = [[Constitution of France|Current&nbsp;constitution]] [[French Fifth Republic]]
| established_date5 = 4 October 1958
| area_km2 = 643,801
| area_footnote = <ref name="Field Listing :: Area">{{Cite web |title=Field Listing :: Area |url=https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |url-status=dead |archive-url=https://web.archive.org/web/20140131115000/https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |archive-date=31 January 2014 |access-date=1 November 2015 |website=The World Factbook |publisher=CIA}} {{PD-notice}}</ref>
| area_rank = 42nd <!-- Area rank should match [[List of countries and dependencies by area]] -->
| area_sq_mi = 248,600 <!--Do not remove per [[Wikipedia:Manual of Style/Dates and numbers|WP:MOSNUM]] -->
| area_label2 = [[Metropolitan France]] ([[Institut géographique national|IGN]])
| area_data2 = {{Cvt|551695|km2}}{{Efn-ur|name=three|French [[Institut géographique national|National Geographic Institute]] data, which includes bodies of water}} ([[List of countries and dependencies by area|50th]])
| area_label3 = Metropolitan France ([[Cadastre]])
| area_data3 = {{Cvt|543940.9|km2}}{{Efn-ur|name=four|French [[Land registration|Land Register]] data, which exclude lakes, ponds and [[glacier]]s larger than 1 km<sup>2</sup> (0.386 sq mi or 247 acres) as well as the estuaries of rivers}}<ref>{{Cite journal |year=2011 |title=France Métropolitaine |url=http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |url-status=dead |journal=INSEE |archive-url=https://web.archive.org/web/20150828051307/http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |archive-date=28 August 2015}}</ref> ([[List of countries and dependencies by area|50th]])
| population_estimate = {{IncreaseNeutral}} 68,373,433<ref name="pop_est">{{Cite web |date=16 January 2023 |title=Bilan démographique 2023 Composantes de la croissance démographique, France |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |url-status=live }}</ref>
| percent_water = 0.86<ref>{{Cite web |title=Surface water and surface water change |url=https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |access-date=11 October 2020 |publisher=Organisation for Economic Co-operation and Development (OECD) |archive-date=24 March 2021 |archive-url=https://web.archive.org/web/20210324133453/https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |url-status=live }}</ref>
| population_estimate_year = January 2024
| population_estimate_rank = 20th
| population_label2 = Density
| population_data2 = {{Pop density|68373433|643801|km2}} ([[List of countries and territories by population density|106th]])
| population_label3 = Metropolitan France, estimate {{As of|lc=y|January 2024}}
| population_data3 = {{IncreaseNeutral}} 66,142,961<ref>{{Cite web |date=16 January 2024 |title=Bilan démographique 2023 Composantes de la croissance démographique, France métropolitaine |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |url-status=live }}</ref> ([[List of countries and dependencies by population|23rd]])
| population_density_km2 = 122
| population_density_sq_mi = 313 <!-- Do not remove per [[Wikipedia:Manual of Style/Dates and numbers|WP:MOSNUM]] -->
| population_density_rank = 89th
| GDP_PPP = {{increase}} $3.988 trillion<ref name="IMFWEO.FR">{{cite web |url=https://www.imf.org/en/Publications/WEO/weo-database/2024/April/weo-report?c=132,&s=NGDPD,PPPGDP,NGDPDPC,PPPPC,&sy=2022&ey=2029&ssm=0&scsm=1&scc=0&ssd=1&ssc=0&sic=0&sort=country&ds=.&br=1 |title=World Economic Outlook Database, April 2024 Edition. (France) |publisher=[[International Monetary Fund]] |website=www.imf.org |date=16 April 2024 |access-date=16 April 2024}}</ref>
| GDP_PPP_year = 2024
| GDP_PPP_rank = 10th
| GDP_PPP_per_capita = {{increase}} $60,339<ref name="IMFWEO.FR" />
| GDP_PPP_per_capita_rank = 26th
| GDP_nominal = {{increase}} $3.130 trillion<ref name="IMFWEO.FR" />
| GDP_nominal_year = 2024
| GDP_nominal_rank = 7th
| GDP_nominal_per_capita = {{increase}} $47,359<ref name="IMFWEO.FR" />
| GDP_nominal_per_capita_rank = 23rd
| Gini = 29.8 <!-- number only -->
| Gini_year = 2022
| Gini_change = increase <!-- increase/decrease/steady -->
| Gini_ref = <ref name="eurogini">{{Cite web |title=Gini coefficient of equivalised disposable income EU-SILC survey |url=https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |access-date=25 November 2023 |website=ec.europa.eu |publisher=[[Eurostat]] |archive-date=9 October 2020 |archive-url=https://web.archive.org/web/20201009091832/https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |url-status=live }}</ref>
| HDI = 0.910<!-- number only -->
| HDI_year = 2022 <!-- Please use the year to which the data refers, not the publication year -->
| HDI_change = steady <!-- increase/decrease/steady -->
| HDI_ref = <ref name="UNHDR">{{cite web|url=https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|title=Human Development Report 2023/24|language=en|publisher=[[United Nations Development Programme]]|date=13 March 2024|page=288|access-date=13 March 2024|archive-date=13 March 2024|archive-url=https://web.archive.org/web/20240313164319/https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|url-status=live}}</ref>
| HDI_rank = 28th
| currency = {{Unbulleted list
| [[Euro]] ([[Euro sign|€]]) ([[ISO 4217|EUR]]){{Efn-ur|name=six|Whole of the French Republic except the overseas territories in the Pacific Ocean}}
| [[CFP franc]] (XPF){{Efn-ur|name=seven|French overseas territories in the Pacific Ocean only}}
}}
| time_zone = [[Central European Time]]
| utc_offset = +1
| utc_offset_DST = +2
| time_zone_DST = [[Central European Summer Time]]{{Efn-ur|name=eight|Daylight saving time is observed in metropolitan France and [[Saint Pierre and Miquelon]] only.}}
| DST_note = Note: Various other time zones are observed in overseas France.{{Efn-ur|name=nine|Time zones across France span from UTC−10 ([[French Polynesia]]) to UTC+12 ([[Wallis and Futuna]])}}<br /> Although France is in the [[Coordinated Universal Time|UTC]] (Z) ([[Western European Time]]) zone, [[UTC+01:00]] ([[Central European Time]]) has been enforced as the standard time since 25 February 1940, upon [[German military administration in occupied France during World War II|German occupation in WW2]], with a +0:50:39 offset (and +1:50:39 during [[Daylight saving time|DST]]) from Paris [[Local mean time|LMT]] (UTC+0:09:21).<ref>{{Cite web |title=Time Zone & Clock Changes in Paris, Île-de-France, France |url=https://www.timeanddate.com/time/zone/france/paris |access-date=9 October 2021 |website=timeanddate.com |archive-date=23 October 2021 |archive-url=https://web.archive.org/web/20211023233753/https://www.timeanddate.com/time/zone/france/paris |url-status=live }}</ref>
| date_format = dd/mm/yyyy ([[Anno Domini|AD]])
| drives_on = right
| calling_code = [[Telephone numbers in France|+33]]{{Efn-ur|name=eleven|The overseas regions and collectivities form part of the [[Telephone numbers in France|French telephone numbering plan]], but have their own country calling codes: [[Guadeloupe]] +590; [[Martinique]] +596; [[French Guiana]] +594; [[Réunion]] and [[Mayotte]] +262; [[Saint Pierre and Miquelon]] +508. The overseas territories are not part of the French telephone numbering plan; their country calling codes are: [[New Caledonia]] +687; [[French Polynesia]] +689; [[Wallis and Futuna]] +681.}}
| cctld = [[.fr]]{{Efn-ur|name=ten|In addition to [[.fr]], several other Internet TLDs are used in French overseas ''départements'' and territories: [[.re]], [[.mq]], [[.gp]], [[.tf]], [[.nc]], [[.pf]], [[.wf]], [[.pm]], [[.gf]] and [[.yt]]. France also uses [[.eu]], shared with other members of the European Union. The [[.cat]] domain is used in [[Catalan Countries|Catalan-speaking territories]].}}
| footnotes = Source gives area of metropolitan France as 551,500 km<sup>2</sup> (212,900 sq mi) and lists overseas regions separately, whose areas sum to 89,179 km<sup>2</sup> (34,432 sq mi). Adding these gives the total shown here for the entire French Republic. [[The World Factbook]] reports the total as 643,801 km<sup>2</sup> (248,573 sq mi).
| flag_p1 = Flag of France (1794–1815, 1830–1974, 2020–present).svg
}}
'''France''',{{efn|{{IPA|fr|fʁɑ̃s|lang|LL-Q150 (fra)-Fhala.K-France.wav}}<!-- Do not add English pronunciation per [[Wikipedia:Manual of Style/Lead Section]]. -->}} officially the '''French Republic''',{{efn|{{Lang-fr|link=no|République française}} {{IPA-fr|ʁepyblik fʁɑ̃sɛːz|}}}} is a country located primarily in [[Western Europe]]. It also includes [[Overseas France|overseas regions and territories]] in the [[Americas]] and the [[Atlantic Ocean|Atlantic]], [[Pacific Ocean|Pacific]] and [[Indian Ocean|Indian]] oceans,{{Efn-ur|name=twelve|[[French Guiana]] is in South America; [[Guadeloupe]] and [[Martinique]] are in the Caribbean Sea; and [[Réunion]] and [[Mayotte]] are in the Indian Ocean, off the coast of Africa. All five [[Administrative divisions of France|are considered integral parts of the French Republic]]. France also comprises [[Saint Pierre and Miquelon]] in North America; [[Saint Barthélemy]] and [[Saint Martin (island)|Saint Martin]] in the Caribbean; [[French Polynesia]], [[New Caledonia]], [[Wallis and Futuna]] and [[Clipperton Island]] in the Pacific Ocean; and the [[French Southern and Antarctic Lands]].}} giving it one of the largest discontiguous [[exclusive economic zone]]s in the world. [[Metropolitan France]] shares borders with [[Belgium]] and [[Luxembourg]] to the north, [[Germany]] to the north east, [[Switzerland]] to the east, [[Italy]] and [[Monaco]] to the south east, [[Andorra]] and [[Spain]] to the south, and a maritime border with the [[United Kingdom]] to the north west. Its metropolitan area extends from the [[Rhine]] to the Atlantic Ocean and from the [[Mediterranean Sea]] to the [[English Channel]] and the [[North Sea]]. Its overseas territories include [[French Guiana]] in [[South America]], [[Saint Pierre and Miquelon]] in the North Atlantic, the [[French West Indies]], and many islands in [[Oceania]] and the [[Indian Ocean]]. Its [[Regions of France|eighteen integral regions]] (five of which are overseas) span a combined area of {{Cvt|643801|km2}} and have a total population of 68.4 million {{As of|2024|January|lc=y}}.<ref name="Field Listing :: Area"/><ref name=pop_est/> France is a [[Unitary state|unitary]] [[Semi-presidential system|semi-presidential]] [[republic]] with its capital in [[Paris]], the [[List of communes in France with over 20,000 inhabitants|country's largest city]] and main cultural and commercial centre; other major [[Urban area (France)|urban areas]] include [[Marseille]], [[Lyon]], [[Toulouse]], [[Lille]], [[Bordeaux]], [[Strasbourg]], [[Nantes]] and [[Nice]].
Metropolitan France was settled during the [[Iron Age]] by [[List of ancient Celtic peoples and tribes|Celtic tribes]] known as [[Gauls]] before [[Roman Gaul|Rome annexed the area]] in 51 BC, leading to a distinct [[Gallo-Roman culture]]. In the [[Early Middle Ages]], the [[Germanic peoples|Germanic]] [[Franks]] formed the Kingdom of [[Francia]], which became the heartland of the [[Carolingian Empire]]. The [[Treaty of Verdun]] of 843 partitioned the empire, with [[West Francia]] evolving into the [[Kingdom of France]]. In the [[High Middle Ages]], France was a powerful but decentralized [[Feudalism|feudal]] kingdom, but from the mid-14th to the mid-15th centuries, France was plunged into a dynastic conflict with [[Kingdom of England|England]] known as the [[Hundred Years' War]]. In the 16th century, the [[French Renaissance]] saw culture flourish and a [[French colonial empire]] rise.<ref name=":8">{{Cite book |url=https://books.google.com/books?id=UX8aeX_Lbi4C&pg=PA1 |title=Memory, Empire, and Postcolonialism: Legacies of French Colonialism |publisher=Lexington Books |year=2005 |isbn=978-0-7391-0821-5 |editor-last=Hargreaves, Alan G. |page=1}}</ref> Internally, France was dominated by the conflict with the [[House of Habsburg]] and the [[French Wars of Religion]] between [[Catholics]] and [[Huguenots]]. France was successful in the [[Thirty Years' War]] and further increased its influence during the reign of [[Louis XIV]].<ref>{{Cite book |last1=R.R. Palmer |url=https://archive.org/details/historyofmodernw00palm |title=A History of the Modern World |last2=Joel Colton |year=1978 |edition=5th |page=[https://archive.org/details/historyofmodernw00palm/page/161 161] |url-access=registration}}</ref>
The [[French Revolution]] of 1789 overthrew the {{Lang|fr|[[Ancien Régime]]|italic=no}} and produced the [[Declaration of the Rights of Man and of the Citizen|Declaration of the Rights of Man]], which expresses the nation's ideals to this day. France reached its political and military zenith in the early 19th century under [[Napoleon|Napoleon Bonaparte]], subjugating part of continental Europe and establishing the [[First French Empire]]. The [[French Revolutionary Wars|French Revolutionary]] and [[Napoleonic Wars]] significantly shaped the course of European history. The collapse of the empire initiated a period of relative decline, in which France endured the [[Bourbon Restoration]] until the founding of the [[French Second Republic]], which was succeeded by the [[Second French Empire]] upon [[Napoleon III]]'s takeover. His empire collapsed during the [[Franco-Prussian War]] in 1870. This led to the establishment of the [[Third French Republic]]. Subsequent decades saw a period of economic prosperity and cultural and scientific flourishing known as the [[Belle Époque]]. France was one of the [[Triple Entente|major participants]] of [[World War I]], from which [[Treaty of Versailles|it emerged victorious]] at great human and economic cost. It was among the [[Allies of World War II|Allied powers]] of [[World War II]], but it surrendered and [[German military administration in occupied France during World War II|was occupied]] by the [[Axis powers|Axis]] in 1940. Following [[Liberation of France|its liberation in 1944]], the short-lived [[French Fourth Republic|Fourth Republic]] was established and later dissolved in the course of the defeat in the [[Algerian War]] and the [[Rif War|Moroccan War of Independence]]. The current [[French Fifth Republic|Fifth Republic]] was formed in 1958 by [[Charles de Gaulle]]. Algeria and most French colonies became independent in the 1960s, with the majority retaining [[Françafrique|close economic and military ties with France]].
France retains its centuries-long status as a global centre [[French art|of art]], [[Science and technology in France|science]], and [[French philosophy|philosophy]]. [[List of World Heritage Sites in France|It hosts]] the [[World Heritage Sites by country|third-largest]] number of [[UNESCO]] [[World Heritage Site]]s and is the world's [[World Tourism rankings|leading tourist destination]], receiving over 89&nbsp;million foreign [[Tourism in France|visitors in 2018]].<ref>{{Cite web |date=17 May 2019 |title=France posts new tourist record despite Yellow Vest unrest |url=https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |website=France 24 |access-date=18 May 2024 |archive-date=12 May 2023 |archive-url=https://web.archive.org/web/20230512192740/https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |url-status=live }}</ref> France is a [[developed country]] with a [[List of countries by GDP (nominal) per capita|high nominal per capita income globally]] and [[Economy of France|its advanced economy]] ranks among the [[List of countries by GDP (nominal)|largest in the world]]. It is a [[great power]] in global affairs,<ref>Jack S. Levy, ''War in the Modern Great Power System, 1495–1975'', (2014) p. 29</ref> being one of the five [[permanent members of the United Nations Security Council]] and an official [[List of states with nuclear weapons|nuclear-weapon state]]. France is a founding and [[Big Four (Western Europe)|leading]] [[Member state of the European Union|member of the European Union]] and the [[eurozone]],<ref name="superficy" /> as well as a key member of the [[Group of Seven]], [[NATO|North Atlantic Treaty Organization]] (NATO), [[OECD|Organisation for Economic Co-operation and Development]] (OECD), and [[Organisation internationale de la Francophonie|Francophonie]].
==Etymology and pronunciation==
{{Main|Name of France}}
Originally applied to the whole [[Francia|Frankish Empire]], the name ''France'' comes from the [[Latin]] {{Lang|la|[[Francia]]}}, or "realm of the [[Franks]]".<ref>{{Cite web |title=History of France |url=http://www.discoverfrance.net/France/History/DF_history.shtml |url-status=dead |archive-url=https://web.archive.org/web/20110824051936/http://www.discoverfrance.net/France/History/DF_history.shtml |archive-date=24 August 2011 |access-date=17 July 2011 |publisher=Discoverfrance.net}}</ref> Modern France is still named today {{Lang|it|Francia}} in Italian and Spanish, while {{Lang|de|Frankreich}} in German, {{Lang|nl|Frankrijk}} in Dutch and {{Lang|sv|Frankrike}} in Swedish and Norwegian all mean "Land/realm of the Franks".
The [[name of the Franks]] is related to the English word ''frank'' ("free"): the latter stems from the [[Old French]] {{Lang|ang|franc}} ("free, noble, sincere"), ultimately from [[Medieval Latin]] ''francus'' ("free, exempt from service; freeman, Frank"), a generalisation of the tribal name that emerged as a [[Late Latin]] borrowing of the reconstructed [[Frankish language|Frankish]] [[Endonym and exonym|endonym]] {{Lang|frk|*Frank}}.<ref>Examples: {{Cite encyclopedia |title=frank |encyclopedia=American Heritage Dictionary}} {{Cite encyclopedia|title=frank|encyclopedia=Webster's Third New International Dictionary}} And so on.</ref><ref name=":0"/> It has been suggested that the meaning "free" was adopted because, after the conquest of [[Gaul]], only Franks were free of taxation,<ref>{{Cite book |first=Michel |last=Rouche |title=A History of Private Life: From Pagan Rome to Byzantium |publisher=Belknap Press |year=1987 |isbn=978-0-674-39974-7 |editor-first=Paul |editor-last=Veyne |page=425 |chapter=The Early Middle Ages in the West |oclc=59830199}}</ref> or more generally because they had the status of freemen in contrast to servants or slaves.<ref name=":0"/> The etymology of ''*Frank'' is uncertain. It is traditionally derived from the [[Proto-Germanic language|Proto-Germanic]] word {{Lang|gem-x-proto|frankōn}}, which translates as "javelin" or "lance" (the throwing axe of the Franks was known as the ''[[francisca]]''),<ref>{{Cite book |last1=Tarassuk |first1=Leonid |url=https://books.google.com/books?id=UJbyPwAACAAJ |title=The Complete Encyclopedia of Arms and Weapons: the most comprehensive reference work ever published on arms and armor from prehistoric times to the present with over 1,250 illustrations |last2=Blair |first2=Claude |publisher=Simon & Schuster |year=1982 |isbn=978-0-671-42257-8 |page=186 |access-date=5 July 2011}}</ref> although these weapons may have been named because of their use by the Franks, not the other way around.<ref name=":0">{{Cite web |title=Origin and meaning of Frank |url=https://www.etymonline.com/word/frank |website=Online Etymology Dictionary |language=en |access-date=18 May 2024 |archive-date=15 May 2024 |archive-url=https://web.archive.org/web/20240515001926/https://www.etymonline.com/word/frank |url-status=live }}</ref>
In English, 'France' is pronounced {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in American English and {{IPAc-en|f|r|ɑː|n|s}} {{Respell|FRAHNSS}} or {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in British English. The pronunciation with {{IPAc-en|ɑː}} is mostly confined to accents with the [[Trap–bath split|trap–bath split]] such as [[Received Pronunciation]], though it can also be heard in some other dialects such as [[Cardiff English]], in which {{IPAc-en|f|r|ɑː|n|s}} is in free variation with {{IPAc-en|f|r|æ|n|s}}.<ref>{{Cite book |last=Wells |first=John C. |title=Longman Pronunciation Dictionary |publisher=Longman |year=2008 |isbn=978-1-4058-8118-0 |edition=3rd}}; {{Cite book|last1=Collins|first1=Beverley|last2=Mees|first2=Inger M.|editor-last1=Coupland|editor-first1=Nikolas|editor-last2=Thomas|editor-first2=Alan Richard|year=1990|title=English in Wales: Diversity, Conflict, and Change|chapter=The Phonetics of Cardiff English|publisher=Multilingual Matters Ltd.|page=96|isbn=978-1-85359-032-0|url=https://books.google.com/books?id=tPwYt3gVbu4C}}</ref>
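The sample articles in this commit are plain-text documents intended for the RAG pipeline. A minimal sketch of pushing one into the running app through its /upload route; the host/port and the samples/France.txt path are assumptions for illustration:

import requests

# Upload a sample .txt document to the Flask RAG app.
# /upload expects multipart form data under the key "file" and accepts only .txt files.
with open("samples/France.txt", "rb") as f:  # hypothetical sample path
    resp = requests.post("http://localhost:5000/upload", files={"file": f})

# On success the app returns HTTP 200 with a confirmation message.
print(resp.status_code, resp.json())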


samples/ReActPrompts.txt Normal file

@ -0,0 +1,67 @@
Here are ten ReAct queries that each use at least four agents in sequence:
1. **Query**: "Generate a research outline on the effects of climate change on polar bear populations, including recent studies, statistical data, and a summary of findings."
- Agents: Research Paper Finder, Data Summarizer, Statistical Analysis, Research Outline Generator
2. **Query**: "Create a literature review on machine learning applications in healthcare, with a summary of each paper and a keyword extraction for the main topics."
- Agents: Research Paper Finder, Literature Review Organizer, Data Summarizer, Keyword Extractor
3. **Query**: "Design an experimental study on the impact of social media on teenagers' mental health, including hypothesis generation, methodology advice, and survey questions."
- Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Survey Designer
4. **Query**: "Find funding opportunities for a research project on renewable energy technologies, generate a thesis statement, and create a bibliography for the related literature."
- Agents: Funding Finder, Research Paper Finder, Thesis Statement Generator, Bibliography Manager
5. **Query**: "Analyze the statistical significance of data from a survey on remote work productivity, generate graphs, and provide a summary of the findings."
- Agents: Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer
6. **Query**: "Create a comprehensive research outline on the impact of diet on cardiovascular health, including a literature review, data summary, and keyword extraction."
- Agents: Research Paper Finder, Research Outline Generator, Data Summarizer, Keyword Extractor
7. **Query**: "Design a study on the effectiveness of different teaching methods in elementary education, including hypothesis generation, methodology advice, and statistical analysis."
- Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Statistical Analysis
8. **Query**: "Generate a citation for a recent article on quantum computing, summarize the article, and include it in a bibliography for a related research project."
- Agents: Citation Generator, Research Paper Finder, Data Summarizer, Bibliography Manager
9. **Query**: "Develop a research proposal on the socio-economic impacts of urbanization, including literature review, hypothesis generation, and data summary."
- Agents: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Data Summarizer
10. **Query**: "Create a detailed research outline for a study on the psychological effects of video games on adolescents, including a survey design and a review of recent studies."
- Agents: Research Paper Finder, Research Outline Generator, Survey Designer, Literature Review Organizer
These queries are designed to invoke multiple agents in a sequence, allowing you to test the integration and functionality of your ReAct system effectively.
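For these prompts to be actionable, the planning LLM is expected to emit one bracketed step per line in the form "[Agent Name] task", closing with a "Final Answer:" line. Below is a minimal sketch of splitting such a plan into (agent, task) pairs; the plan text is illustrative, not real model output, and the step format is an assumption:

# Illustrative ReAct-style plan; the "[Agent Name] task" step format is an assumption.
plan = """[Research Paper Finder] Find recent studies on polar bear populations
[Data Summarizer] Summarize the key findings
[Statistical Analysis] Test whether population trends are significant
[Research Outline Generator] Draft the final outline
Final Answer: Research outline assembled from the steps above"""

steps = []
for line in plan.splitlines():
    if line.startswith("[") and "]" in line:
        # Normalize the agent name to a snake_case identifier and keep the task text.
        agent = line.split("[", 1)[1].split("]", 1)[0].strip().lower().replace(" ", "_")
        task = line.split("]", 1)[1].strip()
        steps.append((agent, task))

print(steps[0])  # ('research_paper_finder', 'Find recent studies on polar bear populations')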
Here are ten more queries that utilize the ReAct paradigm, showcasing the various agents and tasks:
1. **Query**: Develop a comprehensive research plan to investigate the impact of climate change on polar bear populations, including data analysis and a visualization of temperature trends.
- **Agents/Tasks**: Research Paper Finder, Data Summarizer, Statistical Analysis, Graph Generator, Research Outline Generator.
2. **Query**: Create a detailed marketing plan for a new tech startup, including a competitor analysis, keyword extraction, and a list of potential investors.
- **Agents/Tasks**: Research Paper Finder, Keyword Extractor, Data Summarizer, Funding Finder, Research Outline Generator.
3. **Query**: Design a user study to evaluate the usability of a new mobile app, including survey questions, statistical analysis, and a literature review on similar studies.
- **Agents/Tasks**: Survey Designer, Research Paper Finder, Literature Review Organizer, Statistical Analysis, Research Outline Generator.
4. **Query**: Compile a comprehensive report on the latest advancements in renewable energy technologies, including citations and a summary of key findings.
- **Agents/Tasks**: Research Paper Finder, Citation Generator, Data Summarizer, Research Outline Generator, Keyword Extractor.
5. **Query**: Plan an experimental study on the effects of sleep deprivation on cognitive performance, including a survey design and a review of relevant methodologies.
- **Agents/Tasks**: Research Paper Finder, Survey Designer, Methodology Advisor, Research Outline Generator, Experimental Design Helper.
6. **Query**: Create a systematic review of the literature on artificial intelligence in healthcare, including keyword extraction and a summary of major trends.
- **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Keyword Extractor, Data Summarizer, Research Outline Generator.
7. **Query**: Develop a thesis proposal on the economic impact of remote work, including a literature review, hypothesis generation, and a bibliography.
- **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Research Outline Generator, Bibliography Manager.
8. **Query**: Generate a detailed report on the effects of social media on mental health, including data cleaning, statistical analysis, and visualization of the findings.
- **Agents/Tasks**: Research Paper Finder, Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer.
9. **Query**: Design a comprehensive survey to study consumer preferences for electric vehicles, including a methodology overview and a review of similar studies.
- **Agents/Tasks**: Survey Designer, Research Paper Finder, Methodology Advisor, Literature Review Organizer, Research Outline Generator.
10. **Query**: Create a funding proposal for a project on sustainable agriculture practices, including a literature review, a list of potential funding sources, and a bibliography.
- **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Funding Finder, Research Outline Generator, Bibliography Manager.
These queries utilize multiple agents and tasks to demonstrate the ReAct paradigm, highlighting the interplay between different agents and the sequential nature of the process.
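A hedged usage sketch for exercising one of these sample queries against the running stack: the /query route appears in this commit, but the JSON payload key and the response shape shown here are assumptions.

import requests

# Send a sample ReAct query to the app (host/port and payload key assumed).
query = ("Generate a research outline on the effects of climate change on polar bear "
         "populations, including recent studies, statistical data, and a summary of findings.")
resp = requests.post("http://localhost:5000/query", json={"query": query})

# The app tracks long-running work in a task-status dictionary, so the response
# plausibly carries a task id to poll; print whatever comes back.
print(resp.status_code, resp.text)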


@ -0,0 +1,356 @@
The State of the Polar Bear Report 2023
Susan Crockford
Briefing 67, The Global Warming Policy Foundation
© Copyright 2024, The Global Warming Policy Foundation
Contents
About the author
Foreword
Executive summary
1. Introduction
2. Conservation status
3. Population trends
4. Habitat and primary productivity
5. Human/bear interactions
6. Discussion
Bibliography
Notes
About the Global Warming Policy Foundation
About the author
Dr Susan Crockford is an evolutionary biologist and has been working for more than 40 years in archaeozoology, paleozoology and forensic zoology.1 She is a former adjunct professor at the University of Victoria, British Columbia and works full time for a private consulting company she co-owns (Pacific Identifications Inc). She is the author of Eaten: A Novel (a science-based polar bear attack thriller), Polar Bear Facts and Myths (for ages seven and up, also available in French, German, Dutch, Portuguese, and Norwegian), Polar Bears Have Big Feet (for preschoolers), and several fully referenced books including Polar Bear Evolution: A Model for the Origin of Species, Sir David Attenborough and the Walrus Deception, The Polar Bear Catastrophe That Never Happened, and Polar Bears: Outstanding Survivors of Climate Change,2 as well as a scientific paper on polar bear conservation status and a peer-reviewed paper on the distribution of ancient polar bear remains.3 She has authored several earlier briefing papers, reports, and videos for GWPF, as well as opinion pieces for major news outlets, on polar bear and walrus ecology and conservation.4 Susan Crockford blogs at www.polarbearscience.com.
Foreword
This report is intended to provide a brief update on the habitat and conservation status of polar bears, with commentary regarding inconsistencies and sources of bias found in recent literature that won't be found elsewhere. It is a summary of the most recent information on polar bears, relative to historical records, based on a review of 2023 scientific literature and media reports, and, in places, reiterates or updates information provided in previous papers. This publication is intended for a wide audience, including scientists, teachers, students, decision-makers, and members of the general public interested in polar bears and the Arctic sea ice environment.
Executive summary
2023 marked 50 years of international cooperation to protect polar bears across the Arctic. Those efforts should be hailed as a conservation success story: from a late-1960s population estimate by the US Fish and Wildlife Service of about 12,000 individuals, numbers have almost tripled, to just over 32,000 in 2023 (with a wide range of potential error for both estimates).
• There were no reports from the Arctic in 2023 indicating polar bears were being harmed due to lack of suitable habitat, in part because Arctic sea ice in summer has not declined since 2007.
• Contrary to expectations, a study in Svalbard found a decrease in polar bears killed in defense of life or property over the last 40 years, despite profound declines in sea ice over the last two decades.
• A survey of Southern Hudson Bay polar bears in 2021 showed an astonishing 30% increase over five years, which adds another 223 bears to the global total.
• A concurrent survey of Western Hudson Bay polar bears in 2021 showed that numbers had not declined since 2011, which also means they have not declined since 2004. Movement of polar bears across the boundaries with neighbouring subpopulations may account for the appearance of a decline, when none actually occurred.
• The IUCN Polar Bear Specialist Group has ignored a 2016 recommendation that the boundaries of three Hudson Bay subpopulations (Western HB, Southern HB, and Foxe Basin) be adjusted to account for the genetic distinctiveness of bears inhabiting the Hudson Bay region. A similar boundary issue in the western Arctic, between the Chukchi Sea and the Southern and Northern Beaufort subpopulations, based on known movements of bears between regions, has been acknowledged since 2014 but has not yet been resolved.
• The US Fish and Wildlife Service and the IUCN Polar Bear Specialist Group, in their 2023 reports, failed to officially acknowledge the newfound South-East Greenland bears as the 20th subpopulation, despite undisputed evidence that this is a genetically distinct and geographically isolated group. Numbers are estimated at 234 individuals.
1. Introduction
Fifty years ago, on 15 November 1973, the five Arctic nations of Canada, Russia, the USA, Norway and Greenland signed an international treaty to protect polar bears against the rampant overhunting that had taken place in the first half of the 20th century, and which had decimated many subpopulations. The treaty was effective, and by the late 1990s, polar bear populations that could be studied had at least doubled, making it a huge conservation success story. However, in 2009, the wording of the treaty was amended to protect the bears against 'on-going and future loss of sea ice habitat', which was assumed to be caused by human-generated greenhouse gas emissions. This was in line with similar declarations by the International Union for the Conservation of Nature (IUCN) and the US Endangered Species Act (USESA). These pessimistic conservation assessments, based on computer-modelled future declines rather than observed conditions, have been upheld ever since, even as the predicted relationship between polar bear survival and sea-ice loss has failed to emerge in the observational data.5
The current population of polar bears is large, and their historical range has not diminished due to habitat loss since 1979. Indeed, previously inhabited areas have been recolonised as numbers have recovered: recent data suggest that territory in Davis Strait used before 1970 during the summer ice-free period by all ages and by pregnant females for maternity denning is now being used once again.6
2. Conservation status
The IUCN, in their 2015 Red List assessment, provided by the Polar Bear Specialist Group (PBSG), again listed polar bears as vulnerable to extinction, just as it did in 2006. Similarly, in 2023 the US Fish and Wildlife Service (USFWS) upheld its 2008 conclusion that polar bears were threatened with extinction. In both instances, conservation status assessments have been based on computer-modelled predictions of future sea-ice conditions and assumed resultant population declines rather than current conditions.7
In Canada, the 2018 COSEWIC report assigned a status of special concern to the species. This assessment had not changed by 2023.8
3. Population size at 2023
Global
The latest official estimate for the global population, from 17 October 2023, is the PBSG estimate of 26,000 (range 22,000–31,000), arrived at in 2015 and unadjusted since then. In their 2023 assessment, the PBSG has failed to increase this estimate, even to account for undisputed, statistically-significant increases in two subpopulations and the addition of a newly-discovered subpopulation, which should reasonably boost their very conservative mid-point estimate to about 26,600: Kane Basin is up by 133, Southern Hudson Bay is up by 223, and newly-discovered SE Greenland adds another 234.9
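That adjusted mid-point follows from simple addition of the report's figures: 26,000 + 133 + 223 + 234 = 26,590, which rounds to roughly 26,600.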
However, survey results postdating preparation of the 2015 assessment, including those made public after July 2021 (for Davis Strait, Chukchi Sea, SE Greenland, Western Hudson Bay, and Southern Hudson Bay), plausibly brought the mid-point total to just over 32,000 (Figure 1).10
A plan to survey all Russian polar bear subpopulations between 2021 and 2023 seems to have been put on hold. In addition, an abundance estimate for the Viscount Melville subpopulation in the western Canadian Arctic has still not been released, even though a three-year survey completed in 2014 has produced other published data.11 Surveys of Lancaster Sound and East Greenland were completed in spring 2023, and these results, when released, may put the global population mid-point estimate well above 32,000.12 While there is a wide margin of potential error attached to this number, it is nowhere near the figure of 7,493 (6,660–8,325), implicit in the prediction that two thirds of the global population of polar bears would be gone by now, given the sea ice conditions prevailing since 2007.13
Note that the 2023 USFWS Status Report did not include the Kara Sea estimate of 3,200 bears, the Laptev Sea estimate of 1,000 bears, or the East Greenland estimate of 2,000 bears, figures that were used for the 2015 IUCN assessment. It also used the lowest of three available 2016 estimates for the Chukchi Sea, as discussed in the State of the Polar Bear Report 2021.14 Although the USFWS report was published in August 2023, it also did not include results of the 2021 assessments of the Western and Southern Hudson Bay subpopulations that were published in November 2022, or the newly-discovered subpopulation of South East Greenland reported in June 2022.15
[Figure 1: Estimates of the global polar bear population, 1960 to date; y-axis in thousands (0–60), x-axis 1960–2020.]
Subpopulation survey results published in 2022
For detailed discussions of the changing status and abundance issues over time for all 19 subpopulations, see the State of the Polar Bear Report 2018.16
Western Hudson Bay (WH)
An aerial survey conducted in September 2021 generated a new subpopulation estimate of 618 (range 385–852), an apparent decline of 27% since the 2016 estimate of 842 (range 562–1121) and about a 40% decline from a 2011 estimate of 949 (range 618–1280) that used similar survey methods. However, the WH 2021 report authors stated categorically that this apparent decline since 2011 was not statistically significant, in part due to evidence that some bears moved into neighbouring subpopulations, combined with the large margins of error. While it seems inconceivable that a decline of 40% over 10 years could be statistically insignificant, recall that a similar conclusion was reached in 2015 regarding the 42% increase in abundance of Svalbard bears. Since the estimate calculated in 2004 was 935 (range 794–1076), it seems the abundance of WH polar bears has not changed since 2004.17 Note that a more comprehensive survey was conducted in 2011, generating an estimate of 1030 (range 754–1406). This became the official WH estimate used by the PBSG.18
Given the conclusions of the 2021 survey that the 2016 and 2021 estimates were not statistically different from the 2011 estimate, it appears that the 2011 comprehensive estimate of 1030 may still be the most valid figure for WH.
The 2021 WH survey authors also made it clear that the most recent population estimate was not associated with poor ice conditions, since sea-ice levels had been as good as the 1980s in four out of the five years between 2017 and 2021. Confoundingly, they could not explain why adult females and subadults were underrepresented in the population.
No evidence was provided for lack of prey, and although emigration to neighbouring Southern Hudson Bay was largely dismissed as an explanation, the possibility of a movement north into Foxe Basin was not explored.
This is odd, since a 2016 genetic study suggested that the northern boundary for WH polar bears should be moved to the north of Southampton Island (a major denning area currently included in FB) and the SH boundary to the north of Akimiski Island in James Bay, adding the entire southern Hudson Bay coast in Ontario, as well as the Belcher Islands, to WH (currently included in SH), leaving only James Bay to represent SH.19
In 2023, the PBSG indicated the WH subpopulation was 'likely decreasing', based on the 2021 estimate of 618 bears. However, they did not include the caveat from the survey report that this apparent decline was not statistically significant, and also did not incorporate the conclusion of the 2022 Canadian Polar Bear Technical Committee (PBTC) that indigenous knowledge (IK) assessed this subpopulation as 'increased'. Similarly, the 2023 assessment by the USFWS listed WH as 'likely decreased', based on the 2016 survey only (2021 survey results were not included). It acknowledged that in 2022 IK considered this subpopulation to have increased.20
Southern Hudson Bay (SH)
An aerial survey conducted in September 2021 generated a new subpopulation estimate of 1119 (range 860–1454), which represented a 30% increase over five years. The result was considered robust, and reflective of the true size of the population. However, another estimate, of 1003 (range 773–1302), was generated based on the same data. This was considered more comparable to the 2016 estimate of 780 (range 590–1029). While the authors did not explicitly address the issue of statistical significance, they concluded that a natural increase in numbers, via increased cub production and survival, must have taken place in conjunction with good sea ice conditions from 2017 to 2020, perhaps in addition to immigration from another unidentified subpopulation.21
In their 2023 assessment, the IUCN PBSG discussed the apparent increased abundance of SH bears, but did not unequivocally state that the subpopulation had increased, instead only implying that an increase may have been possible ('years of relatively good ice conditions, combined with comparatively reduced harvest from 2016–2021 may have buffered the population against further decline or allowed for recovery'). They also did not include the 2022 assessment by the PBTC that IK considered the SH subpopulation was 'stable/likely increased' (i.e. stable in the James Bay portion, likely increased in southeastern Hudson Bay).22
The 2023 assessment by the USFWS listed SH as 'likely decreased', based only on 2016 survey results (2021 survey results were not included), but did acknowledge that in 2022, IK considered this subpopulation to be 'stable/likely increased'.23
Southeast Greenland (SG)
As part of a multiyear project on the status of SG polar bears that began in 2011, surveys were conducted during mid-March and mid-April of 2015–2017 for bears that lived below 64°N latitude. The results were compared with data from bears living in EG further north, which had been collected up to 2021. This southern region of Greenland had not previously been surveyed, or even visited by polar bear scientists, and there are no permanent human inhabitants. Few Inuit hunters even venture into the region.24
Based on capture-recapture data, a population estimate of 234 (range 111–462) was generated for SG. Weight (indicating body condition or fatness) of almost two dozen females captured in SG averaged 186 kg, which was similar to females in Svalbard in the western Barents Sea (185 kg) in the 1990–2000 period and in EG in recent years (186 kg).
Most surprisingly, there was strong evidence that these SG polar bears are the most genetically distinct subpopulation in the Arctic, indicating a lack of interbreeding with bears in EG for at least 200 years.25
Much emphasis was given by study authors Kristin Laidre and colleagues to their interpretation that bears in these SG fjords frequently used glacier ice to hunt seals during the summer; in other locations bears only do so occasionally. Seals feed in such glacier-front habitats in summer because primary productivity is high: melting glaciers in the fjords attract fish because their food, marine plankton, is plentiful. However, the only evidence provided of seal-hunting behaviour by polar bears in summer in SG is one photo, taken by an unidentified photographer, of a bear on glacier ice beside a seal kill, taken in September 2016. As noted above, the SG polar bear surveys were conducted in March and April and therefore frequent summer hunting of seals could not have been observed by the authors, but was simply assumed to have happened.
In addition, although the authors imply that glacier-front habitat is rare, it is in fact rather common across the Arctic and widely used by polar bears year-round, because the sea ice covering such fjords in late winter and spring (including those in SG) is used by ringed seals as a birthing platform. Moreover, newborn seals are the preferred prey of polar bears, making up roughly two thirds of their diet. Fjords with glacier ice are present all along both coasts of Greenland, in Svalbard, Novaya Zemlya and Franz Josef Land in Russia, and in Baffin and Ellesmere Islands in the Canadian Arctic.26
The authors concluded their report with a recommendation that SG be officially recognized by the IUCN PBSG as a polar bear subpopulation distinct from EG for management and conservation purposes. However, despite the fact that Dr Laidre is currently the co-chair of the PBSG, and that in March 2023 the government of Greenland declared SG a protected new and separate management unit, the PBSG declined to add it as a distinct subpopulation. The 2023 USFWS assessment cited the 2022 Laidre report and its abundance estimate for SG, but regarding a change in boundaries for EG, it stated only that 'ecoregion and subpopulation status will likely be re-evaluated by PBSG in 2023'.27
4. Population trends
In Canada, where roughly two thirds of the world's polar bear population live, a 2022 update from the PBTC for the first time included assessments based on Inuit IK for each of the 13 subpopulations for which Canada has sole or joint management responsibility. While the scientific assessments for trends in abundance for these subpopulations are simply the widely varying ones provided by the PBSG in 2021, those based on IK were either 'increased' or 'stable'.28
Later in 2022, the Government of Canada published updated global polar bear population trend maps based on 2021 PBSG scientific data: no provision was made for the conflicting information from IK discussed above, calling into question whether IK assessments are actually given any weight in assessing current conditions.29 And while the 2023 USFWS assessment included the 2022 Canadian IK trend information in their status table, it gave priority to 2021 PBSG scientific data.30
Figure 2 shows a more realistic representation of current polar bear population trends based on all available information (scientific survey results, IK, and studies on health and survival status published up to 31 December 2023, extrapolated to regions lacking recent survey data). This gives the following subpopulation classifications at 2023, including the new subpopulation of SE Greenland (SG):
• seven increasing or likely increasing [KB, DS, MC, GB, CS, BS, SH].
• four stable or likely stable [BB, SB, WH, SG].
• nine presumed stable or increasing [EG, LS, LP, KS, VM, NB, GB, FB, NW].