From aaca68b74ecc312fcc628c9f12ecb693f72ed7d8 Mon Sep 17 00:00:00 2001 From: Mahesh Kommareddi Date: Sun, 9 Jun 2024 13:45:19 -0400 Subject: [PATCH] Main commit --- README.md | 4 +- app/__init__.py | 1 + app/rag_system_with_agents.py | 702 ++++++++++++++++++ app/requirements.txt | 10 + docker/Dockerfile | 23 + docker/docker-compose.yml | 87 +++ .../build/lib/my_project/__init__.py | 0 .../my_project/build/lib/my_project/items.py | 12 + .../build/lib/my_project/middlewares.py | 103 +++ .../build/lib/my_project/pipelines.py | 13 + .../build/lib/my_project/settings.py | 93 +++ .../build/lib/my_project/spiders/__init__.py | 4 + .../build/lib/my_project/spiders/my_spider.py | 32 + .../my_project/my_project/__init__.py | 0 .../my_project/my_project/items.py | 12 + .../my_project/my_project/middlewares.py | 103 +++ .../my_project/my_project/pipelines.py | 13 + .../my_project/my_project/settings.py | 93 +++ .../my_project/my_project/spiders/__init__.py | 4 + .../my_project/spiders/my_spider.py | 32 + .../my_project/project.egg-info/PKG-INFO | 3 + .../my_project/project.egg-info/SOURCES.txt | 13 + .../project.egg-info/dependency_links.txt | 1 + .../project.egg-info/entry_points.txt | 2 + .../my_project/project.egg-info/top_level.txt | 1 + docker/scrapy_project/my_project/scrapy.cfg | 6 + docker/scrapy_project/my_project/setup.py | 10 + frontend/app.js | 62 ++ frontend/index.html | 129 ++++ frontend/styles.css | 78 ++ samples/FRANCE.txt | 125 ++++ samples/Polar Bears a Complete Guide.txt | 2 + samples/ReActPrompts.txt | 67 ++ samples/State of Polar Bears.txt | 356 +++++++++ 34 files changed, 2193 insertions(+), 3 deletions(-) create mode 100644 app/__init__.py create mode 100644 app/rag_system_with_agents.py create mode 100644 app/requirements.txt create mode 100644 docker/Dockerfile create mode 100644 docker/docker-compose.yml create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/__init__.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/items.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/middlewares.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/pipelines.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/settings.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/spiders/__init__.py create mode 100644 docker/scrapy_project/my_project/build/lib/my_project/spiders/my_spider.py create mode 100644 docker/scrapy_project/my_project/my_project/__init__.py create mode 100644 docker/scrapy_project/my_project/my_project/items.py create mode 100644 docker/scrapy_project/my_project/my_project/middlewares.py create mode 100644 docker/scrapy_project/my_project/my_project/pipelines.py create mode 100644 docker/scrapy_project/my_project/my_project/settings.py create mode 100644 docker/scrapy_project/my_project/my_project/spiders/__init__.py create mode 100644 docker/scrapy_project/my_project/my_project/spiders/my_spider.py create mode 100644 docker/scrapy_project/my_project/project.egg-info/PKG-INFO create mode 100644 docker/scrapy_project/my_project/project.egg-info/SOURCES.txt create mode 100644 docker/scrapy_project/my_project/project.egg-info/dependency_links.txt create mode 100644 docker/scrapy_project/my_project/project.egg-info/entry_points.txt create mode 100644 docker/scrapy_project/my_project/project.egg-info/top_level.txt create mode 100644 docker/scrapy_project/my_project/scrapy.cfg create mode 100644 docker/scrapy_project/my_project/setup.py create 
mode 100644 frontend/app.js
 create mode 100644 frontend/index.html
 create mode 100644 frontend/styles.css
 create mode 100644 samples/FRANCE.txt
 create mode 100644 samples/Polar Bears a Complete Guide.txt
 create mode 100644 samples/ReActPrompts.txt
 create mode 100644 samples/State of Polar Bears.txt

diff --git a/README.md b/README.md
index 0ba55a2..22924fa 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1 @@
-# agents
-
-RAG Agents for LLM
\ No newline at end of file
+docker-compose -f docker/docker-compose.yml up --build
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..1c426f6
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1 @@
+# This file can be empty or contain initialization code for the app package
diff --git a/app/rag_system_with_agents.py b/app/rag_system_with_agents.py
new file mode 100644
index 0000000..26e0004
--- /dev/null
+++ b/app/rag_system_with_agents.py
@@ -0,0 +1,702 @@
+import os
+import openai
+import uuid
+import requests
+from flask import Flask, request, jsonify, send_from_directory
+from flask_cors import CORS
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+from email.mime.text import MIMEText
+from airflow_client.client import ApiClient, Configuration
+from airflow_client.client.api.dag_run_api import DAGRunApi
+import smtplib
+from threading import Thread
+import time
+import json
+
+# Initialize Flask app
+app = Flask(__name__, static_folder='./frontend', static_url_path='/')
+CORS(app)
+
+# Configure OpenAI API
+openai.api_key = os.getenv('OPENAI_API_KEY')
+
+# Configure Qdrant
+qdrant = QdrantClient(host=os.getenv('QDRANT_HOST'))
+
+# Dictionary to store the status and progress of tasks
+tasks_status = {}
+
+def embed_text(text):
+    response = openai.Embedding.create(
+        input=text,
+        model="text-embedding-ada-002"
+    )
+    embedding = response['data'][0]['embedding']
+    return embedding
+
+def query_qdrant(embedding, top_n=5):
+    search_result = qdrant.search(
+        collection_name="rag",
+        query_vector=embedding,
+        limit=top_n
+    )
+    return search_result
+
+def parse_react_response(response):
+    steps = []
+    final_answer = ""
+    lines = response.split('\n')
+    for line in lines:
+        if line.startswith("["):
+            steps.append(line.strip())
+        elif line.startswith("Final Answer:"):
+            # Split only on the first colon so answers containing colons survive intact
+            final_answer = line.split(":", 1)[1].strip()
+    return steps, final_answer
+
+def update_task_status(task_id, status, step=None, results=None):
+    # Avoid a mutable default argument; fall back to a fresh list per call
+    results = results if results is not None else []
+    if task_id not in tasks_status:
+        tasks_status[task_id] = {"status": status, "current_step": step, "steps": [], "results": []}
+    else:
+        tasks_status[task_id]["status"] = status
+    if step:
+        tasks_status[task_id]["current_step"] = step
+        tasks_status[task_id]["steps"].append(step)
+    tasks_status[task_id]["results"] = results
+
+def process_steps(steps, task_id, memory, results):
+    try:
+        for step in steps:
+            if "[" in step and "]" in step:
+                agent = step.split("[")[1].split("]")[0].strip().lower().replace(" ", "_")
+                task = step.split("]")[1].strip()
+                result = run_agent(agent, task, task_id, memory)
+                if isinstance(result, tuple):
+                    result = result[0]
+                results.append(result["message"])
+                update_task_status(task_id, "processing", step, results)
+                memory[agent] = result["message"]  # Store the result in memory
+        update_task_status(task_id, "completed", None, results)
+    except Exception as e:
+        update_task_status(task_id, f"failed: {e}")
+        print(f"Error processing steps: {e}")
+
+
+@app.route('/upload', methods=['POST'])
+def upload_file():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file part"}), 400
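+    # Only plain-text (.txt) uploads are embedded below; PDFs or images would
+    # first need the file_converter or OCR services defined in docker-compose.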
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    if file and file.filename.endswith('.txt'):
+        content = file.read().decode('utf-8')
+        embedding = embed_text(content)
+        document_id = str(uuid.uuid4())  # Generate a UUID for the document ID
+        qdrant.upsert(
+            collection_name='rag',
+            points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})]
+        )
+        return jsonify({"message": "File uploaded and embedded successfully"}), 200
+    else:
+        return jsonify({"error": "Invalid file type. Only .txt files are allowed"}), 400
+
+@app.route('/query', methods=['POST'])
+def query():
+    data = request.json
+    query_text = data['query']
+    embedding = embed_text(query_text)
+    results = query_qdrant(embedding)
+    sources = [{"content": result.payload["content"], "id": result.id} for result in results]
+    return jsonify({"results": sources})
+
+@app.route('/react_query', methods=['POST'])
+def react_query():
+    data = request.json
+    query_text = data['query']
+    task_id = str(uuid.uuid4())
+    update_task_status(task_id, "initialized")
+
+    # Create the system prompt with capabilities
+    system_prompt = f"""
+    You are a research assistant that can perform the following tasks:
+    1. Research Paper Finder
+    2. Citation Generator
+    3. Data Summarizer
+    4. Question Answering
+    5. Statistical Analysis
+    6. Graph Generator
+    7. Keyword Extractor
+    8. Research Outline Generator
+    9. Hypothesis Generator
+    10. Methodology Advisor
+    11. Experimental Design Helper
+    12. Survey Designer
+    13. Plagiarism Checker
+    14. Grammar and Style Checker
+    15. Literature Review Organizer
+    16. Data Cleaning Agent
+    17. Bibliography Manager
+    18. Thesis Statement Generator
+    19. Funding Finder
+    20. Conference Finder
+    21. Web Scraper
+    22. API Integrator
+    23. Email Notifier
+    24. File Converter
+    25. Translation Agent
+    26. OCR Agent
+    27. Scheduler
+    28. Weather Information Agent
+
+    Using the ReAct (Reason and Act) paradigm, analyze the following query and determine the steps to answer it. Each step should indicate the agent to use and the task to perform in a structured format, clearly separated by new lines. Make sure to include the agent name in square brackets. Example format: [Agent] Task. Finish with a line that starts with "Final Answer:" summarizing the expected outcome.
+
+    Query: {query_text}
+    """
+
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": query_text}
+        ],
+        max_tokens=500
+    )
+
+    react_response = response['choices'][0]['message']['content'].strip()
+    steps, final_answer = parse_react_response(react_response)
+
+    if not steps:
+        update_task_status(task_id, "failed")
+        return jsonify({"error": "No steps generated by the ReAct system"}), 400
+
+    update_task_status(task_id, "processing", steps[0])
+    results = []
+    memory = {}
+    Thread(target=process_steps, args=(steps, task_id, memory, results)).start()
+
+    return jsonify({"steps": steps, "task_id": task_id})
+
+def run_agent(agent, query_text, task_id, memory):
+    # Here we call the appropriate agent function based on the agent type
+    if agent == 'research_paper_finder':
+        return research_paper_finder(query_text, memory)
+    elif agent == 'citation_generator':
+        return citation_generator(query_text, memory)
+    elif agent == 'data_summarizer':
+        return data_summarizer(query_text, memory)
+    elif agent == 'question_answering':
+        return question_answering(query_text, memory)
+    elif agent == 'statistical_analysis':
+        return statistical_analysis(query_text, memory)
+    elif agent == 'graph_generator':
+        return graph_generator(query_text, memory)
+    elif agent == 'keyword_extractor':
+        return keyword_extractor(query_text, memory)
+    elif agent == 'research_outline_generator':
+        return research_outline_generator(query_text, memory)
+    elif agent == 'hypothesis_generator':
+        return hypothesis_generator(query_text, memory)
+    elif agent == 'methodology_advisor':
+        return methodology_advisor(query_text, memory)
+    elif agent == 'experimental_design_helper':
+        return experimental_design_helper(query_text, memory)
+    elif agent == 'survey_designer':
+        return survey_designer(query_text, memory)
+    elif agent == 'plagiarism_checker':
+        return plagiarism_checker(query_text, memory)
+    elif agent == 'grammar_and_style_checker':
+        return grammar_and_style_checker(query_text, memory)
+    elif agent == 'literature_review_organizer':
+        return literature_review_organizer(query_text, memory)
+    elif agent == 'data_cleaning_agent':
+        return data_cleaning_agent(query_text, memory)
+    elif agent == 'bibliography_manager':
+        return bibliography_manager(query_text, memory)
+    elif agent == 'thesis_statement_generator':
+        return thesis_statement_generator(query_text, memory)
+    elif agent == 'funding_finder':
+        return funding_finder(query_text, memory)
+    elif agent == 'conference_finder':
+        return conference_finder(query_text, memory)
+    elif agent in ('web_scraper_using_scrapyd', 'web_scraper'):
+        return web_scraper(query_text, memory)
+    elif agent == 'api_integrator':
+        return api_integrator(query_text, memory)
+    elif agent == 'email_notifier':
+        return email_notifier(query_text, memory)
+    elif agent == 'file_converter':
+        return file_converter(query_text, memory)
+    elif agent == 'translation_agent':
+        return translation_agent(query_text, memory)
+    elif agent == 'ocr_agent':
+        return ocr_agent(query_text, memory)
+    elif agent == 'scheduler':
+        return scheduler(query_text, memory)
+    elif agent == 'weather_information_agent':
+        return weather_information_agent(query_text, memory)
+    else:
+        return {"message": f"Unknown agent: {agent}"}
+
+def research_paper_finder(query_text, memory):
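+    # Common pattern shared by the agents below: embed the task text, retrieve
+    # the top matches from Qdrant as sources, then ask GPT-4 with the running
+    # `memory` dict supplied as conversation context.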
+ embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Find research papers related to: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def citation_generator(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Generate a citation for: {query_text}"} + ], + max_tokens=50 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def data_summarizer(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Summarize the following text: {query_text}"} + ], + max_tokens=1000 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def question_answering(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Answer the following question: {query_text}"} + ], + max_tokens=100 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def statistical_analysis(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Perform statistical analysis on the following data: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def graph_generator(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Generate a graph for the following data: 
{query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def keyword_extractor(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Extract keywords from the following text: {query_text}"} + ], + max_tokens=50 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def research_outline_generator(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Generate a research outline for: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def hypothesis_generator(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Generate a hypothesis based on the following topic: {query_text}"} + ], + max_tokens=100 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def methodology_advisor(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Suggest a methodology for the following research topic: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def experimental_design_helper(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Help design an experiment for: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def survey_designer(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": 
result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Design a survey for: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def plagiarism_checker(query_text, memory): + return {"message": "Plagiarism check is not implemented yet.", "query": query_text} + +def grammar_and_style_checker(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Check and correct the grammar and style of the following text: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def literature_review_organizer(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Organize the following literature review: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def data_cleaning_agent(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Clean the following data: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def bibliography_manager(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Manage the bibliography for: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def thesis_statement_generator(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": 
f"Generate a thesis statement for: {query_text}"} + ], + max_tokens=100 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def funding_finder(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Find funding opportunities for: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def conference_finder(query_text, memory): + embedding = embed_text(query_text) + rag_results = query_qdrant(embedding) + sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results] + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": f"The previous response relating to the query was: {memory}"}, + {"role": "user", "content": f"Find conferences related to: {query_text}"} + ], + max_tokens=150 + ) + response_message = response['choices'][0]['message']['content'].strip() + return {"message": response_message, "sources": sources} + +def web_scraper(query_text, memory): + project_name = 'my_project' + spider_name = 'my_spider' + scrapyd_host = os.getenv('SCRAPYD_HOST', 'localhost') + + data = { + 'project': project_name, + 'spider': spider_name, + 'start_urls': query_text + } + + try: + response = requests.post(f'http://{scrapyd_host}:6800/schedule.json', data=data) + # if response.status_code == "200": + job_id = response.json().get('jobid') + # Wait for the job to finish and fetch the results + time.sleep(15) # Adjust this sleep time as needed + items_response = requests.get(f'http://{scrapyd_host}:6800/items/{project_name}/{spider_name}/{job_id}.jl') + #if items_response.status_code == 200: + items = [json.loads(line) for line in items_response.text.splitlines()] + # for item in items: + # Insert each scraped item into Qdrant + content = items[0].get('content', '') + embedding = embed_text(content) + document_id = str(uuid.uuid4()) + qdrant.upsert( + collection_name='rag', + points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})] + ) + return {"message": content} + # return {"message": f"Job completed with {len(items)} items scraped", "items": items} + # else: + # return {"message": "Failed to fetch scraped items"}, 500 + #else: + # return {"message": "Failed to schedule job"}, 500 + except Exception as e: + print(f"Error scheduling scrapy job: {e}") + return {"message": f"Failed to schedule job - {e}"}, 500 + +def api_integrator(query_text, memory): + response = requests.post( + 'http://localhost:1880/api_integrator', + json={'query': query_text} + ) + return {"message": response.json(), "query": query_text} + +def email_notifier(query_text, memory): + msg = MIMEText(query_text) + msg['Subject'] = 'Notification' + msg['From'] = 'test@example.com' + msg['To'] = 'mahesh.kommareddi@gmail.com' + + with smtplib.SMTP('mailhog', 1025) as server: + server.sendmail(msg['From'], [msg['To']], msg.as_string()) + + return {"message": "Email sent successfully"} + +def file_converter(query_text, memory): + response = requests.post( + 
+        'http://libreoffice:8084/convert',
+        files={'file': query_text}
+    )
+    return {"message": "File conversion completed", "data": response.json()}
+
+def translation_agent(query_text, memory):
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": f"The previous response relating to the query was: {memory}"},
+            {"role": "user", "content": f"Translate the following text: {query_text}"}
+        ],
+        max_tokens=150
+    )
+    response_message = response['choices'][0]['message']['content'].strip()
+    # No retrieval is performed here, so there are no sources to return
+    return {"message": response_message}
+
+def ocr_agent(query_text, memory):
+    response = requests.post(
+        'http://localhost:8081/ocr',
+        files={'file': query_text}
+    )
+    return {"message": response.json(), "query": query_text}
+
+def scheduler(query_text, memory):
+    configuration = Configuration(
+        host="http://localhost:8082/api/v1"
+    )
+    api_client = ApiClient(configuration)
+    dag_run_api = DAGRunApi(api_client)
+
+    dag_id = 'example_dag'
+    dag_run = dag_run_api.post_dag_run(
+        dag_id=dag_id,
+        dag_run={"conf": {"query_text": query_text}}
+    )
+    return {"message": f"Scheduled task for {query_text}", "dag_run_id": dag_run.dag_run_id}
+
+def weather_information_agent(query_text, memory):
+    api_key = os.getenv('OPENWEATHERMAP_API_KEY')
+    response = requests.get(
+        f'http://api.openweathermap.org/data/2.5/weather?q={query_text}&appid={api_key}'
+    )
+    return {"message": response.json(), "query": query_text}
+
+@app.route('/ocr', methods=['POST'])
+def handle_ocr():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file part"}), 400
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    response = requests.post(
+        'http://localhost:8081/ocr',
+        files={'file': file}
+    )
+    return jsonify(response.json())
+
+@app.route('/schedule', methods=['POST'])
+def handle_schedule():
+    data = request.json
+    query_text = data['query']
+    return jsonify(scheduler(query_text, {}))
+
+@app.route('/weather', methods=['POST'])
+def handle_weather():
+    data = request.json
+    query_text = data['query']
+    return jsonify(weather_information_agent(query_text, {}))
+
+@app.route('/scrape', methods=['POST'])
+def handle_scrape():
+    data = request.json
+    query_text = data['query']
+    return web_scraper(query_text, {})
+
+@app.route('/integrate', methods=['POST'])
+def handle_integrate():
+    data = request.json
+    query_text = data['query']
+    return jsonify(api_integrator(query_text, {}))
+
+@app.route('/notify', methods=['POST'])
+def handle_notify():
+    data = request.json
+    query_text = data['query']
+    return jsonify(email_notifier(query_text, {}))
+
+@app.route('/convert', methods=['POST'])
+def handle_convert():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file part"}), 400
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    response = requests.post(
+        'http://localhost:8084/convert',
+        files={'file': file}
+    )
+    return jsonify(response.json())
+
+@app.route('/')
+def serve_index():
+    return send_from_directory(app.static_folder, 'index.html')
+
+@app.route('/status/<task_id>', methods=['GET'])
+def get_status(task_id):
+    return jsonify(tasks_status.get(task_id, {"error": "Task ID not found"}))
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=1337)
diff --git a/app/requirements.txt b/app/requirements.txt
new file mode 100644
index 0000000..67d8f2d
--- /dev/null
+++ b/app/requirements.txt
@@ -0,0 +1,11 @@
+flask
+flask-cors
+openai==0.28
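+# openai pinned to 0.28: the app uses the legacy Embedding/ChatCompletion API removed in 1.0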
+openai[datalib]
+pymongo
+requests
+numpy
+qdrant_client
+apache-airflow-client
+scrapy
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..651dcc6
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,23 @@
+# Use the official Python image from the Docker Hub
+FROM python:3.10
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY ./app/requirements.txt .
+
+# Install any dependencies specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the application code into the container
+COPY ./app /app
+
+# Copy the frontend files into the container
+COPY ./frontend /app/frontend
+
+# Make port 1337 available to the world outside this container
+EXPOSE 1337
+
+# Run the application
+CMD ["python", "rag_system_with_agents.py"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..605946c
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,87 @@
+version: '3.7'
+
+services:
+  flask-app:
+    build:
+      context: ../
+      dockerfile: docker/Dockerfile
+    container_name: flask-app
+    ports:
+      - "1337:1337"
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - QDRANT_HOST=qdrant
+      - SCRAPYD_HOST=scrapyd
+      - OPENWEATHERMAP_API_KEY=${OPENWEATHERMAP_API_KEY}
+    depends_on:
+      - qdrant
+      - ocr_service
+      - airflow-webserver
+      - airflow-scheduler
+      - scrapyd
+      - node_red
+      - mailhog
+      - libreoffice
+
+  qdrant:
+    image: qdrant/qdrant
+    ports:
+      - "6333:6333"
+
+  ocr_service:
+    image: jbarlow83/ocrmypdf
+    ports:
+      - "8081:8081"
+
+  airflow-webserver:
+    image: apache/airflow:latest
+    container_name: airflow-webserver
+    command: webserver
+    ports:
+      - "8082:8082"
+    environment:
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
+      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
+      - AIRFLOW__CORE__LOAD_EXAMPLES=False
+    volumes:
+      - ./dags:/usr/local/airflow/dags
+      - ./logs:/usr/local/airflow/logs
+      - ./plugins:/usr/local/airflow/plugins
+
+  airflow-scheduler:
+    image: apache/airflow:latest
+    container_name: airflow-scheduler
+    command: scheduler
+    environment:
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
+      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
+      - AIRFLOW__CORE__LOAD_EXAMPLES=False
+    volumes:
+      - ./dags:/usr/local/airflow/dags
+      - ./logs:/usr/local/airflow/logs
+      - ./plugins:/usr/local/airflow/plugins
+    depends_on:
+      - airflow-webserver
+
+  scrapyd:
+    image: vimagick/scrapyd
+    ports:
+      - "6800:6800"
+    volumes:
+      - ./scrapy_project:/scrapy_project
+    command: ["scrapyd"]
+
+  node_red:
+    image: nodered/node-red
+    ports:
+      - "1880:1880"
+
+  mailhog:
+    image: mailhog/mailhog
+    ports:
+      - "1025:1025"
+
+  libreoffice:
+    image: linuxserver/libreoffice
+    ports:
+      - "8084:8084"
diff --git a/docker/scrapy_project/my_project/build/lib/my_project/__init__.py b/docker/scrapy_project/my_project/build/lib/my_project/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/docker/scrapy_project/my_project/build/lib/my_project/items.py b/docker/scrapy_project/my_project/build/lib/my_project/items.py
new file mode 100644
index 0000000..e80ad17
--- /dev/null
+++ b/docker/scrapy_project/my_project/build/lib/my_project/items.py
@@ -0,0 +1,12 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class MyProjectItem(scrapy.Item):
+    # define the fields for your item here like:
+ # name = scrapy.Field() + pass diff --git a/docker/scrapy_project/my_project/build/lib/my_project/middlewares.py b/docker/scrapy_project/my_project/build/lib/my_project/middlewares.py new file mode 100644 index 0000000..c5eb12d --- /dev/null +++ b/docker/scrapy_project/my_project/build/lib/my_project/middlewares.py @@ -0,0 +1,103 @@ +# Define here the models for your spider middleware +# +# See documentation in: +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +from scrapy import signals + +# useful for handling different item types with a single interface +from itemadapter import is_item, ItemAdapter + + +class MyProjectSpiderMiddleware: + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the spider middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_spider_input(self, response, spider): + # Called for each response that goes through the spider + # middleware and into the spider. + + # Should return None or raise an exception. + return None + + def process_spider_output(self, response, result, spider): + # Called with the results returned from the Spider, after + # it has processed the response. + + # Must return an iterable of Request, or item objects. + for i in result: + yield i + + def process_spider_exception(self, response, exception, spider): + # Called when a spider or process_spider_input() method + # (from other spider middleware) raises an exception. + + # Should return either None or an iterable of Request or item objects. + pass + + def process_start_requests(self, start_requests, spider): + # Called with the start requests of the spider, and works + # similarly to the process_spider_output() method, except + # that it doesn’t have a response associated. + + # Must return only requests (not items). + for r in start_requests: + yield r + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) + + +class MyProjectDownloaderMiddleware: + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the downloader middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_request(self, request, spider): + # Called for each request that goes through the downloader + # middleware. + + # Must either: + # - return None: continue processing this request + # - or return a Response object + # - or return a Request object + # - or raise IgnoreRequest: process_exception() methods of + # installed downloader middleware will be called + return None + + def process_response(self, request, response, spider): + # Called with the response returned from the downloader. + + # Must either; + # - return a Response object + # - return a Request object + # - or raise IgnoreRequest + return response + + def process_exception(self, request, exception, spider): + # Called when a download handler or a process_request() + # (from other downloader middleware) raises an exception. 
+ + # Must either: + # - return None: continue processing this exception + # - return a Response object: stops process_exception() chain + # - return a Request object: stops process_exception() chain + pass + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) diff --git a/docker/scrapy_project/my_project/build/lib/my_project/pipelines.py b/docker/scrapy_project/my_project/build/lib/my_project/pipelines.py new file mode 100644 index 0000000..e1f4ac8 --- /dev/null +++ b/docker/scrapy_project/my_project/build/lib/my_project/pipelines.py @@ -0,0 +1,13 @@ +# Define your item pipelines here +# +# Don't forget to add your pipeline to the ITEM_PIPELINES setting +# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html + + +# useful for handling different item types with a single interface +from itemadapter import ItemAdapter + + +class MyProjectPipeline: + def process_item(self, item, spider): + return item diff --git a/docker/scrapy_project/my_project/build/lib/my_project/settings.py b/docker/scrapy_project/my_project/build/lib/my_project/settings.py new file mode 100644 index 0000000..e7d9500 --- /dev/null +++ b/docker/scrapy_project/my_project/build/lib/my_project/settings.py @@ -0,0 +1,93 @@ +# Scrapy settings for my_project project +# +# For simplicity, this file contains only settings considered important or +# commonly used. You can find more settings consulting the documentation: +# +# https://docs.scrapy.org/en/latest/topics/settings.html +# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +BOT_NAME = "my_project" + +SPIDER_MODULES = ["my_project.spiders"] +NEWSPIDER_MODULE = "my_project.spiders" + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = "my_project (+http://www.yourdomain.com)" + +# Obey robots.txt rules +ROBOTSTXT_OBEY = True + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +#CONCURRENT_REQUESTS = 32 + +# Configure a delay for requests for the same website (default: 0) +# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY = 3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 +#CONCURRENT_REQUESTS_PER_IP = 16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED = False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED = False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", +# "Accept-Language": "en", +#} + +# Enable or disable spider middlewares +# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# "my_project.middlewares.MyProjectSpiderMiddleware": 543, +#} + +# Enable or disable downloader middlewares +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +#DOWNLOADER_MIDDLEWARES = { +# "my_project.middlewares.MyProjectDownloaderMiddleware": 543, +#} + +# Enable or disable extensions +# See https://docs.scrapy.org/en/latest/topics/extensions.html +#EXTENSIONS = { +# "scrapy.extensions.telnet.TelnetConsole": None, +#} + +# Configure item pipelines +# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html +#ITEM_PIPELINES = { +# "my_project.pipelines.MyProjectPipeline": 300, +#} + +# Enable and configure the AutoThrottle extension 
(disabled by default) +# See https://docs.scrapy.org/en/latest/topics/autothrottle.html +#AUTOTHROTTLE_ENABLED = True +# The initial download delay +#AUTOTHROTTLE_START_DELAY = 5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY = 60 +# The average number of requests Scrapy should be sending in parallel to +# each remote server +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG = False + +# Enable and configure HTTP caching (disabled by default) +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED = True +#HTTPCACHE_EXPIRATION_SECS = 0 +#HTTPCACHE_DIR = "httpcache" +#HTTPCACHE_IGNORE_HTTP_CODES = [] +#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage" + +# Set settings whose default value is deprecated to a future-proof value +REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7" +TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" +FEED_EXPORT_ENCODING = "utf-8" diff --git a/docker/scrapy_project/my_project/build/lib/my_project/spiders/__init__.py b/docker/scrapy_project/my_project/build/lib/my_project/spiders/__init__.py new file mode 100644 index 0000000..ebd689a --- /dev/null +++ b/docker/scrapy_project/my_project/build/lib/my_project/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to create and manage +# your spiders. diff --git a/docker/scrapy_project/my_project/build/lib/my_project/spiders/my_spider.py b/docker/scrapy_project/my_project/build/lib/my_project/spiders/my_spider.py new file mode 100644 index 0000000..edaaf53 --- /dev/null +++ b/docker/scrapy_project/my_project/build/lib/my_project/spiders/my_spider.py @@ -0,0 +1,32 @@ +import scrapy +import re + +class MySpider(scrapy.Spider): + name = 'my_spider' + + def __init__(self, start_urls=None, *args, **kwargs): + super(MySpider, self).__init__(*args, **kwargs) + if start_urls: + self.start_urls = self.extract_urls(start_urls) + else: + self.start_urls = [] + + def extract_urls(self, text): + url_pattern = re.compile(r'(https?://\S+)') + urls = url_pattern.findall(text) + return urls + + def parse(self, response): + page_content = response.text + page_urls = response.css('a::attr(href)').getall() + for url in page_urls: + if url.startswith('http'): + yield {'url': url, 'content': page_content} + else: + yield {'url': response.urljoin(url), 'content': page_content} + + # for url in page_urls: + # if url.startswith('http'): + # yield scrapy.Request(url, callback=self.parse) + # else: + # yield scrapy.Request(response.urljoin(url), callback=self.parse) diff --git a/docker/scrapy_project/my_project/my_project/__init__.py b/docker/scrapy_project/my_project/my_project/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docker/scrapy_project/my_project/my_project/items.py b/docker/scrapy_project/my_project/my_project/items.py new file mode 100644 index 0000000..e80ad17 --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/items.py @@ -0,0 +1,12 @@ +# Define here the models for your scraped items +# +# See documentation in: +# https://docs.scrapy.org/en/latest/topics/items.html + +import scrapy + + +class MyProjectItem(scrapy.Item): + # define the fields for your item here like: + # name = scrapy.Field() + pass diff --git 
a/docker/scrapy_project/my_project/my_project/middlewares.py b/docker/scrapy_project/my_project/my_project/middlewares.py new file mode 100644 index 0000000..c5eb12d --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/middlewares.py @@ -0,0 +1,103 @@ +# Define here the models for your spider middleware +# +# See documentation in: +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +from scrapy import signals + +# useful for handling different item types with a single interface +from itemadapter import is_item, ItemAdapter + + +class MyProjectSpiderMiddleware: + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the spider middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_spider_input(self, response, spider): + # Called for each response that goes through the spider + # middleware and into the spider. + + # Should return None or raise an exception. + return None + + def process_spider_output(self, response, result, spider): + # Called with the results returned from the Spider, after + # it has processed the response. + + # Must return an iterable of Request, or item objects. + for i in result: + yield i + + def process_spider_exception(self, response, exception, spider): + # Called when a spider or process_spider_input() method + # (from other spider middleware) raises an exception. + + # Should return either None or an iterable of Request or item objects. + pass + + def process_start_requests(self, start_requests, spider): + # Called with the start requests of the spider, and works + # similarly to the process_spider_output() method, except + # that it doesn’t have a response associated. + + # Must return only requests (not items). + for r in start_requests: + yield r + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) + + +class MyProjectDownloaderMiddleware: + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the downloader middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_request(self, request, spider): + # Called for each request that goes through the downloader + # middleware. + + # Must either: + # - return None: continue processing this request + # - or return a Response object + # - or return a Request object + # - or raise IgnoreRequest: process_exception() methods of + # installed downloader middleware will be called + return None + + def process_response(self, request, response, spider): + # Called with the response returned from the downloader. + + # Must either; + # - return a Response object + # - return a Request object + # - or raise IgnoreRequest + return response + + def process_exception(self, request, exception, spider): + # Called when a download handler or a process_request() + # (from other downloader middleware) raises an exception. 
+ + # Must either: + # - return None: continue processing this exception + # - return a Response object: stops process_exception() chain + # - return a Request object: stops process_exception() chain + pass + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) diff --git a/docker/scrapy_project/my_project/my_project/pipelines.py b/docker/scrapy_project/my_project/my_project/pipelines.py new file mode 100644 index 0000000..e1f4ac8 --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/pipelines.py @@ -0,0 +1,13 @@ +# Define your item pipelines here +# +# Don't forget to add your pipeline to the ITEM_PIPELINES setting +# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html + + +# useful for handling different item types with a single interface +from itemadapter import ItemAdapter + + +class MyProjectPipeline: + def process_item(self, item, spider): + return item diff --git a/docker/scrapy_project/my_project/my_project/settings.py b/docker/scrapy_project/my_project/my_project/settings.py new file mode 100644 index 0000000..e7d9500 --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/settings.py @@ -0,0 +1,93 @@ +# Scrapy settings for my_project project +# +# For simplicity, this file contains only settings considered important or +# commonly used. You can find more settings consulting the documentation: +# +# https://docs.scrapy.org/en/latest/topics/settings.html +# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +BOT_NAME = "my_project" + +SPIDER_MODULES = ["my_project.spiders"] +NEWSPIDER_MODULE = "my_project.spiders" + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = "my_project (+http://www.yourdomain.com)" + +# Obey robots.txt rules +ROBOTSTXT_OBEY = True + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +#CONCURRENT_REQUESTS = 32 + +# Configure a delay for requests for the same website (default: 0) +# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY = 3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 +#CONCURRENT_REQUESTS_PER_IP = 16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED = False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED = False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", +# "Accept-Language": "en", +#} + +# Enable or disable spider middlewares +# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# "my_project.middlewares.MyProjectSpiderMiddleware": 543, +#} + +# Enable or disable downloader middlewares +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +#DOWNLOADER_MIDDLEWARES = { +# "my_project.middlewares.MyProjectDownloaderMiddleware": 543, +#} + +# Enable or disable extensions +# See https://docs.scrapy.org/en/latest/topics/extensions.html +#EXTENSIONS = { +# "scrapy.extensions.telnet.TelnetConsole": None, +#} + +# Configure item pipelines +# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html +#ITEM_PIPELINES = { +# "my_project.pipelines.MyProjectPipeline": 300, +#} + +# Enable and configure the AutoThrottle extension (disabled by default) +# See 
https://docs.scrapy.org/en/latest/topics/autothrottle.html +#AUTOTHROTTLE_ENABLED = True +# The initial download delay +#AUTOTHROTTLE_START_DELAY = 5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY = 60 +# The average number of requests Scrapy should be sending in parallel to +# each remote server +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG = False + +# Enable and configure HTTP caching (disabled by default) +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED = True +#HTTPCACHE_EXPIRATION_SECS = 0 +#HTTPCACHE_DIR = "httpcache" +#HTTPCACHE_IGNORE_HTTP_CODES = [] +#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage" + +# Set settings whose default value is deprecated to a future-proof value +REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7" +TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" +FEED_EXPORT_ENCODING = "utf-8" diff --git a/docker/scrapy_project/my_project/my_project/spiders/__init__.py b/docker/scrapy_project/my_project/my_project/spiders/__init__.py new file mode 100644 index 0000000..ebd689a --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to create and manage +# your spiders. diff --git a/docker/scrapy_project/my_project/my_project/spiders/my_spider.py b/docker/scrapy_project/my_project/my_project/spiders/my_spider.py new file mode 100644 index 0000000..edaaf53 --- /dev/null +++ b/docker/scrapy_project/my_project/my_project/spiders/my_spider.py @@ -0,0 +1,32 @@ +import scrapy +import re + +class MySpider(scrapy.Spider): + name = 'my_spider' + + def __init__(self, start_urls=None, *args, **kwargs): + super(MySpider, self).__init__(*args, **kwargs) + if start_urls: + self.start_urls = self.extract_urls(start_urls) + else: + self.start_urls = [] + + def extract_urls(self, text): + url_pattern = re.compile(r'(https?://\S+)') + urls = url_pattern.findall(text) + return urls + + def parse(self, response): + page_content = response.text + page_urls = response.css('a::attr(href)').getall() + for url in page_urls: + if url.startswith('http'): + yield {'url': url, 'content': page_content} + else: + yield {'url': response.urljoin(url), 'content': page_content} + + # for url in page_urls: + # if url.startswith('http'): + # yield scrapy.Request(url, callback=self.parse) + # else: + # yield scrapy.Request(response.urljoin(url), callback=self.parse) diff --git a/docker/scrapy_project/my_project/project.egg-info/PKG-INFO b/docker/scrapy_project/my_project/project.egg-info/PKG-INFO new file mode 100644 index 0000000..3e6e2f5 --- /dev/null +++ b/docker/scrapy_project/my_project/project.egg-info/PKG-INFO @@ -0,0 +1,3 @@ +Metadata-Version: 2.1 +Name: project +Version: 1.0 diff --git a/docker/scrapy_project/my_project/project.egg-info/SOURCES.txt b/docker/scrapy_project/my_project/project.egg-info/SOURCES.txt new file mode 100644 index 0000000..6f6d562 --- /dev/null +++ b/docker/scrapy_project/my_project/project.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +setup.py +my_project/__init__.py +my_project/items.py +my_project/middlewares.py +my_project/pipelines.py +my_project/settings.py +my_project/spiders/__init__.py +my_project/spiders/my_spider.py +project.egg-info/PKG-INFO 
+project.egg-info/SOURCES.txt +project.egg-info/dependency_links.txt +project.egg-info/entry_points.txt +project.egg-info/top_level.txt \ No newline at end of file diff --git a/docker/scrapy_project/my_project/project.egg-info/dependency_links.txt b/docker/scrapy_project/my_project/project.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docker/scrapy_project/my_project/project.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/docker/scrapy_project/my_project/project.egg-info/entry_points.txt b/docker/scrapy_project/my_project/project.egg-info/entry_points.txt new file mode 100644 index 0000000..b43554b --- /dev/null +++ b/docker/scrapy_project/my_project/project.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[scrapy] +settings = my_project.settings diff --git a/docker/scrapy_project/my_project/project.egg-info/top_level.txt b/docker/scrapy_project/my_project/project.egg-info/top_level.txt new file mode 100644 index 0000000..3595216 --- /dev/null +++ b/docker/scrapy_project/my_project/project.egg-info/top_level.txt @@ -0,0 +1 @@ +my_project diff --git a/docker/scrapy_project/my_project/scrapy.cfg b/docker/scrapy_project/my_project/scrapy.cfg new file mode 100644 index 0000000..bc04c1a --- /dev/null +++ b/docker/scrapy_project/my_project/scrapy.cfg @@ -0,0 +1,6 @@ +[settings] +default = my_project.settings + +[deploy] +url = http://localhost:6800/ +project = my_project diff --git a/docker/scrapy_project/my_project/setup.py b/docker/scrapy_project/my_project/setup.py new file mode 100644 index 0000000..f1f6a7b --- /dev/null +++ b/docker/scrapy_project/my_project/setup.py @@ -0,0 +1,10 @@ +# Automatically created by: scrapyd-deploy + +from setuptools import setup, find_packages + +setup( + name = 'project', + version = '1.0', + packages = find_packages(), + entry_points = {'scrapy': ['settings = my_project.settings']}, +) diff --git a/frontend/app.js b/frontend/app.js new file mode 100644 index 0000000..4517df6 --- /dev/null +++ b/frontend/app.js @@ -0,0 +1,62 @@ +document.getElementById('react-query-form').addEventListener('submit', async (e) => { + e.preventDefault(); + const queryText = document.getElementById('react-query-text').value; + const response = await fetch('/react_query', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ query: queryText }), + }); + const data = await response.json(); + const taskId = data.task_id; + monitorTask(taskId); +}); + +async function monitorTask(taskId) { + const statusDiv = document.getElementById('results'); + statusDiv.innerHTML = `

+<p>Task ID: ${taskId}</p>
+<p>Status: Monitoring...</p>`;
+  let completed = false;
+  while (!completed) {
+    const response = await fetch(`/status/${taskId}`);
+    const data = await response.json();
+    statusDiv.innerHTML = `

+<p>Task ID: ${taskId}</p>
+<p>Status: ${data.status}</p>
+<p>Current Step: ${data.current_step}</p>`;
+    if (data.status === 'completed') {
+      completed = true;
+      const stepsList = data.steps.map(step => `
+<li>${step}</li>`).join('');
+      statusDiv.innerHTML += `<ul>${stepsList}</ul>`;
+      break;
+    }
+    await new Promise(resolve => setTimeout(resolve, 2000));
+  }
+}
+
+document.getElementById('query-form').addEventListener('submit', async (e) => {
+  e.preventDefault();
+  const queryText = document.getElementById('query-text').value;
+  const agent = document.getElementById('agent-select').value;
+  const response = await fetch('/query', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({ query: queryText, agent }),
+  });
+  const data = await response.json();
+  const resultsDiv = document.getElementById('results');
+  resultsDiv.innerHTML = data.results.map(result => `

+<p>${result.content}</p>`).join('');
+});
+
+document.getElementById('upload-form').addEventListener('submit', async (e) => {
+  e.preventDefault();
+  const fileInput = document.getElementById('file-input');
+  const formData = new FormData();
+  formData.append('file', fileInput.files[0]);
+  const response = await fetch('/upload', {
+    method: 'POST',
+    body: formData,
+  });
+  const data = await response.json();
+  const resultsDiv = document.getElementById('results');
+  resultsDiv.innerHTML = `

+<p>${data.message}</p>`;
+});
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..a1e2c21
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,129 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>Agentive RAG System</title>
+  <link rel="stylesheet" href="styles.css">
+</head>
+<body>
+  <div class="container">
+    <h1>Agentive RAG System</h1>
+    <div class="form-section">
+      <h2>ReAct Query</h2>
+      <form id="react-query-form">
+        <textarea id="react-query-text" placeholder="Enter a ReAct query"></textarea>
+        <button type="submit">Run ReAct Query</button>
+      </form>
+    </div>
+    <div class="form-section">
+      <h2>Agent Query</h2>
+      <form id="query-form">
+        <textarea id="query-text" placeholder="Enter a query"></textarea>
+        <select id="agent-select"></select>
+        <button type="submit">Submit Query</button>
+      </form>
+    </div>
+    <div class="form-section">
+      <h2>Upload Document</h2>
+      <form id="upload-form">
+        <input type="file" id="file-input">
+        <button type="submit">Upload</button>
+      </form>
+    </div>
+    <div id="results"></div>
+  </div>
+  <script src="app.js"></script>
+</body>
+</html>
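Taken together, the frontend files above define the backend contract this patch relies on: POST /react_query returns a task_id that is polled at GET /status/{task_id}, POST /query returns a results list, and POST /upload returns a message. The following is a minimal Python client sketch of that contract, useful for exercising the API without the browser. It is a sketch under stated assumptions: the base URL is not pinned anywhere in this patch, only the JSON fields that app.js reads are relied on, and the example agent name is lifted from samples/ReActPrompts.txt rather than from any backend registry.

import time
import requests

BASE = "http://localhost:8000"  # assumed host/port; not specified in this patch

def run_react_query(query: str) -> dict:
    """Submit a ReAct query, then poll until completion (mirrors monitorTask)."""
    task_id = requests.post(f"{BASE}/react_query", json={"query": query}).json()["task_id"]
    while True:
        data = requests.get(f"{BASE}/status/{task_id}").json()
        print(f"status={data['status']} step={data.get('current_step')}")
        if data["status"] == "completed":
            return data  # data["steps"] holds the completed step list
        time.sleep(2)  # same 2-second poll interval as the frontend

def run_query(query: str, agent: str) -> list:
    """Query a single agent (mirrors the 'query-form' handler)."""
    return requests.post(f"{BASE}/query", json={"query": query, "agent": agent}).json()["results"]

def upload(path: str) -> str:
    """Upload a document for indexing (mirrors the multipart 'upload-form' handler)."""
    with open(path, "rb") as fh:
        return requests.post(f"{BASE}/upload", files={"file": fh}).json()["message"]

if __name__ == "__main__":
    print(upload("samples/FRANCE.txt"))
    print(run_query("What is the capital of France?", agent="Data Summarizer"))

Keeping the polling interval and field names identical to app.js means this script and the browser UI exercise exactly the same code paths on the server.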
    + + + + + diff --git a/frontend/styles.css b/frontend/styles.css new file mode 100644 index 0000000..d57bb6d --- /dev/null +++ b/frontend/styles.css @@ -0,0 +1,78 @@ +body { + font-family: Arial, sans-serif; + margin: 0; + padding: 0; + background-color: #f5f5f5; +} + +.container { + width: 90%; + max-width: 1200px; + margin: 0 auto; + padding: 20px; + background-color: #ffffff; + box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); +} + +h1 { + text-align: center; + color: #333; +} + +h2 { + color: #555; +} + +.form-section { + margin-bottom: 30px; +} + +textarea, select, input[type="file"], button { + width: 100%; + padding: 10px; + margin: 10px 0; + border-radius: 5px; + border: 1px solid #ccc; +} + +button { + background-color: #007BFF; + color: #fff; + cursor: pointer; + border: none; +} + +button:hover { + background-color: #0056b3; +} + +#results { + margin-top: 30px; +} + +#results-content { + background-color: #f9f9f9; + padding: 20px; + border: 1px solid #ddd; + border-radius: 5px; + white-space: pre-wrap; +} + +#task-queue { + margin-top: 30px; +} + +table { + width: 100%; + border-collapse: collapse; +} + +th, td { + border: 1px solid #ddd; + padding: 10px; + text-align: left; +} + +th { + background-color: #f2f2f2; +} diff --git a/samples/FRANCE.txt b/samples/FRANCE.txt new file mode 100644 index 0000000..341275b --- /dev/null +++ b/samples/FRANCE.txt @@ -0,0 +1,125 @@ +{{Short description|Country in Western Europe}} +{{Hatnote|For other uses, see [[France (disambiguation)]], [[Lafrance (disambiguation)|Lafrance]], or (for prior French Republics) [[French Republics (disambiguation)|French Republics]].}} +{{pp-vandalism|small=yes}} +{{Use British English|date=July 2022}} +{{Use dmy dates|date=March 2022}} +{{Infobox country +| conventional_long_name = French Republic +| common_name = France +| native_name = {{Native name|fr|République française}} +| image_flag = Flag of France.svg +| image_coat = Arms of the French Republic.svg +| symbol_width = 75px +| symbol_type = [[Coat of arms of France|Coat of arms]]{{Efn-ur|The current [[Constitution of France]] does not specify a national emblem.{{Cite constitution|article=II|polity=France|date=1958}} The [[Fasces|lictor's fasces]] is very often used to represent the French Republic, although today it holds no official status.{{cite web|url=https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|date=15 December 2022|title=THE LICTOR'S FASCES|access-date=18 May 2024|archive-date=7 April 2024|archive-url=https://web.archive.org/web/20240407081203/https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|url-status=live}} In addition to the coat of arms, France also uses a [[Diplomatic emblem of France|different emblem]] for diplomatic and consular purposes.}} +| other_symbol = [[File:Armoiries république française.svg|90px]] +| other_symbol_type = [[Diplomatic emblem of France|Diplomatic emblem]] +| national_motto = "{{Lang|fr|[[Liberté, égalité, fraternité]]|italics=no}}" +| englishmotto = ("Liberty, Equality, Fraternity") +| national_anthem = "[[La Marseillaise]]"
    [[File:La Marseillaise.ogg|alt=sound clip of the Marseillaise French national anthem]]
    +| image_map = {{Switcher|[[File:EU-France (orthographic projection).svg|upright=1.15|frameless]]|France on the globe centred on Europe|[[File:EU-France.svg|upright=1.15|frameless]]|[[Metropolitan France]] (European part of France) in Europe|[[File:France and its region.png|frameless]]|France and its neighbors|[[File:Territorial waters - France.svg|upright=1.15|frameless]]|Show France, its overseas territories and [[Exclusive economic zone of France|its exclusive economic zones]]|Labelled map|default=1}} +| map_caption = {{Map caption|location_color=blue or dark green|region=Europe|region_color=dark grey|subregion=the European Union|subregion_color=green|unbulleted list|Location of the territory of the (red)|[[Adélie Land]] (Antarctic claim; hatched)}} +| capital = [[Paris]] +| coordinates = {{Coord|48|51|N|2|21|E|type:city(2,100,000)_region:FR-75C}} +| largest_city = capital +| languages_type = Official language
    {{Nobold|and national language}} +| languages = [[French language|French]]{{Efn-ur|name=one|For information about regional languages, see [[Languages of France]].}}{{Infobox|child=yes +| regional_languages = See [[Languages of France]] +| label1 = Nationality {{Nobold|(2021){{cite web |title=L'essentiel sur... les immigrés et les étrangers |url=https://www.insee.fr/fr/statistiques/3633212 |website=[[Insee]] |access-date=9 September 2023 |archive-date=26 June 2019 |archive-url=https://web.archive.org/web/20190626142004/https://www.insee.fr/fr/statistiques/3633212 |url-status=live }}}} +| data1 = {{Unbulleted list|92.2% [[French people|French]]|7.8% [[Demographics of France|other]]}}}} +| religion_ref = {{cite web|last1=Drouhot|first1=Lucas|last2=Simon|first2=Patrick|last3=Tiberj|first3=Vincent|url=https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|title=La diversité religieuse en France : transmissions intergénérationnelles et pratiques selon les origines|trans-title=Religious diversity in France: Intergenerational transmissions and practices according to the origins|publisher=[[Institut national de la statistique et des études économiques|National Institute of Statistics and Economic Studies]] (INSEE)|type=official statistics|date=30 March 2023|language=fr|archive-url=https://web.archive.org/web/20230330154402/https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|archive-date=30 March 2023}} +| religion_year = 2023 +| religion = Aged 18 - 59 {{ublist|item_style=white-space;|51% [[Irreligion|No Religion]]|38% [[Christianity]]|10% [[Islam]]|0.5% [[Judaism]]|0.5% [[Buddhism]]}} +| demonym = French +| government_type = Unitary [[Semi-presidential system|semi-presidential republic]] +| leader_title1 = [[President of France|President]] +| leader_name1 = [[Emmanuel Macron]] +| leader_title2 = [[Prime Minister of France|Prime Minister]] +| leader_name2 = [[Gabriel Attal]] +| leader_title3 = [[List of presidents of the Senate of France|President of the Senate]] +| leader_name3 = [[Gérard Larcher]] +| leader_title4 = [[List of presidents of the National Assembly of France|President of the National Assembly]] +| leader_name4 = [[Yaël Braun-Pivet]] +| legislature = [[French Parliament|Parliament]] +| upper_house = [[Senate (France)|Senate]] +| lower_house = [[National Assembly (France)|National Assembly]] +| sovereignty_type = [[History of France|Establishment]] +| established_event1 = [[West Francia|Kingdom of the West Franks]] – [[Treaty of Verdun]] +| established_date1 = 10 August 843 +| established_event2 = [[Kingdom of France]] – [[List of French monarchs#House of Capet (987–1792)|Capetian rulers of France]] +| established_date2 = 3 July 987 +| established_event3 = [[Proclamation of the abolition of the monarchy|French Republic]] – [[French First Republic]] +| established_date3 = 22 September 1792 +| established_event4 = [[Enlargement of the European Union#Founding members|Founded]] the [[European Economic Community|EEC]]{{Efn-ur|[[European Union]] since 1993}} +| established_date4 = 1 January 1958 +| established_event5 = [[Constitution of France|Current constitution]] – [[French Fifth Republic]] +| established_date5 = 4 October 1958 +| area_km2 = 643,801 +| area_footnote = {{Cite web |title=Field Listing :: Area |url=https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |url-status=dead |archive-url=https://web.archive.org/web/20140131115000/https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |archive-date=31 
January 2014 |access-date=1 November 2015 |website=The World Factbook |publisher=CIA}} {{PD-notice}} +| area_rank = 42nd +| area_sq_mi = 248,600 +| area_label2 = [[Metropolitan France]] ([[Institut géographique national|IGN]]) +| area_data2 = {{Cvt|551695|km2}}{{Efn-ur|name=three|French [[Institut géographique national|National Geographic Institute]] data, which includes bodies of water}} ([[List of countries and dependencies by area|50th]]) +| area_label3 = Metropolitan France ([[Cadastre]]) +| area_data3 = {{Cvt|543940.9|km2}}{{Efn-ur|name=four|French [[Land registration|Land Register]] data, which exclude lakes, ponds and [[glacier]]s larger than 1 km2 (0.386 sq mi or 247 acres) as well as the estuaries of rivers}}{{Cite journal |year=2011 |title=France Métropolitaine |url=http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |url-status=dead |journal=INSEE |archive-url=https://web.archive.org/web/20150828051307/http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |archive-date=28 August 2015}} ([[List of countries and dependencies by area|50th]]) +| population_estimate = {{IncreaseNeutral}} 68,373,433{{Cite web |date=16 January 2023 |title=Bilan démographique 2023 – Composantes de la croissance démographique, France |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |url-status=live }} +| percent_water = 0.86{{Cite web |title=Surface water and surface water change |url=https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |access-date=11 October 2020 |publisher=Organisation for Economic Co-operation and Development (OECD) |archive-date=24 March 2021 |archive-url=https://web.archive.org/web/20210324133453/https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |url-status=live }} +| population_estimate_year = January 2024 +| population_estimate_rank = 20th +| population_label2 = Density +| population_data2 = {{Pop density|68373433|643801|km2}} ([[List of countries and territories by population density|106th]]) +| population_label3 = Metropolitan France, estimate {{As of|lc=y|January 2024}} +| population_data3 = {{IncreaseNeutral}} 66,142,961{{Cite web |date=16 January 2024 |title=Bilan démographique 2023 – Composantes de la croissance démographique, France métropolitaine |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |url-status=live }} ([[List of countries and dependencies by population|23rd]]) +| population_density_km2 = 122 +| population_density_sq_mi = 313 +| population_density_rank = 89th +| GDP_PPP = {{increase}} $3.988 trillion{{cite web |url=https://www.imf.org/en/Publications/WEO/weo-database/2024/April/weo-report?c=132,&s=NGDPD,PPPGDP,NGDPDPC,PPPPC,&sy=2022&ey=2029&ssm=0&scsm=1&scc=0&ssd=1&ssc=0&sic=0&sort=country&ds=.&br=1 |title=World Economic Outlook Database, April 2024 Edition. 
(France) |publisher=[[International Monetary Fund]] |website=www.imf.org |date=16 April 2024 |access-date=16 April 2024}} +| GDP_PPP_year = 2024 +| GDP_PPP_rank = 10th +| GDP_PPP_per_capita = {{increase}} $60,339 +| GDP_PPP_per_capita_rank = 26th +| GDP_nominal = {{increase}} $3.130 trillion +| GDP_nominal_year = 2024 +| GDP_nominal_rank = 7th +| GDP_nominal_per_capita = {{increase}} $47,359 +| GDP_nominal_per_capita_rank = 23rd +| Gini = 29.8 +| Gini_year = 2022 +| Gini_change = increase +| Gini_ref = {{Cite web |title=Gini coefficient of equivalised disposable income – EU-SILC survey |url=https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |access-date=25 November 2023 |website=ec.europa.eu |publisher=[[Eurostat]] |archive-date=9 October 2020 |archive-url=https://web.archive.org/web/20201009091832/https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |url-status=live }} +| HDI = 0.910 +| HDI_year = 2022 +| HDI_change = steady +| HDI_ref = {{cite web|url=https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|title=Human Development Report 2023/24|language=en|publisher=[[United Nations Development Programme]]|date=13 March 2024|page=288|access-date=13 March 2024|archive-date=13 March 2024|archive-url=https://web.archive.org/web/20240313164319/https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|url-status=live}} +| HDI_rank = 28th +| currency = {{Unbulleted list +| [[Euro]] ([[Euro sign|€]]) ([[ISO 4217|EUR]]){{Efn-ur|name=six|Whole of the except the overseas territories in the Pacific Ocean}} +| [[CFP franc]] (XPF){{Efn-ur|name=seven|French overseas territories in the Pacific Ocean only}} +}} +| time_zone = [[Central European Time]] +| utc_offset = +1 +| utc_offset_DST = +2 +| time_zone_DST = [[Central European Summer Time]]{{Efn-ur|name=eight|Daylight saving time is observed in metropolitan France and [[Saint Pierre and Miquelon]] only.}} +| DST_note = Note: Various other time zones are observed in overseas France.{{Efn-ur|name=nine|Time zones across the span from UTC−10 ([[French Polynesia]]) to UTC+12 ([[Wallis and Futuna]])}}
    Although France is in the [[Coordinated Universal Time|UTC]] (Z) ([[Western European Time]]) zone, [[UTC+01:00]] ([[Central European Time]]) was enforced as the standard time since 25 February 1940, upon [[German military administration in occupied France during World War II|German occupation in WW2]], with a +0:50:39 offset (and +1:50:39 during [[Daylight saving time|DST]]) from Paris [[Local mean time|LMT]] (UTC+0:09:21).{{Cite web |title=Time Zone & Clock Changes in Paris, Île-de-France, France |url=https://www.timeanddate.com/time/zone/france/paris |access-date=9 October 2021 |website=timeanddate.com |archive-date=23 October 2021 |archive-url=https://web.archive.org/web/20211023233753/https://www.timeanddate.com/time/zone/france/paris |url-status=live }} +| date_format = dd/mm/yyyy ([[Anno Domini|AD]]) +| drives_on = right +| calling_code = [[Telephone numbers in France|+33]]{{Efn-ur|name=eleven|The overseas regions and collectivities form part of the [[Telephone numbers in France|French telephone numbering plan]], but have their own country calling codes: [[Guadeloupe]] +590; [[Martinique]] +596; [[French Guiana]] +594; [[Réunion]] and [[Mayotte]] +262; [[Saint Pierre and Miquelon]] +508. The overseas territories are not part of the French telephone numbering plan; their country calling codes are: [[New Caledonia]] +687; [[French Polynesia]] +689; [[Wallis and Futuna]] +681.}} +| cctld = [[.fr]]{{Efn-ur|name=ten|In addition to [[.fr]], several other Internet TLDs are used in French overseas ''départements'' and territories: [[.re]], [[.mq]], [[.gp]], [[.tf]], [[.nc]], [[.pf]], [[.wf]], [[.pm]], [[.gf]] and [[.yt]]. France also uses [[.eu]], shared with other members of the European Union. The [[.cat]] domain is used in [[Catalan Countries|Catalan-speaking territories]].}} +| footnotes = Source gives area of metropolitan France as 551,500 km2 (212,900 sq mi) and lists overseas regions separately, whose areas sum to 89,179 km2 (34,432 sq mi). Adding these give the total shown here for the entire French Republic. [[The World Factbook]] reports the total as 643,801 km2 (248,573 sq mi). +| flag_p1 = Flag of France (1794–1815, 1830–1974, 2020–present).svg +}} + +'''France''',{{efn|{{IPA|fr|fʁɑ̃s|lang|LL-Q150 (fra)-Fhala.K-France.wav}}}} officially the '''French Republic''',{{efn|{{Lang-fr|link=no|République française}} {{IPA-fr|ʁepyblik fʁɑ̃sɛːz|}}}} is a country located primarily in [[Western Europe]]. It also includes [[Overseas France|overseas regions and territories]] in the [[Americas]] and the [[Atlantic Ocean|Atlantic]], [[Pacific Ocean|Pacific]] and [[Indian Ocean|Indian]] oceans,{{Efn-ur|name=twelve|[[French Guiana]] is in South America; [[Guadeloupe]] and [[Martinique]] are in the Caribbean Sea; and [[Réunion]] and [[Mayotte]] are in the Indian Ocean, off the coast of Africa. All five [[Administrative divisions of France|are considered integral parts of the French Republic]]. France also comprises [[Saint Pierre and Miquelon]] in North America; [[Saint Barthélemy]] and [[Saint Martin (island)|Saint Martin]] in the Caribbean; [[French Polynesia]], [[New Caledonia]], [[Wallis and Futuna]] and [[Clipperton Island]] in the Pacific Ocean; and the [[French Southern and Antarctic Lands]].}} giving it one of the largest discontiguous [[exclusive economic zone]]s in the world. 
[[Metropolitan France]] shares borders with [[Belgium]] and [[Luxembourg]] to the north, [[Germany]] to the north east, [[Switzerland]] to the east, [[Italy]] and [[Monaco]] to the south east, [[Andorra]] and [[Spain]] to the south, and a maritime border with the [[United Kingdom]] to the north west. Its metropolitan area extends from the [[Rhine]] to the Atlantic Ocean and from the [[Mediterranean Sea]] to the [[English Channel]] and the [[North Sea]]. Its overseas territories include [[French Guiana]] in [[South America]], [[Saint Pierre and Miquelon]] in the North Atlantic, the [[French West Indies]], and many islands in [[Oceania]] and the [[Indian Ocean]]. Its [[Regions of France|eighteen integral regions]] (five of which are overseas) span a combined area of {{Cvt|643801|km2}} and have a total population of 68.4 million {{As of|2024|January|lc=y}}. France is a [[Unitary state|unitary]] [[Semi-presidential system|semi-presidential]] [[republic]] with its capital in [[Paris]], the [[List of communes in France with over 20,000 inhabitants|country's largest city]] and main cultural and commercial centre; other major [[Urban area (France)|urban areas]] include [[Marseille]], [[Lyon]], [[Toulouse]], [[Lille]], [[Bordeaux]], [[Strasbourg]], [[Nantes]] and [[Nice]]. + +Metropolitan France was settled during the [[Iron Age]] by [[List of ancient Celtic peoples and tribes|Celtic tribes]] known as [[Gauls]] before [[Roman Gaul|Rome annexed the area]] in 51 BC, leading to a distinct [[Gallo-Roman culture]]. In the [[Early Middle Ages]], the [[Germanic peoples|Germanic]] [[Franks]] formed the Kingdom of [[Francia]], which became the heartland of the [[Carolingian Empire]]. The [[Treaty of Verdun]] of 843 partitioned the empire, with [[West Francia]] evolving into the [[Kingdom of France]]. In the [[High Middle Ages]], France was a powerful but decentralized [[Feudalism|feudal]] kingdom, but from the mid-14th to the mid-15th centuries, France was plunged into a dynastic conflict with [[Kingdom of England|England]] known as the [[Hundred Years' War]]. In the 16th century, the [[French Renaissance]] saw culture flourish and a [[French colonial empire]] rise.{{Cite book |url=https://books.google.com/books?id=UX8aeX_Lbi4C&pg=PA1 |title=Memory, Empire, and Postcolonialism: Legacies of French Colonialism |publisher=Lexington Books |year=2005 |isbn=978-0-7391-0821-5 |editor-last=Hargreaves, Alan G. |page=1}} Internally, France was dominated by the conflict with the [[House of Habsburg]] and the [[French Wars of Religion]] between [[Catholics]] and [[Huguenots]]. France was successful in the [[Thirty Years' War]] and further increased its influence during the reign of [[Louis XIV]].{{Cite book |last1=R.R. Palmer |url=https://archive.org/details/historyofmodernw00palm |title=A History of the Modern World |last2=Joel Colton |year=1978 |edition=5th |page=[https://archive.org/details/historyofmodernw00palm/page/161 161] |url-access=registration}} + +The [[French Revolution]] of 1789 overthrew the {{Lang|fr|[[Ancien Régime]]|italic=no}} and produced the [[Declaration of the Rights of Man and of the Citizen|Declaration of the Rights of Man]], which expresses the nation's ideals to this day. France reached its political and military zenith in the early 19th century under [[Napoleon|Napoleon Bonaparte]], subjugating part of continental Europe and establishing the [[First French Empire]]. 
The [[French Revolutionary Wars|French Revolutionary]] and [[Napoleonic Wars]] significantly shaped the course of European history. The collapse of the empire initiated a period of relative decline, in which France endured the [[Bourbon Restoration]] until the founding of the [[French Second Republic]] which was succeeded by the [[Second French Empire]] upon [[Napoleon III]]'s takeover. His empire collapsed during the [[Franco-Prussian War]] in 1870. This led to the establishment of the [[Third French Republic]] Subsequent decades saw a period of economic prosperity and cultural and scientific flourishing known as the [[Belle Époque]]. France was one of the [[Triple Entente|major participants]] of [[World War I]], from which [[Treaty of Versailles|it emerged victorious]] at great human and economic cost. It was among the [[Allies of World War II|Allied powers]] of [[World War II]], but it surrendered and [[German military administration in occupied France during World War II|was occupied]] by the [[Axis powers|Axis]] in 1940. Following [[Liberation of France|its liberation in 1944]], the short-lived [[French Fourth Republic|Fourth Republic]] was established and later dissolved in the course of the defeat in the [[Algerian War]] and [[Rif War|Moroccan War of Independence]]. The current [[French Fifth Republic|Fifth Republic]] was formed in 1958 by [[Charles de Gaulle]]. Algeria and most French colonies became independent in the 1960s, with the majority retaining [[Françafrique|close economic and military ties with France]]. + +France retains its centuries-long status as a global centre [[French art|of art]], [[Science and technology in France|science]], and [[French philosophy|philosophy]]. [[List of World Heritage Sites in France|It hosts]] the [[World Heritage Sites by country|third-largest]] number of [[UNESCO]] [[World Heritage Site]]s and is the world's [[World Tourism rankings|leading tourist destination]], receiving over 89 million foreign [[Tourism in France|visitors in 2018]].{{Cite web |date=17 May 2019 |title=France posts new tourist record despite Yellow Vest unrest |url=https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |website=France 24 |access-date=18 May 2024 |archive-date=12 May 2023 |archive-url=https://web.archive.org/web/20230512192740/https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |url-status=live }} France is a [[developed country]] with a [[List of countries by GDP (nominal) per capita|high nominal per capita income globally]] and [[Economy of France|its advanced economy]] ranks among the [[List of countries by GDP (nominal)|largest in the world]]. It is a [[great power]] in global affairs,Jack S. Levy, ''War in the Modern Great Power System, 1495–1975'', (2014) p. 29 being one of the five [[permanent members of the United Nations Security Council]] and an official [[List of states with nuclear weapons|nuclear-weapon state]]. France is a founding and [[Big Four (Western Europe)|leading]] [[Member state of the European Union|member of the European Union]] and the [[eurozone]], as well as a key member of the [[Group of Seven]], [[NATO|North Atlantic Treaty Organization]] (NATO), [[OECD|Organisation for Economic Co-operation and Development]] (OECD), and [[Organisation internationale de la Francophonie|Francophonie]]. 
+ +==Etymology and pronunciation== +{{Main|Name of France}} +Originally applied to the whole [[Francia|Frankish Empire]], the name ''France'' comes from the [[Latin]] {{Lang|la|[[Francia]]}}, or "realm of the [[Franks]]".{{Cite web |title=History of France |url=http://www.discoverfrance.net/France/History/DF_history.shtml |url-status=dead |archive-url=https://web.archive.org/web/20110824051936/http://www.discoverfrance.net/France/History/DF_history.shtml |archive-date=24 August 2011 |access-date=17 July 2011 |publisher=Discoverfrance.net}} Modern France is still named today {{Lang|it|Francia}} in Italian and Spanish, while {{Lang|de|Frankreich}} in German, {{Lang|nl|Frankrijk}} in Dutch and {{Lang|sv|Frankrike}} in Swedish and Norwegian all mean "Land/realm of the Franks". + +The [[name of the Franks]] is related to the English word ''frank'' ("free"): the latter stems from the [[Old French]] {{Lang|ang|franc}} ("free, noble, sincere"), ultimately from [[Medieval Latin]] ''francus'' ("free, exempt from service; freeman, Frank"), a generalisation of the tribal name that emerged as a [[Late Latin]] borrowing of the reconstructed [[Frankish language|Frankish]] [[Endonym and exonym|endonym]] {{Lang|frk|*Frank}}.Examples: {{Cite encyclopedia |title=frank |encyclopedia=American Heritage Dictionary}} {{Cite encyclopedia|title=frank|encyclopedia=Webster's Third New International Dictionary}} And so on. It has been suggested that the meaning "free" was adopted because, after the conquest of [[Gaul]], only Franks were free of taxation,{{Cite book |first=Michel |last=Rouche |title=A History of Private Life: From Pagan Rome to Byzantium |publisher=Belknap Press |year=1987 |isbn=978-0-674-39974-7 |editor-first=Paul |editor-last=Veyne |page=425 |chapter=The Early Middle Ages in the West |oclc=59830199}} or more generally because they had the status of freemen in contrast to servants or slaves. The etymology of ''*Frank'' is uncertain. It is traditionally derived from the [[Proto-Germanic language|Proto-Germanic]] word {{Lang|gem-x-proto|frankōn}}, which translates as "javelin" or "lance" (the throwing axe of the Franks was known as the ''[[francisca]]''),{{Cite book |last1=Tarassuk |first1=Leonid |url=https://books.google.com/books?id=UJbyPwAACAAJ |title=The Complete Encyclopedia of Arms and Weapons: the most comprehensive reference work ever published on arms and armor from prehistoric times to the present with over 1,250 illustrations |last2=Blair |first2=Claude |publisher=Simon & Schuster |year=1982 |isbn=978-0-671-42257-8 |page=186 |access-date=5 July 2011}} although these weapons may have been named because of their use by the Franks, not the other way around.{{Cite web |title=Origin and meaning of Frank |url=https://www.etymonline.com/word/frank |website=Online Etymology Dictionary |language=en |access-date=18 May 2024 |archive-date=15 May 2024 |archive-url=https://web.archive.org/web/20240515001926/https://www.etymonline.com/word/frank |url-status=live }} + +In English, 'France' is pronounced {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in American English and {{IPAc-en|f|r|ɑː|n|s}} {{Respell|FRAHNSS}} or {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in British English. 
The pronunciation with {{IPAc-en|ɑː}} is mostly confined to accents with the [[Trap–bath split|trap-bath split]] such as [[Received Pronunciation]], though it can be also heard in some other dialects such as [[Cardiff English]], in which {{IPAc-en|f|r|ɑː|n|s}} is in free variation with {{IPAc-en|f|r|æ|n|s}}.{{Cite book |last=Wells |first=John C. |title=Longman Pronunciation Dictionary |publisher=Longman |year=2008 |isbn=978-1-4058-8118-0 |edition=3rd}}; {{Cite book|last1=Collins|first1=Beverley|last2=Mees|first2=Inger M.|editor-last1=Coupland|editor-first1=Nikolas|editor-last2=Thomas|editor-first2=Alan Richard|year=1990|title=English in Wales: Diversity, Conflict, and Change|chapter=The Phonetics of Cardiff English|publisher=Multilingual Matters Ltd.|page=96|isbn=978-1-85359-032-0|url=https://books.google.com/books?id=tPwYt3gVbu4C}} diff --git a/samples/Polar Bears a Complete Guide.txt b/samples/Polar Bears a Complete Guide.txt new file mode 100644 index 0000000..19dcaed --- /dev/null +++ b/samples/Polar Bears a Complete Guide.txt @@ -0,0 +1,2 @@ +BOOK REVIEWJournal of Mammalogy, 94(1):243–244, 2013Ó2013 American Society of MammalogistsDerocher, A. E., and W. Lynch. 2012. POLAR BEARS:ACOMPLETE GUIDE TO THEIR BIOLOGY AND BEHAVIOR. JohnsHopkins University Press, Baltimore, Maryland, 264 pp. ISBN978-1-4214-0305-2, price (hardbound), $39.95.When I 1st became interested in mammals at about the ageof 8, my neighbor Gordon True—a botanist at CaliforniaAcademy of Sciences—had a book on his desk by KarlKenyon (1953), The Seals, Sea-Lions, and Sea Otter of thePacific Coast. I read through it as best I could and wastransformed. A few years later I read it again, adding (Kenyon1969) The Sea Otter in the Eastern Pacific Ocean and VictorScheffer’s (1972) The Year of the Seal and later The Year of theWhale (Scheffer 1984). I was fascinated by the interplaybetween these animals and a major predator of theirs, the polarbear.During the early 1970s many books appeared by naturalistsand ecologists that not only documented important naturalhistory stories, but also told of how humans had affected thelives of other animals and entire biomes. Greenpeace hit thehigh seas and cruised into the Arctic bringing untold stories ofslaughter of marine mammals, especially whales. As more andmore books hit the shelves, new chapters appeared on ‘‘humaninteractions,’’ and ‘‘threats,’’ and ‘‘ will they survive?’’ Wesometimes forget—as even the 1% start to admit that climatechange is a problem—that there are many stories still to tell. Infact, there are many stories yet to be discovered, especially byseasoned, trained observers such as those documented in PolarBears: A Complete Guide to Their Biology and Behavior byauthors Andrew Derocher and Wayne Lynch.And the story told in text (Derocher) and photographs(Lynch) is one that Scheffer and Kenyon would have likelyloved to have seen and read. A more proximate reason thisbook attracted my attention was its beautiful design, rich colorof the photographs, and seemingly broad (not necessarily deep)treatment of polar bear biology. There are 14 chapters thatbegin with introductory remarks about polar bears and theirevolution (chapters 1–3). A chapter of Early Human–BearInteractions starts off the book along with chapters on ArcticMarine Ecosystems, and Sea Ice and Habitat (chapters 4, 5,and 6, respectively). Then, Distribution and Populations(chapter 8) is tucked between Prey (chapter 7) and HuntingMethods followed by Polar Bear Behavior (chapters 9 and 10,respectively). 
The last 4 chapters are Den Ecology (chapter 11),From Birth to Death (chapter 12), Threats (chapter 13), andLooking Forward (chapter 14). There are convenient appen-dixes on scientific names of plants and animals and thingseaten by polar bears, and both an extensive list of referencesand a comprehensive index.Let me step back a bit and provide a bit more depth aboutthis book. It is printed on nice, high-quality paper, and at 1stglance appears as a ‘‘coffee-table book.’’ However the contentis both detailed and provocative because there is nopussyfooting around here because the authors are very clearin reminding us of the peril polar bears face as Arctic icedisappears. It is disappearing, and so will the bears. But as allof us know as scientists, this isn’t just a, ‘‘those poor polarbears’’ story, it also tells the story of the impact on allmammals, and our own future as well. But as for the authors,they may be the most fortunate ones, because as stated on page6, ‘‘There are few polar bear biologists in the world and eachlives in one of the five polar nations that have polar bears undertheir jurisdiction.’’ Five; and these folks passionately fight toget the word out to the rest of the planet.Basic natural history is the stuff of great tales. Even for thoseof you who are seasoned mammalogy course instructors, youwill find yourself saying to yourself, ‘‘ Huh! I didn’t knowthat!’’ For instance, overall bear health can be spotted by theirprofile as they walk, and bears have good night vision (goodfor long polar nights) to see other animals on the ice beforethey are seen—but they likely would have smelled the otheranimal before that. Ageing by sectioning teeth, adaptations forwalking on ice, highly lobulated kidneys to deal withconsumption of salt water, lack of hibernation, denning, andother ‘‘cool’’ things about living in the polar region are alldiscussed; a teaser for more details to come. For instance,details of den ecology (chapter 11; accompanied by beautifulphotographs) provides incredible information that for thesebears only a very few specific locations—such as WrangelIsland—are immensely important for bear denning, and hencethe production of bear cubs. This locality is of the highestimportance to survival of polar bears because of this fecundity.The next section (chapters 3–5) provide clear explanations ofbear evolution, human–bear interactions, and the ability ofbears to prosper in an ice–marine ecosystem. The authors evenhighlight natural hybrids between polar bears and grizzlies(‘‘grolar’’ or ‘‘ pizzly’’ bears—either way, they say love isblind). As with these types of books, there is a fascinatingsection on polar bears in mythology ending with a discussionof Inuit people and culture, who are linked to polar bearsthrough stories, folktales, taboos, and hunting. The discussionof the Arctic ecosystem is very brief (4 pages) and could havebeen combined with the following chapter on sea ice andhabitat (chapter 6). The chapter on prey (chapter 7) is extensiveand very good. Focus of the discussion in this chapter is ofpolar bear prey and the little ‘‘packaged meals’’ represented thatdot the ice as a large variety of seals and their kin. Ringed sealsare most abundant and easily devoured, whereas walrus aredangerous for bears to hunt. But discussion also extends toother ‘‘meal-deals’’ such as carrion, whales, birds, and evencrowberries (genus Empetrum) and seaweed as dietary items.Chapter 8 discusses distributions and populations and whythere are no polar bears in Antarctica. 
But the fascinatingwww.mammalogy.org243 +aspect of this part is the ‘‘polar bear research in action.’’ AuthorDerocher does an excellent job taking the reader there indiscovering the 19 recognized populations of polar bears and inestimating abundance (some populations have not yet beenestimated and their numbers are unknown). Each of thepopulations is discussed as well as their status and threats.Chapters 9 and 10—discussions of hunting and their generalbehaviors—could have been either combined or linked closerto chapter 6 or chapter 7. Chapter 12 is beautiful, the closest Ihave seen to Victor Scheffer’s (1972) The Year of the Seal.Opening this chapter in the Arctic summer as a cue thatpregnant females need to start finding a den begins the ‘‘ year.’’Once cubs are born—and twins are common and there isadoption of lone cubs—it takes about 2 years for them to huntand head out on their own. Swimming and moving around theirrange, competition between siblings, and techniques in huntingand defense take up much of the time with the mother. There issome information on disease, parasites, and adult survivalrates, but this could have been expanded by including resultsfrom more studies. Although there are plenty of pictures ofbears eating things, there are not any of bears being killed inthis chapter; that is for the last substantive section, chapter 13,on threats to bears.Among the threats, there are harvests by local people usingtraditional methods, but also annual quota of a legal killing ofpolar bears. Incredibly, so far removed from our industrialworld, polar bears are uniformly affected by waste aerial andparticulate pollutants that industrialized nations dump into theglobal environment. Because bear distribution is on top of theworld and they are top consumers on a cascade of prey andwhere air and water currents carry much polluting chemicalsand toxins to their world, there is no such thing as anunpolluted bear. And we have heard this story before. KenGeluso et al. (1976) described the concentration of organo-chlorine residue from the widespread use of DDT pesticides aslactating free-tailed bats (Tadarida brasiliensis) nursed theiryoung. The yearling bats, after being weaned and beginningflight, would fall dead after these pollutants that had beenconcentrated in their fat stored from lactation were metabolizedand poisoned them. The same may be happening in polar bearsas cubs mature and begin to use up their fat reserves.Derocher says that, ‘‘heavy metals mercury, lead, cadmium,and selenium are all found in polar bears. Some heavy metalsoccur naturally; others are released from metal smelters,cement production, and fossil fuel burning. Coal releases themost mercury.’’ Sure, we might as well poison the earththoroughly before melting the ice during global environmentalchanges! But wait, there’s more! As sea ice melts, there is moreand more talk about drilling for oil in the Arctic. There areactive rigs there now. A large oil spill would have seriousconsequences for polar bears that seem ill-equipped forescaping polluted waters and feeding on oil-soaked prey, andit is unlikely that rescue workers would have much successwashing them with dish soap as was done to pelicans in theBP–Gulf oil spill for more than 3 months in 2010.Clearly global change—in particular warming of theArctic—will drastically reduce bear numbers if not eliminatepolar bears completely. 
As the authors pen in the last, short,poignant chapter 14, evolutionarily, polar bears have onlyexisted ‘‘a few moments.’’ To help save the ice bears, theauthors implore us to learn more, get involved, and work toaddress the issues threatening bear and human lives. Think ofall the nonsense we surround ourselves with. That is all just tocompensate for what a polar bear can do on its own—raw–engaged—living to which it is adapted. This is a beautifulbook, a fascinating subject, and a turning point in our humanevolution and relationship with the natural world. Get the book,then go out and do something.—WILLIAM L. GANNON,Office of the Vice President forResearch, Department of Biology, and Museum ofSouthwestern Biology, MSC05 3480, University of NewMexico, Albuquerque, NM 87131, USA; e-mail:wgannon@unm.edu.LITERATURE CITEDGELUSO, K. N., J. S. ALTENBACH,AND D. E. WILSON. 1976. Batmortality: pesticide poisoning and migratory stress. Science, NewSeries 4261, 194:184–186.KENYON, K. W. 1953. The seals, sea-lions, and sea otter of the Pacificcoast. United States Fish and Wildlife Service, published byUniversity of California Press, Berkeley.KENYON, K. W. 1969. The sea otter in the eastern Pacific Ocean.United States Department of the Interior, North American Fauna68:i–xiii þ1–352.SCHEFFER, V. B. 1972. The year of the seal. Charles Scribner’s Sons,New York.SCHEFFER, V. B. 1984. The year of the whale. Simon & Schuster, NewYork. \ No newline at end of file diff --git a/samples/ReActPrompts.txt b/samples/ReActPrompts.txt new file mode 100644 index 0000000..602e010 --- /dev/null +++ b/samples/ReActPrompts.txt @@ -0,0 +1,67 @@ +Sure, here are 10 ReAct queries that would use at least 4 agents in some sequence: + +1. **Query**: "Generate a research outline on the effects of climate change on polar bear populations, including recent studies, statistical data, and a summary of findings." + - Agents: Research Paper Finder, Data Summarizer, Statistical Analysis, Research Outline Generator + +2. **Query**: "Create a literature review on machine learning applications in healthcare, with a summary of each paper and a keyword extraction for the main topics." + - Agents: Research Paper Finder, Literature Review Organizer, Data Summarizer, Keyword Extractor + +3. **Query**: "Design an experimental study on the impact of social media on teenagers' mental health, including hypothesis generation, methodology advice, and survey questions." + - Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Survey Designer + +4. **Query**: "Find funding opportunities for a research project on renewable energy technologies, generate a thesis statement, and create a bibliography for the related literature." + - Agents: Funding Finder, Research Paper Finder, Thesis Statement Generator, Bibliography Manager + +5. **Query**: "Analyze the statistical significance of data from a survey on remote work productivity, generate graphs, and provide a summary of the findings." + - Agents: Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer + +6. **Query**: "Create a comprehensive research outline on the impact of diet on cardiovascular health, including a literature review, data summary, and keyword extraction." + - Agents: Research Paper Finder, Research Outline Generator, Data Summarizer, Keyword Extractor + +7. **Query**: "Design a study on the effectiveness of different teaching methods in elementary education, including hypothesis generation, methodology advice, and statistical analysis." 
+ - Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Statistical Analysis + +8. **Query**: "Generate a citation for a recent article on quantum computing, summarize the article, and include it in a bibliography for a related research project." + - Agents: Citation Generator, Research Paper Finder, Data Summarizer, Bibliography Manager + +9. **Query**: "Develop a research proposal on the socio-economic impacts of urbanization, including literature review, hypothesis generation, and data summary." + - Agents: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Data Summarizer + +10. **Query**: "Create a detailed research outline for a study on the psychological effects of video games on adolescents, including a survey design and a review of recent studies." + - Agents: Research Paper Finder, Research Outline Generator, Survey Designer, Literature Review Organizer + +These queries are designed to invoke multiple agents in a sequence, allowing you to test the integration and functionality of your ReAct system effectively. + +Sure, here are ten more queries that utilize the ReAct paradigm, showcasing the various agents and tasks: + +1. **Query**: Develop a comprehensive research plan to investigate the impact of climate change on polar bear populations, including data analysis and a visualization of temperature trends. + - **Agents/Tasks**: Research Paper Finder, Data Summarizer, Statistical Analysis, Graph Generator, Research Outline Generator. + +2. **Query**: Create a detailed marketing plan for a new tech startup, including a competitor analysis, keyword extraction, and a list of potential investors. + - **Agents/Tasks**: Research Paper Finder, Keyword Extractor, Data Summarizer, Funding Finder, Research Outline Generator. + +3. **Query**: Design a user study to evaluate the usability of a new mobile app, including survey questions, statistical analysis, and a literature review on similar studies. + - **Agents/Tasks**: Survey Designer, Research Paper Finder, Literature Review Organizer, Statistical Analysis, Research Outline Generator. + +4. **Query**: Compile a comprehensive report on the latest advancements in renewable energy technologies, including citations and a summary of key findings. + - **Agents/Tasks**: Research Paper Finder, Citation Generator, Data Summarizer, Research Outline Generator, Keyword Extractor. + +5. **Query**: Plan an experimental study on the effects of sleep deprivation on cognitive performance, including a survey design and a review of relevant methodologies. + - **Agents/Tasks**: Research Paper Finder, Survey Designer, Methodology Advisor, Research Outline Generator, Experimental Design Helper. + +6. **Query**: Create a systematic review of the literature on artificial intelligence in healthcare, including keyword extraction and a summary of major trends. + - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Keyword Extractor, Data Summarizer, Research Outline Generator. + +7. **Query**: Develop a thesis proposal on the economic impact of remote work, including a literature review, hypothesis generation, and a bibliography. + - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Research Outline Generator, Bibliography Manager. + +8. **Query**: Generate a detailed report on the effects of social media on mental health, including data cleaning, statistical analysis, and visualization of the findings. 
+ - **Agents/Tasks**: Research Paper Finder, Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer. + +9. **Query**: Design a comprehensive survey to study consumer preferences for electric vehicles, including a methodology overview and a review of similar studies. + - **Agents/Tasks**: Survey Designer, Research Paper Finder, Methodology Advisor, Literature Review Organizer, Research Outline Generator. + +10. **Query**: Create a funding proposal for a project on sustainable agriculture practices, including a literature review, a list of potential funding sources, and a bibliography. + - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Funding Finder, Research Outline Generator, Bibliography Manager. + +These queries utilize multiple agents and tasks to demonstrate the ReAct paradigm, highlighting the interplay between different agents and the sequential nature of the process. \ No newline at end of file diff --git a/samples/State of Polar Bears.txt b/samples/State of Polar Bears.txt new file mode 100644 index 0000000..115fd06 --- /dev/null +++ b/samples/State of Polar Bears.txt @@ -0,0 +1,356 @@ +THE STATE OF THE +POLAR BEAR REPORT +2023 +The Global Warming Policy Foundation +Briefing 67 +Susan Crockford +The State of the Polar Bear Report 2023 +Susan Crockford +Briefing 67, The Global Warming Policy Foundation +© Copyright 2024, The Global Warming Policy Foundation +iii +Contents +About the author iii +Foreword v +Executive summary v +1. Introduction 1 +2. Conservation status 1 +3. Population trends 5 +4. Habitat and primary productivity 6 +5. Human/bear interactions 11 +6. Discussion 14 +Bibliography 16 +Notes 24 +About the Global Warming Policy Foundation 26 +About the author +Dr Susan Crockford is an evolutionary biologist and has been working for more than 40 years in +archaeozoology, paleozoology and forensic zoology.1 + She is a former adjunct professor at the +University of Victoria, British Columbia and works full time for a private consulting company she +co-owns (Pacific Identifications Inc). She is the author of Eaten: A Novel (a science-based polar bear +attack thriller), Polar Bear Facts and Myths (for ages seven and up, also available in French, German, +Dutch, Portuguese, and Norwegian), Polar Bears Have Big Feet (for preschoolers), and several +fully referenced books including, Polar Bear Evolution: A Model for the Origin of Species, Sir David +Attenborough and the Walrus Deception, The Polar Bear Catastrophe That Never Happened, and Polar +Bears: Outstanding Survivors of Climate Change,2 as well as a scientific paper on polar bear conservation status and a peer-reviewed paper on the distribution of ancient polar bear remains.3 + She has +authored several earlier briefing papers, reports, and videos for GWPF, as well as opinion pieces for +major news outlets, on polar bear and walrus ecology and conservation.4 + Susan Crockford blogs +at www.polarbearscience.com. + +v +Foreword +This report is intended to provide +a brief update on the habitat and +conservation status of polar bears, +with commentary regarding inconsistencies and sources of bias found +in recent literature that won’t be +found elsewhere. It is a summary +of the most recent information +on polar bears, relative to historical records, based on a review of +2023 scientific literature and media +reports, and, in places, reiterates or +updates information provided in +previous papers. 
This publication +is intended for a wide audience, +including scientists, teachers, +students, decision-makers, and +members of the general public +interested in polar bears and the +Arctic sea ice environment. +Executive summary +2023 marked 50 years of international cooperation to protect +polar bears across the Arctic. +Those efforts should be hailed as +a conservation success story: from +late-1960s population estimate by +the US Fish and Wildlife Service of +about 12,000 individuals, numbers +have almost tripled, to just over +32,000 in 2023 (with a wide range +of potential error for both estimates). +• There were no reports from +the Arctic in 2023 indicating polar +bears were being harmed due to +lack of suitable habitat, in part +because Arctic sea ice in summer +has not declined since 2007. +• Contrary to expectations, a +study in Svalbard found a decrease +in polar bears killed in defense of +life or property over the last 40 +years, despite profound declines in +sea ice over the last two decades. +• A survey of Southern Hudson +Bay polar bears in 2021 showed +an astonishing 30% increase over +five years, which adds another 223 +bears to the global total. +• A concurrent survey of Western +Hudson Bay polar bears in 2021 +showed that numbers had not +declined since 2011, which also +means they have not declined +since 2004. Movement of polar +bears across the boundaries with +neighbouring subpopulations +may account for the appearance +of a decline, when none actually +occurred. +• The IUCN Polar Bear Specialist +Group has ignored a 2016 recommendation that the boundaries +of three Hudson Bay subpopulations (Western HB, Southern HB, +and Foxe Basin) be adjusted to +account for genetic distinctiveness +of bears inhabiting the Hudson Bay +region. A similar boundary issue +in the western Arctic between the +Chukchi Sea, and the Southern and +Northern Beaufort subpopulations, +based on known movements of +bears between regions, has been +acknowledged since 2014 but has +not yet been resolved. +• The US Fish and Wildlife Service +and the IUCN Polar Bear Specialist +Group, in their 2023 reports, failed +to officially acknowledge the newfound South-East Greenland bears +as the 20th subpopulation, despite +undisputed evidence that this is a +genetically distinct and geographically isolated group. Numbers are +estimated at 234 individuals. + +1 +1. Introduction +Fifty years ago, on 15 November 1973, the five Arctic nations of +Canada, Russia, the USA, Norway and Greenland signed an international treaty to protect polar bears against the rampant overhunting +that had taken place in the first half of the 20th century, and which +had decimated many subpopulations. The treaty was effective, and +by the late 1990s, polar bear populations that could be studied had at +least doubled, making it a huge conservation success story. However, +in 2009, the wording of the treaty was amended to protect the bears +against on-going and future loss of sea ice habitat , which was +assumed to be caused by human-generated greenhouse gas emissions. This was in line with similar declarations by the International +Union for the Conservation of Nature (IUCN) and the US Endangered +Species Act (USESA). 
These pessimistic conservation assessments, +based on computer-modelled future declines rather than observed +conditions, have been upheld ever since, even as the predicted relationship between polar bear survival and sea-ice loss has failed to +emerge in the observational data.5 +The current population of polar bears is large, and their historical range has not diminished due to habitat loss since 1979. Indeed, +previously inhabited areas have been recolonised as numbers have +recovered: recent data suggest that territory in Davis Strait used before +1970 during the summer ice-free period – by all ages and by pregnant +females for maternity denning – is now being used once again.6. +2. Conservation status +The IUCN, in their 2015 Red List assessment, provided by the Polar +Bear Specialist Group (PBSG), again listed polar bears as ‘vulnerable’ +to extinction, just as it did in 2006. Similarly, in 2023 the US Fish and +Wildlife Service (USFWS) upheld its 2008 conclusion that polar bears +were ‘threatened’ with extinction. In both instances, conservation +status assessments have been based on computer-modelled predictions of future sea-ice conditions and assumed resultant population +declines rather than current conditions.7 +In Canada, the 2018 COSEWIC report assigned a status of ‘special +concern’ to the species. This assessment had not changed by 2023.8 +3. Population size at 2023 +Global +The latest official estimate for the global population, from 17 October +2023, is the PBSG estimate of 26,000 (range 22,000–31,000), arrived +at in 2015 and unadjusted since then. In their 2023 assessment, the +PBSG has failed to increase this estimate, even to account for undisputed, statistically-significant increases in two subpopulations and +the addition of a newly-discovered subpopulation, which should reasonably boost their very conservative mid-point estimate to about +26,600: Kane Basin is up by 133, Southern Hudson Bay is up by 223, +and newly-discovered SE Greenland adds another 234.9 +2 +However, survey results postdating preparation of the 2015 +assessment, including those made public after July 2021 (for +Davis Strait, Chukchi Sea, SE Greenland, Western Hudson Bay, and +Southern Hudson Bay), plausibly brought the mid-point total to +just over 32,000 (Figure 1).10 +A plan to survey all Russian polar bear subpopulations between +2021 and 2023 seems to have been put on hold. In addition, an +abundance estimate for the Viscount Melville subpopulation in the +western Canadian Arctic has still not been released, even though +a three-year survey completed in 2014 has produced other published data.11 Surveys of Lancaster Sound and East Greenland +were completed in spring 2023, and these results, when released, +may put the global population mid-point estimate well above +32,000.12 While there is a wide margin of potential error attached +to this number, it is nowhere near the figure of 7,493 (6,660–8,325), +implicit in the prediction that two thirds of the global population +of polar bears would be gone by now, given the sea ice conditions +prevailing since 2007.13 +Note that the 2023 USFWS Status Report did not include the +Kara Sea estimate of 3,200 bears, the Laptev Sea estimate of 1,000 +bears, or the East Greenland estimate of 2,000 bears, figures that +were used for the 2015 IUCN assessment. It also used the lowest +of three available 2016 estimates for the Chukchi Sea, as discussed +in the State of the Polar Bear Report 2021. 
+14 Although the USFWS +report was published in August 2023, it also did not include results +of the 2021 assessments of the Western and Southern Hudson +Bay subpopulations that were published in November 2022, or +the newly-discovered subpopulation of South East Greenland +reported in June 2022.15. +Figure1: Estimates of the +global polar bear population, 1960 to date. +60 +40 +20 +0 +1960 +000s +1980 2000 2020 +3 +Subpopulation survey results published in 2022 +For detailed discussions of the changing status and abundance +issues over time for all 19 subpopulations, see the State of the Polar +Bear Report 2018. +16 +Western Hudson Bay (WH) +An aerial survey conducted in September 2021 generated a new +subpopulation estimate of 618 (range 385–852), an apparent +decline of 27% since the 2016 estimate of 842 (range 562–1121) +and about a 40% decline from a 2011 estimate of 949 (range 618– +1280) that used similar survey methods. However, the WH 2021 +report authors stated categorically that this apparent decline since +2011 was not statistically significant, in part due to evidence that +some bears moved into neighbouring subpopulations combined +with the large margins of error. While it seems inconceivable that +a decline of 40% over 10 years could be statistically insignificant, +recall that a similar conclusion was reached in 2015 regarding the +42% increase in abundance of Svalbard bears. Since the estimate +calculated in 2004 was 935 (range 794–1076), it seems the abundance of WH polar bears has not changed since 2004.17 Note that +a more comprehensive survey was conducted in 2011, generating +an estimate of 1030 (range 754–1406). This became the official WH +estimate used by the PBSG.18 +Given the conclusions of the 2021 survey that the 2016 and 2021 +estimates were not statistically different from the 2011 estimate, it +appears that the 2021 comprehensive estimate of 1030 may still +be the most valid figure for WH. +The 2021 WH survey authors also made it clear that the most +recent population estimate was not associated with poor ice conditions, since sea-ice levels had been as good as the 1980s in four +out of the five years between 2017 and 2021. Confoundingly, they +could not explain why adult females and subadults were underrepresented in the population. +No evidence was provided for lack of prey, and although +emigration to neighbouring Southern Hudson Bay was largely +dismissed as an explanation, the possibility of a movement north +into Foxe Basin was not explored. +This is odd, since a 2016 genetic study suggested that the +northern boundary for WH polar bears should be moved to the +north of Southampton Island (a major denning area currently +included in FB) and the SH boundary to the north of Akimiski +Island in James Bay, adding the entire southern Hudson Bay coast +in Ontario, as well as the Belcher Islands, to WH (currently included +in SH), leaving only James Bay to represent SH.19 +In 2023, the PBSG indicated the WH subpopulation was ‘likely +decreasing’, based on the 2021 estimate of 618 bears. However, +they did not include the caveat from the survey report that this +apparent decline was not statistically significant, and also did not +incorporate the conclusion of the 2022 Canadian Polar Bear Technical +Committee (PBTC) that indigenous knowledge (IK) assessed this +subpopulation as ‘increased’. Similarly, the 2023 assessment by the +4 +USFWS listed WH as ‘likely decreased’, based on the 2016 survey +only (2021 survey results were not included). 
Southern Hudson Bay (SH)

An aerial survey conducted in September 2021 generated a new subpopulation estimate of 1119 (range 860–1454), which represented a 30% increase over five years. The result was considered robust and reflective of the true size of the population. However, another estimate, of 1003 (range 773–1302), was generated from the same data; this was considered more comparable to the 2016 estimate of 780 (range 590–1029). While the authors did not explicitly address the issue of statistical significance, they concluded that a natural increase in numbers, via increased cub production and survival, must have taken place in conjunction with good sea-ice conditions from 2017 to 2020, perhaps in addition to immigration from another, unidentified subpopulation.21

In their 2023 assessment, the IUCN PBSG discussed the apparent increased abundance of SH bears, but did not unequivocally state that the subpopulation had increased, instead only implying that an increase may have been possible (‘years of relatively good ice conditions, combined with comparatively reduced harvest from 2016–2021 may have buffered the population against further decline or allowed for recovery’). They also did not include the 2022 assessment by the PBTC that IK considered the SH subpopulation to be ‘stable/likely increased’ (i.e. stable in the James Bay portion, likely increased in southeastern Hudson Bay).22

The 2023 assessment by the USFWS listed SH as ‘likely decreased’, based only on 2016 survey results (2021 survey results were not included), but did acknowledge that in 2022 IK considered this subpopulation to be ‘stable/likely increased’.23

Southeast Greenland (SG)

As part of a multiyear project on the status of SG polar bears that began in 2011, surveys were conducted during mid-March and mid-April of 2015–2017 for bears living below 64°N latitude. The results were compared with data from bears living in EG further north, which had been collected up to 2021. This southern region of Greenland had not previously been surveyed, or even visited by polar bear scientists, and it has no permanent human inhabitants; few Inuit hunters even venture into the region.24

Based on capture-recapture data, a population estimate of 234 (range 111–462) was generated for SG (a minimal illustration of this class of estimator follows below). The weight (indicating body condition, or fatness) of almost two dozen females captured in SG averaged 186 kg, similar to that of females in Svalbard in the western Barents Sea (185 kg) in the 1990–2000 period and in EG in recent years (186 kg).
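The SG study itself used a multiyear mark-recapture model, which the report does not describe; the simplest member of this family of estimators is the Lincoln–Petersen index. The Python sketch below is purely illustrative, with hypothetical capture counts that are not from the SG study:

    # Illustrative Lincoln-Petersen capture-recapture estimate.
    # All counts below are hypothetical; the SG estimate of 234
    # (range 111-462) came from a more elaborate multiyear model.

    def lincoln_petersen(marked, caught, recaptured):
        """Estimate population size as N = M * C / R, where M animals are
        marked in a first session, C are caught in a second session, and
        R of those C already carry marks from the first."""
        if recaptured == 0:
            raise ValueError("no recaptures: estimate undefined")
        return marked * caught / recaptured

    # Hypothetical: 40 bears marked, 47 caught later, 8 of them marked.
    print(round(lincoln_petersen(40, 47, 8)))   # 235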
Most surprisingly, there was strong evidence that these SG polar bears are the most genetically distinct subpopulation in the Arctic, indicating a lack of interbreeding with bears in EG for at least 200 years.25

Much emphasis was given by the study authors, Kristin Laidre and colleagues, to their interpretation that bears in these SG fjords frequently used glacier ice to hunt seals during the summer; in other locations, bears do so only occasionally. Seals feed in such ‘glacier-front’ habitats in summer because primary productivity is high: melting glaciers in the fjords attract fish because their food – marine plankton – is plentiful. However, the only evidence provided of seal-hunting behaviour by polar bears in summer in SG is a single photograph, taken by an unidentified photographer in September 2016, of a bear on glacier ice beside a seal kill. As noted above, the SG polar bear surveys were conducted in March and April; frequent summer hunting of seals could not, therefore, have been observed by the authors, but was simply assumed to have happened.

In addition, although the authors imply that glacier-front habitat is rare, it is in fact rather common across the Arctic and is widely used by polar bears year-round, because the sea ice covering such fjords in late winter and spring (including those in SG) is used by ringed seals as a birthing platform. Moreover, newborn seals are the preferred prey of polar bears, making up roughly two thirds of their diet. Fjords with glacier ice are present all along both coasts of Greenland, in Svalbard, in Novaya Zemlya and Franz Josef Land in Russia, and on Baffin and Ellesmere Islands in the Canadian Arctic.26

The authors concluded their report with a recommendation that SG be officially recognised by the IUCN PBSG as a polar bear subpopulation distinct from EG for management and conservation purposes. However, despite the fact that Dr Laidre is currently co-chair of the PBSG, and that in March 2023 the government of Greenland declared SG a protected ‘new and separate management unit’, the PBSG declined to add it as a distinct subpopulation. The 2023 USFWS assessment cited the 2022 Laidre report and its abundance estimate for SG, but regarding a change in boundaries for EG it stated only that ‘ecoregion and subpopulation status will likely be re-evaluated by PBSG in 2023’.27

4. Population trends

In Canada, where roughly two thirds of the world’s polar bears live, a 2022 update from the PBTC for the first time included assessments based on Inuit IK for each of the 13 subpopulations for which Canada has sole or joint management responsibility. While the ‘scientific’ assessments of trends in abundance for these subpopulations are simply the widely varying ones provided by the PBSG in 2021, those based on IK were all either ‘increased’ or ‘stable’.28

Later in 2022, the Government of Canada published updated global polar bear population trend maps based on 2021 PBSG ‘scientific’ data: no provision was made for the conflicting information from IK discussed above, calling into question whether IK assessments are actually given any weight in assessing current conditions.29 And while the 2023 USFWS assessment included the 2022 Canadian IK trend information in its status table, it gave priority to the 2021 PBSG scientific data.30

Figure 2 shows a more realistic representation of current polar bear population trends, based on all available information (scientific survey results, IK, and studies on health and survival status published up to 31 December 2023, extrapolated to regions lacking recent survey data). This gives the following subpopulation classifications at 2023, including the new subpopulation of SE Greenland (SG):

• seven ‘increasing’ or ‘likely increasing’ [KB, DS, MC, GB, CS, BS, SH];
• four ‘stable’ or ‘likely stable’ [BB, SB, WH, SG];
• nine ‘presumed stable or increasing’ [EG, LS, LP, KS, VM, NB, GB, FB, NW].