Main commit
Parent: b71c728fbd
Commit: aaca68b74e

README.md
@@ -1,3 +1 @@
# agents
docker-compose -f docker/docker-compose.yml up --build
RAG Agents for LLM

app/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# This file can be empty or contain initialization code for the app package
app/rag_system_with_agents.py (new file, 702 lines)
@@ -0,0 +1,702 @@
import os
import uuid
import json
import time
import smtplib
from threading import Thread
from email.mime.text import MIMEText

import openai
import requests
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from qdrant_client import QdrantClient
from qdrant_client.http import models
from airflow_client.client import ApiClient, Configuration
from airflow_client.client.api.dag_run_api import DAGRunApi

# Initialize Flask app
app = Flask(__name__, static_folder='./frontend', static_url_path='/')
CORS(app)

# Configure OpenAI API
openai.api_key = os.getenv('OPENAI_API_KEY')

# Configure Qdrant
qdrant = QdrantClient(host=os.getenv('QDRANT_HOST'))

# Dictionary to store the status and progress of tasks
tasks_status = {}
def embed_text(text):
    response = openai.Embedding.create(
        input=text,
        model="text-embedding-ada-002"
    )
    embedding = response['data'][0]['embedding']
    return embedding
def query_qdrant(embedding, top_n=5):
    search_result = qdrant.search(
        collection_name="rag",
        query_vector=embedding,
        limit=top_n
    )
    return search_result
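# A minimal bootstrap sketch (an assumption, not code from this file): query_qdrant()
# and the /upload route expect a Qdrant collection named "rag" to exist already.
# It could be created once with something like:
#
#     qdrant.recreate_collection(
#         collection_name="rag",
#         vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
#     )
#
# (1536 is the dimensionality of text-embedding-ada-002 vectors; cosine distance is an assumption.)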
def parse_react_response(response):
    """Split a ReAct-style model reply into agent steps and a final answer."""
    steps = []
    final_answer = ""
    lines = response.split('\n')
    for line in lines:
        if line.startswith("["):
            # Lines of the form "[Agent] Task" are individual steps
            steps.append(line.strip())
        elif line.startswith("Final Answer:"):
            # Split only on the first colon so answers that contain ":" stay intact
            final_answer = line.split(":", 1)[1].strip()
    return steps, final_answer
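# Illustration of the step format parse_react_response() expects (the reply text is a
# hypothetical example, not real model output):
#
#     sample_reply = (
#         "[Research Paper Finder] Find recent papers on retrieval-augmented generation\n"
#         "[Data Summarizer] Summarize the retrieved abstracts\n"
#         "Final Answer: a short synthesis of the retrieved material"
#     )
#     steps, final_answer = parse_react_response(sample_reply)
#     # steps        -> the two "[Agent] Task" lines
#     # final_answer -> "a short synthesis of the retrieved material"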
def update_task_status(task_id, status, step=None, results=None):
    # Avoid a mutable default argument; fall back to a fresh list per call
    if results is None:
        results = []
    if task_id not in tasks_status:
        tasks_status[task_id] = {"status": status, "current_step": step, "steps": [], "results": []}
    else:
        tasks_status[task_id]["status"] = status
    if step:
        tasks_status[task_id]["current_step"] = step
        tasks_status[task_id]["steps"].append(step)
    tasks_status[task_id]["results"] = results
def process_steps(steps, task_id, memory, results):
    try:
        for step in steps:
            if "[" in step and "]" in step:
                # "[Agent Name] Task text" -> normalized agent key + task text
                agent = step.split("[")[1].split("]")[0].strip().lower().replace(" ", "_")
                task = step.split("]")[1].strip()
                result = run_agent(agent, task, task_id, memory)
                if isinstance(result, tuple):
                    # Some agents return (payload, status_code); keep only the payload
                    result = result[0]
                results.append(result["message"])
                update_task_status(task_id, "processing", step, results)
                memory[agent] = result["message"]  # Store the result in memory
        update_task_status(task_id, "completed", None, results)
    except Exception as e:
        update_task_status(task_id, f"failed: {e}")
        print(f"Error processing steps: {e}")
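# For example, the step "[Research Paper Finder] Find papers on RAG evaluation" is
# normalized to the agent key "research_paper_finder" and dispatched as:
#
#     run_agent("research_paper_finder", "Find papers on RAG evaluation", task_id, memory)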
@app.route('/upload', methods=['POST'])
def upload_file():
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file and file.filename.endswith('.txt'):
        content = file.read().decode('utf-8')
        embedding = embed_text(content)
        document_id = str(uuid.uuid4())  # Generate a UUID for the document ID
        qdrant.upsert(
            collection_name='rag',
            points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})]
        )
        return jsonify({"message": "File uploaded and embedded successfully"}), 200
    else:
        return jsonify({"error": "Invalid file type. Only .txt files are allowed"}), 400
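# Client-side sketch for the route above (file name and host are illustrative; only
# .txt uploads are accepted):
#
#     import requests
#     with open("notes.txt", "rb") as f:
#         requests.post("http://localhost:1337/upload", files={"file": f})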
@app.route('/query', methods=['POST'])
def query():
    data = request.json
    query_text = data['query']
    embedding = embed_text(query_text)
    results = query_qdrant(embedding)
    sources = [{"content": result.payload["content"], "id": result.id} for result in results]
    return jsonify({"results": sources})
@app.route('/react_query', methods=['POST'])
def react_query():
    data = request.json
    query_text = data['query']
    task_id = str(uuid.uuid4())
    update_task_status(task_id, "initialized")

    # Create the system prompt with capabilities
    system_prompt = f"""
You are a research assistant that can perform the following tasks:
1. Research Paper Finder
2. Citation Generator
3. Data Summarizer
4. Question Answering
5. Statistical Analysis
6. Graph Generator
7. Keyword Extractor
8. Research Outline Generator
9. Hypothesis Generator
10. Methodology Advisor
11. Experimental Design Helper
12. Survey Designer
13. Plagiarism Checker
14. Grammar and Style Checker
15. Literature Review Organizer
16. Data Cleaning Agent
17. Bibliography Manager
18. Thesis Statement Generator
19. Funding Finder
20. Conference Finder
21. Web Scraper
22. API Integrator
23. Email Notifier
24. File Converter
25. Translation Agent
26. OCR Agent
27. Scheduler
28. Weather Information Agent

Using the ReAct (Reason and Act) paradigm, analyze the following query and determine the steps to answer it. Each step should indicate the agent to use and the task to perform in a structured format, clearly separated by new lines. Make sure to include the agent name in square brackets. Example format: [Agent] Task.

Query: {query_text}
"""

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query_text}
        ],
        max_tokens=500
    )

    react_response = response['choices'][0]['message']['content'].strip()
    steps, final_answer = parse_react_response(react_response)

    if not steps:
        update_task_status(task_id, "failed")
        return jsonify({"error": "No steps generated by the ReAct system"}), 400

    update_task_status(task_id, "processing", steps[0])
    results = []
    memory = {}
    Thread(target=process_steps, args=(steps, task_id, memory, results)).start()

    return jsonify({"steps": steps, "task_id": task_id})
def run_agent(agent, query_text, task_id, memory):
    # Dispatch to the appropriate agent function based on the agent key
    if agent == 'research_paper_finder':
        return research_paper_finder(query_text, memory)
    elif agent == 'citation_generator':
        return citation_generator(query_text, memory)
    elif agent == 'data_summarizer':
        return data_summarizer(query_text, memory)
    elif agent == 'question_answering':
        return question_answering(query_text, memory)
    elif agent == 'statistical_analysis':
        return statistical_analysis(query_text, memory)
    elif agent == 'graph_generator':
        return graph_generator(query_text, memory)
    elif agent == 'keyword_extractor':
        return keyword_extractor(query_text, memory)
    elif agent == 'research_outline_generator':
        return research_outline_generator(query_text, memory)
    elif agent == 'hypothesis_generator':
        return hypothesis_generator(query_text, memory)
    elif agent == 'methodology_advisor':
        return methodology_advisor(query_text, memory)
    elif agent == 'experimental_design_helper':
        return experimental_design_helper(query_text, memory)
    elif agent == 'survey_designer':
        return survey_designer(query_text, memory)
    elif agent == 'plagiarism_checker':
        return plagiarism_checker(query_text, memory)
    elif agent == 'grammar_and_style_checker':
        return grammar_and_style_checker(query_text, memory)
    elif agent == 'literature_review_organizer':
        return literature_review_organizer(query_text, memory)
    elif agent == 'data_cleaning_agent':
        return data_cleaning_agent(query_text, memory)
    elif agent == 'bibliography_manager':
        return bibliography_manager(query_text, memory)
    elif agent == 'thesis_statement_generator':
        return thesis_statement_generator(query_text, memory)
    elif agent == 'funding_finder':
        return funding_finder(query_text, memory)
    elif agent == 'conference_finder':
        return conference_finder(query_text, memory)
    elif agent in ('web_scraper_using_scrapyd', 'web_scraper'):
        return web_scraper(query_text, memory)
    elif agent == 'api_integrator':
        return api_integrator(query_text, memory)
    elif agent == 'email_notifier':
        return email_notifier(query_text, memory)
    elif agent == 'file_converter':
        return file_converter(query_text, memory)
    elif agent == 'translation_agent':
        return translation_agent(query_text, memory)
    elif agent == 'ocr_agent':
        return ocr_agent(query_text, memory)
    elif agent == 'scheduler':
        return scheduler(query_text, memory)
    elif agent == 'weather_information_agent':
        return weather_information_agent(query_text, memory)
    elif agent == 'currency_converter':
        return currency_converter(query_text, memory)  # not defined in this module
    elif agent == 'news_aggregator':
        return news_aggregator(query_text, memory)  # not defined in this module
    else:
        return {"message": f"Unknown agent: {agent}"}
def _rag_chat_agent(query_text, memory, instruction, max_tokens=150):
    """Shared helper for the retrieval-backed agents below.

    Each of these agents follows the same pattern: embed the query, retrieve
    supporting documents from Qdrant, then ask GPT-4 with the previous results
    passed in as context. Only the instruction and the token budget differ.
    """
    embedding = embed_text(query_text)
    rag_results = query_qdrant(embedding)
    sources = [{"content": result.payload["content"], "id": result.id} for result in rag_results]
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": f"The previous response relating to the query was: {memory}"},
            {"role": "user", "content": f"{instruction}: {query_text}"}
        ],
        max_tokens=max_tokens
    )
    response_message = response['choices'][0]['message']['content'].strip()
    return {"message": response_message, "sources": sources}


def research_paper_finder(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Find research papers related to", max_tokens=150)


def citation_generator(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Generate a citation for", max_tokens=50)


def data_summarizer(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Summarize the following text", max_tokens=1000)


def question_answering(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Answer the following question", max_tokens=100)


def statistical_analysis(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Perform statistical analysis on the following data", max_tokens=150)


def graph_generator(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Generate a graph for the following data", max_tokens=150)


def keyword_extractor(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Extract keywords from the following text", max_tokens=50)


def research_outline_generator(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Generate a research outline for", max_tokens=150)


def hypothesis_generator(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Generate a hypothesis based on the following topic", max_tokens=100)


def methodology_advisor(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Suggest a methodology for the following research topic", max_tokens=150)


def experimental_design_helper(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Help design an experiment for", max_tokens=150)


def survey_designer(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Design a survey for", max_tokens=150)


def plagiarism_checker(query_text, memory):
    return {"message": "Plagiarism check is not implemented yet.", "query": query_text}


def grammar_and_style_checker(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Check and correct the grammar and style of the following text", max_tokens=150)


def literature_review_organizer(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Organize the following literature review", max_tokens=150)


def data_cleaning_agent(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Clean the following data", max_tokens=150)


def bibliography_manager(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Manage the bibliography for", max_tokens=150)


def thesis_statement_generator(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Generate a thesis statement for", max_tokens=100)


def funding_finder(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Find funding opportunities for", max_tokens=150)


def conference_finder(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Find conferences related to", max_tokens=150)


def web_scraper(query_text, memory):
    project_name = 'my_project'
    spider_name = 'my_spider'
    scrapyd_host = os.getenv('SCRAPYD_HOST', 'localhost')

    data = {
        'project': project_name,
        'spider': spider_name,
        'start_urls': query_text
    }

    try:
        # NOTE: the scrapyd responses are not status-checked here
        response = requests.post(f'http://{scrapyd_host}:6800/schedule.json', data=data)
        job_id = response.json().get('jobid')
        # Wait for the job to finish and fetch the results
        time.sleep(15)  # Adjust this sleep time as needed
        items_response = requests.get(f'http://{scrapyd_host}:6800/items/{project_name}/{spider_name}/{job_id}.jl')
        items = [json.loads(line) for line in items_response.text.splitlines()]
        # Insert the first scraped item into Qdrant
        content = items[0].get('content', '')
        embedding = embed_text(content)
        document_id = str(uuid.uuid4())
        qdrant.upsert(
            collection_name='rag',
            points=[models.PointStruct(id=document_id, vector=embedding, payload={"content": content})]
        )
        return {"message": content}
    except Exception as e:
        print(f"Error scheduling scrapy job: {e}")
        return {"message": f"Failed to schedule job - {e}"}, 500


def api_integrator(query_text, memory):
    response = requests.post(
        'http://localhost:1880/api_integrator',
        json={'query': query_text}
    )
    return {"message": response.json(), "query": query_text}


def email_notifier(query_text, memory):
    msg = MIMEText(query_text)
    msg['Subject'] = 'Notification'
    msg['From'] = 'test@example.com'
    msg['To'] = 'mahesh.kommareddi@gmail.com'

    with smtplib.SMTP('mailhog', 1025) as server:
        server.sendmail(msg['From'], [msg['To']], msg.as_string())

    return {"message": "Email sent successfully"}


def file_converter(query_text, memory):
    response = requests.post(
        'http://libreoffice:8084/convert',
        files={'file': query_text}
    )
    return {"message": "File conversion completed", "data": response.json()}


def translation_agent(query_text, memory):
    return _rag_chat_agent(query_text, memory, "Translate the following text", max_tokens=150)


def ocr_agent(query_text, memory):
    response = requests.post(
        'http://localhost:8081/ocr',
        files={'file': query_text}
    )
    return {"message": response.json(), "query": query_text}


def scheduler(query_text, memory):
    configuration = Configuration(
        host="http://localhost:8082/api/v1"
    )
    api_client = ApiClient(configuration)
    dag_run_api = DAGRunApi(api_client)

    dag_id = 'example_dag'
    dag_run = dag_run_api.post_dag_run(
        dag_id=dag_id,
        dag_run={"conf": {"query_text": query_text}}
    )
    return {"message": f"Scheduled task for {query_text}", "dag_run_id": dag_run.dag_run_id}


def weather_information_agent(query_text, memory):
    api_key = os.getenv('OPENWEATHERMAP_API_KEY')
    response = requests.get(
        f'http://api.openweathermap.org/data/2.5/weather?q={query_text}&appid={api_key}'
    )
    return {"message": response.json(), "query": query_text}
@app.route('/ocr', methods=['POST'])
def handle_ocr():
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    response = requests.post(
        'http://localhost:8081/ocr',
        files={'file': file}
    )
    return jsonify(response.json())


@app.route('/schedule', methods=['POST'])
def handle_schedule():
    data = request.json
    query_text = data['query']
    # The agent functions expect a memory dict; pass an empty one for direct calls
    return jsonify(scheduler(query_text, {}))


@app.route('/weather', methods=['POST'])
def handle_weather():
    data = request.json
    query_text = data['query']
    return jsonify(weather_information_agent(query_text, {}))


@app.route('/scrape', methods=['POST'])
def handle_scrape():
    data = request.json
    query_text = data['query']
    return web_scraper(query_text, {})


@app.route('/integrate', methods=['POST'])
def handle_integrate():
    data = request.json
    query_text = data['query']
    return jsonify(api_integrator(query_text, {}))


@app.route('/notify', methods=['POST'])
def handle_notify():
    data = request.json
    query_text = data['query']
    return jsonify(email_notifier(query_text, {}))


@app.route('/convert', methods=['POST'])
def handle_convert():
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    response = requests.post(
        'http://localhost:8084/convert',
        files={'file': file}
    )
    return jsonify(response.json())


@app.route('/')
def serve_index():
    return send_from_directory(app.static_folder, 'index.html')


@app.route('/status/<task_id>', methods=['GET'])
def get_status(task_id):
    return jsonify(tasks_status.get(task_id, {"error": "Task ID not found"}))


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=1337)
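A minimal end-to-end sketch of how a client is expected to drive the service once the stack is up (the base URL and the example query are illustrative; the endpoints and response shapes come from app/rag_system_with_agents.py):

import requests

BASE = "http://localhost:1337"  # flask-app publishes port 1337 in docker-compose.yml

# Ask the ReAct planner for steps; processing continues in a background thread
resp = requests.post(f"{BASE}/react_query", json={"query": "Summarize recent work on RAG agents"})
task = resp.json()  # {"steps": [...], "task_id": "..."}

# Poll the task status until it reports "completed" or "failed: ..."
status = requests.get(f"{BASE}/status/{task['task_id']}").json()
print(status["status"], status.get("current_step"), status.get("results"))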
app/requirements.txt (new file, 10 lines)
@@ -0,0 +1,10 @@
flask
flask-cors
openai==0.28
openai[datalib]
pymongo
requests
numpy
qdrant_client
apache-airflow-client
scrapy
docker/Dockerfile (new file, 23 lines)
@@ -0,0 +1,23 @@
# Use the official Python image from the Docker Hub
FROM python:3.10

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY ./app/requirements.txt .

# Install any dependencies specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY ./app /app

# Copy the frontend files into the container
COPY ./frontend /app/frontend

# Make port 1337 available to the world outside this container
EXPOSE 1337

# Run the application
CMD ["python", "rag_system_with_agents.py"]
docker/docker-compose.yml (new file, 87 lines)
@@ -0,0 +1,87 @@
version: '3.7'

services:
  flask-app:
    build:
      context: ../
      dockerfile: docker/Dockerfile
    container_name: flask-app
    ports:
      - "1337:1337"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - QDRANT_HOST=qdrant
      - SCRAPYD_HOST=scrapyd
      - OPENWEATHERMAP_API_KEY=${OPENWEATHERMAP_API_KEY}
    depends_on:
      - qdrant
      - ocr_service
      - airflow-webserver
      - airflow-scheduler
      - scrapyd
      - node_red
      - mailhog
      - libreoffice

  qdrant:
    image: qdrant/qdrant
    ports:
      - "6333:6333"

  ocr_service:
    image: jbarlow83/ocrmypdf
    ports:
      - "8081:8081"

  airflow-webserver:
    image: apache/airflow:latest
    container_name: airflow-webserver
    command: webserver
    ports:
      - "8082:8082"
    environment:
      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
      - AIRFLOW__CORE__LOAD_EXAMPLES=False
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./logs:/usr/local/airflow/logs
      - ./plugins:/usr/local/airflow/plugins

  airflow-scheduler:
    image: apache/airflow:latest
    container_name: airflow-scheduler
    command: scheduler
    environment:
      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////usr/local/airflow/airflow.db
      - AIRFLOW__CORE__LOAD_EXAMPLES=False
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./logs:/usr/local/airflow/logs
      - ./plugins:/usr/local/airflow/plugins
    depends_on:
      - airflow-webserver

  scrapyd:
    image: vimagick/scrapyd
    ports:
      - "6800:6800"
    volumes:
      - ./scrapy_project:/scrapy_project
    command: ["scrapyd"]

  node_red:
    image: nodered/node-red
    ports:
      - "1880:1880"

  mailhog:
    image: mailhog/mailhog
    ports:
      - "1025:1025"

  libreoffice:
    image: linuxserver/libreoffice
    ports:
      - "8084:8084"
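docker-compose.yml interpolates two variables from the host environment; a minimal .env sketch for local runs (the values are placeholders):

OPENAI_API_KEY=sk-your-openai-key
OPENWEATHERMAP_API_KEY=your-openweathermap-key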
docker/scrapy_project/my_project/my_project/items.py (new file, 12 lines)
@@ -0,0 +1,12 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class MyProjectItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
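MyProjectItem is left as an empty stub. Since my_spider.py yields plain dicts with url and content keys, those fields could later be declared explicitly; a hedged sketch, not something this commit does:

import scrapy

class MyProjectItem(scrapy.Item):
    url = scrapy.Field()
    content = scrapy.Field()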
docker/scrapy_project/my_project/my_project/middlewares.py (new file, 103 lines)
@@ -0,0 +1,103 @@
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals

# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter


class MyProjectSpiderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, or item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Request or item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)


class MyProjectDownloaderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)
docker/scrapy_project/my_project/my_project/pipelines.py (new file, 13 lines)
@@ -0,0 +1,13 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter


class MyProjectPipeline:
    def process_item(self, item, spider):
        return item
docker/scrapy_project/my_project/my_project/settings.py (new file, 93 lines)
@@ -0,0 +1,93 @@
# Scrapy settings for my_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = "my_project"

SPIDER_MODULES = ["my_project.spiders"]
NEWSPIDER_MODULE = "my_project.spiders"


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "my_project (+http://www.yourdomain.com)"

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
#    "Accept-Language": "en",
#}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    "my_project.middlewares.MyProjectSpiderMiddleware": 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    "my_project.middlewares.MyProjectDownloaderMiddleware": 543,
#}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    "scrapy.extensions.telnet.TelnetConsole": None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    "my_project.pipelines.MyProjectPipeline": 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"
|
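Editor's note: the file above is the stock Scrapy template with only the defaults enabled (ROBOTSTXT_OBEY, the request fingerprinter, the asyncio reactor, UTF-8 feed export). A minimal sketch of how any of the commented-out knobs could be switched on for a single in-process run without editing the file; this is illustrative and not part of the commit, and it assumes it is executed from the Scrapy project directory (docker/scrapy_project/my_project) with an example seed URL:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Load my_project.settings, then override a couple of the commented defaults.
settings = get_project_settings()
settings.set("AUTOTHROTTLE_ENABLED", True)
settings.set("DOWNLOAD_DELAY", 1)

process = CrawlerProcess(settings)
process.crawl("my_spider", start_urls="https://example.com")  # hypothetical seed URL
process.start()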
4
docker/scrapy_project/my_project/my_project/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
32
docker/scrapy_project/my_project/my_project/spiders/my_spider.py
Normal file
@ -0,0 +1,32 @@
import scrapy
import re

class MySpider(scrapy.Spider):
    name = 'my_spider'

    def __init__(self, start_urls=None, *args, **kwargs):
        super(MySpider, self).__init__(*args, **kwargs)
        if start_urls:
            self.start_urls = self.extract_urls(start_urls)
        else:
            self.start_urls = []

    def extract_urls(self, text):
        url_pattern = re.compile(r'(https?://\S+)')
        urls = url_pattern.findall(text)
        return urls

    def parse(self, response):
        page_content = response.text
        page_urls = response.css('a::attr(href)').getall()
        for url in page_urls:
            if url.startswith('http'):
                yield {'url': url, 'content': page_content}
            else:
                yield {'url': response.urljoin(url), 'content': page_content}

        # for url in page_urls:
        #     if url.startswith('http'):
        #         yield scrapy.Request(url, callback=self.parse)
        #     else:
        #         yield scrapy.Request(response.urljoin(url), callback=self.parse)
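Editor's note: the spider above accepts start_urls as free text and pulls the URLs out with a regex, so the argument does not need to be a clean list. A small illustrative sketch (an assumption, not part of the commit: run from the Scrapy project directory so the my_project package is importable):

from my_project.spiders.my_spider import MySpider

# Any text containing URLs works as the start_urls argument.
spider = MySpider(start_urls="crawl https://example.com and https://example.org/docs please")
print(spider.start_urls)  # ['https://example.com', 'https://example.org/docs']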
3
docker/scrapy_project/my_project/project.egg-info/PKG-INFO
Normal file
@ -0,0 +1,3 @@
Metadata-Version: 2.1
Name: project
Version: 1.0
13
docker/scrapy_project/my_project/project.egg-info/SOURCES.txt
Normal file
@ -0,0 +1,13 @@
setup.py
my_project/__init__.py
my_project/items.py
my_project/middlewares.py
my_project/pipelines.py
my_project/settings.py
my_project/spiders/__init__.py
my_project/spiders/my_spider.py
project.egg-info/PKG-INFO
project.egg-info/SOURCES.txt
project.egg-info/dependency_links.txt
project.egg-info/entry_points.txt
project.egg-info/top_level.txt
1
docker/scrapy_project/my_project/project.egg-info/dependency_links.txt
Normal file
@ -0,0 +1 @@

2
docker/scrapy_project/my_project/project.egg-info/entry_points.txt
Normal file
@ -0,0 +1,2 @@
[scrapy]
settings = my_project.settings
1
docker/scrapy_project/my_project/project.egg-info/top_level.txt
Normal file
@ -0,0 +1 @@
my_project
6
docker/scrapy_project/my_project/scrapy.cfg
Normal file
@ -0,0 +1,6 @@
[settings]
default = my_project.settings

[deploy]
url = http://localhost:6800/
project = my_project
10
docker/scrapy_project/my_project/setup.py
Normal file
@ -0,0 +1,10 @@
# Automatically created by: scrapyd-deploy

from setuptools import setup, find_packages

setup(
    name = 'project',
    version = '1.0',
    packages = find_packages(),
    entry_points = {'scrapy': ['settings = my_project.settings']},
)
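Editor's note: scrapy.cfg and this scrapyd-deploy-generated setup.py point at a Scrapyd instance on localhost:6800, so after deploying (scrapyd-deploy) the spider can be triggered over HTTP. A hedged sketch using Scrapyd's schedule.json endpoint; the project and spider names are taken from the files above, and the seed URL is illustrative:

import requests

# Schedule a crawl on the Scrapyd instance from scrapy.cfg's [deploy] section.
resp = requests.post(
    "http://localhost:6800/schedule.json",
    data={
        "project": "my_project",
        "spider": "my_spider",
        "start_urls": "https://example.com",  # forwarded to MySpider.__init__
    },
)
print(resp.json())  # e.g. {"status": "ok", "jobid": "..."}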
62
frontend/app.js
Normal file
@ -0,0 +1,62 @@
document.getElementById('react-query-form').addEventListener('submit', async (e) => {
    e.preventDefault();
    const queryText = document.getElementById('react-query-text').value;
    const response = await fetch('/react_query', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({ query: queryText }),
    });
    const data = await response.json();
    const taskId = data.task_id;
    monitorTask(taskId);
});

async function monitorTask(taskId) {
    const statusDiv = document.getElementById('results');
    statusDiv.innerHTML = `<p>Task ID: ${taskId}</p><p>Status: Monitoring...</p>`;
    let completed = false;
    while (!completed) {
        const response = await fetch(`/status/${taskId}`);
        const data = await response.json();
        statusDiv.innerHTML = `<p>Task ID: ${taskId}</p><p>Status: ${data.status}</p><p>Current Step: ${data.current_step}</p>`;
        if (data.status === 'completed') {
            completed = true;
            const stepsList = data.steps.map(step => `<li>${step}</li>`).join('');
            statusDiv.innerHTML += `<ul>${stepsList}</ul>`;
            break;
        }
        await new Promise(resolve => setTimeout(resolve, 2000));
    }
}

document.getElementById('query-form').addEventListener('submit', async (e) => {
    e.preventDefault();
    const queryText = document.getElementById('query-text').value;
    const agent = document.getElementById('agent-select').value;
    const response = await fetch('/query', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({ query: queryText, agent }),
    });
    const data = await response.json();
    const resultsDiv = document.getElementById('results');
    resultsDiv.innerHTML = data.results.map(result => `<p>${result.content}</p>`).join('');
});

document.getElementById('upload-form').addEventListener('submit', async (e) => {
    e.preventDefault();
    const fileInput = document.getElementById('file-input');
    const formData = new FormData();
    formData.append('file', fileInput.files[0]);
    const response = await fetch('/upload', {
        method: 'POST',
        body: formData,
    });
    const data = await response.json();
    const resultsDiv = document.getElementById('results');
    resultsDiv.innerHTML = `<p>${data.message}</p>`;
});
129
frontend/index.html
Normal file
@ -0,0 +1,129 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Agentive RAG System</title>
    <link rel="stylesheet" href="styles.css">
</head>
<body>
    <div class="container">
        <h1>Agentive RAG System</h1>
        <form id="query-form">
            <textarea id="query-text" placeholder="Enter your query here..." required></textarea>
            <button type="submit">Submit</button>
        </form>
        <form id="upload-form" enctype="multipart/form-data">
            <input type="file" id="file-input" accept=".txt" required>
            <button type="submit">Upload</button>
        </form>
        <form id="react-query-form">
            <textarea id="react-query-text" placeholder="Enter your query for ReAct..." required></textarea>
            <button type="submit">Submit</button>
        </form>
        <div id="results"></div>
    </div>

    <script>
        const queryForm = document.getElementById('query-form');
        const uploadForm = document.getElementById('upload-form');
        const reactQueryForm = document.getElementById('react-query-form');
        const resultsDiv = document.getElementById('results');

        queryForm.addEventListener('submit', async (event) => {
            event.preventDefault();
            const queryText = document.getElementById('query-text').value;
            const response = await fetch('/query', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ query: queryText })
            });
            const data = await response.json();
            displayResults(data.results);
        });

        uploadForm.addEventListener('submit', async (event) => {
            event.preventDefault();
            const fileInput = document.getElementById('file-input');
            const formData = new FormData();
            formData.append('file', fileInput.files[0]);
            const response = await fetch('/upload', {
                method: 'POST',
                body: formData
            });
            const data = await response.json();
            displayResults(data.message);
        });

        reactQueryForm.addEventListener('submit', async (event) => {
            event.preventDefault();
            const queryText = document.getElementById('react-query-text').value;
            const response = await fetch('/react_query', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ query: queryText })
            });
            const data = await response.json();
            pollTaskStatus(data.task_id);
        });

        async function pollTaskStatus(taskId) {
            const interval = setInterval(async () => {
                const response = await fetch(`/status/${taskId}`);
                const data = await response.json();
                displayTaskStatus(data);
                if (data.status === 'completed' || data.status.startsWith('failed')) {
                    clearInterval(interval);
                }
            }, 2000);
        }

        function displayResults(results) {
            resultsDiv.innerHTML = '';
            if (Array.isArray(results)) {
                results.forEach(result => {
                    const p = document.createElement('p');
                    p.textContent = result;
                    resultsDiv.appendChild(p);
                });
            } else {
                const p = document.createElement('p');
                p.textContent = results;
                resultsDiv.appendChild(p);
            }
        }

        function displayTaskStatus(data) {
            resultsDiv.innerHTML = `<h3>Status: ${data.status}</h3>`;
            if (data.current_step) {
                const step = document.createElement('p');
                step.textContent = `Current Step: ${data.current_step}`;
                resultsDiv.appendChild(step);
            }
            if (data.steps) {
                const stepsList = document.createElement('ul');
                data.steps.forEach(step => {
                    const li = document.createElement('li');
                    li.textContent = step;
                    stepsList.appendChild(li);
                });
                resultsDiv.appendChild(stepsList);
            }
            if (data.results) {
                const resultsList = document.createElement('ul');
                data.results.forEach(result => {
                    const li = document.createElement('li');
                    li.textContent = result;
                    resultsList.appendChild(li);
                });
                resultsDiv.appendChild(resultsList);
            }
        }
    </script>

</body>
</html>
78
frontend/styles.css
Normal file
@ -0,0 +1,78 @@
body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    background-color: #f5f5f5;
}

.container {
    width: 90%;
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
    background-color: #ffffff;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}

h1 {
    text-align: center;
    color: #333;
}

h2 {
    color: #555;
}

.form-section {
    margin-bottom: 30px;
}

textarea, select, input[type="file"], button {
    width: 100%;
    padding: 10px;
    margin: 10px 0;
    border-radius: 5px;
    border: 1px solid #ccc;
}

button {
    background-color: #007BFF;
    color: #fff;
    cursor: pointer;
    border: none;
}

button:hover {
    background-color: #0056b3;
}

#results {
    margin-top: 30px;
}

#results-content {
    background-color: #f9f9f9;
    padding: 20px;
    border: 1px solid #ddd;
    border-radius: 5px;
    white-space: pre-wrap;
}

#task-queue {
    margin-top: 30px;
}

table {
    width: 100%;
    border-collapse: collapse;
}

th, td {
    border: 1px solid #ddd;
    padding: 10px;
    text-align: left;
}

th {
    background-color: #f2f2f2;
}
125
samples/FRANCE.txt
Normal file
@ -0,0 +1,125 @@
{{Short description|Country in Western Europe}}
|
||||||
|
{{Hatnote|For other uses, see [[France (disambiguation)]], [[Lafrance (disambiguation)|Lafrance]], or (for prior French Republics) [[French Republics (disambiguation)|French Republics]].}}
|
||||||
|
{{pp-vandalism|small=yes}}
|
||||||
|
{{Use British English|date=July 2022}}
|
||||||
|
{{Use dmy dates|date=March 2022}}
|
||||||
|
{{Infobox country
|
||||||
|
| conventional_long_name = French Republic
|
||||||
|
| common_name = France
|
||||||
|
| native_name = {{Native name|fr|République française}}
|
||||||
|
| image_flag = Flag of France.svg
|
||||||
|
| image_coat = Arms of the French Republic.svg
|
||||||
|
| symbol_width = 75px
|
||||||
|
| symbol_type = [[Coat of arms of France|Coat of arms]]{{Efn-ur|The current [[Constitution of France]] does not specify a national emblem.<ref>{{Cite constitution|article=II|polity=France|date=1958}}</ref> The [[Fasces|lictor's fasces]] is very often used to represent the French Republic, although today it holds no official status.<ref>{{cite web|url=https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|date=15 December 2022|title=THE LICTOR'S FASCES|access-date=18 May 2024|archive-date=7 April 2024|archive-url=https://web.archive.org/web/20240407081203/https://www.elysee.fr/en/french-presidency/the-lictor-s-fasces|url-status=live}}</ref> In addition to the coat of arms, France also uses a [[Diplomatic emblem of France|different emblem]] for diplomatic and consular purposes.}}
|
||||||
|
| other_symbol = [[File:Armoiries république française.svg|90px]]
|
||||||
|
| other_symbol_type = [[Diplomatic emblem of France|Diplomatic emblem]]
|
||||||
|
| national_motto = "{{Lang|fr|[[Liberté, égalité, fraternité]]|italics=no}}"
|
||||||
|
| englishmotto = ("Liberty, Equality, Fraternity")
|
||||||
|
| national_anthem = "[[La Marseillaise]]"<br /><div style="display:inline-block;margin-top:0.4em">[[File:La Marseillaise.ogg|alt=sound clip of the Marseillaise French national anthem]]</div>
|
||||||
|
| image_map = {{Switcher|[[File:EU-France (orthographic projection).svg|upright=1.15|frameless]]|France on the globe centred on Europe|[[File:EU-France.svg|upright=1.15|frameless]]|[[Metropolitan France]] (European part of France) in Europe|[[File:France and its region.png|frameless]]|France and its neighbors<!--Map restored per [[WP:CONSENSUS]] in 03:24, 11 July 2023 discussion [[Talk:France#Removal of map]]-->|[[File:Territorial waters - France.svg|upright=1.15|frameless]]|Show France, its overseas territories and [[Exclusive economic zone of France|its exclusive economic zones]]|Labelled map|default=1}}
|
||||||
|
| map_caption = {{Map caption|location_color=blue or dark green|region=Europe|region_color=dark grey|subregion=the European Union|subregion_color=green|unbulleted list|Location of the territory of the (red)|[[Adélie Land]] (Antarctic claim; hatched)}}
|
||||||
|
| capital = [[Paris]]
|
||||||
|
| coordinates = {{Coord|48|51|N|2|21|E|type:city(2,100,000)_region:FR-75C}}
|
||||||
|
| largest_city = capital
|
||||||
|
| languages_type = Official language<br />{{Nobold|and national language}}
|
||||||
|
| languages = [[French language|French]]{{Efn-ur|name=one|For information about regional languages, see [[Languages of France]].}}{{Infobox|child=yes
|
||||||
|
| regional_languages = See [[Languages of France]]
|
||||||
|
| label1 = Nationality {{Nobold|(2021)<ref>{{cite web |title=L'essentiel sur... les immigrés et les étrangers |url=https://www.insee.fr/fr/statistiques/3633212 |website=[[Insee]] |access-date=9 September 2023 |archive-date=26 June 2019 |archive-url=https://web.archive.org/web/20190626142004/https://www.insee.fr/fr/statistiques/3633212 |url-status=live }}</ref>}}
|
||||||
|
| data1 = {{Unbulleted list|92.2% [[French people|French]]|7.8% [[Demographics of France|other]]}}}}
|
||||||
|
| religion_ref = <ref name=religion2020>{{cite web|last1=Drouhot|first1=Lucas|last2=Simon|first2=Patrick|last3=Tiberj|first3=Vincent|url=https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|title=La diversité religieuse en France : transmissions intergénérationnelles et pratiques selon les origines|trans-title=Religious diversity in France: Intergenerational transmissions and practices according to the origins|publisher=[[Institut national de la statistique et des études économiques|National Institute of Statistics and Economic Studies]] (INSEE)|type=official statistics|date=30 March 2023|language=fr|archive-url=https://web.archive.org/web/20230330154402/https://www.insee.fr/fr/statistiques/fichier/6793308/IMMFRA23-D2.pdf|archive-date=30 March 2023}}</ref>
|
||||||
|
| religion_year = 2023
|
||||||
|
| religion = Aged 18 - 59 {{ublist|item_style=white-space;|51% [[Irreligion|No Religion]]|38% [[Christianity]]|10% [[Islam]]|0.5% [[Judaism]]|0.5% [[Buddhism]]}}
|
||||||
|
| demonym = French
|
||||||
|
| government_type = Unitary [[Semi-presidential system|semi-presidential republic]]
|
||||||
|
| leader_title1 = [[President of France|President]]
|
||||||
|
| leader_name1 = [[Emmanuel Macron]]
|
||||||
|
| leader_title2 = [[Prime Minister of France|Prime Minister]]
|
||||||
|
| leader_name2 = [[Gabriel Attal]]
|
||||||
|
| leader_title3 = [[List of presidents of the Senate of France|President of the Senate]]
|
||||||
|
| leader_name3 = [[Gérard Larcher]]
|
||||||
|
| leader_title4 = [[List of presidents of the National Assembly of France|President of the National Assembly]]
|
||||||
|
| leader_name4 = [[Yaël Braun-Pivet]]
|
||||||
|
| legislature = [[French Parliament|Parliament]]
|
||||||
|
| upper_house = [[Senate (France)|Senate]]
|
||||||
|
| lower_house = [[National Assembly (France)|National Assembly]]
|
||||||
|
| sovereignty_type = [[History of France|Establishment]]
|
||||||
|
| established_event1 = [[West Francia|Kingdom of the West Franks]] – [[Treaty of Verdun]]
|
||||||
|
| established_date1 = 10 August 843
|
||||||
|
| established_event2 = [[Kingdom of France]] – [[List of French monarchs#House of Capet (987–1792)|Capetian rulers of France]]
|
||||||
|
| established_date2 = 3 July 987
|
||||||
|
| established_event3 = [[Proclamation of the abolition of the monarchy|French Republic]] – [[French First Republic]]
|
||||||
|
| established_date3 = 22 September 1792
|
||||||
|
| established_event4 = [[Enlargement of the European Union#Founding members|Founded]] the [[European Economic Community|EEC]]{{Efn-ur|[[European Union]] since 1993}}
|
||||||
|
| established_date4 = 1 January 1958
|
||||||
|
| established_event5 = [[Constitution of France|Current constitution]] – [[French Fifth Republic]]
|
||||||
|
| established_date5 = 4 October 1958
|
||||||
|
| area_km2 = 643,801
|
||||||
|
| area_footnote = <ref name="Field Listing :: Area">{{Cite web |title=Field Listing :: Area |url=https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |url-status=dead |archive-url=https://web.archive.org/web/20140131115000/https://www.cia.gov/library/publications/the-world-factbook/fields/2147.html |archive-date=31 January 2014 |access-date=1 November 2015 |website=The World Factbook |publisher=CIA}} {{PD-notice}}</ref>
|
||||||
|
| area_rank = 42nd <!-- Area rank should match [[List of countries and dependencies by area]] -->
|
||||||
|
| area_sq_mi = 248,600 <!--Do not remove per [[Wikipedia:Manual of Style/Dates and numbers|WP:MOSNUM]] -->
|
||||||
|
| area_label2 = [[Metropolitan France]] ([[Institut géographique national|IGN]])
|
||||||
|
| area_data2 = {{Cvt|551695|km2}}{{Efn-ur|name=three|French [[Institut géographique national|National Geographic Institute]] data, which includes bodies of water}} ([[List of countries and dependencies by area|50th]])
|
||||||
|
| area_label3 = Metropolitan France ([[Cadastre]])
|
||||||
|
| area_data3 = {{Cvt|543940.9|km2}}{{Efn-ur|name=four|French [[Land registration|Land Register]] data, which exclude lakes, ponds and [[glacier]]s larger than 1 km<sup>2</sup> (0.386 sq mi or 247 acres) as well as the estuaries of rivers}}<ref>{{Cite journal |year=2011 |title=France Métropolitaine |url=http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |url-status=dead |journal=INSEE |archive-url=https://web.archive.org/web/20150828051307/http://www.insee.fr/fr/themes/comparateur.asp?codgeo=METRODOM-1 |archive-date=28 August 2015}}</ref> ([[List of countries and dependencies by area|50th]])
|
||||||
|
| population_estimate = {{IncreaseNeutral}} 68,373,433<ref name="pop_est">{{Cite web |date=16 January 2023 |title=Bilan démographique 2023 – Composantes de la croissance démographique, France |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-1 |url-status=live }}</ref>
|
||||||
|
| percent_water = 0.86<ref>{{Cite web |title=Surface water and surface water change |url=https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |access-date=11 October 2020 |publisher=Organisation for Economic Co-operation and Development (OECD) |archive-date=24 March 2021 |archive-url=https://web.archive.org/web/20210324133453/https://stats.oecd.org/Index.aspx?DataSetCode=SURFACE_WATER |url-status=live }}</ref>
|
||||||
|
| population_estimate_year = January 2024
|
||||||
|
| population_estimate_rank = 20th
|
||||||
|
| population_label2 = Density
|
||||||
|
| population_data2 = {{Pop density|68373433|643801|km2}} ([[List of countries and territories by population density|106th]])
|
||||||
|
| population_label3 = Metropolitan France, estimate {{As of|lc=y|January 2024}}
|
||||||
|
| population_data3 = {{IncreaseNeutral}} 66,142,961<ref>{{Cite web |date=16 January 2024 |title=Bilan démographique 2023 – Composantes de la croissance démographique, France métropolitaine |url=https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |access-date=2024-02-02 |website=Insee |archive-date=18 January 2024 |archive-url=https://web.archive.org/web/20240118223724/https://www.insee.fr/fr/statistiques/7746154?sommaire=7746197#titre-bloc-3 |url-status=live }}</ref> ([[List of countries and dependencies by population|23rd]])
|
||||||
|
| population_density_km2 = 122
|
||||||
|
| population_density_sq_mi = 313 <!-- Do not remove per [[Wikipedia:Manual of Style/Dates and numbers|WP:MOSNUM]] -->
|
||||||
|
| population_density_rank = 89th
|
||||||
|
| GDP_PPP = {{increase}} $3.988 trillion<ref name="IMFWEO.FR">{{cite web |url=https://www.imf.org/en/Publications/WEO/weo-database/2024/April/weo-report?c=132,&s=NGDPD,PPPGDP,NGDPDPC,PPPPC,&sy=2022&ey=2029&ssm=0&scsm=1&scc=0&ssd=1&ssc=0&sic=0&sort=country&ds=.&br=1 |title=World Economic Outlook Database, April 2024 Edition. (France) |publisher=[[International Monetary Fund]] |website=www.imf.org |date=16 April 2024 |access-date=16 April 2024}}</ref>
|
||||||
|
| GDP_PPP_year = 2024
|
||||||
|
| GDP_PPP_rank = 10th
|
||||||
|
| GDP_PPP_per_capita = {{increase}} $60,339<ref name="IMFWEO.FR" />
|
||||||
|
| GDP_PPP_per_capita_rank = 26th
|
||||||
|
| GDP_nominal = {{increase}} $3.130 trillion<ref name="IMFWEO.FR" />
|
||||||
|
| GDP_nominal_year = 2024
|
||||||
|
| GDP_nominal_rank = 7th
|
||||||
|
| GDP_nominal_per_capita = {{increase}} $47,359<ref name="IMFWEO.FR" />
|
||||||
|
| GDP_nominal_per_capita_rank = 23rd
|
||||||
|
| Gini = 29.8 <!-- number only -->
|
||||||
|
| Gini_year = 2022
|
||||||
|
| Gini_change = increase <!-- increase/decrease/steady -->
|
||||||
|
| Gini_ref = <ref name="eurogini">{{Cite web |title=Gini coefficient of equivalised disposable income – EU-SILC survey |url=https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |access-date=25 November 2023 |website=ec.europa.eu |publisher=[[Eurostat]] |archive-date=9 October 2020 |archive-url=https://web.archive.org/web/20201009091832/https://ec.europa.eu/eurostat/databrowser/view/tessi190/default/table?lang=en |url-status=live }}</ref>
|
||||||
|
| HDI = 0.910<!-- number only -->
|
||||||
|
| HDI_year = 2022 <!-- Please use the year to which the data refers, not the publication year -->
|
||||||
|
| HDI_change = steady <!-- increase/decrease/steady -->
|
||||||
|
| HDI_ref = <ref name="UNHDR">{{cite web|url=https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|title=Human Development Report 2023/24|language=en|publisher=[[United Nations Development Programme]]|date=13 March 2024|page=288|access-date=13 March 2024|archive-date=13 March 2024|archive-url=https://web.archive.org/web/20240313164319/https://hdr.undp.org/system/files/documents/global-report-document/hdr2023-24reporten.pdf|url-status=live}}</ref>
|
||||||
|
| HDI_rank = 28th
|
||||||
|
| currency = {{Unbulleted list
|
||||||
|
| [[Euro]] ([[Euro sign|€]]) ([[ISO 4217|EUR]]){{Efn-ur|name=six|Whole of the except the overseas territories in the Pacific Ocean}}
|
||||||
|
| [[CFP franc]] (XPF){{Efn-ur|name=seven|French overseas territories in the Pacific Ocean only}}
|
||||||
|
}}
|
||||||
|
| time_zone = [[Central European Time]]
|
||||||
|
| utc_offset = +1
|
||||||
|
| utc_offset_DST = +2
|
||||||
|
| time_zone_DST = [[Central European Summer Time]]{{Efn-ur|name=eight|Daylight saving time is observed in metropolitan France and [[Saint Pierre and Miquelon]] only.}}
|
||||||
|
| DST_note = Note: Various other time zones are observed in overseas France.{{Efn-ur|name=nine|Time zones across the span from UTC−10 ([[French Polynesia]]) to UTC+12 ([[Wallis and Futuna]])}}<br /> Although France is in the [[Coordinated Universal Time|UTC]] (Z) ([[Western European Time]]) zone, [[UTC+01:00]] ([[Central European Time]]) was enforced as the standard time since 25 February 1940, upon [[German military administration in occupied France during World War II|German occupation in WW2]], with a +0:50:39 offset (and +1:50:39 during [[Daylight saving time|DST]]) from Paris [[Local mean time|LMT]] (UTC+0:09:21).<ref>{{Cite web |title=Time Zone & Clock Changes in Paris, Île-de-France, France |url=https://www.timeanddate.com/time/zone/france/paris |access-date=9 October 2021 |website=timeanddate.com |archive-date=23 October 2021 |archive-url=https://web.archive.org/web/20211023233753/https://www.timeanddate.com/time/zone/france/paris |url-status=live }}</ref>
|
||||||
|
| date_format = dd/mm/yyyy ([[Anno Domini|AD]])
|
||||||
|
| drives_on = right
|
||||||
|
| calling_code = [[Telephone numbers in France|+33]]{{Efn-ur|name=eleven|The overseas regions and collectivities form part of the [[Telephone numbers in France|French telephone numbering plan]], but have their own country calling codes: [[Guadeloupe]] +590; [[Martinique]] +596; [[French Guiana]] +594; [[Réunion]] and [[Mayotte]] +262; [[Saint Pierre and Miquelon]] +508. The overseas territories are not part of the French telephone numbering plan; their country calling codes are: [[New Caledonia]] +687; [[French Polynesia]] +689; [[Wallis and Futuna]] +681.}}
|
||||||
|
| cctld = [[.fr]]{{Efn-ur|name=ten|In addition to [[.fr]], several other Internet TLDs are used in French overseas ''départements'' and territories: [[.re]], [[.mq]], [[.gp]], [[.tf]], [[.nc]], [[.pf]], [[.wf]], [[.pm]], [[.gf]] and [[.yt]]. France also uses [[.eu]], shared with other members of the European Union. The [[.cat]] domain is used in [[Catalan Countries|Catalan-speaking territories]].}}
|
||||||
|
| footnotes = Source gives area of metropolitan France as 551,500 km<sup>2</sup> (212,900 sq mi) and lists overseas regions separately, whose areas sum to 89,179 km<sup>2</sup> (34,432 sq mi). Adding these give the total shown here for the entire French Republic. [[The World Factbook]] reports the total as 643,801 km<sup>2</sup> (248,573 sq mi).
|
||||||
|
| flag_p1 = Flag of France (1794–1815, 1830–1974, 2020–present).svg
|
||||||
|
}}
|
||||||
|
|
||||||
|
'''France''',{{efn|{{IPA|fr|fʁɑ̃s|lang|LL-Q150 (fra)-Fhala.K-France.wav}}<!-- Do not add English pronunciation per [[Wikipedia:Manual of Style/Lead Section]]. -->}} officially the '''French Republic''',{{efn|{{Lang-fr|link=no|République française}} {{IPA-fr|ʁepyblik fʁɑ̃sɛːz|}}}} is a country located primarily in [[Western Europe]]. It also includes [[Overseas France|overseas regions and territories]] in the [[Americas]] and the [[Atlantic Ocean|Atlantic]], [[Pacific Ocean|Pacific]] and [[Indian Ocean|Indian]] oceans,{{Efn-ur|name=twelve|[[French Guiana]] is in South America; [[Guadeloupe]] and [[Martinique]] are in the Caribbean Sea; and [[Réunion]] and [[Mayotte]] are in the Indian Ocean, off the coast of Africa. All five [[Administrative divisions of France|are considered integral parts of the French Republic]]. France also comprises [[Saint Pierre and Miquelon]] in North America; [[Saint Barthélemy]] and [[Saint Martin (island)|Saint Martin]] in the Caribbean; [[French Polynesia]], [[New Caledonia]], [[Wallis and Futuna]] and [[Clipperton Island]] in the Pacific Ocean; and the [[French Southern and Antarctic Lands]].}} giving it one of the largest discontiguous [[exclusive economic zone]]s in the world. [[Metropolitan France]] shares borders with [[Belgium]] and [[Luxembourg]] to the north, [[Germany]] to the north east, [[Switzerland]] to the east, [[Italy]] and [[Monaco]] to the south east, [[Andorra]] and [[Spain]] to the south, and a maritime border with the [[United Kingdom]] to the north west. Its metropolitan area extends from the [[Rhine]] to the Atlantic Ocean and from the [[Mediterranean Sea]] to the [[English Channel]] and the [[North Sea]]. Its overseas territories include [[French Guiana]] in [[South America]], [[Saint Pierre and Miquelon]] in the North Atlantic, the [[French West Indies]], and many islands in [[Oceania]] and the [[Indian Ocean]]. Its [[Regions of France|eighteen integral regions]] (five of which are overseas) span a combined area of {{Cvt|643801|km2}} and have a total population of 68.4 million {{As of|2024|January|lc=y}}.<ref name="Field Listing :: Area"/><ref name=pop_est/> France is a [[Unitary state|unitary]] [[Semi-presidential system|semi-presidential]] [[republic]] with its capital in [[Paris]], the [[List of communes in France with over 20,000 inhabitants|country's largest city]] and main cultural and commercial centre; other major [[Urban area (France)|urban areas]] include [[Marseille]], [[Lyon]], [[Toulouse]], [[Lille]], [[Bordeaux]], [[Strasbourg]], [[Nantes]] and [[Nice]].
|
||||||
|
|
||||||
|
Metropolitan France was settled during the [[Iron Age]] by [[List of ancient Celtic peoples and tribes|Celtic tribes]] known as [[Gauls]] before [[Roman Gaul|Rome annexed the area]] in 51 BC, leading to a distinct [[Gallo-Roman culture]]. In the [[Early Middle Ages]], the [[Germanic peoples|Germanic]] [[Franks]] formed the Kingdom of [[Francia]], which became the heartland of the [[Carolingian Empire]]. The [[Treaty of Verdun]] of 843 partitioned the empire, with [[West Francia]] evolving into the [[Kingdom of France]]. In the [[High Middle Ages]], France was a powerful but decentralized [[Feudalism|feudal]] kingdom, but from the mid-14th to the mid-15th centuries, France was plunged into a dynastic conflict with [[Kingdom of England|England]] known as the [[Hundred Years' War]]. In the 16th century, the [[French Renaissance]] saw culture flourish and a [[French colonial empire]] rise.<ref name=":8">{{Cite book |url=https://books.google.com/books?id=UX8aeX_Lbi4C&pg=PA1 |title=Memory, Empire, and Postcolonialism: Legacies of French Colonialism |publisher=Lexington Books |year=2005 |isbn=978-0-7391-0821-5 |editor-last=Hargreaves, Alan G. |page=1}}</ref> Internally, France was dominated by the conflict with the [[House of Habsburg]] and the [[French Wars of Religion]] between [[Catholics]] and [[Huguenots]]. France was successful in the [[Thirty Years' War]] and further increased its influence during the reign of [[Louis XIV]].<ref>{{Cite book |last1=R.R. Palmer |url=https://archive.org/details/historyofmodernw00palm |title=A History of the Modern World |last2=Joel Colton |year=1978 |edition=5th |page=[https://archive.org/details/historyofmodernw00palm/page/161 161] |url-access=registration}}</ref>
|
||||||
|
|
||||||
|
The [[French Revolution]] of 1789 overthrew the {{Lang|fr|[[Ancien Régime]]|italic=no}} and produced the [[Declaration of the Rights of Man and of the Citizen|Declaration of the Rights of Man]], which expresses the nation's ideals to this day. France reached its political and military zenith in the early 19th century under [[Napoleon|Napoleon Bonaparte]], subjugating part of continental Europe and establishing the [[First French Empire]]. The [[French Revolutionary Wars|French Revolutionary]] and [[Napoleonic Wars]] significantly shaped the course of European history. The collapse of the empire initiated a period of relative decline, in which France endured the [[Bourbon Restoration]] until the founding of the [[French Second Republic]] which was succeeded by the [[Second French Empire]] upon [[Napoleon III]]'s takeover. His empire collapsed during the [[Franco-Prussian War]] in 1870. This led to the establishment of the [[Third French Republic]] Subsequent decades saw a period of economic prosperity and cultural and scientific flourishing known as the [[Belle Époque]]. France was one of the [[Triple Entente|major participants]] of [[World War I]], from which [[Treaty of Versailles|it emerged victorious]] at great human and economic cost. It was among the [[Allies of World War II|Allied powers]] of [[World War II]], but it surrendered and [[German military administration in occupied France during World War II|was occupied]] by the [[Axis powers|Axis]] in 1940. Following [[Liberation of France|its liberation in 1944]], the short-lived [[French Fourth Republic|Fourth Republic]] was established and later dissolved in the course of the defeat in the [[Algerian War]] and [[Rif War|Moroccan War of Independence]]. The current [[French Fifth Republic|Fifth Republic]] was formed in 1958 by [[Charles de Gaulle]]. Algeria and most French colonies became independent in the 1960s, with the majority retaining [[Françafrique|close economic and military ties with France]].
|
||||||
|
|
||||||
|
France retains its centuries-long status as a global centre [[French art|of art]], [[Science and technology in France|science]], and [[French philosophy|philosophy]]. [[List of World Heritage Sites in France|It hosts]] the [[World Heritage Sites by country|third-largest]] number of [[UNESCO]] [[World Heritage Site]]s and is the world's [[World Tourism rankings|leading tourist destination]], receiving over 89 million foreign [[Tourism in France|visitors in 2018]].<ref>{{Cite web |date=17 May 2019 |title=France posts new tourist record despite Yellow Vest unrest |url=https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |website=France 24 |access-date=18 May 2024 |archive-date=12 May 2023 |archive-url=https://web.archive.org/web/20230512192740/https://www.france24.com/en/20190517-france-tourism-record-number-visitors-tourists-despite-yellow-vests-paris |url-status=live }}</ref> France is a [[developed country]] with a [[List of countries by GDP (nominal) per capita|high nominal per capita income globally]] and [[Economy of France|its advanced economy]] ranks among the [[List of countries by GDP (nominal)|largest in the world]]. It is a [[great power]] in global affairs,<ref>Jack S. Levy, ''War in the Modern Great Power System, 1495–1975'', (2014) p. 29</ref> being one of the five [[permanent members of the United Nations Security Council]] and an official [[List of states with nuclear weapons|nuclear-weapon state]]. France is a founding and [[Big Four (Western Europe)|leading]] [[Member state of the European Union|member of the European Union]] and the [[eurozone]],<ref name="superficy" /> as well as a key member of the [[Group of Seven]], [[NATO|North Atlantic Treaty Organization]] (NATO), [[OECD|Organisation for Economic Co-operation and Development]] (OECD), and [[Organisation internationale de la Francophonie|Francophonie]].
|
||||||
|
|
||||||
|
==Etymology and pronunciation==
|
||||||
|
{{Main|Name of France}}
|
||||||
|
Originally applied to the whole [[Francia|Frankish Empire]], the name ''France'' comes from the [[Latin]] {{Lang|la|[[Francia]]}}, or "realm of the [[Franks]]".<ref>{{Cite web |title=History of France |url=http://www.discoverfrance.net/France/History/DF_history.shtml |url-status=dead |archive-url=https://web.archive.org/web/20110824051936/http://www.discoverfrance.net/France/History/DF_history.shtml |archive-date=24 August 2011 |access-date=17 July 2011 |publisher=Discoverfrance.net}}</ref> Modern France is still named today {{Lang|it|Francia}} in Italian and Spanish, while {{Lang|de|Frankreich}} in German, {{Lang|nl|Frankrijk}} in Dutch and {{Lang|sv|Frankrike}} in Swedish and Norwegian all mean "Land/realm of the Franks".
|
||||||
|
|
||||||
|
The [[name of the Franks]] is related to the English word ''frank'' ("free"): the latter stems from the [[Old French]] {{Lang|ang|franc}} ("free, noble, sincere"), ultimately from [[Medieval Latin]] ''francus'' ("free, exempt from service; freeman, Frank"), a generalisation of the tribal name that emerged as a [[Late Latin]] borrowing of the reconstructed [[Frankish language|Frankish]] [[Endonym and exonym|endonym]] {{Lang|frk|*Frank}}.<ref>Examples: {{Cite encyclopedia |title=frank |encyclopedia=American Heritage Dictionary}} {{Cite encyclopedia|title=frank|encyclopedia=Webster's Third New International Dictionary}} And so on.</ref><ref name=":0"/> It has been suggested that the meaning "free" was adopted because, after the conquest of [[Gaul]], only Franks were free of taxation,<ref>{{Cite book |first=Michel |last=Rouche |title=A History of Private Life: From Pagan Rome to Byzantium |publisher=Belknap Press |year=1987 |isbn=978-0-674-39974-7 |editor-first=Paul |editor-last=Veyne |page=425 |chapter=The Early Middle Ages in the West |oclc=59830199}}</ref> or more generally because they had the status of freemen in contrast to servants or slaves.<ref name=":0"/> The etymology of ''*Frank'' is uncertain. It is traditionally derived from the [[Proto-Germanic language|Proto-Germanic]] word {{Lang|gem-x-proto|frankōn}}, which translates as "javelin" or "lance" (the throwing axe of the Franks was known as the ''[[francisca]]''),<ref>{{Cite book |last1=Tarassuk |first1=Leonid |url=https://books.google.com/books?id=UJbyPwAACAAJ |title=The Complete Encyclopedia of Arms and Weapons: the most comprehensive reference work ever published on arms and armor from prehistoric times to the present with over 1,250 illustrations |last2=Blair |first2=Claude |publisher=Simon & Schuster |year=1982 |isbn=978-0-671-42257-8 |page=186 |access-date=5 July 2011}}</ref> although these weapons may have been named because of their use by the Franks, not the other way around.<ref name=":0">{{Cite web |title=Origin and meaning of Frank |url=https://www.etymonline.com/word/frank |website=Online Etymology Dictionary |language=en |access-date=18 May 2024 |archive-date=15 May 2024 |archive-url=https://web.archive.org/web/20240515001926/https://www.etymonline.com/word/frank |url-status=live }}</ref>
|
||||||
|
|
||||||
|
In English, 'France' is pronounced {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in American English and {{IPAc-en|f|r|ɑː|n|s}} {{Respell|FRAHNSS}} or {{IPAc-en|f|r|æ|n|s}} {{Respell|FRANSS}} in British English. The pronunciation with {{IPAc-en|ɑː}} is mostly confined to accents with the [[Trap–bath split|trap-bath split]] such as [[Received Pronunciation]], though it can be also heard in some other dialects such as [[Cardiff English]], in which {{IPAc-en|f|r|ɑː|n|s}} is in free variation with {{IPAc-en|f|r|æ|n|s}}.<ref>{{Cite book |last=Wells |first=John C. |title=Longman Pronunciation Dictionary |publisher=Longman |year=2008 |isbn=978-1-4058-8118-0 |edition=3rd}}; {{Cite book|last1=Collins|first1=Beverley|last2=Mees|first2=Inger M.|editor-last1=Coupland|editor-first1=Nikolas|editor-last2=Thomas|editor-first2=Alan Richard|year=1990|title=English in Wales: Diversity, Conflict, and Change|chapter=The Phonetics of Cardiff English|publisher=Multilingual Matters Ltd.|page=96|isbn=978-1-85359-032-0|url=https://books.google.com/books?id=tPwYt3gVbu4C}}</ref>
2
samples/Polar Bears a Complete Guide.txt
Normal file
File diff suppressed because one or more lines are too long
67
samples/ReActPrompts.txt
Normal file
@ -0,0 +1,67 @@
Sure, here are 10 ReAct queries that would use at least 4 agents in some sequence:

1. **Query**: "Generate a research outline on the effects of climate change on polar bear populations, including recent studies, statistical data, and a summary of findings."
   - Agents: Research Paper Finder, Data Summarizer, Statistical Analysis, Research Outline Generator

2. **Query**: "Create a literature review on machine learning applications in healthcare, with a summary of each paper and a keyword extraction for the main topics."
   - Agents: Research Paper Finder, Literature Review Organizer, Data Summarizer, Keyword Extractor

3. **Query**: "Design an experimental study on the impact of social media on teenagers' mental health, including hypothesis generation, methodology advice, and survey questions."
   - Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Survey Designer

4. **Query**: "Find funding opportunities for a research project on renewable energy technologies, generate a thesis statement, and create a bibliography for the related literature."
   - Agents: Funding Finder, Research Paper Finder, Thesis Statement Generator, Bibliography Manager

5. **Query**: "Analyze the statistical significance of data from a survey on remote work productivity, generate graphs, and provide a summary of the findings."
   - Agents: Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer

6. **Query**: "Create a comprehensive research outline on the impact of diet on cardiovascular health, including a literature review, data summary, and keyword extraction."
   - Agents: Research Paper Finder, Research Outline Generator, Data Summarizer, Keyword Extractor

7. **Query**: "Design a study on the effectiveness of different teaching methods in elementary education, including hypothesis generation, methodology advice, and statistical analysis."
   - Agents: Hypothesis Generator, Methodology Advisor, Experimental Design Helper, Statistical Analysis

8. **Query**: "Generate a citation for a recent article on quantum computing, summarize the article, and include it in a bibliography for a related research project."
   - Agents: Citation Generator, Research Paper Finder, Data Summarizer, Bibliography Manager

9. **Query**: "Develop a research proposal on the socio-economic impacts of urbanization, including literature review, hypothesis generation, and data summary."
   - Agents: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Data Summarizer

10. **Query**: "Create a detailed research outline for a study on the psychological effects of video games on adolescents, including a survey design and a review of recent studies."
    - Agents: Research Paper Finder, Research Outline Generator, Survey Designer, Literature Review Organizer

These queries are designed to invoke multiple agents in a sequence, allowing you to test the integration and functionality of your ReAct system effectively.

Sure, here are ten more queries that utilize the ReAct paradigm, showcasing the various agents and tasks:

1. **Query**: Develop a comprehensive research plan to investigate the impact of climate change on polar bear populations, including data analysis and a visualization of temperature trends.
   - **Agents/Tasks**: Research Paper Finder, Data Summarizer, Statistical Analysis, Graph Generator, Research Outline Generator.

2. **Query**: Create a detailed marketing plan for a new tech startup, including a competitor analysis, keyword extraction, and a list of potential investors.
   - **Agents/Tasks**: Research Paper Finder, Keyword Extractor, Data Summarizer, Funding Finder, Research Outline Generator.

3. **Query**: Design a user study to evaluate the usability of a new mobile app, including survey questions, statistical analysis, and a literature review on similar studies.
   - **Agents/Tasks**: Survey Designer, Research Paper Finder, Literature Review Organizer, Statistical Analysis, Research Outline Generator.

4. **Query**: Compile a comprehensive report on the latest advancements in renewable energy technologies, including citations and a summary of key findings.
   - **Agents/Tasks**: Research Paper Finder, Citation Generator, Data Summarizer, Research Outline Generator, Keyword Extractor.

5. **Query**: Plan an experimental study on the effects of sleep deprivation on cognitive performance, including a survey design and a review of relevant methodologies.
   - **Agents/Tasks**: Research Paper Finder, Survey Designer, Methodology Advisor, Research Outline Generator, Experimental Design Helper.

6. **Query**: Create a systematic review of the literature on artificial intelligence in healthcare, including keyword extraction and a summary of major trends.
   - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Keyword Extractor, Data Summarizer, Research Outline Generator.

7. **Query**: Develop a thesis proposal on the economic impact of remote work, including a literature review, hypothesis generation, and a bibliography.
   - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Hypothesis Generator, Research Outline Generator, Bibliography Manager.

8. **Query**: Generate a detailed report on the effects of social media on mental health, including data cleaning, statistical analysis, and visualization of the findings.
   - **Agents/Tasks**: Research Paper Finder, Data Cleaning Agent, Statistical Analysis, Graph Generator, Data Summarizer.

9. **Query**: Design a comprehensive survey to study consumer preferences for electric vehicles, including a methodology overview and a review of similar studies.
   - **Agents/Tasks**: Survey Designer, Research Paper Finder, Methodology Advisor, Literature Review Organizer, Research Outline Generator.

10. **Query**: Create a funding proposal for a project on sustainable agriculture practices, including a literature review, a list of potential funding sources, and a bibliography.
    - **Agents/Tasks**: Research Paper Finder, Literature Review Organizer, Funding Finder, Research Outline Generator, Bibliography Manager.

These queries utilize multiple agents and tasks to demonstrate the ReAct paradigm, highlighting the interplay between different agents and the sequential nature of the process.
356
samples/State of Polar Bears.txt
Normal file
356
samples/State of Polar Bears.txt
Normal file
|
@ -0,0 +1,356 @@
|
||||||
|
THE STATE OF THE
|
||||||
|
POLAR BEAR REPORT
|
||||||
|
2023
|
||||||
|
The Global Warming Policy Foundation
|
||||||
|
Briefing 67
|
||||||
|
Susan Crockford
|
||||||
|
The State of the Polar Bear Report 2023
|
||||||
|
Susan Crockford
|
||||||
|
Briefing 67, The Global Warming Policy Foundation
|
||||||
|
© Copyright 2024, The Global Warming Policy Foundation
|
||||||
|
iii
|
||||||
|
Contents
|
||||||
|
About the author iii
|
||||||
|
Foreword v
|
||||||
|
Executive summary v
|
||||||
|
1. Introduction 1
|
||||||
|
2. Conservation status 1
|
||||||
|
3. Population trends 5
|
||||||
|
4. Habitat and primary productivity 6
|
||||||
|
5. Human/bear interactions 11
|
||||||
|
6. Discussion 14
|
||||||
|
Bibliography 16
|
||||||
|
Notes 24
|
||||||
|
About the Global Warming Policy Foundation 26
|
||||||
|
About the author
|
||||||
|
Dr Susan Crockford is an evolutionary biologist and has been working for more than 40 years in
|
||||||
|
archaeozoology, paleozoology and forensic zoology.1
|
||||||
|
She is a former adjunct professor at the
|
||||||
|
University of Victoria, British Columbia and works full time for a private consulting company she
|
||||||
|
co-owns (Pacific Identifications Inc). She is the author of Eaten: A Novel (a science-based polar bear
|
||||||
|
attack thriller), Polar Bear Facts and Myths (for ages seven and up, also available in French, German,
|
||||||
|
Dutch, Portuguese, and Norwegian), Polar Bears Have Big Feet (for preschoolers), and several
|
||||||
|
fully referenced books including, Polar Bear Evolution: A Model for the Origin of Species, Sir David
|
||||||
|
Attenborough and the Walrus Deception, The Polar Bear Catastrophe That Never Happened, and Polar
|
||||||
|
Bears: Outstanding Survivors of Climate Change,2 as well as a scientific paper on polar bear conservation status and a peer-reviewed paper on the distribution of ancient polar bear remains.3
|
||||||
|
She has
|
||||||
|
authored several earlier briefing papers, reports, and videos for GWPF, as well as opinion pieces for
|
||||||
|
major news outlets, on polar bear and walrus ecology and conservation.4
|
||||||
|
Susan Crockford blogs
|
||||||
|
at www.polarbearscience.com.
|
||||||
|
|
||||||
|
v
|
||||||
|
Foreword
|
||||||
|
This report is intended to provide
|
||||||
|
a brief update on the habitat and
|
||||||
|
conservation status of polar bears,
|
||||||
|
with commentary regarding inconsistencies and sources of bias found
|
||||||
|
in recent literature that won’t be
|
||||||
|
found elsewhere. It is a summary
|
||||||
|
of the most recent information
|
||||||
|
on polar bears, relative to historical records, based on a review of
|
||||||
|
2023 scientific literature and media
|
||||||
|
reports, and, in places, reiterates or
|
||||||
|
updates information provided in
|
||||||
|
previous papers. This publication
|
||||||
|
is intended for a wide audience,
|
||||||
|
including scientists, teachers,
|
||||||
|
students, decision-makers, and
|
||||||
|
members of the general public
|
||||||
|
interested in polar bears and the
|
||||||
|
Arctic sea ice environment.
|
||||||
|
Executive summary
|
||||||
|
2023 marked 50 years of international cooperation to protect
|
||||||
|
polar bears across the Arctic.
|
||||||
|
Those efforts should be hailed as
|
||||||
|
a conservation success story: from
|
||||||
|
late-1960s population estimate by
|
||||||
|
the US Fish and Wildlife Service of
|
||||||
|
about 12,000 individuals, numbers
|
||||||
|
have almost tripled, to just over
|
||||||
|
32,000 in 2023 (with a wide range
|
||||||
|
of potential error for both estimates).
|
||||||
|
• There were no reports from
|
||||||
|
the Arctic in 2023 indicating polar
|
||||||
|
bears were being harmed due to
|
||||||
|
lack of suitable habitat, in part
|
||||||
|
because Arctic sea ice in summer
|
||||||
|
has not declined since 2007.
|
||||||
|
• Contrary to expectations, a
|
||||||
|
study in Svalbard found a decrease
|
||||||
|
in polar bears killed in defense of
|
||||||
|
life or property over the last 40
|
||||||
|
years, despite profound declines in
|
||||||
|
sea ice over the last two decades.
|
||||||
|
• A survey of Southern Hudson
|
||||||
|
Bay polar bears in 2021 showed
|
||||||
|
an astonishing 30% increase over
|
||||||
|
five years, which adds another 223
|
||||||
|
bears to the global total.
|
||||||
|
• A concurrent survey of Western
|
||||||
|
Hudson Bay polar bears in 2021
|
||||||
|
showed that numbers had not
|
||||||
|
declined since 2011, which also
|
||||||
|
means they have not declined
|
||||||
|
since 2004. Movement of polar
|
||||||
|
bears across the boundaries with
|
||||||
|
neighbouring subpopulations
|
||||||
|
may account for the appearance
|
||||||
|
of a decline, when none actually
|
||||||
|
occurred.
|
||||||
|
• The IUCN Polar Bear Specialist
|
||||||
|
Group has ignored a 2016 recommendation that the boundaries
|
||||||
|
of three Hudson Bay subpopulations (Western HB, Southern HB,
|
||||||
|
and Foxe Basin) be adjusted to
|
||||||
|
account for genetic distinctiveness
|
||||||
|
of bears inhabiting the Hudson Bay
|
||||||
|
region. A similar boundary issue
|
||||||
|
in the western Arctic between the
|
||||||
|
Chukchi Sea, and the Southern and
|
||||||
|
Northern Beaufort subpopulations,
|
||||||
|
based on known movements of
|
||||||
|
bears between regions, has been
|
||||||
|
acknowledged since 2014 but has
|
||||||
|
not yet been resolved.
|
||||||
|
• The US Fish and Wildlife Service
|
||||||
|
and the IUCN Polar Bear Specialist
|
||||||
|
Group, in their 2023 reports, failed
|
||||||
|
to officially acknowledge the newfound South-East Greenland bears
|
||||||
|
as the 20th subpopulation, despite
|
||||||
|
undisputed evidence that this is a
|
||||||
|
genetically distinct and geographically isolated group. Numbers are
|
||||||
|
estimated at 234 individuals.
|
||||||
|
|
||||||
|
1
|
||||||
|
1. Introduction
Fifty years ago, on 15 November 1973, the five Arctic nations of Canada, Russia, the USA, Norway and Greenland signed an international treaty to protect polar bears against the rampant overhunting that had taken place in the first half of the 20th century, and which had decimated many subpopulations. The treaty was effective, and by the late 1990s, polar bear populations that could be studied had at least doubled, making it a huge conservation success story. However, in 2009, the wording of the treaty was amended to protect the bears against on-going and future loss of sea ice habitat, which was assumed to be caused by human-generated greenhouse gas emissions. This was in line with similar declarations by the International Union for the Conservation of Nature (IUCN) and the US Endangered Species Act (USESA). These pessimistic conservation assessments, based on computer-modelled future declines rather than observed conditions, have been upheld ever since, even as the predicted relationship between polar bear survival and sea-ice loss has failed to emerge in the observational data.5

The current population of polar bears is large, and their historical range has not diminished due to habitat loss since 1979. Indeed, previously inhabited areas have been recolonised as numbers have recovered: recent data suggest that territory in Davis Strait used before 1970 during the summer ice-free period – by all ages and by pregnant females for maternity denning – is now being used once again.6
2. Conservation status
The IUCN, in their 2015 Red List assessment, provided by the Polar Bear Specialist Group (PBSG), again listed polar bears as ‘vulnerable’ to extinction, just as it did in 2006. Similarly, in 2023 the US Fish and Wildlife Service (USFWS) upheld its 2008 conclusion that polar bears were ‘threatened’ with extinction. In both instances, conservation status assessments have been based on computer-modelled predictions of future sea-ice conditions and assumed resultant population declines rather than current conditions.7

In Canada, the 2018 COSEWIC report assigned a status of ‘special concern’ to the species. This assessment had not changed by 2023.8
3. Population size at 2023
Global

The latest official estimate for the global population, from 17 October 2023, is the PBSG estimate of 26,000 (range 22,000–31,000), arrived at in 2015 and unadjusted since then. In their 2023 assessment, the PBSG has failed to increase this estimate, even to account for undisputed, statistically-significant increases in two subpopulations and the addition of a newly-discovered subpopulation, which should reasonably boost their very conservative mid-point estimate to about 26,600: Kane Basin is up by 133, Southern Hudson Bay is up by 223, and newly-discovered SE Greenland adds another 234.9
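As a quick arithmetic check, using only the increments just listed, the adjusted mid-point follows as:

\[
26{,}000 + 133 + 223 + 234 = 26{,}590 \approx 26{,}600,
\]

where the three increments correspond to Kane Basin, Southern Hudson Bay, and SE Greenland respectively.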
However, survey results postdating preparation of the 2015 assessment, including those made public after July 2021 (for Davis Strait, Chukchi Sea, SE Greenland, Western Hudson Bay, and Southern Hudson Bay), plausibly brought the mid-point total to just over 32,000 (Figure 1).10

A plan to survey all Russian polar bear subpopulations between 2021 and 2023 seems to have been put on hold. In addition, an abundance estimate for the Viscount Melville subpopulation in the western Canadian Arctic has still not been released, even though a three-year survey completed in 2014 has produced other published data.11 Surveys of Lancaster Sound and East Greenland were completed in spring 2023, and these results, when released, may put the global population mid-point estimate well above 32,000.12 While there is a wide margin of potential error attached to this number, it is nowhere near the figure of 7,493 (6,660–8,325), implicit in the prediction that two thirds of the global population of polar bears would be gone by now, given the sea ice conditions prevailing since 2007.13

Note that the 2023 USFWS Status Report did not include the Kara Sea estimate of 3,200 bears, the Laptev Sea estimate of 1,000 bears, or the East Greenland estimate of 2,000 bears, figures that were used for the 2015 IUCN assessment. It also used the lowest of three available 2016 estimates for the Chukchi Sea, as discussed in the State of the Polar Bear Report 2021.14 Although the USFWS report was published in August 2023, it also did not include results of the 2021 assessments of the Western and Southern Hudson Bay subpopulations that were published in November 2022, or the newly-discovered subpopulation of South East Greenland reported in June 2022.15
Figure 1: Estimates of the global polar bear population, 1960 to date (vertical axis in thousands of bears; horizontal axis, 1960–2020).
Subpopulation survey results published in 2022
For detailed discussions of the changing status and abundance issues over time for all 19 subpopulations, see the State of the Polar Bear Report 2018.16
Western Hudson Bay (WH)
An aerial survey conducted in September 2021 generated a new subpopulation estimate of 618 (range 385–852), an apparent decline of 27% since the 2016 estimate of 842 (range 562–1121) and about a 40% decline from a 2011 estimate of 949 (range 618–1280) that used similar survey methods. However, the WH 2021 report authors stated categorically that this apparent decline since 2011 was not statistically significant, in part due to evidence that some bears moved into neighbouring subpopulations, combined with the large margins of error. While it seems inconceivable that a decline of 40% over 10 years could be statistically insignificant, recall that a similar conclusion was reached in 2015 regarding the 42% increase in abundance of Svalbard bears. Since the estimate calculated in 2004 was 935 (range 794–1076), it seems the abundance of WH polar bears has not changed since 2004.17 Note that a more comprehensive survey was conducted in 2011, generating an estimate of 1030 (range 754–1406). This became the official WH estimate used by the PBSG.18
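For reference, the reported 27% figure follows directly from the estimates given above (a simple check, not part of the original survey report):

\[
\frac{842 - 618}{842} \approx 0.27.
\]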
Given the conclusions of the 2021 survey that the 2016 and 2021 estimates were not statistically different from the 2011 estimate, it appears that the 2011 comprehensive estimate of 1030 may still be the most valid figure for WH.
The 2021 WH survey authors also made it clear that the most recent population estimate was not associated with poor ice conditions, since sea-ice levels had been as good as the 1980s in four out of the five years between 2017 and 2021. Confoundingly, they could not explain why adult females and subadults were underrepresented in the population.

No evidence was provided for lack of prey, and although emigration to neighbouring Southern Hudson Bay was largely dismissed as an explanation, the possibility of a movement north into Foxe Basin was not explored.

This is odd, since a 2016 genetic study suggested that the northern boundary for WH polar bears should be moved to the north of Southampton Island (a major denning area currently included in FB) and the SH boundary to the north of Akimiski Island in James Bay, adding the entire southern Hudson Bay coast in Ontario, as well as the Belcher Islands, to WH (currently included in SH), leaving only James Bay to represent SH.19

In 2023, the PBSG indicated the WH subpopulation was ‘likely decreasing’, based on the 2021 estimate of 618 bears. However, they did not include the caveat from the survey report that this apparent decline was not statistically significant, and also did not incorporate the conclusion of the 2022 Canadian Polar Bear Technical Committee (PBTC) that indigenous knowledge (IK) assessed this subpopulation as ‘increased’. Similarly, the 2023 assessment by the USFWS listed WH as ‘likely decreased’, based on the 2016 survey only (2021 survey results were not included). It acknowledged that in 2022 IK considered this subpopulation to have ‘increased’.20
Southern Hudson Bay (SH)
An aerial survey conducted in September 2021 generated a new subpopulation estimate of 1119 (range 860–1454), which represented a 30% increase over five years. The result was considered robust, and reflective of the true size of the population. However, another estimate, of 1003 (range 773–1302), was generated based on the same data. This was considered more comparable to the 2016 estimate of 780 (range 590–1029). While the authors did not explicitly address the issue of statistical significance, they concluded that a natural increase in numbers, via increased cub production and survival, must have taken place in conjunction with good sea ice conditions from 2017 to 2020, perhaps in addition to immigration from another unidentified subpopulation.21

In their 2023 assessment, the IUCN PBSG discussed the apparent increased abundance of SH bears, but did not unequivocally state that the subpopulation had increased, instead only implying that an increase may have been possible (‘years of relatively good ice conditions, combined with comparatively reduced harvest from 2016–2021 may have buffered the population against further decline or allowed for recovery’). They also did not include the 2022 assessment by the PBTC that IK considered the SH subpopulation was ‘stable/likely increased’ (i.e. stable in the James Bay portion, likely increased in southeastern Hudson Bay).22

The 2023 assessment by the USFWS listed SH as ‘likely decreased’, based only on 2016 survey results (2021 survey results were not included), but did acknowledge that in 2022, IK considered this subpopulation to be ‘stable/likely increased’.23
Southeast Greenland (SG)
As part of a multiyear project on the status of SG polar bears that began in 2011, surveys were conducted during mid-March and mid-April of 2015–2017 for bears that lived below 64°N latitude. The results were compared with data from bears living in EG further north, which had been collected up to 2021. This southern region of Greenland had not previously been surveyed, or even visited by polar bear scientists, and there are no permanent human inhabitants. Few Inuit hunters even venture into the region.24

Based on capture-recapture data, a population estimate of 234 (range 111–462) was generated for SG. Weight (indicating body condition or fatness) of almost two dozen females captured in SG averaged 186 kg, which was similar to females in Svalbard in the western Barents Sea (185 kg) in the 1990–2000 period and in EG in recent years (186 kg).

Most surprisingly, there was strong evidence that these SG polar bears are the most genetically distinct subpopulation in the Arctic, indicating a lack of interbreeding with bears in EG for at least 200 years.25
Much emphasis was given by study authors Kirstin Laidre and colleagues to their interpretation that bears in these SG fjords frequently used glacier ice to hunt seals during the summer; in other locations bears only do so occasionally. Seals feed in such ‘glacier-front’ habitats in summer because primary productivity is high: melting glaciers in the fjords attract fish because their food – marine plankton – is plentiful. However, the only evidence provided of seal-hunting behaviour by polar bears in summer in SG is one photo, taken by an unidentified photographer, of a bear on glacier ice beside a seal kill in September 2016. As noted above, the SG polar bear surveys were conducted in March and April; therefore, frequent summer hunting of seals could not have been observed by the authors, but was simply assumed to have happened.

In addition, although the authors imply that glacier-front habitat is rare, it is in fact rather common across the Arctic and widely used by polar bears year-round, because the sea ice covering such fjords in late winter and spring (including those in SG) is used by ringed seals as a birthing platform. Moreover, newborn seals are the preferred prey of polar bears, making up roughly two thirds of their diet. Fjords with glacier ice are present all along both coasts of Greenland, in Svalbard, Novaya Zemlya and Franz Josef Land in Russia, and in Baffin and Ellesmere Islands in the Canadian Arctic.26

The authors concluded their report with a recommendation that SG be officially recognized by the IUCN PBSG as a polar bear subpopulation distinct from EG for management and conservation purposes. However, despite the fact that Dr Laidre is currently the co-chair of the PBSG, and that in March 2023 the government of Greenland declared SG a protected ‘new and separate management unit’, the PBSG declined to add it as a distinct subpopulation. The 2023 USFWS assessment cited the 2022 Laidre report and its abundance estimate for SG, but regarding a change in boundaries for EG, it stated only that ‘ecoregion and subpopulation status will likely be re-evaluated by PBSG in 2023’.27
4. Population trends
In Canada, where roughly two thirds of the world’s polar bear population live, a 2022 update from the PBTC for the first time included assessments based on Inuit IK for each of the 13 subpopulations for which Canada has sole or joint management responsibility. While the ‘scientific’ assessments for trends in abundance for these subpopulations are simply the widely varying ones provided by the PBSG in 2021, those based on IK were either ‘increased’ or ‘stable’.28

Later in 2022, the Government of Canada published updated global polar bear population trend maps based on 2021 PBSG ‘scientific’ data: no provision was made for the conflicting information from IK discussed above, calling into question whether IK assessments are actually given any weight in assessing current conditions.29 And while the 2023 USFWS assessment included the 2022 Canadian IK trend information in their status table, it gave priority to 2021 PBSG scientific data.30

Figure 2 shows a more realistic representation of current polar bear population trends based on all available information (scientific survey results, IK, and studies on health and survival status published up to 31 December 2023, extrapolated to regions lacking recent survey data). This gives the following subpopulation classifications at 2023, including the new subpopulation of SE Greenland (SG):
• seven ‘increasing’ or ‘likely increasing’ [KB, DS, MC, GB, CS, BS, SH].
• four ‘stable’ or ‘likely stable’ [BB, SB, WH, SG].
• nine ‘presumed stable or increasing’ [EG, LS, LP, KS, VM, NB, GB, FB, NW].