Main working files

Mahesh Kommareddi 2024-07-23 16:58:58 -04:00
parent 96d33ef5ab
commit b7e89a301d
27 changed files with 1459 additions and 0 deletions

35
.gitignore vendored Normal file
View File

@ -0,0 +1,35 @@
# Python
__pycache__/
*.py[cod]
*.so
# Virtual Environment
venv/
env/
.env
# Node
node_modules/
npm-debug.log
yarn-error.log
# Build files
build/
dist/
# IDE specific files
.vscode/
.idea/
# OS generated files
.DS_Store
Thumbs.db
# Temporary files
*.swp
*.swo
*~
# Logs
*.log

4
backend/.env.template Normal file
View File

@ -0,0 +1,4 @@
OPENAI_API_KEY=your_openai_api_key_here
AWS_REGION=us-east-1
# MILVUS_HOST=localhost
# MILVUS_PORT=19530

0
backend/README.md Normal file
View File

22
backend/requirements.txt Normal file
View File

@ -0,0 +1,22 @@
flask
flask-cors
flask_socketio
numpy
tensorflow
pymilvus
openai<1.0  # the backend uses the legacy openai.ChatCompletion API
boto3
ollama
litellm
python-dotenv
scikit-learn
torch>=1.7.0
torchvision>=0.8.1
opencv-python
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
tqdm>=4.41.0
matplotlib>=3.2.2
seaborn>=0.11.0

317
backend/src/app.py Normal file
View File

@ -0,0 +1,317 @@
import subprocess
from flask import Flask, request, jsonify, send_from_directory, send_file
from flask_cors import CORS
from flask_socketio import SocketIO, emit
import cv2
import numpy as np
import tempfile
import os
import json
import threading
import time
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from event_detector import detect_event, process_video
from description_generator import generate_temporal_description
from werkzeug.utils import secure_filename
from database import insert_video, update_video_embedding, get_all_videos, get_video_by_id, get_vectorized_videos
from vector_search import cosine_similarity_search
from io import BytesIO
UPLOAD_FOLDER = 'uploads'
TEMP_FOLDER = 'temp'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(TEMP_FOLDER, exist_ok=True)
# Initialize Flask app
app = Flask(__name__, static_folder='../../frontend/build', static_url_path='')
app.config['UPLOAD_FOLDER'] = 'uploads' # Make sure this folder exists
CORS(app, resources={r"/api/*": {"origins": "*"}})
# Load VGG16 model for video embedding
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')
def generate_video_embedding(video_path):
cap = cv2.VideoCapture(video_path)
frames = []
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame = cv2.resize(frame, (224, 224))
frame = img_to_array(frame)
frame = np.expand_dims(frame, axis=0)
frame = preprocess_input(frame)
frames.append(frame)
if len(frames) >= 16: # Process 16 frames at a time
break
cap.release()
if not frames:
return None
# Generate embeddings
embeddings = vgg_model.predict(np.vstack(frames))
# Average the embeddings
avg_embedding = np.mean(embeddings, axis=0)
return avg_embedding
socketio = SocketIO(app, cors_allowed_origins="*")
rtsp_url = None
stop_stream = False
ffmpeg_process = None
# Create a directory to store HLS segments
hls_dir = os.path.join(os.getcwd(), 'hls_temp')
os.makedirs(hls_dir, exist_ok=True)
def convert_rtsp_to_hls(rtsp_url):
global ffmpeg_process
output_path = os.path.join(hls_dir, 'stream.m3u8')
ffmpeg_command = [
'ffmpeg',
'-i', rtsp_url,
'-c:v', 'libx264',
'-c:a', 'aac',
'-f', 'hls',
'-hls_time', '1',
'-hls_list_size', '3',
'-hls_flags', 'delete_segments+append_list+omit_endlist',
'-hls_segment_type', 'mpegts',
'-hls_segment_filename', os.path.join(hls_dir, 'segment%d.ts'),
output_path
]
ffmpeg_process = subprocess.Popen(ffmpeg_command)
def process_rtsp_stream():
global rtsp_url, stop_stream
cap = cv2.VideoCapture(rtsp_url)
start_time = time.time()
frame_count = 0
while not stop_stream:
ret, frame = cap.read()
if not ret:
break
current_time = time.time() - start_time
events, _ = detect_event(frame, current_time)
if int(current_time) % 10 == 0:
description = generate_temporal_description(events, 'bedrock', 'anthropic.claude-3-sonnet-20240229-v1:0')
else:
description = None
socketio.emit('analysis_result', {
'events': events,
'description': description,
'timestamp': current_time
})
frame_count += 1
time.sleep(0.033)
cap.release()
@socketio.on('start_rtsp_stream')
def start_rtsp_stream(data):
global rtsp_url, stop_stream
rtsp_url = data['rtsp_url']
stop_stream = False
threading.Thread(target=process_rtsp_stream).start()
convert_rtsp_to_hls(rtsp_url)
return {'hls_url': '/hls/stream.m3u8'}
@socketio.on('stop_rtsp_stream')
def stop_rtsp_stream():
global stop_stream, ffmpeg_process
stop_stream = True
if ffmpeg_process:
ffmpeg_process.terminate()
ffmpeg_process = None
@app.route('/hls/<path:filename>')
def serve_hls(filename):
return send_from_directory(hls_dir, filename)
@app.route('/api/analyze', methods=['POST'])
def analyze_video():
if 'video' not in request.files:
return jsonify({'error': 'No video file provided'}), 400
video = request.files['video']
model_type = request.form.get('model_type', 'bedrock')
model_name = request.form.get('model_name', 'anthropic.claude-3-sonnet-20240229-v1:0')
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
video.save(temp_file.name)
video_path = temp_file.name
try:
events = process_video(video_path)
# Generate temporal descriptions for every 10 seconds
temporal_descriptions = []
for i in range(0, len(events), 10 * 30): # Assuming 30 fps
segment_events = events[i:i+10*30]
if segment_events:
start_time = segment_events[0]['timestamp']
end_time = segment_events[-1]['timestamp']
description = generate_temporal_description(segment_events, model_type, model_name)
temporal_descriptions.append({
'start_time': start_time,
'end_time': end_time,
'description': description
})
return jsonify({
'events': events,
'temporal_descriptions': temporal_descriptions
})
finally:
os.unlink(video_path)
@app.route('/api/upload', methods=['POST'])
def upload_video():
if 'video' not in request.files:
return jsonify({'error': 'No video file provided'}), 400
video = request.files['video']
if video.filename == '':
return jsonify({'error': 'No selected file'}), 400
if video:
filename = secure_filename(video.filename)
filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
# Ensure the upload folder exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
video.save(filepath)
try:
video_id = insert_video(filename, filepath)
return jsonify({'message': 'Video uploaded successfully', 'video_id': video_id}), 200
except Exception as e:
return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500
return jsonify({'error': 'Invalid file'}), 400
@app.route('/api/similar_videos', methods=['POST'])
def find_similar_videos():
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
file = request.files['file']
if file.filename == '':
return jsonify({'error': 'No selected file'}), 400
if file:
filename = secure_filename(file.filename)
filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(filepath)
try:
# Generate embedding for the uploaded file
query_embedding = generate_video_embedding(filepath)
if query_embedding is None:
return jsonify({'error': 'Failed to generate embedding for the uploaded file'}), 500
# Get all videos from database
all_videos = get_all_videos()
if not all_videos:
return jsonify({'error': 'No videos in database to compare'}), 404
# Only keep videos that already have embeddings so the indices returned by
# the similarity search line up with the list we report back
vectorized_videos = [video for video in all_videos if video['embedding'] is not None]
if not vectorized_videos:
return jsonify({'error': 'No embeddings found in the database'}), 404
database_vectors = np.array([video['embedding'] for video in vectorized_videos])
# Perform similarity search
top_indices, similarities = cosine_similarity_search(query_embedding, database_vectors, top_k=5)
similar_videos = [
{
'id': vectorized_videos[i]['id'],
'filename': vectorized_videos[i]['filename'],
'similarity': float(similarities[j])
}
for j, i in enumerate(top_indices)
]
return jsonify(similar_videos)
except Exception as e:
return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500
finally:
# Clean up temporary file
os.remove(filepath)
return jsonify({'error': 'Invalid file'}), 400
@app.route('/api/vectorized_videos', methods=['GET'])
def list_vectorized_videos():  # renamed so it does not shadow database.get_vectorized_videos
videos = get_all_videos()
return jsonify([{
'id': video['id'],
'filename': video['filename'],
'vectorized': video['embedding'] is not None
} for video in videos])
@app.route('/api/vectorize/<int:video_id>', methods=['POST'])
def vectorize_video(video_id):
video = get_video_by_id(video_id)
if not video:
return jsonify({'error': 'Video not found'}), 404
embedding = generate_video_embedding(video['filepath'])
update_video_embedding(video_id, embedding)
return jsonify({'message': 'Video vectorized successfully'})
@app.route('/api/thumbnail/<int:video_id>')
def get_thumbnail(video_id):
video = get_video_by_id(video_id)
if not video:
return jsonify({'error': 'Video not found'}), 404
cap = cv2.VideoCapture(video['filepath'])
ret, frame = cap.read()
cap.release()
if not ret:
return jsonify({'error': 'Failed to generate thumbnail'}), 500
_, buffer = cv2.imencode('.jpg', frame)
io_buf = BytesIO(buffer)
io_buf.seek(0)
return send_file(io_buf, mimetype='image/jpeg')
# Serve React App
@app.route('/')
def serve():
return send_from_directory(app.static_folder, 'index.html')
@app.route('/<path:path>')
def static_proxy(path):
file_name = path.split('/')[-1]
dir_name = os.path.join(app.static_folder, '/'.join(path.split('/')[:-1]))
return send_from_directory(dir_name, file_name)
if __name__ == '__main__':
socketio.run(app, debug=True, port=5333)
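
For a quick end-to-end check of the two main endpoints, a minimal client sketch; it assumes the server above is running locally on port 5333 and that sample.mp4 is a hypothetical local test file:

import requests

BASE_URL = "http://localhost:5333"

with open("sample.mp4", "rb") as f:
    # Upload and register the video in the SQLite database
    upload = requests.post(f"{BASE_URL}/api/upload", files={"video": f})
    print(upload.json())  # {'message': ..., 'video_id': ...}

with open("sample.mp4", "rb") as f:
    # Run event detection and temporal description generation
    analysis = requests.post(
        f"{BASE_URL}/api/analyze",
        files={"video": f},
        data={"model_type": "bedrock",
              "model_name": "anthropic.claude-3-sonnet-20240229-v1:0"},
    )
    print(analysis.json()["temporal_descriptions"])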

19
backend/src/config.py Normal file
View File

@ -0,0 +1,19 @@
import os
from dotenv import dotenv_values
# Get the path to the directory this file is in
BASEDIR = os.path.abspath(os.path.dirname(__file__))
# Connect the path with your '.env' file name
env_path = os.path.join(BASEDIR, '..', '.env')
print(f"Looking for .env file at: {env_path}")
# Load the .env file
config = dotenv_values(env_path)
# Get the environment variables
OPENAI_API_KEY = config.get('OPENAI_API_KEY', 'default_key')
AWS_REGION = config.get('AWS_REGION', 'default_region')
print(f"OPENAI_API_KEY: {OPENAI_API_KEY}")
print(f"AWS_REGION: {AWS_REGION}")

103
backend/src/database.py Normal file
View File

@ -0,0 +1,103 @@
import sqlite3
import json
import numpy as np
import os
DATABASE_PATH = 'videos.db'
def dict_factory(cursor, row):
d = {}
for idx, col in enumerate(cursor.description):
d[col[0]] = row[idx]
return d
def get_db_connection():
conn = sqlite3.connect(DATABASE_PATH)
conn.row_factory = dict_factory
return conn
def init_db():
conn = get_db_connection()
c = conn.cursor()
# Check if the videos table exists
c.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='videos'")
table_exists = c.fetchone()
if table_exists:
# If the table exists, check if the filepath column exists
c.execute("PRAGMA table_info(videos)")
columns = [column['name'] for column in c.fetchall()]
if 'filepath' not in columns:
# Add the filepath column if it doesn't exist
c.execute("ALTER TABLE videos ADD COLUMN filepath TEXT")
else:
# If the table doesn't exist, create it with all necessary columns
c.execute('''CREATE TABLE videos
(id INTEGER PRIMARY KEY AUTOINCREMENT,
filename TEXT NOT NULL,
filepath TEXT NOT NULL,
embedding TEXT)''')
conn.commit()
conn.close()
def insert_video(filename, filepath):
conn = get_db_connection()
c = conn.cursor()
c.execute("INSERT INTO videos (filename, filepath) VALUES (?, ?)",
(filename, filepath))
video_id = c.lastrowid
conn.commit()
conn.close()
return video_id
def update_video_embedding(video_id, embedding):
conn = get_db_connection()
c = conn.cursor()
embedding_json = json.dumps(embedding.flatten().tolist())
c.execute("UPDATE videos SET embedding = ? WHERE id = ?",
(embedding_json, video_id))
conn.commit()
conn.close()
def get_all_videos():
conn = get_db_connection()
c = conn.cursor()
c.execute("SELECT * FROM videos")
videos = c.fetchall()
conn.close()
for video in videos:
if video['embedding']:
video['embedding'] = np.array(json.loads(video['embedding'])).reshape(1, -1)
return videos
def get_video_by_id(video_id):
conn = get_db_connection()
c = conn.cursor()
c.execute("SELECT * FROM videos WHERE id = ?", (video_id,))
video = c.fetchone()
conn.close()
if video and video['embedding']:
video['embedding'] = np.array(json.loads(video['embedding']))
return video
def get_vectorized_videos():
conn = get_db_connection()
c = conn.cursor()
c.execute("SELECT * FROM videos WHERE embedding IS NOT NULL")
videos = c.fetchall()
conn.close()
for video in videos:
video['embedding'] = np.array(json.loads(video['embedding']))
return videos
# Initialize the database when this module is imported
init_db()
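
A small round-trip sketch of the helpers above, using a hypothetical clip.mp4 and a toy 4-dimensional vector in place of a real VGG16 embedding:

import numpy as np
from database import insert_video, update_video_embedding, get_video_by_id

# Register a video, attach a fake embedding, then read it back
video_id = insert_video("clip.mp4", "uploads/clip.mp4")
update_video_embedding(video_id, np.array([0.1, 0.2, 0.3, 0.4]))
stored = get_video_by_id(video_id)
print(stored["filename"], stored["embedding"].shape)  # clip.mp4 (4,)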

142
backend/src/description_generator.py Normal file
View File

@ -0,0 +1,142 @@
import openai
import boto3
import ollama
import litellm
import json
import base64
from botocore.exceptions import ClientError
from datetime import datetime
from config import OPENAI_API_KEY, AWS_REGION
openai.api_key = OPENAI_API_KEY
class BedrockLanguageModel:
def __init__(self, model_id, region=AWS_REGION):
self.bedrock_client = boto3.client(
service_name='bedrock-runtime',
region_name=region
)
self.model_id = model_id
def generate(self, prompt, image_path=None):
print(f"DEBUG: Generating response for prompt: {prompt}")
try:
request_body = {
"anthropic_version": "bedrock-2023-05-31",
"max_tokens": 20000,
"temperature": 0.7,
"top_p": 0.9,
"messages": [
{
"role": "user",
"content": []
}
]
}
if image_path:
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
request_body["messages"][0]["content"].append({
"type": "image",
"image": {
"format": "png",
"source": {
"bytes": base64_image
}
}
})
request_body["messages"][0]["content"].append({
"type": "text",
"text": prompt
})
json_payload = json.dumps(request_body)
response = self.bedrock_client.invoke_model(
modelId=self.model_id,
contentType="application/json",
accept="application/json",
body=json_payload
)
response_body = json.loads(response['body'].read())
print(f"DEBUG: Raw response from Bedrock: {response_body}")
content_array = response_body.get('content', [])
if content_array and isinstance(content_array, list):
generated_text = content_array[0].get('text', '')
if not generated_text:
print(f"WARNING: Generated text is empty. Full response: {response_body}")
return "No response generated"
print(f"DEBUG: Generated text: {generated_text}")
return generated_text
else:
print(f"WARNING: Unexpected response format. Full response: {response_body}")
return "Unexpected response format"
except ClientError as e:
print(f"ERROR: Failed to generate text with Bedrock: {str(e)}")
return f"Error: {str(e)}"
except Exception as e:
print(f"ERROR: An unexpected error occurred: {str(e)}")
return f"Error: {str(e)}"
# Initialize the Bedrock model
bedrock_model = BedrockLanguageModel("anthropic.claude-3-sonnet-20240229-v1:0")
def generate_temporal_description(events, model_type='gpt', model_name='gpt-3.5-turbo'):
if not events:
return None
prompt = "Analyze the following sequence of events in a video segment, focusing on object detection, motion, and positioning. Pay special attention to any suspicious activities that might indicate theft:\n\n"
for event in events:
timestamp = f"{event['timestamp']:.2f}"
if event['type'] == 'object_detected':
prompt += f"- At {timestamp}s: {event['description']}\n"
elif event['type'] == 'object_motion':
prompt += f"- At {timestamp}s: {event['description']}\n"
elif event['type'] in ['motion_detected', 'bright_scene', 'dark_scene', 'color_dominance']:
prompt += f"- At {timestamp}s: {event['type']} - {event['description']}\n"
prompt += "\nProvide a concise description of what's happening in this video segment, interpreting the events as if they might be showing security camera activity. Consider the following points:\n"
prompt += "1. The number and types of objects (especially people) detected\n"
prompt += "2. The movement and positioning of these objects over time\n"
prompt += "3. Any suspicious patterns of movement or behavior\n"
prompt += "4. Changes in lighting or scene composition that might be relevant\n"
prompt += "Description:"
try:
if model_type == 'gpt':
response = openai.ChatCompletion.create(
model=model_name,
messages=[{"role": "user", "content": prompt}],
max_tokens=150,
n=1,
stop=None,
temperature=0.7,
)
return response.choices[0].message.content.strip()
elif model_type == 'bedrock':
return bedrock_model.generate(prompt)
elif model_type == 'ollama':
response = ollama.generate(model=model_name, prompt=prompt)
return response['response'].strip()
elif model_type == 'litellm':
response = litellm.completion(
model=model_name,
messages=[{"role": "user", "content": prompt}],
max_tokens=150,
temperature=0.7
)
return response.choices[0].message.content.strip()
else:
raise ValueError("Unsupported model type")
except Exception as e:
print(f"Error generating description: {str(e)}")
return None
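
For reference, a minimal sketch of how generate_temporal_description is called; the hand-written event dictionaries mirror the shape produced by event_detector.detect_event, and the Bedrock backend is assumed to be configured with AWS credentials:

from description_generator import generate_temporal_description

events = [
    {"timestamp": 1.23, "type": "object_detected",
     "description": "person detected with confidence 0.91 at position (320, 240)"},
    {"timestamp": 2.10, "type": "object_motion",
     "description": "person moved 42.00 pixels"},
    {"timestamp": 9.80, "type": "bright_scene",
     "description": "The scene is very bright"},
]

summary = generate_temporal_description(
    events,
    model_type="bedrock",
    model_name="anthropic.claude-3-sonnet-20240229-v1:0",
)
print(summary)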

117
backend/src/event_detector.py Normal file
View File

@ -0,0 +1,117 @@
import cv2
import torch
import numpy as np
from PIL import Image
# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
def detect_event(frame, timestamp, prev_frame=None, prev_objects=None):
events = []
objects = []
# Convert frame to RGB (YOLOv5 expects RGB images)
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Perform object detection
results = model(rgb_frame)
# Process results
for *box, conf, cls in results.xyxy[0]: # xyxy, confidence, class
class_name = model.names[int(cls)]
if conf > 0.5: # Confidence threshold
x1, y1, x2, y2 = map(int, box)
center_x = (x1 + x2) // 2
center_y = (y1 + y2) // 2
obj = {
'type': class_name,
'confidence': conf.item(),
'box': (x1, y1, x2, y2),
'center': (center_x, center_y)
}
objects.append(obj)
events.append({
'type': 'object_detected',
'description': f'{class_name} detected with confidence {conf:.2f} at position ({center_x}, {center_y})',
'timestamp': timestamp,
'object': obj
})
# Motion detection for objects
if prev_objects is not None:
for curr_obj in objects:
for prev_obj in prev_objects:
if curr_obj['type'] == prev_obj['type']:
dx = curr_obj['center'][0] - prev_obj['center'][0]
dy = curr_obj['center'][1] - prev_obj['center'][1]
distance = np.sqrt(dx**2 + dy**2)
if distance > 10: # Threshold for significant motion
events.append({
'type': 'object_motion',
'description': f'{curr_obj["type"]} moved {distance:.2f} pixels',
'timestamp': timestamp,
'object': curr_obj,
'motion': (dx, dy)
})
# Color dominance
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dominant_color = np.argmax(np.mean(frame, axis=(0,1)))
color_names = ['blue', 'green', 'red']
events.append({
'type': 'color_dominance',
'description': f'Dominant color is {color_names[dominant_color]}',
'timestamp': timestamp
})
# Brightness detection
brightness = np.mean(gray)
if brightness > 200:
events.append({
'type': 'bright_scene',
'description': 'The scene is very bright',
'timestamp': timestamp
})
elif brightness < 50:
events.append({
'type': 'dark_scene',
'description': 'The scene is very dark',
'timestamp': timestamp
})
# Overall motion detection
if prev_frame is not None:
frame_diff = cv2.absdiff(prev_frame, gray)
if np.mean(frame_diff) > 30:
events.append({
'type': 'motion_detected',
'description': 'Significant overall motion detected',
'timestamp': timestamp
})
return events, objects
def process_video(video_path):
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = 0
events = []
prev_frame = None
prev_objects = None
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame_count += 1
current_time = frame_count / fps
frame_events, objects = detect_event(frame, current_time, prev_frame, prev_objects)
events.extend(frame_events)
prev_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
prev_objects = objects
cap.release()
return events
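
A minimal usage sketch for process_video; sample.mp4 is a hypothetical local clip, and importing event_detector triggers the YOLOv5 weight download via torch.hub:

from event_detector import process_video

events = process_video("sample.mp4")
for event in events[:10]:
    print(f"{event['timestamp']:.2f}s  {event['type']}: {event['description']}")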

8
backend/src/models.py Normal file
View File

@ -0,0 +1,8 @@
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
def load_vgg_model():
return VGG16(weights='imagenet', include_top=False, pooling='avg')
def load_mobilenet_model():
return MobileNetV2(weights='imagenet', include_top=True)

34
backend/src/utils.py Normal file
View File

@ -0,0 +1,34 @@
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
def preprocess_frame(frame, target_size=(224, 224)):
frame = cv2.resize(frame, target_size)
frame = img_to_array(frame)
frame = np.expand_dims(frame, axis=0)
frame = preprocess_input(frame)
return frame
def generate_video_embedding(video_path, model):
cap = cv2.VideoCapture(video_path)
frames = []
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frames.append(preprocess_frame(frame))
if len(frames) >= 16: # Process 16 frames at a time
break
cap.release()
if not frames:
return None
# Generate embeddings
embeddings = model.predict(np.vstack(frames))
# Average the embeddings
avg_embedding = np.mean(embeddings, axis=0)
return avg_embedding

17
backend/src/vector_search.py Normal file
View File

@ -0,0 +1,17 @@
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def cosine_similarity_search(query_vector, database_vectors, top_k=5):
# Ensure query_vector is 2D
if query_vector.ndim == 1:
query_vector = query_vector.reshape(1, -1)
# Ensure database_vectors is 2D
if database_vectors.ndim == 1:
database_vectors = database_vectors.reshape(1, -1)
elif database_vectors.ndim > 2:
database_vectors = database_vectors.reshape(database_vectors.shape[0], -1)
similarities = cosine_similarity(query_vector, database_vectors)
top_indices = np.argsort(similarities[0])[-top_k:][::-1]
return top_indices, similarities[0][top_indices]
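
A small sketch of cosine_similarity_search with random vectors standing in for real embeddings (512 dimensions chosen arbitrarily):

import numpy as np
from vector_search import cosine_similarity_search

query = np.random.rand(512)          # stand-in for a query embedding
database = np.random.rand(20, 512)   # stand-in for 20 stored video embeddings

top_indices, scores = cosine_similarity_search(query, database, top_k=5)
for idx, score in zip(top_indices, scores):
    print(f"video index {idx}: similarity {score:.3f}")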

View File

@ -0,0 +1,32 @@
import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
# Load VGG16 model
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')
def generate_video_embedding(video_path):
cap = cv2.VideoCapture(video_path)
frames = []
while len(frames) < 16: # Process up to 16 frames
ret, frame = cap.read()
if not ret:
break
frame = cv2.resize(frame, (224, 224))
frame = img_to_array(frame)
frame = np.expand_dims(frame, axis=0)
frame = preprocess_input(frame)
frames.append(frame)
cap.release()
if not frames:
return None
# Generate embeddings
embeddings = vgg_model.predict(np.vstack(frames))
# Average the embeddings
avg_embedding = np.mean(embeddings, axis=0)
return avg_embedding

View File

@ -0,0 +1,38 @@
import cv2
import time
from event_detector import detect_event
from description_generator import generate_temporal_description
from datetime import datetime
def process_video_realtime(video_path, model_type='gpt', model_name='gpt-4o'):
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = 0
start_time = time.time()
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame_count += 1
current_time = start_time + (frame_count / fps)
# Detect events
events, _ = detect_event(frame, current_time)
# Generate description every 5 seconds
if frame_count % int(fps * 5) == 0:
description = generate_temporal_description(events, model_type, model_name)
if description:
print(f"At {datetime.fromtimestamp(current_time).strftime('%H:%M:%S')}:")
print(description)
print()
# Real-time display (optional)
cv2.imshow('Video', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()

0
frontend/README.md Normal file
View File

42
frontend/package.json Normal file
View File

@ -0,0 +1,42 @@
{
"name": "chrowatch",
"version": "0.1.0",
"private": true,
"dependencies": {
"@testing-library/jest-dom": "^5.16.5",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^0.27.2",
"hls.js": "^1.5.13",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-player": "^2.10.1",
"react-scripts": "5.0.1",
"socket.io-client": "^4.5.1",
"web-vitals": "^2.1.4"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}
}

45
frontend/public/index.html Normal file
View File

@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="Chrolens - AI-powered video analysis and temporal description generation"
/>
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
<!--
manifest.json provides metadata used when your web app is installed on a
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-->
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
-->
<title>Chrolens - Video Analysis Tool</title>
<style>
body {
margin: 0;
padding: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
</style>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This div with id="root" is where your React app will be rendered.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
-->
</body>
</html>

52
frontend/src/App.js Normal file
View File

@ -0,0 +1,52 @@
// src/App.js
import React, { useState } from 'react';
import VideoUpload from './components/VideoUpload';
import VideoPlayer from './components/VideoPlayer';
import EventList from './components/EventList';
import RTSPVideoAnalysis from './components/RTSPVideoAnalysis';
import SimilarVideoSearch from './components/SimilarVideoSearch';
import VectorizedVideos from './components/VectorizedVideos';
function App() {
const [videoFile, setVideoFile] = useState(null);
const [events, setEvents] = useState([]);
const [temporalDescriptions, setTemporalDescriptions] = useState([]);
const [showRTSPAnalysis, setShowRTSPAnalysis] = useState(false);
const [uploadedVideos, setUploadedVideos] = useState([]);
const handleUploadSuccess = (data) => {
setUploadedVideos(prevVideos => [...prevVideos, data.message]);
// You might want to refresh the list of vectorized videos here
};
const handleAnalysisComplete = (data) => {
setVideoFile(data.video_url); // Assuming the backend returns a URL to the processed video
setEvents(data.events);
setTemporalDescriptions(data.temporal_descriptions);
};
return (
<div className="App">
<h1>Video Analysis</h1>
<button onClick={() => setShowRTSPAnalysis(!showRTSPAnalysis)}>
{showRTSPAnalysis ? 'Show Video Upload' : 'Show RTSP Analysis'}
</button>
{showRTSPAnalysis ? (
<RTSPVideoAnalysis />
) : (
<>
<VideoUpload
onUploadSuccess={handleUploadSuccess}
onAnalysisComplete={handleAnalysisComplete}
/>
{videoFile && <VideoPlayer videoFile={videoFile} />}
<EventList events={events} temporalDescriptions={temporalDescriptions} />
<SimilarVideoSearch />
<VectorizedVideos videos={uploadedVideos} />
</>
)}
</div>
);
}
export default App;

26
frontend/src/components/EventList.js Normal file
View File

@ -0,0 +1,26 @@
import React from 'react';
function EventList({ events, temporalDescriptions }) {
return (
<div>
<h2>Events</h2>
<ul>
{events && events.map((event, index) => (
<li key={index}>
{event.timestamp.toFixed(2)}s: {event.type} - {event.description}
</li>
))}
</ul>
<h2>Temporal Descriptions</h2>
<ul>
{temporalDescriptions && temporalDescriptions.map((desc, index) => (
<li key={index}>
{desc.start_time.toFixed(2)}s - {desc.end_time.toFixed(2)}s: {desc.description}
</li>
))}
</ul>
</div>
);
}
export default EventList;

108
frontend/src/components/RTSPVideoAnalysis.js Normal file
View File

@ -0,0 +1,108 @@
import React, { useEffect, useState, useRef } from 'react';
import Hls from 'hls.js';
import io from 'socket.io-client';
const RTSPVideoAnalysis = () => {
const [rtspUrl, setRtspUrl] = useState('');
const [isStreaming, setIsStreaming] = useState(false);
const [events, setEvents] = useState([]);
const [description, setDescription] = useState('');
const [socket, setSocket] = useState(null);
const videoRef = useRef(null);
const hlsRef = useRef(null);
useEffect(() => {
const newSocket = io('http://localhost:5333');
setSocket(newSocket);
newSocket.on('analysis_result', (data) => {
setEvents(data.events);
if (data.description) {
setDescription(data.description);
}
});
return () => newSocket.close();
}, []);
const handleStartStream = () => {
if (socket && rtspUrl) {
socket.emit('start_rtsp_stream', { rtsp_url: rtspUrl }, (response) => {
if (response && response.hls_url) {
if (Hls.isSupported()) {
const hls = new Hls({
manifestLoadingTimeOut: 5000,
manifestLoadingMaxRetry: Infinity,
manifestLoadingRetryDelay: 500,
levelLoadingTimeOut: 5000,
levelLoadingMaxRetry: Infinity,
levelLoadingRetryDelay: 500
});
hlsRef.current = hls;
hls.loadSource(`http://localhost:5333${response.hls_url}`);
hls.attachMedia(videoRef.current);
hls.on(Hls.Events.MANIFEST_PARSED, () => {
videoRef.current.play().catch(e => console.error("Error attempting to play:", e));
});
} else if (videoRef.current.canPlayType('application/vnd.apple.mpegurl')) {
videoRef.current.src = `http://localhost:5333${response.hls_url}`;
videoRef.current.play().catch(e => console.error("Error attempting to play:", e));
}
setIsStreaming(true);
}
});
}
};
const handleStopStream = () => {
if (socket) {
socket.emit('stop_rtsp_stream');
setIsStreaming(false);
if (hlsRef.current) {
hlsRef.current.destroy();
}
if (videoRef.current) {
videoRef.current.src = '';
}
}
};
return (
<div>
<h2>RTSP Video Analysis</h2>
<input
type="text"
value={rtspUrl}
onChange={(e) => setRtspUrl(e.target.value)}
placeholder="Enter RTSP URL"
/>
<button onClick={handleStartStream} disabled={isStreaming}>
Start Stream
</button>
<button onClick={handleStopStream} disabled={!isStreaming}>
Stop Stream
</button>
<video
ref={videoRef}
controls
width="640"
height="360"
style={{ display: isStreaming ? 'block' : 'none' }}
/>
<div>
<h3>Detected Events:</h3>
<ul>
{events.map((event, index) => (
<li key={index}>{event.type}: {event.description}</li>
))}
</ul>
</div>
<div>
<h3>Latest Description:</h3>
<p>{description}</p>
</div>
</div>
);
};
export default RTSPVideoAnalysis;

54
frontend/src/components/SimilarVideoSearch.js Normal file
View File

@ -0,0 +1,54 @@
import React, { useState } from 'react';
import axios from 'axios';
const SimilarVideoSearch = () => {
const [file, setFile] = useState(null);
const [similarVideos, setSimilarVideos] = useState([]);
const handleFileChange = (e) => {
setFile(e.target.files[0]);
};
const handleSubmit = async (e) => {
e.preventDefault();
if (!file) return;
const formData = new FormData();
formData.append('file', file);
try {
const response = await axios.post('http://localhost:5333/api/similar_videos', formData, {
headers: {
'Content-Type': 'multipart/form-data',
},
});
setSimilarVideos(response.data);
} catch (error) {
console.error('Error finding similar videos:', error);
}
};
return (
<div>
<h2>Find Similar Videos</h2>
<form onSubmit={handleSubmit}>
<input type="file" onChange={handleFileChange} accept="image/*,video/*" />
<button type="submit">Find Similar Videos</button>
</form>
{similarVideos.length > 0 && (
<div>
<h3>Similar Videos:</h3>
<ul>
{similarVideos.map((video) => (
<li key={video.id}>
{video.filename} (Similarity: {video.similarity.toFixed(2)})
</li>
))}
</ul>
</div>
)}
</div>
);
};
export default SimilarVideoSearch;

20
frontend/src/components/UploadedVideosList.js Normal file
View File

@ -0,0 +1,20 @@
import React from 'react';
const UploadedVideosList = ({ videos }) => {
return (
<div>
<h3>Uploaded Videos</h3>
{videos.length === 0 ? (
<p>No videos uploaded yet.</p>
) : (
<ul>
{videos.map((video, index) => (
<li key={index}>{video}</li>
))}
</ul>
)}
</div>
);
};
export default UploadedVideosList;

21
frontend/src/components/VectorizedVideos.js Normal file
View File

@ -0,0 +1,21 @@
// src/components/VectorizedVideos.js
import React from 'react';
const VectorizedVideos = ({ videos }) => {
return (
<div>
<h2>Vectorized Videos</h2>
{videos.length === 0 ? (
<p>No videos have been vectorized yet.</p>
) : (
<ul>
{videos.map((video, index) => (
<li key={index}>{video}</li>
))}
</ul>
)}
</div>
);
};
export default VectorizedVideos;

16
frontend/src/components/VideoPlayer.js Normal file
View File

@ -0,0 +1,16 @@
import React from 'react';
function VideoPlayer({ videoFile }) {
// Accept either a File/Blob from an upload form or a plain URL string returned by the backend
const isFileObject = typeof videoFile !== 'string';
const videoUrl = isFileObject ? URL.createObjectURL(videoFile) : videoFile;
return (
<div>
<video width="640" height="480" controls>
<source src={videoUrl} type={isFileObject ? videoFile.type : undefined} />
Your browser does not support the video tag.
</video>
</div>
);
}
export default VideoPlayer;

76
frontend/src/components/VideoUpload.js Normal file
View File

@ -0,0 +1,76 @@
// src/components/VideoUpload.js
import React, { useState } from 'react';
import axios from 'axios';
const VideoUpload = ({ onUploadSuccess, onAnalysisComplete }) => {
const [file, setFile] = useState(null);
const [uploading, setUploading] = useState(false);
const [error, setError] = useState(null);
const handleFileChange = (e) => {
setFile(e.target.files[0]);
setError(null);
};
const handleUpload = async (action) => {
if (!file) {
setError('Please select a file to upload');
return;
}
setUploading(true);
setError(null);
const formData = new FormData();
formData.append('video', file);
try {
// First, upload the video
const uploadResponse = await axios.post('/api/upload', formData, {
headers: {
'Content-Type': 'multipart/form-data',
},
});
if (action === 'vectorize') {
// If vectorizing, call the vectorize endpoint
await axios.post(`/api/vectorize/${uploadResponse.data.video_id}`);
onUploadSuccess(uploadResponse.data);
} else if (action === 'analyze') {
// If analyzing, call the analyze endpoint
const analyzeResponse = await axios.post('/api/analyze', formData, {
headers: {
'Content-Type': 'multipart/form-data',
},
});
onAnalysisComplete(analyzeResponse.data);
}
setUploading(false);
} catch (error) {
setUploading(false);
setError('Error processing video: ' + (error.response?.data?.error || error.message));
}
};
return (
<div>
<h2>Upload Video</h2>
<input
type="file"
onChange={handleFileChange}
accept="video/*"
disabled={uploading}
/>
<button onClick={() => handleUpload('vectorize')} disabled={uploading || !file}>
{uploading ? 'Processing...' : 'Upload and Vectorize'}
</button>
<button onClick={() => handleUpload('analyze')} disabled={uploading || !file}>
{uploading ? 'Processing...' : 'Upload and Analyze'}
</button>
{error && <p style={{ color: 'red' }}>{error}</p>}
</div>
);
};
export default VideoUpload;

100
frontend/src/index.css Normal file
View File

@ -0,0 +1,100 @@
/* Reset default styles */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Set base font and color */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
font-size: 16px;
line-height: 1.5;
color: #333;
background-color: #f5f5f5;
}
/* Set up a container for your app content */
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
/* Basic heading styles */
h1, h2, h3, h4, h5, h6 {
margin-bottom: 0.5em;
font-weight: 600;
}
h1 { font-size: 2.5em; }
h2 { font-size: 2em; }
h3 { font-size: 1.75em; }
h4 { font-size: 1.5em; }
h5 { font-size: 1.25em; }
h6 { font-size: 1em; }
/* Basic link styles */
a {
color: #0066cc;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
/* Basic button styles */
button {
cursor: pointer;
padding: 10px 15px;
font-size: 1em;
border: none;
border-radius: 4px;
background-color: #0066cc;
color: white;
transition: background-color 0.3s ease;
}
button:hover {
background-color: #0052a3;
}
button:disabled {
background-color: #cccccc;
cursor: not-allowed;
}
/* Basic form styles */
input, textarea, select {
width: 100%;
padding: 10px;
margin-bottom: 10px;
border: 1px solid #ccc;
border-radius: 4px;
font-size: 1em;
}
/* Utility classes */
.text-center { text-align: center; }
.mt-1 { margin-top: 0.5rem; }
.mt-2 { margin-top: 1rem; }
.mt-3 { margin-top: 1.5rem; }
.mb-1 { margin-bottom: 0.5rem; }
.mb-2 { margin-bottom: 1rem; }
.mb-3 { margin-bottom: 1.5rem; }
/* Responsive design */
@media (max-width: 768px) {
body {
font-size: 14px;
}
.container {
padding: 10px;
}
}

11
frontend/src/index.js Normal file
View File

@ -0,0 +1,11 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
<React.StrictMode>
<App />
</React.StrictMode>
);