From b7e89a301db431e83463d9662b5635185eaba4e2 Mon Sep 17 00:00:00 2001 From: Mahesh Kommareddi Date: Tue, 23 Jul 2024 16:58:58 -0400 Subject: [PATCH] Main working files --- .gitignore | 35 ++ backend/.env.template | 4 + backend/README.md | 0 backend/requirements.txt | 22 ++ backend/src/app.py | 317 ++++++++++++++++++ backend/src/config.py | 19 ++ backend/src/database.py | 103 ++++++ backend/src/description_generator.py | 142 ++++++++ backend/src/event_detector.py | 117 +++++++ backend/src/models.py | 8 + backend/src/utils.py | 34 ++ backend/src/vector_search.py | 17 + backend/src/video_embedding.py | 32 ++ backend/src/video_processor.py | 38 +++ frontend/README.md | 0 frontend/package.json | 42 +++ frontend/public/index.html | 45 +++ frontend/src/App.js | 52 +++ frontend/src/components/EventList.js | 26 ++ frontend/src/components/RTSPVideoAnalysis.js | 108 ++++++ frontend/src/components/SimilarVideoSearch.js | 54 +++ frontend/src/components/UploadedVideosList.js | 20 ++ frontend/src/components/VectorizedVideos.js | 21 ++ frontend/src/components/VideoPlayer.js | 16 + frontend/src/components/VideoUpload.js | 76 +++++ frontend/src/index.css | 100 ++++++ frontend/src/index.js | 11 + 27 files changed, 1459 insertions(+) create mode 100644 .gitignore create mode 100644 backend/.env.template create mode 100644 backend/README.md create mode 100644 backend/requirements.txt create mode 100644 backend/src/app.py create mode 100644 backend/src/config.py create mode 100644 backend/src/database.py create mode 100644 backend/src/description_generator.py create mode 100644 backend/src/event_detector.py create mode 100644 backend/src/models.py create mode 100644 backend/src/utils.py create mode 100644 backend/src/vector_search.py create mode 100644 backend/src/video_embedding.py create mode 100644 backend/src/video_processor.py create mode 100644 frontend/README.md create mode 100644 frontend/package.json create mode 100644 frontend/public/index.html create mode 100644 frontend/src/App.js create mode 100644 frontend/src/components/EventList.js create mode 100644 frontend/src/components/RTSPVideoAnalysis.js create mode 100644 frontend/src/components/SimilarVideoSearch.js create mode 100644 frontend/src/components/UploadedVideosList.js create mode 100644 frontend/src/components/VectorizedVideos.js create mode 100644 frontend/src/components/VideoPlayer.js create mode 100644 frontend/src/components/VideoUpload.js create mode 100644 frontend/src/index.css create mode 100644 frontend/src/index.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2c0ae5f --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Python +__pycache__/ +*.py[cod] +*.so + +# Virtual Environment +venv/ +env/ +.env + +# Node +node_modules/ +npm-debug.log +yarn-error.log + +# Build files +build/ +dist/ + +# IDE specific files +.vscode/ +.idea/ + +# OS generated files +.DS_Store +Thumbs.db + +# Temporary files +*.swp +*.swo +*~ + +# Logs +*.log + diff --git a/backend/.env.template b/backend/.env.template new file mode 100644 index 0000000..3e3e4a5 --- /dev/null +++ b/backend/.env.template @@ -0,0 +1,4 @@ +OPENAI_API_KEY=your_openai_api_key_here +AWS_REGION=us-east-1 +# MILVUS_HOST=localhost +# MILVUS_PORT=19530 \ No newline at end of file diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000..e69de29 diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..aa51753 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,22 
@@ +flask +flask-cors +flask_socketio +numpy +tensorflow +pymilvus +openai +boto3 +ollama +litellm +python-dotenv +scikit-learn +torch>=1.7.0 +torchvision>=0.8.1 +opencv-python +Pillow>=7.1.2 +PyYAML>=5.3.1 +requests>=2.23.0 +scipy>=1.4.1 +tqdm>=4.41.0 +matplotlib>=3.2.2 +seaborn>=0.11.0 diff --git a/backend/src/app.py b/backend/src/app.py new file mode 100644 index 0000000..12a13ff --- /dev/null +++ b/backend/src/app.py @@ -0,0 +1,317 @@ +import subprocess +from flask import Flask, request, jsonify, send_from_directory +from flask_cors import CORS +from flask_socketio import SocketIO, emit +import cv2 +import numpy as np +import tempfile +import os +import json +import threading +import time +from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input +from tensorflow.keras.preprocessing.image import img_to_array +from event_detector import detect_event +from description_generator import generate_temporal_description +from werkzeug.utils import secure_filename +from database import insert_video, update_video_embedding, get_all_videos, get_video_by_id, get_vectorized_videos +from vector_search import cosine_similarity_search +from flask import Flask, request, jsonify, send_from_directory +from flask_cors import CORS +from event_detector import process_video +from io import BytesIO + +UPLOAD_FOLDER = 'uploads' +TEMP_FOLDER = 'temp' +os.makedirs(UPLOAD_FOLDER, exist_ok=True) +os.makedirs(TEMP_FOLDER, exist_ok=True) + +# Initialize Flask app +app = Flask(__name__, static_folder='../../frontend/build', static_url_path='') +app.config['UPLOAD_FOLDER'] = 'uploads' # Make sure this folder exists +CORS(app, resources={r"/api/*": {"origins": "*"}}) + +# Load VGG16 model for video embedding +vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg') + +def generate_video_embedding(video_path): + cap = cv2.VideoCapture(video_path) + frames = [] + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + frame = cv2.resize(frame, (224, 224)) + frame = img_to_array(frame) + frame = np.expand_dims(frame, axis=0) + frame = preprocess_input(frame) + frames.append(frame) + if len(frames) >= 16: # Process 16 frames at a time + break + cap.release() + + if not frames: + return None + + # Generate embeddings + embeddings = vgg_model.predict(np.vstack(frames)) + + # Average the embeddings + avg_embedding = np.mean(embeddings, axis=0) + + return avg_embedding + +socketio = SocketIO(app, cors_allowed_origins="*") + +rtsp_url = None +stop_stream = False +ffmpeg_process = None + +# Create a directory to store HLS segments +hls_dir = os.path.join(os.getcwd(), 'hls_temp') +os.makedirs(hls_dir, exist_ok=True) + +def convert_rtsp_to_hls(rtsp_url): + global ffmpeg_process + + output_path = os.path.join(hls_dir, 'stream.m3u8') + + ffmpeg_command = [ + 'ffmpeg', + '-i', rtsp_url, + '-c:v', 'libx264', + '-c:a', 'aac', + '-f', 'hls', + '-hls_time', '1', + '-hls_list_size', '3', + '-hls_flags', 'delete_segments+append_list+omit_endlist', + '-hls_segment_type', 'mpegts', + '-hls_segment_filename', os.path.join(hls_dir, 'segment%d.ts'), + output_path + ] + + ffmpeg_process = subprocess.Popen(ffmpeg_command) + +def process_rtsp_stream(): + global rtsp_url, stop_stream + cap = cv2.VideoCapture(rtsp_url) + + start_time = time.time() + frame_count = 0 + + while not stop_stream: + ret, frame = cap.read() + if not ret: + break + + current_time = time.time() - start_time + events = detect_event(frame, current_time) + + if int(current_time) % 10 == 0: + description = 
generate_temporal_description(events, 'bedrock', 'anthropic.claude-3-sonnet-20240229-v1:0') + else: + description = None + + socketio.emit('analysis_result', { + 'events': events, + 'description': description, + 'timestamp': current_time + }) + + frame_count += 1 + time.sleep(0.033) + + cap.release() + +@socketio.on('start_rtsp_stream') +def start_rtsp_stream(data): + global rtsp_url, stop_stream + rtsp_url = data['rtsp_url'] + stop_stream = False + threading.Thread(target=process_rtsp_stream).start() + convert_rtsp_to_hls(rtsp_url) + return {'hls_url': '/hls/stream.m3u8'} + +@socketio.on('stop_rtsp_stream') +def stop_rtsp_stream(): + global stop_stream, ffmpeg_process + stop_stream = True + if ffmpeg_process: + ffmpeg_process.terminate() + ffmpeg_process = None + +@app.route('/hls/') +def serve_hls(filename): + return send_from_directory(hls_dir, filename) + +@app.route('/api/analyze', methods=['POST']) +def analyze_video(): + if 'video' not in request.files: + return jsonify({'error': 'No video file provided'}), 400 + + video = request.files['video'] + model_type = request.form.get('model_type', 'bedrock') + model_name = request.form.get('model_name', 'anthropic.claude-3-sonnet-20240229-v1:0') + + with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file: + video.save(temp_file.name) + video_path = temp_file.name + + try: + events = process_video(video_path) + + # Generate temporal descriptions for every 10 seconds + temporal_descriptions = [] + for i in range(0, len(events), 10 * 30): # Assuming 30 fps + segment_events = events[i:i+10*30] + if segment_events: + start_time = segment_events[0]['timestamp'] + end_time = segment_events[-1]['timestamp'] + description = generate_temporal_description(segment_events, model_type, model_name) + temporal_descriptions.append({ + 'start_time': start_time, + 'end_time': end_time, + 'description': description + }) + + return jsonify({ + 'events': events, + 'temporal_descriptions': temporal_descriptions + }) + + finally: + os.unlink(video_path) + +@app.route('/api/upload', methods=['POST']) +def upload_video(): + if 'video' not in request.files: + return jsonify({'error': 'No video file provided'}), 400 + + video = request.files['video'] + if video.filename == '': + return jsonify({'error': 'No selected file'}), 400 + + if video: + filename = secure_filename(video.filename) + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + + # Ensure the upload folder exists + os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True) + + video.save(filepath) + + try: + video_id = insert_video(filename, filepath) + return jsonify({'message': 'Video uploaded successfully', 'video_id': video_id}), 200 + except Exception as e: + return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500 + + return jsonify({'error': 'Invalid file'}), 400 + +@app.route('/api/similar_videos', methods=['POST']) +def find_similar_videos(): + if 'file' not in request.files: + return jsonify({'error': 'No file provided'}), 400 + + file = request.files['file'] + if file.filename == '': + return jsonify({'error': 'No selected file'}), 400 + + if file: + filename = secure_filename(file.filename) + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + file.save(filepath) + + try: + # Generate embedding for the uploaded file + query_embedding = generate_video_embedding(filepath) + + if query_embedding is None: + return jsonify({'error': 'Failed to generate embedding for the uploaded file'}), 500 + + # Get all videos from database + 
all_videos = get_all_videos() + if not all_videos: + return jsonify({'error': 'No videos in database to compare'}), 404 + + database_vectors = np.array([video['embedding'] for video in all_videos if video['embedding'] is not None]) + + if database_vectors.size == 0: + return jsonify({'error': 'No embeddings found in the database'}), 404 + + # Perform similarity search + top_indices, similarities = cosine_similarity_search(query_embedding, database_vectors, top_k=5) + + similar_videos = [ + { + 'id': all_videos[i]['id'], + 'filename': all_videos[i]['filename'], + 'similarity': float(similarities[j]) + } + for j, i in enumerate(top_indices) + ] + + return jsonify(similar_videos) + + except Exception as e: + return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500 + + finally: + # Clean up temporary file + os.remove(filepath) + + return jsonify({'error': 'Invalid file'}), 400 + +@app.route('/api/vectorized_videos', methods=['GET']) +def get_vectorized_videos(): + videos = get_all_videos() # Implement this function in your database.py + return jsonify([{ + 'id': video['id'], + 'filename': video['filename'], + 'vectorized': video['embedding'] is not None + } for video in videos]) + +@app.route('/api/vectorize/', methods=['POST']) +def vectorize_video(video_id): + video = get_video_by_id(video_id) # Implement this function in your database.py + if not video: + return jsonify({'error': 'Video not found'}), 404 + + embedding = generate_video_embedding(video['filepath']) + update_video_embedding(video_id, embedding) # Implement this function in your database.py + + return jsonify({'message': 'Video vectorized successfully'}) + +@app.route('/api/thumbnail/') +def get_thumbnail(video_id): + video = get_video_by_id(video_id) + if not video: + return jsonify({'error': 'Video not found'}), 404 + + cap = cv2.VideoCapture(video['filepath']) + ret, frame = cap.read() + cap.release() + + if not ret: + return jsonify({'error': 'Failed to generate thumbnail'}), 500 + + _, buffer = cv2.imencode('.jpg', frame) + io_buf = BytesIO(buffer) + io_buf.seek(0) + + return send_file(io_buf, mimetype='image/jpeg') + + +# Serve React App +@app.route('/') +def serve(): + return send_from_directory(app.static_folder, 'index.html') + +@app.route('/') +def static_proxy(path): + file_name = path.split('/')[-1] + dir_name = os.path.join(app.static_folder, '/'.join(path.split('/')[:-1])) + return send_from_directory(dir_name, file_name) + +if __name__ == '__main__': + socketio.run(app, debug=True, port=5333) diff --git a/backend/src/config.py b/backend/src/config.py new file mode 100644 index 0000000..417a0d0 --- /dev/null +++ b/backend/src/config.py @@ -0,0 +1,19 @@ +import os +from dotenv import dotenv_values + +# Get the path to the directory this file is in +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + +# Connect the path with your '.env' file name +env_path = os.path.join(BASEDIR, '..', '.env') +print(f"Looking for .env file at: {env_path}") + +# Load the .env file +config = dotenv_values(env_path) + +# Get the environment variables +OPENAI_API_KEY = config.get('OPENAI_API_KEY', 'default_key') +AWS_REGION = config.get('AWS_REGION', 'default_region') + +print(f"OPENAI_API_KEY: {OPENAI_API_KEY}") +print(f"AWS_REGION: {AWS_REGION}") \ No newline at end of file diff --git a/backend/src/database.py b/backend/src/database.py new file mode 100644 index 0000000..4d7483f --- /dev/null +++ b/backend/src/database.py @@ -0,0 +1,103 @@ +import sqlite3 +import json +import numpy as np +import os + 
+DATABASE_PATH = 'videos.db' + +def dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + +def get_db_connection(): + conn = sqlite3.connect(DATABASE_PATH) + conn.row_factory = dict_factory + return conn + +def init_db(): + conn = get_db_connection() + c = conn.cursor() + + # Check if the videos table exists + c.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='videos'") + table_exists = c.fetchone() + + if table_exists: + # If the table exists, check if the filepath column exists + c.execute("PRAGMA table_info(videos)") + columns = [column['name'] for column in c.fetchall()] + + if 'filepath' not in columns: + # Add the filepath column if it doesn't exist + c.execute("ALTER TABLE videos ADD COLUMN filepath TEXT") + else: + # If the table doesn't exist, create it with all necessary columns + c.execute('''CREATE TABLE videos + (id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT NOT NULL, + filepath TEXT NOT NULL, + embedding TEXT)''') + + conn.commit() + conn.close() + +def insert_video(filename, filepath): + conn = get_db_connection() + c = conn.cursor() + c.execute("INSERT INTO videos (filename, filepath) VALUES (?, ?)", + (filename, filepath)) + video_id = c.lastrowid + conn.commit() + conn.close() + return video_id + +def update_video_embedding(video_id, embedding): + conn = get_db_connection() + c = conn.cursor() + embedding_json = json.dumps(embedding.flatten().tolist()) + c.execute("UPDATE videos SET embedding = ? WHERE id = ?", + (embedding_json, video_id)) + conn.commit() + conn.close() + +def get_all_videos(): + conn = get_db_connection() + c = conn.cursor() + c.execute("SELECT * FROM videos") + videos = c.fetchall() + conn.close() + + for video in videos: + if video['embedding']: + video['embedding'] = np.array(json.loads(video['embedding'])).reshape(1, -1) + + return videos + +def get_video_by_id(video_id): + conn = get_db_connection() + c = conn.cursor() + c.execute("SELECT * FROM videos WHERE id = ?", (video_id,)) + video = c.fetchone() + conn.close() + + if video and video['embedding']: + video['embedding'] = np.array(json.loads(video['embedding'])) + + return video + +def get_vectorized_videos(): + conn = get_db_connection() + c = conn.cursor() + c.execute("SELECT * FROM videos WHERE embedding IS NOT NULL") + videos = c.fetchall() + conn.close() + + for video in videos: + video['embedding'] = np.array(json.loads(video['embedding'])) + + return videos + +# Initialize the database when this module is imported +init_db() \ No newline at end of file diff --git a/backend/src/description_generator.py b/backend/src/description_generator.py new file mode 100644 index 0000000..9ead7b9 --- /dev/null +++ b/backend/src/description_generator.py @@ -0,0 +1,142 @@ +import openai +import boto3 +import ollama +import litellm +import json +import base64 +from botocore.exceptions import ClientError +from datetime import datetime +from config import OPENAI_API_KEY, AWS_REGION + +openai.api_key = OPENAI_API_KEY + +class BedrockLanguageModel: + def __init__(self, model_id, region=AWS_REGION): + self.bedrock_client = boto3.client( + service_name='bedrock-runtime', + region_name=region + ) + self.model_id = model_id + + def generate(self, prompt, image_path=None): + print(f"DEBUG: Generating response for prompt: {prompt}") + try: + request_body = { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": 20000, + "temperature": 0.7, + "top_p": 0.9, + "messages": [ + { + "role": "user", + "content": [] + } + 
] + } + + if image_path: + with open(image_path, "rb") as image_file: + base64_image = base64.b64encode(image_file.read()).decode('utf-8') + request_body["messages"][0]["content"].append({ + "type": "image", + "image": { + "format": "png", + "source": { + "bytes": base64_image + } + } + }) + + request_body["messages"][0]["content"].append({ + "type": "text", + "text": prompt + }) + + json_payload = json.dumps(request_body) + + response = self.bedrock_client.invoke_model( + modelId=self.model_id, + contentType="application/json", + accept="application/json", + body=json_payload + ) + + response_body = json.loads(response['body'].read()) + print(f"DEBUG: Raw response from Bedrock: {response_body}") + + content_array = response_body.get('content', []) + if content_array and isinstance(content_array, list): + generated_text = content_array[0].get('text', '') + if not generated_text: + print(f"WARNING: Generated text is empty. Full response: {response_body}") + return "No response generated" + print(f"DEBUG: Generated text: {generated_text}") + return generated_text + else: + print(f"WARNING: Unexpected response format. Full response: {response_body}") + return "Unexpected response format" + + except ClientError as e: + print(f"ERROR: Failed to generate text with Bedrock: {str(e)}") + return f"Error: {str(e)}" + except Exception as e: + print(f"ERROR: An unexpected error occurred: {str(e)}") + return f"Error: {str(e)}" + +# Initialize the Bedrock model +bedrock_model = BedrockLanguageModel("anthropic.claude-3-sonnet-20240229-v1:0") + +def generate_temporal_description(events, model_type='gpt', model_name='gpt-3.5-turbo'): + if not events: + return None + + prompt = "Analyze the following sequence of events in a video segment, focusing on object detection, motion, and positioning. Pay special attention to any suspicious activities that might indicate theft:\n\n" + for event in events: + timestamp = f"{event['timestamp']:.2f}" + if event['type'] == 'object_detected': + prompt += f"- At {timestamp}s: {event['description']}\n" + elif event['type'] == 'object_motion': + prompt += f"- At {timestamp}s: {event['description']}\n" + elif event['type'] in ['motion_detected', 'bright_scene', 'dark_scene', 'color_dominance']: + prompt += f"- At {timestamp}s: {event['type']} - {event['description']}\n" + + prompt += "\nProvide a concise description of what's happening in this video segment, interpreting the events as if they might be showing security camera activity. Consider the following points:\n" + prompt += "1. The number and types of objects (especially people) detected\n" + prompt += "2. The movement and positioning of these objects over time\n" + prompt += "3. Any suspicious patterns of movement or behavior\n" + prompt += "4. 
Changes in lighting or scene composition that might be relevant\n" + prompt += "Description:" + try: + if model_type == 'gpt': + response = openai.ChatCompletion.create( + model=model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=150, + n=1, + stop=None, + temperature=0.7, + ) + return response.choices[0].message.content.strip() + + elif model_type == 'bedrock': + return bedrock_model.generate(prompt) + + elif model_type == 'ollama': + response = ollama.generate(model=model_name, prompt=prompt) + return response['response'].strip() + + elif model_type == 'litellm': + response = litellm.completion( + model=model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=150, + temperature=0.7 + ) + return response.choices[0].message.content.strip() + + else: + raise ValueError("Unsupported model type") + + except Exception as e: + print(f"Error generating description: {str(e)}") + return None \ No newline at end of file diff --git a/backend/src/event_detector.py b/backend/src/event_detector.py new file mode 100644 index 0000000..93ff8dc --- /dev/null +++ b/backend/src/event_detector.py @@ -0,0 +1,117 @@ +import cv2 +import torch +import numpy as np +from PIL import Image + +# Load YOLOv5 model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) + +def detect_event(frame, timestamp, prev_frame=None, prev_objects=None): + events = [] + objects = [] + + # Convert frame to RGB (YOLOv5 expects RGB images) + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + # Perform object detection + results = model(rgb_frame) + + # Process results + for *box, conf, cls in results.xyxy[0]: # xyxy, confidence, class + class_name = model.names[int(cls)] + if conf > 0.5: # Confidence threshold + x1, y1, x2, y2 = map(int, box) + center_x = (x1 + x2) // 2 + center_y = (y1 + y2) // 2 + obj = { + 'type': class_name, + 'confidence': conf.item(), + 'box': (x1, y1, x2, y2), + 'center': (center_x, center_y) + } + objects.append(obj) + events.append({ + 'type': 'object_detected', + 'description': f'{class_name} detected with confidence {conf:.2f} at position ({center_x}, {center_y})', + 'timestamp': timestamp, + 'object': obj + }) + + # Motion detection for objects + if prev_objects is not None: + for curr_obj in objects: + for prev_obj in prev_objects: + if curr_obj['type'] == prev_obj['type']: + dx = curr_obj['center'][0] - prev_obj['center'][0] + dy = curr_obj['center'][1] - prev_obj['center'][1] + distance = np.sqrt(dx**2 + dy**2) + if distance > 10: # Threshold for significant motion + events.append({ + 'type': 'object_motion', + 'description': f'{curr_obj["type"]} moved {distance:.2f} pixels', + 'timestamp': timestamp, + 'object': curr_obj, + 'motion': (dx, dy) + }) + + # Color dominance + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + dominant_color = np.argmax(np.mean(frame, axis=(0,1))) + color_names = ['blue', 'green', 'red'] + events.append({ + 'type': 'color_dominance', + 'description': f'Dominant color is {color_names[dominant_color]}', + 'timestamp': timestamp + }) + + # Brightness detection + brightness = np.mean(gray) + if brightness > 200: + events.append({ + 'type': 'bright_scene', + 'description': 'The scene is very bright', + 'timestamp': timestamp + }) + elif brightness < 50: + events.append({ + 'type': 'dark_scene', + 'description': 'The scene is very dark', + 'timestamp': timestamp + }) + + # Overall motion detection + if prev_frame is not None: + frame_diff = cv2.absdiff(prev_frame, gray) + if np.mean(frame_diff) > 30: + events.append({ + 
'type': 'motion_detected', + 'description': 'Significant overall motion detected', + 'timestamp': timestamp + }) + + return events, objects + +def process_video(video_path): + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = 0 + events = [] + prev_frame = None + prev_objects = None + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + frame_count += 1 + current_time = frame_count / fps + + frame_events, objects = detect_event(frame, current_time, prev_frame, prev_objects) + events.extend(frame_events) + + prev_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + prev_objects = objects + + cap.release() + return events \ No newline at end of file diff --git a/backend/src/models.py b/backend/src/models.py new file mode 100644 index 0000000..91fcf6e --- /dev/null +++ b/backend/src/models.py @@ -0,0 +1,8 @@ +from tensorflow.keras.applications.vgg16 import VGG16 +from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 + +def load_vgg_model(): + return VGG16(weights='imagenet', include_top=False, pooling='avg') + +def load_mobilenet_model(): + return MobileNetV2(weights='imagenet', include_top=True) \ No newline at end of file diff --git a/backend/src/utils.py b/backend/src/utils.py new file mode 100644 index 0000000..257691e --- /dev/null +++ b/backend/src/utils.py @@ -0,0 +1,34 @@ +import cv2 +import numpy as np +from tensorflow.keras.preprocessing.image import img_to_array +from tensorflow.keras.applications.vgg16 import preprocess_input + +def preprocess_frame(frame, target_size=(224, 224)): + frame = cv2.resize(frame, target_size) + frame = img_to_array(frame) + frame = np.expand_dims(frame, axis=0) + frame = preprocess_input(frame) + return frame + +def generate_video_embedding(video_path, model): + cap = cv2.VideoCapture(video_path) + frames = [] + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + frames.append(preprocess_frame(frame)) + if len(frames) >= 16: # Process 16 frames at a time + break + cap.release() + + if not frames: + return None + + # Generate embeddings + embeddings = model.predict(np.vstack(frames)) + + # Average the embeddings + avg_embedding = np.mean(embeddings, axis=0) + + return avg_embedding \ No newline at end of file diff --git a/backend/src/vector_search.py b/backend/src/vector_search.py new file mode 100644 index 0000000..6b64eb9 --- /dev/null +++ b/backend/src/vector_search.py @@ -0,0 +1,17 @@ +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity + +def cosine_similarity_search(query_vector, database_vectors, top_k=5): + # Ensure query_vector is 2D + if query_vector.ndim == 1: + query_vector = query_vector.reshape(1, -1) + + # Ensure database_vectors is 2D + if database_vectors.ndim == 1: + database_vectors = database_vectors.reshape(1, -1) + elif database_vectors.ndim > 2: + database_vectors = database_vectors.reshape(database_vectors.shape[0], -1) + + similarities = cosine_similarity(query_vector, database_vectors) + top_indices = np.argsort(similarities[0])[-top_k:][::-1] + return top_indices, similarities[0][top_indices] \ No newline at end of file diff --git a/backend/src/video_embedding.py b/backend/src/video_embedding.py new file mode 100644 index 0000000..aa734f9 --- /dev/null +++ b/backend/src/video_embedding.py @@ -0,0 +1,32 @@ +import cv2 +import numpy as np +from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input +from tensorflow.keras.preprocessing.image import img_to_array + +# Load VGG16 model +vgg_model = 
VGG16(weights='imagenet', include_top=False, pooling='avg') + +def generate_video_embedding(video_path): + cap = cv2.VideoCapture(video_path) + frames = [] + while len(frames) < 16: # Process up to 16 frames + ret, frame = cap.read() + if not ret: + break + frame = cv2.resize(frame, (224, 224)) + frame = img_to_array(frame) + frame = np.expand_dims(frame, axis=0) + frame = preprocess_input(frame) + frames.append(frame) + cap.release() + + if not frames: + return None + + # Generate embeddings + embeddings = vgg_model.predict(np.vstack(frames)) + + # Average the embeddings + avg_embedding = np.mean(embeddings, axis=0) + + return avg_embedding \ No newline at end of file diff --git a/backend/src/video_processor.py b/backend/src/video_processor.py new file mode 100644 index 0000000..0d484a1 --- /dev/null +++ b/backend/src/video_processor.py @@ -0,0 +1,38 @@ +import cv2 +import time +from event_detector import detect_event +from description_generator import generate_description +from datetime import datetime + +def process_video_realtime(video_path, model_type='gpt', model_name='gpt-4o'): + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = 0 + start_time = time.time() + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + frame_count += 1 + current_time = start_time + (frame_count / fps) + + # Detect events + events = detect_event(frame, current_time) + + # Generate description every 5 seconds + if frame_count % int(fps * 5) == 0: + description = generate_description(events, model_type, model_name) + if description: + print(f"At {datetime.fromtimestamp(current_time).strftime('%H:%M:%S')}:") + print(description) + print() + + # Real-time display (optional) + cv2.imshow('Video', frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cap.release() + cv2.destroyAllWindows() \ No newline at end of file diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000..e69de29 diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..06f4214 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,42 @@ +{ + "name": "chrowatch", + "version": "0.1.0", + "private": true, + "dependencies": { + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^13.4.0", + "@testing-library/user-event": "^13.5.0", + "axios": "^0.27.2", + "hls.js": "^1.5.13", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-player": "^2.10.1", + "react-scripts": "5.0.1", + "socket.io-client": "^4.5.1", + "web-vitals": "^2.1.4" + }, + "scripts": { + "start": "react-scripts start", + "build": "react-scripts build", + "test": "react-scripts test", + "eject": "react-scripts eject" + }, + "eslintConfig": { + "extends": [ + "react-app", + "react-app/jest" + ] + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + } +} diff --git a/frontend/public/index.html b/frontend/public/index.html new file mode 100644 index 0000000..cb22803 --- /dev/null +++ b/frontend/public/index.html @@ -0,0 +1,45 @@ + + + + + + + + + + + + + Chrolens - Video Analysis Tool + + + + +
+ + + \ No newline at end of file diff --git a/frontend/src/App.js b/frontend/src/App.js new file mode 100644 index 0000000..6149589 --- /dev/null +++ b/frontend/src/App.js @@ -0,0 +1,52 @@ +// src/App.js +import React, { useState } from 'react'; +import VideoUpload from './components/VideoUpload'; +import VideoPlayer from './components/VideoPlayer'; +import EventList from './components/EventList'; +import RTSPVideoAnalysis from './components/RTSPVideoAnalysis'; +import SimilarVideoSearch from './components/SimilarVideoSearch'; +import VectorizedVideos from './components/VectorizedVideos'; + +function App() { + const [videoFile, setVideoFile] = useState(null); + const [events, setEvents] = useState([]); + const [temporalDescriptions, setTemporalDescriptions] = useState([]); + const [showRTSPAnalysis, setShowRTSPAnalysis] = useState(false); + const [uploadedVideos, setUploadedVideos] = useState([]); + + const handleUploadSuccess = (data) => { + setUploadedVideos(prevVideos => [...prevVideos, data.message]); + // You might want to refresh the list of vectorized videos here + }; + + const handleAnalysisComplete = (data) => { + setVideoFile(data.video_url); // Assuming the backend returns a URL to the processed video + setEvents(data.events); + setTemporalDescriptions(data.temporal_descriptions); + }; + + return ( +
+    <div className="container">
+      <h1>Video Analysis</h1>
+      <button onClick={() => setShowRTSPAnalysis(!showRTSPAnalysis)}>Toggle RTSP Analysis</button>
+      {showRTSPAnalysis ? (
+        <RTSPVideoAnalysis />
+      ) : (
+        <>
+          <VideoUpload onUploadSuccess={handleUploadSuccess} onAnalysisComplete={handleAnalysisComplete} />
+          {videoFile && <VideoPlayer videoFile={videoFile} />}
+          <EventList events={events} temporalDescriptions={temporalDescriptions} />
+          <SimilarVideoSearch />
+          <VectorizedVideos videos={uploadedVideos} />
+        </>
+      )}
+    </div>
+ ); +} + +export default App; \ No newline at end of file diff --git a/frontend/src/components/EventList.js b/frontend/src/components/EventList.js new file mode 100644 index 0000000..3f92df7 --- /dev/null +++ b/frontend/src/components/EventList.js @@ -0,0 +1,26 @@ +import React from 'react'; + +function EventList({ events, temporalDescriptions }) { + return ( +
+    <div>
+      <h2>Events</h2>
+      <ul>
+        {events && events.map((event, index) => (
+          <li key={index}>
+            {event.timestamp.toFixed(2)}s: {event.type} - {event.description}
+          </li>
+        ))}
+      </ul>
+      <h2>Temporal Descriptions</h2>
+      <ul>
+        {temporalDescriptions && temporalDescriptions.map((desc, index) => (
+          <li key={index}>
+            {desc.start_time.toFixed(2)}s - {desc.end_time.toFixed(2)}s: {desc.description}
+          </li>
+        ))}
+      </ul>
+    </div>
+ ); +} + +export default EventList; \ No newline at end of file diff --git a/frontend/src/components/RTSPVideoAnalysis.js b/frontend/src/components/RTSPVideoAnalysis.js new file mode 100644 index 0000000..2ec3c07 --- /dev/null +++ b/frontend/src/components/RTSPVideoAnalysis.js @@ -0,0 +1,108 @@ +import React, { useEffect, useState, useRef } from 'react'; +import Hls from 'hls.js'; +import io from 'socket.io-client'; + +const RTSPVideoAnalysis = () => { + const [rtspUrl, setRtspUrl] = useState(''); + const [isStreaming, setIsStreaming] = useState(false); + const [events, setEvents] = useState([]); + const [description, setDescription] = useState(''); + const [socket, setSocket] = useState(null); + const videoRef = useRef(null); + const hlsRef = useRef(null); + + useEffect(() => { + const newSocket = io('http://localhost:5333'); + setSocket(newSocket); + + newSocket.on('analysis_result', (data) => { + setEvents(data.events); + if (data.description) { + setDescription(data.description); + } + }); + + return () => newSocket.close(); + }, []); + + const handleStartStream = () => { + if (socket && rtspUrl) { + socket.emit('start_rtsp_stream', { rtsp_url: rtspUrl }, (response) => { + if (response && response.hls_url) { + if (Hls.isSupported()) { + const hls = new Hls({ + manifestLoadingTimeOut: 5000, + manifestLoadingMaxRetry: Infinity, + manifestLoadingRetryDelay: 500, + levelLoadingTimeOut: 5000, + levelLoadingMaxRetry: Infinity, + levelLoadingRetryDelay: 500 + }); + hlsRef.current = hls; + hls.loadSource(`http://localhost:5333${response.hls_url}`); + hls.attachMedia(videoRef.current); + hls.on(Hls.Events.MANIFEST_PARSED, () => { + videoRef.current.play().catch(e => console.error("Error attempting to play:", e)); + }); + } else if (videoRef.current.canPlayType('application/vnd.apple.mpegurl')) { + videoRef.current.src = `http://localhost:5333${response.hls_url}`; + videoRef.current.play().catch(e => console.error("Error attempting to play:", e)); + } + setIsStreaming(true); + } + }); + } + }; + + const handleStopStream = () => { + if (socket) { + socket.emit('stop_rtsp_stream'); + setIsStreaming(false); + if (hlsRef.current) { + hlsRef.current.destroy(); + } + if (videoRef.current) { + videoRef.current.src = ''; + } + } + }; + + return ( +
+    <div>
+      <h2>RTSP Video Analysis</h2>
+      <input
+        type="text"
+        value={rtspUrl}
+        onChange={(e) => setRtspUrl(e.target.value)}
+        placeholder="Enter RTSP URL"
+      />
+      <button onClick={handleStartStream} disabled={isStreaming}>Start Stream</button>
+      <button onClick={handleStopStream} disabled={!isStreaming}>Stop Stream</button>
+      <video ref={videoRef} autoPlay muted controls />
+    </div>
+ ); +}; + +export default RTSPVideoAnalysis; \ No newline at end of file diff --git a/frontend/src/components/SimilarVideoSearch.js b/frontend/src/components/SimilarVideoSearch.js new file mode 100644 index 0000000..604b801 --- /dev/null +++ b/frontend/src/components/SimilarVideoSearch.js @@ -0,0 +1,54 @@ +import React, { useState } from 'react'; +import axios from 'axios'; + +const SimilarVideoSearch = () => { + const [file, setFile] = useState(null); + const [similarVideos, setSimilarVideos] = useState([]); + + const handleFileChange = (e) => { + setFile(e.target.files[0]); + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + if (!file) return; + + const formData = new FormData(); + formData.append('file', file); + + try { + const response = await axios.post('http://localhost:5333/api/similar_videos', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); + setSimilarVideos(response.data); + } catch (error) { + console.error('Error finding similar videos:', error); + } + }; + + return ( +
+    <div>
+      <h2>Find Similar Videos</h2>
+      <form onSubmit={handleSubmit}>
+        <input type="file" accept="video/*" onChange={handleFileChange} />
+        <button type="submit">Search</button>
+      </form>
+      {similarVideos.length > 0 && (
+        <div>
+          <h3>Similar Videos:</h3>
+          <ul>
+            {similarVideos.map((video) => (
+              <li key={video.id}>
+                {video.filename} (Similarity: {video.similarity.toFixed(2)})
+              </li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+ ); +}; + +export default SimilarVideoSearch; \ No newline at end of file diff --git a/frontend/src/components/UploadedVideosList.js b/frontend/src/components/UploadedVideosList.js new file mode 100644 index 0000000..5595ebe --- /dev/null +++ b/frontend/src/components/UploadedVideosList.js @@ -0,0 +1,20 @@ +import React from 'react'; + +const UploadedVideosList = ({ videos }) => { + return ( +
+    <div>
+      <h2>Uploaded Videos</h2>
+      {videos.length === 0 ? (
+        <p>No videos uploaded yet.</p>
+      ) : (
+        <ul>
+          {videos.map((video, index) => (
+            <li key={index}>{video}</li>
+          ))}
+        </ul>
+      )}
+    </div>
+ ); +}; + +export default UploadedVideosList; \ No newline at end of file diff --git a/frontend/src/components/VectorizedVideos.js b/frontend/src/components/VectorizedVideos.js new file mode 100644 index 0000000..75e4c2c --- /dev/null +++ b/frontend/src/components/VectorizedVideos.js @@ -0,0 +1,21 @@ +// src/components/VectorizedVideos.js +import React from 'react'; + +const VectorizedVideos = ({ videos }) => { + return ( +
+    <div>
+      <h2>Vectorized Videos</h2>
+      {videos.length === 0 ? (
+        <p>No videos have been vectorized yet.</p>
+      ) : (
+        <ul>
+          {videos.map((video, index) => (
+            <li key={index}>{video}</li>
+          ))}
+        </ul>
+      )}
+    </div>
+ ); +}; + +export default VectorizedVideos; \ No newline at end of file diff --git a/frontend/src/components/VideoPlayer.js b/frontend/src/components/VideoPlayer.js new file mode 100644 index 0000000..cad7e92 --- /dev/null +++ b/frontend/src/components/VideoPlayer.js @@ -0,0 +1,16 @@ +import React from 'react'; + +function VideoPlayer({ videoFile }) { + const videoUrl = URL.createObjectURL(videoFile); + + return ( +
+    <div>
+      <video src={videoUrl} controls width="640" />
+    </div>
+ ); +} + +export default VideoPlayer; \ No newline at end of file diff --git a/frontend/src/components/VideoUpload.js b/frontend/src/components/VideoUpload.js new file mode 100644 index 0000000..bb929c1 --- /dev/null +++ b/frontend/src/components/VideoUpload.js @@ -0,0 +1,76 @@ +// src/components/VideoUpload.js +import React, { useState } from 'react'; +import axios from 'axios'; + +const VideoUpload = ({ onUploadSuccess, onAnalysisComplete }) => { + const [file, setFile] = useState(null); + const [uploading, setUploading] = useState(false); + const [error, setError] = useState(null); + + const handleFileChange = (e) => { + setFile(e.target.files[0]); + setError(null); + }; + + const handleUpload = async (action) => { + if (!file) { + setError('Please select a file to upload'); + return; + } + + setUploading(true); + setError(null); + + const formData = new FormData(); + formData.append('video', file); + + try { + // First, upload the video + const uploadResponse = await axios.post('/api/upload', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); + + if (action === 'vectorize') { + // If vectorizing, call the vectorize endpoint + await axios.post(`/api/vectorize/${uploadResponse.data.video_id}`); + onUploadSuccess(uploadResponse.data); + } else if (action === 'analyze') { + // If analyzing, call the analyze endpoint + const analyzeResponse = await axios.post('/api/analyze', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); + onAnalysisComplete(analyzeResponse.data); + } + + setUploading(false); + } catch (error) { + setUploading(false); + setError('Error processing video: ' + (error.response?.data?.error || error.message)); + } + }; + + return ( +
+    <div>
+      <h2>Upload Video</h2>
+      <input type="file" accept="video/*" onChange={handleFileChange} />
+      <button onClick={() => handleUpload('vectorize')} disabled={uploading}>
+        {uploading ? 'Uploading...' : 'Upload & Vectorize'}
+      </button>
+      <button onClick={() => handleUpload('analyze')} disabled={uploading}>
+        {uploading ? 'Uploading...' : 'Upload & Analyze'}
+      </button>
+      {error && <p className="error">{error}</p>}
+    </div>
+ ); +}; + +export default VideoUpload; \ No newline at end of file diff --git a/frontend/src/index.css b/frontend/src/index.css new file mode 100644 index 0000000..9f7cda6 --- /dev/null +++ b/frontend/src/index.css @@ -0,0 +1,100 @@ +/* Reset default styles */ +* { + margin: 0; + padding: 0; + box-sizing: border-box; + } + + /* Set base font and color */ + body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + font-size: 16px; + line-height: 1.5; + color: #333; + background-color: #f5f5f5; + } + + /* Set up a container for your app content */ + .container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; + } + + /* Basic heading styles */ + h1, h2, h3, h4, h5, h6 { + margin-bottom: 0.5em; + font-weight: 600; + } + + h1 { font-size: 2.5em; } + h2 { font-size: 2em; } + h3 { font-size: 1.75em; } + h4 { font-size: 1.5em; } + h5 { font-size: 1.25em; } + h6 { font-size: 1em; } + + /* Basic link styles */ + a { + color: #0066cc; + text-decoration: none; + } + + a:hover { + text-decoration: underline; + } + + /* Basic button styles */ + button { + cursor: pointer; + padding: 10px 15px; + font-size: 1em; + border: none; + border-radius: 4px; + background-color: #0066cc; + color: white; + transition: background-color 0.3s ease; + } + + button:hover { + background-color: #0052a3; + } + + button:disabled { + background-color: #cccccc; + cursor: not-allowed; + } + + /* Basic form styles */ + input, textarea, select { + width: 100%; + padding: 10px; + margin-bottom: 10px; + border: 1px solid #ccc; + border-radius: 4px; + font-size: 1em; + } + + /* Utility classes */ + .text-center { text-align: center; } + .mt-1 { margin-top: 0.5rem; } + .mt-2 { margin-top: 1rem; } + .mt-3 { margin-top: 1.5rem; } + .mb-1 { margin-bottom: 0.5rem; } + .mb-2 { margin-bottom: 1rem; } + .mb-3 { margin-bottom: 1.5rem; } + + /* Responsive design */ + @media (max-width: 768px) { + body { + font-size: 14px; + } + + .container { + padding: 10px; + } + } \ No newline at end of file diff --git a/frontend/src/index.js b/frontend/src/index.js new file mode 100644 index 0000000..2cb1087 --- /dev/null +++ b/frontend/src/index.js @@ -0,0 +1,11 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import './index.css'; +import App from './App'; + +const root = ReactDOM.createRoot(document.getElementById('root')); +root.render( + + + +);