Main working files

parent 96d33ef5ab · commit b7e89a301d
.gitignore (vendored) · 35 lines · new file
@@ -0,0 +1,35 @@
# Python
__pycache__/
*.py[cod]
*.so

# Virtual Environment
venv/
env/
.env

# Node
node_modules/
npm-debug.log
yarn-error.log

# Build files
build/
dist/

# IDE specific files
.vscode/
.idea/

# OS generated files
.DS_Store
Thumbs.db

# Temporary files
*.swp
*.swo
*~

# Logs
*.log
backend/.env.template · 4 lines · new file
@@ -0,0 +1,4 @@
OPENAI_API_KEY=your-openai-api-key-here
AWS_REGION=us-east-1
# MILVUS_HOST=localhost
# MILVUS_PORT=19530
backend/README.md · 0 lines · new empty file
backend/requirements.txt · 22 lines · new file
@@ -0,0 +1,22 @@
flask
flask-cors
flask_socketio
numpy
tensorflow
pymilvus
openai
boto3
ollama
litellm
python-dotenv
scikit-learn
torch>=1.7.0
torchvision>=0.8.1
opencv-python
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
tqdm>=4.41.0
matplotlib>=3.2.2
seaborn>=0.11.0
backend/src/app.py · 317 lines · new file
@@ -0,0 +1,317 @@
import subprocess
import cv2
import numpy as np
import tempfile
import os
import threading
import time
from io import BytesIO

from flask import Flask, request, jsonify, send_from_directory, send_file
from flask_cors import CORS
from flask_socketio import SocketIO
from werkzeug.utils import secure_filename
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array

from event_detector import detect_event, process_video
from description_generator import generate_temporal_description
from database import insert_video, update_video_embedding, get_all_videos, get_video_by_id
from vector_search import cosine_similarity_search

UPLOAD_FOLDER = 'uploads'
TEMP_FOLDER = 'temp'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(TEMP_FOLDER, exist_ok=True)

# Initialize Flask app
app = Flask(__name__, static_folder='../../frontend/build', static_url_path='')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
CORS(app, resources={r"/api/*": {"origins": "*"}})

# Load VGG16 model for video embedding
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')


def generate_video_embedding(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (224, 224))
        frame = img_to_array(frame)
        frame = np.expand_dims(frame, axis=0)
        frame = preprocess_input(frame)
        frames.append(frame)
        if len(frames) >= 16:  # Process 16 frames at a time
            break
    cap.release()

    if not frames:
        return None

    # Generate embeddings, then average them into one vector per video
    embeddings = vgg_model.predict(np.vstack(frames))
    avg_embedding = np.mean(embeddings, axis=0)

    return avg_embedding


socketio = SocketIO(app, cors_allowed_origins="*")

rtsp_url = None
stop_stream = False
ffmpeg_process = None

# Create a directory to store HLS segments
hls_dir = os.path.join(os.getcwd(), 'hls_temp')
os.makedirs(hls_dir, exist_ok=True)


def convert_rtsp_to_hls(rtsp_url):
    global ffmpeg_process

    output_path = os.path.join(hls_dir, 'stream.m3u8')

    ffmpeg_command = [
        'ffmpeg',
        '-i', rtsp_url,
        '-c:v', 'libx264',
        '-c:a', 'aac',
        '-f', 'hls',
        '-hls_time', '1',
        '-hls_list_size', '3',
        '-hls_flags', 'delete_segments+append_list+omit_endlist',
        '-hls_segment_type', 'mpegts',
        '-hls_segment_filename', os.path.join(hls_dir, 'segment%d.ts'),
        output_path
    ]

    ffmpeg_process = subprocess.Popen(ffmpeg_command)


def process_rtsp_stream():
    global rtsp_url, stop_stream
    cap = cv2.VideoCapture(rtsp_url)

    start_time = time.time()
    frame_count = 0

    while not stop_stream:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = time.time() - start_time
        # detect_event returns (events, objects); only the events are needed here
        events, _ = detect_event(frame, current_time)

        if int(current_time) % 10 == 0:
            description = generate_temporal_description(events, 'bedrock', 'anthropic.claude-3-sonnet-20240229-v1:0')
        else:
            description = None

        socketio.emit('analysis_result', {
            'events': events,
            'description': description,
            'timestamp': current_time
        })

        frame_count += 1
        time.sleep(0.033)

    cap.release()


@socketio.on('start_rtsp_stream')
def start_rtsp_stream(data):
    global rtsp_url, stop_stream
    rtsp_url = data['rtsp_url']
    stop_stream = False
    threading.Thread(target=process_rtsp_stream).start()
    convert_rtsp_to_hls(rtsp_url)
    return {'hls_url': '/hls/stream.m3u8'}


@socketio.on('stop_rtsp_stream')
def stop_rtsp_stream():
    global stop_stream, ffmpeg_process
    stop_stream = True
    if ffmpeg_process:
        ffmpeg_process.terminate()
        ffmpeg_process = None


@app.route('/hls/<path:filename>')
def serve_hls(filename):
    return send_from_directory(hls_dir, filename)


@app.route('/api/analyze', methods=['POST'])
def analyze_video():
    if 'video' not in request.files:
        return jsonify({'error': 'No video file provided'}), 400

    video = request.files['video']
    model_type = request.form.get('model_type', 'bedrock')
    model_name = request.form.get('model_name', 'anthropic.claude-3-sonnet-20240229-v1:0')

    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
        video.save(temp_file.name)
        video_path = temp_file.name

    try:
        events = process_video(video_path)

        # Generate temporal descriptions for every 10 seconds
        temporal_descriptions = []
        for i in range(0, len(events), 10 * 30):  # Assuming 30 fps
            segment_events = events[i:i + 10 * 30]
            if segment_events:
                start_time = segment_events[0]['timestamp']
                end_time = segment_events[-1]['timestamp']
                description = generate_temporal_description(segment_events, model_type, model_name)
                temporal_descriptions.append({
                    'start_time': start_time,
                    'end_time': end_time,
                    'description': description
                })

        return jsonify({
            'events': events,
            'temporal_descriptions': temporal_descriptions
        })

    finally:
        os.unlink(video_path)


@app.route('/api/upload', methods=['POST'])
def upload_video():
    if 'video' not in request.files:
        return jsonify({'error': 'No video file provided'}), 400

    video = request.files['video']
    if video.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if video:
        filename = secure_filename(video.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        # Ensure the upload folder exists
        os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

        video.save(filepath)

        try:
            video_id = insert_video(filename, filepath)
            return jsonify({'message': 'Video uploaded successfully', 'video_id': video_id}), 200
        except Exception as e:
            return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500

    return jsonify({'error': 'Invalid file'}), 400


@app.route('/api/similar_videos', methods=['POST'])
def find_similar_videos():
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if file:
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)

        try:
            # Generate embedding for the uploaded file
            query_embedding = generate_video_embedding(filepath)

            if query_embedding is None:
                return jsonify({'error': 'Failed to generate embedding for the uploaded file'}), 500

            # Get all videos from database
            all_videos = get_all_videos()
            if not all_videos:
                return jsonify({'error': 'No videos in database to compare'}), 404

            # Keep only videos with stored embeddings, so the indices returned
            # by the similarity search line up with the right database rows
            vectorized_videos = [v for v in all_videos if v['embedding'] is not None]
            if not vectorized_videos:
                return jsonify({'error': 'No embeddings found in the database'}), 404

            database_vectors = np.array([v['embedding'] for v in vectorized_videos])

            # Perform similarity search
            top_indices, similarities = cosine_similarity_search(query_embedding, database_vectors, top_k=5)

            similar_videos = [
                {
                    'id': vectorized_videos[i]['id'],
                    'filename': vectorized_videos[i]['filename'],
                    'similarity': float(similarities[j])
                }
                for j, i in enumerate(top_indices)
            ]

            return jsonify(similar_videos)

        except Exception as e:
            return jsonify({'error': f'An error occurred during processing: {str(e)}'}), 500

        finally:
            # Clean up temporary file
            os.remove(filepath)

    return jsonify({'error': 'Invalid file'}), 400


@app.route('/api/vectorized_videos', methods=['GET'])
def list_vectorized_videos():  # named so it does not shadow database.get_vectorized_videos
    videos = get_all_videos()
    return jsonify([{
        'id': video['id'],
        'filename': video['filename'],
        'vectorized': video['embedding'] is not None
    } for video in videos])


@app.route('/api/vectorize/<int:video_id>', methods=['POST'])
def vectorize_video(video_id):
    video = get_video_by_id(video_id)
    if not video:
        return jsonify({'error': 'Video not found'}), 404

    embedding = generate_video_embedding(video['filepath'])
    update_video_embedding(video_id, embedding)

    return jsonify({'message': 'Video vectorized successfully'})


@app.route('/api/thumbnail/<int:video_id>')
def get_thumbnail(video_id):
    video = get_video_by_id(video_id)
    if not video:
        return jsonify({'error': 'Video not found'}), 404

    cap = cv2.VideoCapture(video['filepath'])
    ret, frame = cap.read()
    cap.release()

    if not ret:
        return jsonify({'error': 'Failed to generate thumbnail'}), 500

    _, buffer = cv2.imencode('.jpg', frame)
    io_buf = BytesIO(buffer)
    io_buf.seek(0)

    return send_file(io_buf, mimetype='image/jpeg')


# Serve React App
@app.route('/')
def serve():
    return send_from_directory(app.static_folder, 'index.html')


@app.route('/<path:path>')
def static_proxy(path):
    file_name = path.split('/')[-1]
    dir_name = os.path.join(app.static_folder, '/'.join(path.split('/')[:-1]))
    return send_from_directory(dir_name, file_name)


if __name__ == '__main__':
    socketio.run(app, debug=True, port=5333)
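As a usage sketch (illustrative, not part of the commit), the /api/analyze route above can be exercised with a plain HTTP client. The field names ('video', 'model_type', 'model_name') come from the route handler and the port from socketio.run(); the file path 'sample.mp4' is a hypothetical local clip.

import requests

with open('sample.mp4', 'rb') as f:  # hypothetical test clip
    resp = requests.post(
        'http://localhost:5333/api/analyze',
        files={'video': f},
        data={'model_type': 'bedrock',
              'model_name': 'anthropic.claude-3-sonnet-20240229-v1:0'},
    )

resp.raise_for_status()
result = resp.json()
print(len(result['events']), 'events detected')
for seg in result['temporal_descriptions']:
    print(f"{seg['start_time']:.1f}-{seg['end_time']:.1f}s: {seg['description']}")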
backend/src/config.py · 19 lines · new file
@@ -0,0 +1,19 @@
import os
from dotenv import dotenv_values

# Get the path to the directory this file is in
BASEDIR = os.path.abspath(os.path.dirname(__file__))

# Join that path with the '.env' file name
env_path = os.path.join(BASEDIR, '..', '.env')
print(f"Looking for .env file at: {env_path}")

# Load the .env file
config = dotenv_values(env_path)

# Get the environment variables
OPENAI_API_KEY = config.get('OPENAI_API_KEY', 'default_key')
AWS_REGION = config.get('AWS_REGION', 'default_region')

# Avoid echoing the secret itself; just confirm it was loaded
print(f"OPENAI_API_KEY loaded: {OPENAI_API_KEY != 'default_key'}")
print(f"AWS_REGION: {AWS_REGION}")
backend/src/database.py · 103 lines · new file
@@ -0,0 +1,103 @@
import sqlite3
import json
import numpy as np

DATABASE_PATH = 'videos.db'


def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d


def get_db_connection():
    conn = sqlite3.connect(DATABASE_PATH)
    conn.row_factory = dict_factory
    return conn


def init_db():
    conn = get_db_connection()
    c = conn.cursor()

    # Check if the videos table exists
    c.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='videos'")
    table_exists = c.fetchone()

    if table_exists:
        # If the table exists, check if the filepath column exists
        c.execute("PRAGMA table_info(videos)")
        columns = [column['name'] for column in c.fetchall()]

        if 'filepath' not in columns:
            # Add the filepath column if it doesn't exist
            c.execute("ALTER TABLE videos ADD COLUMN filepath TEXT")
    else:
        # If the table doesn't exist, create it with all necessary columns
        c.execute('''CREATE TABLE videos
                     (id INTEGER PRIMARY KEY AUTOINCREMENT,
                      filename TEXT NOT NULL,
                      filepath TEXT NOT NULL,
                      embedding TEXT)''')

    conn.commit()
    conn.close()


def insert_video(filename, filepath):
    conn = get_db_connection()
    c = conn.cursor()
    c.execute("INSERT INTO videos (filename, filepath) VALUES (?, ?)",
              (filename, filepath))
    video_id = c.lastrowid
    conn.commit()
    conn.close()
    return video_id


def update_video_embedding(video_id, embedding):
    conn = get_db_connection()
    c = conn.cursor()
    embedding_json = json.dumps(embedding.flatten().tolist())
    c.execute("UPDATE videos SET embedding = ? WHERE id = ?",
              (embedding_json, video_id))
    conn.commit()
    conn.close()


def get_all_videos():
    conn = get_db_connection()
    c = conn.cursor()
    c.execute("SELECT * FROM videos")
    videos = c.fetchall()
    conn.close()

    for video in videos:
        if video['embedding']:
            video['embedding'] = np.array(json.loads(video['embedding'])).reshape(1, -1)

    return videos


def get_video_by_id(video_id):
    conn = get_db_connection()
    c = conn.cursor()
    c.execute("SELECT * FROM videos WHERE id = ?", (video_id,))
    video = c.fetchone()
    conn.close()

    if video and video['embedding']:
        video['embedding'] = np.array(json.loads(video['embedding']))

    return video


def get_vectorized_videos():
    conn = get_db_connection()
    c = conn.cursor()
    c.execute("SELECT * FROM videos WHERE embedding IS NOT NULL")
    videos = c.fetchall()
    conn.close()

    for video in videos:
        video['embedding'] = np.array(json.loads(video['embedding']))

    return videos


# Initialize the database when this module is imported
init_db()
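A minimal sketch of the intended call sequence for these helpers (illustrative, not part of the commit); the file path is hypothetical and the 128-dim random vector is a stand-in for a real VGG16 embedding.

import numpy as np
from database import insert_video, update_video_embedding, get_video_by_id

video_id = insert_video('clip.mp4', 'uploads/clip.mp4')      # hypothetical path
update_video_embedding(video_id, np.random.rand(128))        # stored as a JSON list

video = get_video_by_id(video_id)
print(video['filename'], video['embedding'].shape)           # -> clip.mp4 (128,)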
backend/src/description_generator.py · 142 lines · new file
@@ -0,0 +1,142 @@
import openai
import boto3
import ollama
import litellm
import json
import base64
from botocore.exceptions import ClientError
from config import OPENAI_API_KEY, AWS_REGION

openai.api_key = OPENAI_API_KEY


class BedrockLanguageModel:
    def __init__(self, model_id, region=AWS_REGION):
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name=region
        )
        self.model_id = model_id

    def generate(self, prompt, image_path=None):
        print(f"DEBUG: Generating response for prompt: {prompt}")
        try:
            request_body = {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 4096,  # Claude 3 Sonnet caps output at 4096 tokens
                "temperature": 0.7,
                "top_p": 0.9,
                "messages": [
                    {
                        "role": "user",
                        "content": []
                    }
                ]
            }

            if image_path:
                with open(image_path, "rb") as image_file:
                    base64_image = base64.b64encode(image_file.read()).decode('utf-8')
                # Anthropic messages on Bedrock expect a base64 source block for images
                request_body["messages"][0]["content"].append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": base64_image
                    }
                })

            request_body["messages"][0]["content"].append({
                "type": "text",
                "text": prompt
            })

            json_payload = json.dumps(request_body)

            response = self.bedrock_client.invoke_model(
                modelId=self.model_id,
                contentType="application/json",
                accept="application/json",
                body=json_payload
            )

            response_body = json.loads(response['body'].read())
            print(f"DEBUG: Raw response from Bedrock: {response_body}")

            content_array = response_body.get('content', [])
            if content_array and isinstance(content_array, list):
                generated_text = content_array[0].get('text', '')
                if not generated_text:
                    print(f"WARNING: Generated text is empty. Full response: {response_body}")
                    return "No response generated"
                print(f"DEBUG: Generated text: {generated_text}")
                return generated_text
            else:
                print(f"WARNING: Unexpected response format. Full response: {response_body}")
                return "Unexpected response format"

        except ClientError as e:
            print(f"ERROR: Failed to generate text with Bedrock: {str(e)}")
            return f"Error: {str(e)}"
        except Exception as e:
            print(f"ERROR: An unexpected error occurred: {str(e)}")
            return f"Error: {str(e)}"


# Initialize the Bedrock model
bedrock_model = BedrockLanguageModel("anthropic.claude-3-sonnet-20240229-v1:0")


def generate_temporal_description(events, model_type='gpt', model_name='gpt-3.5-turbo'):
    if not events:
        return None

    prompt = "Analyze the following sequence of events in a video segment, focusing on object detection, motion, and positioning. Pay special attention to any suspicious activities that might indicate theft:\n\n"
    for event in events:
        timestamp = f"{event['timestamp']:.2f}"
        if event['type'] in ('object_detected', 'object_motion'):
            prompt += f"- At {timestamp}s: {event['description']}\n"
        elif event['type'] in ['motion_detected', 'bright_scene', 'dark_scene', 'color_dominance']:
            prompt += f"- At {timestamp}s: {event['type']} - {event['description']}\n"

    prompt += "\nProvide a concise description of what's happening in this video segment, interpreting the events as if they might be showing security camera activity. Consider the following points:\n"
    prompt += "1. The number and types of objects (especially people) detected\n"
    prompt += "2. The movement and positioning of these objects over time\n"
    prompt += "3. Any suspicious patterns of movement or behavior\n"
    prompt += "4. Changes in lighting or scene composition that might be relevant\n"
    prompt += "Description:"

    try:
        if model_type == 'gpt':
            response = openai.ChatCompletion.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=150,
                n=1,
                stop=None,
                temperature=0.7,
            )
            return response.choices[0].message.content.strip()

        elif model_type == 'bedrock':
            return bedrock_model.generate(prompt)

        elif model_type == 'ollama':
            response = ollama.generate(model=model_name, prompt=prompt)
            return response['response'].strip()

        elif model_type == 'litellm':
            response = litellm.completion(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=150,
                temperature=0.7
            )
            return response.choices[0].message.content.strip()

        else:
            raise ValueError("Unsupported model type")

    except Exception as e:
        print(f"Error generating description: {str(e)}")
        return None
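A sketch of how generate_temporal_description consumes the event dictionaries produced by event_detector.py (illustrative, not part of the commit); the two sample events are fabricated, and the bedrock path requires valid AWS credentials.

from description_generator import generate_temporal_description

events = [  # fabricated, in the shape emitted by detect_event()
    {'type': 'object_detected', 'timestamp': 1.20,
     'description': 'person detected with confidence 0.91 at position (320, 240)'},
    {'type': 'object_motion', 'timestamp': 2.43,
     'description': 'person moved 57.31 pixels'},
]

text = generate_temporal_description(
    events, model_type='bedrock',
    model_name='anthropic.claude-3-sonnet-20240229-v1:0')
print(text)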
backend/src/event_detector.py · 117 lines · new file
@@ -0,0 +1,117 @@
import cv2
import torch
import numpy as np

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)


def detect_event(frame, timestamp, prev_frame=None, prev_objects=None):
    events = []
    objects = []

    # Convert frame to RGB (YOLOv5 expects RGB images)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Perform object detection
    results = model(rgb_frame)

    # Process results
    for *box, conf, cls in results.xyxy[0]:  # xyxy, confidence, class
        class_name = model.names[int(cls)]
        if conf > 0.5:  # Confidence threshold
            x1, y1, x2, y2 = map(int, box)
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            obj = {
                'type': class_name,
                'confidence': conf.item(),
                'box': (x1, y1, x2, y2),
                'center': (center_x, center_y)
            }
            objects.append(obj)
            events.append({
                'type': 'object_detected',
                'description': f'{class_name} detected with confidence {conf:.2f} at position ({center_x}, {center_y})',
                'timestamp': timestamp,
                'object': obj
            })

    # Motion detection for objects
    if prev_objects is not None:
        for curr_obj in objects:
            for prev_obj in prev_objects:
                if curr_obj['type'] == prev_obj['type']:
                    dx = curr_obj['center'][0] - prev_obj['center'][0]
                    dy = curr_obj['center'][1] - prev_obj['center'][1]
                    distance = np.sqrt(dx**2 + dy**2)
                    if distance > 10:  # Threshold for significant motion
                        events.append({
                            'type': 'object_motion',
                            'description': f'{curr_obj["type"]} moved {distance:.2f} pixels',
                            'timestamp': timestamp,
                            'object': curr_obj,
                            'motion': (dx, dy)
                        })

    # Color dominance
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    dominant_color = np.argmax(np.mean(frame, axis=(0, 1)))
    color_names = ['blue', 'green', 'red']
    events.append({
        'type': 'color_dominance',
        'description': f'Dominant color is {color_names[dominant_color]}',
        'timestamp': timestamp
    })

    # Brightness detection
    brightness = np.mean(gray)
    if brightness > 200:
        events.append({
            'type': 'bright_scene',
            'description': 'The scene is very bright',
            'timestamp': timestamp
        })
    elif brightness < 50:
        events.append({
            'type': 'dark_scene',
            'description': 'The scene is very dark',
            'timestamp': timestamp
        })

    # Overall motion detection
    if prev_frame is not None:
        frame_diff = cv2.absdiff(prev_frame, gray)
        if np.mean(frame_diff) > 30:
            events.append({
                'type': 'motion_detected',
                'description': 'Significant overall motion detected',
                'timestamp': timestamp
            })

    return events, objects


def process_video(video_path):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = 0
    events = []
    prev_frame = None
    prev_objects = None

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        current_time = frame_count / fps

        frame_events, objects = detect_event(frame, current_time, prev_frame, prev_objects)
        events.extend(frame_events)

        prev_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        prev_objects = objects

    cap.release()
    return events
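For reference, a minimal driver for process_video above (illustrative, not part of the commit); 'clip.mp4' is a stand-in path, and the first call downloads YOLOv5 weights via torch.hub.

from event_detector import process_video

events = process_video('clip.mp4')  # hypothetical test clip
for event in events[:10]:
    print(f"{event['timestamp']:6.2f}s  {event['type']:16s}  {event['description']}")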
backend/src/models.py · 8 lines · new file
@@ -0,0 +1,8 @@
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2


def load_vgg_model():
    return VGG16(weights='imagenet', include_top=False, pooling='avg')


def load_mobilenet_model():
    return MobileNetV2(weights='imagenet', include_top=True)
backend/src/utils.py · 34 lines · new file
@@ -0,0 +1,34 @@
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input


def preprocess_frame(frame, target_size=(224, 224)):
    frame = cv2.resize(frame, target_size)
    frame = img_to_array(frame)
    frame = np.expand_dims(frame, axis=0)
    frame = preprocess_input(frame)
    return frame


def generate_video_embedding(video_path, model):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(preprocess_frame(frame))
        if len(frames) >= 16:  # Process 16 frames at a time
            break
    cap.release()

    if not frames:
        return None

    # Generate embeddings
    embeddings = model.predict(np.vstack(frames))

    # Average the embeddings
    avg_embedding = np.mean(embeddings, axis=0)

    return avg_embedding
backend/src/vector_search.py · 17 lines · new file
@@ -0,0 +1,17 @@
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def cosine_similarity_search(query_vector, database_vectors, top_k=5):
    # Ensure query_vector is 2D
    if query_vector.ndim == 1:
        query_vector = query_vector.reshape(1, -1)

    # Ensure database_vectors is 2D
    if database_vectors.ndim == 1:
        database_vectors = database_vectors.reshape(1, -1)
    elif database_vectors.ndim > 2:
        database_vectors = database_vectors.reshape(database_vectors.shape[0], -1)

    similarities = cosine_similarity(query_vector, database_vectors)
    top_indices = np.argsort(similarities[0])[-top_k:][::-1]
    return top_indices, similarities[0][top_indices]
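A self-contained worked example of cosine_similarity_search (illustrative, not part of the commit), showing the expected shapes and return values; the vectors are random, not real embeddings.

import numpy as np
from vector_search import cosine_similarity_search

query = np.random.rand(512)           # reshaped internally to (1, 512)
database = np.random.rand(20, 512)    # 20 stored embeddings

top_indices, scores = cosine_similarity_search(query, database, top_k=5)
print(top_indices)   # indices of the 5 most similar rows, best first
print(scores)        # their cosine similarities, in the same order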
backend/src/video_embedding.py · 32 lines · new file
@@ -0,0 +1,32 @@
import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array

# Load VGG16 model
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')


def generate_video_embedding(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < 16:  # Process up to 16 frames
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (224, 224))
        frame = img_to_array(frame)
        frame = np.expand_dims(frame, axis=0)
        frame = preprocess_input(frame)
        frames.append(frame)
    cap.release()

    if not frames:
        return None

    # Generate embeddings
    embeddings = vgg_model.predict(np.vstack(frames))

    # Average the embeddings
    avg_embedding = np.mean(embeddings, axis=0)

    return avg_embedding
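A sketch tying this module to the database helpers: embed a stored video and persist the vector (illustrative, not part of the commit; the video id is hypothetical).

from video_embedding import generate_video_embedding
from database import get_video_by_id, update_video_embedding

video = get_video_by_id(1)  # assumes a row with id=1 exists
embedding = generate_video_embedding(video['filepath'])
if embedding is not None:   # None means no frames could be read
    update_video_embedding(1, embedding)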
backend/src/video_processor.py · 38 lines · new file
@@ -0,0 +1,38 @@
import cv2
import time
from event_detector import detect_event
from description_generator import generate_temporal_description
from datetime import datetime


def process_video_realtime(video_path, model_type='gpt', model_name='gpt-4o'):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = 0
    start_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        current_time = start_time + (frame_count / fps)

        # Detect events (detect_event returns (events, objects))
        events, _ = detect_event(frame, current_time)

        # Generate description every 5 seconds
        if frame_count % int(fps * 5) == 0:
            description = generate_temporal_description(events, model_type, model_name)
            if description:
                print(f"At {datetime.fromtimestamp(current_time).strftime('%H:%M:%S')}:")
                print(description)
                print()

        # Real-time display (optional)
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
frontend/README.md · 0 lines · new empty file
frontend/package.json · 42 lines · new file
@@ -0,0 +1,42 @@
{
  "name": "chrowatch",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@testing-library/jest-dom": "^5.16.5",
    "@testing-library/react": "^13.4.0",
    "@testing-library/user-event": "^13.5.0",
    "axios": "^0.27.2",
    "hls.js": "^1.5.13",
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
    "react-player": "^2.10.1",
    "react-scripts": "5.0.1",
    "socket.io-client": "^4.5.1",
    "web-vitals": "^2.1.4"
  },
  "scripts": {
    "start": "react-scripts start",
    "build": "react-scripts build",
    "test": "react-scripts test",
    "eject": "react-scripts eject"
  },
  "eslintConfig": {
    "extends": [
      "react-app",
      "react-app/jest"
    ]
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  }
}
frontend/public/index.html · 45 lines · new file
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="theme-color" content="#000000" />
    <meta
      name="description"
      content="Chrolens - AI-powered video analysis and temporal description generation"
    />
    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
    <!--
      manifest.json provides metadata used when your web app is installed on a
      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
    -->
    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
    <!--
      Notice the use of %PUBLIC_URL% in the tags above.
      It will be replaced with the URL of the `public` folder during the build.
      Only files inside the `public` folder can be referenced from the HTML.
    -->
    <title>Chrolens - Video Analysis Tool</title>
    <style>
      body {
        margin: 0;
        padding: 0;
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
          'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
          sans-serif;
        -webkit-font-smoothing: antialiased;
        -moz-osx-font-smoothing: grayscale;
      }
    </style>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
    <!--
      This div with id="root" is where your React app will be rendered.
      You can add webfonts, meta tags, or analytics to this file.
      The build step will place the bundled scripts into the <body> tag.
    -->
  </body>
</html>
frontend/src/App.js · 52 lines · new file
@@ -0,0 +1,52 @@
// src/App.js
import React, { useState } from 'react';
import VideoUpload from './components/VideoUpload';
import VideoPlayer from './components/VideoPlayer';
import EventList from './components/EventList';
import RTSPVideoAnalysis from './components/RTSPVideoAnalysis';
import SimilarVideoSearch from './components/SimilarVideoSearch';
import VectorizedVideos from './components/VectorizedVideos';

function App() {
  const [videoFile, setVideoFile] = useState(null);
  const [events, setEvents] = useState([]);
  const [temporalDescriptions, setTemporalDescriptions] = useState([]);
  const [showRTSPAnalysis, setShowRTSPAnalysis] = useState(false);
  const [uploadedVideos, setUploadedVideos] = useState([]);

  const handleUploadSuccess = (data) => {
    setUploadedVideos(prevVideos => [...prevVideos, data.message]);
    // You might want to refresh the list of vectorized videos here
  };

  const handleAnalysisComplete = (data) => {
    setVideoFile(data.video_url); // Assuming the backend returns a URL to the processed video
    setEvents(data.events);
    setTemporalDescriptions(data.temporal_descriptions);
  };

  return (
    <div className="App">
      <h1>Video Analysis</h1>
      <button onClick={() => setShowRTSPAnalysis(!showRTSPAnalysis)}>
        {showRTSPAnalysis ? 'Show Video Upload' : 'Show RTSP Analysis'}
      </button>
      {showRTSPAnalysis ? (
        <RTSPVideoAnalysis />
      ) : (
        <>
          <VideoUpload
            onUploadSuccess={handleUploadSuccess}
            onAnalysisComplete={handleAnalysisComplete}
          />
          {videoFile && <VideoPlayer videoFile={videoFile} />}
          <EventList events={events} temporalDescriptions={temporalDescriptions} />
          <SimilarVideoSearch />
          <VectorizedVideos videos={uploadedVideos} />
        </>
      )}
    </div>
  );
}

export default App;
frontend/src/components/EventList.js · 26 lines · new file
@@ -0,0 +1,26 @@
import React from 'react';

function EventList({ events, temporalDescriptions }) {
  return (
    <div>
      <h2>Events</h2>
      <ul>
        {events && events.map((event, index) => (
          <li key={index}>
            {event.timestamp.toFixed(2)}s: {event.type} - {event.description}
          </li>
        ))}
      </ul>
      <h2>Temporal Descriptions</h2>
      <ul>
        {temporalDescriptions && temporalDescriptions.map((desc, index) => (
          <li key={index}>
            {desc.start_time.toFixed(2)}s - {desc.end_time.toFixed(2)}s: {desc.description}
          </li>
        ))}
      </ul>
    </div>
  );
}

export default EventList;
frontend/src/components/RTSPVideoAnalysis.js · 108 lines · new file
@@ -0,0 +1,108 @@
import React, { useEffect, useState, useRef } from 'react';
import Hls from 'hls.js';
import io from 'socket.io-client';

const RTSPVideoAnalysis = () => {
  const [rtspUrl, setRtspUrl] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);
  const [events, setEvents] = useState([]);
  const [description, setDescription] = useState('');
  const [socket, setSocket] = useState(null);
  const videoRef = useRef(null);
  const hlsRef = useRef(null);

  useEffect(() => {
    const newSocket = io('http://localhost:5333');
    setSocket(newSocket);

    newSocket.on('analysis_result', (data) => {
      setEvents(data.events);
      if (data.description) {
        setDescription(data.description);
      }
    });

    return () => newSocket.close();
  }, []);

  const handleStartStream = () => {
    if (socket && rtspUrl) {
      socket.emit('start_rtsp_stream', { rtsp_url: rtspUrl }, (response) => {
        if (response && response.hls_url) {
          if (Hls.isSupported()) {
            const hls = new Hls({
              manifestLoadingTimeOut: 5000,
              manifestLoadingMaxRetry: Infinity,
              manifestLoadingRetryDelay: 500,
              levelLoadingTimeOut: 5000,
              levelLoadingMaxRetry: Infinity,
              levelLoadingRetryDelay: 500
            });
            hlsRef.current = hls;
            hls.loadSource(`http://localhost:5333${response.hls_url}`);
            hls.attachMedia(videoRef.current);
            hls.on(Hls.Events.MANIFEST_PARSED, () => {
              videoRef.current.play().catch(e => console.error("Error attempting to play:", e));
            });
          } else if (videoRef.current.canPlayType('application/vnd.apple.mpegurl')) {
            videoRef.current.src = `http://localhost:5333${response.hls_url}`;
            videoRef.current.play().catch(e => console.error("Error attempting to play:", e));
          }
          setIsStreaming(true);
        }
      });
    }
  };

  const handleStopStream = () => {
    if (socket) {
      socket.emit('stop_rtsp_stream');
      setIsStreaming(false);
      if (hlsRef.current) {
        hlsRef.current.destroy();
      }
      if (videoRef.current) {
        videoRef.current.src = '';
      }
    }
  };

  return (
    <div>
      <h2>RTSP Video Analysis</h2>
      <input
        type="text"
        value={rtspUrl}
        onChange={(e) => setRtspUrl(e.target.value)}
        placeholder="Enter RTSP URL"
      />
      <button onClick={handleStartStream} disabled={isStreaming}>
        Start Stream
      </button>
      <button onClick={handleStopStream} disabled={!isStreaming}>
        Stop Stream
      </button>
      <video
        ref={videoRef}
        controls
        width="640"
        height="360"
        style={{ display: isStreaming ? 'block' : 'none' }}
      />
      <div>
        <h3>Detected Events:</h3>
        <ul>
          {events.map((event, index) => (
            <li key={index}>{event.type}: {event.description}</li>
          ))}
        </ul>
      </div>
      <div>
        <h3>Latest Description:</h3>
        <p>{description}</p>
      </div>
    </div>
  );
};

export default RTSPVideoAnalysis;
frontend/src/components/SimilarVideoSearch.js · 54 lines · new file
@@ -0,0 +1,54 @@
import React, { useState } from 'react';
import axios from 'axios';

const SimilarVideoSearch = () => {
  const [file, setFile] = useState(null);
  const [similarVideos, setSimilarVideos] = useState([]);

  const handleFileChange = (e) => {
    setFile(e.target.files[0]);
  };

  const handleSubmit = async (e) => {
    e.preventDefault();
    if (!file) return;

    const formData = new FormData();
    formData.append('file', file);

    try {
      const response = await axios.post('http://localhost:5333/api/similar_videos', formData, {
        headers: {
          'Content-Type': 'multipart/form-data',
        },
      });
      setSimilarVideos(response.data);
    } catch (error) {
      console.error('Error finding similar videos:', error);
    }
  };

  return (
    <div>
      <h2>Find Similar Videos</h2>
      <form onSubmit={handleSubmit}>
        <input type="file" onChange={handleFileChange} accept="image/*,video/*" />
        <button type="submit">Find Similar Videos</button>
      </form>
      {similarVideos.length > 0 && (
        <div>
          <h3>Similar Videos:</h3>
          <ul>
            {similarVideos.map((video) => (
              <li key={video.id}>
                {video.filename} (Similarity: {video.similarity.toFixed(2)})
              </li>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
};

export default SimilarVideoSearch;
frontend/src/components/UploadedVideosList.js · 20 lines · new file
@@ -0,0 +1,20 @@
import React from 'react';

const UploadedVideosList = ({ videos }) => {
  return (
    <div>
      <h3>Uploaded Videos</h3>
      {videos.length === 0 ? (
        <p>No videos uploaded yet.</p>
      ) : (
        <ul>
          {videos.map((video, index) => (
            <li key={index}>{video}</li>
          ))}
        </ul>
      )}
    </div>
  );
};

export default UploadedVideosList;
frontend/src/components/VectorizedVideos.js · 21 lines · new file
@@ -0,0 +1,21 @@
// src/components/VectorizedVideos.js
import React from 'react';

const VectorizedVideos = ({ videos }) => {
  return (
    <div>
      <h2>Vectorized Videos</h2>
      {videos.length === 0 ? (
        <p>No videos have been vectorized yet.</p>
      ) : (
        <ul>
          {videos.map((video, index) => (
            <li key={index}>{video}</li>
          ))}
        </ul>
      )}
    </div>
  );
};

export default VectorizedVideos;
frontend/src/components/VideoPlayer.js · 16 lines · new file
@@ -0,0 +1,16 @@
import React from 'react';

function VideoPlayer({ videoFile }) {
  // videoFile may be a File object (from an upload) or a URL string
  // passed down from App.js; handle both.
  const isFileObject = typeof videoFile !== 'string';
  const videoUrl = isFileObject ? URL.createObjectURL(videoFile) : videoFile;

  return (
    <div>
      <video width="640" height="480" controls>
        <source src={videoUrl} type={isFileObject ? videoFile.type : undefined} />
        Your browser does not support the video tag.
      </video>
    </div>
  );
}

export default VideoPlayer;
frontend/src/components/VideoUpload.js · 76 lines · new file
@@ -0,0 +1,76 @@
// src/components/VideoUpload.js
import React, { useState } from 'react';
import axios from 'axios';

const VideoUpload = ({ onUploadSuccess, onAnalysisComplete }) => {
  const [file, setFile] = useState(null);
  const [uploading, setUploading] = useState(false);
  const [error, setError] = useState(null);

  const handleFileChange = (e) => {
    setFile(e.target.files[0]);
    setError(null);
  };

  const handleUpload = async (action) => {
    if (!file) {
      setError('Please select a file to upload');
      return;
    }

    setUploading(true);
    setError(null);

    const formData = new FormData();
    formData.append('video', file);

    try {
      // First, upload the video
      const uploadResponse = await axios.post('/api/upload', formData, {
        headers: {
          'Content-Type': 'multipart/form-data',
        },
      });

      if (action === 'vectorize') {
        // If vectorizing, call the vectorize endpoint
        await axios.post(`/api/vectorize/${uploadResponse.data.video_id}`);
        onUploadSuccess(uploadResponse.data);
      } else if (action === 'analyze') {
        // If analyzing, call the analyze endpoint
        const analyzeResponse = await axios.post('/api/analyze', formData, {
          headers: {
            'Content-Type': 'multipart/form-data',
          },
        });
        onAnalysisComplete(analyzeResponse.data);
      }

      setUploading(false);
    } catch (error) {
      setUploading(false);
      setError('Error processing video: ' + (error.response?.data?.error || error.message));
    }
  };

  return (
    <div>
      <h2>Upload Video</h2>
      <input
        type="file"
        onChange={handleFileChange}
        accept="video/*"
        disabled={uploading}
      />
      <button onClick={() => handleUpload('vectorize')} disabled={uploading || !file}>
        {uploading ? 'Processing...' : 'Upload and Vectorize'}
      </button>
      <button onClick={() => handleUpload('analyze')} disabled={uploading || !file}>
        {uploading ? 'Processing...' : 'Upload and Analyze'}
      </button>
      {error && <p style={{ color: 'red' }}>{error}</p>}
    </div>
  );
};

export default VideoUpload;
frontend/src/index.css · 100 lines · new file
@@ -0,0 +1,100 @@
/* Reset default styles */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Set base font and color */
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
    'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
    sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  font-size: 16px;
  line-height: 1.5;
  color: #333;
  background-color: #f5f5f5;
}

/* Set up a container for your app content */
.container {
  max-width: 1200px;
  margin: 0 auto;
  padding: 20px;
}

/* Basic heading styles */
h1, h2, h3, h4, h5, h6 {
  margin-bottom: 0.5em;
  font-weight: 600;
}

h1 { font-size: 2.5em; }
h2 { font-size: 2em; }
h3 { font-size: 1.75em; }
h4 { font-size: 1.5em; }
h5 { font-size: 1.25em; }
h6 { font-size: 1em; }

/* Basic link styles */
a {
  color: #0066cc;
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

/* Basic button styles */
button {
  cursor: pointer;
  padding: 10px 15px;
  font-size: 1em;
  border: none;
  border-radius: 4px;
  background-color: #0066cc;
  color: white;
  transition: background-color 0.3s ease;
}

button:hover {
  background-color: #0052a3;
}

button:disabled {
  background-color: #cccccc;
  cursor: not-allowed;
}

/* Basic form styles */
input, textarea, select {
  width: 100%;
  padding: 10px;
  margin-bottom: 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  font-size: 1em;
}

/* Utility classes */
.text-center { text-align: center; }
.mt-1 { margin-top: 0.5rem; }
.mt-2 { margin-top: 1rem; }
.mt-3 { margin-top: 1.5rem; }
.mb-1 { margin-bottom: 0.5rem; }
.mb-2 { margin-bottom: 1rem; }
.mb-3 { margin-bottom: 1.5rem; }

/* Responsive design */
@media (max-width: 768px) {
  body {
    font-size: 14px;
  }

  .container {
    padding: 10px;
  }
}
frontend/src/index.js · 11 lines · new file
@@ -0,0 +1,11 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';

const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);