import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Share of students initially assigned to each funnel stage.
# simulate_progression() also reads this table, keyed by the DESTINATION
# stage, as the probability of advancing into that stage.
stage_probabilities = {
    'Lead': 0.25,
    'Prospect': 0.20,
    'Applied': 0.25,
    'Admitted': 0.24,
    'Deposited': 0.05,
    'Enrolled': 0.01,
}

# Ensure the probabilities sum to 1 for initial stage assignment
total_probability = sum(stage_probabilities.values())
stage_probabilities_normalized = {
    k: v / total_probability for k, v in stage_probabilities.items()
}

# Generate synthetic data for enrollment funnel with student IDs
num_students = 30000
stages = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
majors = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

# The columns are drawn in this order from the seeded global RNG; reordering
# the entries would change every generated value.
enrollment_funnel = {
    'Student_ID': [f'S{1000+i}' for i in range(num_students)],
    'Stage': np.random.choice(
        list(stage_probabilities_normalized.keys()),
        size=num_students,
        p=list(stage_probabilities_normalized.values()),
    ),
    'Major': np.random.choice(majors, size=num_students),
    'Average GPA': np.random.uniform(2.5, 4.0, size=num_students),
    # NOTE(review): np.random.randint excludes the upper bound, so 1600 (SAT)
    # and 36 (ACT) are never drawn. Left as-is to keep the seeded output
    # stable; confirm whether the top scores were meant to be reachable.
    'Average SAT Score': np.random.randint(900, 1600, size=num_students),
    'Average ACT Score': np.random.randint(18, 36, size=num_students),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=num_students),
}

# Create DataFrame
df_enrollment_funnel = pd.DataFrame(enrollment_funnel)


# Function to simulate student progression through stages
def simulate_progression(df, stage_order=None, transition_probabilities=None):
    """Return a copy of *df* with students randomly advanced along the funnel.

    Transitions are applied once each, in funnel order. For every adjacent
    pair (previous stage -> next stage), each row currently in the previous
    stage moves to the next one with the probability registered for the next
    stage. Because the membership mask is recomputed for every pair, a student
    who advances in one step is eligible for the following step in the same
    call and may therefore move forward several stages.

    Args:
        df: DataFrame with a 'Stage' column. It is not modified.
        stage_order: Ordered sequence of stage names. Defaults to the
            module-level ``stages``.
        transition_probabilities: Mapping from destination stage name to the
            probability of advancing into it. Stages missing from the mapping
            use 0.5. Defaults to the module-level ``stage_probabilities``.

    Returns:
        A new DataFrame with the same rows and an updated 'Stage' column.
    """
    if stage_order is None:
        stage_order = stages
    if transition_probabilities is None:
        transition_probabilities = stage_probabilities

    progression = df.copy()
    # Pair each stage with its successor instead of re-scanning the list with
    # .index() on every iteration.
    for previous_stage, stage in zip(stage_order, stage_order[1:]):
        transition_prob = transition_probabilities.get(stage, 0.5)
        in_previous_stage = progression['Stage'] == previous_stage
        # One uniform draw per row (global NumPy RNG) decides who advances.
        progressed = in_previous_stage & (
            np.random.rand(len(progression)) < transition_prob
        )
        progression.loc[progressed, 'Stage'] = stage
    return progression


# Simulate student progression through stages
df_enrollment_funnel = simulate_progression(df_enrollment_funnel)

# Summer-melt analysis: students who deposited but did not enroll
# 'Stage' stores a single value per row, so a row is never both 'Deposited'
# and 'Enrolled'. Students still at 'Deposited' paid a deposit but did not go
# on to enroll; students at 'Enrolled' also passed through the deposit stage.
deposited_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Deposited']
enrolled_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Enrolled']

# Identify melted students (Deposited but not Enrolled)
melted_students = deposited_students[
    ~deposited_students['Student_ID'].isin(enrolled_students['Student_ID'])
]

# Calculate melt rate.
# The denominator is everyone who reached the deposit stage: those who melted
# plus those who went on to enroll. Dividing by the 'Deposited' rows alone
# compares the melted group with itself and always reports 100%.
total_melted = len(melted_students)
total_deposited = total_melted + len(enrolled_students)
melt_rate = (total_melted / total_deposited) * 100 if total_deposited > 0 else 0

print(f"Total Deposited Students: {total_deposited}")
print(f"Total Melted Students: {total_melted}")
print(f"Melt Rate: {melt_rate:.2f}%")

# Save melted students to CSV for further analysis
melted_students.to_csv('melted_students.csv', index=False)

print("Melt rate calculated and melted students identified.")