university-data/melt-rate.py
Mahesh Kommareddi ee58da0890 initial commit
2024-06-10 18:29:30 -04:00

73 lines
2.9 KiB
Python

import pandas as pd
import numpy as np
# Seed NumPy's global generator so every run produces the same synthetic data.
np.random.seed(42)

# Relative weight of each funnel stage. Used (after normalisation) to pick
# every student's starting stage; simulate_progression also reads this table.
stage_probabilities = dict(
    zip(
        ('Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled'),
        (0.25, 0.20, 0.25, 0.24, 0.05, 0.01),
    )
)

# np.random.choice requires weights that sum to 1, so rescale by the total.
total_probability = sum(stage_probabilities.values())
stage_probabilities_normalized = {
    stage_name: weight / total_probability
    for stage_name, weight in stage_probabilities.items()
}

# Size and categorical vocabularies of the synthetic enrollment funnel.
num_students = 30000
stages = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
majors = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

_stage_names = list(stage_probabilities_normalized)
_stage_weights = list(stage_probabilities_normalized.values())

# One entry per column. The dict values are evaluated top to bottom, which
# fixes the order in which the seeded generator is consumed.
enrollment_funnel = {
    # IDs run S1000, S1001, ... one per student.
    'Student_ID': [f'S{student_number}'
                   for student_number in range(1000, 1000 + num_students)],
    'Stage': np.random.choice(_stage_names, size=num_students, p=_stage_weights),
    'Major': np.random.choice(majors, size=num_students),
    'Average GPA': np.random.uniform(2.5, 4.0, size=num_students),
    # NOTE: randint's upper bound is exclusive, so 1600 and 36 are never drawn.
    'Average SAT Score': np.random.randint(900, 1600, size=num_students),
    'Average ACT Score': np.random.randint(18, 36, size=num_students),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=num_students),
}

# Tabular view of the synthetic funnel, one row per student.
df_enrollment_funnel = pd.DataFrame(data=enrollment_funnel)
# Function to simulate student progression through stages
def simulate_progression(df, *, stage_order=None, transition_probabilities=None,
                         default_probability=0.5):
    """Randomly advance students along the enrollment funnel.

    Stages are visited in order. For each consecutive (previous, next) pair,
    every row currently at the previous stage moves to the next stage when a
    uniform draw in [0, 1) falls below that next stage's probability. Because
    the pairs are processed in sequence on the same frame, a student promoted
    in one step is eligible for promotion again in the following step, so a
    single call can move a student forward by several stages.

    Args:
        df: DataFrame with a 'Stage' column holding stage names. It is not
            modified; a copy is returned.
        stage_order: Ordered sequence of stage names. Defaults to the
            module-level ``stages`` list.
        transition_probabilities: Mapping from a stage name to the
            probability of entering that stage from the one before it.
            Defaults to the module-level ``stage_probabilities`` dict.
        default_probability: Probability used for any stage missing from
            ``transition_probabilities``.

    Returns:
        A copy of ``df`` with the 'Stage' column updated.

    Note:
        Draws come from NumPy's global random state (``np.random.rand``),
        one array of ``len(df)`` values per stage transition.
    """
    if stage_order is None:
        stage_order = stages
    if transition_probabilities is None:
        transition_probabilities = stage_probabilities

    progression = df.copy()
    # Walk adjacent stage pairs directly instead of looking up each stage's
    # index on every iteration.
    for previous_stage, stage in zip(stage_order, stage_order[1:]):
        transition_prob = transition_probabilities.get(stage, default_probability)
        in_previous_stage = progression['Stage'] == previous_stage
        # One draw per row, including rows not at previous_stage, so the
        # amount of randomness consumed does not depend on the data.
        draws = np.random.rand(len(progression))
        progressed = in_previous_stage & (draws < transition_prob)
        progression.loc[progressed, 'Stage'] = stage
    return progression
# Simulate student progression through stages
df_enrollment_funnel = simulate_progression(df_enrollment_funnel)

# Each student carries exactly one final stage, so rows at 'Deposited' and
# rows at 'Enrolled' never overlap. The deposit cohort therefore has to
# include students who went on to enroll; otherwise every deposited student
# counts as melted and the rate is always 100%.
reached_deposit = df_enrollment_funnel['Stage'].isin(['Deposited', 'Enrolled'])
deposited_students = df_enrollment_funnel[reached_deposit]
enrolled_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Enrolled']

# Identify melted students (reached Deposited but did not end up Enrolled)
melted_students = deposited_students[
    ~deposited_students['Student_ID'].isin(enrolled_students['Student_ID'])
]

# Melt rate = melted / everyone who reached the deposit stage, as a percentage.
total_deposited = len(deposited_students)
total_melted = len(melted_students)
# Guard against an empty deposit cohort to avoid dividing by zero.
melt_rate = (total_melted / total_deposited) * 100 if total_deposited > 0 else 0

print(f"Total Deposited Students: {total_deposited}")
print(f"Total Melted Students: {total_melted}")
print(f"Melt Rate: {melt_rate:.2f}%")

# Save melted students to CSV for further analysis
melted_students.to_csv('melted_students.csv', index=False)
print("Melt rate calculated and melted students identified.")