73 lines
2.9 KiB
Python
73 lines
2.9 KiB
Python
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
# Seed NumPy's global generator so every run produces the same synthetic data
np.random.seed(42)

# Per-stage weights; they drive the initial stage assignment and are also
# looked up by stage name when students are advanced through the funnel
stage_probabilities = {
    'Lead': 0.25,
    'Prospect': 0.20,
    'Applied': 0.25,
    'Admitted': 0.24,
    'Deposited': 0.05,
    'Enrolled': 0.01,
}

# Rescale the weights by their total so they form a valid distribution
# (sums to 1) for the initial stage assignment
total_probability = sum(stage_probabilities.values())
stage_probabilities_normalized = dict(
    zip(
        stage_probabilities,
        (weight / total_probability for weight in stage_probabilities.values()),
    )
)
|
||
|
|
||
|
# Generate synthetic data for enrollment funnel with student IDs
num_students = 30000
stages = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
majors = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

# One entry per student in every column; all values are drawn from NumPy's
# global generator, so the draw order below determines the generated data.
enrollment_funnel = {
    # Sequential IDs: S1000, S1001, ...
    'Student_ID': [f'S{1000+i}' for i in range(num_students)],
    # Initial stage, sampled according to the normalized stage weights
    'Stage': np.random.choice(
        list(stage_probabilities_normalized),
        size=num_students,
        p=list(stage_probabilities_normalized.values()),
    ),
    # Majors are sampled uniformly
    'Major': np.random.choice(majors, size=num_students),
    'Average GPA': np.random.uniform(2.5, 4.0, size=num_students),
    # randint's upper bound is exclusive, so pass max + 1 to make the top
    # scores (1600 SAT, 36 ACT) reachable
    'Average SAT Score': np.random.randint(900, 1601, size=num_students),
    'Average ACT Score': np.random.randint(18, 37, size=num_students),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=num_students),
}

# Create DataFrame
df_enrollment_funnel = pd.DataFrame(enrollment_funnel)
|
||
|
|
||
|
# Function to simulate student progression through stages
|
||
|
def simulate_progression(df, stage_order=None, transition_probs=None):
    """Randomly advance students through the funnel stages.

    Consecutive stage pairs are processed in funnel order. For each pair,
    every student currently in the earlier stage moves to the later stage
    when a fresh uniform draw falls below that later stage's probability.
    Because pairs are processed in order, a student can advance through
    several stages within a single call.

    Args:
        df: DataFrame with a 'Stage' column. It is not modified.
        stage_order: Ordered sequence of stage names. Defaults to the
            module-level ``stages`` list.
        transition_probs: Mapping from a stage name to the probability of
            entering that stage from the one before it. Stages missing from
            the mapping use 0.5. Defaults to the module-level
            ``stage_probabilities`` dict.

    Returns:
        A copy of ``df`` whose 'Stage' column reflects the simulated moves.
    """
    if stage_order is None:
        stage_order = stages
    if transition_probs is None:
        transition_probs = stage_probabilities

    progression = df.copy()
    # Pair each stage with its predecessor directly instead of looking the
    # predecessor up with list.index(), which rescans the list every time and
    # returns the wrong position if a stage name appears more than once.
    for previous_stage, stage in zip(stage_order, stage_order[1:]):
        transition_prob = transition_probs.get(stage, 0.5)
        in_previous_stage = progression['Stage'] == previous_stage
        # One uniform [0, 1) draw per row; only rows in the previous stage count
        draws = np.random.rand(len(progression))
        progressed = in_previous_stage & (draws < transition_prob)
        progression.loc[progressed, 'Stage'] = stage
    return progression
|
||
|
|
||
|
# Simulate student progression through stages
df_enrollment_funnel = simulate_progression(df_enrollment_funnel)

# Each student row holds a single stage. After the simulation, a student who
# reached the deposit stage is therefore either still 'Deposited' (did not
# enroll) or has moved on to 'Enrolled'.
deposited_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Deposited']
enrolled_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Enrolled']

# Identify melted students (Deposited but not Enrolled). The ID filter only
# has an effect if the same Student_ID appears in both groups.
melted_students = deposited_students[~deposited_students['Student_ID'].isin(enrolled_students['Student_ID'])]

# Calculate melt rate. The denominator is everyone who reached the deposit
# stage (still Deposited + Enrolled). Counting only the rows still at
# 'Deposited' would make melted == deposited and pin the rate at 100%.
total_deposited = len(deposited_students) + len(enrolled_students)
total_melted = len(melted_students)
melt_rate = (total_melted / total_deposited) * 100 if total_deposited > 0 else 0

print(f"Total Deposited Students: {total_deposited}")
print(f"Total Melted Students: {total_melted}")
print(f"Melt Rate: {melt_rate:.2f}%")

# Save melted students to CSV for further analysis
melted_students.to_csv('melted_students.csv', index=False)

print("Melt rate calculated and melted students identified.")
|