74 lines
3.2 KiB
Python
74 lines
3.2 KiB
Python
|
import pandas as pd
|
|||
|
import numpy as np
|
|||
|
from faker import Faker
|
|||
|
|
|||
|
# Shared Faker instance for generating fake data.
# NOTE(review): `fake` is not referenced by any other statement visible in
# this script - confirm whether it is still needed.
fake = Faker()

# Seed NumPy's global random generator: every random column below is drawn
# through np.random, so a fixed seed makes the generated tables repeatable.
np.random.seed(42)
|
|||
|
|
|||
|
# Synthetic profile of small private universities, one entry per institution.
#
# The dict values are evaluated top to bottom, so the order of the np.random
# calls decides which seeded draws each column receives; keep the key order
# stable if the output must stay reproducible.
# np.random.randint excludes its upper bound, np.random.uniform draws from
# the half-open interval [low, high).
NUM_PRIVATE_UNIVERSITIES = 20

private_universities = {
    'University': [
        f'Private University {i}'
        for i in range(1, NUM_PRIVATE_UNIVERSITIES + 1)
    ],
    # Constant column: every private institution is modelled as fully
    # tuition- and donation-funded.
    'Funding from Tuition and Donations (%)': [100] * NUM_PRIVATE_UNIVERSITIES,
    'Board Size': np.random.randint(10, 51, size=NUM_PRIVATE_UNIVERSITIES),
    'President Reports To': ['Board of Trustees'] * NUM_PRIVATE_UNIVERSITIES,
    'Enrollment': np.random.randint(
        1000, 20001, size=NUM_PRIVATE_UNIVERSITIES
    ),
    'Average Class Size': np.random.randint(
        10, 30, size=NUM_PRIVATE_UNIVERSITIES
    ),
    'Student-Faculty Ratio': np.round(
        np.random.uniform(8, 15, size=NUM_PRIVATE_UNIVERSITIES), 1
    ),
    'Endowment per Student ($)': np.random.randint(
        10000, 100000, size=NUM_PRIVATE_UNIVERSITIES
    ),
    'Retention Rate (%)': np.round(
        np.random.uniform(70, 95, size=NUM_PRIVATE_UNIVERSITIES), 1
    ),
}
|
|||
|
|
|||
|
# Synthetic profile of public universities, one entry per institution.
#
# The dict values are evaluated top to bottom, so the order of the np.random
# calls decides which seeded draws each column receives; keep the key order
# stable if the output must stay reproducible.
# np.random.randint excludes its upper bound, np.random.uniform draws from
# the half-open interval [low, high).
NUM_PUBLIC_UNIVERSITIES = 20

public_universities = {
    'University': [
        f'Public University {i}'
        for i in range(1, NUM_PUBLIC_UNIVERSITIES + 1)
    ],
    'Funding from State (%)': np.random.randint(
        20, 51, size=NUM_PUBLIC_UNIVERSITIES
    ),
    'Board Size': np.random.randint(9, 16, size=NUM_PUBLIC_UNIVERSITIES),
    'President Reports To': ['State’s Regents'] * NUM_PUBLIC_UNIVERSITIES,
    'Enrollment': np.random.randint(
        20000, 70001, size=NUM_PUBLIC_UNIVERSITIES
    ),
    'State Funding Stability': np.random.choice(
        ['Stable', 'Variable'], size=NUM_PUBLIC_UNIVERSITIES
    ),
    'Average Class Size': np.random.randint(
        30, 50, size=NUM_PUBLIC_UNIVERSITIES
    ),
    'Student-Faculty Ratio': np.round(
        np.random.uniform(15, 25, size=NUM_PUBLIC_UNIVERSITIES), 1
    ),
    'Endowment per Student ($)': np.random.randint(
        5000, 50000, size=NUM_PUBLIC_UNIVERSITIES
    ),
    'Retention Rate (%)': np.round(
        np.random.uniform(60, 85, size=NUM_PUBLIC_UNIVERSITIES), 1
    ),
}
|
|||
|
|
|||
|
# Synthetic enrollment-management records: the three terms repeated in a
# fixed Fall/Spring/Summer cycle, one row per term occurrence.
#
# The dict values are evaluated top to bottom, so the order of the np.random
# calls decides which seeded draws each column receives. np.round does not
# consume random numbers, so rounding leaves the underlying draws unchanged.
ENROLLMENT_TERMS = ['Fall', 'Spring', 'Summer']
NUM_TERM_CYCLES = 20
NUM_ENROLLMENT_ROWS = len(ENROLLMENT_TERMS) * NUM_TERM_CYCLES

enrollment_management = {
    'Term': ENROLLMENT_TERMS * NUM_TERM_CYCLES,
    'Cohort Size': np.random.randint(50, 500, size=NUM_ENROLLMENT_ROWS),
    'Financial Aid Packages': np.random.randint(
        5000, 25000, size=NUM_ENROLLMENT_ROWS
    ),
    # Percentage columns are rounded to one decimal so they match the
    # precision of the other "(%)" columns this script writes.
    'Discount Rate (%)': np.round(
        np.random.uniform(30, 60, size=NUM_ENROLLMENT_ROWS), 1
    ),
    # NOTE(review): the four composition shares below are drawn
    # independently, so a row's shares are not constrained to sum to 100
    # (the upper bounds alone add up to 125) - confirm whether downstream
    # consumers need them normalised.
    'First-time Freshmen (%)': np.round(
        np.random.uniform(40, 70, size=NUM_ENROLLMENT_ROWS), 1
    ),
    'Transfer Students (%)': np.round(
        np.random.uniform(10, 30, size=NUM_ENROLLMENT_ROWS), 1
    ),
    'Graduate Students (%)': np.round(
        np.random.uniform(5, 20, size=NUM_ENROLLMENT_ROWS), 1
    ),
    'Audit Students (%)': np.round(
        np.random.uniform(1, 5, size=NUM_ENROLLMENT_ROWS), 1
    ),
}
|
|||
|
|
|||
|
# Synthetic enrollment funnel: one row per stage, ordered from the widest
# stage (Lead) to the narrowest (Enrolled).
#
# The dict values are evaluated top to bottom, so the order of the np.random
# calls decides which seeded draws each column receives. Sorting and rounding
# do not consume random numbers.
FUNNEL_STAGES = [
    'Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled',
]
NUM_FUNNEL_STAGES = len(FUNNEL_STAGES)

enrollment_funnel = {
    'Stage': list(FUNNEL_STAGES),
    # Each stage of a funnel is drawn from the one before it, so head-counts
    # must never increase from Lead to Enrolled. Independent draws would
    # violate that, so the draws are sorted into descending order.
    'Number of Students': np.sort(
        np.random.randint(100, 5000, size=NUM_FUNNEL_STAGES)
    )[::-1],
    'Major': np.random.choice(
        ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences'],
        size=NUM_FUNNEL_STAGES,
    ),
    # Rounded to conventional reporting precision (GPA to two decimals,
    # percentages to one) instead of full float precision.
    'Average GPA': np.round(
        np.random.uniform(2.5, 4.0, size=NUM_FUNNEL_STAGES), 2
    ),
    'Average SAT Score': np.random.randint(900, 1600, size=NUM_FUNNEL_STAGES),
    'Average ACT Score': np.random.randint(18, 36, size=NUM_FUNNEL_STAGES),
    'FAFSA Submitted (%)': np.round(
        np.random.uniform(40, 90, size=NUM_FUNNEL_STAGES), 1
    ),
}
|
|||
|
|
|||
|
# Wrap each synthetic table in a DataFrame. Every dict maps a column name to
# an equal-length sequence, so each key becomes one column.
df_private = pd.DataFrame(private_universities)
df_public = pd.DataFrame(public_universities)
df_enrollment_management = pd.DataFrame(enrollment_management)
df_enrollment_funnel = pd.DataFrame(enrollment_funnel)
|
|||
|
|
|||
|
# Write each table to its own CSV file. The relative paths resolve against
# the current working directory; index=False leaves the DataFrame's row
# index out of the file.
df_private.to_csv('private_universities.csv', index=False)
df_public.to_csv('public_universities.csv', index=False)
df_enrollment_management.to_csv('enrollment_management.csv', index=False)
df_enrollment_funnel.to_csv('enrollment_funnel.csv', index=False)

print("Synthetic data generated and saved to CSV files.")
|