university-data/small-university.py

74 lines
3.2 KiB
Python
Raw Normal View History

2024-06-10 18:29:30 -04:00
import pandas as pd
import numpy as np
from faker import Faker
# Pin NumPy's global random state so the np.random draws in this script are
# repeatable from run to run.
np.random.seed(42)
# Faker instance for fake-data generation.
# NOTE(review): `fake` is not referenced by the table definitions shown in this
# script -- confirm whether it is still needed.
fake = Faker()
# Synthetic table of 20 small private universities, built column by column.
# The np.random calls run in the same order as the columns are listed; keep
# that order so a seeded run keeps producing the same numbers.
private_universities = {}
private_universities['University'] = ['Private University ' + str(idx) for idx in range(1, 21)]
# Constant columns: every row carries the same value.
private_universities['Funding from Tuition and Donations (%)'] = [100 for _ in range(20)]
private_universities['Board Size'] = np.random.randint(low=10, high=51, size=20)
private_universities['President Reports To'] = ['Board of Trustees' for _ in range(20)]
# randint's upper bound is exclusive.
private_universities['Enrollment'] = np.random.randint(low=1000, high=20001, size=20)
private_universities['Average Class Size'] = np.random.randint(low=10, high=30, size=20)
# Continuous columns are rounded to one decimal place.
private_universities['Student-Faculty Ratio'] = np.random.uniform(low=8, high=15, size=20).round(1)
private_universities['Endowment per Student ($)'] = np.random.randint(low=10000, high=100000, size=20)
private_universities['Retention Rate (%)'] = np.random.uniform(low=70, high=95, size=20).round(1)
# Synthetic table of 20 public universities, built column by column.
# The np.random calls run in the same order as the columns are listed; keep
# that order so a seeded run keeps producing the same numbers.
public_universities = {}
public_universities['University'] = ['Public University ' + str(idx) for idx in range(1, 21)]
# randint's upper bound is exclusive.
public_universities['Funding from State (%)'] = np.random.randint(low=20, high=51, size=20)
public_universities['Board Size'] = np.random.randint(low=9, high=16, size=20)
# Constant column: every row carries the same value.
public_universities['President Reports To'] = ['States Regents' for _ in range(20)]
public_universities['Enrollment'] = np.random.randint(low=20000, high=70001, size=20)
# Each row is labelled with one of the two listed values, picked at random.
public_universities['State Funding Stability'] = np.random.choice(a=['Stable', 'Variable'], size=20)
public_universities['Average Class Size'] = np.random.randint(low=30, high=50, size=20)
# Continuous columns are rounded to one decimal place.
public_universities['Student-Faculty Ratio'] = np.random.uniform(low=15, high=25, size=20).round(1)
public_universities['Endowment per Student ($)'] = np.random.randint(low=5000, high=50000, size=20)
public_universities['Retention Rate (%)'] = np.random.uniform(low=60, high=85, size=20).round(1)
# Generate synthetic data for enrollment management: 60 rows, i.e. the
# Fall/Spring/Summer sequence repeated 20 times.
# Percentage columns are rounded to one decimal place, the same precision the
# university tables use for their percentage and ratio columns, so the CSV
# does not carry full-precision float noise.
# The np.random calls are kept in their original order so a seeded run draws
# the same underlying values as before (only the rounding is new).
# NOTE(review): the four student-mix percentages are drawn independently and
# are not constrained to sum to 100 -- confirm whether downstream use needs that.
enrollment_management = {
    'Term': ['Fall', 'Spring', 'Summer'] * 20,
    'Cohort Size': np.random.randint(50, 500, size=60),
    'Financial Aid Packages': np.random.randint(5000, 25000, size=60),
    'Discount Rate (%)': np.round(np.random.uniform(30, 60, size=60), 1),
    'First-time Freshmen (%)': np.round(np.random.uniform(40, 70, size=60), 1),
    'Transfer Students (%)': np.round(np.random.uniform(10, 30, size=60), 1),
    'Graduate Students (%)': np.round(np.random.uniform(5, 20, size=60), 1),
    'Audit Students (%)': np.round(np.random.uniform(1, 5, size=60), 1),
}
# Generate synthetic data for the enrollment funnel: one row per stage, listed
# from the widest stage (Lead) to the narrowest (Enrolled).
# The np.random calls are kept in their original order so a seeded run draws
# the same underlying values as before.
enrollment_funnel = {
    'Stage': ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled'],
    # A funnel can only narrow: each stage is a subset of the one before it.
    # Independent draws would let a later stage exceed an earlier one, so the
    # six drawn head-counts are sorted into descending order to line up with
    # the Lead -> Enrolled stage order.
    'Number of Students': np.sort(np.random.randint(100, 5000, size=6))[::-1],
    'Major': np.random.choice(['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences'], size=6),
    # GPA is reported to two decimals.
    'Average GPA': np.round(np.random.uniform(2.5, 4.0, size=6), 2),
    # randint's upper bound is exclusive.
    'Average SAT Score': np.random.randint(900, 1600, size=6),
    'Average ACT Score': np.random.randint(18, 36, size=6),
    # One decimal place, the same precision the university tables use for
    # their percentage columns.
    'FAFSA Submitted (%)': np.round(np.random.uniform(40, 90, size=6), 1),
}
# Wrap each raw column dictionary in a DataFrame. All four are built before
# anything is written to disk.
df_private = pd.DataFrame(data=private_universities)
df_public = pd.DataFrame(data=public_universities)
df_enrollment_management = pd.DataFrame(data=enrollment_management)
df_enrollment_funnel = pd.DataFrame(data=enrollment_funnel)
# Write every table to its own CSV (relative path, so it lands in the current
# working directory), leaving out the DataFrame's row index.
for _csv_name, _table in (
    ('private_universities.csv', df_private),
    ('public_universities.csv', df_public),
    ('enrollment_management.csv', df_enrollment_management),
    ('enrollment_funnel.csv', df_enrollment_funnel),
):
    _table.to_csv(_csv_name, index=False)
# Drop the loop helpers so the module namespace holds only the tables.
del _csv_name, _table
print('Synthetic data generated and saved to CSV files.')