university-data/small-university.py
Mahesh Kommareddi ee58da0890 initial commit
2024-06-10 18:29:30 -04:00

74 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
from faker import Faker
# Faker instance for fabricating fake values. Nothing in the visible script
# reads `fake` after it is created.
fake = Faker()
# Pin NumPy's global random generator so repeated runs draw the same numbers.
# The seed is set only once, so the order of the np.random calls that follow
# determines which values land in which column.
np.random.seed(seed=42)
# Synthetic records for 20 small private universities, stored column-wise
# (each key maps to 20 values, one per university).
# Columns are filled in a fixed order: every np.random call advances the shared
# generator, so reordering these statements would change the generated values.
private_universities = {}
private_universities['University'] = [f'Private University {idx}' for idx in range(1, 21)]
private_universities['Funding from Tuition and Donations (%)'] = [100 for _ in range(20)]
# randint's upper bound is exclusive: board sizes fall in 10..50.
private_universities['Board Size'] = np.random.randint(low=10, high=51, size=20)
private_universities['President Reports To'] = ['Board of Trustees' for _ in range(20)]
private_universities['Enrollment'] = np.random.randint(low=1000, high=20001, size=20)
private_universities['Average Class Size'] = np.random.randint(low=10, high=30, size=20)
# Ratios and rates are floats rounded to one decimal place.
private_universities['Student-Faculty Ratio'] = np.random.uniform(low=8, high=15, size=20).round(1)
private_universities['Endowment per Student ($)'] = np.random.randint(low=10000, high=100000, size=20)
private_universities['Retention Rate (%)'] = np.random.uniform(low=70, high=95, size=20).round(1)
# Synthetic records for 20 public universities, stored column-wise
# (each key maps to 20 values, one per university).
# The entries are evaluated top to bottom, so the np.random calls consume the
# shared generator in exactly this order.
public_universities = {
    'University': [f'Public University {idx}' for idx in range(1, 21)],
    # randint's upper bound is exclusive: state funding share is 20..50.
    'Funding from State (%)': np.random.randint(low=20, high=51, size=20),
    'Board Size': np.random.randint(low=9, high=16, size=20),
    'President Reports To': ['States Regents' for _ in range(20)],
    'Enrollment': np.random.randint(low=20000, high=70001, size=20),
    'State Funding Stability': np.random.choice(a=['Stable', 'Variable'], size=20),
    'Average Class Size': np.random.randint(low=30, high=50, size=20),
    # Ratios and rates are floats rounded to one decimal place.
    'Student-Faculty Ratio': np.random.uniform(low=15, high=25, size=20).round(1),
    'Endowment per Student ($)': np.random.randint(low=5000, high=50000, size=20),
    'Retention Rate (%)': np.random.uniform(low=60, high=85, size=20).round(1),
}
# Synthetic enrollment-management table: 60 rows, cycling through the three
# terms (Fall, Spring, Summer) twenty times.
# The entries are evaluated top to bottom, so the np.random calls consume the
# shared generator in exactly this order.
enrollment_management = {
    'Term': 20 * ['Fall', 'Spring', 'Summer'],
    'Cohort Size': np.random.randint(low=50, high=500, size=60),
    'Financial Aid Packages': np.random.randint(low=5000, high=25000, size=60),
    # The percentage columns are independent uniform draws and are left
    # unrounded; nothing here makes the student-mix columns sum to 100.
    'Discount Rate (%)': np.random.uniform(low=30, high=60, size=60),
    'First-time Freshmen (%)': np.random.uniform(low=40, high=70, size=60),
    'Transfer Students (%)': np.random.uniform(low=10, high=30, size=60),
    'Graduate Students (%)': np.random.uniform(low=5, high=20, size=60),
    'Audit Students (%)': np.random.uniform(low=1, high=5, size=60),
}
# Synthetic enrollment-funnel snapshot: one row per stage, from Lead through
# Enrolled.
# Stage head-counts are drawn at random and then sorted in descending order so
# that each stage is never larger than the stage before it; independent draws
# could otherwise report more "Enrolled" than "Applied" students, which is not
# a valid funnel. Sorting does not change how many random numbers are drawn,
# so the columns generated after it receive the same values as before.
enrollment_funnel = {
    'Stage': ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled'],
    # np.sort is ascending; the [::-1] view reverses it to largest-first.
    'Number of Students': np.sort(np.random.randint(100, 5000, size=6))[::-1],
    'Major': np.random.choice(['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences'], size=6),
    'Average GPA': np.random.uniform(2.5, 4.0, size=6),
    # randint's upper bound is exclusive: SAT is 900..1599, ACT is 18..35.
    'Average SAT Score': np.random.randint(900, 1600, size=6),
    'Average ACT Score': np.random.randint(18, 36, size=6),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=6),
}
# Turn each column dictionary into a DataFrame (keys become column names).
df_private = pd.DataFrame(data=private_universities)
df_public = pd.DataFrame(data=public_universities)
df_enrollment_management = pd.DataFrame(data=enrollment_management)
df_enrollment_funnel = pd.DataFrame(data=enrollment_funnel)
# Write one CSV per table using relative paths, so the files land in the
# current working directory. index=False keeps the row index out of the files.
df_private.to_csv(path_or_buf='private_universities.csv', index=False)
df_public.to_csv(path_or_buf='public_universities.csv', index=False)
df_enrollment_management.to_csv(path_or_buf='enrollment_management.csv', index=False)
df_enrollment_funnel.to_csv(path_or_buf='enrollment_funnel.csv', index=False)
# Completion message, printed after all four files are written.
print('Synthetic data generated and saved to CSV files.')