"""Generate synthetic higher-education datasets and save them as CSV files.

Four tables are built from seeded NumPy draws and written to the current
working directory:

* ``private_universities.csv``  -- one row per synthetic private university
* ``public_universities.csv``   -- one row per synthetic public university
* ``enrollment_management.csv`` -- one row per (term, cycle) combination
* ``enrollment_funnel.csv``     -- one row per enrollment-funnel stage
"""

import numpy as np
import pandas as pd

try:
    from faker import Faker
except ImportError:
    # Nothing below draws from Faker, so a missing installation must not
    # prevent the CSV files from being generated.
    Faker = None

# Initialize Faker for generating fake data (unused by the tables below).
fake = Faker() if Faker is not None else None

# Set random seed for reproducibility.
np.random.seed(42)

# Table dimensions and category lists shared by the generators below.
NUM_UNIVERSITIES = 20
TERMS = ['Fall', 'Spring', 'Summer']
NUM_TERM_CYCLES = 20
FUNNEL_STAGES = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
MAJORS = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

num_term_rows = len(TERMS) * NUM_TERM_CYCLES
num_stages = len(FUNNEL_STAGES)

# NOTE: np.random.randint excludes its upper bound, so e.g. randint(10, 51)
# yields values in 10..50. The order of the random calls is kept stable so the
# seeded output stays reproducible.

# Generate synthetic data for small private universities.
private_universities = {
    'University': [
        f'Private University {i}' for i in range(1, NUM_UNIVERSITIES + 1)
    ],
    'Funding from Tuition and Donations (%)': [100] * NUM_UNIVERSITIES,
    'Board Size': np.random.randint(10, 51, size=NUM_UNIVERSITIES),
    'President Reports To': ['Board of Trustees'] * NUM_UNIVERSITIES,
    'Enrollment': np.random.randint(1000, 20001, size=NUM_UNIVERSITIES),
    # NOTE(review): yields 10..29; confirm whether 30 was meant to be included.
    'Average Class Size': np.random.randint(10, 30, size=NUM_UNIVERSITIES),
    'Student-Faculty Ratio': np.round(
        np.random.uniform(8, 15, size=NUM_UNIVERSITIES), 1
    ),
    'Endowment per Student ($)': np.random.randint(
        10000, 100000, size=NUM_UNIVERSITIES
    ),
    'Retention Rate (%)': np.round(
        np.random.uniform(70, 95, size=NUM_UNIVERSITIES), 1
    ),
}

# Generate synthetic data for public universities.
public_universities = {
    'University': [
        f'Public University {i}' for i in range(1, NUM_UNIVERSITIES + 1)
    ],
    'Funding from State (%)': np.random.randint(20, 51, size=NUM_UNIVERSITIES),
    'Board Size': np.random.randint(9, 16, size=NUM_UNIVERSITIES),
    'President Reports To': ['State’s Regents'] * NUM_UNIVERSITIES,
    'Enrollment': np.random.randint(20000, 70001, size=NUM_UNIVERSITIES),
    'State Funding Stability': np.random.choice(
        ['Stable', 'Variable'], size=NUM_UNIVERSITIES
    ),
    'Average Class Size': np.random.randint(30, 50, size=NUM_UNIVERSITIES),
    'Student-Faculty Ratio': np.round(
        np.random.uniform(15, 25, size=NUM_UNIVERSITIES), 1
    ),
    'Endowment per Student ($)': np.random.randint(
        5000, 50000, size=NUM_UNIVERSITIES
    ),
    'Retention Rate (%)': np.round(
        np.random.uniform(60, 85, size=NUM_UNIVERSITIES), 1
    ),
}

# Generate synthetic data for enrollment management.
# NOTE(review): the four student-mix percentages are drawn independently and
# are not constrained to sum to 100; confirm whether that is acceptable.
enrollment_management = {
    'Term': TERMS * NUM_TERM_CYCLES,
    'Cohort Size': np.random.randint(50, 500, size=num_term_rows),
    'Financial Aid Packages': np.random.randint(5000, 25000, size=num_term_rows),
    'Discount Rate (%)': np.random.uniform(30, 60, size=num_term_rows),
    'First-time Freshmen (%)': np.random.uniform(40, 70, size=num_term_rows),
    'Transfer Students (%)': np.random.uniform(10, 30, size=num_term_rows),
    'Graduate Students (%)': np.random.uniform(5, 20, size=num_term_rows),
    'Audit Students (%)': np.random.uniform(1, 5, size=num_term_rows),
}

# Generate synthetic data for the enrollment funnel.
# Each stage is a subset of the one before it, so the head-counts must never
# increase from 'Lead' to 'Enrolled'. The same random draw is used as before
# (keeping the RNG stream intact) but sorted in descending order.
funnel_counts = np.sort(np.random.randint(100, 5000, size=num_stages))[::-1]

enrollment_funnel = {
    'Stage': FUNNEL_STAGES,
    'Number of Students': funnel_counts,
    'Major': np.random.choice(MAJORS, size=num_stages),
    'Average GPA': np.random.uniform(2.5, 4.0, size=num_stages),
    'Average SAT Score': np.random.randint(900, 1600, size=num_stages),
    'Average ACT Score': np.random.randint(18, 36, size=num_stages),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=num_stages),
}

# Create DataFrames.
df_private = pd.DataFrame(private_universities)
df_public = pd.DataFrame(public_universities)
df_enrollment_management = pd.DataFrame(enrollment_management)
df_enrollment_funnel = pd.DataFrame(enrollment_funnel)

# Save to CSV files (relative paths: written to the current working directory).
df_private.to_csv('private_universities.csv', index=False)
df_public.to_csv('public_universities.csv', index=False)
df_enrollment_management.to_csv('enrollment_management.csv', index=False)
df_enrollment_funnel.to_csv('enrollment_funnel.csv', index=False)

print("Synthetic data generated and saved to CSV files.")