# university-data/discount-rate.py

import random
import pandas as pd
from faker import Faker

fake = Faker()

# Column order of every generated record; also the schema of an empty result.
_STUDENT_COLUMNS = (
    'Student ID',
    'Student Type',
    'Major',
    'GPA',
    'SAT Score',
    'ACT Score',
    'State',
    'Zip Code',
    'Country',
    'First Generation',
    'Need Rank',
    'EFC',
    'Financial Aid Package',
    'Published Tuition Price',
    'Discount Rate',
    'Funnel Stage',
)

# Categorical value pools, hoisted so they are not rebuilt for every student.
_STUDENT_TYPES = (
    'First-time Freshmen',
    'Transfer Student',
    'Graduate Student',
    'Audit Student',
)
_FUNNEL_STAGES = (
    'Lead',
    'Prospect',
    'Applied',
    'Admitted',
    'Waitlisted',
    'Denied',
    'Deposited',
    'Enrolled',
)


def _generate_student_record():
    """Return one synthetic student as a dict keyed by ``_STUDENT_COLUMNS``.

    The random/Faker calls are made in a fixed order so that seeding the
    generators reproduces the same sequence of records.
    """
    # Identity and academic profile.
    student_id = fake.unique.uuid4()  # Faker's `unique` proxy avoids repeats
    student_type = random.choice(_STUDENT_TYPES)
    major = fake.word().capitalize()  # arbitrary word, not a real major name
    gpa = round(random.uniform(2.0, 4.0), 2)
    sat_score = random.randint(800, 1600)
    act_score = random.randint(1, 36)

    # Geography: each field is drawn independently, so State, Zip Code and
    # Country are not guaranteed to be consistent with one another.
    state = fake.state()
    zip_code = fake.zipcode()
    country = fake.country()

    # Financial-need attributes.
    first_generation = random.choice([True, False])
    need_rank = random.randint(1, 6)
    efc = random.randint(0, 100000)  # Expected Family Contribution
    financial_aid_package = random.randint(1000, 50000)

    # Discount rate = aid as a percentage of the published tuition price.
    # NOTE(review): aid (up to 50000) can exceed tuition (as low as 30000),
    # so this value can be above 100 -- confirm that is intended.
    published_tuition_price = random.randint(30000, 70000)
    discount_rate = round(
        (financial_aid_package / published_tuition_price) * 100, 2
    )

    # Enrollment funnel stage, chosen independently of every other field.
    funnel_stage = random.choice(_FUNNEL_STAGES)

    return {
        'Student ID': student_id,
        'Student Type': student_type,
        'Major': major,
        'GPA': gpa,
        'SAT Score': sat_score,
        'ACT Score': act_score,
        'State': state,
        'Zip Code': zip_code,
        'Country': country,
        'First Generation': first_generation,
        'Need Rank': need_rank,
        'EFC': efc,
        'Financial Aid Package': financial_aid_package,
        'Published Tuition Price': published_tuition_price,
        'Discount Rate': discount_rate,
        'Funnel Stage': funnel_stage,
    }


def generate_student_data(num_students: int) -> pd.DataFrame:
    """Build a DataFrame of randomly generated student records.

    Each row holds a unique UUID, a student type, a random word used as the
    major, GPA (2.0-4.0, two decimals), SAT (800-1600) and ACT (1-36) scores,
    Faker-generated state / zip code / country, a first-generation flag, a
    need rank (1-6), EFC (0-100000), a financial aid package (1000-50000),
    a published tuition price (30000-70000), the resulting discount rate in
    percent, and an enrollment funnel stage.

    Args:
        num_students: Number of rows to generate. Zero or a negative value
            yields an empty frame.

    Returns:
        A DataFrame with one row per student and the columns listed in
        ``_STUDENT_COLUMNS``. The columns are present even when no rows are
        generated, so the schema (and any CSV header written from it) stays
        stable for empty results.
    """
    records = [_generate_student_record() for _ in range(num_students)]
    # Passing the column list explicitly keeps the schema for an empty result;
    # for non-empty input it matches the dict key order, so output is unchanged.
    return pd.DataFrame(records, columns=list(_STUDENT_COLUMNS))

# Build the synthetic dataset: one row per generated student.
num_students = 1_000
student_df = generate_student_data(num_students)

# Persist the rows as CSV in the working directory, omitting the index column.
student_df.to_csv(path_or_buf='synthetic_student_data.csv', index=False)

# Echo a short preview of the generated rows to stdout.
print(student_df.head())
initial commit 2024-06-10 18:29:30 -04:00			`import random`
			`import pandas as pd`
			`from faker import Faker`

			`fake = Faker()`

			`def generate_student_data(num_students):`
			`data = []`

			`for _ in range(num_students):`
			`# Generate basic student data`
			`student_id = fake.unique.uuid4()`
			`student_type = random.choice(['First-time Freshmen', 'Transfer Student', 'Graduate Student', 'Audit Student'])`
			`major = fake.word().capitalize()`
			`gpa = round(random.uniform(2.0, 4.0), 2)`
			`sat_score = random.randint(800, 1600)`
			`act_score = random.randint(1, 36)`
			`state = fake.state()`
			`zip_code = fake.zipcode()`
			`country = fake.country()`
			`first_generation = random.choice([True, False])`
			`need_rank = random.randint(1, 6)`
			`efc = random.randint(0, 100000) # Expected Family Contribution`
			`financial_aid_package = random.randint(1000, 50000) # Financial aid package amount`

			`# Published tuition price for calculation of discount rate`
			`published_tuition_price = random.randint(30000, 70000)`
			`discount_rate = round((financial_aid_package / published_tuition_price) * 100, 2)`

			`# Enrollment funnel stages`
			`funnel_stage = random.choice(['Lead', 'Prospect', 'Applied', 'Admitted', 'Waitlisted', 'Denied', 'Deposited', 'Enrolled'])`

			`# Generate synthetic data point`
			`student_data = {`
			`'Student ID': student_id,`
			`'Student Type': student_type,`
			`'Major': major,`
			`'GPA': gpa,`
			`'SAT Score': sat_score,`
			`'ACT Score': act_score,`
			`'State': state,`
			`'Zip Code': zip_code,`
			`'Country': country,`
			`'First Generation': first_generation,`
			`'Need Rank': need_rank,`
			`'EFC': efc,`
			`'Financial Aid Package': financial_aid_package,`
			`'Published Tuition Price': published_tuition_price,`
			`'Discount Rate': discount_rate,`
			`'Funnel Stage': funnel_stage`
			`}`

			`data.append(student_data)`

			`return pd.DataFrame(data)`

			`# Generate synthetic data for 1000 students`
			`num_students = 1000`
			`student_df = generate_student_data(num_students)`

			`# Save the data to a CSV file`
			`student_df.to_csv('synthetic_student_data.csv', index=False)`

			`# Display the first few rows of the generated data`
			`print(student_df.head())`