initial commit

This commit is contained in:
Mahesh Kommareddi 2024-06-10 18:29:30 -04:00
commit ee58da0890
9 changed files with 3272 additions and 0 deletions

65
discount-rate.py Normal file
View File

@ -0,0 +1,65 @@
import random
import pandas as pd
from faker import Faker
# Shared Faker instance; generate_student_data() uses it for student IDs,
# majors, states, ZIP codes and countries.
fake = Faker()
def generate_student_data(num_students):
    """Build a DataFrame of synthetic student records.

    Every row mixes Faker-generated identity and location fields with randomly
    drawn academic and financial attributes. The discount rate is the aid
    package expressed as a percentage of the published tuition price.

    Args:
        num_students: Number of rows to generate. Zero or a negative value
            produces an empty frame that still carries every column.

    Returns:
        pandas.DataFrame with one row per student.
    """
    # Declared up front so an empty result keeps the same schema as a full one.
    columns = [
        'Student ID', 'Student Type', 'Major', 'GPA', 'SAT Score', 'ACT Score',
        'State', 'Zip Code', 'Country', 'First Generation', 'Need Rank', 'EFC',
        'Financial Aid Package', 'Published Tuition Price', 'Discount Rate',
        'Funnel Stage',
    ]
    student_types = ['First-time Freshmen', 'Transfer Student', 'Graduate Student', 'Audit Student']
    funnel_stages = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Waitlisted', 'Denied', 'Deposited', 'Enrolled']

    data = []
    for _ in range(num_students):
        # Generate basic student data
        student_id = fake.unique.uuid4()
        student_type = random.choice(student_types)
        major = fake.word().capitalize()
        gpa = round(random.uniform(2.0, 4.0), 2)
        sat_score = random.randint(800, 1600)
        act_score = random.randint(1, 36)
        state = fake.state()
        zip_code = fake.zipcode()
        country = fake.country()
        first_generation = random.choice([True, False])
        need_rank = random.randint(1, 6)
        efc = random.randint(0, 100000)  # Expected Family Contribution

        # Published tuition price is drawn first so the aid package can be
        # capped by it; otherwise aid could exceed tuition and yield a
        # discount rate above 100%.
        published_tuition_price = random.randint(30000, 70000)
        financial_aid_package = random.randint(1000, min(50000, published_tuition_price))
        discount_rate = round((financial_aid_package / published_tuition_price) * 100, 2)

        # Enrollment funnel stage
        funnel_stage = random.choice(funnel_stages)

        data.append({
            'Student ID': student_id,
            'Student Type': student_type,
            'Major': major,
            'GPA': gpa,
            'SAT Score': sat_score,
            'ACT Score': act_score,
            'State': state,
            'Zip Code': zip_code,
            'Country': country,
            'First Generation': first_generation,
            'Need Rank': need_rank,
            'EFC': efc,
            'Financial Aid Package': financial_aid_package,
            'Published Tuition Price': published_tuition_price,
            'Discount Rate': discount_rate,
            'Funnel Stage': funnel_stage,
        })
    return pd.DataFrame(data, columns=columns)
# Script driver: build a 1000-row sample, persist it, then show a preview.
num_students = 1000
student_df = generate_student_data(num_students=num_students)

# Write the frame to disk without the pandas index column.
output_csv = 'synthetic_student_data.csv'
student_df.to_csv(output_csv, index=False)

# head() with no argument returns the first five rows.
preview = student_df.head()
print(preview)

7
enrollment_funnel.csv Normal file
View File

@ -0,0 +1,7 @@
Stage,Number of Students,Major,Average GPA,Average SAT Score,Average ACT Score,FAFSA Submitted (%)
Lead,623,Health Sciences,3.5316770756441245,907,31,74.35827527016656
Prospect,2493,Engineering,3.267485520320892,1524,31,82.22203363168144
Applied,3217,Science,2.7354665240742904,1152,19,77.48081160042938
Admitted,2693,Arts,3.065928947863109,1591,30,41.523607636679706
Deposited,2068,Science,2.50389253657547,1202,31,83.36074905373616
Enrolled,1853,Science,3.8024516605533805,1538,24,57.707333539656275
1 Stage Number of Students Major Average GPA Average SAT Score Average ACT Score FAFSA Submitted (%)
2 Lead 623 Health Sciences 3.5316770756441245 907 31 74.35827527016656
3 Prospect 2493 Engineering 3.267485520320892 1524 31 82.22203363168144
4 Applied 3217 Science 2.7354665240742904 1152 19 77.48081160042938
5 Admitted 2693 Arts 3.065928947863109 1591 30 41.523607636679706
6 Deposited 2068 Science 2.50389253657547 1202 31 83.36074905373616
7 Enrolled 1853 Science 3.8024516605533805 1538 24 57.707333539656275

61
enrollment_management.csv Normal file
View File

@ -0,0 +1,61 @@
Term,Cohort Size,Financial Aid Packages,Discount Rate (%),First-time Freshmen (%),Transfer Students (%),Graduate Students (%),Audit Students (%)
Fall,441,6816,33.982362667289095,58.77819010304718,29.918624840606583,10.7630274706883,1.1875858711648797
Spring,184,5569,59.08610601342477,64.61280531313804,11.117423093547,16.05651058764529,2.0746899281012294
Summer,244,10442,51.43785312539856,59.544543155733635,24.740711135820177,18.728815125036597,1.0887389681212065
Fall,450,6895,31.232025503036272,46.20053079630236,20.91831540329978,19.380536111117692,2.9926607465835717
Spring,177,24117,41.96462704334384,48.218833819634014,24.11662967757389,5.867958466365611,2.9048427871560567
Summer,82,8863,43.005622127422264,46.437673591251226,29.3730388794827,10.917826979121948,4.325485960250004
Fall,225,17913,52.32127928997346,51.317772496839275,23.76060020350078,6.601340989038266,2.2311089590841977
Spring,492,12455,37.525815820399835,41.16889396930701,26.739277484747475,10.034900971954448,4.265542940852249
Summer,420,9014,35.5300102299411,58.547616530136786,27.337374106216828,7.5451985584334835,4.871895061759292
Fall,424,16093,32.42618899851593,50.096626446661794,26.7696152752797,14.702725425030858,1.3536328934625863
Spring,71,23070,42.84943424820323,59.67167906142227,18.52182950093882,10.824025759848084,4.167271357878532
Summer,287,8009,50.65499702296099,51.56189862896695,14.45152835142061,8.440921185574245,3.3598235885505443
Fall,207,21538,31.745807865253308,60.448410069183694,17.933032039085038,8.989068874463584,2.9201838680739005
Spring,87,15729,57.456411828794415,50.2187370839272,27.837938175535534,10.405176428079024,2.6821431146603003
Summer,279,6409,43.27056689193313,47.82083584707709,12.93209643384051,8.89924794281394,4.138673055777238
Fall,414,17249,37.1936207747221,54.881123628802186,20.266523373847857,11.798612702432465,3.5574454278969694
Spring,100,5784,32.816198702438754,60.78671076075818,14.664561448490556,5.484739266644757,4.2201786700774475
Summer,487,13096,35.48597991321922,50.45009813359592,21.626108344535645,9.196452607534185,4.612604234706505
Fall,313,12560,58.03841992019129,68.09944455567123,27.262770439780077,11.168100813082795,3.469054848389168
Spring,332,17533,49.14811781530051,41.175589800269385,27.607199372768335,14.041728230349477,4.9218509003695115
Summer,76,12343,45.500887722797,52.53838095146736,14.733703873641575,9.064364637726948,3.432351398167541
Fall,275,12206,49.713339855005,69.02741654560742,28.153874126970926,6.9977768783562455,3.5465772864891303
Spring,326,10801,43.07018696033673,56.439156497442625,21.837774776425686,6.143005693987448,3.2192624341689027
Summer,335,24190,51.90117949685455,52.704128269243554,17.00436493400627,19.107977407030752,1.3640083642053211
Fall,146,21921,31.431483830749464,57.05560851249875,24.163623911663997,11.249585866077188,3.905588130600913
Spring,333,10986,46.98111631482229,57.27773725774254,19.633339759282308,13.717439317712394,3.1897852275964462
Summer,416,23225,34.75939342927473,61.94943265898921,17.55975180244509,18.787648277533393,2.8036417896644177
Fall,497,15647,33.60493943416927,43.83069188267397,24.101687285933195,6.241226300529592,4.6418851136130925
Spring,366,13716,40.25639000149205,47.50049347648314,14.974483088605758,18.14992228888689,2.191837806167802
Summer,353,24334,32.75397197440326,57.416320370317294,16.60505058038197,13.27381808816105,3.0944091040600448
Fall,196,17323,32.8247096480568,66.01349821629167,18.689036889765426,7.472513917055014,3.7905674859738534
Spring,53,9780,39.34239928173883,56.85600076431321,15.073643477462774,11.168826748780535,4.185887102695509
Summer,84,7368,59.38531585864526,47.157905793710206,18.104041735628343,16.664034256028383,2.8373872317846898
Fall,241,17039,35.25990809668016,60.39534339700737,21.429460504100213,12.20555122799532,4.368365660074838
Spring,98,11655,30.514833054952508,62.19726281342123,24.819477218213365,19.779290759982565,4.07567096526159
Summer,66,13173,52.90093269011733,47.14708457211938,25.343897062171436,10.651084549069111,1.2649439114074634
Fall,221,9495,54.207389311523386,51.33186658528885,26.455807037533862,16.24367449964108,1.1834450656185318
Spring,269,15893,40.38912963268203,56.0298242059169,24.88423313886596,10.894841734682887,3.483222737688365
Summer,207,18403,43.940214388188345,54.896835720492334,23.620788541938182,17.437463310597714,2.389653632901657
Fall,95,18121,49.4932104792829,51.688542586576105,14.750129425849398,13.536222037486215,1.8365231643140567
Spring,422,15966,31.441767725911014,48.92905523527933,18.004457857440496,5.95267744385002,3.3185997378869536
Summer,55,5853,58.474371947741574,42.9995466148559,19.554297817604553,5.552328014539871,2.36625284182618
Fall,148,21958,56.60041161894142,41.60455890242107,11.657826433867085,7.007781782009498,3.1490536668562803
Spring,429,22532,37.82680870025142,68.75624490649595,20.567402425944184,5.205079472404959,2.840476635934288
Summer,282,14907,30.45913620871154,65.41429432286769,18.72671663213112,6.130385905286935,3.3390644237966147
Fall,86,9777,58.00308924238449,50.64715571388162,26.04218498053468,15.37571595753368,2.6012019556411254
Spring,329,20338,45.03119651745777,68.70402655379368,29.558013375085416,13.015194125441946,3.790670294787727
Summer,398,7491,46.18132345200188,60.3030971327291,21.120100772851693,16.24866124204958,1.7202690893971941
Fall,351,20328,50.518913081944234,54.47562848927927,16.453727765551672,18.69748628386464,3.786005864908316
Spring,230,19599,48.47553493169741,54.79076973315415,10.868015665963455,13.77724298488208,2.6466448574301125
Summer,144,24508,58.31674813682271,42.49853233585779,28.49286660447192,15.89181266107772,4.497270419417669
Fall,148,8051,58.327547913755026,42.75112441775443,28.38231430557319,16.35621803063684,3.0609442194568435
Spring,237,20979,56.01596800665011,58.073227777033566,15.059803359784823,10.667758239136797,4.892441397080417
Summer,165,21388,49.09210792338034,56.61109157037402,23.90822246213295,8.616272484944478,3.407741582163841
Fall,240,5876,54.02847884047199,46.38183685072687,11.508690941795386,8.075679927597873,1.8953962636252824
Spring,302,9142,50.315050271489454,68.38583628645071,13.324308638185059,8.771636592676977,4.287162547377797
Summer,465,20202,47.201011250158004,63.43888151413608,14.336182131729824,9.120976955388631,2.380330511289547
Fall,210,11966,33.855010597017305,43.403938060523444,15.889878936788598,8.108414735120846,2.390476857930734
Spring,305,12079,54.33612530208009,67.92787317061878,29.916627500587413,18.173310012187883,1.1272187259406605
Summer,372,12987,54.61918427231866,69.22744625603231,23.938501117276296,16.35498698258343,3.1948612368296345
1 Term Cohort Size Financial Aid Packages Discount Rate (%) First-time Freshmen (%) Transfer Students (%) Graduate Students (%) Audit Students (%)
2 Fall 441 6816 33.982362667289095 58.77819010304718 29.918624840606583 10.7630274706883 1.1875858711648797
3 Spring 184 5569 59.08610601342477 64.61280531313804 11.117423093547 16.05651058764529 2.0746899281012294
4 Summer 244 10442 51.43785312539856 59.544543155733635 24.740711135820177 18.728815125036597 1.0887389681212065
5 Fall 450 6895 31.232025503036272 46.20053079630236 20.91831540329978 19.380536111117692 2.9926607465835717
6 Spring 177 24117 41.96462704334384 48.218833819634014 24.11662967757389 5.867958466365611 2.9048427871560567
7 Summer 82 8863 43.005622127422264 46.437673591251226 29.3730388794827 10.917826979121948 4.325485960250004
8 Fall 225 17913 52.32127928997346 51.317772496839275 23.76060020350078 6.601340989038266 2.2311089590841977
9 Spring 492 12455 37.525815820399835 41.16889396930701 26.739277484747475 10.034900971954448 4.265542940852249
10 Summer 420 9014 35.5300102299411 58.547616530136786 27.337374106216828 7.5451985584334835 4.871895061759292
11 Fall 424 16093 32.42618899851593 50.096626446661794 26.7696152752797 14.702725425030858 1.3536328934625863
12 Spring 71 23070 42.84943424820323 59.67167906142227 18.52182950093882 10.824025759848084 4.167271357878532
13 Summer 287 8009 50.65499702296099 51.56189862896695 14.45152835142061 8.440921185574245 3.3598235885505443
14 Fall 207 21538 31.745807865253308 60.448410069183694 17.933032039085038 8.989068874463584 2.9201838680739005
15 Spring 87 15729 57.456411828794415 50.2187370839272 27.837938175535534 10.405176428079024 2.6821431146603003
16 Summer 279 6409 43.27056689193313 47.82083584707709 12.93209643384051 8.89924794281394 4.138673055777238
17 Fall 414 17249 37.1936207747221 54.881123628802186 20.266523373847857 11.798612702432465 3.5574454278969694
18 Spring 100 5784 32.816198702438754 60.78671076075818 14.664561448490556 5.484739266644757 4.2201786700774475
19 Summer 487 13096 35.48597991321922 50.45009813359592 21.626108344535645 9.196452607534185 4.612604234706505
20 Fall 313 12560 58.03841992019129 68.09944455567123 27.262770439780077 11.168100813082795 3.469054848389168
21 Spring 332 17533 49.14811781530051 41.175589800269385 27.607199372768335 14.041728230349477 4.9218509003695115
22 Summer 76 12343 45.500887722797 52.53838095146736 14.733703873641575 9.064364637726948 3.432351398167541
23 Fall 275 12206 49.713339855005 69.02741654560742 28.153874126970926 6.9977768783562455 3.5465772864891303
24 Spring 326 10801 43.07018696033673 56.439156497442625 21.837774776425686 6.143005693987448 3.2192624341689027
25 Summer 335 24190 51.90117949685455 52.704128269243554 17.00436493400627 19.107977407030752 1.3640083642053211
26 Fall 146 21921 31.431483830749464 57.05560851249875 24.163623911663997 11.249585866077188 3.905588130600913
27 Spring 333 10986 46.98111631482229 57.27773725774254 19.633339759282308 13.717439317712394 3.1897852275964462
28 Summer 416 23225 34.75939342927473 61.94943265898921 17.55975180244509 18.787648277533393 2.8036417896644177
29 Fall 497 15647 33.60493943416927 43.83069188267397 24.101687285933195 6.241226300529592 4.6418851136130925
30 Spring 366 13716 40.25639000149205 47.50049347648314 14.974483088605758 18.14992228888689 2.191837806167802
31 Summer 353 24334 32.75397197440326 57.416320370317294 16.60505058038197 13.27381808816105 3.0944091040600448
32 Fall 196 17323 32.8247096480568 66.01349821629167 18.689036889765426 7.472513917055014 3.7905674859738534
33 Spring 53 9780 39.34239928173883 56.85600076431321 15.073643477462774 11.168826748780535 4.185887102695509
34 Summer 84 7368 59.38531585864526 47.157905793710206 18.104041735628343 16.664034256028383 2.8373872317846898
35 Fall 241 17039 35.25990809668016 60.39534339700737 21.429460504100213 12.20555122799532 4.368365660074838
36 Spring 98 11655 30.514833054952508 62.19726281342123 24.819477218213365 19.779290759982565 4.07567096526159
37 Summer 66 13173 52.90093269011733 47.14708457211938 25.343897062171436 10.651084549069111 1.2649439114074634
38 Fall 221 9495 54.207389311523386 51.33186658528885 26.455807037533862 16.24367449964108 1.1834450656185318
39 Spring 269 15893 40.38912963268203 56.0298242059169 24.88423313886596 10.894841734682887 3.483222737688365
40 Summer 207 18403 43.940214388188345 54.896835720492334 23.620788541938182 17.437463310597714 2.389653632901657
41 Fall 95 18121 49.4932104792829 51.688542586576105 14.750129425849398 13.536222037486215 1.8365231643140567
42 Spring 422 15966 31.441767725911014 48.92905523527933 18.004457857440496 5.95267744385002 3.3185997378869536
43 Summer 55 5853 58.474371947741574 42.9995466148559 19.554297817604553 5.552328014539871 2.36625284182618
44 Fall 148 21958 56.60041161894142 41.60455890242107 11.657826433867085 7.007781782009498 3.1490536668562803
45 Spring 429 22532 37.82680870025142 68.75624490649595 20.567402425944184 5.205079472404959 2.840476635934288
46 Summer 282 14907 30.45913620871154 65.41429432286769 18.72671663213112 6.130385905286935 3.3390644237966147
47 Fall 86 9777 58.00308924238449 50.64715571388162 26.04218498053468 15.37571595753368 2.6012019556411254
48 Spring 329 20338 45.03119651745777 68.70402655379368 29.558013375085416 13.015194125441946 3.790670294787727
49 Summer 398 7491 46.18132345200188 60.3030971327291 21.120100772851693 16.24866124204958 1.7202690893971941
50 Fall 351 20328 50.518913081944234 54.47562848927927 16.453727765551672 18.69748628386464 3.786005864908316
51 Spring 230 19599 48.47553493169741 54.79076973315415 10.868015665963455 13.77724298488208 2.6466448574301125
52 Summer 144 24508 58.31674813682271 42.49853233585779 28.49286660447192 15.89181266107772 4.497270419417669
53 Fall 148 8051 58.327547913755026 42.75112441775443 28.38231430557319 16.35621803063684 3.0609442194568435
54 Spring 237 20979 56.01596800665011 58.073227777033566 15.059803359784823 10.667758239136797 4.892441397080417
55 Summer 165 21388 49.09210792338034 56.61109157037402 23.90822246213295 8.616272484944478 3.407741582163841
56 Fall 240 5876 54.02847884047199 46.38183685072687 11.508690941795386 8.075679927597873 1.8953962636252824
57 Spring 302 9142 50.315050271489454 68.38583628645071 13.324308638185059 8.771636592676977 4.287162547377797
58 Summer 465 20202 47.201011250158004 63.43888151413608 14.336182131729824 9.120976955388631 2.380330511289547
59 Fall 210 11966 33.855010597017305 43.403938060523444 15.889878936788598 8.108414735120846 2.390476857930734
60 Spring 305 12079 54.33612530208009 67.92787317061878 29.916627500587413 18.173310012187883 1.1272187259406605
61 Summer 372 12987 54.61918427231866 69.22744625603231 23.938501117276296 16.35498698258343 3.1948612368296345

72
melt-rate.py Normal file
View File

@ -0,0 +1,72 @@
import pandas as pd
import numpy as np
# Set random seed for reproducibility
np.random.seed(42)

# Relative share of students placed in each funnel stage when the cohort is
# generated. simulate_progression() reuses the same numbers as the chance of
# advancing into each stage.
stage_probabilities = {
    'Lead': 0.25,
    'Prospect': 0.20,
    'Applied': 0.25,
    'Admitted': 0.24,
    'Deposited': 0.05,
    'Enrolled': 0.01
}

# Rescale so the weights sum to 1, which np.random.choice requires of `p`.
total_probability = sum(stage_probabilities.values())
stage_probabilities_normalized = {k: v / total_probability for k, v in stage_probabilities.items()}

# Generate synthetic data for enrollment funnel with student IDs
num_students = 30000
stages = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
majors = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

# One row per student.
# NOTE(review): the "Average ..." columns hold per-student values here; the
# names appear to mirror the aggregate enrollment_funnel.csv schema.
enrollment_funnel = {
    'Student_ID': [f'S{1000+i}' for i in range(num_students)],
    'Stage': np.random.choice(list(stage_probabilities_normalized.keys()), size=num_students,
                              p=list(stage_probabilities_normalized.values())),
    'Major': np.random.choice(majors, size=num_students),
    'Average GPA': np.random.uniform(2.5, 4.0, size=num_students),
    # np.random.randint excludes `high`, so the bound is the top score + 1;
    # with 1600 / 36 as `high` a perfect SAT or ACT score was unreachable.
    'Average SAT Score': np.random.randint(900, 1601, size=num_students),
    'Average ACT Score': np.random.randint(18, 37, size=num_students),
    'FAFSA Submitted (%)': np.random.uniform(40, 90, size=num_students)
}

# Create DataFrame
df_enrollment_funnel = pd.DataFrame(enrollment_funnel)
# Function to simulate student progression through stages
def simulate_progression(df, stage_order=None, transition_probs=None):
    """Advance students through the funnel, one stage transition at a time.

    Stages are processed in order, so a student who moves into a stage is
    eligible to move again when the next transition is evaluated in the same
    call. Random draws come from NumPy's global generator, one array of
    ``len(df)`` uniform values per transition.

    Args:
        df: DataFrame with a 'Stage' column. It is not modified.
        stage_order: Ordered sequence of stage names. Defaults to the
            module-level ``stages`` list.
        transition_probs: Mapping from a stage name to the probability of
            advancing into it from the preceding stage. Stages missing from
            the mapping use 0.5. Defaults to the module-level
            ``stage_probabilities`` dictionary.

    Returns:
        A copy of ``df`` with the 'Stage' column updated.
    """
    if stage_order is None:
        stage_order = stages
    if transition_probs is None:
        transition_probs = stage_probabilities

    progression = df.copy()
    # Walk consecutive (previous, next) stage pairs instead of looking up
    # each stage's index on every iteration.
    for previous_stage, stage in zip(stage_order, stage_order[1:]):
        transition_prob = transition_probs.get(stage, 0.5)
        in_previous_stage = progression['Stage'] == previous_stage
        progressed = in_previous_stage & (np.random.rand(len(progression)) < transition_prob)
        progression.loc[progressed, 'Stage'] = stage
    return progression
# Simulate student progression through stages
df_enrollment_funnel = simulate_progression(df_enrollment_funnel)

# Every student carries exactly one stage label, so these two groups are
# disjoint: 'Deposited' holds students who stopped there, 'Enrolled' holds
# students who went on to enroll.
deposited_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Deposited']
enrolled_students = df_enrollment_funnel[df_enrollment_funnel['Stage'] == 'Enrolled']

# Melted students are the ones still at 'Deposited'. Filtering them against
# the enrolled IDs could never remove anyone, because no ID is in both groups.
melted_students = deposited_students

# Melt rate = melted / everyone who reached the deposit stage. Dividing by the
# melted group alone always gave 100%.
# NOTE(review): assumes every 'Enrolled' student paid a deposit first - confirm.
total_deposited = len(deposited_students) + len(enrolled_students)
total_melted = len(melted_students)
melt_rate = (total_melted / total_deposited) * 100 if total_deposited > 0 else 0

print(f"Total Deposited Students: {total_deposited}")
print(f"Total Melted Students: {total_melted}")
print(f"Melt Rate: {melt_rate:.2f}%")

# Save melted students to CSV for further analysis
melted_students.to_csv('melted_students.csv', index=False)
print("Melt rate calculated and melted students identified.")

1951
melted_students.csv Normal file

File diff suppressed because it is too large Load Diff

21
private_universities.csv Normal file
View File

@ -0,0 +1,21 @@
University,Funding from Tuition and Donations (%),Board Size,President Reports To,Enrollment,Average Class Size,Student-Faculty Ratio,Endowment per Student ($),Retention Rate (%)
Private University 1,100,48,Board of Trustees,3433,27,11.0,48044,86.3
Private University 2,100,38,Board of Trustees,6311,13,9.5,61214,92.9
Private University 3,100,24,Board of Trustees,6051,23,12.0,71228,91.3
Private University 4,100,17,Board of Trustees,7420,27,8.2,58984,81.2
Private University 5,100,30,Board of Trustees,18568,18,13.9,50774,72.4
Private University 6,100,48,Board of Trustees,7396,11,11.1,12568,79.3
Private University 7,100,28,Board of Trustees,9666,29,10.8,72592,86.7
Private University 8,100,32,Board of Trustees,19942,24,14.5,77563,86.6
Private University 9,100,20,Board of Trustees,19431,16,13.1,12695,84.8
Private University 10,100,20,Board of Trustees,3747,21,10.3,58190,76.9
Private University 11,100,33,Board of Trustees,1189,17,12.0,15258,84.0
Private University 12,100,45,Board of Trustees,4005,24,11.6,97538,79.6
Private University 13,100,49,Board of Trustees,2899,12,14.7,49504,94.3
Private University 14,100,33,Board of Trustees,2267,23,13.9,43159,91.2
Private University 15,100,12,Board of Trustees,18912,26,13.2,23986,88.0
Private University 16,100,31,Board of Trustees,12394,13,11.8,71858,75.9
Private University 17,100,11,Board of Trustees,4556,27,12.1,22666,76.4
Private University 18,100,33,Board of Trustees,4890,17,14.8,48660,71.0
Private University 19,100,39,Board of Trustees,9838,13,12.2,13561,87.8
Private University 20,100,47,Board of Trustees,15502,11,9.9,36854,72.8
1 University Funding from Tuition and Donations (%) Board Size President Reports To Enrollment Average Class Size Student-Faculty Ratio Endowment per Student ($) Retention Rate (%)
2 Private University 1 100 48 Board of Trustees 3433 27 11.0 48044 86.3
3 Private University 2 100 38 Board of Trustees 6311 13 9.5 61214 92.9
4 Private University 3 100 24 Board of Trustees 6051 23 12.0 71228 91.3
5 Private University 4 100 17 Board of Trustees 7420 27 8.2 58984 81.2
6 Private University 5 100 30 Board of Trustees 18568 18 13.9 50774 72.4
7 Private University 6 100 48 Board of Trustees 7396 11 11.1 12568 79.3
8 Private University 7 100 28 Board of Trustees 9666 29 10.8 72592 86.7
9 Private University 8 100 32 Board of Trustees 19942 24 14.5 77563 86.6
10 Private University 9 100 20 Board of Trustees 19431 16 13.1 12695 84.8
11 Private University 10 100 20 Board of Trustees 3747 21 10.3 58190 76.9
12 Private University 11 100 33 Board of Trustees 1189 17 12.0 15258 84.0
13 Private University 12 100 45 Board of Trustees 4005 24 11.6 97538 79.6
14 Private University 13 100 49 Board of Trustees 2899 12 14.7 49504 94.3
15 Private University 14 100 33 Board of Trustees 2267 23 13.9 43159 91.2
16 Private University 15 100 12 Board of Trustees 18912 26 13.2 23986 88.0
17 Private University 16 100 31 Board of Trustees 12394 13 11.8 71858 75.9
18 Private University 17 100 11 Board of Trustees 4556 27 12.1 22666 76.4
19 Private University 18 100 33 Board of Trustees 4890 17 14.8 48660 71.0
20 Private University 19 100 39 Board of Trustees 9838 13 12.2 13561 87.8
21 Private University 20 100 47 Board of Trustees 15502 11 9.9 36854 72.8

21
public_universities.csv Normal file
View File

@ -0,0 +1,21 @@
University,Funding from State (%),Board Size,President Reports To,Enrollment,State Funding Stability,Average Class Size,Student-Faculty Ratio,Endowment per Student ($),Retention Rate (%)
Public University 1,47,11,State’s Regents,30230,Stable,46,23.2,35306,70.1
Public University 2,32,11,State’s Regents,35707,Variable,46,23.0,21646,82.2
Public University 3,38,14,State’s Regents,41976,Variable,31,16.5,36065,81.3
Public University 4,50,12,State’s Regents,64262,Variable,31,20.1,30199,83.4
Public University 5,26,10,State’s Regents,43776,Variable,34,22.0,46976,79.6
Public University 6,36,10,State’s Regents,50080,Stable,30,23.6,21371,76.7
Public University 7,39,13,State’s Regents,21306,Variable,30,18.3,16835,74.5
Public University 8,48,14,State’s Regents,26776,Stable,48,17.2,7049,69.3
Public University 9,23,9,State’s Regents,47251,Variable,31,22.1,36616,83.5
Public University 10,49,13,State’s Regents,29474,Stable,41,23.1,43191,84.3
Public University 11,24,14,State’s Regents,60294,Variable,35,18.5,25932,67.1
Public University 12,42,12,State’s Regents,41959,Variable,33,16.0,34855,67.6
Public University 13,26,12,State’s Regents,25530,Variable,40,24.4,12158,72.1
Public University 14,32,12,State’s Regents,49320,Variable,46,19.0,48016,71.2
Public University 15,34,12,State’s Regents,23748,Stable,35,20.2,12400,84.9
Public University 16,30,12,State’s Regents,51968,Variable,34,23.4,47642,64.4
Public University 17,48,14,State’s Regents,52562,Stable,49,21.8,20151,60.5
Public University 18,23,14,State’s Regents,33545,Stable,31,22.4,6154,72.3
Public University 19,32,11,State’s Regents,20663,Stable,35,17.1,9499,64.5
Public University 20,26,10,State’s Regents,54766,Stable,40,20.4,11295,69.2
1 University Funding from State (%) Board Size President Reports To Enrollment State Funding Stability Average Class Size Student-Faculty Ratio Endowment per Student ($) Retention Rate (%)
2 Public University 1 47 11 State’s Regents 30230 Stable 46 23.2 35306 70.1
3 Public University 2 32 11 State’s Regents 35707 Variable 46 23.0 21646 82.2
4 Public University 3 38 14 State’s Regents 41976 Variable 31 16.5 36065 81.3
5 Public University 4 50 12 State’s Regents 64262 Variable 31 20.1 30199 83.4
6 Public University 5 26 10 State’s Regents 43776 Variable 34 22.0 46976 79.6
7 Public University 6 36 10 State’s Regents 50080 Stable 30 23.6 21371 76.7
8 Public University 7 39 13 State’s Regents 21306 Variable 30 18.3 16835 74.5
9 Public University 8 48 14 State’s Regents 26776 Stable 48 17.2 7049 69.3
10 Public University 9 23 9 State’s Regents 47251 Variable 31 22.1 36616 83.5
11 Public University 10 49 13 State’s Regents 29474 Stable 41 23.1 43191 84.3
12 Public University 11 24 14 State’s Regents 60294 Variable 35 18.5 25932 67.1
13 Public University 12 42 12 State’s Regents 41959 Variable 33 16.0 34855 67.6
14 Public University 13 26 12 State’s Regents 25530 Variable 40 24.4 12158 72.1
15 Public University 14 32 12 State’s Regents 49320 Variable 46 19.0 48016 71.2
16 Public University 15 34 12 State’s Regents 23748 Stable 35 20.2 12400 84.9
17 Public University 16 30 12 State’s Regents 51968 Variable 34 23.4 47642 64.4
18 Public University 17 48 14 State’s Regents 52562 Stable 49 21.8 20151 60.5
19 Public University 18 23 14 State’s Regents 33545 Stable 31 22.4 6154 72.3
20 Public University 19 32 11 State’s Regents 20663 Stable 35 17.1 9499 64.5
21 Public University 20 26 10 State’s Regents 54766 Stable 40 20.4 11295 69.2

73
small-university.py Normal file
View File

@ -0,0 +1,73 @@
import pandas as pd
import numpy as np
from faker import Faker
# Initialize Faker for generating fake data
# NOTE(review): `fake` is not referenced by any other statement visible in
# this script; every generated value below comes from NumPy's random module.
fake = Faker()
# Seed NumPy's global generator so repeated runs emit identical tables.
np.random.seed(42)

# ---- Small private universities ----
# Columns are drawn one after another from the seeded generator, so the order
# of these draws decides which values land in which column.
private_count = 20
private_board_size = np.random.randint(10, 51, size=private_count)
private_enrollment = np.random.randint(1000, 20001, size=private_count)
private_class_size = np.random.randint(10, 30, size=private_count)
private_ratio = np.random.uniform(8, 15, size=private_count).round(1)
private_endowment = np.random.randint(10000, 100000, size=private_count)
private_retention = np.random.uniform(70, 95, size=private_count).round(1)

private_universities = {
    'University': ['Private University ' + str(number) for number in range(1, private_count + 1)],
    # Constant columns: the same value for every institution.
    'Funding from Tuition and Donations (%)': [100 for _ in range(private_count)],
    'Board Size': private_board_size,
    'President Reports To': ['Board of Trustees' for _ in range(private_count)],
    'Enrollment': private_enrollment,
    'Average Class Size': private_class_size,
    'Student-Faculty Ratio': private_ratio,
    'Endowment per Student ($)': private_endowment,
    'Retention Rate (%)': private_retention,
}
# Generate synthetic data for public universities.
# Each value is a 20-element column; numeric columns are drawn from NumPy's
# global generator in the order they appear here.
public_universities = {
    'University': [f'Public University {i}' for i in range(1, 21)],
    'Funding from State (%)': np.random.randint(20, 51, size=20),
    'Board Size': np.random.randint(9, 16, size=20),
    # The literal read 'States Regents' with the apostrophe missing; the
    # rendered public_universities.csv shows "State’s Regents". The escape
    # \u2019 spells out the typographic apostrophe so it cannot be dropped
    # silently again.
    'President Reports To': ['State\u2019s Regents'] * 20,
    'Enrollment': np.random.randint(20000, 70001, size=20),
    'State Funding Stability': np.random.choice(['Stable', 'Variable'], size=20),
    'Average Class Size': np.random.randint(30, 50, size=20),
    'Student-Faculty Ratio': np.round(np.random.uniform(15, 25, size=20), 1),
    'Endowment per Student ($)': np.random.randint(5000, 50000, size=20),
    'Retention Rate (%)': np.round(np.random.uniform(60, 85, size=20), 1)
}
# Generate synthetic data for enrollment management.
# 60 rows: the Fall/Spring/Summer cycle repeated 20 times. Numeric columns are
# drawn from NumPy's global generator in the order they appear here.
enrollment_management = {
    'Term': ['Fall', 'Spring', 'Summer'] * 20,
    'Cohort Size': np.random.randint(50, 500, size=60),
    'Financial Aid Packages': np.random.randint(5000, 25000, size=60),
    'Discount Rate (%)': np.random.uniform(30, 60, size=60),
    'First-time Freshmen (%)': np.random.uniform(40, 70, size=60),
    'Transfer Students (%)': np.random.uniform(10, 30, size=60),
    'Graduate Students (%)': np.random.uniform(5, 20, size=60),
    'Audit Students (%)': np.random.uniform(1, 5, size=60)
}

# The four student-type shares describe one cohort's composition, but they are
# drawn independently and their raw sum ranges from 56 to 125. Rescale each
# row so the shares add up to 100% while keeping their relative proportions.
student_type_columns = [
    'First-time Freshmen (%)',
    'Transfer Students (%)',
    'Graduate Students (%)',
    'Audit Students (%)',
]
share_total = sum(enrollment_management[column] for column in student_type_columns)
for column in student_type_columns:
    enrollment_management[column] = enrollment_management[column] / share_total * 100
# ---- Enrollment funnel: one aggregate row per stage ----
funnel_stage_names = ['Lead', 'Prospect', 'Applied', 'Admitted', 'Deposited', 'Enrolled']
funnel_rows = len(funnel_stage_names)
funnel_major_pool = ['Science', 'Arts', 'Engineering', 'Business', 'Health Sciences']

# Draw the columns in the same order they appear in the table so the values
# taken from NumPy's global generator line up with the right column.
funnel_headcount = np.random.randint(100, 5000, size=funnel_rows)
funnel_major = np.random.choice(funnel_major_pool, size=funnel_rows)
funnel_gpa = np.random.uniform(2.5, 4.0, size=funnel_rows)
funnel_sat = np.random.randint(900, 1600, size=funnel_rows)
funnel_act = np.random.randint(18, 36, size=funnel_rows)
funnel_fafsa = np.random.uniform(40, 90, size=funnel_rows)

enrollment_funnel = {
    'Stage': funnel_stage_names,
    'Number of Students': funnel_headcount,
    'Major': funnel_major,
    'Average GPA': funnel_gpa,
    'Average SAT Score': funnel_sat,
    'Average ACT Score': funnel_act,
    'FAFSA Submitted (%)': funnel_fafsa,
}
# Wrap each column dictionary in a DataFrame, keeping the original names.
df_private, df_public, df_enrollment_management, df_enrollment_funnel = map(
    pd.DataFrame,
    (private_universities, public_universities, enrollment_management, enrollment_funnel),
)

# Write every table next to the script, without the pandas index column.
for csv_name, frame in (
    ('private_universities.csv', df_private),
    ('public_universities.csv', df_public),
    ('enrollment_management.csv', df_enrollment_management),
    ('enrollment_funnel.csv', df_enrollment_funnel),
):
    frame.to_csv(csv_name, index=False)

print("Synthetic data generated and saved to CSV files.")

1001
synthetic_student_data.csv Normal file

File diff suppressed because it is too large Load Diff