generated from streamlit/Interactive-Data-Explorer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
updated parquet file.py
35 lines (28 loc) · 1.37 KB
/
updated parquet file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import polars as pl
# Read the combined leagues dataset from its parquet file into a DataFrame.
df = pl.read_parquet(
    'data/combined_leagues.parquet',
)
# Names of the income-bracket columns, ordered from the lowest bracket to the
# highest. These are the columns summed to produce the per-row total.
income_columns = [
    # Under $20,000
    'Struggling (Less than $10,000)',
    'Getting By ($10,000 to $14,999)',
    'Getting By ($15,000 to $19,999)',
    # $20,000 to $34,999
    'Starting Out ($20,000 to $24,999)',
    'Starting Out ($25,000 to $29,999)',
    'Starting Out ($30,000 to $34,999)',
    # $35,000 to $49,999
    'Middle Class ($35,000 to $39,999)',
    'Middle Class ($40,000 to $44,999)',
    'Middle Class ($45,000 to $49,999)',
    # $50,000 to $99,999
    'Comfortable ($50,000 to $59,999)',
    'Comfortable ($60,000 to $74,999)',
    'Doing Well ($75,000 to $99,999)',
    # $100,000 and above
    'Prosperous ($100,000 to $124,999)',
    'Prosperous ($125,000 to $149,999)',
    'Wealthy ($150,000 to $199,999)',
    'Affluent ($200,000 or more)',
]
# Recompute 'Total Fans' from scratch. An existing column of that name is
# dropped first so its stale values are discarded and the recomputed column is
# added as a new column rather than overwriting the old one.
if 'Total Fans' in df.columns:
    df = df.drop('Total Fans')

# 'Total Fans' is the row-wise (horizontal) sum of all income-bracket columns.
df = df.with_columns([
    pl.sum_horizontal([pl.col(col) for col in income_columns]).alias('Total Fans')
])
# Normalise the casing of 'Fandom Level': lowercase every value, then apply
# title case, and write the result back under the same column name.
df = df.with_columns(
    pl.col("Fandom Level")
    .str.to_lowercase()
    .str.to_titlecase()
    .alias("Fandom Level")
)
# Write the result to a separate parquet file (the input file is left as it
# was), then report completion on stdout.
df.write_parquet(
    'data/updated_combined_leagues.parquet',
)
print(
    "Parquet file updated with Total Fans column and corrected Fandom Level."
)