Spaces:
Running
Running
Commit
·
9eda2f5
1
Parent(s):
7e9d407
Use short team names
Browse files
- app.py +4 -0
- convert.py +28 -3
- data.py +14 -2
- pitch_leaderboard.py +25 -15
- pitcher_overview.py +1 -1
- stats.py +76 -1
app.py
CHANGED
|
@@ -5,6 +5,9 @@ from pitcher_overview import create_pitcher_overview
|
|
| 5 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 6 |
|
| 7 |
updated = '2025-07-21'
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
if __name__ == '__main__':
|
| 10 |
with gr.Blocks() as app:
|
|
@@ -14,4 +17,5 @@ if __name__ == '__main__':
|
|
| 14 |
create_pitch_leaderboard()
|
| 15 |
|
| 16 |
gr.Markdown(f'Last updated: {updated}')
|
|
|
|
| 17 |
app.launch()
|
|
|
|
| 5 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 6 |
|
| 7 |
updated = '2025-07-21'
|
| 8 |
+
limitations = '''**General Limitations**
|
| 9 |
+
- Foreign players' names are in Hepburn romanization. Contact me if you need a card for a foreign player.
|
| 10 |
+
'''
|
| 11 |
|
| 12 |
if __name__ == '__main__':
|
| 13 |
with gr.Blocks() as app:
|
|
|
|
| 17 |
create_pitch_leaderboard()
|
| 18 |
|
| 19 |
gr.Markdown(f'Last updated: {updated}')
|
| 20 |
+
gr.Markdown(limitations)
|
| 21 |
app.launch()
|
convert.py
CHANGED
|
@@ -338,12 +338,31 @@ ball_kind_code_to_color = {
|
|
| 338 |
|
| 339 |
|
| 340 |
def get_text_color_from_color(color):
|
| 341 |
-
if color in ['gold', 'paleturquoise', 'turquoise']:
|
| 342 |
return 'black'
|
| 343 |
return 'white'
|
| 344 |
|
| 345 |
ball_kind_to_color = {ball_kind: ball_kind_code_to_color[ball_kind_code[code]] for code, ball_kind in ball_kind.items()}
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
team_to_color = {
|
| 348 |
'G': '#f69727',
|
| 349 |
'S': '#abcd05',
|
|
@@ -356,11 +375,17 @@ team_to_color = {
|
|
| 356 |
'L': '#00214b',
|
| 357 |
'M': '#efefef',
|
| 358 |
'B': '#baa834',
|
| 359 |
-
'H': '#fcc700'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
}
|
| 361 |
|
|
|
|
|
|
|
| 362 |
def get_text_color_from_team(team):
|
| 363 |
-
if team in ['DB', 'F', 'L', 'E']:
|
| 364 |
return 'white'
|
| 365 |
else:
|
| 366 |
return 'black'
|
|
|
|
| 338 |
|
| 339 |
|
| 340 |
def get_text_color_from_color(color):
|
| 341 |
+
if color in ['gold', 'khaki', 'paleturquoise', 'turquoise']:
|
| 342 |
return 'black'
|
| 343 |
return 'white'
|
| 344 |
|
| 345 |
ball_kind_to_color = {ball_kind: ball_kind_code_to_color[ball_kind_code[code]] for code, ball_kind in ball_kind.items()}
|
| 346 |
|
| 347 |
+
team_name_short = {
|
| 348 |
+
'G': 'Yomiuri',
|
| 349 |
+
'S': 'Yakult',
|
| 350 |
+
'DB': 'DeNA',
|
| 351 |
+
'D': 'Chunichi',
|
| 352 |
+
'T': 'Hanshin',
|
| 353 |
+
'C': 'Hiroshima',
|
| 354 |
+
'F': 'Nipponham',
|
| 355 |
+
'E': 'Rakuten',
|
| 356 |
+
'L': 'Seibu',
|
| 357 |
+
'M': 'Lotte',
|
| 358 |
+
'B': 'ORIX',
|
| 359 |
+
'H': 'SoftBank',
|
| 360 |
+
'PL': 'Pacific League',
|
| 361 |
+
'CL': 'Central League',
|
| 362 |
+
'WL': 'Western League', # Why is this in the data?
|
| 363 |
+
'EL': 'Eastern League', # Same with this
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
team_to_color = {
|
| 367 |
'G': '#f69727',
|
| 368 |
'S': '#abcd05',
|
|
|
|
| 375 |
'L': '#00214b',
|
| 376 |
'M': '#efefef',
|
| 377 |
'B': '#baa834',
|
| 378 |
+
'H': '#fcc700',
|
| 379 |
+
'PL': '#01a9e4',
|
| 380 |
+
'CL': '#129144',
|
| 381 |
+
'WL': '#552a8d',
|
| 382 |
+
'EL': '#068ed9'
|
| 383 |
}
|
| 384 |
|
| 385 |
+
team_names_short_to_color = {team_name: team_to_color[team] for team, team_name in team_name_short.items()}
|
| 386 |
+
|
| 387 |
def get_text_color_from_team(team):
|
| 388 |
+
if team in ['DB', 'F', 'L', 'E', 'DeNA', 'Nipponham', 'Seibu', 'Rakuten']:
|
| 389 |
return 'white'
|
| 390 |
else:
|
| 391 |
return 'black'
|
data.py
CHANGED
|
@@ -6,6 +6,7 @@ from huggingface_hub import snapshot_download
|
|
| 6 |
|
| 7 |
from convert import (
|
| 8 |
aux_global_id_to_code, presult,
|
|
|
|
| 9 |
ball_kind, ball_kind_code, general_ball_kind, general_ball_kind_code, lr,
|
| 10 |
game_kind
|
| 11 |
)
|
|
@@ -180,7 +181,17 @@ data_df = (
|
|
| 180 |
(pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
|
| 181 |
)
|
| 182 |
.with_columns(pl.max('new_batter').cast(pl.Int32).over(['gameId', pl.col('newFiveDigitSerialNumber').str.slice(offset=0, length=3)]).alias('inning_pas'))
|
| 183 |
-
.join(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
.with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
|
| 185 |
.with_columns(
|
| 186 |
(pl.col('date') + '_' + pl.col('VisitorTeamNameES') + '_' + pl.col('HomeTeamNameES') + '_' + pl.col('newFiveDigitSerialNumber')).alias('universal_code') + '_' + pl.col('atBatBallCount'),
|
|
@@ -231,7 +242,8 @@ data_df = (
|
|
| 231 |
.otherwise('GameKindName')
|
| 232 |
.alias('coarse_game_kind'),
|
| 233 |
|
| 234 |
-
pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team')
|
|
|
|
| 235 |
)
|
| 236 |
.with_columns(
|
| 237 |
pl.col('presult_id').replace_strict(presult).alias('presult')
|
|
|
|
| 6 |
|
| 7 |
from convert import (
|
| 8 |
aux_global_id_to_code, presult,
|
| 9 |
+
team_name_short,
|
| 10 |
ball_kind, ball_kind_code, general_ball_kind, general_ball_kind_code, lr,
|
| 11 |
game_kind
|
| 12 |
)
|
|
|
|
| 181 |
(pl.col('half_inning') + pl.col('new_batter')).alias('newFiveDigitSerialNumber')
|
| 182 |
)
|
| 183 |
.with_columns(pl.max('new_batter').cast(pl.Int32).over(['gameId', pl.col('newFiveDigitSerialNumber').str.slice(offset=0, length=3)]).alias('inning_pas'))
|
| 184 |
+
.join(
|
| 185 |
+
(
|
| 186 |
+
sched_df[['GameID', 'HomeTeamNameES', 'VisitorTeamNameES']]
|
| 187 |
+
.rename({'GameID': 'gameId'})
|
| 188 |
+
.with_columns(
|
| 189 |
+
pl.col('HomeTeamNameES').replace_strict(team_name_short).alias('home_team_name_short'),
|
| 190 |
+
pl.col('VisitorTeamNameES').replace_strict(team_name_short).alias('visitor_team_name_short')
|
| 191 |
+
)
|
| 192 |
+
),
|
| 193 |
+
on='gameId'
|
| 194 |
+
)
|
| 195 |
.with_columns(pl.col('UpdatedAt').dt.strftime('%Y%m%d').alias('date'))
|
| 196 |
.with_columns(
|
| 197 |
(pl.col('date') + '_' + pl.col('VisitorTeamNameES') + '_' + pl.col('HomeTeamNameES') + '_' + pl.col('newFiveDigitSerialNumber')).alias('universal_code') + '_' + pl.col('atBatBallCount'),
|
|
|
|
| 242 |
.otherwise('GameKindName')
|
| 243 |
.alias('coarse_game_kind'),
|
| 244 |
|
| 245 |
+
pl.when(pl.col('half_inning').str.ends_with(1)).then('HomeTeamNameES').otherwise('VisitorTeamNameES').alias('pitcher_team'),
|
| 246 |
+
pl.when(pl.col('half_inning').str.ends_with(1)).then('home_team_name_short').otherwise('visitor_team_name_short').alias('pitcher_team_name_short')
|
| 247 |
)
|
| 248 |
.with_columns(
|
| 249 |
pl.col('presult_id').replace_strict(presult).alias('presult')
|
pitch_leaderboard.py
CHANGED
|
@@ -7,7 +7,7 @@ from datetime import datetime
|
|
| 7 |
|
| 8 |
from data import data_df
|
| 9 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 10 |
-
from convert import ball_kind, ball_kind_to_color, get_text_color_from_color,
|
| 11 |
from plotting import stat_cmap
|
| 12 |
|
| 13 |
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
|
@@ -16,12 +16,22 @@ STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O
|
|
| 16 |
COLUMNS = ['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS
|
| 17 |
|
| 18 |
PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
| 19 |
-
TEAMS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
notes = '''**Limitations**
|
| 21 |
-
-
|
| 22 |
-
|
| 23 |
-
**To-do**
|
| 24 |
-
- Color cells according to percentiles
|
| 25 |
'''
|
| 26 |
|
| 27 |
|
|
@@ -35,7 +45,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 35 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 36 |
|
| 37 |
if include_teams is not None:
|
| 38 |
-
data = data.filter(pl.col('
|
| 39 |
|
| 40 |
# both, left, right = [
|
| 41 |
# (
|
|
@@ -62,11 +72,11 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 62 |
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
| 63 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 64 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 65 |
-
.rename({'pitcher_name': 'Pitcher', '
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
| 71 |
)
|
| 72 |
|
|
@@ -78,7 +88,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 78 |
r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 79 |
styling_row.append(f'background-color: rgba({r}, {g}, {b})')
|
| 80 |
elif col == 'Team':
|
| 81 |
-
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {
|
| 82 |
elif col in ['Pitch', 'Pitch (General)']:
|
| 83 |
color = ball_kind_to_color[item]
|
| 84 |
styling_row.append(f'color: {get_text_color_from_color(color)}; background-color: {color}')
|
|
@@ -91,7 +101,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 91 |
display_value_row = []
|
| 92 |
for item in row:
|
| 93 |
if isinstance(item, float):
|
| 94 |
-
display_value_row.append(f'{item:.
|
| 95 |
else:
|
| 96 |
display_value_row.append(item)
|
| 97 |
display_value.append(display_value_row)
|
|
@@ -131,7 +141,7 @@ def create_pitch_leaderboard():
|
|
| 131 |
# pin_columns = gr.Checkbox(True, 'Pin columns')
|
| 132 |
leaderboard = gr.DataFrame(
|
| 133 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 134 |
-
column_widths=[200,
|
| 135 |
show_copy_button=True,
|
| 136 |
show_search=True,
|
| 137 |
pinned_columns=3
|
|
|
|
| 7 |
|
| 8 |
from data import data_df
|
| 9 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 10 |
+
from convert import ball_kind, ball_kind_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
|
| 11 |
from plotting import stat_cmap
|
| 12 |
|
| 13 |
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
|
|
|
| 16 |
COLUMNS = ['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS
|
| 17 |
|
| 18 |
PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
| 19 |
+
TEAMS = [
|
| 20 |
+
'Yomiuri',
|
| 21 |
+
'Yakult',
|
| 22 |
+
'DeNA',
|
| 23 |
+
'Chunichi',
|
| 24 |
+
'Hanshin',
|
| 25 |
+
'Hiroshima',
|
| 26 |
+
'Nipponham',
|
| 27 |
+
'Rakuten',
|
| 28 |
+
'Seibu',
|
| 29 |
+
'Lotte',
|
| 30 |
+
'ORIX',
|
| 31 |
+
'SoftBank'
|
| 32 |
+
]
|
| 33 |
notes = '''**Limitations**
|
| 34 |
+
- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
|
|
|
|
|
|
|
|
|
|
| 35 |
'''
|
| 36 |
|
| 37 |
|
|
|
|
| 45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 46 |
|
| 47 |
if include_teams is not None:
|
| 48 |
+
data = data.filter(pl.col('pitcher_team_name_short').is_in(include_teams))
|
| 49 |
|
| 50 |
# both, left, right = [
|
| 51 |
# (
|
|
|
|
| 72 |
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
| 73 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 74 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 75 |
+
.rename({'pitcher_name': 'Pitcher', 'pitcher_team_name_short': 'Team', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'})
|
| 76 |
+
.with_columns(
|
| 77 |
+
pl.col(stat).mul(100)
|
| 78 |
+
for stat in PCT_STATS
|
| 79 |
+
)
|
| 80 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
| 81 |
)
|
| 82 |
|
|
|
|
| 88 |
r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 89 |
styling_row.append(f'background-color: rgba({r}, {g}, {b})')
|
| 90 |
elif col == 'Team':
|
| 91 |
+
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
|
| 92 |
elif col in ['Pitch', 'Pitch (General)']:
|
| 93 |
color = ball_kind_to_color[item]
|
| 94 |
styling_row.append(f'color: {get_text_color_from_color(color)}; background-color: {color}')
|
|
|
|
| 101 |
display_value_row = []
|
| 102 |
for item in row:
|
| 103 |
if isinstance(item, float):
|
| 104 |
+
display_value_row.append(f'{item:.1f}%')
|
| 105 |
else:
|
| 106 |
display_value_row.append(item)
|
| 107 |
display_value.append(display_value_row)
|
|
|
|
| 141 |
# pin_columns = gr.Checkbox(True, 'Pin columns')
|
| 142 |
leaderboard = gr.DataFrame(
|
| 143 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 144 |
+
column_widths=[200, 100, 200, 200] + [100]*len(STATS),
|
| 145 |
show_copy_button=True,
|
| 146 |
show_search=True,
|
| 147 |
pinned_columns=3
|
pitcher_overview.py
CHANGED
|
@@ -7,7 +7,7 @@ from data import SEASONS, data_df
|
|
| 7 |
from plotting import create_pitcher_overview_card
|
| 8 |
|
| 9 |
notes = '''**Limitations**
|
| 10 |
-
-
|
| 11 |
|
| 12 |
**To-do**
|
| 13 |
- Fix names of foreign players
|
|
|
|
| 7 |
from plotting import create_pitcher_overview_card
|
| 8 |
|
| 9 |
notes = '''**Limitations**
|
| 10 |
+
- Only supports regular season data
|
| 11 |
|
| 12 |
**To-do**
|
| 13 |
- Fix names of foreign players
|
stats.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
import polars as pl
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 4 |
if start_date is not None:
|
|
@@ -60,7 +63,7 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 60 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 61 |
pitch_stats = (
|
| 62 |
data
|
| 63 |
-
.group_by(id_col, pitch_col, '
|
| 64 |
.agg(
|
| 65 |
pl.first('pitcher_name'),
|
| 66 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
|
@@ -105,3 +108,75 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 105 |
.sort(id_col, 'count', descending=[False, True])
|
| 106 |
)
|
| 107 |
return pitch_stats
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import polars as pl
|
| 2 |
+
from data import data_df
|
| 3 |
+
|
| 4 |
+
from types import SimpleNamespace
|
| 5 |
|
| 6 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 7 |
if start_date is not None:
|
|
|
|
| 63 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 64 |
pitch_stats = (
|
| 65 |
data
|
| 66 |
+
.group_by(id_col, pitch_col, 'pitcher_team_name_short')
|
| 67 |
.agg(
|
| 68 |
pl.first('pitcher_name'),
|
| 69 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
|
|
|
| 108 |
.sort(id_col, 'count', descending=[False, True])
|
| 109 |
)
|
| 110 |
return pitch_stats
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
|
| 114 |
+
source_data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 115 |
+
|
| 116 |
+
# if start_date is not None:
|
| 117 |
+
# source_data = source_data.filter(pl.col('date') >= start_date)
|
| 118 |
+
# if end_date is not None:
|
| 119 |
+
# source_data = source_data.filter(pl.col('date') <= end_date)
|
| 120 |
+
#
|
| 121 |
+
# if game_kind is not None:
|
| 122 |
+
# source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
|
| 123 |
+
source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
|
| 124 |
+
|
| 125 |
+
source_data = (
|
| 126 |
+
compute_team_games(source_data)
|
| 127 |
+
.with_columns(
|
| 128 |
+
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 129 |
+
pl.col('inning_code').unique().len().over('pitId').alias('IP')
|
| 130 |
+
)
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if min_ip == 'qualified':
|
| 134 |
+
source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
|
| 135 |
+
else:
|
| 136 |
+
source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 137 |
+
|
| 138 |
+
if lr is not None:
|
| 139 |
+
source_data = source_data.filter(pl.col('batLR') == lr)
|
| 140 |
+
|
| 141 |
+
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
|
| 142 |
+
|
| 143 |
+
pitch_shapes = (
|
| 144 |
+
source_data
|
| 145 |
+
.filter(
|
| 146 |
+
(pl.col('pitId') == id) &
|
| 147 |
+
pl.col('x').is_not_null() &
|
| 148 |
+
pl.col('y').is_not_null() &
|
| 149 |
+
(pl.col('ballSpeed') > 0)
|
| 150 |
+
)
|
| 151 |
+
[['pitId', 'general_ballKind_code', 'ballKind_code', 'ballSpeed', 'x', 'y']]
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
pitcher_stats = (
|
| 155 |
+
source_data
|
| 156 |
+
.group_by('pitId')
|
| 157 |
+
.agg(
|
| 158 |
+
pl.col('pitcher_name').first(),
|
| 159 |
+
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 160 |
+
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
| 161 |
+
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 162 |
+
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 163 |
+
pl.first('qualified')
|
| 164 |
+
)
|
| 165 |
+
.explode('batType')
|
| 166 |
+
.unnest('batType')
|
| 167 |
+
.pivot(on='batType', values='proportion')
|
| 168 |
+
.fill_null(0)
|
| 169 |
+
.with_columns(
|
| 170 |
+
(pl.col('G') + pl.col('B')).alias('GB%'),
|
| 171 |
+
(pl.col('F') + pl.col('P')).alias('FB%'),
|
| 172 |
+
pl.col('L').alias('LD%'),
|
| 173 |
+
)
|
| 174 |
+
.drop('G', 'F', 'B', 'P', 'L')
|
| 175 |
+
.with_columns(
|
| 176 |
+
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 177 |
+
for stat in ['CSW%', 'K%', 'BB%', 'GB%']
|
| 178 |
+
)
|
| 179 |
+
.filter(pl.col('pitId') == id)
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|