Spaces:
Running
Running
Commit
·
e86b89f
1
Parent(s):
9eda2f5
Add team filtering after percentile computation in pitch leaderboard
Browse files
- pitch_leaderboard.py +10 -6
- plotting.py +1 -74
pitch_leaderboard.py
CHANGED
|
@@ -43,9 +43,6 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 43 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
| 44 |
if pitcher_lr != 'Both':
|
| 45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 46 |
-
|
| 47 |
-
if include_teams is not None:
|
| 48 |
-
data = data.filter(pl.col('pitcher_team_name_short').is_in(include_teams))
|
| 49 |
|
| 50 |
# both, left, right = [
|
| 51 |
# (
|
|
@@ -80,6 +77,9 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 80 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
| 81 |
)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
| 83 |
styling = []
|
| 84 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
| 85 |
styling_row = []
|
|
@@ -143,8 +143,8 @@ def create_pitch_leaderboard():
|
|
| 143 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 144 |
column_widths=[200, 100, 200, 200] + [100]*len(STATS),
|
| 145 |
show_copy_button=True,
|
| 146 |
-
show_search=
|
| 147 |
-
pinned_columns=3
|
| 148 |
)
|
| 149 |
|
| 150 |
gr.Markdown(notes)
|
|
@@ -152,7 +152,11 @@ def create_pitch_leaderboard():
|
|
| 152 |
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
|
| 153 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 154 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 155 |
-
# pin_columns.input(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
return app
|
| 158 |
|
|
|
|
| 43 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
| 44 |
if pitcher_lr != 'Both':
|
| 45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# both, left, right = [
|
| 48 |
# (
|
|
|
|
| 77 |
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
| 78 |
)
|
| 79 |
|
| 80 |
+
if include_teams is not None:
|
| 81 |
+
pitch_stats = pitch_stats.filter(pl.col('Team').is_in(include_teams))
|
| 82 |
+
|
| 83 |
styling = []
|
| 84 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
| 85 |
styling_row = []
|
|
|
|
| 143 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 144 |
column_widths=[200, 100, 200, 200] + [100]*len(STATS),
|
| 145 |
show_copy_button=True,
|
| 146 |
+
show_search='filter',
|
| 147 |
+
pinned_columns=3,
|
| 148 |
)
|
| 149 |
|
| 150 |
gr.Markdown(notes)
|
|
|
|
| 152 |
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, include_pitches, include_teams], outputs=leaderboard)
|
| 153 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 154 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 155 |
+
# pin_columns.input(
|
| 156 |
+
# lambda _pin_columns : gr.update(pinned_columns=None if _pin_columns else 3),
|
| 157 |
+
# inputs=pin_columns,
|
| 158 |
+
# outputs=leaderboard
|
| 159 |
+
# )
|
| 160 |
|
| 161 |
return app
|
| 162 |
|
plotting.py
CHANGED
|
@@ -10,86 +10,13 @@ import numpy as np
|
|
| 10 |
from types import SimpleNamespace
|
| 11 |
from datetime import date
|
| 12 |
|
| 13 |
-
from data import data_df
|
| 14 |
from convert import ball_kind_code_to_color, get_text_color_from_color
|
| 15 |
-
from stats import
|
| 16 |
|
| 17 |
|
| 18 |
mpl.use('Agg')
|
| 19 |
|
| 20 |
|
| 21 |
-
def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
|
| 22 |
-
source_data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 23 |
-
|
| 24 |
-
# if start_date is not None:
|
| 25 |
-
# source_data = source_data.filter(pl.col('date') >= start_date)
|
| 26 |
-
# if end_date is not None:
|
| 27 |
-
# source_data = source_data.filter(pl.col('date') <= end_date)
|
| 28 |
-
#
|
| 29 |
-
# if game_kind is not None:
|
| 30 |
-
# source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
|
| 31 |
-
source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
|
| 32 |
-
|
| 33 |
-
source_data = (
|
| 34 |
-
compute_team_games(source_data)
|
| 35 |
-
.with_columns(
|
| 36 |
-
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 37 |
-
pl.col('inning_code').unique().len().over('pitId').alias('IP')
|
| 38 |
-
)
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
if min_ip == 'qualified':
|
| 42 |
-
source_data = source_data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
|
| 43 |
-
else:
|
| 44 |
-
source_data = source_data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 45 |
-
|
| 46 |
-
if lr is not None:
|
| 47 |
-
source_data = source_data.filter(pl.col('batLR') == lr)
|
| 48 |
-
|
| 49 |
-
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
|
| 50 |
-
|
| 51 |
-
pitch_shapes = (
|
| 52 |
-
source_data
|
| 53 |
-
.filter(
|
| 54 |
-
(pl.col('pitId') == id) &
|
| 55 |
-
pl.col('x').is_not_null() &
|
| 56 |
-
pl.col('y').is_not_null() &
|
| 57 |
-
(pl.col('ballSpeed') > 0)
|
| 58 |
-
)
|
| 59 |
-
[['pitId', 'general_ballKind_code', 'ballKind_code', 'ballSpeed', 'x', 'y']]
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
pitcher_stats = (
|
| 63 |
-
source_data
|
| 64 |
-
.group_by('pitId')
|
| 65 |
-
.agg(
|
| 66 |
-
pl.col('pitcher_name').first(),
|
| 67 |
-
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 68 |
-
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
| 69 |
-
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 70 |
-
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 71 |
-
pl.first('qualified')
|
| 72 |
-
)
|
| 73 |
-
.explode('batType')
|
| 74 |
-
.unnest('batType')
|
| 75 |
-
.pivot(on='batType', values='proportion')
|
| 76 |
-
.fill_null(0)
|
| 77 |
-
.with_columns(
|
| 78 |
-
(pl.col('G') + pl.col('B')).alias('GB%'),
|
| 79 |
-
(pl.col('F') + pl.col('P')).alias('FB%'),
|
| 80 |
-
pl.col('L').alias('LD%'),
|
| 81 |
-
)
|
| 82 |
-
.drop('G', 'F', 'B', 'P', 'L')
|
| 83 |
-
.with_columns(
|
| 84 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=(stat == 'BB%'))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 85 |
-
for stat in ['CSW%', 'K%', 'BB%', 'GB%']
|
| 86 |
-
)
|
| 87 |
-
.filter(pl.col('pitId') == id)
|
| 88 |
-
)
|
| 89 |
-
|
| 90 |
-
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
def get_card_data(id, **kwargs):
|
| 94 |
both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
|
| 95 |
pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')
|
|
|
|
| 10 |
from types import SimpleNamespace
|
| 11 |
from datetime import date
|
| 12 |
|
|
|
|
| 13 |
from convert import ball_kind_code_to_color, get_text_color_from_color
|
| 14 |
+
from stats import get_pitcher_stats
|
| 15 |
|
| 16 |
|
| 17 |
mpl.use('Agg')
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def get_card_data(id, **kwargs):
|
| 21 |
both, left, right = get_pitcher_stats(id, **kwargs), get_pitcher_stats(id, 'l', **kwargs), get_pitcher_stats(id, 'r', **kwargs)
|
| 22 |
pitcher_stats = both.pitcher_stats.join(left.pitcher_stats, on='pitId', suffix='_left').join(right.pitcher_stats, on='pitId', suffix='_right')
|