Spaces:
Running
Running
Commit
·
65fefb5
1
Parent(s):
5cc9b28
Add more pitch stats
Browse files
- data.py +4 -0
- pitch_leaderboard.py +4 -4
- stats.py +53 -4
data.py
CHANGED
|
@@ -241,6 +241,10 @@ data_df = (
|
|
| 241 |
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
| 242 |
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
| 243 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
)
|
| 245 |
|
| 246 |
if __name__ == '__main__':
|
|
|
|
| 241 |
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
| 242 |
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
| 243 |
)
|
| 244 |
+
.with_columns((pl.col('x').is_between(-60, 60) & pl.col('y').is_between(50, 50+150)).alias('zone'))
|
| 245 |
+
.with_columns((pl.col('x').is_between(-40, 40) & pl.col('y').is_between(75, 75+100)).alias('heart'))
|
| 246 |
+
.with_columns((pl.col('x').is_between(-80, 80) & pl.col('y').is_between(25, 25+200) & ~pl.col('heart')).alias('shadow'))
|
| 247 |
+
.with_columns((pl.col('x').is_between(-100, 101) & pl.col('y').is_between(0, 0+251) & ~pl.col('heart') & ~pl.col('shadow')).alias('chase'))
|
| 248 |
)
|
| 249 |
|
| 250 |
if __name__ == '__main__':
|
pitch_leaderboard.py
CHANGED
|
@@ -2,15 +2,15 @@ import gradio as gr
|
|
| 2 |
import polars as pl
|
| 3 |
|
| 4 |
from datetime import datetime
|
| 5 |
-
from itertools import chain
|
| 6 |
|
| 7 |
from data import data_df
|
| 8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 9 |
from convert import ball_kind
|
| 10 |
|
| 11 |
-
STATS = ['Count', 'Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 12 |
-
PCT_STATS = ['Usage', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 13 |
-
STATS_WITH_PCTLS = ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 14 |
|
| 15 |
todo = '''
|
| 16 |
**To-do**
|
|
|
|
| 2 |
import polars as pl
|
| 3 |
|
| 4 |
from datetime import datetime
|
| 5 |
+
# from itertools import chain
|
| 6 |
|
| 7 |
from data import data_df
|
| 8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 9 |
from convert import ball_kind
|
| 10 |
|
| 11 |
+
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 12 |
+
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 13 |
+
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 14 |
|
| 15 |
todo = '''
|
| 16 |
**To-do**
|
stats.py
CHANGED
|
@@ -52,6 +52,48 @@ def compute_team_games(data):
|
|
| 52 |
)
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 56 |
assert player_type in ('pitcher', 'batter')
|
| 57 |
assert pitch_class_type in ('general', 'specific')
|
|
@@ -67,9 +109,16 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 67 |
pl.first(pitch_name_col),
|
| 68 |
pl.len().alias('count'),
|
| 69 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 70 |
-
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 72 |
-
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
|
|
|
| 73 |
)
|
| 74 |
.with_columns(
|
| 75 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
@@ -86,8 +135,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 86 |
)
|
| 87 |
.drop('G', 'F', 'B', 'P', 'L', 'null')
|
| 88 |
.with_columns(
|
| 89 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 90 |
-
for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 91 |
)
|
| 92 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 93 |
.sort(id_col, 'count', descending=[False, True])
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
|
| 55 |
+
# def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 56 |
+
# assert player_type in ('pitcher', 'batter')
|
| 57 |
+
# assert pitch_class_type in ('general', 'specific')
|
| 58 |
+
# id_col = 'pitId' if player_type == 'pitcher' else 'batId'
|
| 59 |
+
# pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 60 |
+
# pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 61 |
+
# pitch_stats = (
|
| 62 |
+
# data
|
| 63 |
+
# .group_by(id_col, pitch_col)
|
| 64 |
+
# .agg(
|
| 65 |
+
# pl.first('pitcher_name'),
|
| 66 |
+
# *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
| 67 |
+
# pl.first(pitch_name_col),
|
| 68 |
+
# pl.len().alias('count'),
|
| 69 |
+
# pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 70 |
+
# (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 71 |
+
# (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 72 |
+
# (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
| 73 |
+
# )
|
| 74 |
+
# .with_columns(
|
| 75 |
+
# (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
| 76 |
+
# (pl.col('count') >= min_pitches).alias('qualified')
|
| 77 |
+
# )
|
| 78 |
+
# .explode('batType')
|
| 79 |
+
# .unnest('batType')
|
| 80 |
+
# .pivot(on='batType', values='proportion')
|
| 81 |
+
# .fill_null(0)
|
| 82 |
+
# .with_columns(
|
| 83 |
+
# (pl.col('G') + pl.col('B')).alias('GB%'),
|
| 84 |
+
# (pl.col('F') + pl.col('P')).alias('FB%'),
|
| 85 |
+
# pl.col('L').alias('LD%').round(2),
|
| 86 |
+
# )
|
| 87 |
+
# .drop('G', 'F', 'B', 'P', 'L', 'null')
|
| 88 |
+
# .with_columns(
|
| 89 |
+
# (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 90 |
+
# for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 91 |
+
# )
|
| 92 |
+
# .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 93 |
+
# .sort(id_col, 'count', descending=[False, True])
|
| 94 |
+
# )
|
| 95 |
+
# return pitch_stats
|
| 96 |
+
|
| 97 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 98 |
assert player_type in ('pitcher', 'batter')
|
| 99 |
assert pitch_class_type in ('general', 'specific')
|
|
|
|
| 109 |
pl.first(pitch_name_col),
|
| 110 |
pl.len().alias('count'),
|
| 111 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 112 |
+
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
| 113 |
+
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
| 114 |
+
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
| 115 |
+
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
| 116 |
+
((pl.col('swing') & ~pl.col('whiff')).sum()/pl.col('swing').sum()).alias('Contact%'),
|
| 117 |
+
((pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(pl.col('zone') & pl.col('swing')).sum()).alias('Z-Contact%'),
|
| 118 |
+
((~pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(~pl.col('zone') & pl.col('swing')).sum()).alias('O-Contact%'),
|
| 119 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 120 |
+
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 121 |
+
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 122 |
)
|
| 123 |
.with_columns(
|
| 124 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
|
| 135 |
)
|
| 136 |
.drop('G', 'F', 'B', 'P', 'L', 'null')
|
| 137 |
.with_columns(
|
| 138 |
+
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=((stat in ['FB%', 'LD%'] or 'Contact%' in stat)))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 139 |
+
for stat in ['Zone%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 140 |
)
|
| 141 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 142 |
.sort(id_col, 'count', descending=[False, True])
|