# npb_data_app / plotting.py
# patrickramos's picture
# Add pitcher leaderboard
# 0b50ce4
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import transforms
from matplotlib.colors import LinearSegmentedColormap
import polars as pl
from pyfonts import load_google_font
from scipy.stats import gaussian_kde
import numpy as np
from PIL import Image
from types import SimpleNamespace
import datetime
from datetime import date
import os
from convert import ball_kind_code_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
from stats import get_pitcher_stats, filter_data_by_date_and_game_kind
def get_card_data(id, **kwargs):
    """Gather the stats and pitch shapes needed to draw a pitcher card.

    `get_pitcher_stats` is queried three times with the same keyword
    arguments: once without a second positional argument, once with 'l' and
    once with 'r'. The card labels the 'l'/'r' results as LHH/RHH, so these
    presumably select batter handedness (confirm against `stats`).

    Returns a SimpleNamespace with:
      pitcher_stats      - overall pitcher stats joined on 'pitId' with the
                           'l' and 'r' variants (suffixes '_left', '_right').
      pitch_stats        - per-pitch stats full-joined on 'ballKind_code'
                           with the same suffixes, nulls replaced by 0 and
                           sorted by 'count' descending.
      both_pitch_shapes, left_pitch_shapes, right_pitch_shapes
                         - the `pitch_shapes` of each query, untouched.
    """
    overall = get_pitcher_stats(id, **kwargs)
    vs_left = get_pitcher_stats(id, 'l', **kwargs)
    vs_right = get_pitcher_stats(id, 'r', **kwargs)

    pitcher_stats = overall.pitcher_stats.join(vs_left.pitcher_stats, on='pitId', suffix='_left')
    pitcher_stats = pitcher_stats.join(vs_right.pitcher_stats, on='pitId', suffix='_right')

    pitch_stats = (
        overall.pitch_stats
        .join(vs_left.pitch_stats, on='ballKind_code', how='full', suffix='_left')
        .join(vs_right.pitch_stats, on='ballKind_code', how='full', suffix='_right')
        .fill_null(0)
        .sort('count', descending=True)
    )

    return SimpleNamespace(
        pitcher_stats=pitcher_stats,
        pitch_stats=pitch_stats,
        both_pitch_shapes=overall.pitch_shapes,
        left_pitch_shapes=vs_left.pitch_shapes,
        right_pitch_shapes=vs_right.pitch_shapes,
    )
def plot_arsenal(ax, pitches):
    """Draw one coloured, labelled dot per pitch code along a single row.

    ax      - matplotlib Axes to draw on.
    pitches - sequence of pitch codes; each is looked up in
              `ball_kind_code_to_color`, falling back to 'C0' when unmapped.

    Dots sit at x = 0.5, 1.5, ... on a fixed 0..11 x-range, and the pitch
    code is written centred on each dot in a contrasting text colour.
    """
    colors = [ball_kind_code_to_color.get(code, 'C0') for code in pitches]
    centers = np.arange(len(pitches)) + 0.5

    ax.set_xlim(0, 11)
    ax.scatter(centers, np.zeros(len(pitches)), c=colors, s=170)

    for slot, (code, color) in enumerate(zip(pitches, colors)):
        ax.text(
            x=slot + 0.5,
            y=0,
            s=code,
            horizontalalignment='center',
            verticalalignment='center',
            font=font,
            color=get_text_color_from_color(color),
        )
def plot_usage(ax, usages):
    """Draw pitch usage as a single horizontal stacked bar.

    ax     - matplotlib Axes to draw on.
    usages - two-column frame iterated with `iter_rows()`; each row is
             (pitch code, usage), where usage is a fraction of 1.

    Segments are stacked left to right in row order. Segments wider than 10%
    get a centred percentage label. The y-range extends above the bar, which
    leaves room for the caption the caller draws at the top of the axes.
    """
    bar_height = 0.8
    offset = 0
    for pitch, usage in usages.iter_rows():
        # Fall back to 'C0' for unmapped pitch codes, as plot_arsenal and
        # plot_pitch_stats do, instead of raising KeyError.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.barh(0, usage, height=bar_height, left=offset, color=color)
        if usage > 0.1:
            ax.text(
                offset + usage / 2,
                0,
                f'{usage:.0%}',
                horizontalalignment='center',
                verticalalignment='center',
                size=8,
                font=font,
                color=get_text_color_from_color(color),
            )
        offset += usage

    ax.set_xlim(0, 1)
    ax.set_ylim(-bar_height / 2, bar_height / 2 * 2.75)
# Integer evaluation grid shared by the location plots: x spans -100..100 and
# y spans 0..250 (inclusive), matching the outer rectangle drawn in plot_loc.
x_range = np.arange(-100, 100 + 1)
y_range = np.arange(0, 250 + 1)
X, Y = np.meshgrid(x_range, y_range)


def fit_pred_kde(data, **kwargs):
    """Fit a Gaussian KDE to `data` and evaluate it on the module grid.

    data   - array of shape (2, n_points): row 0 holds x, row 1 holds y.
    kwargs - forwarded unchanged to `scipy.stats.gaussian_kde`.

    Returns an array with the same shape as `X`/`Y` holding the estimated
    density at every grid point.
    """
    kde = gaussian_kde(data, **kwargs)
    # np.vstack works on both NumPy 1.x and 2.x; the np.concat alias used
    # previously only exists from NumPy 2.0 onwards.
    grid_points = np.vstack((X.ravel(), Y.ravel()))
    return kde(grid_points).reshape(X.shape)
def plot_loc(ax, locs):
    """Draw a pitch-location chart with one density blob per pitch type.

    ax   - matplotlib Axes to draw on.
    locs - polars frame with columns 'general_ballKind_code', 'x' and 'y'.

    The backdrop is a set of nested rectangles plus a five-sided polygon
    beneath them (presumably the zone and home plate - confirm with the data
    source). For each pitch type, most frequent first, a 2-D KDE is fitted
    and the region holding roughly the densest 40% of the probability mass
    is filled and outlined in the pitch's colour.
    """
    ax.set_aspect('equal', adjustable='datalim')
    ax.set_ylim(-52, 252)

    ax.add_patch(plt.Rectangle((-100, 0), width=200, height=250, facecolor='darkgray', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-80, 25), width=160, height=200, facecolor='gainsboro', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-60, 50), width=120, height=150, fill=False, edgecolor='yellowgreen', linestyle=':'))
    ax.add_patch(plt.Rectangle((-40, 75), width=80, height=100, facecolor='ivory', edgecolor='darkgray'))
    ax.add_patch(plt.Polygon([(0, -10), (45, -30), (51, -50), (-51, -50), (-45, -30), (0, -10)], facecolor='snow', edgecolor='darkgray'))

    by_frequency = locs.sort(pl.len().over('general_ballKind_code'), descending=True)
    for (pitch,), _locs in by_frequency.group_by('general_ballKind_code', maintain_order=True):
        if len(_locs) <= 2:
            continue
        try:
            Z = fit_pred_kde(_locs[['x', 'y']].to_numpy().T)
        except np.linalg.LinAlgError:
            # Collinear or duplicate points give a singular covariance; skip
            # the pitch just like groups that are too small to fit.
            continue
        Z = Z / Z.sum()

        # Walk the densities from smallest to largest; the value at which the
        # running total reaches 60% is the threshold above which the
        # remaining ~40% of the mass lives.
        sorted_Z = np.sort(Z.ravel())
        t = sorted_Z[np.argmin(np.abs(sorted_Z.cumsum() - (1 - 0.4)))]

        # Fall back to 'C0' for unmapped pitch codes, as plot_arsenal and
        # plot_pitch_stats do, instead of raising KeyError.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.contourf(X, Y, Z, levels=[t, 1], colors=color, alpha=0.5)
        ax.contour(X, Y, Z, levels=[t], colors=color, alpha=0.75)
def plot_velo(ax, velos):
    """Draw one half-violin of pitch speed per pitch type.

    ax    - matplotlib Axes to draw on.
    velos - polars frame with columns 'general_ballKind_code' and
            'ballSpeed_mph'.

    Pitch types are processed most frequent first. Speeds three or more
    standard deviations from the type's mean are dropped, and types left
    with one sample or fewer are skipped. Each violin is coloured by pitch
    type and the rounded mean speed is written at the mean's x position,
    vertically centred in the axes.
    """
    # x in data coordinates, y in axes coordinates, so labels stay vertically
    # centred whatever the violin's y-range is.
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)

    by_frequency = velos.sort(pl.len().over('general_ballKind_code'), descending=True)
    for (pitch,), _velos in by_frequency.group_by('general_ballKind_code', maintain_order=True):
        speed = pl.col('ballSpeed_mph')
        z_score = (speed - speed.mean()) / speed.std()
        _velos = _velos.filter(z_score.abs() < 3)
        if len(_velos) <= 1:
            continue

        violin = ax.violinplot(_velos['ballSpeed_mph'], orientation='horizontal', side='high', showextrema=False)
        # Fall back to 'C0' for unmapped pitch codes, as plot_arsenal and
        # plot_pitch_stats do, instead of raising KeyError.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        for body in violin['bodies']:
            body.set_facecolor(color)

        mean = _velos['ballSpeed_mph'].mean()
        ax.text(mean, 0.5, round(mean), horizontalalignment='center', verticalalignment='center', color='gray', alpha=0.75, font=font, transform=trans)
# Diverging colormap used to shade stat cells by percentile:
# 'dodgerblue' at 0, 'snow' at the midpoint, 'crimson' at 1.
stat_cmap = LinearSegmentedColormap.from_list(
    'stat',
    colors=['dodgerblue', 'snow', 'crimson'],
)
def plot_pitch_stats(ax, stats, stat_names):
    """Render a table of per-pitch stats: one column per pitch, one row per stat.

    ax         - matplotlib Axes to draw on.
    stats      - polars frame with one row per pitch; needs 'ballKind_code',
                 'qualified', and for every name in `stat_names` both the
                 stat column and a '<name>_pctl' column.
    stat_names - stat column names, shown top to bottom.

    The first column holds the stat names and the first row holds the pitch
    codes on their pitch colour. Value cells are formatted as whole
    percentages and shaded with `stat_cmap` by percentile; pitches that are
    not qualified get a 'gainsboro' cell with gray text instead.
    """
    ax.set_aspect('equal', adjustable='datalim')
    table = mpl.table.Table(ax)

    n_rows = len(stat_names) + 1
    n_cols = len(stats) + 1
    cell_size = dict(width=1 / n_cols, height=1 / n_rows)

    # Left-hand column: stat names.
    for row, stat_name in enumerate(stat_names, start=1):
        table.add_cell(row=row, col=0, text=stat_name, loc='center', fontproperties=font, edgecolor='white', **cell_size)

    # One column per pitch: coloured header followed by the stat values.
    for col, pitch in enumerate(stats['ballKind_code'], start=1):
        pitch_color = ball_kind_code_to_color.get(pitch, 'C0')
        header = table.add_cell(row=0, col=col, text=pitch, loc='center', fontproperties=font, facecolor=pitch_color, edgecolor='white', **cell_size)
        header.get_text().set_color(get_text_color_from_color(pitch_color))

        pitch_row = stats.filter(pl.col('ballKind_code') == pitch)
        is_qualified = pitch_row['qualified'].item()

        for row, stat_name in enumerate(stat_names, start=1):
            value = pitch_row[stat_name].item()
            percentile = pitch_row[f'{stat_name}_pctl'].item()
            text = f'{value:.0%}'
            if is_qualified:
                fill = stat_cmap([0, percentile, 1])[1]
            else:
                fill = 'gainsboro'
            value_cell = table.add_cell(row=row, col=col, text=text, loc='center', fontproperties=font, facecolor=fill, edgecolor='white', **cell_size)
            if not is_qualified:
                value_cell.get_text().set_color('gray')

    ax.add_artist(table)
def plot_pitcher_stats(ax, stats, stat_names):
    """Render pitcher-level stats as a single row of label/value cell pairs.

    ax         - matplotlib Axes to draw on.
    stats      - single-row polars frame; needs 'qualified', and for every
                 name in `stat_names` both the stat column and a
                 '<name>_pctl' column.
    stat_names - stat column names, shown left to right.

    Each stat takes two equal-width cells: its name, then its value as a
    whole percentage shaded with `stat_cmap` by percentile. If the pitcher
    is not qualified the value cells are 'gainsboro' with gray text.
    """
    ax.set_aspect('equal', adjustable='datalim')
    table = mpl.table.Table(ax)

    cell_size = dict(width=1 / (len(stat_names) * 2), height=1)
    is_qualified = stats['qualified'].item()

    for index, stat_name in enumerate(stat_names):
        value = stats[stat_name].item()
        percentile = stats[f'{stat_name}_pctl'].item()
        label_col = index * 2

        table.add_cell(row=0, col=label_col, text=stat_name, loc='center', fontproperties=font, edgecolor='white', **cell_size)

        text = f'{value:.0%}'
        if is_qualified:
            fill = stat_cmap([0, percentile, 1])[1]
        else:
            fill = 'gainsboro'
        value_cell = table.add_cell(row=0, col=label_col + 1, text=text, loc='center', fontproperties=font, facecolor=fill, edgecolor='white', **cell_size)
        if not is_qualified:
            value_cell.get_text().set_color('gray')

    ax.add_artist(table)
# Module-wide font used by the plotting helpers above and by the pitcher card.
# It is loaded once, when the module is imported.
font = load_google_font(
    'Saira Extra Condensed',
    weight='medium',
)
def create_pitcher_overview_card(id, season, dpi=300):
    """Build the one-page overview card for a pitcher's regular season.

    id     - pitcher identifier, passed through to `get_card_data`.
    season - year (int); data covers Jan 1 to Dec 31 of that year.
    dpi    - figure resolution; the figure is 1080/300 x 1350/300 inches.

    The card is an 8-row grid: title, arsenal, usage (two halves), location
    (two halves), velocity, per-pitch stat table, pitcher stat table and
    credits. The halves are labelled LHH and RHH. All axes have their frames
    and ticks hidden.

    Returns the matplotlib Figure.
    """
    data = get_card_data(
        id,
        start_date=date(season, 1, 1),
        end_date=date(season, 12, 31),
        game_kind='Regular Season',
        min_pitches=100,
        pitch_class_type='general',
    )

    fig = plt.figure(figsize=(1080/300, 1350/300), dpi=dpi)
    gs = fig.add_gridspec(8, 6, height_ratios=[1, 1, 1.5, 6, 1, 3, 1, 0.5])

    panels = []

    def new_panel(cell):
        # Create an axes in the given grid cell and remember it so it can be
        # stripped of frame and ticks at the end.
        panel = fig.add_subplot(cell)
        panels.append(panel)
        return panel

    # Shared look of the small gray captions in each panel's top-left corner.
    caption = dict(horizontalalignment='left', verticalalignment='top', color='gray', font=font, size=10)
    left_half = slice(None, 3)
    right_half = slice(3, None)

    # Title row: pitcher name, season and a vertical 'REG' tag.
    title_ax = new_panel(gs[0, :])
    title_ax.text(x=0, y=0, s=data.pitcher_stats['pitcher_name'].item().upper(), verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=0.95, y=0, s=season, horizontalalignment='right', verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=1, y=0.5, s='REG', horizontalalignment='right', verticalalignment='center', font=font, size=10, rotation='vertical')

    arsenal_ax = new_panel(gs[1, :])
    plot_arsenal(arsenal_ax, data.pitch_stats['ballKind_code'])

    # Usage bars, left half then right half.
    for label, usage_column, cols in (
        ('LHH usage', 'usage_left', left_half),
        ('RHH usage', 'usage_right', right_half),
    ):
        usage_ax = new_panel(gs[2, cols])
        plot_usage(usage_ax, data.pitch_stats[['ballKind_code', usage_column]])
        usage_ax.text(0, 1, label, linespacing=0.5, transform=usage_ax.transAxes, **caption)

    # Location charts; the caption is added before the chart is drawn.
    for label, shapes, cols in (
        ('LHH\nloc', data.left_pitch_shapes, left_half),
        ('RHH\nloc', data.right_pitch_shapes, right_half),
    ):
        loc_ax = new_panel(gs[3, cols])
        loc_ax.text(0, 1, label, transform=loc_ax.transAxes, **caption)
        plot_loc(loc_ax, shapes)

    velo_ax = new_panel(gs[4, :])
    plot_velo(velo_ax, data.both_pitch_shapes)
    velo_ax.text(0, 1, 'Velo', transform=velo_ax.transAxes, **caption)

    pitch_stats_ax = new_panel(gs[5, :])
    plot_pitch_stats(pitch_stats_ax, data.pitch_stats, ['CSW%', 'GB%'])

    pitcher_stats_ax = new_panel(gs[6, :])
    plot_pitcher_stats(pitcher_stats_ax, data.pitcher_stats, ['CSW%', 'K%', 'BB%', 'GB%'])

    credits_ax = new_panel(gs[7, :])
    credits_ax.text(x=0, y=0.5, s='Data: SPAIA, Sanspo', verticalalignment='center', font=font, size=7)
    credits_ax.text(x=1, y=0.5, s='@yakyucosmo', horizontalalignment='right', verticalalignment='center', font=font, size=7)

    for panel in panels:
        panel.axis('off')
        panel.tick_params(
            axis='both',
            which='both',
            length=0,
            labelbottom=False,
            labelleft=False,
        )

    return fig
# Example usage:
# fig = create_pitcher_overview_card('1600153', season=2023, dpi=300)
# plt.show()
# DAILY/WEEKLY LEADERBOARDS
def get_whiff_leaderboard_data(data, leaders, include_date):
    """Build the whiff leaderboard table.

    data         - polars frame with 'pitId', 'pitcher_team_name_short',
                   'date', 'pitcher_name' and 'whiff' columns.
    leaders      - number of leaderboard places; ties at the last place are
                   all kept, so more rows than this may be returned.
    include_date - when true, a 'Date' column ('%m.%d') is included.

    Whiffs are summed per (pitcher, team, date). Rows are ordered by whiffs
    descending, then player name, and ranked with ties sharing the lowest
    rank. Returns columns Rank, Team, Player, [Date], Whiffs.
    """
    totals = (
        data
        .group_by('pitId', 'pitcher_team_name_short', 'date')
        .agg(
            pl.col('pitcher_name').first(),
            pl.col('whiff').sum().alias('Whiffs'),
        )
        .sort(['Whiffs', 'pitcher_name'], descending=[True, False])
    )

    columns = ['Rank', 'Team', 'Player'] + (['Date'] if include_date else []) + ['Whiffs']
    ranked = (
        totals
        .rename({'pitcher_name': 'Player', 'pitcher_team_name_short': 'Team'})
        .with_columns(
            pl.col('date').dt.to_string('%m.%d').alias('Date'),
            pl.col('Whiffs').rank(descending=True, method='min').alias('Rank'),
        )
        .select(columns)
    )

    # With 'min' ranking a row's rank is 1 + the number of strictly better
    # rows, so `Rank <= leaders` keeps exactly the top `leaders` rows plus
    # anything tied with the last of them. Unlike a cutoff derived from
    # cumulative counts, this also keeps every row when fewer than `leaders`
    # rows exist (the old cutoff became None and filtered everything out).
    return ranked.filter(pl.col('Rank') <= leaders)
def get_velo_leaderboard_data(data, leaders):
    """Build the pitch-velocity leaderboard table.

    data    - polars frame with 'ballSpeed', 'pitcher_name' and
              'pitcher_team_name_short' columns; every row is ranked
              individually.
    leaders - number of leaderboard places; ties at the last place are all
              kept, so more rows than this may be returned.

    Rows are ordered by speed descending, then player name, and ranked with
    ties sharing the lowest rank. 'ballSpeed' is exposed as 'KPH' and a
    derived 'MPH' column (KPH / 1.609, one decimal) is added. Returns
    columns Rank, Team, Player, KPH, MPH.
    """
    ranked = (
        data
        .sort(['ballSpeed', 'pitcher_name'], descending=[True, False])
        .rename({'ballSpeed': 'KPH', 'pitcher_name': 'Player', 'pitcher_team_name_short': 'Team'})
        .with_columns(
            (pl.col('KPH') / 1.609).round(1).alias('MPH'),
            pl.col('KPH').rank(descending=True, method='min').alias('Rank'),
        )
        .select(['Rank', 'Team', 'Player', 'KPH', 'MPH'])
    )

    # With 'min' ranking a row's rank is 1 + the number of strictly faster
    # rows, so `Rank <= leaders` keeps exactly the top `leaders` rows plus
    # anything tied with the last of them. Unlike a cutoff derived from
    # cumulative counts, this keeps every row when fewer than `leaders` rows
    # exist, and rows with a null speed (null rank) cannot distort the
    # cutoff - they are simply filtered out.
    return ranked.filter(pl.col('Rank') <= leaders)
def _load_insignia_mask(team, longer_side):
    """Load a team's white insignia and return (mask, width, height).

    The PNG at assets/white_insignias/<team lower-cased>.png is resized so
    its longer side equals `longer_side` (aspect ratio kept). `mask` is the
    last channel of the resized image as a numpy array (the alpha channel
    for RGBA files). The file is closed before returning.
    """
    path = os.path.join('assets', 'white_insignias', f'{team.lower()}.png')
    with Image.open(path) as image:
        w, h = image.size
        if w > h:
            w, h = (longer_side, round(h*longer_side/w))
        else:
            w, h = (round(w*longer_side/h), longer_side)
        # resize() returns an independent copy, so it outlives the file.
        resized = image.resize((w, h))
    return np.array(resized)[..., -1], w, h


def create_daily_weekly_leaderboard(stat, leaderboard_date, time_type, leaders, data):
    """Render a daily or weekly leaderboard figure.

    stat             - 'velo' or 'whiff'; selects the leaderboard builder.
    leaderboard_date - date of the daily board, or any date within the
                       Monday-Sunday week of the weekly board.
    time_type        - 'daily' or 'weekly'.
    leaders          - number of places, forwarded to the builder.
    data             - pitch-level frame; filtered to the date range with
                       `filter_data_by_date_and_game_kind`.

    The figure has a title row, a header row with an underline, and one row
    per leaderboard entry showing rank, team insignia on a team-coloured
    disc, player name on a team-coloured banner, and the stat columns. An
    empty leaderboard yields a single blank area below the header.

    Returns the matplotlib Figure. Raises AssertionError for an unknown
    `stat` or `time_type`.
    """
    font = load_google_font('Saira Extra Condensed', weight='medium')
    bold_font = load_google_font('Saira Extra Condensed', weight='bold')
    date_font = load_google_font('Lekton', weight='bold')

    assert stat in ('velo', 'whiff')
    assert time_type in ('daily', 'weekly')

    if time_type == 'daily':
        data = filter_data_by_date_and_game_kind(data, start_date=leaderboard_date, end_date=leaderboard_date)
    else:
        monday = leaderboard_date - datetime.timedelta(days=leaderboard_date.weekday())
        sunday = leaderboard_date + datetime.timedelta(days=6-leaderboard_date.weekday())
        data = filter_data_by_date_and_game_kind(data, start_date=monday, end_date=sunday)

    if stat == 'velo':
        leaderboard = get_velo_leaderboard_data(data, leaders)
    else:
        leaderboard = get_whiff_leaderboard_data(data, leaders, include_date=time_type == 'weekly')

    # Everything after Rank/Team/Player is a stat column. Columns whose
    # header or longest value has 5+ characters get 1.5 width units.
    stat_columns = [col for col in leaderboard.columns if col not in ['Rank', 'Team', 'Player']]
    stat_col_lens = [
        1 if max(leaderboard[name].cast(pl.String).str.len_chars().max() or 0, len(name)) < 5 else 1.5
        for name in stat_columns
    ]
    n_entries = len(leaderboard)

    dpi = 300
    fig = plt.figure(figsize=(1080/300, 1350/300), dpi=dpi)
    gs = fig.add_gridspec(
        max(n_entries, 1)+2,
        3+len(stat_columns),
        height_ratios=[1] + ([9/(n_entries+1)] * (n_entries+1) if n_entries else [1, 8]),
        width_ratios=[1, 1, 8-sum(stat_col_lens)] + stat_col_lens
    )
    data_offset = 2  # first grid row holding leaderboard entries

    axs = []

    def create_and_add_subplot(indexed_gs):
        ax = fig.add_subplot(indexed_gs)
        axs.append(ax)
        return ax

    title_ax = create_and_add_subplot(gs[0, :])
    title_ax.text(0, 0.1, f'{time_type.upper()} {stat.upper()} LEADERBOARD', verticalalignment='baseline', font=bold_font, size=15)
    if time_type == 'daily':
        title_ax.text(1, 0.1, leaderboard_date.strftime('%Y.%m.%d (%a)'), verticalalignment='baseline', horizontalalignment='right', font=date_font, size=7)
    else:
        title_ax.text(1, 0.1, monday.strftime('%Y.%m.%d (%a)')+'\n-'+sunday.strftime('%Y.%m.%d (%a)'), verticalalignment='baseline', horizontalalignment='right', font=date_font, size=7)

    # Header row.
    rank_ax = create_and_add_subplot(gs[data_offset-1, 0])
    rank_ax.text(0.5, 0, 'RANK', verticalalignment='bottom', horizontalalignment='center', font=bold_font)
    team_ax = create_and_add_subplot(gs[data_offset-1, 1])
    team_ax.text(0.5, 0, 'TEAM', verticalalignment='bottom', horizontalalignment='center', font=bold_font)
    player_ax = create_and_add_subplot(gs[data_offset-1, 2])
    player_ax.text(0, 0, 'PLAYER', verticalalignment='bottom', font=bold_font)
    for col, column_name in enumerate(stat_columns, start=3):
        stat_ax = create_and_add_subplot(gs[data_offset-1, col])
        stat_ax.text(0.5, 0, column_name.upper(), verticalalignment='bottom', horizontalalignment='center', font=bold_font)
    midline_ax = create_and_add_subplot(gs[data_offset-1, :])
    midline_ax.add_patch(plt.Rectangle((0, 0), 1, 0.01, color='black'))

    if n_entries == 0:
        create_and_add_subplot(gs[data_offset:])

    new_longer_side = 512
    ax_s = 512*1.5
    # A team can appear in several rows; load each insignia only once.
    insignia_by_team = {}

    for i, row in enumerate(leaderboard.iter_rows()):
        rank, team, player, *values = row
        team_color = team_names_short_to_color[team]

        rank_ax = create_and_add_subplot(gs[i+data_offset, 0])
        rank_ax.text(0.5, 0.5, rank, verticalalignment='center_baseline', horizontalalignment='center', font=font)

        team_ax = create_and_add_subplot(gs[i+data_offset, 1])
        if team not in insignia_by_team:
            insignia_by_team[team] = _load_insignia_mask(team, new_longer_side)
        mask, w, h = insignia_by_team[team]
        # Limits are fixed before imshow so the image does not rescale the
        # axes and stays centred inside the larger disc.
        team_ax.set_xlim(0, ax_s)
        team_ax.set_ylim(0, ax_s)
        circle = plt.Circle((ax_s/2, ax_s/2), radius=ax_s/2, color=team_color, clip_on=False, zorder=1)
        team_ax.add_patch(circle)
        team_ax.imshow(
            mask,
            cmap=LinearSegmentedColormap.from_list('tmp', [team_color, 'black' if team in ('Lotte', 'Hanshin') else 'white']),
            extent=((ax_s-w)/2, ax_s-(ax_s-w)/2, (ax_s-h)/2, ax_s-(ax_s-h)/2),
            zorder=2
        )

        player_ax = create_and_add_subplot(gs[i+data_offset, 2])
        player_ax.text(0.02, 0.5, player.upper(), verticalalignment='center_baseline', font=font, color=get_text_color_from_team(team))
        player_ax.add_patch(plt.Polygon([(0, 0), (0.98, 0), (1, 0.5), (1, 1), (0, 1)], color=team_color, clip_on=False))

        for col, value in enumerate(values, start=3):
            stat_ax = create_and_add_subplot(gs[i+data_offset, col])
            stat_ax.text(0.5, 0.5, value, verticalalignment='center_baseline', horizontalalignment='center', font=font)

    for ax in axs:
        ax.axis('off')
        ax.tick_params(
            axis='both',
            which='both',
            length=0,
            labelbottom=False,
            labelleft=False
        )

    return fig