Added AWR & Scoring modules

This commit is contained in:
edeymc 2025-11-08 15:30:57 -08:00
parent 9a846094ec
commit 5e5d1d8fed
27 changed files with 245 additions and 8 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
ParaUtils/.DS_Store vendored Normal file

Binary file not shown.

BIN
ParaUtils/AWR/.DS_Store vendored Normal file

Binary file not shown.

25
ParaUtils/AWR/__init__.py Normal file
View file

@ -0,0 +1,25 @@
import pandas as pd
import numpy as np
from ParaUtils import parse_WPS_rankings
from ParaUtils.Standards import LA2028_Events
from pathlib import Path
# Directory that contains this module; the workbook loaded further down is resolved relative to it.
module_dir = Path(__file__).parent
def set_AWR(DF):
    """Add an ``AWR`` column to the rows of one event.

    The rows of ``DF`` are split into three disjoint sets:

    * rows with ``NPC == 'CAN'`` get the label ``'CAN'``;
    * non-Canadian rows with ``NPC_Rank > 3`` get the label ``'NR > 3'``;
    * every remaining row is ranked on ``Seconds`` against the other
      remaining rows: 1 + the number of strictly faster rows, so ties
      share a rank and the next rank is skipped.

    The split uses boolean masks rather than index-label membership, so
    rows are never lost when ``DF`` carries repeated index labels.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must provide the columns ``NPC``, ``NPC_Rank`` and ``Seconds``.

    Returns
    -------
    pandas.DataFrame
        All rows of ``DF`` with the extra ``AWR`` column (integers for
        ranked rows, strings for the two labelled sets), sorted by
        ``Seconds`` and then ``AWR``.
    """
    is_canadian = DF.NPC == 'CAN'
    beyond_top3 = (DF.NPC_Rank > 3) & ~is_canadian
    pool = DF[~(is_canadian | beyond_top3)]

    # Competition ranking inside the pool: count the strictly faster swims.
    secs = pool.Seconds
    pool = pool.assign(AWR=secs.apply(lambda s: 1 + int((secs < s).sum())))

    return (
        pd.concat([
            DF[is_canadian].assign(AWR='CAN'),
            DF[beyond_top3].assign(AWR='NR > 3'),
            pool,
        ])
        .sort_values('Seconds AWR'.split())
    )
# Ranking table loaded once at import time; the workbook path is resolved
# relative to this module's directory.
Current_AWR = pd.read_excel(module_dir / '2025 Fall Performance AWR - 2025-10-07.xlsx')

BIN
ParaUtils/AWR/year2025/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,48 @@
import pandas as pd
import numpy as np
from ParaUtils import parse_WPS_rankings
from ParaUtils.Standards import LA2028_Events
from ParaUtils.AWR import set_AWR
from pathlib import Path
# Directory that contains this module; both workbooks are resolved relative to it.
module_dir = Path(__file__).parent

# Classification master list: one row per athlete with the S / SB / SM class
# reduced to its digits (a cell without any digit becomes 0).
ML = (
    pd.read_excel(
        module_dir / '2025_10_07 World Para Swimming Online Classification Master List.xlsx',
        header=8,
    )
    # Column names: spaces -> underscores.
    .rename(columns=lambda name: name.replace(' ', '_'))
    .rename(columns={'ID':'SDMS_ID'})
    .loc[:,'NPC Given_Name Family_Name SDMS_ID Gender S SB SM'.split()]
    # Same digit extraction for each of the three class columns; the leading
    # '0' guarantees int() always receives at least one digit.
    .assign(**{
        col: (lambda frame, col=col: frame[col].apply(
            lambda cell: int(''.join(filter(str.isdigit, '0' + str(cell))))
        ))
        for col in ('S', 'SB', 'SM')
    })
)
# Ranking table for the LA2028 event programme, built at import time.
# The rankings workbook is read with header=8 (column names on the 9th sheet row)
# and passed through parse_WPS_rankings (imported from ParaUtils).
AWR = parse_WPS_rankings(pd.read_excel(module_dir / '2025_10_07 World Para Swimming Online Rankings.xlsx',header=8))
# NOTE(review): the groupby(...).apply(...) steps below read the grouping columns inside
# the applied callables; newer pandas releases deprecate passing those columns —
# confirm against the pandas version this project pins.
AWR = (
AWR
# Normalise column names: spaces become underscores.
.pipe(lambda df: df.rename(columns={c:c.replace(' ','_') for c in df.columns}))
# Attach the master-list class numbers (S, SB, SM) per athlete.
# Inner join: athletes without a row in ML are dropped.
.merge(ML['SDMS_ID S SB SM'.split()],on='SDMS_ID',how='inner')
# Category names the master-list column that applies to the stroke:
# 'SB' for Breaststroke, 'SM' for Medley, 'S' for everything else.
.assign(Category=lambda df: df.Stroke.apply(lambda S: {'Breaststroke':'SB','Medley':'SM'}.get(S,'S')))
# Per Category, overwrite SportClass with that category's master-list column.
.groupby('Category',group_keys=False)
.apply(lambda df: df.assign(SportClass=df.loc[:,df.Category.values[0]]))
# Keep only rows matching an LA2028 event (inner join on gender/class/stroke/distance).
# EventClass, used below, is presumably supplied by LA2028_Events — verify in ParaUtils.Standards.
.merge(LA2028_Events,on='Gender SportClass Stroke Distance'.split(),how='inner')
# National rank within each event: 1 + number of strictly faster swims from the same NPC
# (ties share a rank).
.groupby('Gender EventClass Stroke Distance NPC'.split(),group_keys=False)
.apply(lambda df: df.assign(NPC_Rank=df.Seconds.apply(lambda S: 1+len(df[df.Seconds < S]))))
# Per event, let set_AWR add the AWR column (labels Canadians and NPC_Rank > 3, ranks the rest).
.groupby('Gender EventClass Stroke Distance'.split(),group_keys=False)
.apply(set_AWR)
# 2/3 threshold for DEV Cards: floor(2/3 * number of rows in the event), repeated on every row
# of that event. The row count includes the rows labelled by set_AWR.
.groupby('Gender EventClass Stroke Distance'.split(),group_keys=False)
.apply(lambda df: df.assign(TopTwoThirds=int(np.floor(len(df)*2/3))))
# Final presentation order.
.sort_values('Gender Stroke Distance EventClass Seconds AWR'.split())
)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,89 @@
import pandas as pd
import numpy as np
from ParaUtils import parse_WPS_rankings
from ParaUtils.Standards import WC2025_Events
from ParaUtils.AWR import set_AWR
from pathlib import Path
# Directory that contains this module; every workbook is resolved relative to it.
module_dir = Path(__file__).parent
# Rankings workbooks that are stacked into one table.
fnames = ['Rankings 2023.xlsx','Rankings 2024.xlsx','Rankings 2025 2025-04-02.xlsx']

# Fastest swim per athlete, stroke and distance across all workbooks,
# tagged with the class category (S / SB / SM) implied by the stroke.
raw = (
    pd.concat([
        pd.read_excel(module_dir / workbook, header=8)
        for workbook in fnames
    ])
    .pipe(parse_WPS_rankings)
    # Sorting ascending first makes head(1) pick the lowest Seconds per group,
    # regardless of the sport class recorded with the performance.
    .sort_values('Seconds')
    .groupby('SDMS_ID Stroke Distance'.split())
    .head(1)
    # Column selection.
    .loc[:, 'Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()]
    # Helper column Cat: 'SB' for Breaststroke, 'SM' for Medley, otherwise 'S'.
    .assign(Cat=lambda results: results.Stroke.map(
        lambda stroke: {'Breaststroke':'SB','Medley':'SM'}.get(stroke,'S')
    ))
)
# Most recent sport class per athlete and category (S / SB / SM).
# Sources: the 2024 and 2025 master lists, plus the classes recorded with
# 2023 results in `raw`; for each (SDMS_ID, Cat) the latest Year wins.
ML = (
    pd.concat([
        pd.read_excel(module_dir / 'Masterlist 2024.xlsx',header=8).assign(Year=2024),
        pd.read_excel(module_dir / 'Masterlist 2025 2025-04-02.xlsx',header=8).assign(Year=2025),
    ])
    .reset_index(drop=True)
    # Column names: spaces -> underscores.
    .pipe(lambda df: df.rename(columns={c:c.replace(' ','_') for c in df.columns}))
    # 'Status', 'Status.1', 'Status.2' are presumably the three repeated Status headers
    # as de-duplicated by read_excel, in S / SB / SM order — verify against the workbook.
    .rename(columns={'ID':'SDMS_ID','Status':'Status_S','Status.1':'Status_SB','Status.2':'Status_SM'})
    # Keep each athlete's row from the latest master list.
    .sort_values('Year')
    .groupby('SDMS_ID')
    .tail(1)
    .pipe(lambda df: df['SDMS_ID Year S SB SM Status_S Status_SB Status_SM'.split()])
    # Drop athletes where any of the three status cells contains the letter 'N' (case-insensitive).
    .assign(Helper=lambda df: df.Status_S.astype(str).str.upper().str.contains('N') | df.Status_SB.astype(str).str.upper().str.contains('N') | df.Status_SM.astype(str).str.upper().str.contains('N'))
    .pipe(lambda df: df[~df.Helper])
    # Long format: one row per (athlete, column) with the cell in `value`.
    .melt(id_vars='Helper SDMS_ID Year'.split())
    # Discard cells equal to 'NE' or 'CNC' and empty cells.
    .pipe(lambda df: df[~df.value.isin('NE CNC'.split())])
    .dropna()
    # Cat is the category suffix of the melted column name (S / SB / SM).
    .assign(Cat=lambda df: df.variable.apply(lambda S: S.split('_')[-1]))
    # Type is 'SportClass' for the class columns and 'Status' for the status columns.
    .assign(Type=lambda df: df.variable.replace('S','SportClass_S').replace('SB','SportClass_SB').replace('SM','SportClass_SM').apply(lambda S: S.split('_')[0]))
    # Back to wide: one row per (athlete, year, category) with SportClass and Status columns.
    # The string 'min' replaces the builtin `min`: pandas >= 2.1 deprecates builtin callables
    # here and documents the string as the spelling that keeps the current behaviour.
    .pipe(lambda df: pd.pivot_table(df,index='SDMS_ID Year Cat'.split(),columns='Type',values='value',aggfunc='min'))
    # Require both a class and a status for the category.
    .dropna()
    .reset_index()
    # Reduce the class label to its digits.
    .assign(SportClass=lambda df: df.SportClass.apply(lambda S: int(''.join([c for c in str(S) if c.isdigit()]))))
    .drop('Status',axis=1)
    # Add the classes recorded with 2023 results as a Year=2023 source.
    .pipe(lambda df: pd.concat([df,(
        raw
        .pipe(lambda df: df[df.Date.dt.year == 2023])
        .assign(Year=2023)
        .pipe(lambda df: df['SDMS_ID Year Cat SportClass'.split()])
    )]))
    # Latest Year wins per athlete and category.
    .sort_values('Year')
    .groupby('SDMS_ID Cat'.split())
    .tail(1)
)
# Ranking table for the 2025 WC event programme, built at import time from `raw` and `ML`.
# NOTE(review): the groupby(...).apply(...) steps read the grouping columns inside the applied
# callables; newer pandas releases deprecate passing those columns — confirm against the pinned version.
AWR = (
raw
# Replace the class recorded with the performance by the class from ML for the same
# athlete and category. Inner join: athletes/categories without a row in ML are dropped.
.merge(ML,on='SDMS_ID Cat'.split(),how='inner')
.drop('SportClass_x',axis=1)
.rename(columns={'SportClass_y':'SportClass'})
# Keep only rows matching a 2025 WC event (inner join); per the original note this includes swim-ups.
.merge(WC2025_Events,on='Gender SportClass Stroke Distance'.split(),how='inner')
# Fastest swim per athlete and event: ascending sort, then first row per group.
.sort_values('Seconds')
.groupby('SDMS_ID EventClass Stroke Distance'.split())
.head(1)
# National rank within each event: 1 + number of strictly faster swims from the same NPC
# (ties share a rank).
.groupby('NPC Gender EventClass Stroke Distance'.split(),group_keys=False)
.apply(lambda df: df.assign(NPC_Rank=df.Seconds.apply(lambda S: 1+len(df[df.Seconds < S]))))
# Per event, let set_AWR add the AWR column (labels Canadians and NPC_Rank > 3, ranks the rest).
.groupby('Gender EventClass Stroke Distance'.split(),group_keys=False)
.apply(set_AWR)
# Final presentation order.
.sort_values('Gender Stroke Distance SportClass Seconds'.split())
)

View file

@ -0,0 +1,39 @@
import pandas as pd
import numpy as np
from pathlib import Path
# Directory that contains this module; the calculator workbooks are resolved relative to it.
module_dir = Path(__file__).parent

# Seed table for SNC_Score, with SportClass stored as text.
# An alternative source was previously left commented out here:
#pd.read_excel(module_dir / 'SNC_Seeds.xlsx')
SNC_Seeds = pd.read_excel(module_dir / '2025 Swimming Canada Para Points Calculator.xlsx')
SNC_Seeds = SNC_Seeds.assign(SportClass=SNC_Seeds.SportClass.astype(str))
def SNC_Score(DF):
    """Add the columns ``V`` and ``Score`` to a results table.

    ``DF`` is left-joined to ``SNC_Seeds`` on Gender, Course, Stroke,
    Distance and SportClass to pick up the ``Scale`` value of each row.
    ``V`` is ``Distance / Seconds`` and the score is

        1000 * (((1 - exp(-0.4 * (exp(Scale * V) - 1))) / 0.95) ** 3)

    truncated to an integer. Rows for which the formula yields NaN (for
    example because no seed row matched) score 0.

    The formula is evaluated on whole columns instead of row by row, so an
    empty ``DF`` yields an empty result with the expected columns.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must provide Gender, Course, Stroke, Distance, SportClass and Seconds.

    Returns
    -------
    pandas.DataFrame
        The merged table with ``V`` and ``Score`` added and ``Scale`` removed.
    """
    def _points(v, scale):
        return 1000 * (((1 - np.exp(-0.4 * (np.exp(scale * v) - 1))) / 0.95) ** 3)

    prepped = (
        DF
        .merge(SNC_Seeds, on='Gender Course Stroke Distance SportClass'.split(), how='left')
        .assign(V=lambda df: df.Distance / df.Seconds)
    )
    return (
        prepped
        # astype(float) lets the NumPy ufuncs handle object-typed columns as well.
        .assign(Score=lambda df: _points(df.V.astype(float), df.Scale.astype(float)).fillna(0).astype(int))
        .drop('Scale', axis=1)
    )
# Parameter table (a, b, c) for WPS_Score, keyed by Gender, Course, SportClass,
# Distance and Stroke. Read from the 'Parameters' sheet at import time.
WPS_Seeds = (
    pd.read_excel(module_dir / '2024 World Para Swimming Points Calculator.xlsx',sheet_name='Parameters')
    # Gender is reduced to its first character.
    .assign(Gender=lambda df: df.Gender.apply(lambda S: S[0]))
    # Split Event into Distance and Stroke. The optional "Individual " prefix is skipped with a
    # non-capturing group, and the stroke is matched with [A-Za-z] (the range A-z would also
    # accept the punctuation characters that sit between 'Z' and 'a').
    .pipe(lambda df: pd.concat([df,df.Event.str.extract(r"(?P<Distance>\d+) m (?:Individual )*(?P<Stroke>[A-Za-z]+)")],axis=1))
    # SportClass keeps only the digits of Class, as text.
    .assign(SportClass=lambda df: df.Class.apply(lambda S: ''.join([c for c in str(S) if c.isdigit()])))
    .astype({'Distance':int})
    # Every row of this table is tagged as long course.
    .assign(Course='LCM')
    .pipe(lambda df: df['Gender Course SportClass Distance Stroke a b c'.split()])
)
def WPS_Score(DF):
    """Add an integer ``WPS_Score`` column to a results table.

    ``DF`` is left-joined to ``WPS_Seeds`` on Gender, Course, SportClass,
    Distance and Stroke to obtain the parameters ``a``, ``b`` and ``c``;
    the score is ``a * exp(-exp(b - c / Seconds))`` truncated to an
    integer, with NaN results (e.g. no matching parameters) scored 0.
    The three parameter columns are removed from the returned table.
    """
    with_params = DF.merge(
        WPS_Seeds,
        on='Gender Course SportClass Distance Stroke'.split(),
        how='left',
    )
    exponent = with_params.b - (with_params.c / with_params.Seconds)
    points = with_params.a * np.exp(-np.exp(exponent))
    scored = with_params.assign(WPS_Score=points.fillna(0).astype(int))
    return scored.drop('a b c'.split(), axis=1)

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,40 @@
import pandas as pd
import numpy as np
from pathlib import Path
# Directory that contains this module; the Excel workbooks read below are resolved relative to it.
module_dir = Path(__file__).parent
def add_possible_swimups(DF, swimup_classes=(2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13)):
    """Attach the list of sport classes that may enter an event class.

    The event class is read from the first row of ``DF``, so ``DF`` is
    expected to hold a single ``EventClass`` value (it is applied per
    ``EventClass`` group). If that class is in ``swimup_classes`` the
    candidate sport classes are the class itself and the class one below;
    otherwise only the class itself.

    Parameters
    ----------
    DF : pandas.DataFrame
        Rows of one event class; must provide ``EventClass``.
    swimup_classes : collection of int, optional
        Event classes that also accept the next lower sport class.

    Returns
    -------
    pandas.DataFrame
        ``DF`` with a ``SportClass`` column holding the same list on every
        row. An empty ``DF`` is returned with an empty ``SportClass`` column.
    """
    if len(DF) == 0:
        # No row to read the event class from.
        return DF.assign(SportClass=[])
    ec = DF.EventClass.values[0]
    sc = [ec, ec - 1] if ec in swimup_classes else [ec]
    return DF.assign(SportClass=[sc] * len(DF))
def set_swimups(DF):
    """Keep in-class rows plus swim-ups into classes that have no own event.

    A row is kept when

    * its ``SportClass`` equals its ``EventClass`` (in class), or
    * its ``SportClass`` does not occur anywhere in ``DF.EventClass``, i.e.
      that sport class has no event of its own in this group (swim-up).

    Rows are selected with boolean masks, so the outcome does not depend on
    the index labels of ``DF``.

    Parameters
    ----------
    DF : pandas.DataFrame
        Candidate rows of one group; must provide ``EventClass`` and
        ``SportClass``.

    Returns
    -------
    pandas.DataFrame
        The in-class rows followed by the swim-up rows.
    """
    in_class = DF.EventClass == DF.SportClass
    has_own_event = DF.SportClass.isin(DF.EventClass)
    return pd.concat([DF[in_class], DF[~in_class & ~has_own_event]])
# 2025 WC table as read from the workbook, including the MQS and MET columns.
WC2025_MQS_MET = pd.read_excel(module_dir / '2025 WPS WC MQS and METs.xlsx')
# Same table without the MQS and MET columns.
WC2025_Events = WC2025_MQS_MET.drop(columns='MQS MET'.split())
# LA2028 event table expanded to one row per (event, eligible sport class), built at import time.
LA2028_Events = (
pd.read_excel(module_dir / 'Draft LA2028 Events.xlsx')
# Forward-fill blank cells from the row above (presumably merged/blank cells in the sheet — verify).
.ffill()
# Per EventClass, add a SportClass column holding the list of candidate sport classes.
.groupby('EventClass',group_keys=False)
.apply(add_possible_swimups)
# One row per candidate sport class.
.explode('SportClass')
.reset_index(drop=True)
# Per Gender/Distance/Stroke, keep the in-class rows and only those swim-ups whose sport class
# has no event of its own in that group.
.groupby('Gender Distance Stroke'.split(),group_keys=False)
.apply(set_swimups)
.sort_values('Gender Stroke Distance EventClass SportClass'.split())
.reset_index(drop=True)
# Distance and both class columns are stored as integers.
.astype({'Distance':int,'EventClass':int,'SportClass':int})
)
# Public names of this module: only the three tables, not the helper functions.
__all__ = 'WC2025_MQS_MET WC2025_Events LA2028_Events'.split()

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,6 @@
#Bits and bobs used for many different projects. Junk drawer module
import pandas
import numpy
import pandas as pd
import numpy as np
#Useful functions for parsing race-like durations, and normalizing their string reps
@ -9,7 +9,7 @@ def t2f(S):
return None
m,s = ('0:%s' % S).split(':')[-2:]
try:
return numpy.round(60*int(m) + float(s),2)
return np.round(60*int(m) + float(s),2)
except:
return -1
@ -24,7 +24,7 @@ def f2t(S):
return f'{m:02}:{s:02}.{round(hs*100):02}'
#Canada Summer Games eligible events
CSG = pandas.DataFrame([
CSG = pd.DataFrame([
{'Stroke':'Butterfly','Distance':50,'SportClass':[1,2,3,4,5,6,7]},
{'Stroke':'Butterfly','Distance':100,'SportClass':[8,9,10,11,12,13,14]},
{'Stroke':'Backstroke','Distance':50,'SportClass':[1,2,3,4,5]},
@ -61,7 +61,3 @@ def parse_WPS_rankings(DF):
#Drop excess columns
.pipe(lambda df: df['Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()])
)
__version__ = "0.1.1"