# Provenance: ParaUtils/__init__.py as created by commit
# 749c8d21b091ac4a3763cfee2a21eb9272617fe5 ("Init commit").
"""Bits and bobs used for many different projects. Junk drawer module."""

import math

import numpy as np
import pandas as pd


# Useful functions for parsing race-like durations, and normalizing their
# string reps
def t2f(S):
    """Convert a race-style duration such as ``'1:23.45'`` to seconds.

    Only the last two colon-separated fields (minutes and seconds) are used;
    a value without a colon is treated as seconds only.

    Args:
        S: Duration to parse, normally a string like ``'MM:SS.hh'`` or
            ``'SS.hh'``. Anything else is formatted with ``%s`` first.

    Returns:
        ``None`` when ``S`` is falsy (``None``, ``''``, ``0``), the number of
        seconds rounded to two decimals (a NumPy float) when the value parses,
        and ``-1`` when the minutes or seconds field is not numeric.
    """
    if not S:
        return None
    # Prefixing '0:' guarantees there is always a minutes field to unpack.
    minutes, seconds = ('0:%s' % S).split(':')[-2:]
    try:
        return np.round(60 * int(minutes) + float(seconds), 2)
    except ValueError:
        # Non-numeric field (e.g. a status code instead of a time).
        return -1


def f2t(S):
    """Format a duration in seconds as ``'MM:SS.hh'``.

    Args:
        S: Duration in seconds, as a number or a numeric string.

    Returns:
        The formatted string, or ``''`` when ``S`` is falsy, not convertible
        to a float, not finite (NaN/inf), or not strictly positive.
    """
    if not S:
        return ''
    try:
        total = float(S)
    except (TypeError, ValueError):
        return ''
    if not math.isfinite(total) or total <= 0:
        return ''
    # Round once on whole centiseconds so a carry propagates into the seconds
    # and minutes (59.999 -> '01:00.00' rather than '00:59.100').
    minutes, remainder = divmod(round(total * 100), 6000)
    seconds, hundredths = divmod(remainder, 100)
    return f'{minutes:02}:{seconds:02}.{hundredths:02}'


# Canada Summer Games eligible events
# One row per (Stroke, Distance, SportClass) combination after the explode.
CSG = pd.DataFrame([
    {'Stroke': 'Butterfly', 'Distance': 50, 'SportClass': [1, 2, 3, 4, 5, 6, 7]},
    {'Stroke': 'Butterfly', 'Distance': 100, 'SportClass': [8, 9, 10, 11, 12, 13, 14]},
    {'Stroke': 'Backstroke', 'Distance': 50, 'SportClass': [1, 2, 3, 4, 5]},
    {'Stroke': 'Backstroke', 'Distance': 100, 'SportClass': [1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14]},
    {'Stroke': 'Breaststroke', 'Distance': 50, 'SportClass': [1, 2, 3]},
    {'Stroke': 'Breaststroke', 'Distance': 100, 'SportClass': [4, 5, 6, 7, 8, 9, 11, 12, 13, 14]},
    {'Stroke': 'Medley', 'Distance': 150, 'SportClass': [1, 2, 3, 4]},
    {'Stroke': 'Medley', 'Distance': 200, 'SportClass': [5, 6, 7, 8, 9, 10, 11, 12, 13, 14]},
    {'Stroke': 'Freestyle', 'Distance': 50, 'SportClass': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]},
    {'Stroke': 'Freestyle', 'Distance': 100, 'SportClass': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]},
    {'Stroke': 'Freestyle', 'Distance': 200, 'SportClass': [1, 2, 3, 4, 5, 14]},
    {'Stroke': 'Freestyle', 'Distance': 400, 'SportClass': [6, 7, 8, 9, 10, 11, 12, 13]},
]).explode('SportClass').reset_index(drop=True)


# Splits an event label into Distance / Stroke / SportClass. The named groups
# become the column names produced by ``Series.str.extract``.
_EVENT_PATTERN = (
    r"'s (?P<Distance>\d+) m (?:Individual )*"
    r"(?P<Stroke>[A-Za-z]+) [SBM]+(?P<SportClass>\d+)"
)

# Columns returned by parse_WPS_rankings, in order.
_RANKING_COLUMNS = (
    'Gender SportClass Distance Stroke Result Seconds '
    'SDMS_ID NPC Given_Name Family_Name Date'
).split()


def parse_WPS_rankings(DF):
    '''Takes a pandas dataframe, usually loaded from an excel file, and cleans it up for use.

    Args:
        DF: Raw rankings table. After spaces in column names are replaced
            with underscores it must provide the columns ``Event``,
            ``Gender``, ``Result``, ``SDMS_ID``, ``NPC``, ``Given_Name``,
            ``Family_Name`` and ``Date``.

    Returns:
        A new DataFrame restricted to rows with an ``SDMS_ID``, with
        ``Distance``, ``Stroke`` and ``SportClass`` extracted from ``Event``,
        ``Date`` converted to datetime, ``Seconds`` computed from ``Result``
        via ``t2f``, and integer ``SportClass``/``Distance``/``SDMS_ID``.

    Raises:
        ValueError: Propagated from ``astype`` when a remaining row's
            ``Event`` does not match the expected pattern, since the
            extracted values are then missing and cannot be cast to int.
    '''
    return (
        DF
        # Makes .dot notation easier
        .pipe(lambda df: df.rename(columns={c: c.replace(' ', '_') for c in df.columns}))

        # Drop VACANT events and relay results
        .pipe(lambda df: df[df.SDMS_ID.notnull()])

        # Parse out event information into useful columns
        .pipe(lambda df: pd.concat([df, df.Event.str.extract(_EVENT_PATTERN)], axis=1))

        # Cleanup column dtypes
        .assign(Date=lambda df: pd.to_datetime(df.Date))
        # NOTE(review): Year is computed here but is not part of the final
        # column selection below - confirm whether it should be returned.
        .assign(Year=lambda df: df.Date.dt.year)
        .assign(Seconds=lambda df: df.Result.apply(t2f))
        .astype({'SportClass': int, 'Distance': int, 'SDMS_ID': int})

        # Drop excess columns
        .pipe(lambda df: df[_RANKING_COLUMNS])
    )