From c041e77ad7569ef8419979f79026d411215077b1 Mon Sep 17 00:00:00 2001
From: edeymc
Date: Thu, 6 Nov 2025 09:25:12 -0800
Subject: [PATCH] Re-structured

---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):
 * Line breaks and indentation were re-created from a whitespace-collapsed
   copy of this patch. 4-space indentation is assumed for the removed
   lines; confirm against the pre-image of ParaUtils/__init__.py before
   applying.
 * The named groups Distance / Stroke / SportClass in the Event regex were
   restored from the column names used later in the same pipeline.
 * The added lines differ from the original commit: __all__ is now a
   terminated string and lists f2t (was "ft2"); utils.py calls pandas.*
   because no "pd" alias is imported; t2f catches ValueError instead of
   using a bare except; the stroke pattern is [A-Za-z] instead of [A-z];
   f2t carries rounded hundredths into seconds and minutes. The "index"
   blob ids therefore no longer describe the post-image. Line counts are
   unchanged, so the hunk headers and diffstat remain valid.
 * NOTE(review): the From: header has no e-mail address in this copy.
 * NOTE(review): [project] metadata needs setuptools>=61; the
   "setuptools>=45" floor added below looks too low. Confirm.

 ParaUtils/__init__.py | 69 ++-----------------------------------------
 ParaUtils/utils.py    | 67 +++++++++++++++++++++++++++++++++++++++++
 pyproject.toml        |  6 +++-
 setup.py              |  6 ++++
 4 files changed, 81 insertions(+), 67 deletions(-)
 create mode 100644 ParaUtils/utils.py
 create mode 100644 setup.py

diff --git a/ParaUtils/__init__.py b/ParaUtils/__init__.py
index 8914953..2ce85d1 100644
--- a/ParaUtils/__init__.py
+++ b/ParaUtils/__init__.py
@@ -1,67 +1,4 @@
-#Bits and bobs used for many different projects. Junk drawer module
-import pandas
-import numpy
+from .utils import *
 
-
-#Useful functions for parsing race-like durations, and normalizing their string reps
-def t2f(S):
-    if not(S or False):
-        return None
-    m,s = ('0:%s' % S).split(':')[-2:]
-    try:
-        return numpy.round(60*int(m) + float(s),2)
-    except:
-        return -1
-
-def f2t(S):
-    if not(S or False) or (int(''.join([c for c in '0'+str(S) if c.isdigit()])) == 0) or (S <= 0):
-        return ''
-    S = float(S)
-    m = int(S/60)
-    s = S-60*m
-    hs = s-int(s)
-    s = int(s)
-    return f'{m:02}:{s:02}.{round(hs*100):02}'
-
-#Canada Summer Games eligible events
-CSG = pandas.DataFrame([
-    {'Stroke':'Butterfly','Distance':50,'SportClass':[1,2,3,4,5,6,7]},
-    {'Stroke':'Butterfly','Distance':100,'SportClass':[8,9,10,11,12,13,14]},
-    {'Stroke':'Backstroke','Distance':50,'SportClass':[1,2,3,4,5]},
-    {'Stroke':'Backstroke','Distance':100,'SportClass':[1,2,6,7,8,9,10,11,12,13,14]},
-    {'Stroke':'Breaststroke','Distance':50,'SportClass':[1,2,3]},
-    {'Stroke':'Breaststroke','Distance':100,'SportClass':[4,5,6,7,8,9,11,12,13,14]},
-    {'Stroke':'Medley','Distance':150,'SportClass':[1,2,3,4]},
-    {'Stroke':'Medley','Distance':200,'SportClass':[5,6,7,8,9,10,11,12,13,14]},
-    {'Stroke':'Freestyle','Distance':50,'SportClass':[1,2,3,4,5,6,7,8,9,10,11,12,13,14]},
-    {'Stroke':'Freestyle','Distance':100,'SportClass':[1,2,3,4,5,6,7,8,9,10,11,12,13,14]},
-    {'Stroke':'Freestyle','Distance':200,'SportClass':[1,2,3,4,5,14]},
-    {'Stroke':'Freestyle','Distance':400,'SportClass':[6,7,8,9,10,11,12,13]},
-]).explode('SportClass').reset_index(drop=True)
-
-def parse_WPS_rankings(DF):
-    '''Takes a pandas dataframe, usually loaded from an excel file, and cleans it up for use'''
-    return (
-        DF
-        #Makes .dot notation easier
-        .pipe(lambda df: df.rename(columns={c:c.replace(' ','_') for c in df.columns}))
-
-        #Drop VACANT events and relay results
-        .pipe(lambda df: df[df.SDMS_ID.notnull()])
-
-        #Parse out event information into useful columns
-        .pipe(lambda df: pd.concat([df,df.Event.str.extract(r"'s (?P<Distance>\d+) m (Individual )*(?P<Stroke>[A-z]+) [SBM]+(?P<SportClass>\d+)")],axis=1))
-
-        #Cleanup column dtypes
-        .assign(Date=lambda df: pd.to_datetime(df.Date))
-        .assign(Year=lambda df: df.Date.dt.year)
-        .assign(Seconds=lambda df: df.Result.apply(t2f))
-        .astype({'SportClass':int,'Distance':int,'SDMS_ID':int})
-
-        #Drop excess columns
-        .pipe(lambda df: df['Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()])
-    )
-
-
-
-__version__ = "0.1.1"
\ No newline at end of file
+__version__ = "0.1.1"
+__all__ = 't2f f2t CSG parse_WPS_rankings'.split()
diff --git a/ParaUtils/utils.py b/ParaUtils/utils.py
new file mode 100644
index 0000000..8914953
--- /dev/null
+++ b/ParaUtils/utils.py
@@ -0,0 +1,67 @@
+#Bits and bobs used for many different projects. Junk drawer module
+import pandas
+import numpy
+
+
+#Useful functions for parsing race-like durations, and normalizing their string reps
+def t2f(S):
+    if not(S or False):
+        return None
+    m,s = ('0:%s' % S).split(':')[-2:]
+    try:
+        return numpy.round(60*int(m) + float(s),2)
+    except ValueError:
+        return -1
+
+def f2t(S):
+    if not(S or False) or (int(''.join([c for c in '0'+str(S) if c.isdigit()])) == 0) or (S <= 0):
+        return ''
+    #Work in whole hundredths so that rounding carries into seconds and minutes
+    #(59.999 -> 01:00.00 rather than 00:59.100)
+    hs = round(float(S)*100)
+    m,hs = divmod(hs,6000)
+    s,hs = divmod(hs,100)
+    return f'{m:02}:{s:02}.{hs:02}'
+
+#Canada Summer Games eligible events
+CSG = pandas.DataFrame([
+    {'Stroke':'Butterfly','Distance':50,'SportClass':[1,2,3,4,5,6,7]},
+    {'Stroke':'Butterfly','Distance':100,'SportClass':[8,9,10,11,12,13,14]},
+    {'Stroke':'Backstroke','Distance':50,'SportClass':[1,2,3,4,5]},
+    {'Stroke':'Backstroke','Distance':100,'SportClass':[1,2,6,7,8,9,10,11,12,13,14]},
+    {'Stroke':'Breaststroke','Distance':50,'SportClass':[1,2,3]},
+    {'Stroke':'Breaststroke','Distance':100,'SportClass':[4,5,6,7,8,9,11,12,13,14]},
+    {'Stroke':'Medley','Distance':150,'SportClass':[1,2,3,4]},
+    {'Stroke':'Medley','Distance':200,'SportClass':[5,6,7,8,9,10,11,12,13,14]},
+    {'Stroke':'Freestyle','Distance':50,'SportClass':[1,2,3,4,5,6,7,8,9,10,11,12,13,14]},
+    {'Stroke':'Freestyle','Distance':100,'SportClass':[1,2,3,4,5,6,7,8,9,10,11,12,13,14]},
+    {'Stroke':'Freestyle','Distance':200,'SportClass':[1,2,3,4,5,14]},
+    {'Stroke':'Freestyle','Distance':400,'SportClass':[6,7,8,9,10,11,12,13]},
+]).explode('SportClass').reset_index(drop=True)
+
+def parse_WPS_rankings(DF):
+    '''Takes a pandas dataframe, usually loaded from an excel file, and cleans it up for use'''
+    return (
+        DF
+        #Makes .dot notation easier
+        .pipe(lambda df: df.rename(columns={c:c.replace(' ','_') for c in df.columns}))
+
+        #Drop VACANT events and relay results
+        .pipe(lambda df: df[df.SDMS_ID.notnull()])
+
+        #Parse out event information into useful columns
+        .pipe(lambda df: pandas.concat([df,df.Event.str.extract(r"'s (?P<Distance>\d+) m (Individual )*(?P<Stroke>[A-Za-z]+) [SBM]+(?P<SportClass>\d+)")],axis=1))
+
+        #Cleanup column dtypes
+        .assign(Date=lambda df: pandas.to_datetime(df.Date))
+        .assign(Year=lambda df: df.Date.dt.year)
+        .assign(Seconds=lambda df: df.Result.apply(t2f))
+        .astype({'SportClass':int,'Distance':int,'SDMS_ID':int})
+
+        #Drop excess columns
+        .pipe(lambda df: df['Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()])
+    )
+
+
+
+__version__ = "0.1.1"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 1acbd15..8098176 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,9 @@
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+
 [project]
-name = "ParaUtil"
+name = "ParaUtils"
 description = "Bits and bobs useful for many para swimming data manipulation projects"
 readme = "README.md"
 license = "MIT"
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ba3eb46
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,6 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="ParaUtils",
+    packages=find_packages(),
+)
\ No newline at end of file