Re-structured
This commit is contained in:
parent
513964d898
commit
c041e77ad7
4 changed files with 81 additions and 67 deletions
|
|
@ -1,67 +1,4 @@
|
||||||
#Bits and bobs used for many different projects. Junk drawer module
|
from .utils import *
|
||||||
import pandas
|
|
||||||
import numpy
|
|
||||||
|
|
||||||
|
__version__ = "0.1.1"
|
||||||
#Useful functions for parsing race-like durations, and normalizing their string reps
|
# Public API re-exported from .utils (space-separated names split into a list).
__all__ = 't2f f2t CSG parse_WPS_rankings'.split()
|
||||||
def t2f(S):
    """Convert a race-style duration into seconds.

    Accepts ``ss.hh``, ``m:ss.hh`` and ``h:mm:ss.hh`` text (or anything whose
    string form looks like one of those).

    Returns:
        None when ``S`` is falsy (empty string, None, 0),
        -1 when the text cannot be parsed as a duration,
        otherwise the duration in seconds rounded to 2 decimals.
    """
    if not S:
        return None
    # Left-pad with zero fields so every accepted form unpacks into h, m, s.
    # Wrapping S in a 1-tuple keeps %-formatting safe if S is itself a tuple.
    h, m, s = ('0:0:%s' % (S,)).split(':')[-3:]
    try:
        return numpy.round(3600*int(h) + 60*int(m) + float(s), 2)
    except ValueError:
        # int()/float() reject non-numeric fields; -1 flags "unparseable".
        return -1
|
|
||||||
|
|
||||||
def f2t(S):
    """Format a duration in seconds as ``mm:ss.hh``.

    Minutes are not capped at 59 (there is no hours field).

    Returns:
        '' when ``S`` is missing, non-numeric, NaN/inf, zero or negative;
        otherwise the zero-padded ``mm:ss.hh`` string.
    """
    try:
        seconds = float(S)
    except (TypeError, ValueError):
        return ''
    if not numpy.isfinite(seconds) or seconds <= 0:
        return ''
    # Round once on the total so a carry propagates into seconds/minutes
    # (59.999 becomes 01:00.00 rather than 00:59.100).
    total_hundredths = round(seconds * 100)
    minutes, rest = divmod(total_hundredths, 6000)
    secs, hundredths = divmod(rest, 100)
    return f'{minutes:02}:{secs:02}.{hundredths:02}'
|
|
||||||
|
|
||||||
#Canada Summer Games eligible events
#One row per (Stroke, Distance, SportClass) combination after exploding the class lists
CSG = (
    pandas.DataFrame(
        [
            ('Butterfly', 50, list(range(1, 8))),
            ('Butterfly', 100, list(range(8, 15))),
            ('Backstroke', 50, list(range(1, 6))),
            ('Backstroke', 100, [1, 2, *range(6, 15)]),
            ('Breaststroke', 50, [1, 2, 3]),
            ('Breaststroke', 100, [*range(4, 10), *range(11, 15)]),
            ('Medley', 150, list(range(1, 5))),
            ('Medley', 200, list(range(5, 15))),
            ('Freestyle', 50, list(range(1, 15))),
            ('Freestyle', 100, list(range(1, 15))),
            ('Freestyle', 200, [*range(1, 6), 14]),
            ('Freestyle', 400, list(range(6, 14))),
        ],
        columns=['Stroke', 'Distance', 'SportClass'],
    )
    .explode('SportClass')
    .reset_index(drop=True)
)
|
|
||||||
|
|
||||||
def parse_WPS_rankings(DF):
    """Clean a rankings dataframe (usually loaded from an Excel file) for use.

    Steps, in order:
      * replace spaces in column names with underscores (eases .dot access);
      * drop rows without an SDMS_ID (VACANT events and relay results);
      * split the Event text into Distance, Stroke and SportClass columns;
      * parse Date as datetime, derive Seconds from Result via t2f, and cast
        SportClass, Distance and SDMS_ID to int;
      * keep only the columns listed in ``keep`` below.

    Returns a new dataframe; ``DF`` is not modified.
    """
    keep = 'Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()
    # Non-capturing optional "Individual " so extract() yields only the three
    # named columns; [A-Za-z] avoids the punctuation that [A-z] also matches.
    event_pattern = r"'s (?P<Distance>\d+) m (?:Individual )*(?P<Stroke>[A-Za-z]+) [SBM]+(?P<SportClass>\d+)"
    return (
        DF
        #Makes .dot notation easier
        .rename(columns=lambda c: c.replace(' ', '_'))

        #Drop VACANT events and relay results
        .pipe(lambda df: df[df.SDMS_ID.notnull()])

        #Parse out event information into useful columns
        .pipe(lambda df: pandas.concat([df, df.Event.str.extract(event_pattern)], axis=1))

        #Cleanup column dtypes
        .assign(Date=lambda df: pandas.to_datetime(df.Date))
        .assign(Seconds=lambda df: df.Result.apply(t2f))
        .astype({'SportClass': int, 'Distance': int, 'SDMS_ID': int})

        #Drop excess columns
        .loc[:, keep]
    )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__version__ = "0.1.1"
|
|
||||||
|
|
|
||||||
67
ParaUtils/utils.py
Normal file
67
ParaUtils/utils.py
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
#Bits and bobs used for many different projects. Junk drawer module
|
||||||
|
import pandas
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
|
#Useful functions for parsing race-like durations, and normalizing their string reps
|
||||||
|
def t2f(S):
    """Convert a race-style duration into seconds.

    Accepts ``ss.hh``, ``m:ss.hh`` and ``h:mm:ss.hh`` text (or anything whose
    string form looks like one of those).

    Returns:
        None when ``S`` is falsy (empty string, None, 0),
        -1 when the text cannot be parsed as a duration,
        otherwise the duration in seconds rounded to 2 decimals.
    """
    if not S:
        return None
    # Left-pad with zero fields so every accepted form unpacks into h, m, s.
    # Wrapping S in a 1-tuple keeps %-formatting safe if S is itself a tuple.
    h, m, s = ('0:0:%s' % (S,)).split(':')[-3:]
    try:
        return numpy.round(3600*int(h) + 60*int(m) + float(s), 2)
    except ValueError:
        # int()/float() reject non-numeric fields; -1 flags "unparseable".
        return -1
|
||||||
|
|
||||||
|
def f2t(S):
    """Format a duration in seconds as ``mm:ss.hh``.

    Minutes are not capped at 59 (there is no hours field).

    Returns:
        '' when ``S`` is missing, non-numeric, NaN/inf, zero or negative;
        otherwise the zero-padded ``mm:ss.hh`` string.
    """
    try:
        seconds = float(S)
    except (TypeError, ValueError):
        return ''
    if not numpy.isfinite(seconds) or seconds <= 0:
        return ''
    # Round once on the total so a carry propagates into seconds/minutes
    # (59.999 becomes 01:00.00 rather than 00:59.100).
    total_hundredths = round(seconds * 100)
    minutes, rest = divmod(total_hundredths, 6000)
    secs, hundredths = divmod(rest, 100)
    return f'{minutes:02}:{secs:02}.{hundredths:02}'
|
||||||
|
|
||||||
|
#Canada Summer Games eligible events
#One row per (Stroke, Distance, SportClass) combination after exploding the class lists
CSG = (
    pandas.DataFrame(
        [
            ('Butterfly', 50, list(range(1, 8))),
            ('Butterfly', 100, list(range(8, 15))),
            ('Backstroke', 50, list(range(1, 6))),
            ('Backstroke', 100, [1, 2, *range(6, 15)]),
            ('Breaststroke', 50, [1, 2, 3]),
            ('Breaststroke', 100, [*range(4, 10), *range(11, 15)]),
            ('Medley', 150, list(range(1, 5))),
            ('Medley', 200, list(range(5, 15))),
            ('Freestyle', 50, list(range(1, 15))),
            ('Freestyle', 100, list(range(1, 15))),
            ('Freestyle', 200, [*range(1, 6), 14]),
            ('Freestyle', 400, list(range(6, 14))),
        ],
        columns=['Stroke', 'Distance', 'SportClass'],
    )
    .explode('SportClass')
    .reset_index(drop=True)
)
|
||||||
|
|
||||||
|
def parse_WPS_rankings(DF):
    """Clean a rankings dataframe (usually loaded from an Excel file) for use.

    Steps, in order:
      * replace spaces in column names with underscores (eases .dot access);
      * drop rows without an SDMS_ID (VACANT events and relay results);
      * split the Event text into Distance, Stroke and SportClass columns;
      * parse Date as datetime, derive Seconds from Result via t2f, and cast
        SportClass, Distance and SDMS_ID to int;
      * keep only the columns listed in ``keep`` below.

    Returns a new dataframe; ``DF`` is not modified.
    """
    keep = 'Gender SportClass Distance Stroke Result Seconds SDMS_ID NPC Given_Name Family_Name Date'.split()
    # Non-capturing optional "Individual " so extract() yields only the three
    # named columns; [A-Za-z] avoids the punctuation that [A-z] also matches.
    event_pattern = r"'s (?P<Distance>\d+) m (?:Individual )*(?P<Stroke>[A-Za-z]+) [SBM]+(?P<SportClass>\d+)"
    return (
        DF
        #Makes .dot notation easier
        .rename(columns=lambda c: c.replace(' ', '_'))

        #Drop VACANT events and relay results
        .pipe(lambda df: df[df.SDMS_ID.notnull()])

        #Parse out event information into useful columns
        .pipe(lambda df: pandas.concat([df, df.Event.str.extract(event_pattern)], axis=1))

        #Cleanup column dtypes
        .assign(Date=lambda df: pandas.to_datetime(df.Date))
        .assign(Seconds=lambda df: df.Result.apply(t2f))
        .astype({'SportClass': int, 'Distance': int, 'SDMS_ID': int})

        #Drop excess columns
        .loc[:, keep]
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__version__ = "0.1.1"
|
||||||
|
|
@ -1,5 +1,9 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=45", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "ParaUtil"
|
name = "ParaUtils"
|
||||||
description = "Bits and bobs useful for many para swimming data manipulation projects"
|
description = "Bits and bobs useful for many para swimming data manipulation projects"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
||||||
6
setup.py
Normal file
6
setup.py
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
"""Minimal setuptools entry point for the ParaUtils package."""
from setuptools import find_packages, setup

# Packages are discovered automatically from the directory containing this file.
setup(
    packages=find_packages(),
    name="ParaUtils",
)
|
||||||
Loading…
Add table
Reference in a new issue