File:Running times 41Berlin marathon cumulative.svg
Summary
Description |
English: Cumulative distribution of the running times of all participants who finished the 41st Berlin Marathon, shown over the range from 2 h to 7 h. Male and female participants are drawn as two separate curves. |
Date | |
Source | Own work |
Author | Geek3 |
Other versions | Running times 41Berlin marathon.svg |
Source code
Python code
Python source code |
---|
#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
import pylab as pl
import urllib
import StringIO
import os
import re
from lxml import etree
# Base URL of the result list. Further down, the script appends a gender
# filter and a '&page=N' index to this URL before each download.
# NOTE(review): num_results=100 presumably requests 100 rows per page -- confirm.
website = 'http://results.scc-events.com/2014/?event=MAL&pid=list&num_results=100'
def fetch_times_from_page(pagename):
    """Download one result page and return the finishing times found on it.

    Every ``<tr>`` element that has exactly 10 children is inspected, and the
    text of its 9th child is kept when it has the form ``HH:MM:SS``.

    pagename -- URL of the page to download.

    Returns a list of the matching time strings (unchanged, as found in the
    page). The list is empty when no row matches.
    """
    page = urllib.urlopen(pagename)
    try:
        page_text = page.read()
    finally:
        # release the connection even when reading fails
        page.close()
    page_xml = etree.parse(StringIO.StringIO(page_text), etree.HTMLParser())

    # raw string so that \A, \d and \Z reach the regex engine untouched
    time_pattern = re.compile(r'\A(\d{2}):(\d{2}):(\d{2})\Z')

    times_txt = []
    for element in page_xml.iter('tr'):
        if len(element) != 10:
            continue
        t_text = element[8].text
        # .text is None for a cell without leading text; passing None to
        # the regex would raise TypeError, so such rows are skipped.
        if t_text is None or time_pattern.match(t_text) is None:
            continue
        times_txt.append(t_text)
    return times_txt
def fetch_list(pagename):
    """Collect the finishing times from all result pages of one listing.

    Pages are requested as ``pagename + '&page=<i>'`` for i = 0, 1, 2, ...
    until a page yields no times.

    pagename -- listing URL without the page parameter.

    Returns one flat list with the time strings of all pages, in fetch order.
    """
    # Each print passes a single pre-formatted string, so the output is the
    # same whether print is a statement (Python 2) or a function.
    print('fetching data from website')
    times_txt = []
    i = 0
    while True:
        print('fetching set %d' % i)
        new_times = fetch_times_from_page(pagename + '&page=' + str(i))
        if not new_times:
            print('no more new entries on page %d' % i)
            break
        # extend in place; rebuilding the list with '+' would copy all
        # previously collected entries on every page
        times_txt.extend(new_times)
        i += 1
    return times_txt
# Running times in hours, keyed by the gender code used in the query
# ('M' and 'W').
times = {}
# raw string so that \A, \d and \Z reach the regex engine untouched
_time_pattern = re.compile(r'\A(\d{2}):(\d{2}):(\d{2})\Z')
for gender in ['M', 'W']:
    # Local cache file with one HH:MM:SS entry per line.
    # NOTE(review): the prefix says 40 although the plot title says 41;
    # left unchanged so that existing cache files are still found.
    fname = 'marathon40_' + gender + '.txt'
    if not os.path.isfile(fname):
        # have no local file -> need to download data
        pagename = website + '&search[se' + 'x]=' + gender
        times_list = fetch_list(pagename)
        with open(fname, 'w') as f:
            f.write('\n'.join(times_list))

    times[gender] = []
    with open(fname, 'r') as f:
        for line in f:
            match = _time_pattern.match(line.strip())
            if match is None:
                # skip blank or malformed lines
                continue
            hours, minutes, seconds = match.groups()
            # convert HH:MM:SS to fractional hours
            t = float(hours) + float(minutes) / 60. + float(seconds) / 3600.
            times[gender].append(t)
# Draw one cumulative curve per gender: the sorted running times on the
# x axis against evenly spaced values from 0 to 1 on the y axis.
pl.figure(figsize=(50/9., 35/9.))
pl.rc('path', simplify_threshold=0.5)

for series_key, line_format, legend_label in [('M', 'b-', 'm'),
                                              ('W', 'r-', 'w')]:
    ordered_times = sorted(times[series_key])
    fractions = np.linspace(0, 1, len(times[series_key]))
    pl.plot(ordered_times, fractions, line_format,
            label=legend_label, linewidth=3)

# axes, labels and legend
pl.xlim(2, 7)
pl.xlabel('t [h]')
pl.ylabel('F')
pl.title('41. Berlin Marathon running times')
pl.grid(True)
pl.legend(loc='center right')

pl.tight_layout()
pl.savefig('running_times_41Berlin_marathon_cumulative.svg')
|
Licensing
I, the copyright holder of this work, hereby publish it under the following license:
This file is licensed under the Creative Commons Attribution 3.0 Unported license.
- You are free:
- to share – to copy, distribute and transmit the work
- to remix – to adapt the work
- Under the following conditions:
- attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.