File:Running times 41Berlin marathon cumulative.svg

Summary

Description
English: Cumulative distribution of the running times of all participants who finished the 41st Berlin Marathon, shown for times from 2 h to 7 h. Male and female participants are drawn as two separate curves.
Date
Source Own work
Author Geek3
Other versions Running times 41Berlin marathon.svg

Source code

Python code

Python source code
#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
import pylab as pl
import urllib
import StringIO
import os
import re
from lxml import etree

# Base URL of the results list (100 results per page). The gender filter and
# the page number are appended to it as additional query parameters below.
website = 'http://results.scc-events.com/2014/?event=MAL&pid=list&num_results=100'

def fetch_times_from_page(pagename):
    """Download one results page and return the finishing times found on it.

    pagename -- complete URL of the results page to fetch.

    The page is parsed as HTML. Every table row (``tr``) with exactly ten
    children is inspected, and the text of its ninth child is kept when it
    has the form ``HH:MM:SS``. Rows whose ninth child has no text, or text
    of another form, are skipped.

    Returns a list of the matching time strings in document order; the list
    is empty when the page holds no such rows.
    """
    page = urllib.urlopen(pagename)
    try:
        page_text = page.read()
    finally:
        # release the connection even if reading fails
        page.close()
    page_xml = etree.parse(StringIO.StringIO(page_text), etree.HTMLParser())

    # compiled once per page instead of once per table row
    time_pattern = re.compile(r'\A(\d{2}):(\d{2}):(\d{2})\Z')

    times_txt = []
    for element in page_xml.iter('tr'):
        if len(element) != 10:
            continue
        t_text = element[8].text
        # .text is None when the cell has no leading text; re.match would
        # raise TypeError on it, so treat it like any other non-time cell
        if t_text is None:
            continue
        if time_pattern.match(t_text) is not None:
            times_txt.append(t_text)
    return times_txt

def fetch_list(pagename):
    """Collect the finishing times from all result pages of one query.

    pagename -- URL of the results list without a page number; the
        ``&page=<i>`` parameter is appended here.

    Pages are requested for i = 0, 1, 2, ... until one of them yields no
    times. A progress line is printed for every request.

    Returns the list of time strings of all pages, in page order.
    """
    # Single-argument print(...) with %-formatting writes exactly the same
    # text as the former print statements under Python 2 and also parses
    # under Python 3.
    print('fetching data from website')
    times_txt = []
    i = 0
    while True:
        print('fetching set %d' % i)
        new_times = fetch_times_from_page(pagename + '&page=' + str(i))
        if not new_times:
            print('no more new entries on page %d' % i)
            break
        # extend in place; rebuilding the list on every page is quadratic
        times_txt.extend(new_times)
        i += 1
    return times_txt

# Load the finishing times per gender (downloading them on first use) and
# plot their cumulative distributions.

# one 'HH:MM:SS' entry per line in the cache files
time_pattern = re.compile(r'\A(\d{2}):(\d{2}):(\d{2})\Z')

times = {}
for gender in ['M', 'W']:

    # NOTE(review): the cache file name says 40 although the plot title
    # refers to the 41st marathon; kept unchanged so that existing cache
    # files are still found.
    fname = 'marathon40_' + gender + '.txt'
    if not os.path.isfile(fname):
        # have no local file -> need to download data
        pagename = website + '&search[se' + 'x]=' + gender
        times_list = fetch_list(pagename)
        with open(fname, 'w') as f:
            f.write('\n'.join(times_list))

    times[gender] = []
    with open(fname, 'r') as f:
        for line in f:
            match = time_pattern.match(line.strip())
            if match is None:
                # skip blank or malformed lines
                continue
            hours, minutes, seconds = match.groups()
            # running time expressed in hours
            t = float(hours) + float(minutes) / 60. + float(seconds) / 3600.
            times[gender].append(t)

pl.figure(figsize=(50/9., 35/9.))
pl.rc('path', simplify_threshold=0.5)
# empirical distribution: sorted times on x against evenly spaced values
# from 0 to 1 on y
pl.plot(sorted(times['M']), np.linspace(0, 1, len(times['M'])),
    'b-', label='m', linewidth=3)
pl.plot(sorted(times['W']), np.linspace(0, 1, len(times['W'])),
    'r-', label='w', linewidth=3)
pl.xlim(2, 7)
pl.xlabel('t [h]')
pl.ylabel('F')
pl.title('41. Berlin Marathon running times')
pl.grid(True)
pl.legend(loc='center right')
pl.tight_layout()
pl.savefig('running_times_41Berlin_marathon_cumulative.svg')

Licensing

I, the copyright holder of this work, hereby publish it under the following license:
w:en:Creative Commons
attribution
This file is licensed under the Creative Commons Attribution 3.0 Unported license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
Category:CC-BY-3.0#Running%20times%2041Berlin%20marathon%20cumulative.svg
Category:Self-published work Category:Marathon diagrams Category:Images with Python source code Category:Berlin Marathon Category:Photos by User:Geek3
Category:Berlin Marathon Category:CC-BY-3.0 Category:Images with Python source code Category:Marathon diagrams Category:Photos by User:Geek3 Category:Self-published work Category:Valid SVG created with Matplotlib code