XML
Datagrunnlaget er fila all_results.xml med resultater fra sprintøvelsene i olympiske leker de siste årene.
Du vil se av koden nedenfor at metodene find() og findall() er sentrale i å lokalisere noder i treet. Parameteren er et forenklet XPath-uttrykk [1]. Mer om XPath i modulen lxml, og enda mer i materialet om XSLT.
Vi tar utgangspunkt i følgende Python-modul.
""" Reading xml, and report """ import sys import xml.etree.ElementTree as TRE """ Load data file and establish tree return the rootnode """ def loadIt(filename): try: tree = TRE.parse(filename) return tree.getroot() except: res=sys.exc_info() print (res[1]) return None """ All athlet names, unsorted as found """ def allAthletNames(root): all_athlets=root.findall('OlympicGame/event/athlet/name') for at in all_athlets: print(at.text) """ All olympics, place and year """ def allOlympics(root): all_games=root.findall('OlympicGame') for ga in all_games: print(ga.attrib['place'],ga.attrib['year']) """ Athlets running a certain dist in a certail year Using reduced XPAth """ def athletsInDistYear(root, dist, year): print(dist,year) S=".//OlympicGame[@year='%s']/event[@dist='%s']/athlet/name"%(year,dist) athletNames=root.findall(S) for at in athletNames: print(at.text) """ Athlets running a certain dist in a certail year, ordered by result Using reduced XPAth """ def timeused(t): return float(t[1]) def athletsOrderedInDistYear(root, dist, year): print(dist,year) order={} S=".//OlympicGame[@year='%s']/event[@dist='%s']/athlet"%(year,dist) athlets=root.findall(S) for at in athlets: order[at.find("./name").text] = at.find("./result").text items=order.items() sortedtems=sorted(items,key=timeused) for it in sortedtems: print(it[0],it[1]) """ Best result of athlet in a distance, regardless of year Using reduced XPAth """ def athletsOrderedInDist(root, dist): print(dist,"all olympics") order={} S=".//event[@dist='%s']/athlet"%(dist) athlets=root.findall(S) for at in athlets: name=at.find("./name").text res=at.find("./result").text if (not name in order) or (float(order[name]) > float(res)): order[name] = res items=order.items() sortedtems=sorted(items,key=timeused) for it in sortedtems: print(it[0],it[1]) """ Best result of athlets in a distance (where and when) , regardless of year Using reduced XPAth """ def timeused2(t): return float(t[1][0]) def 
athletsOrderedInDistWhere(root, dist): print(dist,"all olympics") order={} games=root.findall(".//OlympicGame") for og in games: athlets=og.findall(".//event[@dist='%s']/athlet"%dist) for at in athlets: name=at.find("./name").text res=at.find("./result").text if (not name in order) or (float(order[name][0]) > float(res)): order[name] = [res,og.attrib['place'],og.attrib['year']] items=order.items() sortedtems=sorted(items,key=timeused2) for it in sortedtems: print(it[1][0],'\t',it[0],',',it[1][1],it[1][2]) root= loadIt('all_results.xml') if root !=None: #allAthletNames(root) #allOlympics(root) #athletsInDistYear(root, '400m', '2004') #athletsOrderedInDistYear(root, '100m', '1992') #athletsOrderedInDist(root, '100m') athletsOrderedInDistWhere(root, '100m')
Resultatet av metoden athletsOrderedInDistWhere() blir:
100m all olympics 09.63 Usain Bolt , London 2012 09.75 Yohan Blake , London 2012 09.79 Justin Gatlin , London 2012 09.80 Tyson Gay , London 2012 09.84 Donovan Bailey , Atlanta 1996 09.86 Francis Obikwelu , Athens 2004 09.87 Maurice Greene , Sidney 2000 09.88 Ryan Bailey , London 2012 09.89 Shawn Crawford , Athens 2004 09.89 Frank Fredericks , Atlanta 1996 09.89 Richard Thompson , Beijing 2008 09.90 Ato Boldon , Atlanta 1996 09.91 Walter Dix , Beijing 2008 09.93 Churandy Martina , Beijing 2008 09.94 Asafa Powell , Athens 2004 09.96 Linford Christie , Barcelona 1992 09.97 Michael Frater , Beijing 2008 09.99 Dennis Mitchell , Atlanta 1996 10.00 Michael Marsh , Atlanta 1996 10.00 Kim Collins , Athens 2004 10.01 Marc Burns , Beijing 2008 10.03 Darvis Patton , Beijing 2008 10.04 Obadele Thompson , Sidney 2000 10.08 Dwain Chambers , Sidney 2000 10.09 Jonathan Drummond , Sidney 2000 10.09 Bruny Surin , Barcelona 1992 10.10 Leroy Burrell , Barcelona 1992 10.12 Olapade Adeniken , Barcelona 1992 10.13 Darren Campbell , Sidney 2000 10.14 Davidson Ezima , Atlanta 1996 10.16 Michael Green , Atlanta 1996 10.22 Raymond Stewart , Barcelona 1992
Kopier Python-koden og datafila som nevnt over, kjør de forskjellige metodene og gjør dine egne eksperimenter.