Hi

This file contains my biggest roadblock with programming and that's the 
abstract nature of needing to pass data from one thing to the next.

In my file here I needed to traverse and modify the XML file I don't want to 
restore it or put it in a new variable or other format I just want to alter it 
and let it flow onto the list comprehensions as they were.

Once I can get on top of this mentally I will be able to do so much better, I 
think I am trying to manage it in my head as if it was water and plumbing. 

In particular here I am taking the id from race and putting it into the 
children of each race called nomination. 

I have put a comment above the new code which is causing the difficulty.

from pyquery import PyQuery as pq
import pandas as pd
import argparse
import numpy as np

# from glob import glob


parser = argparse.ArgumentParser(description=None)


def GetArgs(parser):
    """Parser function using argparse"""
    # parser.add_argument('directory', help='directory use',
    #                     action='store', nargs='*')
    parser.add_argument("files", nargs="+")
    return parser.parse_args()

fileList = GetArgs(parser)
# print(fileList.files)


data = []


horseattrs = ('race_id', 'id', 'horse', 'number', 'finished', 'age', 'sex',
              'blinkers', 'trainernumber', 'career', 'thistrack', 'firstup',
              'secondup', 'variedweight', 'weight', 'pricestarting')
meetattrs = ('id', 'venue', 'date', 'rail', 'weather', 'trackcondition')
raceattrs = ('id', 'number', 'shortname', 'stage', 'distance',
             'grade', 'age', 'weightcondition', 'fastesttime', 'sectionaltime')
clubattrs = ('code')

frames = pd.DataFrame([])
noms = []
for items in fileList.files:
    d = pq(filename=items)
    meet = d('meeting')
    club = d('club')
    race = d('race')
    res = d('nomination')

    # d('p').filter(lambda i: i == 1)

    # Here i need to traverse and modify but I don't want to restore the
    # structure just pass it on. So I can use it in the following list
    # comprehensions as I had before.

    for race_el in d('race'):
        race = pq(race_el)
        race_id = race.attr('id')

    for nom_el in race.items('nomination'):
        res.append((pq(nom_el).attr('raceid', race_id)))

    resdata = [[res.eq(i).attr(x)
                for x in horseattrs] for i in range(len(res))]
    # print(dataSets)

    meetdata = [[meet.eq(i).attr(x)
                 for x in meetattrs] for i in range(len(meet))]
    racedata = [[race.eq(i).attr(x)
                 for x in raceattrs] for i in range(len(race))]
    clubdata = [[club.eq(i).attr(x)
                 for x in clubattrs] for i in range(len(club))]
    raceid = [row[0] for row in racedata]

# L = [x + [0] for x in L]
# print(resdata)
# resdata = [raceid[i] for i in raceid  x + i for x in resdata]

# for number of classes equalling nomination in the each category of
# race inset raceid into resdata
#
# print(resdata)
# clubdf = pd.DataFrame(clubdata)
# meetdf = pd.DataFrame(meetdata)
# racedf = pd.DataFrame(racedata)
# resdf = pd.DataFrame(resdata)
# frames = frames.append(clubdf)
# frames = frames.append(meetdf)
#
# frames = frames.append(racedf)
# frames = frames.append(resdf)

# print(frames)
# frames.to_csv('~/testingFrame5.csv', encoding='utf-8')

Thanks

Sayth
-- 
https://mail.python.org/mailman/listinfo/python-list

Reply via email to