I am working on scraping Weather Underground using its XML interface...

I am hoping to add this to pywapi, but it looks like that project has been 
abandoned?  I haven't seen any updates to it in ages...

And I'm using the Weather Underground XML API 
(http://wiki.wunderground.com/index.php/API_-_XML)...  And it's working, except 
something odd is happening with the Forecast portion...

When I parse the Forecast, the Highs & Lows come out as the same value...

I don't see another approach to this though.  Weather Underground is presenting 
the same data structure for the forecast, which is why I am breaking it into a 
list...  I'm not an expert at XML, but I believe that I have etree 
working fine...  It is not necessarily the best way, though.  Is there a better 
way to read this via etree?

The only limitation I have is that the code has to run on Python 2.5.1, due to 
limitations in the Indigo framework...

The scan_node function scans the individual node, and works fine for the 
Weather forecast...  but due to the duplicate XML tags in the forecast XML 
interface, I had to manually break it out into a list...

But this doesn't explain the issue with the highs not being read properly...

Anyone?

# Weather Underground XML endpoints.  Each template takes exactly one ``%s``
# substitution: a location query for the first two, a station ID for the
# personal-weather-station (PWS) feed.
WUND_WEATHER_URL = (
    'http://api.wunderground.com/auto/wui/geo/WXCurrentObXML/index.xml?query=%s'
)
WUND_FORECAST_URL = (
    'http://api.wunderground.com/auto/wui/geo/ForecastXML/index.xml?query=%s'
)
WUND_PWS_WEATHER_URL = (
    'http://api.wunderground.com/weatherstation/WXCurrentObXML.asp?ID=%s'
)

def scan_node(data, node, ns_wund_data_structure):
    """Copy one XML element's text into ``data`` under every matching category.

    ``ns_wund_data_structure`` maps a category name to a tuple of tag names.
    For each category whose tuple contains ``node.tag``, the element's
    stripped text is stored at ``data[category][tag]``.

    The match is by tag name only.  A tag that is listed in several
    categories is written to all of them, so callers that need to tell such
    tags apart must pass a structure narrowed to the category they want.

    Parameters:
        data: dict of ``{category: {tag: text}}``; updated in place.
        node: an ElementTree element (only ``.tag`` and ``.text`` are read).
        ns_wund_data_structure: dict of ``{category: (tag, ...)}``.

    Returns:
        The same ``data`` dict that was passed in.
    """
    tag = node.tag
    for category, tag_names in ns_wund_data_structure.items():
        if tag not in tag_names:
            continue
        # The category entry is created on the first match even when the
        # element has no text, so callers can rely on the key existing.
        bucket = data.setdefault(category, {})
        if node.text is not None:
            bucket[tag.strip()] = node.text.strip()
    return data

def get_weather_from_wund(location_id, hl=''):
    """Fetch and parse the current-observation XML for ``location_id``.

    The document root's children are scanned at most two levels deep: an
    element with children has each child passed to ``scan_node``; an element
    without children is passed to ``scan_node`` itself.

    Parameters:
        location_id: value substituted into ``WUND_WEATHER_URL``.
        hl: unused; kept so existing callers keep working.

    Returns:
        dict of ``{category: {tag: text}}``.  Every category named in the
        structure below is present, possibly as an empty dict.
    """
    url = WUND_WEATHER_URL % (location_id)
    handler = urllib2.urlopen(url)
    try:
        tree = parse(handler)
    finally:
        # Release the connection even when the response is not valid XML.
        handler.close()
    weather_data = {}
    elem = tree.getroot()

    ns_wund_data_structure = {
        'display_location': (
            'full', 'city', 'state', 'state_name', 'country', 'zip',
            'latitude', 'longitude', 'elevation',
        ),
        'current_observation': (
            'station_id', 'observation_time', 'observation_time_rfc822',
            'local_time', 'local_time_rfc822', 'local_epoch', 'weather',
            'temperature_string', 'temp_f', 'temp_c', 'relative_humidity',
            'wind_string', 'wind_dir', 'wind_degrees',
            # NOTE(review): 'wind_mpg' looks like a typo for 'wind_mph';
            # both are listed so the wind speed is captured either way --
            # confirm the tag name against a live response.
            'wind_mpg', 'wind_mph',
            'wind_gust', 'pressure_string', 'pressure_mb', 'pressure_in',
            'dewpoint_string', 'dewpoint_f', 'dewpoint_c',
            'heat_index_string', 'heat_index_f', 'heat_index_c',
            'windchill_string', 'windchill_f', 'windchill_c',
            'visibility_mi', 'visibility_km',
            # 'forceast_url' is kept as originally written; the correctly
            # spelled 'forecast_url' is listed further down.
            'forceast_url', 'history_url', 'ob_url',
            'icon_url_base', 'icon_url_name', 'icon', 'forecast_url',
        ),
        'icons': (
            'icon_set', 'icon_url', 'icon_url_base', 'icon_url_name', 'icon',
        ),
    }

    # Pre-create every category so callers can index without key checks.
    for category in ns_wund_data_structure:
        weather_data[category] = {}

    # list(element) yields the direct children and works on every
    # ElementTree version; Element.getchildren() no longer exists in
    # current Python releases.
    for node in list(elem):
        children = list(node)
        if children:
            for subnode in children:
                weather_data = scan_node(weather_data, subnode,
                                         ns_wund_data_structure)
        else:
            weather_data = scan_node(weather_data, node,
                                     ns_wund_data_structure)
    return weather_data

def _scope_to_parent(parent_tag, tag, dstructure):
    """Pick the category table to use for a leaf ``tag`` under ``parent_tag``.

    When the parent element is itself a category that lists ``tag``, only
    that category is returned, so the leaf is stored under its real parent.
    Otherwise the full table is returned and matching is by tag name alone.
    """
    if parent_tag in dstructure and tag in dstructure[parent_tag]:
        return {parent_tag: dstructure[parent_tag]}
    return dstructure


def walk_tree(root_node, data, dstructure):
    """Recursively collect the text of every leaf element below ``root_node``.

    Elements with children are descended into; elements without children
    are handed to ``scan_node``.

    A leaf whose parent's tag is a category listing the leaf's tag is stored
    under that category only.  Without this, a tag shared by several
    categories (``fahrenheit``/``celsius`` under both ``high`` and ``low``)
    was written to every one of them on each occurrence, so the last element
    parsed overwrote the others and highs always equalled lows.  The same
    rule means such a shared tag is no longer duplicated into categories
    that are not its parent.

    Parameters:
        root_node: ElementTree element whose descendants are walked.
        data: dict of ``{category: {tag: text}}``; updated in place.
        dstructure: dict of ``{category: (tag, ...)}``.

    Returns:
        The ``data`` dict.
    """
    parent_tag = root_node.tag
    # list(element) yields the direct children on every ElementTree version;
    # len(element) is the child count.
    for node in list(root_node):
        if len(node):
            # data is mutated in place, so the recursive result is not needed.
            walk_tree(node, data, dstructure)
        else:
            data = scan_node(
                data, node, _scope_to_parent(parent_tag, node.tag, dstructure))
    return data
        
def get_forecast_from_wund(location_id, weather_data=None, hl=''):
    """Fetch and parse the forecast XML for ``location_id``.

    The whole document is first walked into ``weather_data``.  Then each
    child of the ``<simpleforecast>`` element is walked into its own dict
    and appended to ``weather_data["forecast"]``, so values from different
    forecast days are kept apart.

    Parameters:
        location_id: value substituted into ``WUND_FORECAST_URL``.
        weather_data: optional dict to merge into; a new dict is created
            when omitted.
        hl: unused; kept so existing callers keep working.

    Returns:
        ``weather_data`` with a ``"forecast"`` list added.  The list is
        empty when the response has no ``<simpleforecast>`` element.
    """
    url = WUND_FORECAST_URL % (location_id)
    handler = urllib2.urlopen(url)
    try:
        tree = parse(handler)
    finally:
        # Release the connection even when the response is not valid XML.
        handler.close()
    if weather_data is None:
        weather_data = {}
    elem = tree.getroot()

    ns_forecast_structure = {
        'txt_forecast': ('number', 'forecastday'),
        'high': ('fahrenheit', 'celsius'),
        'low': ('fahrenheit', 'celsius'),
        'simpleforecast': ('forecastday', 'conditions', 'icon', 'skyicon'),
        'forecastday': (
            'period', 'title', 'fcttext', 'date', 'high', 'low',
            'conditions', 'icon', 'skyicon',
        ),
        'date': (
            'epoch', 'pretty_short', 'pretty', 'day', 'month', 'year',
            'yday', 'hour', 'min', 'sec', 'isdst', 'monthname',
            'weekday_short', 'weekday', 'ampm', 'tz_short', 'tz_long',
        ),
    }

    # NOTE(review): ns_wund_data_structure is not defined inside this
    # function; this line relies on a module-level name of that spelling
    # and raises NameError without one.  Confirm whether the
    # ns_forecast_structure table above was intended here instead.
    weather_data = walk_tree(elem, weather_data, ns_wund_data_structure)
    weather_data["forecast"] = []

    # ".//" searches all descendants.  The original "//simpleforecast" form
    # is an absolute path, which ElementTree only tolerates by rewriting it
    # to this relative form.
    forecast_root = tree.find(".//simpleforecast")
    if forecast_root is None:
        # No forecast section in the response; nothing more to collect.
        return weather_data

    for day_node in list(forecast_root):
        day_data = walk_tree(day_node, {}, ns_forecast_structure)
        weather_data["forecast"].append(day_data)

    return weather_data

-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to