Good Evening, I have a conundrum regarding JSON objects and converting them to CSV:
Context I am converting XML files to a JSON object (please see snippet below) and then finally producing a CSV file. Here is a an example JSON object: "PAC": { "Account": [{ "PC": "0", "CMC": "0", "WC": "0", "DLA": "0", "CN": null, "FC": { "Int32": ["0", "0", "0", "0", "0"] }, "F": { "Description": null, "Code": "0" } In general, when I convert any of the files from JSON to CSV, I have been successful when using the following strategy (credit to Peter Otten): import csv import json import sys def hook(obj): return obj def flatten(obj): for k, v in obj: if isinstance(v, list): yield from flatten(v) else: yield k, v if __name__ == "__main__": with open("somefileneame.json") as f: data = json.load(f, object_pairs_hook=hook) pairs = list(flatten(data)) writer = csv.writer(sys.stdout) header = writer.writerow([k for k, v in pairs]) row = writer.writerow([v for k, v in pairs]) #writer.writerows for any other iterable object However with the example JSON object (above) i receive the following error when applying this function: ValueError: too many values to unpack Here are some more samples. "FC": {"Int32": ["0","0","0","0","0","0"]} "PBA": {"Double": ["0","0","0","0","0","0","0","0"]} 3. "PBDD": { "DateTime": ["1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM"] }, In the above example, I would like to remove the keys Int32, Double and DateTime. I am wondering if there is a function or methodology that would allow me to remove such nested keys and reassign the new keys to the outer key (in this case above FC, PBA and PBDD) as column headers in a CSV and concatenate all of the values within the list (as corresponding fields). Also, here is how I strategized my XML to CSV conversion (if this is of any use): import xml.etree.cElementTree as ElementTree from xml.etree.ElementTree import XMLParser import json import csv import tokenize import token try: from collections import OrderedDict import json except ImportError: from ordereddict import OrderedDict import simplejson as json import itertools import six import string from csvkit import CSVKitWriter class XmlListConfig(list): def __init__(self, aList): for element in aList: if element: # treat like dict if len(element) == 1 or element[0].tag != element[1].tag: self.append(XmlDictConfig(element)) # treat like list elif element[0].tag == element[1].tag: self.append(XmlListConfig(element)) elif element.text: text = element.text.strip() if text: self.append(text) class XmlDictConfig(dict): ''' Example usage: >>> tree = ElementTree.parse('your_file.xml') >>> root = tree.getroot() >>> xmldict = XmlDictConfig(root) Or, if you want to use an XML string: >>> root = ElementTree.XML(xml_string) >>> xmldict = XmlDictConfig(root) And then use xmldict for what it is..a dictionary. ''' def __init__(self, parent_element): if parent_element.items(): self.update(dict(parent_element.items())) for element in parent_element: if element: # treat like dict - we assume that if the first two tags # in a series are different, then they are all different. if len(element) == 1 or element[0].tag != element[1].tag: aDict = XmlDictConfig(element) # treat like list - we assume that if the first two tags # in a series are the same, then the rest are the same. else: # here, we put the list in dictionary; the key is the # tag name the list elements all share in common, and # the value is the list itself aDict = {element[0].tag: XmlListConfig(element)} # if the tag has attributes, add those to the dict if element.items(): aDict.update(dict(element.items())) self.update({element.tag: aDict}) # this assumes that if you've got an attribute in a tag, # you won't be having any text. This may or may not be a # good idea -- time will tell. It works for the way we are # currently doing XML configuration files... elif element.items(): self.update({element.tag: dict(element.items())}) # finally, if there are no child tags and no attributes, extract # the text else: self.update({element.tag: element.text}) def main(): #Lines 88-89stantiate the class Elementree #and applies the method to recursively traverse from the root node #XmlDictConfig is instantiated in line 90 with open('C:\\Users\\wynsa2\\Desktop\\Python Folder\\PCSU\\Trial2_PCSU\\2-Response.xml', 'r', encoding='utf-8') as f: xml_string = f.read() xml_string= xml_string.replace('�', '') root = ElementTree.XML(xml_string) xmldict = XmlDictConfig(root) json_str = json.dumps(xmldict, sort_keys=True, indent=4, separators=(',', ': ')) newly_formatted_data = json.loads(json_str) #encode into JSON with open('data2.json', 'w') as f: #writing JSON file json.dump(newly_formatted_data, f) I hope that I was clear in my description. Thank you all for your help. Sincerely, Saran -- https://mail.python.org/mailman/listinfo/python-list