Saran Ahluwalia
2015-06-19 01:44:43 UTC
Good Evening,
I have a conundrum regarding JSON objects and converting them to CSV:
*Context*
- I am converting XML files to a JSON object (please see snippet below)
and then finally producing a CSV file. Here is a an example JSON object:
"PAC": {
"Account": [{
"PC": "0",
"CMC": "0",
"WC": "0",
"DLA": "0",
"CN": null,
"FC": {
"Int32": ["0",
"0",
"0",
"0",
"0"]
},
"F": {
"Description": null,
"Code": "0"
}
In general, when I convert any of the files from JSON to CSV, I have been
successful when using the following:
import csv
import json
import sys
def hook(obj):
return obj
def flatten(obj):
for k, v in obj:
if isinstance(v, list):
yield from flatten(v)
else:
yield k, v
if __name__ == "__main__":
with open("somefileneame.json") as f:
data = json.load(f, object_pairs_hook=hook)
pairs = list(flatten(data))
writer = csv.writer(sys.stdout)
header = writer.writerow([k for k, v in pairs])
row = writer.writerow([v for k, v in pairs]) #writer.writerows for any
other iterable object
However with the example JSON object (above) i receive the following error
when applying this function:
ValueError: too many values to unpack
Here are some more samples.
1. "FC": {"Int32": ["0","0","0","0","0","0"]}
2. "PBA": {"Double": ["0","0","0","0","0","0","0","0"]}
3. "PBDD": {
"DateTime": ["1/1/0001
12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM"]
},
In the above example, I would like to remove the keys *Int32*, *Double *and
*DateTime*. I am wondering if there is a function or methodology that
would allow
me to remove such nested keys and reassign the new keys to the outer key
(in this case above *FC, PBA *and *PBDD*) as column headers in a CSV and
concatenate all of the values within the list (as corresponding fields).
Also, here is how I strategized my XML to CSV conversion (if this is of any
use):
import xml.etree.cElementTree as ElementTree
from xml.etree.ElementTree import XMLParser
import json
import csv
import tokenize
import token
try:
from collections import OrderedDict
import json
except ImportError:
from ordereddict import OrderedDict
import simplejson as json
import itertools
import six
import string
from csvkit import CSVKitWriter
class XmlListConfig(list):
def __init__(self, aList):
for element in aList:
if element:
# treat like dict
if len(element) == 1 or element[0].tag != element[1].tag:
self.append(XmlDictConfig(element))
# treat like list
elif element[0].tag == element[1].tag:
self.append(XmlListConfig(element))
elif element.text:
text = element.text.strip()
if text:
self.append(text)
class XmlDictConfig(dict):
'''
'''
def __init__(self, parent_element):
if parent_element.items():
self.update(dict(parent_element.items()))
for element in parent_element:
if element:
# treat like dict - we assume that if the first two tags
# in a series are different, then they are all different.
if len(element) == 1 or element[0].tag != element[1].tag:
aDict = XmlDictConfig(element)
# treat like list - we assume that if the first two tags
# in a series are the same, then the rest are the same.
else:
# here, we put the list in dictionary; the key is the
# tag name the list elements all share in common, and
# the value is the list itself
aDict = {element[0].tag: XmlListConfig(element)}
# if the tag has attributes, add those to the dict
if element.items():
aDict.update(dict(element.items()))
self.update({element.tag: aDict})
# this assumes that if you've got an attribute in a tag,
# you won't be having any text. This may or may not be a
# good idea -- time will tell. It works for the way we are
# currently doing XML configuration files...
elif element.items():
self.update({element.tag: dict(element.items())})
# finally, if there are no child tags and no attributes, extract
# the text
else:
self.update({element.tag: element.text})
def main():
#Lines 88-89stantiate the class Elementree
#and applies the method to recursively traverse from the root node
#XmlDictConfig is instantiated in line 90
with open('C:\\Users\\wynsa2\\Desktop\\Python
Folder\\PCSU\\Trial2_PCSU\\2-Response.xml', 'r', encoding='utf-8') as f:
xml_string = f.read()
xml_string= xml_string.replace('�', '')
root = ElementTree.XML(xml_string)
xmldict = XmlDictConfig(root)
json_str = json.dumps(xmldict, sort_keys=True, indent=4,
separators=(',', ': '))
newly_formatted_data = json.loads(json_str) #encode into JSON
with open('data2.json', 'w') as f: #writing JSON file
json.dump(newly_formatted_data, f)
I hope that I was clear in my description. Thank you all for your help.
Sincerely,
Saran
_______________________________________________
Tutor maillist - ***@python.org
To unsubscribe or change subscription options:
https://mail.python.org/mailman/listinfo/tutor
I have a conundrum regarding JSON objects and converting them to CSV:
*Context*
- I am converting XML files to a JSON object (please see snippet below)
and then finally producing a CSV file. Here is a an example JSON object:
"PAC": {
"Account": [{
"PC": "0",
"CMC": "0",
"WC": "0",
"DLA": "0",
"CN": null,
"FC": {
"Int32": ["0",
"0",
"0",
"0",
"0"]
},
"F": {
"Description": null,
"Code": "0"
}
In general, when I convert any of the files from JSON to CSV, I have been
successful when using the following:
import csv
import json
import sys
def hook(obj):
return obj
def flatten(obj):
for k, v in obj:
if isinstance(v, list):
yield from flatten(v)
else:
yield k, v
if __name__ == "__main__":
with open("somefileneame.json") as f:
data = json.load(f, object_pairs_hook=hook)
pairs = list(flatten(data))
writer = csv.writer(sys.stdout)
header = writer.writerow([k for k, v in pairs])
row = writer.writerow([v for k, v in pairs]) #writer.writerows for any
other iterable object
However with the example JSON object (above) i receive the following error
when applying this function:
ValueError: too many values to unpack
Here are some more samples.
1. "FC": {"Int32": ["0","0","0","0","0","0"]}
2. "PBA": {"Double": ["0","0","0","0","0","0","0","0"]}
3. "PBDD": {
"DateTime": ["1/1/0001
12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM",
"1/1/0001 12:00:00 AM"]
},
In the above example, I would like to remove the keys *Int32*, *Double *and
*DateTime*. I am wondering if there is a function or methodology that
would allow
me to remove such nested keys and reassign the new keys to the outer key
(in this case above *FC, PBA *and *PBDD*) as column headers in a CSV and
concatenate all of the values within the list (as corresponding fields).
Also, here is how I strategized my XML to CSV conversion (if this is of any
use):
import xml.etree.cElementTree as ElementTree
from xml.etree.ElementTree import XMLParser
import json
import csv
import tokenize
import token
try:
from collections import OrderedDict
import json
except ImportError:
from ordereddict import OrderedDict
import simplejson as json
import itertools
import six
import string
from csvkit import CSVKitWriter
class XmlListConfig(list):
def __init__(self, aList):
for element in aList:
if element:
# treat like dict
if len(element) == 1 or element[0].tag != element[1].tag:
self.append(XmlDictConfig(element))
# treat like list
elif element[0].tag == element[1].tag:
self.append(XmlListConfig(element))
elif element.text:
text = element.text.strip()
if text:
self.append(text)
class XmlDictConfig(dict):
'''
tree = ElementTree.parse('your_file.xml')
root = tree.getroot()
xmldict = XmlDictConfig(root)
root = ElementTree.XML(xml_string)
xmldict = XmlDictConfig(root)
And then use xmldict for what it is..a dictionary.root = tree.getroot()
xmldict = XmlDictConfig(root)
root = ElementTree.XML(xml_string)
xmldict = XmlDictConfig(root)
'''
def __init__(self, parent_element):
if parent_element.items():
self.update(dict(parent_element.items()))
for element in parent_element:
if element:
# treat like dict - we assume that if the first two tags
# in a series are different, then they are all different.
if len(element) == 1 or element[0].tag != element[1].tag:
aDict = XmlDictConfig(element)
# treat like list - we assume that if the first two tags
# in a series are the same, then the rest are the same.
else:
# here, we put the list in dictionary; the key is the
# tag name the list elements all share in common, and
# the value is the list itself
aDict = {element[0].tag: XmlListConfig(element)}
# if the tag has attributes, add those to the dict
if element.items():
aDict.update(dict(element.items()))
self.update({element.tag: aDict})
# this assumes that if you've got an attribute in a tag,
# you won't be having any text. This may or may not be a
# good idea -- time will tell. It works for the way we are
# currently doing XML configuration files...
elif element.items():
self.update({element.tag: dict(element.items())})
# finally, if there are no child tags and no attributes, extract
# the text
else:
self.update({element.tag: element.text})
def main():
#Lines 88-89stantiate the class Elementree
#and applies the method to recursively traverse from the root node
#XmlDictConfig is instantiated in line 90
with open('C:\\Users\\wynsa2\\Desktop\\Python
Folder\\PCSU\\Trial2_PCSU\\2-Response.xml', 'r', encoding='utf-8') as f:
xml_string = f.read()
xml_string= xml_string.replace('�', '')
root = ElementTree.XML(xml_string)
xmldict = XmlDictConfig(root)
json_str = json.dumps(xmldict, sort_keys=True, indent=4,
separators=(',', ': '))
newly_formatted_data = json.loads(json_str) #encode into JSON
with open('data2.json', 'w') as f: #writing JSON file
json.dump(newly_formatted_data, f)
I hope that I was clear in my description. Thank you all for your help.
Sincerely,
Saran
_______________________________________________
Tutor maillist - ***@python.org
To unsubscribe or change subscription options:
https://mail.python.org/mailman/listinfo/tutor