Thank you for the feedback on this. I believe that the csv module's built-in excel dialect already defines just those settings:
class excel(Dialect): delimiter = ',' quotechar = '"' doublequote = True skipinitialspace = False lineterminator = '\r\n' quoting = QUOTE_MINIMAL On Sat, Jan 9, 2016 at 5:23 PM, Karim <kliat...@gmail.com> wrote: > > > On 09/01/2016 21:54, kbtyo wrote: > >> My specs: >> >> Python 3.4.3 >> Windows 7 >> IDE is Jupyter Notebooks >> >> What I have referenced: >> >> 1) >> http://stackoverflow.com/questions/1546717/python-escaping-strings-for-use-in-xml >> >> 2) >> >> http://stackoverflow.com/questions/7802418/how-to-properly-escape-single-and-double-quotes >> >> 3) >> http://stackoverflow.com/questions/4972210/escaping-characters-in-a-xml-file-with-python >> >> >> Here is the data (in CSV format) and script, respectively, (I have tried >> variations on serializing Column 'E' using both Sax and ElementTree): >> >> i) >> >> A,B,C,D,E,F,G,H,I,J >> "3","8","1","<Request TransactionID="3" RequestType="FOO"><InstitutionISO >> /><CallID>23</CallID><MemberID>12</MemberID><MemberPassword >> /><RequestData><AccountNumber>2</AccountNumber><AccountSuffix>85</AccountSuffix><AccountType>S</AccountType><MPIAcctType>Checking</MPIAcctType><TransactionCount>10</TransactionCount></RequestData></Request>","<Response >> TransactionID="2" >> RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>","1967-12-25 >> 22:18:13.471000","2005-12-25 22:18:13.768000","2","70","0" >> >> ii) >> >> #!/usr/bin/python >> # -*- coding: utf-8 -*- >> import os.path >> import sys >> import csv >> from io import StringIO >> import xml.etree.cElementTree as ElementTree >> from xml.etree.ElementTree import XMLParser >> import xml >> import xml.sax >> from xml.sax import ContentHandler >> >> class MyHandler(xml.sax.handler.ContentHandler): >> def __init__(self): >> self._charBuffer = [] >> self._result = [] >> >> def _getCharacterData(self): >> data = ''.join(self._charBuffer).strip() >> self._charBuffer = [] >> return data.strip() #remove strip() if whitespace is important >> >> def 
parse(self, f): >> xml.sax.parse(f, self) >> return self._result >> >> def characters(self, data): >> self._charBuffer.append(data) >> >> def startElement(self, name, attrs): >> if name == 'Response': >> self._result.append({}) >> >> def endElement(self, name): >> if not name == 'Response': self._result[-1][name] = >> self._getCharacterData() >> >> def read_data(path): >> with open(path, 'rU', encoding='utf-8') as data: >> reader = csv.DictReader(data, delimiter =',', quotechar="'", >> skipinitialspace=True) >> for row in reader: >> yield row >> >> if __name__ == "__main__": >> empty = '' >> Response = 'sample.csv' >> for idx, row in enumerate(read_data(Response)): >> if idx > 10: break >> data = row['E'] >> print(data) # The before >> data = data[1:-1] >> data = ""'{}'"".format(data) >> print(data) # Sanity check >> # data = '<Response TransactionID="2" >> RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>' >> try: >> root = ElementTree.XML(data) >> # print(root) >> except StopIteration: >> raise >> pass >> # xmlstring = StringIO(data) >> # print(xmlstring) >> # Handler = MyHandler().parse(xmlstring) >> >> >> Specifically, due to the quoting in the CSV file (which is beyond my >> control), I have had to resort to slicing the string (line 51) and then >> formatting it (line 52). 
>> >> However the print out from the above attempt is as follows: >> >> "<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000' >> <Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000 >> >> File "<string>", line unknown >> ParseError: no element found: line 1, column 69 >> Interestingly - if I assign the variable "data" (as in line 54) I receive >> this: >> >> File "<ipython-input-80-7357c9272b92>", line 56 >> data = '<Response TransactionID="2" >> RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>' >> ^ >> SyntaxError: invalid token >> >> I seek feedback and information on how to address utilizing the most >> Pythonic means to do so. Ideally, is there a method that can leverage >> ElementTree. Thank you, in advance, for your feedback and guidance. >> > > In fact, to get rid of the double quotes, simply create your csv reader like > that: > > reader = csv.DictReader(data, dialect='excel', skipinitialspace=True) > > You should then not need to slice the data variable and reformat it. > > Karim > > > -- https://mail.python.org/mailman/listinfo/python-list