msodumper/docrecord.py | 158 ++++++++++++++++++++++++++++++++++++++++++++++--- test/doc/formtext.doc |binary test/doc/test.py | 9 ++ 3 files changed, 159 insertions(+), 8 deletions(-)
New commits: commit 9a72febc7f4162e55ba7e542ea51f35eb1210824 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Nov 4 12:00:33 2014 +0100 dump FFData diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index 9ad1056..1586c48 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -1045,6 +1045,91 @@ class PICF(DOCDirStream): print '</picf>' +IType = { + 0: "iTypeText", + 1: "iTypeChck", + 2: "iTypeDrop" +} + + +ITypeTxt = { + 0: "iTypeTxtReg", + 1: "iTypeTxtNum", + 2: "iTypeTxtDate", + 3: "iTypeTxtCurDate", + 4: "iTypeTxtCurTime", + 5: "iTypeTxtCalc" +} + + +class FFDataBits(DOCDirStream): + """The FFDataBits structure specifies the type and properties for a form + field that is specified by a FFData.""" + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.pos = parent.pos + self.parent = parent + + def dump(self): + print '<FFDataBits>' + buf = self.readuInt8() + self.printAndSet("iType", buf & 0x0003, dict=IType) # 1..2nd bits + self.printAndSet("iRes", buf & 0x007c) # 3..7th bits + self.printAndSet("fOwnHelp", self.getBit(buf, 8)) + buf = self.readuInt8() + self.printAndSet("fOwnStat", self.getBit(buf, 1)) + self.printAndSet("fProt", self.getBit(buf, 2)) + self.printAndSet("iSize", self.getBit(buf, 3)) + self.printAndSet("iTypeTxt", buf & 0x0038, dict=ITypeTxt) # 4..6th bits + self.printAndSet("fRecalc", self.getBit(buf, 7)) + self.printAndSet("fHasListBox", self.getBit(buf, 8)) + print '</FFDataBits>' + self.parent.pos = self.pos + + +class FFData(DOCDirStream): + """The FFData structure specifies form field data for a text box, check + box, or drop-down list box. (Page 348 of [MS-DOC] spec.)""" + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.pos = parent.pos + self.parent = parent + + def dump(self): + print '<FFData>' + self.printAndSet("version", self.readuInt32()) + self.bits = FFDataBits(self) + self.bits.dump() + self.printAndSet("cch", self.readuInt16()) + self.printAndSet("hps", self.readuInt16()) + xstzName = Xstz(self, "xstzName") + xstzName.dump() + self.pos = xstzName.pos + xstzTextDef = Xstz(self, "xstzTextDef") + xstzTextDef.dump() + self.pos = xstzTextDef.pos + if self.bits.iType == 1 or self.bits.iType == 2: # iTypeChck or iTypeDrop + self.printAndSet("wDef", self.readuInt16()) + xstzTextFormat = Xstz(self, "xstzTextFormat") + xstzTextFormat.dump() + self.pos = xstzTextFormat.pos + xstzHelpText = Xstz(self, "xstzHelpText") + xstzHelpText.dump() + self.pos = xstzHelpText.pos + xstzStatText = Xstz(self, "xstzStatText") + xstzStatText.dump() + self.pos = xstzStatText.pos + xstzEntryMcr = Xstz(self, "xstzEntryMcr") + xstzEntryMcr.dump() + self.pos = xstzEntryMcr.pos + xstzExitMcr = Xstz(self, "xstzExitMcr") + xstzExitMcr.dump() + self.pos = xstzExitMcr.pos + if self.bits.iType == 2: # iTypeDrop + print '<todo what="FFData::dump(): handle hsttbDropList for iTypeDrop"/>' + print '</FFData>' + + class NilPICFAndBinData(DOCDirStream): """The NilPICFAndBinData structure that holds header information and binary data for a hyperlink, form field, or add-in field. The NilPICFAndBinData @@ -1079,7 +1164,7 @@ class NilPICFAndBinData(DOCDirStream): self.printAndSet("ignored15", self.readInt16()) fieldType = chpxFkp.transformeds[-2] if fieldType == " FORMTEXT ": - print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>' + FFData(self).dump() else: print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType print '</NilPICFAndBinData>' @@ -3137,17 +3222,18 @@ class Xst(DOCDirStream): class Xstz(DOCDirStream): """The Xstz structure is a string. The string is prepended by its length and is null-terminated.""" - def __init__(self, parent): + def __init__(self, parent, name="xstz"): DOCDirStream.__init__(self, parent.bytes) self.pos = parent.pos + self.name = name def dump(self): - print '<xstz type="Xstz" offset="%d">' % self.pos + print '<%s type="Xstz" offset="%d">' % (self.name, self.pos) xst = Xst(self) xst.dump() self.pos = xst.pos self.printAndSet("chTerm", self.readuInt16()) - print '</xstz>' + print '</%s>' % self.name class UpxPapx(DOCDirStream): diff --git a/test/doc/formtext.doc b/test/doc/formtext.doc new file mode 100644 index 0000000..5f92c3e Binary files /dev/null and b/test/doc/formtext.doc differ diff --git a/test/doc/test.py b/test/doc/test.py index e7c42b2..c63bc11 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -227,6 +227,15 @@ class Test(unittest.TestCase): actual = self.root.findall(xpath)[0].attrib['value'] self.assertEqual(expected, actual) + def test_formtext(self): + self.dump('formtext') + + # make sure we find that the max length is 5 chars + expected = "0x5" + xpath = 'stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBteChpx/plcBteChpx/aFC/aPnBteChpx/chpxFkp/rgfc/chpx/prl/sprm/NilPICFAndBinData/FFData/cch' + actual = self.root.findall(xpath)[0].attrib['value'] + self.assertEqual(expected, actual) + if __name__ == '__main__': unittest.main() commit 3701f745d7f3397f4166110a3a743ec7e25dba80 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Nov 4 11:19:45 2014 +0100 let NilPICFAndBinData know its field type diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index 71e84f3..9ad1056 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -1057,7 +1057,31 @@ class NilPICFAndBinData(DOCDirStream): def dump(self): print '<NilPICFAndBinData>' - print '<todo what="NilPICFAndBinData::dump()"/>' + # self -> sprm -> prl -> chpx -> chpxFkp + chpxFkp = self.parent.parent.parent.parent + self.printAndSet("lcb", self.readInt32()) + self.printAndSet("cbHeader", self.readInt16()) + self.printAndSet("ignored0", self.readInt32()) + self.printAndSet("ignored1", self.readInt32()) + self.printAndSet("ignored2", self.readInt32()) + self.printAndSet("ignored3", self.readInt32()) + self.printAndSet("ignored4", self.readInt32()) + self.printAndSet("ignored5", self.readInt32()) + self.printAndSet("ignored6", self.readInt32()) + self.printAndSet("ignored7", self.readInt32()) + self.printAndSet("ignored8", self.readInt32()) + self.printAndSet("ignored9", self.readInt32()) + self.printAndSet("ignored10", self.readInt32()) + self.printAndSet("ignored11", self.readInt32()) + self.printAndSet("ignored12", self.readInt32()) + self.printAndSet("ignored13", self.readInt32()) + self.printAndSet("ignored14", self.readInt32()) + self.printAndSet("ignored15", self.readInt16()) + fieldType = chpxFkp.transformeds[-2] + if fieldType == " FORMTEXT ": + print '<todo what="NilPICFAndBinData::dump(): FORMTEXT"/>' + else: + print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType print '</NilPICFAndBinData>' @@ -1461,8 +1485,9 @@ class GrpPrlAndIstd(DOCDirStream): class Chpx(DOCDirStream): """The Chpx structure specifies a set of properties for text.""" - def __init__(self, bytes, mainStream, offset, transformed=None): - DOCDirStream.__init__(self, bytes, mainStream=mainStream) + def __init__(self, parent, mainStream, offset, transformed=None): + DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream) + self.parent = parent self.pos = offset self.transformed = transformed @@ -1531,6 +1556,7 @@ class ChpxFkp(DOCDirStream): print '<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size) self.crun = self.getuInt8(pos=self.pos + self.size - 1) pos = self.pos + self.transformeds = [] for i in range(self.crun): # rgfc start = self.getuInt32(pos=pos) @@ -1538,12 +1564,13 @@ class ChpxFkp(DOCDirStream): print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) self.transformed = self.quoteAttr(self.pnFkpChpx.mainStream.retrieveOffset(start, end)) print '<transformed value="%s"/>' % self.transformed + self.transformeds.append(self.transformed) pos += 4 # rgbx offset = PLC.getPLCOffset(self.pos, self.crun, 1, i) chpxOffset = self.getuInt8(pos=offset) * 2 - chpx = Chpx(self.bytes, self.mainStream, self.pos + chpxOffset, self.transformed) + chpx = Chpx(self, self.mainStream, self.pos + chpxOffset, self.transformed) chpx.dump() print '</rgfc>' commit 867133098167634e2a69362cd1a491ffb00be612 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Nov 4 10:54:59 2014 +0100 doc: NilPICFAndBinData skeleton diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index 66f5653..71e84f3 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -1045,6 +1045,22 @@ class PICF(DOCDirStream): print '</picf>' +class NilPICFAndBinData(DOCDirStream): + """The NilPICFAndBinData structure that holds header information and binary + data for a hyperlink, form field, or add-in field. The NilPICFAndBinData + structure MUST be stored in the Data Stream.""" + def __init__(self, parent): + dataStream = parent.mainStream.doc.getDirectoryStreamByName("Data") + DOCDirStream.__init__(self, dataStream.bytes) + self.pos = parent.operand + self.parent = parent + + def dump(self): + print '<NilPICFAndBinData>' + print '<todo what="NilPICFAndBinData::dump()"/>' + print '</NilPICFAndBinData>' + + class PICFAndOfficeArtData(DOCDirStream): """The PICFAndOfficeArtData structure specifies header information and binary data for a picture.""" @@ -1268,6 +1284,7 @@ class Sprm(DOCDirStream): def __init__(self, parent, mainStream=None, transformed=None): DOCDirStream.__init__(self, parent.bytes, mainStream=mainStream) self.parent = parent + self.transformed = transformed self.pos = parent.pos self.operandSizeMap = { 0: 1, @@ -1298,8 +1315,9 @@ class Sprm(DOCDirStream): self.operand = self.getuInt24() elif self.getOperandSize() == 4: self.operand = self.getuInt32() - if self.sprm == 0x6a03 and transformed == r"\x01": - self.ct = PICFAndOfficeArtData(self) + if self.sprm == 0x6a03 and transformed == r"\x01": # sprmCPicLocation + # Can't decide right now, depends on if there will be an sprmCFData later or not. + self.ct = True elif self.sprm == 0x6646: # sprmPHugePapx dataStream = mainStream.doc.getDirectoryStreamByName("Data") dataStream.pos = self.operand @@ -1368,6 +1386,17 @@ class Sprm(DOCDirStream): attrs.append('operand="%s"' % hex(self.operand)) print '<sprm %s%s>' % (" ".join(attrs), {True: "/", False: ""}[close]) if self.ct: + if type(self.ct) == bool: + if self.sprm == 0x6a03 and self.transformed == r"\x01": + haveCFData = False + for prl in self.parent.parent.prls: + if prl.sprm.sprm == 0x0806: # sprmCFData + haveCFData = True + break + if haveCFData: + self.ct = NilPICFAndBinData(self) + else: + self.ct = PICFAndOfficeArtData(self) self.ct.dump() print '</sprm>' _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits