msodumper/docrecord.py | 81 ++++++++++++++++++++++++++++++ msodumper/docstream.py | 130 +++++++++++++++++++++++++++---------------------- 2 files changed, 154 insertions(+), 57 deletions(-)
New commits: commit 061094f9c354464b9b2b2cf105fb83df3ca8b1f5 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Thu Dec 3 17:30:31 2015 +0100 docrecord: dump PropertyBagStore diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index e8228b6..893dc07 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -4041,6 +4041,72 @@ class SttbListNames(DOCDirStream): print '</sttbListNames>' +class PBString(DOCDirStream): + """Specified by [MS-OSHARED] 2.3.4.5, specifies a null-terminated string.""" + def __init__(self, parent, name): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.pos = parent.pos + self.name = name + + def dump(self): + print '<%s type="PBString">' % self.name + buf = self.readuInt16() + self.printAndSet("cch", buf & 0x7fff) # bits 0..14 + self.printAndSet("fAnsiString", self.getBit(buf, 15)) + + # TODO support fAnsiString == 0 + bytes = [] + for dummy in range(self.cch): + c = self.readuInt8() + if c == 0: + break + bytes.append(c) + encoding = "ascii" + print '<rgxch value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8') + + print '</%s>' % self.name + self.parent.pos = self.pos + + +class FactoidType(DOCDirStream): + """Specified by [MS-OSHARED] 2.3.4.2, specifies the type of smart tag.""" + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.pos = parent.pos + + def dump(self): + print '<factoidType>' + self.printAndSet("cbFactoid", self.readuInt32()) + self.printAndSet("id", self.readuInt32()) + PBString(self, "rgbUri").dump() + # rgbTag + # rgbDownLoadURL + print '</factoidType>' + + +class PropertyBagStore(DOCDirStream): + """Specified by [MS-OSHARED] 2.3.4.1, specifies the shared data for the + smart tags embedded in the document.""" + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.pos = parent.pos + + def dump(self): + print 
'<propBagStore type="PropertyBagStore" offset="%s">' % self.pos + self.printAndSet("cFactoidType", self.readuInt32()) + print '<factoidTypes>' + self.factoidTypes = [] + for i in range(self.cFactoidType): + factoidType = FactoidType(self) + factoidType.dump() + self.factoidTypes.append(factoidType) + print '</factoidTypes>' + print '</propBagStore>' + + class SmartTagData(DOCDirStream): """Specified by [MS-DOC] 2.9.251, stores information about all the smart tags in the document.""" @@ -4051,6 +4117,8 @@ class SmartTagData(DOCDirStream): def dump(self): print '<smartTagData type="SmartTagData" offset="%d" size="%d bytes">' % (self.pos, self.size) + self.propBagStore = PropertyBagStore(self) + self.propBagStore.dump() print '</smartTagData>' commit 2fc6e88471b38cba85bf7337aa5642ec00a5bf08 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Thu Dec 3 15:53:17 2015 +0100 docrecord: initial SmartTagData diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index 4b6dc4f..e8228b6 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -4041,6 +4041,19 @@ class SttbListNames(DOCDirStream): print '</sttbListNames>' +class SmartTagData(DOCDirStream): + """Specified by [MS-DOC] 2.9.251, stores information about all the smart + tags in the document.""" + def __init__(self, mainStream): + DOCDirStream.__init__(self, mainStream.getTableStream().bytes, mainStream=mainStream) + self.pos = mainStream.fcFactoidData + self.size = mainStream.lcbFactoidData + + def dump(self): + print '<smartTagData type="SmartTagData" offset="%d" size="%d bytes">' % (self.pos, self.size) + print '</smartTagData>' + + class SttbSavedBy(DOCDirStream): """The SttbSavedBy structure is an STTB structure that specifies the save history of this document.""" def __init__(self, mainStream): diff --git a/msodumper/docstream.py b/msodumper/docstream.py index e7f8776..13dddc9 100644 --- a/msodumper/docstream.py +++ b/msodumper/docstream.py @@ -692,6 +692,9 @@ class 
WordDocumentStream(DOCDirStream): def handleLcbSttbListNames(self): docrecord.SttbListNames(self).dump() + def handleLcbFactoidData(self): + docrecord.SmartTagData(self).dump() + def handleLcbSttbfBkmk(self): docrecord.SttbfBkmk(self).dump() @@ -763,65 +766,78 @@ class WordDocumentStream(DOCDirStream): def __dumpFibRgFcLcb2002(self): self.__dumpFibRgFcLcb2000() fields = [ - "fcUnused1", - "lcbUnused1", - "fcPlcfPgp", - "lcbPlcfPgp", - "fcPlcfuim", - "lcbPlcfuim", - "fcPlfguidUim", - "lcbPlfguidUim", - "fcAtrdExtra", - "lcbAtrdExtra", - "fcPlrsid", - "lcbPlrsid", - "fcSttbfBkmkFactoid", - "lcbSttbfBkmkFactoid", - "fcPlcfBkfFactoid", - "lcbPlcfBkfFactoid", - "fcPlcfcookie", - "lcbPlcfcookie", - "fcPlcfBklFactoid", - "lcbPlcfBklFactoid", - "fcFactoidData", - "lcbFactoidData", - "fcDocUndo", - "lcbDocUndo", - "fcSttbfBkmkFcc", - "lcbSttbfBkmkFcc", - "fcPlcfBkfFcc", - "lcbPlcfBkfFcc", - "fcPlcfBklFcc", - "lcbPlcfBklFcc", - "fcSttbfbkmkBPRepairs", - "lcbSttbfbkmkBPRepairs", - "fcPlcfbkfBPRepairs", - "lcbPlcfbkfBPRepairs", - "fcPlcfbklBPRepairs", - "lcbPlcfbklBPRepairs", - "fcPmsNew", - "lcbPmsNew", - "fcODSO", - "lcbODSO", - "fcPlcfpmiOldXP", - "lcbPlcfpmiOldXP", - "fcPlcfpmiNewXP", - "lcbPlcfpmiNewXP", - "fcPlcfpmiMixedXP", - "lcbPlcfpmiMixedXP", - "fcUnused2", - "lcbUnused2", - "fcPlcffactoid", - "lcbPlcffactoid", - "fcPlcflvcOldXP", - "lcbPlcflvcOldXP", - "fcPlcflvcNewXP", - "lcbPlcflvcNewXP", - "fcPlcflvcMixedXP", - "lcbPlcflvcMixedXP", + ["fcUnused1"], + ["lcbUnused1"], + ["fcPlcfPgp"], + ["lcbPlcfPgp"], + ["fcPlcfuim"], + ["lcbPlcfuim"], + ["fcPlfguidUim"], + ["lcbPlfguidUim"], + ["fcAtrdExtra"], + ["lcbAtrdExtra"], + ["fcPlrsid"], + ["lcbPlrsid"], + ["fcSttbfBkmkFactoid"], + ["lcbSttbfBkmkFactoid"], + ["fcPlcfBkfFactoid"], + ["lcbPlcfBkfFactoid"], + ["fcPlcfcookie"], + ["lcbPlcfcookie"], + ["fcPlcfBklFactoid"], + ["lcbPlcfBklFactoid"], + ["fcFactoidData"], + ["lcbFactoidData", self.handleLcbFactoidData], + ["fcDocUndo"], + ["lcbDocUndo"], + ["fcSttbfBkmkFcc"], + 
["lcbSttbfBkmkFcc"], + ["fcPlcfBkfFcc"], + ["lcbPlcfBkfFcc"], + ["fcPlcfBklFcc"], + ["lcbPlcfBklFcc"], + ["fcSttbfbkmkBPRepairs"], + ["lcbSttbfbkmkBPRepairs"], + ["fcPlcfbkfBPRepairs"], + ["lcbPlcfbkfBPRepairs"], + ["fcPlcfbklBPRepairs"], + ["lcbPlcfbklBPRepairs"], + ["fcPmsNew"], + ["lcbPmsNew"], + ["fcODSO"], + ["lcbODSO"], + ["fcPlcfpmiOldXP"], + ["lcbPlcfpmiOldXP"], + ["fcPlcfpmiNewXP"], + ["lcbPlcfpmiNewXP"], + ["fcPlcfpmiMixedXP"], + ["lcbPlcfpmiMixedXP"], + ["fcUnused2"], + ["lcbUnused2"], + ["fcPlcffactoid"], + ["lcbPlcffactoid"], + ["fcPlcflvcOldXP"], + ["lcbPlcflvcOldXP"], + ["fcPlcflvcNewXP"], + ["lcbPlcflvcNewXP"], + ["fcPlcflvcMixedXP"], + ["lcbPlcflvcMixedXP"], ] for i in fields: - self.printAndSet(i, self.readuInt32()) + value = self.readInt32() + hasHandler = len(i) > 1 + # the spec says these must be ignored + needsIgnoring = [] + # a member needs handling if it defines the size of a struct and it's non-zero + needsHandling = i[0].startswith("lcb") and value != 0 and (not i[0] in needsIgnoring) + self.printAndSet(i[0], value, end=((not hasHandler) and (not needsHandling)), offset=True) + if hasHandler or needsHandling: + if needsHandling: + if hasHandler: + i[1]() + else: + print '<todo what="value is non-zero and unhandled"/>' + print '</%s>' % i[0] def __dumpFibRgFcLcb2003(self): self.__dumpFibRgFcLcb2002() _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits