src/docrecord.py | 25 ++++--------------------- src/docstream.py | 23 ++++++++++++++++++----- 2 files changed, 22 insertions(+), 26 deletions(-)
New commits: commit 2c4e52467c405d35823a781e7745a1fe7ae42a30 Author: Miklos Vajna <vmik...@suse.cz> Date: Sat May 4 20:09:32 2013 +0200 retrieveText -> retrieveOffset So finally we have retrieveOffset to look up text based on raw byte offsets and retrieveCP to look up logical character positions. retrieveText tried to do both, without success. diff --git a/src/docrecord.py b/src/docrecord.py index b46d0b1..9fda05f 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -30,23 +30,6 @@ class FcCompressed(DOCDirStream): self.printAndSet("r1", self.r1) print '</fcCompressed>' - def getTransformedValue(self, start, end, logicalPositions = True, logicalLength = True): - offset = self.fc - if self.fCompressed: - offset = self.fc/2 - if logicalPositions: - fro = offset + start - to = offset + end - else: - fro = start - to = end - if self.fCompressed: - return globals.encodeName(self.mainStream.bytes[fro:to]) - else: - if logicalLength: - to += (to - fro) - return globals.encodeName(self.mainStream.bytes[fro:to].decode('utf-16'), lowOnly = True) - class Pcd(DOCDirStream): """The Pcd structure specifies the location of text in the WordDocument Stream and additional properties for this text.""" def __init__(self, bytes, mainStream, offset, size): @@ -217,7 +200,7 @@ class PlcfBkl(DOCDirStream, PLC): end = offset + self.getuInt32(pos = pos) print '<aCP index="%d" bookmarkEnd="%d">' % (i, end) start = self.start.aCP[i] - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end)) + print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveOffset(start, end)) pos += 4 print '</aCP>' print '</plcfBkl>' @@ -252,7 +235,7 @@ class PlcPcd(DOCDirStream, PLC): start, end = self.ranges[i] print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) self.aPcd[i].dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.aPcd[i].fc.getTransformedValue(start, end)) + print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) print '</aCP>' print '</plcPcd>' @@ -675,7 +658,7 @@ class ChpxFkp(DOCDirStream): start = self.getuInt32(pos = pos) end = self.getuInt32(pos = pos + 4) print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % self.quoteAttr(self.pnFkpChpx.mainStream.retrieveText(start, end)) + print '<transformed value="%s"/>' % self.quoteAttr(self.pnFkpChpx.mainStream.retrieveOffset(start, end)) pos += 4 # rgbx @@ -704,7 +687,7 @@ class PapxFkp(DOCDirStream): start = self.getuInt32(pos = pos) end = self.getuInt32(pos = pos + 4) print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end)) + print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveOffset(start, end)) pos += 4 # rgbx diff --git a/src/docstream.py b/src/docstream.py index dec8cb6..d98e0ea 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -674,11 +674,25 @@ class WordDocumentStream(DOCDirStream): index = i return index - def retrieveText(self, start, end): - """Deprecated, use retrieveCPs instead.""" + def retrieveOffset(self, start, end): + """Retrieves text, defined by raw byte offsets.""" + + # Is the given offset compressed? plcPcd = self.clx.pcdt.plcPcd - idx = self.__findText(plcPcd, start) - return plcPcd.aPcd[idx].fc.getTransformedValue(start, end, logicalPositions = False, logicalLength = False) + for i in range(len(plcPcd.aCp)): + aPcd = plcPcd.aPcd[i] + fcCompressed = aPcd.fc + if fcCompressed.fCompressed == 1: + offset = fcCompressed.fc/2 + else: + offset = fcCompressed.fc + if offset <= start: + compressed = fcCompressed.fCompressed + + if compressed: + return globals.encodeName(self.bytes[start:end]) + else: + return globals.encodeName(self.bytes[start:end].decode('utf-16'), lowOnly = True) def retrieveCP(self, cp): """Implements 2.4.1 Retrieving Text.""" @@ -686,7 +700,6 @@ class WordDocumentStream(DOCDirStream): for i in range(len(plcPcd.aCp)): if plcPcd.aCp[i] <= cp: index = i - break aPcd = plcPcd.aPcd[index] fcCompressed = aPcd.fc if fcCompressed.fCompressed == 1: _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits