src/lib/MSPUBCollector.cpp | 9 + src/lib/MSPUBCollector.h | 4 src/lib/MSPUBMetaData.cpp | 236 +++++++++++++++++++++++++++++++++++++++++++++ src/lib/MSPUBMetaData.h | 53 ++++++++++ src/lib/MSPUBParser.cpp | 30 +++++ src/lib/MSPUBParser.h | 1 src/lib/Makefile.am | 62 ++++++----- 7 files changed, 363 insertions(+), 32 deletions(-)
New commits: commit 7d50db7ceeda663451ef2fe6cbc9ea0d25668e1e Author: David Tardon <dtar...@redhat.com> Date: Tue Dec 30 12:57:58 2014 +0100 fix parsing of escher values The list of (ID, value) pairs is terminated by ID == 0 without a value. So, the shortest possible record has length 8: 2 byte ID, followed by 4 byte value, followed by 2 byte terminator ID. Change-Id: I4501e42164b4376f16feca21e32221a4084f22f4 diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp index fac39cb..0dca90c 100644 --- a/src/lib/MSPUBParser.cpp +++ b/src/lib/MSPUBParser.cpp @@ -2311,6 +2311,12 @@ std::map<unsigned short, unsigned> MSPUBParser::extractEscherValues(librevenge:: while (stillReading(input, record.contentsOffset + record.contentsLength)) { unsigned short id = readU16(input); + if (id == 0) + { + if (!stillReading(input, record.contentsOffset + record.contentsLength)) + break; + MSPUB_DEBUG_MSG(("found escher value with ID 0!\n")); + } unsigned value = readU32(input); ret[id] = value; } commit a5aa054c0996cc4e8c428a88c7f14c18e4e4e57e Author: David Tardon <dtar...@redhat.com> Date: Tue Dec 30 12:05:57 2014 +0100 import metadata Change-Id: I64dc77ef7f2e12a5d9f2358b1ebaa5e593282414 diff --git a/src/lib/MSPUBCollector.cpp b/src/lib/MSPUBCollector.cpp index 71e3846..e2cd967 100644 --- a/src/lib/MSPUBCollector.cpp +++ b/src/lib/MSPUBCollector.cpp @@ -237,6 +237,11 @@ void mapTableTextToCells( } // anonymous namespace +void MSPUBCollector::collectMetaData(const librevenge::RVNGPropertyList &metaData) +{ + m_metaData = metaData; +} + void MSPUBCollector::addEOTFont(const librevenge::RVNGString &name, const librevenge::RVNGBinaryData &data) { m_embeddedFonts.push_back(EmbeddedFontInfo(name, data)); @@ -381,7 +386,8 @@ MSPUBCollector::MSPUBCollector(librevenge::RVNGDrawingInterface *painter) : m_tableCellTextEndsByTextId(), m_stringOffsetsByTextId(), m_calculationValuesSeen(), m_pageSeqNumsOrdered(), m_encodingHeuristic(false), m_allText(), - m_calculatedEncoding() + m_calculatedEncoding(), + m_metaData() { } @@ -1699,6 +1705,7 @@ bool MSPUBCollector::go() addBlackToPaletteIfNecessary(); assignShapesToPages(); m_painter->startDocument(librevenge::RVNGPropertyList()); + m_painter->setDocumentMetaData(m_metaData); for (std::list<EmbeddedFontInfo>::const_iterator i = m_embeddedFonts.begin(); i != m_embeddedFonts.end(); ++i) { diff --git a/src/lib/MSPUBCollector.h b/src/lib/MSPUBCollector.h index 7e4b953..5d96c05 100644 --- a/src/lib/MSPUBCollector.h +++ b/src/lib/MSPUBCollector.h @@ -57,6 +57,8 @@ public: virtual ~MSPUBCollector(); // collector functions + void collectMetaData(const librevenge::RVNGPropertyList &metaData); + bool addPage(unsigned seqNum); bool addTextString(const std::vector<TextParagraph> &str, unsigned id); void addTextShape(unsigned stringId, unsigned seqNum); @@ -172,6 +174,8 @@ private: bool m_encodingHeuristic; std::vector<unsigned char> m_allText; mutable boost::optional<const char *> m_calculatedEncoding; + librevenge::RVNGPropertyList m_metaData; + // helper functions std::vector<int> getShapeAdjustValues(const ShapeInfo &info) const; boost::optional<unsigned> getMasterPageSeqNum(unsigned pageSeqNum) const; diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp new file mode 100644 index 0000000..eca048b --- /dev/null +++ b/src/lib/MSPUBMetaData.cpp @@ -0,0 +1,236 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * This file is part of the libmspub project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <cmath> +#include <ctime> + +#include "libmspub_utils.h" +#include "MSPUBMetaData.h" + +libmspub::MSPUBMetaData::MSPUBMetaData() + : m_idsAndOffsets(), m_typedPropertyValues(), m_metaData() +{ +} + +libmspub::MSPUBMetaData::~MSPUBMetaData() +{ +} + +bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input) +{ + if (!input) + return false; + + readPropertySetStream(input); + + return true; +} + +void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input) +{ + // ByteOrder + input->seek(2, librevenge::RVNG_SEEK_CUR); + // Version + input->seek(2, librevenge::RVNG_SEEK_CUR); + // SystemIdentifier + input->seek(4, librevenge::RVNG_SEEK_CUR); + // CLSID + input->seek(16, librevenge::RVNG_SEEK_CUR); + // NumPropertySets + input->seek(4, librevenge::RVNG_SEEK_CUR); + // FMTID0 + input->seek(16, librevenge::RVNG_SEEK_CUR); + uint32_t offset0 = readU32(input); + readPropertySet(input, offset0); +} + +void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset) +{ + input->seek(offset, librevenge::RVNG_SEEK_SET); + + // Size + input->seek(4, librevenge::RVNG_SEEK_CUR); + uint32_t numProperties = readU32(input); + for (uint32_t i = 0; i < numProperties; ++i) + readPropertyIdentifierAndOffset(input); + for (uint32_t i = 0; i < numProperties; ++i) + { + if (i >= m_idsAndOffsets.size()) + break; + readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second); + } +} + +#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001 + +uint32_t libmspub::MSPUBMetaData::getCodePage() +{ + for (size_t i = 0; i < m_idsAndOffsets.size(); ++i) + { + if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER) + { + if (i >= m_typedPropertyValues.size()) + break; + return m_typedPropertyValues[i]; + } + } + + return 0; +} + +void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input) +{ + uint32_t propertyIdentifier = readU32(input); + uint32_t offset = readU32(input); + m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset)); +} + +#define VT_I2 0x0002 +#define VT_LPSTR 0x001E + +#define PIDSI_TITLE 0x00000002 +#define PIDSI_SUBJECT 0x00000003 +#define PIDSI_AUTHOR 0x00000004 +#define PIDSI_KEYWORDS 0x00000005 +#define PIDSI_COMMENTS 0x00000006 + +void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset) +{ + input->seek(offset, librevenge::RVNG_SEEK_SET); + uint16_t type = readU16(input); + // Padding + input->seek(2, librevenge::RVNG_SEEK_CUR); + + if (type == VT_I2) + { + uint16_t value = readU16(input); + m_typedPropertyValues[index] = value; + } + else if (type == VT_LPSTR) + { + librevenge::RVNGString string = readCodePageString(input); + if (!string.empty()) + { + if (index >= m_idsAndOffsets.size()) + return; + + switch (m_idsAndOffsets[index].first) + { + case PIDSI_TITLE: + m_metaData.insert("dc:title", string); + break; + case PIDSI_SUBJECT: + m_metaData.insert("dc:subject", string); + break; + case PIDSI_AUTHOR: + m_metaData.insert("meta:initial-creator", string); + break; + case PIDSI_KEYWORDS: + m_metaData.insert("meta:keyword", string); + break; + case PIDSI_COMMENTS: + m_metaData.insert("dc:description", string); + break; + } + } + } +} + +librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input) +{ + uint32_t size = readU32(input); + + std::vector<unsigned char> characters; + for (uint32_t i = 0; i < size; ++i) + characters.push_back(readU8(input)); + + uint32_t codepage = getCodePage(); + librevenge::RVNGString string; + + if (codepage == 65001) + { + // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx + // says this is UTF-8. + for (std::vector<unsigned char>::const_iterator i = characters.begin(); i != characters.end(); ++i) + string.append((const char)*i); + } + else + { + switch (codepage) + { + case 1252: + // http://msdn.microsoft.com/en-us/goglobal/bb964654 + appendCharacters(string, characters, "windows-1252"); + break; + default: + MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage))); + } + } + + return string; +} + +bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input) +{ + // Parse the header + // HeaderSignature: 8 bytes + // HeaderCLSID: 16 bytes + // MinorVersion: 2 bytes + // MajorVersion: 2 bytes + // ByteOrder: 2 bytes + input->seek(30, librevenge::RVNG_SEEK_CUR); + uint16_t sectorShift = readU16(input); + // MiniSectorShift: 2 bytes + // Reserved: 6 bytes + // NumDirectorySectors: 4 bytes + // NumFATSectors: 4 bytes + input->seek(16, librevenge::RVNG_SEEK_CUR); + uint32_t firstDirSectorLocation = readU32(input); + + // Seek to the Root Directory Entry + size_t sectorSize = pow(2, sectorShift); + input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET); + // DirectoryEntryName: 64 bytes + // DirectoryEntryNameLength: 2 bytes + // ObjectType: 1 byte + // ColorFlag: 1 byte + // LeftSiblingID: 4 bytes + // RightSiblingID: 4 bytes + // ChildID: 4 bytes + // CLSID: 16 bytes + // StateBits: 4 bytes + // CreationTime: 8 bytes + input->seek(108, librevenge::RVNG_SEEK_CUR); + uint64_t modifiedTime = readU64(input); + + // modifiedTime is number of 100ns since Jan 1 1601 + static const uint64_t epoch = 11644473600; + time_t sec = (modifiedTime / 10000000) - epoch; + const struct tm *time = localtime(&sec); + if (time) + { + static const int MAX_BUFFER = 1024; + char buffer[MAX_BUFFER]; + strftime(&buffer[0], MAX_BUFFER-1, "%Y-%m-%dT%H:%M:%SZ", time); + librevenge::RVNGString result; + result.append(buffer); + // Visio UI uses modifiedTime for both purposes. + m_metaData.insert("meta:creation-date", result); + m_metaData.insert("dc:date", result); + return true; + } + return false; +} + +const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData() +{ + return m_metaData; +} + +/* vim:set shiftwidth=2 softtabstop=2 expandtab: */ diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h new file mode 100644 index 0000000..18b14a0 --- /dev/null +++ b/src/lib/MSPUBMetaData.h @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * This file is part of the libmspub project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef __MSPUBMETADATA_H__ +#define __MSPUBMETADATA_H__ + +#include <vector> +#include <utility> +#include <map> +#include <librevenge-stream/librevenge-stream.h> +#include <librevenge/librevenge.h> +#include "libmspub_utils.h" + +namespace libmspub +{ + +class MSPUBMetaData +{ +public: + MSPUBMetaData(); + ~MSPUBMetaData(); + bool parse(librevenge::RVNGInputStream *input); + bool parseTimes(librevenge::RVNGInputStream *input); + const librevenge::RVNGPropertyList &getMetaData(); + +private: + MSPUBMetaData(const MSPUBMetaData &); + MSPUBMetaData &operator=(const MSPUBMetaData &); + + void readPropertySetStream(librevenge::RVNGInputStream *input); + void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset); + void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input); + void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset); + librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input); + + uint32_t getCodePage(); + + std::vector< std::pair<uint32_t, uint32_t> > m_idsAndOffsets; + std::map<uint16_t, uint16_t> m_typedPropertyValues; + librevenge::RVNGPropertyList m_metaData; +}; + +} // namespace libmspub + +#endif // __MSPUBMETADATA_H__ + +/* vim:set shiftwidth=2 softtabstop=2 expandtab: */ diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp index 6ebe098..fac39cb 100644 --- a/src/lib/MSPUBParser.cpp +++ b/src/lib/MSPUBParser.cpp @@ -7,6 +7,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include <cassert> #include <set> #include <sstream> #include <string> @@ -14,6 +15,8 @@ #include <string.h> #include <librevenge-stream/librevenge-stream.h> #include <zlib.h> + +#include "MSPUBMetaData.h" #include "MSPUBParser.h" #include "MSPUBCollector.h" #include "MSPUBBlockID.h" @@ -115,6 +118,11 @@ bool MSPUBParser::parse() MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n")); if (!m_input->isStructured()) return false; + librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation"); + if (metaData) + // No check: metadata are not important enough to fail if they can't be parsed + parseMetaData(metaData); + delete metaData; librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS"); if (!quill) { @@ -2525,6 +2533,20 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo } } +bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input) +{ + assert(input); + + MSPUBMetaData metaData; + metaData.parse(input); + m_input->seek(0, librevenge::RVNG_SEEK_SET); + metaData.parseTimes(m_input); + m_collector->collectMetaData(metaData.getMetaData()); + + return true; +} + + } /* vim:set shiftwidth=2 softtabstop=2 expandtab: */ diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h index 3d97ffd..54e41aa 100644 --- a/src/lib/MSPUBParser.h +++ b/src/lib/MSPUBParser.h @@ -91,6 +91,7 @@ protected: MSPUBParser(const MSPUBParser &); MSPUBParser &operator=(const MSPUBParser &); virtual bool parseContents(librevenge::RVNGInputStream *input); + bool parseMetaData(librevenge::RVNGInputStream *input); bool parseQuill(librevenge::RVNGInputStream *input); bool parseEscher(librevenge::RVNGInputStream *input); bool parseEscherDelay(librevenge::RVNGInputStream *input); diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index c626442..f54feaa 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -38,6 +38,8 @@ libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_SOURCES = \ MSPUBConstants.h \ MSPUBContentChunkType.h \ MSPUBDocument.cpp \ + MSPUBMetaData.cpp \ + MSPUBMetaData.h \ MSPUBParser.cpp \ MSPUBParser.h \ MSPUBParser2k.cpp \ commit 09a84ccf477902196fc94c71f9bc244e2e28ae5c Author: David Tardon <dtar...@redhat.com> Date: Tue Dec 30 11:36:28 2014 +0100 keep the sources list sorted Change-Id: I61061f8f43be40cc3f886a29a74c31dc39d4893f diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index 89c06e1..c626442 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -16,54 +16,54 @@ libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_LIBADD = $(REVENGE_LIBS libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_DEPENDENCIES = @LIBMSPUB_WIN32_RESOURCE@ libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_LDFLAGS = $(version_info) -export-dynamic -no-undefined libmspub_@MSPUB_MAJOR_VERSION@_@MSPUB_MINOR_VERSION@_la_SOURCES = \ - MSPUBCollector.cpp \ - MSPUBDocument.cpp \ - MSPUBParser.cpp \ - MSPUBParser2k.cpp \ - Fill.cpp \ - libmspub_utils.cpp \ - PolygonUtils.cpp \ - ShapeGroupElement.cpp \ + Arrow.h \ + BorderArtInfo.h \ ColorReference.cpp \ - VectorTransformation2D.cpp \ - MSPUBParser97.cpp \ + ColorReference.h \ + Coordinate.h \ Dash.cpp \ - Shadow.cpp \ + Dash.h \ + EmbeddedFontInfo.h \ EscherContainerType.h \ EscherFieldIds.h \ + Fill.cpp \ + Fill.h \ FillType.h \ + Line.h \ + ListInfo.h \ MSPUBBlockID.h \ MSPUBBlockType.h \ + MSPUBCollector.cpp \ MSPUBCollector.h \ MSPUBConstants.h \ MSPUBContentChunkType.h \ + MSPUBDocument.cpp \ + MSPUBParser.cpp \ MSPUBParser.h \ + MSPUBParser2k.cpp \ MSPUBParser2k.h \ + MSPUBParser97.cpp \ + MSPUBParser97.h \ MSPUBTypes.h \ - libmspub_utils.h \ - ShapeFlags.h \ - ShapeType.h \ - Fill.h \ - ColorReference.h \ + Margins.h \ + NumberingDelimiter.h \ + NumberingType.h \ + PolygonUtils.cpp \ PolygonUtils.h \ - Shapes.h \ - VectorTransformation2D.h \ - Coordinate.h \ + Shadow.cpp \ + Shadow.h \ + ShapeFlags.h \ + ShapeGroupElement.cpp \ ShapeGroupElement.h \ ShapeInfo.h \ - Line.h \ - Margins.h \ - MSPUBParser97.h \ - BorderArtInfo.h \ - NumberingType.h \ - NumberingDelimiter.h \ - ListInfo.h \ - Dash.h \ + ShapeType.h \ + Shapes.h \ TableInfo.h \ - Arrow.h \ + VectorTransformation2D.cpp \ + VectorTransformation2D.h \ VerticalAlign.h \ - EmbeddedFontInfo.h \ - Shadow.h + libmspub_utils.cpp \ + libmspub_utils.h if OS_WIN32 commit 3b6a71216cab7172b947dbb6946f8dcb8b81792d Author: David Tardon <dtar...@redhat.com> Date: Tue Dec 30 12:07:54 2014 +0100 fix warning Change-Id: I77492d801c43d178be210323ef1dafc7eefdacb6 diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp index 1810152..6ebe098 100644 --- a/src/lib/MSPUBParser.cpp +++ b/src/lib/MSPUBParser.cpp @@ -821,7 +821,7 @@ bool MSPUBParser::parseShape(librevenge::RVNGInputStream *input, if (bool(cellCount) && (get(cellCount) != ti.m_cells.size())) { - MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), ti.m_cells.size())); + MSPUB_DEBUG_MSG(("%u cell records expected, but read %u\n", get(cellCount), unsigned(ti.m_cells.size()))); } } _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits