src/lib/MSPUBMetaData.cpp | 155 +++++++++++++++++++++++++++++++++++++--------- src/lib/MSPUBMetaData.h | 4 - src/lib/MSPUBParser.cpp | 28 +++++--- src/lib/MSPUBParser.h | 2 4 files changed, 147 insertions(+), 42 deletions(-)
New commits: commit e625c6e60185100438edcb06566a5f8a2a137deb Author: David Tardon <dtar...@redhat.com> Date: Sat Jul 25 09:12:40 2015 +0200 add missing include Change-Id: Ibd003515271ad6211189e12b85eeddd980319770 diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp index 52fdac4..c298bd1 100644 --- a/src/lib/MSPUBMetaData.cpp +++ b/src/lib/MSPUBMetaData.cpp @@ -8,6 +8,7 @@ */ #include <cmath> +#include <cstdio> #include <cstring> #include <ctime> commit 39a9a9ecea325c66fb9d5f503848553ac0e14dcf Author: David Tardon <dtar...@redhat.com> Date: Tue Aug 25 16:12:25 2015 +0200 afl: avoid out of bounds access to vector Change-Id: I51fdad6cca395bb5aadc916ef452ee020f666607 diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp index 9d0446b..52fdac4 100644 --- a/src/lib/MSPUBMetaData.cpp +++ b/src/lib/MSPUBMetaData.cpp @@ -237,6 +237,9 @@ librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::R { uint32_t size = readU32(input); + if (size == 0) + return librevenge::RVNGString(); + std::vector<unsigned char> characters; for (uint32_t i = 0; i < size; ++i) characters.push_back(readU8(input)); commit 0a83689e2f13d0bb584fb004c9065463271ac9e4 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Jul 21 09:32:10 2015 +0200 need space for the terminating null-character Change-Id: Ie9cab1687dd3187819ec8e3e89b3e9355da9b255 diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp index ca8b756..9d0446b 100644 --- a/src/lib/MSPUBMetaData.cpp +++ b/src/lib/MSPUBMetaData.cpp @@ -109,7 +109,8 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream { data4[i] = readU8(input); } - char FMTID0[36]; + // Pretty-printed GUID is 36 bytes + the terminating null-character. + char FMTID0[37]; sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3, data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]); commit 8eaa9d24d235b1390402c232bed49e81ab2d40f4 Author: David Tardon <dtar...@redhat.com> Date: Sat Jul 25 09:18:10 2015 +0200 C++11 syntax is not allowed Change-Id: Ia705f789b3f9d600b03d25db90972a315e782302 diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp index 96e58a0..ca8b756 100644 --- a/src/lib/MSPUBMetaData.cpp +++ b/src/lib/MSPUBMetaData.cpp @@ -215,15 +215,15 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream { switch (m_idsAndOffsets[index].first) { - case PIDDSI::PIDDSI_CATEGORY: + case PIDDSI_CATEGORY: m_metaData.insert("librevenge:category", string); break; - case PIDDSI::PIDDSI_LINECOUNT: - // this should actually be PIDDSI::PIDDSI_COMPANY but this + case PIDDSI_LINECOUNT: + // this should actually be PIDDSI_COMPANY but this // is what company is mapped to m_metaData.insert("librevenge:company", string); break; - case PIDDSI::PIDDSI_LANGUAGE: + case PIDDSI_LANGUAGE: m_metaData.insert("dc:language", string); break; } commit 207e6da1240c4255d2b4c5c28d405ace84ed042f Author: David Tardon <dtar...@redhat.com> Date: Sat Feb 6 07:45:22 2016 +0100 extract more metadata Template, language, company and category metadata are extracted from MSPUB files. Company and category are set as custom properties. Change-Id: Ic14bfa11a2a8253c79dd4c4466afc7f6b2ce4ea9 diff --git a/src/lib/MSPUBMetaData.cpp b/src/lib/MSPUBMetaData.cpp index 1234fe2..96e58a0 100644 --- a/src/lib/MSPUBMetaData.cpp +++ b/src/lib/MSPUBMetaData.cpp @@ -8,6 +8,7 @@ */ #include <cmath> +#include <cstring> #include <ctime> #include "libmspub_utils.h" @@ -22,6 +23,60 @@ libmspub::MSPUBMetaData::~MSPUBMetaData() { } +enum PIDDSI +{ + PIDDSI_CODEPAGE = 0x00000001, + PIDDSI_CATEGORY = 0x00000002, + PIDDSI_PRESFORMAT = 0x00000003, + PIDDSI_BYTECOUNT = 0x00000004, + PIDDSI_LINECOUNT = 0x00000005, + PIDDSI_PARACOUNT = 0x00000006, + PIDDSI_SLIDECOUNT = 0x00000007, + PIDDSI_NOTECOUNT = 0x00000008, + PIDDSI_HIDDENCOUNT = 0x00000009, + PIDDSI_MMCLIPCOUNT = 0x0000000A, + PIDDSI_SCALE = 0x0000000B, + PIDDSI_HEADINGPAIR = 0x0000000C, + PIDDSI_DOCPARTS = 0x0000000D, + PIDDSI_MANAGER = 0x0000000E, + PIDDSI_COMPANY = 0x0000000F, + PIDDSI_LINKSDIRTY = 0x00000010, + PIDDSI_CCHWITHSPACES = 0x00000011, + PIDDSI_SHAREDDOC = 0x00000013, + PIDDSI_LINKBASE = 0x00000014, + PIDDSI_HLINKS = 0x00000015, + PIDDSI_HYPERLINKSCHANGED = 0x00000016, + PIDDSI_VERSION = 0x00000017, + PIDDSI_DIGSIG = 0x00000018, + PIDDSI_CONTENTTYPE = 0x0000001A, + PIDDSI_CONTENTSTATUS = 0x0000001B, + PIDDSI_LANGUAGE = 0x0000001C, + PIDDSI_DOCVERSION = 0x0000001D +}; + +enum PIDSI +{ + CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001, + PIDSI_TITLE = 0x00000002, + PIDSI_SUBJECT = 0x00000003, + PIDSI_AUTHOR = 0x00000004, + PIDSI_KEYWORDS = 0x00000005, + PIDSI_COMMENTS = 0x00000006, + PIDSI_TEMPLATE = 0x00000007, + PIDSI_LASTAUTHOR = 0x00000008, + PIDSI_REVNUMBER = 0x00000009, + PIDSI_EDITTIME = 0x0000000A, + PIDSI_LASTPRINTED = 0x0000000B, + PIDSI_CREATE_DTM = 0x0000000C, + PIDSI_LASTSAVE_DTM = 0x0000000D, + PIDSI_PAGECOUNT = 0x0000000E, + PIDSI_WORDCOUNT = 0x0000000F, + PIDSI_CHARCOUNT = 0x00000010, + PIDSI_THUMBNAIL = 0x00000011, + PIDSI_APPNAME = 0x00000012, + PIDSI_DOC_SECURITY = 0x00000013 +}; + bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input) { if (!input) @@ -45,12 +100,24 @@ void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream // NumPropertySets input->seek(4, librevenge::RVNG_SEEK_CUR); // FMTID0 - input->seek(16, librevenge::RVNG_SEEK_CUR); + //input->seek(16, librevenge::RVNG_SEEK_CUR); + uint32_t data1 = readU32(input); + uint16_t data2 = readU16(input); + uint16_t data3 = readU16(input); + uint8_t data4[8]; + for (int i = 0; i < 8; i++) + { + data4[i] = readU8(input); + } + char FMTID0[36]; + sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3, + data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]); + uint32_t offset0 = readU32(input); - readPropertySet(input, offset0); + readPropertySet(input, offset0, FMTID0); } -void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset) +void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID) { input->seek(offset, librevenge::RVNG_SEEK_SET); @@ -63,17 +130,15 @@ void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input { if (i >= m_idsAndOffsets.size()) break; - readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second); + readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID); } } -#define CODEPAGE_PROPERTY_IDENTIFIER 0x00000001 - uint32_t libmspub::MSPUBMetaData::getCodePage() { for (size_t i = 0; i < m_idsAndOffsets.size(); ++i) { - if (m_idsAndOffsets[i].first == CODEPAGE_PROPERTY_IDENTIFIER) + if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER) { if (i >= m_typedPropertyValues.size()) break; @@ -94,13 +159,10 @@ void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGIn #define VT_I2 0x0002 #define VT_LPSTR 0x001E -#define PIDSI_TITLE 0x00000002 -#define PIDSI_SUBJECT 0x00000003 -#define PIDSI_AUTHOR 0x00000004 -#define PIDSI_KEYWORDS 0x00000005 -#define PIDSI_COMMENTS 0x00000006 - -void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset) +void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input, + uint32_t index, + uint32_t offset, + char *FMTID) { input->seek(offset, librevenge::RVNG_SEEK_SET); uint16_t type = readU16(input); @@ -120,23 +182,51 @@ void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream if (index >= m_idsAndOffsets.size()) return; - switch (m_idsAndOffsets[index].first) + if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9")) { - case PIDSI_TITLE: - m_metaData.insert("dc:title", string); - break; - case PIDSI_SUBJECT: - m_metaData.insert("dc:subject", string); - break; - case PIDSI_AUTHOR: - m_metaData.insert("meta:initial-creator", string); - break; - case PIDSI_KEYWORDS: - m_metaData.insert("meta:keyword", string); - break; - case PIDSI_COMMENTS: - m_metaData.insert("dc:description", string); - break; + switch (m_idsAndOffsets[index].first) + { + case PIDSI::PIDSI_TITLE: + m_metaData.insert("dc:title", string); + break; + case PIDSI::PIDSI_SUBJECT: + m_metaData.insert("dc:subject", string); + break; + case PIDSI::PIDSI_AUTHOR: + m_metaData.insert("meta:initial-creator", string); + m_metaData.insert("dc:creator", string); + break; + case PIDSI::PIDSI_KEYWORDS: + m_metaData.insert("meta:keyword", string); + break; + case PIDSI::PIDSI_COMMENTS: + m_metaData.insert("dc:description", string); + break; + case PIDSI::PIDSI_TEMPLATE: + std::string templateHref(string.cstr()); + size_t found = templateHref.find_last_of("/\\"); + if (found != std::string::npos) + string = librevenge::RVNGString(templateHref.substr(found+1).c_str()); + m_metaData.insert("librevenge:template", string); + break; + } + } + else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae")) + { + switch (m_idsAndOffsets[index].first) + { + case PIDDSI::PIDDSI_CATEGORY: + m_metaData.insert("librevenge:category", string); + break; + case PIDDSI::PIDDSI_LINECOUNT: + // this should actually be PIDDSI::PIDDSI_COMPANY but this + // is what company is mapped to + m_metaData.insert("librevenge:company", string); + break; + case PIDDSI::PIDDSI_LANGUAGE: + m_metaData.insert("dc:language", string); + break; + } } } } diff --git a/src/lib/MSPUBMetaData.h b/src/lib/MSPUBMetaData.h index 18b14a0..f0f994c 100644 --- a/src/lib/MSPUBMetaData.h +++ b/src/lib/MSPUBMetaData.h @@ -34,9 +34,9 @@ private: MSPUBMetaData &operator=(const MSPUBMetaData &); void readPropertySetStream(librevenge::RVNGInputStream *input); - void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset); + void readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID); void readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input); - void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset); + void readTypedPropertyValue(librevenge::RVNGInputStream *input, uint32_t index, uint32_t offset, char *FMTID); librevenge::RVNGString readCodePageString(librevenge::RVNGInputStream *input); uint32_t getCodePage(); diff --git a/src/lib/MSPUBParser.cpp b/src/lib/MSPUBParser.cpp index 6b79243..9c746d7 100644 --- a/src/lib/MSPUBParser.cpp +++ b/src/lib/MSPUBParser.cpp @@ -124,11 +124,8 @@ bool MSPUBParser::parse() MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n")); if (!m_input->isStructured()) return false; - librevenge::RVNGInputStream *metaData = m_input->getSubStreamByName("\x05SummaryInformation"); - if (metaData) - // No check: metadata are not important enough to fail if they can't be parsed - parseMetaData(metaData); - delete metaData; + // No check: metadata are not important enough to fail if they can't be parsed + parseMetaData(); librevenge::RVNGInputStream *quill = m_input->getSubStreamByName("Quill/QuillSub/CONTENTS"); if (!quill) { @@ -2530,12 +2527,25 @@ void MSPUBParser::parsePaletteEntry(librevenge::RVNGInputStream *input, MSPUBBlo } } -bool MSPUBParser::parseMetaData(librevenge::RVNGInputStream *const input) +bool MSPUBParser::parseMetaData() { - assert(input); - + m_input->seek(0, librevenge::RVNG_SEEK_SET); MSPUBMetaData metaData; - metaData.parse(input); + + librevenge::RVNGInputStream *sumaryInfo = m_input->getSubStreamByName("\x05SummaryInformation"); + if (sumaryInfo) + { + metaData.parse(sumaryInfo); + delete sumaryInfo; + } + + librevenge::RVNGInputStream *docSumaryInfo = m_input->getSubStreamByName("\005DocumentSummaryInformation"); + if (docSumaryInfo) + { + metaData.parse(docSumaryInfo); + delete docSumaryInfo; + } + m_input->seek(0, librevenge::RVNG_SEEK_SET); metaData.parseTimes(m_input); m_collector->collectMetaData(metaData.getMetaData()); diff --git a/src/lib/MSPUBParser.h b/src/lib/MSPUBParser.h index b6145ce..e1edad2 100644 --- a/src/lib/MSPUBParser.h +++ b/src/lib/MSPUBParser.h @@ -91,7 +91,7 @@ protected: MSPUBParser(const MSPUBParser &); MSPUBParser &operator=(const MSPUBParser &); virtual bool parseContents(librevenge::RVNGInputStream *input); - bool parseMetaData(librevenge::RVNGInputStream *input); + bool parseMetaData(); bool parseQuill(librevenge::RVNGInputStream *input); bool parseEscher(librevenge::RVNGInputStream *input); bool parseEscherDelay(librevenge::RVNGInputStream *input); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits