filter/source/config/cache/cacheitem.hxx     |   8 -
filter/source/config/cache/typedetection.cxx | 147 +++++++++++++++++++++------
2 files changed, 115 insertions(+), 40 deletions(-)
New commits: commit 0c782558aee08bfc24e03c715a504a234ab30307 Author: Kohei Yoshida <kohei.yosh...@gmail.com> Date: Fri May 25 16:01:52 2012 -0400 Rank format types in order of complexity. This way we can run detection services in the correct order; from more complex to less complex, since the more complex structure is much easier to test and reject. Change-Id: Iee2d3e2e4f2834f95e6e89975f646e3928114b11 diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index 93e8015..672a09f 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -109,8 +109,101 @@ TypeDetection::~TypeDetection() namespace { /** - * Types with matching pattern first, then extension, then types that are - * supported by the document service come next. + * Rank format types in order of complexity. More complex formats are + * ranked higher so that they get tested sooner over simpler formats. + * + * Guidelines to determine how complex a format is (subject to change): + * + * 1) compressed text (XML, HTML, etc) + * 2) binary + * 3) non-compressed text + * 3.1) structured text + * 3.1.1) dialect of a structured text (e.g. docbook XML) + * 3.1.2) generic structured text (e.g. generic XML) + * 3.2) non-structured text + * + * In each category, rank them from strictly-structured to + * loosely-structured. + */ +int getFlatTypeRank(const rtl::OUString& rType) +{ + // List formats from more complex to less complex. + // TODO: Add more. 
+ static const char* ranks[] = { + // Compressed XML + "writer8_template", + "writer8", + "calc8_template", + "calc8", + "writer_OOXML_Text_Template", + "writer_OOXML", + "writer_MS_Word_2007_Template", + "writer_MS_Word_2007", + "Office Open XML Spreadsheet Template", + "Office Open XML Spreadsheet", + "MS Excel 2007 XML Template", + "MS Excel 2007 XML", + + // Compressed text + "pdf_Portable_Document_Format", + + // Binary + "writer_T602_Document", + "writer_WordPerfect_Document", + "writer_MS_Works_Document", + "writer_MS_Word_97_Vorlage", + "writer_MS_Word_97", + "writer_MS_Word_95_Vorlage", + "writer_MS_Word_95", + "writer_MS_WinWord_60", + "writer_MS_WinWord_5", + "MS Excel 2007 Binary", + "calc_MS_Excel_97_VorlageTemplate", + "calc_MS_Excel_97", + "calc_MS_Excel_95_VorlageTemplate", + "calc_MS_Excel_95", + "calc_MS_Excel_5095_VorlageTemplate", + "calc_MS_Excel_5095", + "calc_MS_Excel_40_VorlageTemplate", + "calc_MS_Excel_40", + "calc_Pocket_Excel_File", + "calc_Lotus", + "calc_QPro", + "calc_SYLK", + "calc_DIF", + "calc_dBase", + + + // Non-compressed XML + "writer_ODT_FlatXML", + "calc_ODS_FlatXML", + "calc_MS_Excel_2003_XML", + "writer_MS_Word_2003_XML", + "writer_DocBook_File", + "XHTML_File", + + // Non-compressed text + "writer_Rich_Text_Format", + "generic_HTML", + "generic_Text" + }; + + size_t n = SAL_N_ELEMENTS(ranks); + + for (size_t i = 0; i < n; ++i) + { + if (rType.equalsAscii(ranks[i])) + return n - i - 1; + } + + // Not ranked. Treat them equally. + return -1; +} + +/** + * Types with matching pattern first, then extension, then custom ranks by + * types, then types that are supported by the document service come next. + * Lastly, sort them alphabetically. 
*/ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool> { @@ -122,7 +215,25 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec if (r1.bMatchByExtension != r2.bMatchByExtension) return r1.bMatchByExtension; - return r1.bPreselectedByDocumentService; + int rank1 = getFlatTypeRank(r1.sType); + int rank2 = getFlatTypeRank(r2.sType); + + if (rank1 != rank2) + return rank1 > rank2; + + if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService) + return r1.bPreselectedByDocumentService; + + // All things being equal, sort them alphabetically. + return r1.sType > r2.sType; + } +}; + +struct EqualByName : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool> +{ + bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const + { + return r1.sType == r2.sType; } }; @@ -177,6 +288,7 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec // Properly prioritize all candidate types. lFlatTypes.sort(SortByPriority()); + lFlatTypes.unique(EqualByName()); ::rtl::OUString sType ; ::rtl::OUString sLastChance; commit 58652054727a29701795f2849c87f320de05c4dd Author: Kohei Yoshida <kohei.yosh...@gmail.com> Date: Fri May 25 14:57:35 2012 -0400 The logic behind these two flags no longer makes sense. 
Change-Id: Ie2fada1c641d2bc313ddb14903083beab08f8a98 diff --git a/filter/source/config/cache/cacheitem.hxx b/filter/source/config/cache/cacheitem.hxx index d76aa92..486b299 100644 --- a/filter/source/config/cache/cacheitem.hxx +++ b/filter/source/config/cache/cacheitem.hxx @@ -235,12 +235,6 @@ struct FlatDetectionInfo // this type was found by a matching URL Pattern sal_Bool bMatchByPattern; - // the user selected this type explicitly - sal_Bool bPreselectedAsType; - - // the user selected this type implicit by selecting a corresponding filter - sal_Bool bPreselectedByFilter; - // the user selected this type implicit by selecting a corresponding office module sal_Bool bPreselectedByDocumentService; @@ -248,8 +242,6 @@ struct FlatDetectionInfo : sType (::rtl::OUString()) , bMatchByExtension (sal_False ) , bMatchByPattern (sal_False ) - , bPreselectedAsType (sal_False ) - , bPreselectedByFilter (sal_False ) , bPreselectedByDocumentService(sal_False ) {} }; diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index 303ebfc..93e8015 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -505,7 +505,6 @@ sal_Bool TypeDetection::impl_getPreselectionForType(const ::rtl::OUString& sPreS aInfo.sType = sType; aInfo.bMatchByExtension = bMatchByExtension; aInfo.bMatchByPattern = bMatchByPattern; - aInfo.bPreselectedAsType = sal_True; if (bPreferredPreselection) rFlatTypes.push_front(aInfo); @@ -561,17 +560,6 @@ sal_Bool TypeDetection::impl_getPreselectionForFilter(const ::rtl::OUString& sPr sFilter = ::rtl::OUString(); } - // We have to mark all retrieved preselection items as "preselected by filter"! 
- FlatDetection::iterator pIt; - for ( pIt = rFlatTypes.begin(); - pIt != rFlatTypes.end() ; - ++pIt ) - { - FlatDetectionInfo& rInfo = *pIt; - rInfo.bPreselectedAsType = sal_False; - rInfo.bPreselectedByFilter = sal_True; - } - if (!sFilter.isEmpty()) return sal_True; else @@ -630,8 +618,6 @@ sal_Bool TypeDetection::impl_getPreselectionForDocumentService(const ::rtl::OUSt ++pIt ) { FlatDetectionInfo& rInfo = *pIt; - rInfo.bPreselectedAsType = sal_False; - rInfo.bPreselectedByFilter = sal_False; rInfo.bPreselectedByDocumentService = sal_True ; rFlatTypes.push_back(rInfo); } @@ -733,21 +719,6 @@ void TypeDetection::impl_getPreselection(const css::util::URL& aP // c) if (sDetectService.isEmpty()) { - // accept or not accept flat types without deep detection: that's the question :-) - // May be there exists some states, where we have to use our LastChance feature instead - // of using the flat type directly. - // Here the list of task ID's, which wasrelated to these lines of code: - // #i47159#, #i43404#, #i46494# - - // a flat detected type without the chance for a deep detection ... but preselected by the user - // explicitly (means preselected as type or filter ... not as documentservice!) - // should be accepted. So the user can overrule our detection. - if ( - (aFlatTypeInfo.bPreselectedAsType ) || - (aFlatTypeInfo.bPreselectedByFilter) - ) - return sFlatType; - // flat detected types without any registered deep detection service and not // preselected by the user can be used as LAST CHANCE in case no other type could // be detected. Of course only the first type without deep detector can be used. _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits