From b8b48c71e8cde3b120159b40d28e1c37c49e7fd4 Mon Sep 17 00:00:00 2001
From: Mark Rogers <mark.rogers@powermapper.com>
Date: Wed, 2 Feb 2022 17:39:25 +0000
Subject: [PATCH] fix(podofo): refactored stack overflow recursion guard to fix
 CVE-2018-8002, CVE-2021-30470, CVE-2021-30471,  CVE-2020-18971

---
 .../Libs/podofo/src/podofo/base/PdfParser.cpp      | 46 ++--------------------
 .../Mapper/Libs/podofo/src/podofo/base/PdfParser.h |  1 -
 .../Libs/podofo/src/podofo/base/PdfTokenizer.cpp   | 43 ++++++++++++++++++++
 .../Libs/podofo/src/podofo/base/PdfTokenizer.h     | 39 ++++++++++++++++++
 .../Libs/podofo/src/podofo/doc/PdfNamesTree.cpp    |  2 +
 .../Libs/podofo/src/podofo/doc/PdfOutlines.cpp     |  1 +
 .../Libs/podofo/src/podofo/doc/PdfPagesTree.cpp    |  2 +
 7 files changed, 90 insertions(+), 44 deletions(-)

diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.cpp b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.cpp
index d9c1db85d..1eef5b7cc 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.cpp
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.cpp
@@ -75,45 +75,6 @@ namespace PoDoFo {
 bool PdfParser::s_bIgnoreBrokenObjects = true;
 const long nMaxNumIndirectObjects = (1L << 23) - 1L;
 long PdfParser::s_nMaxObjects = nMaxNumIndirectObjects;
-  
-    
-class PdfRecursionGuard
-{
-  // RAII recursion guard ensures m_nRecursionDepth is always decremented
-  // because the destructor is always called when control leaves a method
-  // via return or an exception.
-  // see http://en.cppreference.com/w/cpp/language/raii
-
-  // It's used like this in PdfParser methods
-  // PdfRecursionGuard guard(m_nRecursionDepth);
-
-  public:
-    PdfRecursionGuard( int& nRecursionDepth ) 
-    : m_nRecursionDepth(nRecursionDepth) 
-    { 
-        // be careful changing this limit - overflow limits depend on the OS, linker settings, and how much stack space compiler allocates
-        // 500 limit prevents overflow on Win7 with VC++ 2005 with default linker stack size (1000 caused overflow with same compiler/OS)
-        const int maxRecursionDepth = 500;
-
-        ++m_nRecursionDepth;
-
-        if ( m_nRecursionDepth > maxRecursionDepth )
-        {
-            // avoid stack overflow on documents that have circular cross references in /Prev entries
-            // in trailer and XRef streams (possible via a chain of entries with a loop)
-            PODOFO_RAISE_ERROR( ePdfError_InvalidXRef );
-        }    
-    }
-
-    ~PdfRecursionGuard() 
-    { 
-        --m_nRecursionDepth;    
-    }
-
-  private:
-    // must be a reference so that we modify m_nRecursionDepth in parent class
-    int& m_nRecursionDepth;
-};
 
 PdfParser::PdfParser( PdfVecObjects* pVecObjects )
     : PdfTokenizer(), m_vecObjects( pVecObjects ), m_bStrictParsing( false )
@@ -187,7 +148,6 @@ void PdfParser::Init()
     m_lLastEOFOffset  = 0;
 
     m_nIncrementalUpdates = 0;
-    m_nRecursionDepth = 0;
 }
 
 void PdfParser::ParseFile( const char* pszFilename, bool bLoadOnDemand )
@@ -551,7 +511,7 @@ void PdfParser::MergeTrailer( const PdfObject* pTrailer )
 
 void PdfParser::ReadNextTrailer()
 {
-    PdfRecursionGuard guard(m_nRecursionDepth);
+    PdfTokenizer::RecursionGuard guard;
 
     // ReadXRefcontents has read the first 't' from "trailer" so just check for "railer"
     if( this->IsNextToken( "trailer" ) )
@@ -672,7 +632,7 @@ void PdfParser::ReadXRef( pdf_long* pXRefOffset )
 
 void PdfParser::ReadXRefContents( pdf_long lOffset, bool bPositionAtEnd )
 {
-    PdfRecursionGuard guard(m_nRecursionDepth);
+    PdfTokenizer::RecursionGuard guard;
 
     pdf_int64 nFirstObject = 0;
     pdf_int64 nNumObjects  = 0;
@@ -939,7 +899,7 @@ void PdfParser::ReadXRefSubsection( pdf_int64 & nFirstObject, pdf_int64 & nNumOb
 
 void PdfParser::ReadXRefStreamContents( pdf_long lOffset, bool bReadOnlyTrailer )
 {
-    PdfRecursionGuard guard(m_nRecursionDepth);
+    PdfTokenizer::RecursionGuard guard;
 
     m_device.Device()->Seek( lOffset );
 
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.h b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.h
index 55de35133..b94ce8b92 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.h
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfParser.h
@@ -602,7 +602,6 @@ class PODOFO_API PdfParser : public PdfTokenizer {
     bool          m_bStrictParsing;
 
     int           m_nIncrementalUpdates;
-    int           m_nRecursionDepth;
 
     static bool   s_bIgnoreBrokenObjects;
 
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.cpp b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.cpp
index 9167f29da..e0f7e58b0 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.cpp
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.cpp
@@ -59,6 +59,48 @@
 
 namespace PoDoFo {
 
+// default stack sizes
+// Windows: 1MB on x32, x64, ARM https://docs.microsoft.com/en-us/cpp/build/reference/stack-stack-allocations?view=msvc-160
+// Windows IIS: 512 KB for 64-bit worker processes, 256 KB for 32-bit worker processes
+// macOS: 8MB on main thread, 512KB on secondary threads https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/Multithreading/CreatingThreads/CreatingThreads.html
+// iOS: 1MB on main thread, 512KB on secondary threads
+// Modern Linux distros: usually 8MB on main and secondary threads (but setting ulimit RLIMIT_STACK to unlimited *reduces* the secondary stack size on most architectures: see https://man7.org/linux/man-pages/man3/pthread_create.3.html#NOTES )
+// the amount allocated on stack for local variables and function parameters varies between x86 and x64 
+// in x86 pointers are 32-bit but all function parameters are on stack
+// in x64 pointers are 64-bit but first 4 function params are passed in registers
+// the biggest difference is between debug and non-debug stacks: a debug stack frame can be around 3x larger
+// due to instrumentation like ASAN which puts guard bytes around stack variables to detect buffer overflows
+
+const int maxRecursionDepthDefault = 256; // initial value of the recursion limit (see the stack size notes above)
+int PdfTokenizer::RecursionGuard::s_maxRecursionDepth = maxRecursionDepthDefault; // current limit; changed via SetMaxRecursionDepth()
+
+#if defined(PODOFO_MULTI_THREAD)
+thread_local int PdfTokenizer::RecursionGuard::s_nRecursionDepth = 0; // PoDoFo is multi-threaded and requires a C++11 compiler with thread_local support
+#else
+int PdfTokenizer::RecursionGuard::s_nRecursionDepth = 0; // PoDoFo is single-threaded
+#endif   
+
+// Enter one recursion level; raises ePdfError_InvalidXRef once the configured limit is exceeded.
+void PdfTokenizer::RecursionGuard::Enter()
+{
+    ++s_nRecursionDepth;
+    if ( s_maxRecursionDepth > 0 && s_nRecursionDepth > s_maxRecursionDepth ) // 0 disables the check
+    {
+        // avoid stack overflow on documents with circular references or very deep nesting:
+        // /Prev chains in trailer and XRef streams, /Kids entries looping back to self or parent,
+        // deeply nested Dictionary or Array objects ([[[[[[[[]]]]]]]]), mutually recursive objects.
+        // Undo the increment first: Enter() runs inside the constructor, and when a constructor
+        // exits by exception its destructor (and so Exit()) never runs, leaking one depth level.
+        --s_nRecursionDepth;
+        PODOFO_RAISE_ERROR( ePdfError_InvalidXRef );
+    }
+}
+
+void PdfTokenizer::RecursionGuard::Exit()
+{
+    s_nRecursionDepth -= 1; // leave one recursion level; called from ~RecursionGuard()
+}
+
 namespace PdfTokenizerNameSpace{
 
 static const int g_MapAllocLen = 256;
@@ -392,6 +434,7 @@ void PdfTokenizer::GetNextVariant( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
 
 void PdfTokenizer::GetNextVariant( const char* pszToken, EPdfTokenType eType, PdfVariant& rVariant, PdfEncrypt* pEncrypt )
 {
+    PdfTokenizer::RecursionGuard guard;
     EPdfDataType eDataType = this->DetermineDataType( pszToken, eType, rVariant );
 
     if( eDataType == ePdfDataType_Null ||
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.h b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.h
index 4c37af8b5..7a478f576 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.h
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/base/PdfTokenizer.h
@@ -58,6 +58,7 @@ typedef std::deque<TTokenizerPair>           TTokenizerQueque;
 typedef TTokenizerQueque::iterator           TITokenizerQueque;
 typedef TTokenizerQueque::const_iterator     TCITokenizerQueque;
 
+class PdfObject;
 
 /**
  * A simple tokenizer for PDF files and PDF content streams
@@ -71,6 +72,44 @@ class PODOFO_API PdfTokenizer {
 
     virtual ~PdfTokenizer();
 
+    class RecursionGuard
+    {
+    // RAII recursion guard: the constructor calls Enter() and the destructor calls Exit(),
+    // so the recursion depth is decremented again whenever control leaves the guarded
+    // method via return or an exception. The depth counter is a static member, so all
+    // guards count against one shared limit. see http://en.cppreference.com/w/cpp/language/raii
+
+    // It's used like this:
+    // PdfTokenizer::RecursionGuard guard;
+
+    public:
+        RecursionGuard() { Enter(); }   // entering a guarded scope
+        ~RecursionGuard() { Exit(); }   // leaving a guarded scope
+        
+        // set maximum recursion depth (set to 0 to disable recursion check)
+        static void SetMaxRecursionDepth( int32_t maxRecursionDepth )
+        {
+            s_maxRecursionDepth = maxRecursionDepth;
+        }
+
+        static int32_t GetMaxRecursionDepth() // returns the currently configured limit
+        {
+            return s_maxRecursionDepth;
+        }
+
+    private:
+        void Enter(); // increments the depth counter and checks it against the limit
+        void Exit();  // decrements the depth counter
+
+        static int s_maxRecursionDepth; // limit shared by every guard
+
+    #if defined(PODOFO_MULTI_THREAD)
+        static thread_local int s_nRecursionDepth; // PoDoFo is multi-threaded and requires a C++11 compiler with thread_local support
+    #else
+        static int s_nRecursionDepth; // PoDoFo is single-threaded
+    #endif    
+    };    
+
     /** Reads the next token from the current file position
      *  ignoring all comments.
      *
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfNamesTree.cpp b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfNamesTree.cpp
index d8343698a..68cfa81e8 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfNamesTree.cpp
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfNamesTree.cpp
@@ -474,6 +474,8 @@ void PdfNamesTree::ToDictionary( const PdfName & tree, PdfDictionary& rDict )
 
 void PdfNamesTree::AddToDictionary( PdfObject* pObj, PdfDictionary & rDict )
 {
+    PdfTokenizer::RecursionGuard guard;
+    
     if( pObj->GetDictionary().HasKey("Kids") )
     {
         const PdfArray & kids       = pObj->MustGetIndirectKey("Kids")->GetArray();
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfOutlines.cpp b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfOutlines.cpp
index 2a48d89fa..f7b177e70 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfOutlines.cpp
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfOutlines.cpp
@@ -74,6 +74,7 @@ PdfOutlineItem::PdfOutlineItem( PdfObject* pObject, PdfOutlineItem* pParentOutli
     : PdfElement( NULL, pObject ), m_pParentOutline( pParentOutline ), m_pPrev( pPrevious ), 
       m_pNext( NULL ), m_pFirst( NULL ), m_pLast( NULL ), m_pDestination( NULL ), m_pAction( NULL )
 {
+    PdfTokenizer::RecursionGuard guard;
     PdfReference first, next;
 
     if( this->GetObject()->GetDictionary().HasKey( "First" ) )
diff --git a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfPagesTree.cpp b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfPagesTree.cpp
index a1da1965f..503c3e7b1 100644
--- a/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfPagesTree.cpp
+++ b/Electrum/Mapper/Libs/podofo/src/podofo/doc/PdfPagesTree.cpp
@@ -315,6 +315,8 @@ void PdfPagesTree::DeletePage( int nPageNumber )
 PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent, 
                                       PdfObjectList & rLstParents ) 
 {
+    PdfTokenizer::RecursionGuard guard;
+    
     if( !pParent ) 
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
-- 
2.14.1.windows.1

