On Fri, 2012-05-11 at 16:32 +0100, Caolán McNamara wrote:
> So - how about the attached diff, hopefully rather easier to review &
> back-port :-)

Seems sane, though it probably makes sense to tweak the trigger for
calling buildPageChainCache, as nNew there is in bytes while the define
is in "pages". And we might want to avoid calling it in the case of a
small relative forward jump in pages.
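
A minimal sketch of the tweak being suggested here, purely for illustration:
compare page counts with page counts, and skip the cache build for a short
forward hop. The helper name, its parameters and the 16-page cut-off are
assumptions, not code from the attached patch.

    #include <cstdint>

    // Hypothetical helper, not part of the patch: the threshold define is in
    // pages, so convert the byte positions to page indices before comparing.
    static bool shouldBuildPageChainCache(std::int32_t nOldBytePos,
                                          std::int32_t nNewBytePos,
                                          std::int16_t nPageSize,
                                          std::int32_t nThresholdPages)
    {
        std::int32_t nOldPage = nOldBytePos / nPageSize;
        std::int32_t nNewPage = nNewBytePos / nPageSize;
        // assumed cut-off: a short forward hop is cheap to walk incrementally
        if (nNewBytePos > nOldBytePos && nNewPage - nOldPage < 16)
            return false;
        return nNewPage > nThresholdPages;  // pages compared against pages
    }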

        So I pushed this to master; I attach a speculative back-port to 3.5 -
it gives the rest of the rather substantial speedup we need vs. where we
were; I'd like to get it in. I also tested the
'get-position-of-byte-after-the-end' code path which seems to work
nicely too.
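
For the curious, a small self-contained illustration of that code path with
made-up numbers; the variable names mirror Pos2Page() but none of this is the
patch's code. For a stream of exactly three pages, seeking to nBytePos == nSize
lands one index past the cache, so we back up to the last page and set the
offset to a full page.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    int main()
    {
        const std::int16_t nPageSize = 512;                    // assumed page size
        const std::int32_t nSize = 3 * nPageSize;              // stream is exactly 3 pages
        std::vector<std::int32_t> aPagesCache = { 7, 9, 12 };  // made-up physical pages

        std::int32_t nBytePos = nSize;                         // seek to byte-after-the-end
        std::int16_t nOffset = static_cast<std::int16_t>(nBytePos % nPageSize);  // == 0
        std::size_t nIdx = static_cast<std::size_t>(nBytePos / nPageSize);       // == 3

        // the special case from the patch: the file size is a multiple of the
        // page size, so point at the last allocated page, just past its end
        if (nBytePos == nSize && nOffset == 0 && nIdx == aPagesCache.size())
        {
            nIdx--;
            nOffset = nPageSize;
        }
        assert(nIdx == 2 && nOffset == nPageSize);
        return 0;
    }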

        Thanks,

                Michael.

-- 
michael.meeks@suse.com  <><, Pseudo Engineer, itinerant idiot
From 3cbfc50821ce4ec44a2c27c097cc88f64f47e2c2 Mon Sep 17 00:00:00 2001
From: Michael Meeks <michael.meeks@suse.com>
Date: Fri, 11 May 2012 20:19:21 +0100
Subject: [PATCH] sot: Improve OLE2 offset-to-page computation with a FAT
 layout vector

---
 sot/source/sdstor/stgstrms.cxx |   85 ++++++++++++++++++++++++++++++++++------
 sot/source/sdstor/stgstrms.hxx |    3 +
 2 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx
index 5e6c1ea..8d3d758 100644
--- a/sot/source/sdstor/stgstrms.cxx
+++ b/sot/source/sdstor/stgstrms.cxx
@@ -26,12 +26,12 @@
  *
  ************************************************************************/
 
+#include <algorithm>
 
 #include <string.h>     // memcpy()
-
+#include <sal/log.hxx>
 #include <osl/file.hxx>
 #include <tools/tempfile.hxx>
-#include <tools/debug.hxx>
 
 #include "sot/stg.hxx"
 #include "stgelem.hxx"
@@ -334,6 +334,37 @@ void StgStrm::SetEntry( StgDirEntry& r )
     r.SetDirty();
 }
 
+/*
+ * The page chain is basically a singly linked list of slots, each
+ * pointing to the next page. Instead of traversing the file
+ * structure for this each time, build a simple flat in-memory
+ * vector of the pages in the chain.
+ */
+bool StgStrm::buildPageChainCache()
+{
+    if (nSize > 0)
+        m_aPagesCache.reserve(nSize/nPageSize);
+
+    sal_Int32 nBgn = nStart;
+    while (nBgn >= 0)
+    {
+        m_aPagesCache.push_back(nBgn);
+        sal_Int32 nOldBgn = nBgn;
+        nBgn = pFat->GetNextPage(nBgn);
+        if (nBgn == nOldBgn)
+            return false;
+    }
+
+    return true;
+}
+
+//See fdo#47644 for a .doc with a vast number of pages, where seeking around
+//the document takes a colossal amount of time
+//
+//There's a cost to building a page cache, so only build one if the number of
+//pages to seek through hits some sufficiently high value where it's worth it.
+#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 8 * 512
+
 // Compute page number and offset for the given byte position.
 // If the position is behind the size, set the stream right
 // behind the EOF.
@@ -353,9 +384,35 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
     nPos = nBytePos;
     if( nOld == nNew )
         return sal_True;
+
+    if (m_aPagesCache.empty() && nNew > ARBITRARY_LARGE_AMOUNT_OF_PAGES)
+    {
+        SAL_WARN("sot", "kicking off large seek helper\n");
+        buildPageChainCache();
+    }
+
+    if (!m_aPagesCache.empty())
+    {
+        size_t nIdx = nNew / nPageSize;
+
+        // special case: seek to 1st byte of new, unallocated page
+        // (in case the file size is a multiple of the page size)
+        if( nBytePos == nSize && !nOffset && nIdx == m_aPagesCache.size() )
+        {
+            nIdx--;
+            nOffset = nPageSize;
+        }
+
+        if (nIdx < m_aPagesCache.size())
+        {
+            nPage = m_aPagesCache[ nIdx ];
+            return sal_Bool( nPage >= 0 );
+        }
+    }
+
     if( nNew > nOld )
     {
-        // the new position is behind the current, so an incremental
+        // the new position is after the current, so an incremental
         // positioning is OK. Set the page relative position
         nRel = nNew - nOld;
         nBgn = nPage;
@@ -404,6 +461,8 @@ StgPage* StgStrm::GetPhysPage( sal_Int32 nBytePos, sal_Bool bForce )
 
 sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     sal_Int32 nTo = nStart;
     sal_Int32 nPgs = ( nBytes + nPageSize - 1 ) / nPageSize;
     while( nPgs-- )
@@ -430,6 +489,8 @@ sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 
 sal_Bool StgStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // round up to page size
     sal_Int32 nOld = ( ( nSize + nPageSize - 1 ) / nPageSize ) * nPageSize;
     sal_Int32 nNew = ( ( nBytes + nPageSize - 1 ) / nPageSize ) * nPageSize;
@@ -528,6 +589,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
         {
             if( bMake )
             {
+                m_aPagesCache.clear();
+
                 // create a new master page
                 nFAT = nMaxPage++;
                 pMaster = rIo.Copy( nFAT, STG_FREE );
@@ -582,6 +645,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
 
 sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 {
+    m_aPagesCache.clear();
+
     sal_Bool bRes = sal_True;
     if( nOff < rIo.aHdr.GetFAT1Size() )
         rIo.aHdr.SetFATPage( nOff, nNewPage );
@@ -631,6 +696,8 @@ sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 
 sal_Bool StgFATStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // Set the number of entries to a multiple of the page size
     short nOld = (short) ( ( nSize + ( nPageSize - 1 ) ) / nPageSize );
     short nNew = (short) (
@@ -747,16 +814,10 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen )
     {
         // determine the actual size of the stream by scanning
         // the FAT chain and counting the # of pages allocated
-        nSize = 0;
-        sal_Int32 nOldBgn = -1;
-        while( nBgn >= 0 && nBgn != nOldBgn )
-        {
-            nOldBgn = nBgn;
-            nBgn = pFat->GetNextPage( nBgn );
-            if( nBgn == nOldBgn )
+        bool bOk = buildPageChainCache();
+        if (!bOk)
                 rIo.SetError( ERRCODE_IO_WRONGFORMAT );
-            nSize += nPageSize;
-        }
+        nSize = m_aPagesCache.size() * nPageSize;
     }
 }
 
diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx
index a6a0ad1..6b1b8c3 100644
--- a/sot/source/sdstor/stgstrms.hxx
+++ b/sot/source/sdstor/stgstrms.hxx
@@ -30,6 +30,7 @@
 #define _STGSTRMS_HXX
 
 #include <tools/stream.hxx>
+#include <vector>
 
 class StgIo;
 class StgStrm;
@@ -77,6 +78,8 @@ protected:
     sal_Int32 nPage;                        // current logical page
     short nOffset;                      // offset into current page
     short nPageSize;                    // logical page size
+    std::vector<sal_Int32> m_aPagesCache;
+    bool buildPageChainCache();
     sal_Bool  Copy( sal_Int32 nFrom, sal_Int32 nBytes );
     StgStrm( StgIo& );
 public:
-- 
1.7.7
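
And for readers skimming the patch rather than applying it, a standalone
sketch of the idea it implements, using a toy FAT map in place of the SOT
classes; none of the names below are LibreOffice code.

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <vector>

    int main()
    {
        const std::int32_t nPageSize = 512;
        // toy FAT: page -> next page, -2 marking end-of-chain
        const std::map<std::int32_t, std::int32_t> aFat = { {4, 7}, {7, 9}, {9, -2} };
        const std::int32_t nStart = 4;

        // walk the singly linked chain once and cache it as a flat vector
        // (the real code additionally bails out if a page chains to itself)
        std::vector<std::int32_t> aPagesCache;
        for (std::int32_t nBgn = nStart; nBgn >= 0; nBgn = aFat.at(nBgn))
            aPagesCache.push_back(nBgn);

        // afterwards any byte offset maps to a physical page by plain indexing
        const std::int32_t nBytePos = 2 * nPageSize + 100;
        std::printf("byte %d lives on page %d\n", static_cast<int>(nBytePos),
                    static_cast<int>(aPagesCache[static_cast<std::size_t>(nBytePos / nPageSize)]));
        return 0;
    }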

