Date: prev next · Thread: first prev next last
2012 Archives by date, by thread · List index


Actually, please review this one instead.  I forgot to do a small clean-up.

Kohei
From 6a9dd8decda8ff4ac691d8436d4b79c83eae1ac8 Mon Sep 17 00:00:00 2001
From: Kohei Yoshida <kohei.yoshida@gmail.com>
Date: Fri, 14 Sep 2012 23:17:23 -0400
Subject: [PATCH] Improve performance on updating row flags in pivot table cache.

This change massively improves run-time performance of pivot table
especially when the pivot table's source data range consists of a
tiny portion of actual data trailed by a large portion of empty rows.

For example, let's say you have data from row 1 to row 20000, but
selected the source data range to be row 1 to row 65536.  Without
this change, the pivot table may hang as much as a few minutes
especially when you try to launch a field popup window for the first
time.

Change-Id: I0fb75f5d0421d944739d9bf4ca2314afe5a5a62e
---
 sc/inc/dpcache.hxx                   |    6 +-
 sc/inc/dpcachetable.hxx              |   33 ++----
 sc/inc/dpobject.hxx                  |    4 +
 sc/source/core/data/dpcache.cxx      |   21 ++++
 sc/source/core/data/dpcachetable.cxx |  204 ++++++++++++++++++----------------
 sc/source/core/data/dpgroup.cxx      |    6 +-
 sc/source/core/data/dptabdat.cxx     |    6 +-
 7 files changed, 158 insertions(+), 122 deletions(-)

diff --git a/sc/inc/dpcache.hxx b/sc/inc/dpcache.hxx
index 68b1029..4be73d3 100644
--- a/sc/inc/dpcache.hxx
+++ b/sc/inc/dpcache.hxx
@@ -121,6 +121,7 @@ private:
 
     LabelsType maLabelNames;    // Stores dimension names.
     mdds::flat_segment_tree<SCROW, bool> maEmptyRows;
+    SCROW mnDataSize;
 
     bool mbDisposing;
 
@@ -150,8 +151,9 @@ public:
     bool InitFromDoc(ScDocument* pDoc, const ScRange& rRange);
     bool InitFromDataBase(const  ::com::sun::star::uno::Reference< 
::com::sun::star::sdbc::XRowSet>& xRowSet, const Date& rNullDate);
 
-    SCROW  GetRowCount() const;
-    SCROW  GetItemDataId( sal_uInt16 nDim, SCROW nRow, bool bRepeatIfEmpty ) const;
+    SCROW GetRowCount() const;
+    SCROW GetDataSize() const;
+    SCROW GetItemDataId( sal_uInt16 nDim, SCROW nRow, bool bRepeatIfEmpty ) const;
     rtl::OUString GetDimensionName(LabelsType::size_type nDim) const;
     bool IsRowEmpty(SCROW nRow) const;
     bool ValidQuery(SCROW nRow, const ScQueryParam& rQueryParam) const;
diff --git a/sc/inc/dpcachetable.hxx b/sc/inc/dpcachetable.hxx
index d834910..c55ec26 100644
--- a/sc/inc/dpcachetable.hxx
+++ b/sc/inc/dpcachetable.hxx
@@ -37,18 +37,9 @@
 #include <vector>
 #include <boost/unordered_set.hpp>
 #include <boost/shared_ptr.hpp>
-#include <com/sun/star/uno/Reference.hxx>
-
-namespace com { namespace sun { namespace star {
-    namespace sdbc {
-        class XRowSet;
-    }
-    namespace sheet {
-        struct DataPilotFieldFilter;
-    }
-}}}
-
-class Date;
+
+#include <mdds/flat_segment_tree.hpp>
+
 class ScDPItemData;
 class ScDPCache;
 class ScDocument;
@@ -64,15 +55,7 @@ struct ScQueryParam;
  */
 class SC_DLLPUBLIC ScDPCacheTable
 {
-    struct RowFlag
-    {
-        bool mbShowByFilter:1;
-        bool mbShowByPage:1;
-        bool isActive() const;
-        RowFlag();
-    };
 public:
-
     /** interface class used for filtering of rows. */
     class FilterBase
     {
@@ -142,7 +125,7 @@ public:
     /** Check whether a specified row is active or not.  When a row is active,
         it is used in calculation of the results data.  A row becomes inactive
         when it is filtered out by page field. */
-    bool isRowActive(sal_Int32 nRow) const;
+    bool isRowActive(sal_Int32 nRow, sal_Int32* pLastRow = NULL) const;
 
     /** Set filter on/off flag to each row to control visibility.  The caller
         must ensure that the table is filled before calling this function. */
@@ -185,11 +168,15 @@ private:
     bool isRowQualified(sal_Int32 nRow, const ::std::vector<Criterion>& rCriteria, const 
::boost::unordered_set<sal_Int32>& rRepeatIfEmptyDims) const;
 
 private:
+    typedef mdds::flat_segment_tree<SCROW, bool> RowFlagType;
+
     /** unique field entires for each field (column). */
     ::std::vector< ::std::vector<SCROW> > maFieldEntries;
 
-    /** Row flags. The first row below the header row has the index of 0. */
-    ::std::vector<RowFlag> maRowFlags;
+    /** Rows visible by standard filter query. */
+    RowFlagType maShowByFilter;
+    /** Rows visible by page dimension filtering. */
+    RowFlagType maShowByPage;
 
     const ScDPCache* mpCache;
 };
diff --git a/sc/inc/dpobject.hxx b/sc/inc/dpobject.hxx
index 272b459..f591f12 100644
--- a/sc/inc/dpobject.hxx
+++ b/sc/inc/dpobject.hxx
@@ -53,6 +53,10 @@ namespace com { namespace sun { namespace star {
         class XIndexAccess;
     }
 
+    namespace sdbc {
+        class XRowSet;
+    }
+
     namespace sheet {
         struct DataPilotTablePositionData;
         struct DataPilotTableHeaderData;
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index f1e4531..c754fba 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -73,6 +73,7 @@ ScDPCache::ScDPCache(ScDocument* pDoc) :
     mpDoc( pDoc ),
     mnColumnCount ( 0 ),
     maEmptyRows(0, MAXROW, true),
+    mnDataSize(-1),
     mbDisposing(false)
 {
 }
@@ -751,7 +752,21 @@ public:
 
 void ScDPCache::PostInit()
 {
+    OSL_ENSURE(!maFields.empty(), "Cache not initialized!");
+
     maEmptyRows.build_tree();
+    typedef mdds::flat_segment_tree<SCROW, bool>::const_reverse_iterator itr_type;
+    itr_type it = maEmptyRows.rbegin(), itEnd = maEmptyRows.rend();
+    OSL_ENSURE(it != itEnd, "corrupt flat_segment_tree instance!");
+    mnDataSize = maFields[0].maData.size();
+    ++it; // Skip the first position.
+    OSL_ENSURE(it != itEnd, "buggy version of flat_segment_tree is used.");
+    if (it->second)
+    {
+        SCROW nLastNonEmpty = it->first - 1;
+        if (nLastNonEmpty+1 < mnDataSize)
+            mnDataSize = nLastNonEmpty+1;
+    }
 }
 
 void ScDPCache::Clear()
@@ -850,6 +865,12 @@ SCROW ScDPCache::GetRowCount() const
     return maFields[0].maData.size();
 }
 
+SCROW ScDPCache::GetDataSize() const
+{
+    OSL_ENSURE(mnDataSize <= GetRowCount(), "Data size should never be larger than the row 
count.");
+    return mnDataSize >= 0 ? mnDataSize : 0;
+}
+
 const ScDPCache::ItemsType& ScDPCache::GetDimMemberValues(SCCOL nDim) const
 {
     OSL_ENSURE( nDim>=0 && nDim < mnColumnCount ," nDim < mnColumnCount ");
diff --git a/sc/source/core/data/dpcachetable.cxx b/sc/source/core/data/dpcachetable.cxx
index dc7a63a..14f4e8e 100644
--- a/sc/source/core/data/dpcachetable.cxx
+++ b/sc/source/core/data/dpcachetable.cxx
@@ -64,17 +64,6 @@ using ::com::sun::star::uno::UNO_QUERY;
 using ::com::sun::star::uno::UNO_QUERY_THROW;
 using ::com::sun::star::sheet::DataPilotFieldFilter;
 
-bool ScDPCacheTable::RowFlag::isActive() const
-{
-    return mbShowByFilter && mbShowByPage;
-}
-
-ScDPCacheTable::RowFlag::RowFlag() :
-    mbShowByFilter(true),
-    mbShowByPage(true)
-{
-}
-
 ScDPCacheTable::SingleFilter::SingleFilter(const ScDPItemData& rItem) :
     maItem(rItem) {}
 
@@ -125,7 +114,7 @@ ScDPCacheTable::Criterion::Criterion() :
 // ----------------------------------------------------------------------------
 
 ScDPCacheTable::ScDPCacheTable(const ScDPCache* pCache) :
-    mpCache(pCache)
+    maShowByFilter(0, MAXROW+1, false), maShowByPage(0, MAXROW+1, true), mpCache(pCache)
 {
 }
 
@@ -146,124 +135,145 @@ sal_Int32 ScDPCacheTable::getColSize() const
 void ScDPCacheTable::fillTable(
     const ScQueryParam& rQuery, bool bIgnoreEmptyRows, bool bRepeatIfEmpty)
 {
-    const SCROW nRowCount = getRowSize();
-    const SCCOL  nColCount = (SCCOL) getColSize();
-    if ( nRowCount <= 0 || nColCount <= 0)
+    SCROW nRowCount = getRowSize();
+    SCROW nDataSize = mpCache->GetDataSize();
+    SCCOL nColCount = getColSize();
+    if (nRowCount <= 0 || nColCount <= 0)
         return;
 
-    maRowFlags.clear();
-    maRowFlags.reserve(nRowCount);
+    maShowByFilter.clear();
+    maShowByPage.clear();
 
-    // Initialize field entries container.
-    maFieldEntries.clear();
-    maFieldEntries.reserve(nColCount);
-
-    // Data rows
-    for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
+    // Process the non-empty data rows.
+    for (SCROW nRow = 0; nRow < nDataSize; ++nRow)
     {
-        maFieldEntries.push_back( vector<SCROW>() );
-        SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
-        if ( nMemCount )
-        {
-            std::vector<SCROW> aAdded( nMemCount, -1 );
-
-            for (SCROW nRow = 0; nRow < nRowCount; ++nRow )
-            {
-                SCROW nIndex = getCache()->GetItemDataId( nCol, nRow, bRepeatIfEmpty );
-                SCROW nOrder = getOrder( nCol, nIndex );
-
-                if ( nCol == 0 )
-                {
-                    maRowFlags.push_back(RowFlag());
-                    maRowFlags.back().mbShowByFilter = false;
-                }
-
-                if (!getCache()->ValidQuery(nRow, rQuery))
-                    continue;
-
-                if ( bIgnoreEmptyRows &&  getCache()->IsRowEmpty( nRow ) )
-                    continue;
+        if (!getCache()->ValidQuery(nRow, rQuery))
+            continue;
 
-                if ( nCol == 0 )
-                     maRowFlags.back().mbShowByFilter = true;
+        if (bIgnoreEmptyRows && getCache()->IsRowEmpty(nRow))
+            continue;
 
-                aAdded[nOrder] = nIndex;
-            }
-            for ( SCROW nRow = 0; nRow < nMemCount; nRow++ )
-            {
-                if ( aAdded[nRow] != -1 )
-                    maFieldEntries.back().push_back( aAdded[nRow] );
-            }
-        }
+        maShowByFilter.insert_back(nRow, nRow+1, true);
     }
-}
 
-void ScDPCacheTable::fillTable()
-{
-   const SCROW  nRowCount = getRowSize();
-   const SCCOL  nColCount = (SCCOL) getColSize();
-   if ( nRowCount <= 0 || nColCount <= 0)
-        return;
-
-    maRowFlags.clear();
-    maRowFlags.reserve(nRowCount);
+    // Process the trailing empty rows.
+    if (!bIgnoreEmptyRows)
+        maShowByFilter.insert_back(nDataSize, nRowCount, true);
 
+    maShowByFilter.build_tree();
 
     // Initialize field entries container.
     maFieldEntries.clear();
     maFieldEntries.reserve(nColCount);
 
-    // Data rows
+    // Build unique field entries.
     for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
     {
         maFieldEntries.push_back( vector<SCROW>() );
         SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
-        if ( nMemCount )
-        {
-            std::vector< SCROW > pAdded( nMemCount, -1 );
+        if (!nMemCount)
+            continue;
 
-            for (SCROW nRow = 0; nRow < nRowCount; ++nRow )
+        std::vector<SCROW> aAdded(nMemCount, -1);
+        bool bShow = false;
+        SCROW nEndSegment = -1;
+        for (SCROW nRow = 0; nRow < nRowCount; ++nRow)
+        {
+            if (nRow > nEndSegment)
             {
-                SCROW nIndex = getCache()->GetItemDataId( nCol, nRow, false );
-                SCROW nOrder = getOrder( nCol, nIndex );
-
-                if ( nCol == 0 )
+                if (!maShowByFilter.search_tree(nRow, bShow, NULL, &nEndSegment))
                 {
-                     maRowFlags.push_back(RowFlag());
-                     maRowFlags.back().mbShowByFilter = true;
+                    OSL_FAIL("Tree search failed!");
+                    continue;
                 }
-
-                pAdded[nOrder] = nIndex;
+                --nEndSegment; // End position is not inclusive. Move back one.
             }
-            for ( SCROW nRow = 0; nRow < nMemCount; nRow++ )
+
+            if (!bShow)
             {
-                if ( pAdded[nRow] != -1 )
-                    maFieldEntries.back().push_back( pAdded[nRow] );
+                nRow = nEndSegment;
+                continue;
             }
+
+            SCROW nIndex = getCache()->GetItemDataId(nCol, nRow, bRepeatIfEmpty);
+            SCROW nOrder = getOrder(nCol, nIndex);
+            aAdded[nOrder] = nIndex;
+        }
+        for (SCROW nRow = 0; nRow < nMemCount; ++nRow)
+        {
+            if (aAdded[nRow] != -1)
+                maFieldEntries.back().push_back(aAdded[nRow]);
         }
     }
 }
 
-bool ScDPCacheTable::isRowActive(sal_Int32 nRow) const
+void ScDPCacheTable::fillTable()
 {
-    if (nRow < 0 || static_cast<size_t>(nRow) >= maRowFlags.size())
-        // row index out of bound
-        return false;
+   SCROW nRowCount = getRowSize();
+   SCCOL nColCount = getColSize();
+   if (nRowCount <= 0 || nColCount <= 0)
+        return;
+
+   maShowByFilter.clear();
+   maShowByPage.clear();
+   maShowByFilter.insert_front(0, nRowCount, true);
+
+   // Initialize field entries container.
+   maFieldEntries.clear();
+   maFieldEntries.reserve(nColCount);
+
+   // Data rows
+   for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
+   {
+       maFieldEntries.push_back( vector<SCROW>() );
+       SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
+       if (!nMemCount)
+           continue;
+
+       std::vector<SCROW> aAdded(nMemCount, -1);
+
+       for (SCROW nRow = 0; nRow < nRowCount; ++nRow)
+       {
+           SCROW nIndex = getCache()->GetItemDataId(nCol, nRow, false);
+           SCROW nOrder = getOrder(nCol, nIndex);
+           aAdded[nOrder] = nIndex;
+       }
+       for (SCROW nRow = 0; nRow < nMemCount; ++nRow)
+       {
+           if (aAdded[nRow] != -1)
+               maFieldEntries.back().push_back(aAdded[nRow]);
+       }
+   }
+}
+
+bool ScDPCacheTable::isRowActive(sal_Int32 nRow, sal_Int32* pLastRow) const
+{
+    bool bFilter = false, bPage = true;
+    SCROW nLastRowFilter = MAXROW, nLastRowPage = MAXROW;
+    maShowByFilter.search_tree(nRow, bFilter, NULL, &nLastRowFilter);
+    maShowByPage.search_tree(nRow, bPage, NULL, &nLastRowPage);
+    if (pLastRow)
+    {
+        // Return the last row of current segment.
+        *pLastRow = nLastRowFilter < nLastRowPage ? nLastRowFilter : nLastRowPage;
+        *pLastRow -= 1; // End position is not inclusive. Move back one.
+    }
 
-    return maRowFlags[nRow].isActive();
+    return bFilter && bPage;
 }
 
 void ScDPCacheTable::filterByPageDimension(const vector<Criterion>& rCriteria, const 
boost::unordered_set<sal_Int32>& rRepeatIfEmptyDims)
 {
-    sal_Int32 nRowSize = getRowSize();
-    if (nRowSize != static_cast<sal_Int32>(maRowFlags.size()))
+    SCROW nRowSize = getRowSize();
+
+    maShowByPage.clear();
+    for (SCROW nRow = 0; nRow < nRowSize; ++nRow)
     {
-        // sizes of the two tables differ!
-        return;
+        bool bShow = isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims);
+        maShowByPage.insert_back(nRow, nRow+1, bShow);
     }
 
-    for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
-        maRowFlags[nRow].mbShowByPage = isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims);
+    maShowByPage.build_tree();
 }
 
 const ScDPItemData* ScDPCacheTable::getCell(SCCOL nCol, SCROW nRow, bool bRepeatIfEmpty) const
@@ -333,12 +343,15 @@ void ScDPCacheTable::filterTable(const vector<Criterion>& rCriteria, 
Sequence< S
     }
     tableData.push_back(headerRow);
 
-
     for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
     {
-        if (!maRowFlags[nRow].isActive())
+        sal_Int32 nLastRow;
+        if (!isRowActive(nRow, &nLastRow))
+        {
             // This row is filtered out.
+            nRow = nLastRow;
             continue;
+        }
 
         if (!isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims))
             continue;
@@ -378,7 +391,8 @@ SCROW ScDPCacheTable::getOrder(long nDim, SCROW nIndex) const
 void ScDPCacheTable::clear()
 {
     maFieldEntries.clear();
-    maRowFlags.clear();
+    maShowByFilter.clear();
+    maShowByPage.clear();
 }
 
 bool ScDPCacheTable::empty() const
diff --git a/sc/source/core/data/dpgroup.cxx b/sc/source/core/data/dpgroup.cxx
index 91e9cce..ea04b7c 100644
--- a/sc/source/core/data/dpgroup.cxx
+++ b/sc/source/core/data/dpgroup.cxx
@@ -767,8 +767,12 @@ void ScDPGroupTableData::CalcResults(CalcInfo& rInfo, bool bAutoShow)
     sal_Int32 nRowSize = rCacheTable.getRowSize();
     for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
     {
-        if (!rCacheTable.isRowActive(nRow))
+        sal_Int32 nLastRow;
+        if (!rCacheTable.isRowActive(nRow, &nLastRow))
+        {
+            nRow = nLastRow;
             continue;
+        }
 
         CalcRowData aData;
         FillRowDataFromCacheTable(nRow, rCacheTable, rInfo, aData);
diff --git a/sc/source/core/data/dptabdat.cxx b/sc/source/core/data/dptabdat.cxx
index a2566be..0600908 100644
--- a/sc/source/core/data/dptabdat.cxx
+++ b/sc/source/core/data/dptabdat.cxx
@@ -226,8 +226,12 @@ void ScDPTableData::CalcResultsFromCacheTable(const ScDPCacheTable& 
rCacheTable,
     sal_Int32 nRowSize = rCacheTable.getRowSize();
     for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
     {
-        if (!rCacheTable.isRowActive(nRow))
+        sal_Int32 nLastRow;
+        if (!rCacheTable.isRowActive(nRow, &nLastRow))
+        {
+            nRow = nLastRow;
             continue;
+        }
 
         CalcRowData aData;
         FillRowDataFromCacheTable(nRow, rCacheTable, rInfo, aData);
-- 
1.7.3.4


Context


Privacy Policy | Impressum (Legal Info) | Copyright information: Unless otherwise specified, all text and images on this website are licensed under the Creative Commons Attribution-Share Alike 3.0 License. This does not include the source code of LibreOffice, which is licensed under the Mozilla Public License (MPLv2). "LibreOffice" and "The Document Foundation" are registered trademarks of their corresponding registered owners or are in actual use as trademarks in one or more countries. Their respective logos and icons are also subject to international copyright laws. Use thereof is explained in our trademark policy.