I would like to have the attached patch backported to 3-6. This change
massively improves the runtime performance of pivot table that has a
large source data range whose bottom part is mostly empty. Some users,
especially those who are used to Excel tend to do this, and without this
change, Calc hanges for about a few minutes when trying to launch field
popup window for the first time.
I know this is a large change, but IMO worth the backport. FWIW, all
the pivot table related unit tests pass after this change.
The patch is a collection of several different commits from the master
branch squashed into a single commit.
Kohei
--
Kohei Yoshida, LibreOffice hacker, Calc
From 8a04d9200201d463f8638364ae0788cfd223aad5 Mon Sep 17 00:00:00 2001
From: Kohei Yoshida <kohei.yoshida@gmail.com>
Date: Fri, 14 Sep 2012 23:17:23 -0400
Subject: [PATCH] Improve performance on updating row flags in pivot table cache.
This change massively improves run-time performance of pivot table
especially when the pivot table's source data range consists of a
tiny portion of actual data trailed by a large portion of empty rows.
For example, let's say you have data from row 1 to row 20000, but
selected the source data range to be row 1 to row 65536. Without
this change, the pivot table may hang as much as a few minutes
especially when you try to launch a field popup window for the first
time.
Change-Id: I0fb75f5d0421d944739d9bf4ca2314afe5a5a62e
---
sc/inc/dpcache.hxx | 6 +-
sc/inc/dpcachetable.hxx | 25 ++---
sc/inc/dpobject.hxx | 4 +
sc/source/core/data/dpcache.cxx | 21 ++++
sc/source/core/data/dpcachetable.cxx | 195 +++++++++++++++++++---------------
sc/source/core/data/dpgroup.cxx | 6 +-
sc/source/core/data/dptabdat.cxx | 6 +-
7 files changed, 159 insertions(+), 104 deletions(-)
diff --git a/sc/inc/dpcache.hxx b/sc/inc/dpcache.hxx
index 68b1029..4be73d3 100644
--- a/sc/inc/dpcache.hxx
+++ b/sc/inc/dpcache.hxx
@@ -121,6 +121,7 @@ private:
LabelsType maLabelNames; // Stores dimension names.
mdds::flat_segment_tree<SCROW, bool> maEmptyRows;
+ SCROW mnDataSize;
bool mbDisposing;
@@ -150,8 +151,9 @@ public:
bool InitFromDoc(ScDocument* pDoc, const ScRange& rRange);
bool InitFromDataBase(const ::com::sun::star::uno::Reference<
::com::sun::star::sdbc::XRowSet>& xRowSet, const Date& rNullDate);
- SCROW GetRowCount() const;
- SCROW GetItemDataId( sal_uInt16 nDim, SCROW nRow, bool bRepeatIfEmpty ) const;
+ SCROW GetRowCount() const;
+ SCROW GetDataSize() const;
+ SCROW GetItemDataId( sal_uInt16 nDim, SCROW nRow, bool bRepeatIfEmpty ) const;
rtl::OUString GetDimensionName(LabelsType::size_type nDim) const;
bool IsRowEmpty(SCROW nRow) const;
bool ValidQuery(SCROW nRow, const ScQueryParam& rQueryParam) const;
diff --git a/sc/inc/dpcachetable.hxx b/sc/inc/dpcachetable.hxx
index d834910..c0d0625 100644
--- a/sc/inc/dpcachetable.hxx
+++ b/sc/inc/dpcachetable.hxx
@@ -37,18 +37,9 @@
#include <vector>
#include <boost/unordered_set.hpp>
#include <boost/shared_ptr.hpp>
-#include <com/sun/star/uno/Reference.hxx>
-
-namespace com { namespace sun { namespace star {
- namespace sdbc {
- class XRowSet;
- }
- namespace sheet {
- struct DataPilotFieldFilter;
- }
-}}}
-
-class Date;
+
+#include <mdds/flat_segment_tree.hpp>
+
class ScDPItemData;
class ScDPCache;
class ScDocument;
@@ -142,7 +133,7 @@ public:
/** Check whether a specified row is active or not. When a row is active,
it is used in calculation of the results data. A row becomes inactive
when it is filtered out by page field. */
- bool isRowActive(sal_Int32 nRow) const;
+ bool isRowActive(sal_Int32 nRow, sal_Int32* pLastRow = NULL) const;
/** Set filter on/off flag to each row to control visibility. The caller
must ensure that the table is filled before calling this function. */
@@ -185,11 +176,15 @@ private:
bool isRowQualified(sal_Int32 nRow, const ::std::vector<Criterion>& rCriteria, const
::boost::unordered_set<sal_Int32>& rRepeatIfEmptyDims) const;
private:
+ typedef mdds::flat_segment_tree<SCROW, bool> RowFlagType;
+
/** unique field entires for each field (column). */
::std::vector< ::std::vector<SCROW> > maFieldEntries;
- /** Row flags. The first row below the header row has the index of 0. */
- ::std::vector<RowFlag> maRowFlags;
+ /** Rows visible by standard filter query. */
+ RowFlagType maShowByFilter;
+ /** Rows visible by page dimension filtering. */
+ RowFlagType maShowByPage;
const ScDPCache* mpCache;
};
diff --git a/sc/inc/dpobject.hxx b/sc/inc/dpobject.hxx
index 272b459..f591f12 100644
--- a/sc/inc/dpobject.hxx
+++ b/sc/inc/dpobject.hxx
@@ -53,6 +53,10 @@ namespace com { namespace sun { namespace star {
class XIndexAccess;
}
+ namespace sdbc {
+ class XRowSet;
+ }
+
namespace sheet {
struct DataPilotTablePositionData;
struct DataPilotTableHeaderData;
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index f1e4531..c754fba 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -73,6 +73,7 @@ ScDPCache::ScDPCache(ScDocument* pDoc) :
mpDoc( pDoc ),
mnColumnCount ( 0 ),
maEmptyRows(0, MAXROW, true),
+ mnDataSize(-1),
mbDisposing(false)
{
}
@@ -751,7 +752,21 @@ public:
void ScDPCache::PostInit()
{
+ OSL_ENSURE(!maFields.empty(), "Cache not initialized!");
+
maEmptyRows.build_tree();
+ typedef mdds::flat_segment_tree<SCROW, bool>::const_reverse_iterator itr_type;
+ itr_type it = maEmptyRows.rbegin(), itEnd = maEmptyRows.rend();
+ OSL_ENSURE(it != itEnd, "corrupt flat_segment_tree instance!");
+ mnDataSize = maFields[0].maData.size();
+ ++it; // Skip the first position.
+ OSL_ENSURE(it != itEnd, "buggy version of flat_segment_tree is used.");
+ if (it->second)
+ {
+ SCROW nLastNonEmpty = it->first - 1;
+ if (nLastNonEmpty+1 < mnDataSize)
+ mnDataSize = nLastNonEmpty+1;
+ }
}
void ScDPCache::Clear()
@@ -850,6 +865,12 @@ SCROW ScDPCache::GetRowCount() const
return maFields[0].maData.size();
}
+SCROW ScDPCache::GetDataSize() const
+{
+ OSL_ENSURE(mnDataSize <= GetRowCount(), "Data size should never be larger than the row
count.");
+ return mnDataSize >= 0 ? mnDataSize : 0;
+}
+
const ScDPCache::ItemsType& ScDPCache::GetDimMemberValues(SCCOL nDim) const
{
OSL_ENSURE( nDim>=0 && nDim < mnColumnCount ," nDim < mnColumnCount ");
diff --git a/sc/source/core/data/dpcachetable.cxx b/sc/source/core/data/dpcachetable.cxx
index dc7a63a..11ac85f 100644
--- a/sc/source/core/data/dpcachetable.cxx
+++ b/sc/source/core/data/dpcachetable.cxx
@@ -70,7 +70,7 @@ bool ScDPCacheTable::RowFlag::isActive() const
}
ScDPCacheTable::RowFlag::RowFlag() :
- mbShowByFilter(true),
+ mbShowByFilter(false),
mbShowByPage(true)
{
}
@@ -125,7 +125,7 @@ ScDPCacheTable::Criterion::Criterion() :
// ----------------------------------------------------------------------------
ScDPCacheTable::ScDPCacheTable(const ScDPCache* pCache) :
- mpCache(pCache)
+ maShowByFilter(0, MAXROW+1, false), maShowByPage(0, MAXROW+1, true), mpCache(pCache)
{
}
@@ -146,124 +146,145 @@ sal_Int32 ScDPCacheTable::getColSize() const
void ScDPCacheTable::fillTable(
const ScQueryParam& rQuery, bool bIgnoreEmptyRows, bool bRepeatIfEmpty)
{
- const SCROW nRowCount = getRowSize();
- const SCCOL nColCount = (SCCOL) getColSize();
- if ( nRowCount <= 0 || nColCount <= 0)
+ SCROW nRowCount = getRowSize();
+ SCROW nDataSize = mpCache->GetDataSize();
+ SCCOL nColCount = getColSize();
+ if (nRowCount <= 0 || nColCount <= 0)
return;
- maRowFlags.clear();
- maRowFlags.reserve(nRowCount);
+ maShowByFilter.clear();
+ maShowByPage.clear();
- // Initialize field entries container.
- maFieldEntries.clear();
- maFieldEntries.reserve(nColCount);
-
- // Data rows
- for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
+ // Process the non-empty data rows.
+ for (SCROW nRow = 0; nRow < nDataSize; ++nRow)
{
- maFieldEntries.push_back( vector<SCROW>() );
- SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
- if ( nMemCount )
- {
- std::vector<SCROW> aAdded( nMemCount, -1 );
-
- for (SCROW nRow = 0; nRow < nRowCount; ++nRow )
- {
- SCROW nIndex = getCache()->GetItemDataId( nCol, nRow, bRepeatIfEmpty );
- SCROW nOrder = getOrder( nCol, nIndex );
-
- if ( nCol == 0 )
- {
- maRowFlags.push_back(RowFlag());
- maRowFlags.back().mbShowByFilter = false;
- }
-
- if (!getCache()->ValidQuery(nRow, rQuery))
- continue;
-
- if ( bIgnoreEmptyRows && getCache()->IsRowEmpty( nRow ) )
- continue;
+ if (!getCache()->ValidQuery(nRow, rQuery))
+ continue;
- if ( nCol == 0 )
- maRowFlags.back().mbShowByFilter = true;
+ if (bIgnoreEmptyRows && getCache()->IsRowEmpty(nRow))
+ continue;
- aAdded[nOrder] = nIndex;
- }
- for ( SCROW nRow = 0; nRow < nMemCount; nRow++ )
- {
- if ( aAdded[nRow] != -1 )
- maFieldEntries.back().push_back( aAdded[nRow] );
- }
- }
+ maShowByFilter.insert_back(nRow, nRow+1, true);
}
-}
-void ScDPCacheTable::fillTable()
-{
- const SCROW nRowCount = getRowSize();
- const SCCOL nColCount = (SCCOL) getColSize();
- if ( nRowCount <= 0 || nColCount <= 0)
- return;
-
- maRowFlags.clear();
- maRowFlags.reserve(nRowCount);
+ // Process the trailing empty rows.
+ if (!bIgnoreEmptyRows)
+ maShowByFilter.insert_back(nDataSize, nRowCount, true);
+ maShowByFilter.build_tree();
// Initialize field entries container.
maFieldEntries.clear();
maFieldEntries.reserve(nColCount);
- // Data rows
+ // Build unique field entries.
for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
{
maFieldEntries.push_back( vector<SCROW>() );
SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
- if ( nMemCount )
- {
- std::vector< SCROW > pAdded( nMemCount, -1 );
+ if (!nMemCount)
+ continue;
- for (SCROW nRow = 0; nRow < nRowCount; ++nRow )
+ std::vector<SCROW> aAdded(nMemCount, -1);
+ bool bShow = false;
+ SCROW nEndSegment = -1;
+ for (SCROW nRow = 0; nRow < nRowCount; ++nRow)
+ {
+ if (nRow > nEndSegment)
{
- SCROW nIndex = getCache()->GetItemDataId( nCol, nRow, false );
- SCROW nOrder = getOrder( nCol, nIndex );
-
- if ( nCol == 0 )
+ if (!maShowByFilter.search_tree(nRow, bShow, NULL, &nEndSegment))
{
- maRowFlags.push_back(RowFlag());
- maRowFlags.back().mbShowByFilter = true;
+ OSL_FAIL("Tree search failed!");
+ continue;
}
-
- pAdded[nOrder] = nIndex;
+ --nEndSegment; // End position is not inclusive. Move back one.
}
- for ( SCROW nRow = 0; nRow < nMemCount; nRow++ )
+
+ if (!bShow)
{
- if ( pAdded[nRow] != -1 )
- maFieldEntries.back().push_back( pAdded[nRow] );
+ nRow = nEndSegment;
+ continue;
}
+
+ SCROW nIndex = getCache()->GetItemDataId(nCol, nRow, bRepeatIfEmpty);
+ SCROW nOrder = getOrder(nCol, nIndex);
+ aAdded[nOrder] = nIndex;
+ }
+ for (SCROW nRow = 0; nRow < nMemCount; ++nRow)
+ {
+ if (aAdded[nRow] != -1)
+ maFieldEntries.back().push_back(aAdded[nRow]);
}
}
}
-bool ScDPCacheTable::isRowActive(sal_Int32 nRow) const
+void ScDPCacheTable::fillTable()
+{
+ SCROW nRowCount = getRowSize();
+ SCCOL nColCount = getColSize();
+ if (nRowCount <= 0 || nColCount <= 0)
+ return;
+
+ maShowByFilter.clear();
+ maShowByPage.clear();
+ maShowByFilter.insert_front(0, nRowCount, true);
+
+ // Initialize field entries container.
+ maFieldEntries.clear();
+ maFieldEntries.reserve(nColCount);
+
+ // Data rows
+ for (SCCOL nCol = 0; nCol < nColCount; ++nCol)
+ {
+ maFieldEntries.push_back( vector<SCROW>() );
+ SCROW nMemCount = getCache()->GetDimMemberCount( nCol );
+ if (!nMemCount)
+ continue;
+
+ std::vector<SCROW> aAdded(nMemCount, -1);
+
+ for (SCROW nRow = 0; nRow < nRowCount; ++nRow)
+ {
+ SCROW nIndex = getCache()->GetItemDataId(nCol, nRow, false);
+ SCROW nOrder = getOrder(nCol, nIndex);
+ aAdded[nOrder] = nIndex;
+ }
+ for (SCROW nRow = 0; nRow < nMemCount; ++nRow)
+ {
+ if (aAdded[nRow] != -1)
+ maFieldEntries.back().push_back(aAdded[nRow]);
+ }
+ }
+}
+
+bool ScDPCacheTable::isRowActive(sal_Int32 nRow, sal_Int32* pLastRow) const
{
- if (nRow < 0 || static_cast<size_t>(nRow) >= maRowFlags.size())
- // row index out of bound
- return false;
+ bool bFilter = false, bPage = true;
+ SCROW nLastRowFilter = MAXROW, nLastRowPage = MAXROW;
+ maShowByFilter.search_tree(nRow, bFilter, NULL, &nLastRowFilter);
+ maShowByPage.search_tree(nRow, bPage, NULL, &nLastRowPage);
+ if (pLastRow)
+ {
+ // Return the last row of current segment.
+ *pLastRow = nLastRowFilter < nLastRowPage ? nLastRowFilter : nLastRowPage;
+ *pLastRow -= 1; // End position is not inclusive. Move back one.
+ }
- return maRowFlags[nRow].isActive();
+ return bFilter && bPage;
}
void ScDPCacheTable::filterByPageDimension(const vector<Criterion>& rCriteria, const
boost::unordered_set<sal_Int32>& rRepeatIfEmptyDims)
{
- sal_Int32 nRowSize = getRowSize();
- if (nRowSize != static_cast<sal_Int32>(maRowFlags.size()))
+ SCROW nRowSize = getRowSize();
+
+ maShowByPage.clear();
+ for (SCROW nRow = 0; nRow < nRowSize; ++nRow)
{
- // sizes of the two tables differ!
- return;
+ bool bShow = isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims);
+ maShowByPage.insert_back(nRow, nRow+1, bShow);
}
- for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
- maRowFlags[nRow].mbShowByPage = isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims);
+ maShowByPage.build_tree();
}
const ScDPItemData* ScDPCacheTable::getCell(SCCOL nCol, SCROW nRow, bool bRepeatIfEmpty) const
@@ -333,12 +354,15 @@ void ScDPCacheTable::filterTable(const vector<Criterion>& rCriteria,
Sequence< S
}
tableData.push_back(headerRow);
-
for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
{
- if (!maRowFlags[nRow].isActive())
+ sal_Int32 nLastRow;
+ if (!isRowActive(nRow, &nLastRow))
+ {
// This row is filtered out.
+ nRow = nLastRow;
continue;
+ }
if (!isRowQualified(nRow, rCriteria, rRepeatIfEmptyDims))
continue;
@@ -378,7 +402,8 @@ SCROW ScDPCacheTable::getOrder(long nDim, SCROW nIndex) const
void ScDPCacheTable::clear()
{
maFieldEntries.clear();
- maRowFlags.clear();
+ maShowByFilter.clear();
+ maShowByPage.clear();
}
bool ScDPCacheTable::empty() const
diff --git a/sc/source/core/data/dpgroup.cxx b/sc/source/core/data/dpgroup.cxx
index 91e9cce..ea04b7c 100644
--- a/sc/source/core/data/dpgroup.cxx
+++ b/sc/source/core/data/dpgroup.cxx
@@ -767,8 +767,12 @@ void ScDPGroupTableData::CalcResults(CalcInfo& rInfo, bool bAutoShow)
sal_Int32 nRowSize = rCacheTable.getRowSize();
for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
{
- if (!rCacheTable.isRowActive(nRow))
+ sal_Int32 nLastRow;
+ if (!rCacheTable.isRowActive(nRow, &nLastRow))
+ {
+ nRow = nLastRow;
continue;
+ }
CalcRowData aData;
FillRowDataFromCacheTable(nRow, rCacheTable, rInfo, aData);
diff --git a/sc/source/core/data/dptabdat.cxx b/sc/source/core/data/dptabdat.cxx
index a2566be..0600908 100644
--- a/sc/source/core/data/dptabdat.cxx
+++ b/sc/source/core/data/dptabdat.cxx
@@ -226,8 +226,12 @@ void ScDPTableData::CalcResultsFromCacheTable(const ScDPCacheTable&
rCacheTable,
sal_Int32 nRowSize = rCacheTable.getRowSize();
for (sal_Int32 nRow = 0; nRow < nRowSize; ++nRow)
{
- if (!rCacheTable.isRowActive(nRow))
+ sal_Int32 nLastRow;
+ if (!rCacheTable.isRowActive(nRow, &nLastRow))
+ {
+ nRow = nLastRow;
continue;
+ }
CalcRowData aData;
FillRowDataFromCacheTable(nRow, rCacheTable, rInfo, aData);
--
1.7.3.4
Context
- [REVIEW 3-6] Improve run-time performance of pivot table · Kohei Yoshida
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (
MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.