Hi,
I have submitted a patch for review:
https://gerrit.libreoffice.org/3925
To pull it, you can do:
git pull ssh://gerrit.libreoffice.org:29418/core refs/changes/25/3925/1
resolved fdo#56772 keep track of HTML ON/OFF tokens
Regression introduced with 11cbcb8b08b540b144a5df744e9fba0b6ba8144a
followed by 56d6589368c2e88cffec0c2e518f7c90863eeae6
Deep down in svtools/source/svhtml/parhtml.cxx, HTMLParser::_GetNextToken()
generated only an HTML_TABLEDATA_OFF for <td .../> without a matching
HTML_TABLEDATA_ON (actually the same holds for all <XXX/> ON/OFF tokens). This
confuses a state machine that doesn't keep track of such unexpected
closures and also expects all attributes of an element at an ON token.
Only the parser knows this is actually one token, but it needs to generate
separate ON/OFF tokens.
These bugs mentioned in the original code and commits are still fixed
with this change:
https://bugs.freedesktop.org/show_bug.cgi?id=34666
https://bugs.freedesktop.org/show_bug.cgi?id=36080
https://bugs.freedesktop.org/show_bug.cgi?id=36390
Change-Id: I2b3190d297a35ee3dfda95f9a4841f7c53ed4a92
(cherry picked from commit bb7360ca9929e9b395b3c903f460c9ed5efdce4d)
---
M svtools/inc/svtools/parhtml.hxx
M svtools/source/svhtml/parhtml.cxx
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/svtools/inc/svtools/parhtml.hxx b/svtools/inc/svtools/parhtml.hxx
index f8c0c61..fdfeeab 100644
--- a/svtools/inc/svtools/parhtml.hxx
+++ b/svtools/inc/svtools/parhtml.hxx
@@ -144,6 +144,8 @@
sal_uInt32 nPre_LinePos; // Pos in der Line im PRE-Tag
+ int mnPendingOffToken; ///< OFF token pending for a <XX.../> ON/OFF ON token
+
String aEndToken;
protected:
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 68232e5..91bae59 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -297,7 +297,8 @@
bEndTokenFound(false),
bPre_IgnoreNewPara(false),
bReadNextChar(false),
- bReadComment(false)
+ bReadComment(false),
+ mnPendingOffToken(0)
{
//#i76649, default to UTF-8 for HTML unless we know differently
SetSrcEncoding(RTL_TEXTENCODING_UTF8);
@@ -1057,6 +1058,15 @@
int nRet = 0;
sSaveToken.Erase();
+ if (mnPendingOffToken)
+ {
+ // HTML_<TOKEN>_OFF generated for HTML_<TOKEN>_ON
+ nRet = mnPendingOffToken;
+ mnPendingOffToken = 0;
+ aToken.Erase();
+ return nRet;
+ }
+
// Delete options
if (!maOptions.empty())
maOptions.clear();
@@ -1204,10 +1214,14 @@
ScanText( '>' );
// fdo#34666 fdo#36080 fdo#36390: closing "/>"?:
- // return HTML_<TOKEN>_OFF instead of HTML_<TOKEN>_ON
+ // generate pending HTML_<TOKEN>_OFF for HTML_<TOKEN>_ON
+ // Do not convert this to a single HTML_<TOKEN>_OFF
+ // which lead to fdo#56772.
if ((HTML_TOKEN_ONOFF & nRet) && (aToken.Len() >= 1) &&
- ('/' == aToken.GetChar(aToken.Len()-1))) {
- ++nRet; // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF;
+ ('/' == aToken.GetChar(aToken.Len()-1)))
+ {
+ mnPendingOffToken = nRet + 1; // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF
+ aToken.Erase( aToken.Len()-1, 1); // remove trailing '/'
}
if( sal_Unicode(EOF) == nNextCh && rInput.IsEof() )
{
--
To view, visit https://gerrit.libreoffice.org/3925
To unsubscribe, visit https://gerrit.libreoffice.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2b3190d297a35ee3dfda95f9a4841f7c53ed4a92
Gerrit-PatchSet: 1
Gerrit-Project: core
Gerrit-Branch: libreoffice-4-0
Gerrit-Owner: Eike Rathke <erack@redhat.com>
Context
- [PATCH libreoffice-4-0] resolved fdo#56772 keep track of HTML ON/OFF tokens · Eike Rathke (via Code Review)
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.