Hi,
There were a number of problems with the RTF import of stateful
encodings (i.e. where we can't process the characters one by one) - see
commits:
http://cgit.freedesktop.org/libreoffice/core/commit/?id=d7baacd81bbcfaa35b7fbf9981fa3fa7c9fb1cb4
http://cgit.freedesktop.org/libreoffice/core/commit/?id=0915f1b3d77afa694a2ca692aec307846827da99
http://cgit.freedesktop.org/libreoffice/core/commit/?id=00859026749e005759ce4e7115b746b064cd902b
I'm attaching a backport of these patches for the -3-5 branch: the first
commit fixes the real problem, and the two later commits amend it,
correcting two issues with the original fix.
Thanks,
Miklos
From c419aa5bfdff4041132520d2d39b3b1571376fd1 Mon Sep 17 00:00:00 2001
From: Miklos Vajna <vmiklos@suse.cz>
Date: Mon, 13 Feb 2012 18:42:56 +0100
Subject: [PATCH] fdo#45543 fix RTF import of ms932-encoded characters
(cherry picked from commits d7baacd81bbcfaa35b7fbf9981fa3fa7c9fb1cb4,
0915f1b3d77afa694a2ca692aec307846827da99 and
00859026749e005759ce4e7115b746b064cd902b)
---
writerfilter/source/rtftok/rtfdocumentimpl.cxx | 32 +++++++++++++++++++----
writerfilter/source/rtftok/rtfdocumentimpl.hxx | 6 +++-
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 737350b..824615d 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -305,7 +305,8 @@ RTFDocumentImpl::RTFDocumentImpl(uno::Reference<uno::XComponentContext> const& x
m_bWasInFrame(false),
m_bIsInFrame(false),
m_bHasPage(false),
- m_aUnicodeBuffer()
+ m_aUnicodeBuffer(),
+ m_aHexBuffer()
{
OSL_ASSERT(xInputStream.is());
m_pInStream.reset(utl::UcbStreamHelper::CreateStream(xInputStream, sal_True));
@@ -738,9 +739,13 @@ int RTFDocumentImpl::resolvePict(bool bInline)
int RTFDocumentImpl::resolveChars(char ch)
{
+ if (m_aStates.top().nInternalState != INTERNAL_HEX)
+ checkUnicode(false, true);
+
OStringBuffer aBuf;
bool bUnicodeChecked = false;
+ bool bSkipped = false;
while(!Strm().IsEof() && ch != '{' && ch != '}' && ch != '\\')
{
if (ch != 0x0d && ch != 0x0a)
@@ -749,13 +754,16 @@ int RTFDocumentImpl::resolveChars(char ch)
{
if (!bUnicodeChecked)
{
- checkUnicode();
+ checkUnicode(true, false);
bUnicodeChecked = true;
}
aBuf.append(ch);
}
else
+ {
+ bSkipped = true;
m_aStates.top().nCharsToSkip--;
+ }
}
// read a single char if we're in hex mode
if (m_aStates.top().nInternalState == INTERNAL_HEX)
@@ -764,6 +772,14 @@ int RTFDocumentImpl::resolveChars(char ch)
}
if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof())
Strm().SeekRel(-1);
+
+ if (m_aStates.top().nInternalState == INTERNAL_HEX && m_aStates.top().nDestinationState !=
DESTINATION_LEVELNUMBERS)
+ {
+ if (!bSkipped)
+ m_aHexBuffer.append(ch);
+ return 0;
+ }
+
if (m_aStates.top().nDestinationState == DESTINATION_SKIP)
return 0;
OString aStr = aBuf.makeStringAndClear();
@@ -2031,8 +2047,7 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
{
- if (nKeyword != RTF_U)
- checkUnicode();
+ checkUnicode(nKeyword != RTF_U, true);
RTFSkipDestination aSkip(*this);
int nSprm = 0;
RTFValue::Pointer_t pIntValue(new RTFValue(nParam));
@@ -3370,13 +3385,18 @@ void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown)
m_bSkipUnknown = bSkipUnknown;
}
-void RTFDocumentImpl::checkUnicode()
+void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
{
- if (m_aUnicodeBuffer.getLength() > 0)
+ if (bUnicode && m_aUnicodeBuffer.getLength() > 0)
{
OUString aString = m_aUnicodeBuffer.makeStringAndClear();
text(aString);
}
+ if (bHex && m_aHexBuffer.getLength() > 0)
+ {
+ OUString aString = OStringToOUString(m_aHexBuffer.makeStringAndClear(),
m_aStates.top().nCurrentEncoding);
+ text(aString);
+ }
}
RTFParserState::RTFParserState()
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 713fd7d..3644375 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -364,8 +364,8 @@ namespace writerfilter {
void replayBuffer(RTFBuffer_t& rBuffer);
/// If we got tokens indicating we're in a frame.
bool inFrame();
- /// If we have some unicode characters to send.
- void checkUnicode();
+ /// If we have some unicode or hex characters to send.
+ void checkUnicode(bool bUnicode = true, bool bHex = true);
uno::Reference<uno::XComponentContext> const& m_xContext;
uno::Reference<io::XInputStream> const& m_xInputStream;
@@ -454,6 +454,8 @@ namespace writerfilter {
bool m_bHasPage;
// Unicode characters are collected here so we don't have to send them one by one.
rtl::OUStringBuffer m_aUnicodeBuffer;
+ /// Same for hex characters.
+ rtl::OStringBuffer m_aHexBuffer;
};
} // namespace rtftok
} // namespace writerfilter
--
1.7.7
Context
- [REVIEW] fdo#45543 fix RTF import of ms932-encoded characters · Miklos Vajna
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.