Hi,
I have submitted a patch for review:
https://gerrit.libreoffice.org/2436
To pull it, you can do:
git pull ssh://gerrit.libreoffice.org:29418/core refs/changes/36/2436/1
fdo#61507 import/export RTF_UPR and RTF_UD
In short, these commits make the RTF filter import and export Unicode
characters in the document title correctly. Previously we failed to import
such files from Word, and the export result caused problems in WordPad
(Word handled it fine).
(cherry picked from commits 0805b222f87bf99ec0c53ca678d1c670eb5293a2,
3a934d928e455eca38f124072c20a624a64aa225 and
5de52551a963b932cc23c2ea75f709fa1924520b)
Change-Id: Ic9417d0f23d44149acb3ae3dc9d4c281058a1b36
---
M filter/inc/filter/msfilter/rtfutil.hxx
M filter/source/msfilter/rtfutil.cxx
A sw/qa/extras/rtfexport/data/fdo61507.rtf
M sw/qa/extras/rtfexport/rtfexport.cxx
M sw/source/filter/ww8/rtfexport.cxx
M sw/source/filter/ww8/rtfexport.hxx
M writerfilter/source/rtftok/rtfdocumentimpl.cxx
M writerfilter/source/rtftok/rtfdocumentimpl.hxx
8 files changed, 125 insertions(+), 19 deletions(-)
diff --git a/filter/inc/filter/msfilter/rtfutil.hxx b/filter/inc/filter/msfilter/rtfutil.hxx
index 6f5d82c..aa842fd 100644
--- a/filter/inc/filter/msfilter/rtfutil.hxx
+++ b/filter/inc/filter/msfilter/rtfutil.hxx
@@ -41,10 +41,27 @@
MSFILTER_DLLPUBLIC OString OutHex(sal_uLong nHex, sal_uInt8 nLen);
/// Handles correct unicode and legacy export of a single character.
-MSFILTER_DLLPUBLIC OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc);
+MSFILTER_DLLPUBLIC OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc, bool*
pSuccess = 0, bool bUnicode = true);
-/// Handles correct unicode and legacy export of a string.
-MSFILTER_DLLPUBLIC OString OutString(const String &rStr, rtl_TextEncoding eDestEnc);
+/**
+ * Handles correct unicode and legacy export of a string.
+ *
+ * @param rStr the string to export
+ * @param eDestEnc the legacy encoding to use
+ * @param bUnicode if unicode output is wanted as well, or just legacy
+ */
+MSFILTER_DLLPUBLIC OString OutString(const String &rStr, rtl_TextEncoding eDestEnc, bool bUnicode
= true);
+
+/**
+ * Handles correct unicode and legacy export of a string, when a
+ * '{' \upr '{' keyword ansi_text '}{\*' \ud '{' keyword Unicode_text '}}}'
+ * construct should be used.
+ *
+ * @param pToken the keyword
+ * @param rStr the text to export
+ * @param eDestEnc the legacy encoding to use
+ */
+MSFILTER_DLLPUBLIC OString OutStringUpr(const sal_Char *pToken, const String &rStr,
rtl_TextEncoding eDestEnc);
}
}
diff --git a/filter/source/msfilter/rtfutil.cxx b/filter/source/msfilter/rtfutil.cxx
index ebb72bd..3e2dfb0 100644
--- a/filter/source/msfilter/rtfutil.cxx
+++ b/filter/source/msfilter/rtfutil.cxx
@@ -53,8 +53,10 @@
return OString(pStr);
}
-OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc)
+OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc, bool* pSuccess, bool
bUnicode)
{
+ if (pSuccess)
+ *pSuccess = true;
OStringBuffer aBuf;
const sal_Char* pStr = 0;
// 0x0b instead of \n, etc because of the replacements in SwWW8AttrIter::GetSnippet()
@@ -91,10 +93,13 @@
else {
OUString sBuf(&c, 1);
OString sConverted;
- sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
+ if (pSuccess)
+ *pSuccess &= sBuf.convertToString(&sConverted, eDestEnc,
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
+ else
+ sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
const sal_Int32 nLen = sConverted.getLength();
- if (pUCMode)
+ if (pUCMode && bUnicode)
{
if (*pUCMode != nLen)
{
@@ -130,13 +135,13 @@
return aBuf.makeStringAndClear();
}
-OString OutString(const String &rStr, rtl_TextEncoding eDestEnc)
+OString OutString(const String &rStr, rtl_TextEncoding eDestEnc, bool bUnicode)
{
SAL_INFO("filter.ms", OSL_THIS_FUNC << ", rStr = '" << OUString(rStr) << "'");
OStringBuffer aBuf;
int nUCMode = 1;
for (xub_StrLen n = 0; n < rStr.Len(); ++n)
- aBuf.append(OutChar(rStr.GetChar(n), &nUCMode, eDestEnc));
+ aBuf.append(OutChar(rStr.GetChar(n), &nUCMode, eDestEnc, 0, bUnicode));
if (nUCMode != 1) {
aBuf.append(OOO_STRING_SVTOOLS_RTF_UC);
aBuf.append((sal_Int32)1);
@@ -145,6 +150,38 @@
return aBuf.makeStringAndClear();
}
+/// Checks if lossless conversion of the string to eDestEnc is possible or not.
+static bool TryOutString(const String &rStr, rtl_TextEncoding eDestEnc)
+{
+ int nUCMode = 1;
+ for (xub_StrLen n = 0; n < rStr.Len(); ++n)
+ {
+ bool bRet;
+ OutChar(rStr.GetChar(n), &nUCMode, eDestEnc, &bRet);
+ if (!bRet)
+ return false;
+ }
+ return true;
+}
+
+OString OutStringUpr(const sal_Char *pToken, const String &rStr, rtl_TextEncoding eDestEnc)
+{
+ if (TryOutString(rStr, eDestEnc))
+ return OString("{") + pToken + " " + OutString(rStr, eDestEnc) + "}";
+
+ OStringBuffer aRet;
+ aRet.append("{" OOO_STRING_SVTOOLS_RTF_UPR "{");
+ aRet.append(pToken);
+ aRet.append(" ");
+ aRet.append(OutString(rStr, eDestEnc, /*bUnicode =*/ false));
+ aRet.append("}{" OOO_STRING_SVTOOLS_RTF_IGNORE OOO_STRING_SVTOOLS_RTF_UD "{");
+ aRet.append(pToken);
+ aRet.append(" ");
+ aRet.append(OutString(rStr, eDestEnc));
+ aRet.append("}}}");
+ return aRet.makeStringAndClear();
+}
+
}
}
diff --git a/sw/qa/extras/rtfexport/data/fdo61507.rtf b/sw/qa/extras/rtfexport/data/fdo61507.rtf
new file mode 100644
index 0000000..1fe8654
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/fdo61507.rtf
@@ -0,0 +1,12 @@
+{\rtf1
+{\info
+{\upr
+{\title \'c9\'c1???}
+{\*\ud\uc0
+{\title \'c9\'c1
+{\uc1\u336 O\u368 U\u8749 ?}
+}
+}
+}
+}
+Hello.}
diff --git a/sw/qa/extras/rtfexport/rtfexport.cxx b/sw/qa/extras/rtfexport/rtfexport.cxx
index 4f92fbb..18ddf1b 100644
--- a/sw/qa/extras/rtfexport/rtfexport.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport.cxx
@@ -70,6 +70,7 @@
void testTextFrames();
void testFdo53604();
void testFdo52286();
+ void testFdo61507();
CPPUNIT_TEST_SUITE(Test);
#if !defined(MACOSX) && !defined(WNT)
@@ -114,6 +115,7 @@
{"textframes.odt", &Test::testTextFrames},
{"fdo53604.odt", &Test::testFdo53604},
{"fdo52286.odt", &Test::testFdo52286},
+ {"fdo61507.rtf", &Test::testFdo61507},
};
// Don't test the first import of these, for some reason those tests fail
const char* aBlacklist[] = {
@@ -469,6 +471,23 @@
CPPUNIT_ASSERT_EQUAL(sal_Int32(58), getProperty<sal_Int32>(getRun(getParagraph(2), 2),
"CharEscapementHeight"));
}
+void Test::testFdo61507()
+{
+ /*
+ * Unicode-only characters in \title confused Wordpad. Once the exporter
+ * was fixed to guard the problematic characters with \upr and \ud, the
+ * importer didn't cope with these new keywords.
+ */
+
+ uno::Reference<document::XDocumentPropertiesSupplier> xDocumentPropertiesSupplier(mxComponent,
uno::UNO_QUERY);
+ uno::Reference<document::XDocumentProperties>
xDocumentProperties(xDocumentPropertiesSupplier->getDocumentProperties());
+ OUString aExpected = OUString("ÉÁŐŰ∭", 11, RTL_TEXTENCODING_UTF8);
+ CPPUNIT_ASSERT_EQUAL(aExpected, xDocumentProperties->getTitle());
+
+ // Only "Hello.", no additional characters.
+ CPPUNIT_ASSERT_EQUAL(6, getLength());
+}
+
CPPUNIT_TEST_SUITE_REGISTRATION(Test);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sw/source/filter/ww8/rtfexport.cxx b/sw/source/filter/ww8/rtfexport.cxx
index 8259d59..fc1afec 100644
--- a/sw/source/filter/ww8/rtfexport.cxx
+++ b/sw/source/filter/ww8/rtfexport.cxx
@@ -395,7 +395,7 @@
}
if (xDocProps.is()) {
- OutUnicode(OOO_STRING_SVTOOLS_RTF_TITLE, xDocProps->getTitle());
+ OutUnicode(OOO_STRING_SVTOOLS_RTF_TITLE, xDocProps->getTitle(), true);
OutUnicode(OOO_STRING_SVTOOLS_RTF_SUBJECT, xDocProps->getSubject());
OutUnicode(OOO_STRING_SVTOOLS_RTF_KEYWORDS,
@@ -791,13 +791,18 @@
return m_pWriter->OutLong( Strm(), nVal );
}
-void RtfExport::OutUnicode(const sal_Char *pToken, const String &rContent)
+void RtfExport::OutUnicode(const sal_Char *pToken, const String &rContent, bool bUpr)
{
if (rContent.Len())
{
- Strm() << '{' << pToken << ' ';
- Strm() << msfilter::rtfutil::OutString( rContent, eCurrentEncoding ).getStr();
- Strm() << '}';
+ if (!bUpr)
+ {
+ Strm() << '{' << pToken << ' ';
+ Strm() << msfilter::rtfutil::OutString( rContent, eCurrentEncoding ).getStr();
+ Strm() << '}';
+ }
+ else
+ Strm() << msfilter::rtfutil::OutStringUpr(pToken, rContent, eCurrentEncoding).getStr();
}
}
diff --git a/sw/source/filter/ww8/rtfexport.hxx b/sw/source/filter/ww8/rtfexport.hxx
index c21a5e9..30d4c79 100644
--- a/sw/source/filter/ww8/rtfexport.hxx
+++ b/sw/source/filter/ww8/rtfexport.hxx
@@ -156,7 +156,7 @@
SvStream& Strm();
SvStream& OutULong( sal_uLong nVal );
SvStream& OutLong( long nVal );
- void OutUnicode(const sal_Char *pToken, const String &rContent);
+ void OutUnicode(const sal_Char *pToken, const String &rContent, bool bUpr = false);
void OutDateTime(const sal_Char* pStr, const util::DateTime& rDT );
void OutPageDescription( const SwPageDesc& rPgDsc, sal_Bool bWriteReset, sal_Bool
bCheckForFirstPage );
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index cf1a1aa..d966275 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -1516,7 +1516,11 @@
m_aStates.top().nDestinationState = DESTINATION_PARAGRAPHNUMBERING_TEXTBEFORE;
break;
case RTF_TITLE:
- m_aStates.top().nDestinationState = DESTINATION_TITLE;
+ // \title inside \upr but outside \ud should be ignored.
+ if (m_aStates.top().nDestinationState != DESTINATION_UPR)
+ m_aStates.top().nDestinationState = DESTINATION_TITLE;
+ else
+ m_aStates.top().nDestinationState = DESTINATION_SKIP;
break;
case RTF_SUBJECT:
m_aStates.top().nDestinationState = DESTINATION_SUBJECT;
@@ -1614,6 +1618,13 @@
OPEN_M_TOKEN(SPREPR, sPrePr);
OPEN_M_TOKEN(BOX, box);
OPEN_M_TOKEN(EQARR, eqArr);
+ case RTF_UPR:
+ m_aStates.top().nDestinationState = DESTINATION_UPR;
+ break;
+ case RTF_UD:
+ // Anything inside \ud is just normal Unicode content.
+ m_aStates.top().nDestinationState = DESTINATION_NORMAL;
+ break;
default:
SAL_INFO("writerfilter", OSL_THIS_FUNC << ": TODO handle destination '" <<
lcl_RtfToString(nKeyword) << "'");
// Make sure we skip destinations (even without \*) till we don't handle them
@@ -3807,10 +3818,6 @@
if (m_xDocumentProperties.is())
m_xDocumentProperties->setGenerator(m_aStates.top().aDestinationText.makeStringAndClear());
break;
- case DESTINATION_TITLE:
- if (m_xDocumentProperties.is())
- m_xDocumentProperties->setTitle(m_aStates.top().aDestinationText.makeStringAndClear());
- break;
case DESTINATION_SUBJECT:
if (m_xDocumentProperties.is())
m_xDocumentProperties->setSubject(m_aStates.top().aDestinationText.makeStringAndClear());
@@ -4233,6 +4240,14 @@
aState.nDestinationState == DESTINATION_SHPPICT ||
aState.nDestinationState == DESTINATION_SHAPE)
m_aStates.top().aFrame = aState.aFrame;
+ else if (aState.nDestinationState == DESTINATION_TITLE)
+ {
+ if (m_aStates.top().nDestinationState == DESTINATION_TITLE)
+ // The parent is a title as well, just append what we have so far.
+ m_aStates.top().aDestinationText.append(aState.aDestinationText.makeStringAndClear());
+ else if (m_xDocumentProperties.is())
+ m_xDocumentProperties->setTitle(aState.aDestinationText.makeStringAndClear());
+ }
if (m_pCurrentBuffer == &m_aSuperBuffer)
{
if (!m_bHasFootnote)
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 82490eb..a26d3ff 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -178,6 +178,7 @@
DESTINATION_MGROW,
DESTINATION_MBOX,
DESTINATION_MEQARR,
+ DESTINATION_UPR,
};
enum RTFBorderState
--
To view, visit https://gerrit.libreoffice.org/2436
To unsubscribe, visit https://gerrit.libreoffice.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic9417d0f23d44149acb3ae3dc9d4c281058a1b36
Gerrit-PatchSet: 1
Gerrit-Project: core
Gerrit-Branch: libreoffice-4-0
Gerrit-Owner: Miklos Vajna <vmiklos@suse.cz>
Context
- [PATCH libreoffice-4-0] fdo#61507 import/export RTF_UPR and RTF_UD · Miklos Vajna (via Code Review)
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.