commit baca2ec8d5a457512e25b499c3cacc7a66ca853f
Author: Eike Rathke <erack@redhat.com>
Date: Tue Feb 28 22:14:08 2017 +0100
FastSaxSerializer: SAL_WARN() when writing invalid XML characters
This catches things for OOXML, that could be escaped using _xHHHH_
Change-Id: I937f67dc5edd3c0e5727d74bebb736dc82bdc53d
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 620fe68..a571829 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -101,6 +101,26 @@ namespace sax_fastparser {
write( sOutput.getStr(), sOutput.getLength(), bEscape );
}
+#if OSL_DEBUG_LEVEL > 0
+ /** Characters not allowed in XML 1.0
+ XML 1.1 would exclude only U+0000
+
+ Returns true if the byte c is below 0x20 and is not one of 0x09, 0x0a
+ or 0x0d; returns false for every other byte value. This helper is only
+ compiled when OSL_DEBUG_LEVEL > 0.
+ */
+ bool invalidChar( char c )
+ {
+ if (static_cast<unsigned char>(c) >= 0x20) // compared as unsigned, so bytes 0x80..0xFF also count as >= 0x20
+ return false;
+
+ switch (c)
+ {
+ case 0x09: // horizontal tab
+ case 0x0a: // line feed
+ case 0x0d: // carriage return
+ return false;
+ }
+ return true; // any remaining value below 0x20
+ }
+#endif
+
void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
{
if (nLen == -1)
@@ -112,6 +132,7 @@ namespace sax_fastparser {
return;
}
+ bool bGood = true;
for (sal_Int32 i = 0; i < nLen; ++i)
{
char c = pStr[ i ];
@@ -124,9 +145,26 @@ namespace sax_fastparser {
case '"': writeBytes( "&quot;", 6 ); break;
case '\n': writeBytes( "&#10;", 5 ); break;
case '\r': writeBytes( "&#13;", 5 ); break;
- default: writeBytes( &c, 1 ); break;
+ default:
+#if OSL_DEBUG_LEVEL > 0
+ /* FIXME: we should escape such invalid characters
+ * in the _xHHHH_ form OOXML uses. Note that also a
+ * literal "_x0008_" would have to be escaped then
+ * as _x005F_x0008_ (where only the leading '_' is
+ * escaped as _x005F_). */
+ if (invalidChar(pStr[i]))
+ {
+ bGood = false;
+ // The SAL_WARN() for the single character is
+ // issued in writeBytes(), just gather for the
+ // SAL_WARN_IF() below.
+ }
+#endif
+ writeBytes( &c, 1 ); break;
}
}
+ SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) <<
"'");
+ (void)bGood;
}
void FastSaxSerializer::endDocument()
@@ -496,6 +534,21 @@ namespace sax_fastparser {
/** Passes nLen bytes starting at pStr to maCachedOutputStream.writeBytes().
    When OSL_DEBUG_LEVEL > 0 the bytes are first checked with invalidChar():
    each offending byte gets its own SAL_WARN, and for buffers longer than
    one byte a further warning shows up to the first 42 bytes of the buffer.
    The bytes are handed on unchanged in either case.
*/
void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
{
+#if OSL_DEBUG_LEVEL > 0
+ {
+ bool bGood = true; // cleared as soon as one byte fails invalidChar()
+ for (size_t i=0; i < nLen; ++i)
+ {
+ if (invalidChar(pStr[i]))
+ {
+ bGood = false;
+ SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
+ std::hex << int(static_cast<unsigned char>(pStr[i]))); // via unsigned char so bytes >= 0x80 print as non-negative values
+ }
+ }
+ SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) <<
"'"); /* Context dump of at most 42 bytes; skipped for single-byte writes.
+ NOTE(review): nLen is size_t but std::min<sal_Int32> narrows it - confirm
+ callers never pass lengths beyond the sal_Int32 range. */
+ }
+#endif
maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen ); // bytes are forwarded as-is, no escaping at this level
}
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (
MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.