| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2019 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the test suite of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
| 21 | ** included in the packaging of this file. Please review the following |
| 22 | ** information to ensure the GNU General Public License requirements will |
| 23 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
| 24 | ** |
| 25 | ** $QT_END_LICENSE$ |
| 26 | ** |
| 27 | ****************************************************************************/ |
| 28 | |
| 29 | #include <QtTest/QtTest> |
| 30 | #include <QBuffer> |
| 31 | #include <QDebug> |
| 32 | #include <QFontInfo> |
| 33 | #include <QTextDocument> |
| 34 | #include <QTextCursor> |
| 35 | #include <QTextBlock> |
| 36 | #include <QTextDocumentFragment> |
| 37 | #include <QTextList> |
| 38 | #include <QTextTable> |
| 39 | |
| 40 | #include <private/qtextmarkdownimporter_p.h> |
| 41 | |
// Uncomment to make the tests that support it dump the imported document as HTML under /tmp
// #define DEBUG_WRITE_HTML
| 43 | |
| 44 | Q_LOGGING_CATEGORY(lcTests, "qt.text.tests" ) |
| 45 | |
| 46 | static const QChar LineBreak = QChar(0x2028); |
| 47 | static const QChar Tab = QLatin1Char('\t'); |
| 48 | static const QChar Space = QLatin1Char(' '); |
| 49 | static const QChar Period = QLatin1Char('.'); |
| 50 | |
| 51 | class tst_QTextMarkdownImporter : public QObject |
| 52 | { |
| 53 | Q_OBJECT |
| 54 | |
| 55 | private slots: |
| 56 | void headingBulletsContinuations(); |
| 57 | void thematicBreaks(); |
| 58 | void lists_data(); |
| 59 | void lists(); |
| 60 | void nestedSpans_data(); |
| 61 | void nestedSpans(); |
| 62 | void avoidBlankLineAtBeginning_data(); |
| 63 | void avoidBlankLineAtBeginning(); |
| 64 | void pathological_data(); |
| 65 | void pathological(); |
| 66 | |
| 67 | public: |
| 68 | enum CharFormat { |
| 69 | Normal = 0x0, |
| 70 | Italic = 0x1, |
| 71 | Bold = 0x02, |
| 72 | Strikeout = 0x04, |
| 73 | Mono = 0x08, |
| 74 | Link = 0x10 |
| 75 | }; |
| 76 | Q_ENUM(CharFormat) |
| 77 | Q_DECLARE_FLAGS(CharFormats, CharFormat) |
| 78 | }; |
| 79 | |
| 80 | Q_DECLARE_METATYPE(tst_QTextMarkdownImporter::CharFormats) |
| 81 | Q_DECLARE_OPERATORS_FOR_FLAGS(tst_QTextMarkdownImporter::CharFormats) |
| 82 | |
| 83 | void tst_QTextMarkdownImporter::headingBulletsContinuations() |
| 84 | { |
| 85 | const QStringList expectedBlocks = QStringList() << |
| 86 | "heading" << |
| 87 | "bullet 1 continuation line 1, indented via tab" << |
| 88 | "bullet 2 continuation line 2, indented via 4 spaces" << |
| 89 | "bullet 3" << |
| 90 | "continuation paragraph 3, indented via tab" << |
| 91 | "bullet 3.1" << |
| 92 | "continuation paragraph 3.1, indented via 4 spaces" << |
| 93 | "bullet 3.2 continuation line, indented via 2 tabs" << |
| 94 | "bullet 4" << |
| 95 | "continuation paragraph 4, indented via 4 spaces and continuing onto another line too" << |
| 96 | "bullet 5" << |
| 97 | // indenting by only 2 spaces is perhaps non-standard but currently is OK |
| 98 | "continuation paragraph 5, indented via 2 spaces and continuing onto another line too" << |
| 99 | "bullet 6" << |
| 100 | "plain old paragraph at the end" ; |
| 101 | |
| 102 | QFile f(QFINDTESTDATA("data/headingBulletsContinuations.md" )); |
| 103 | QVERIFY(f.open(QFile::ReadOnly | QIODevice::Text)); |
| 104 | QString md = QString::fromUtf8(str: f.readAll()); |
| 105 | f.close(); |
| 106 | |
| 107 | QTextDocument doc; |
| 108 | QTextMarkdownImporter(QTextMarkdownImporter::DialectGitHub).import(doc: &doc, markdown: md); |
| 109 | QTextFrame::iterator iterator = doc.rootFrame()->begin(); |
| 110 | QTextFrame *currentFrame = iterator.currentFrame(); |
| 111 | QStringList::const_iterator expectedIt = expectedBlocks.constBegin(); |
| 112 | int i = 0; |
| 113 | while (!iterator.atEnd()) { |
| 114 | // There are no child frames |
| 115 | QCOMPARE(iterator.currentFrame(), currentFrame); |
| 116 | // Check whether we got the right child block |
| 117 | QTextBlock block = iterator.currentBlock(); |
| 118 | QCOMPARE(block.text().contains(LineBreak), false); |
| 119 | QCOMPARE(block.text().contains(Tab), false); |
| 120 | QVERIFY(!block.text().startsWith(Space)); |
| 121 | int expectedIndentation = 0; |
| 122 | if (block.text().contains(s: QLatin1String("continuation paragraph" ))) |
| 123 | expectedIndentation = (block.text().contains(c: Period) ? 2 : 1); |
| 124 | qCDebug(lcTests) << i << "child block" << block.text() << "indentation" << block.blockFormat().indent(); |
| 125 | QVERIFY(expectedIt != expectedBlocks.constEnd()); |
| 126 | QCOMPARE(block.text(), *expectedIt); |
| 127 | if (i > 2) |
| 128 | QCOMPARE(block.blockFormat().indent(), expectedIndentation); |
| 129 | ++iterator; |
| 130 | ++expectedIt; |
| 131 | ++i; |
| 132 | } |
| 133 | QCOMPARE(expectedIt, expectedBlocks.constEnd()); |
| 134 | |
| 135 | #ifdef DEBUG_WRITE_HTML |
| 136 | { |
| 137 | QFile out("/tmp/headingBulletsContinuations.html" ); |
| 138 | out.open(QFile::WriteOnly); |
| 139 | out.write(doc.toHtml().toLatin1()); |
| 140 | out.close(); |
| 141 | } |
| 142 | #endif |
| 143 | } |
| 144 | |
| 145 | void tst_QTextMarkdownImporter::thematicBreaks() |
| 146 | { |
| 147 | int horizontalRuleCount = 0; |
| 148 | int textLinesCount = 0; |
| 149 | |
| 150 | QFile f(QFINDTESTDATA("data/thematicBreaks.md" )); |
| 151 | QVERIFY(f.open(QFile::ReadOnly | QIODevice::Text)); |
| 152 | QString md = QString::fromUtf8(str: f.readAll()); |
| 153 | f.close(); |
| 154 | |
| 155 | QTextDocument doc; |
| 156 | QTextMarkdownImporter(QTextMarkdownImporter::DialectGitHub).import(doc: &doc, markdown: md); |
| 157 | QTextFrame::iterator iterator = doc.rootFrame()->begin(); |
| 158 | QTextFrame *currentFrame = iterator.currentFrame(); |
| 159 | int i = 0; |
| 160 | while (!iterator.atEnd()) { |
| 161 | // There are no child frames |
| 162 | QCOMPARE(iterator.currentFrame(), currentFrame); |
| 163 | // Check whether the block is text or a horizontal rule |
| 164 | QTextBlock block = iterator.currentBlock(); |
| 165 | if (block.blockFormat().hasProperty(propertyId: QTextFormat::BlockTrailingHorizontalRulerWidth)) |
| 166 | ++horizontalRuleCount; |
| 167 | else if (!block.text().isEmpty()) |
| 168 | ++textLinesCount; |
| 169 | qCDebug(lcTests) << i << (block.blockFormat().hasProperty(propertyId: QTextFormat::BlockTrailingHorizontalRulerWidth) ? QLatin1String("- - -" ) : block.text()); |
| 170 | ++iterator; |
| 171 | ++i; |
| 172 | } |
| 173 | QCOMPARE(horizontalRuleCount, 5); |
| 174 | QCOMPARE(textLinesCount, 9); |
| 175 | |
| 176 | #ifdef DEBUG_WRITE_HTML |
| 177 | { |
| 178 | QFile out("/tmp/thematicBreaks.html" ); |
| 179 | out.open(QFile::WriteOnly); |
| 180 | out.write(doc.toHtml().toLatin1()); |
| 181 | out.close(); |
| 182 | } |
| 183 | #endif |
| 184 | } |
| 185 | |
| 186 | void tst_QTextMarkdownImporter::lists_data() |
| 187 | { |
| 188 | QTest::addColumn<QString>(name: "input" ); |
| 189 | QTest::addColumn<int>(name: "expectedItemCount" ); |
| 190 | QTest::addColumn<bool>(name: "expectedEmptyItems" ); |
| 191 | QTest::addColumn<QString>(name: "rewrite" ); |
| 192 | |
| 193 | // Some of these cases show odd behavior, which is subject to change |
| 194 | // as the importer and the writer are tweaked to fix bugs over time. |
| 195 | QTest::newRow(dataTag: "dot newline" ) << ".\n" << 0 << true << ".\n\n" ; |
| 196 | QTest::newRow(dataTag: "number dot newline" ) << "1.\n" << 1 << true << "1. \n" ; |
| 197 | QTest::newRow(dataTag: "star newline" ) << "*\n" << 1 << true << "* \n" ; |
| 198 | QTest::newRow(dataTag: "hyphen newline" ) << "-\n" << 1 << true << "- \n" ; |
| 199 | QTest::newRow(dataTag: "hyphen space newline" ) << "- \n" << 1 << true << "- \n" ; |
| 200 | QTest::newRow(dataTag: "hyphen space letter newline" ) << "- a\n" << 1 << false << "- a\n" ; |
| 201 | QTest::newRow(dataTag: "hyphen nbsp newline" ) << |
| 202 | QString::fromUtf8(str: "-\u00A0\n" ) << 0 << true << "-\u00A0\n\n" ; |
| 203 | QTest::newRow(dataTag: "nested empty lists" ) << "*\n *\n *\n" << 1 << true << " * \n" ; |
| 204 | QTest::newRow(dataTag: "list nested in empty list" ) << "-\n * a\n" << 2 << false << "- \n * a\n" ; |
| 205 | QTest::newRow(dataTag: "lists nested in empty lists" ) |
| 206 | << "-\n * a\n * b\n- c\n *\n + d\n" << 5 << false |
| 207 | << "- \n * a\n * b\n- c *\n + d\n" ; |
| 208 | QTest::newRow(dataTag: "numeric lists nested in empty lists" ) |
| 209 | << "- \n 1. a\n 2. b\n- c\n 1.\n + d\n" << 4 << false |
| 210 | << "- \n 1. a\n 2. b\n- c 1. + d\n" ; |
| 211 | } |
| 212 | |
| 213 | void tst_QTextMarkdownImporter::lists() |
| 214 | { |
| 215 | QFETCH(QString, input); |
| 216 | QFETCH(int, expectedItemCount); |
| 217 | QFETCH(bool, expectedEmptyItems); |
| 218 | QFETCH(QString, rewrite); |
| 219 | |
| 220 | QTextDocument doc; |
| 221 | doc.setMarkdown(markdown: input); // QTBUG-78870 : don't crash |
| 222 | QTextFrame::iterator iterator = doc.rootFrame()->begin(); |
| 223 | QTextFrame *currentFrame = iterator.currentFrame(); |
| 224 | int i = 0; |
| 225 | int itemCount = 0; |
| 226 | bool emptyItems = true; |
| 227 | while (!iterator.atEnd()) { |
| 228 | // There are no child frames |
| 229 | QCOMPARE(iterator.currentFrame(), currentFrame); |
| 230 | // Check whether the block is text or a horizontal rule |
| 231 | QTextBlock block = iterator.currentBlock(); |
| 232 | if (block.textList()) { |
| 233 | ++itemCount; |
| 234 | if (!block.text().isEmpty()) |
| 235 | emptyItems = false; |
| 236 | } |
| 237 | qCDebug(lcTests, "%d %s%s" , i, |
| 238 | (block.textList() ? "<li>" : "<p>" ), qPrintable(block.text())); |
| 239 | ++iterator; |
| 240 | ++i; |
| 241 | } |
| 242 | QCOMPARE(itemCount, expectedItemCount); |
| 243 | QCOMPARE(emptyItems, expectedEmptyItems); |
| 244 | QCOMPARE(doc.toMarkdown(), rewrite); |
| 245 | } |
| 246 | |
| 247 | void tst_QTextMarkdownImporter::nestedSpans_data() |
| 248 | { |
| 249 | QTest::addColumn<QString>(name: "input" ); |
| 250 | QTest::addColumn<int>(name: "wordToCheck" ); |
| 251 | QTest::addColumn<CharFormats>(name: "expectedFormat" ); |
| 252 | |
| 253 | QTest::newRow(dataTag: "bold italic" ) |
| 254 | << "before ***bold italic*** after" |
| 255 | << 1 << (Bold | Italic); |
| 256 | QTest::newRow(dataTag: "bold strikeout" ) |
| 257 | << "before **~~bold strikeout~~** after" |
| 258 | << 1 << (Bold | Strikeout); |
| 259 | QTest::newRow(dataTag: "italic strikeout" ) |
| 260 | << "before *~~italic strikeout~~* after" |
| 261 | << 1 << (Italic | Strikeout); |
| 262 | QTest::newRow(dataTag: "bold italic strikeout" ) |
| 263 | << "before ***~~bold italic strikeout~~*** after" |
| 264 | << 1 << (Bold | Italic | Strikeout); |
| 265 | QTest::newRow(dataTag: "bold link text" ) |
| 266 | << "before [**bold link**](https://qt.io) after" |
| 267 | << 1 << (Bold | Link); |
| 268 | QTest::newRow(dataTag: "italic link text" ) |
| 269 | << "before [*italic link*](https://qt.io) after" |
| 270 | << 1 << (Italic | Link); |
| 271 | QTest::newRow(dataTag: "bold italic link text" ) |
| 272 | << "before [***bold italic link***](https://qt.io) after" |
| 273 | << 1 << (Bold | Italic | Link); |
| 274 | QTest::newRow(dataTag: "strikeout link text" ) |
| 275 | << "before [~~strikeout link~~](https://qt.io) after" |
| 276 | << 1 << (Strikeout | Link); |
| 277 | QTest::newRow(dataTag: "strikeout bold italic link text" ) |
| 278 | << "before [~~***strikeout bold italic link***~~](https://qt.io) after" |
| 279 | << 1 << (Strikeout | Bold | Italic | Link); |
| 280 | QTest::newRow(dataTag: "bold image alt" ) |
| 281 | << "before [**bold image alt**](/path/to/image.png) after" |
| 282 | << 1 << (Bold | Link); |
| 283 | QTest::newRow(dataTag: "bold strikeout italic image alt" ) |
| 284 | << "before [**~~*bold strikeout italic image alt*~~**](/path/to/image.png) after" |
| 285 | << 1 << (Strikeout | Bold | Italic | Link); |
| 286 | // code spans currently override all surrounding formatting |
| 287 | QTest::newRow(dataTag: "code in italic span" ) |
| 288 | << "before *italic `code` and* after" |
| 289 | << 2 << (Mono | Normal); |
| 290 | // but the format after the code span ends should revert to what it was before |
| 291 | QTest::newRow(dataTag: "code in italic strikeout bold span" ) |
| 292 | << "before *italic ~~strikeout **bold `code` and**~~* after" |
| 293 | << 5 << (Bold | Italic | Strikeout); |
| 294 | } |
| 295 | |
| 296 | void tst_QTextMarkdownImporter::nestedSpans() |
| 297 | { |
| 298 | QFETCH(QString, input); |
| 299 | QFETCH(int, wordToCheck); |
| 300 | QFETCH(CharFormats, expectedFormat); |
| 301 | |
| 302 | QTextDocument doc; |
| 303 | doc.setMarkdown(markdown: input); |
| 304 | |
| 305 | #ifdef DEBUG_WRITE_HTML |
| 306 | { |
| 307 | QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".html" ); |
| 308 | out.open(QFile::WriteOnly); |
| 309 | out.write(doc.toHtml().toLatin1()); |
| 310 | out.close(); |
| 311 | } |
| 312 | #endif |
| 313 | |
| 314 | QTextFrame::iterator iterator = doc.rootFrame()->begin(); |
| 315 | QTextFrame *currentFrame = iterator.currentFrame(); |
| 316 | while (!iterator.atEnd()) { |
| 317 | // There are no child frames |
| 318 | QCOMPARE(iterator.currentFrame(), currentFrame); |
| 319 | // Check the QTextCharFormat of the specified word |
| 320 | QTextCursor cur(iterator.currentBlock()); |
| 321 | cur.movePosition(op: QTextCursor::NextWord, QTextCursor::MoveAnchor, n: wordToCheck); |
| 322 | cur.select(selection: QTextCursor::WordUnderCursor); |
| 323 | QTextCharFormat fmt = cur.charFormat(); |
| 324 | qCDebug(lcTests) << "word" << wordToCheck << cur.selectedText() << "font" << fmt.font() |
| 325 | << "weight" << fmt.fontWeight() << "italic" << fmt.fontItalic() |
| 326 | << "strikeout" << fmt.fontStrikeOut() << "anchor" << fmt.isAnchor() |
| 327 | << "monospace" << QFontInfo(fmt.font()).fixedPitch() // depends on installed fonts (QTBUG-75649) |
| 328 | << fmt.fontFixedPitch() |
| 329 | << fmt.hasProperty(propertyId: QTextFormat::FontFixedPitch) |
| 330 | << "expected" << expectedFormat; |
| 331 | QCOMPARE(fmt.fontWeight() > QFont::Normal, expectedFormat.testFlag(Bold)); |
| 332 | QCOMPARE(fmt.fontItalic(), expectedFormat.testFlag(Italic)); |
| 333 | QCOMPARE(fmt.fontStrikeOut(), expectedFormat.testFlag(Strikeout)); |
| 334 | QCOMPARE(fmt.isAnchor(), expectedFormat.testFlag(Link)); |
| 335 | QCOMPARE(fmt.fontFixedPitch(), expectedFormat.testFlag(Mono)); |
| 336 | QCOMPARE(fmt.hasProperty(QTextFormat::FontFixedPitch), expectedFormat.testFlag(Mono)); |
| 337 | ++iterator; |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | void tst_QTextMarkdownImporter::avoidBlankLineAtBeginning_data() |
| 342 | { |
| 343 | QTest::addColumn<QString>(name: "input" ); |
| 344 | QTest::addColumn<int>(name: "expectedNumberOfParagraphs" ); |
| 345 | |
| 346 | QTest::newRow(dataTag: "Text block" ) << QString("Markdown text" ) << 1; |
| 347 | QTest::newRow(dataTag: "Headline" ) << QString("Markdown text\n============" ) << 1; |
| 348 | QTest::newRow(dataTag: "Code block" ) << QString(" Markdown text" ) << 2; |
| 349 | QTest::newRow(dataTag: "Unordered list" ) << QString("* Markdown text" ) << 1; |
| 350 | QTest::newRow(dataTag: "Ordered list" ) << QString("1. Markdown text" ) << 1; |
| 351 | QTest::newRow(dataTag: "Blockquote" ) << QString("> Markdown text" ) << 1; |
| 352 | } |
| 353 | |
| 354 | void tst_QTextMarkdownImporter::avoidBlankLineAtBeginning() // QTBUG-81060 |
| 355 | { |
| 356 | QFETCH(QString, input); |
| 357 | QFETCH(int, expectedNumberOfParagraphs); |
| 358 | |
| 359 | QTextDocument doc; |
| 360 | QTextMarkdownImporter(QTextMarkdownImporter::DialectGitHub).import(doc: &doc, markdown: input); |
| 361 | QTextFrame::iterator iterator = doc.rootFrame()->begin(); |
| 362 | int i = 0; |
| 363 | while (!iterator.atEnd()) { |
| 364 | QTextBlock block = iterator.currentBlock(); |
| 365 | // Make sure there is no empty paragraph at the beginning of the document |
| 366 | if (i == 0) |
| 367 | QVERIFY(!block.text().isEmpty()); |
| 368 | ++iterator; |
| 369 | ++i; |
| 370 | } |
| 371 | QCOMPARE(i, expectedNumberOfParagraphs); |
| 372 | } |
| 373 | |
| 374 | void tst_QTextMarkdownImporter::pathological_data() |
| 375 | { |
| 376 | QTest::addColumn<QString>(name: "warning" ); |
| 377 | QTest::newRow(dataTag: "fuzz20450" ) << "attempted to insert into a list that no longer exists" ; |
| 378 | QTest::newRow(dataTag: "fuzz20580" ) << "" ; |
| 379 | } |
| 380 | |
| 381 | void tst_QTextMarkdownImporter::pathological() // avoid crashing on crazy input |
| 382 | { |
| 383 | QFETCH(QString, warning); |
| 384 | QString filename = QLatin1String("data/" ) + QTest::currentDataTag() + QLatin1String(".md" ); |
| 385 | QFile f(QFINDTESTDATA(filename)); |
| 386 | QVERIFY(f.open(QFile::ReadOnly)); |
| 387 | #ifdef QT_NO_DEBUG |
| 388 | Q_UNUSED(warning) |
| 389 | #else |
| 390 | if (!warning.isEmpty()) |
| 391 | QTest::ignoreMessage(type: QtWarningMsg, message: warning.toLatin1()); |
| 392 | #endif |
| 393 | QTextDocument().setMarkdown(markdown: f.readAll()); |
| 394 | } |
| 395 | |
| 396 | QTEST_MAIN(tst_QTextMarkdownImporter) |
| 397 | #include "tst_qtextmarkdownimporter.moc" |
| 398 | |