| 1 | /* |
| 2 | This file is part of the KDE libraries |
| 3 | SPDX-FileCopyrightText: 2007 Daniel Laidig <d.laidig@gmx.de> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 6 | */ |
| 7 | |
| 8 | #include "kcharselectdata_p.h" |
| 9 | |
| 10 | #include <QCoreApplication> |
| 11 | #include <QFile> |
| 12 | #include <QFutureInterface> |
| 13 | #include <QRegularExpression> |
| 14 | #include <QRunnable> |
| 15 | #include <QStringList> |
| 16 | #include <QThreadPool> |
| 17 | #include <qendian.h> |
| 18 | |
| 19 | #include <../test-config.h> |
| 20 | #include <qstandardpaths.h> |
| 21 | #include <string.h> |
| 22 | |
/*
 * Constants for Hangul syllable (de)composition, see UAX #15.
 *
 * Typed constants instead of preprocessor macros: same names and values,
 * but scoped, type-checked and visible in a debugger.
 */
constexpr int SBase = 0xAC00; // first precomposed Hangul syllable
constexpr int LBase = 0x1100; // first leading consonant (Choseong) jamo
constexpr int VBase = 0x1161; // first vowel (Jungseong) jamo
constexpr int TBase = 0x11A7; // trailing consonant (Jongsung) base; index 0 means "no trailing jamo"
constexpr int LCount = 19; // number of leading consonants
constexpr int VCount = 21; // number of vowels
constexpr int TCount = 28; // number of trailing consonants, including "none"
constexpr int NCount = VCount * TCount; // syllables per leading consonant
constexpr int SCount = LCount * NCount; // total number of precomposed syllables
| 33 | |
| 34 | class RunIndexCreation : public QFutureInterface<Index>, public QRunnable |
| 35 | { |
| 36 | public: |
| 37 | RunIndexCreation(KCharSelectData *data, const QByteArray &dataFile) |
| 38 | : m_data(data) |
| 39 | , m_dataFile(dataFile) |
| 40 | { |
| 41 | } |
| 42 | |
| 43 | QFuture<Index> start() |
| 44 | { |
| 45 | setRunnable(this); |
| 46 | reportStarted(); |
| 47 | QFuture<Index> f = this->future(); |
| 48 | QThreadPool::globalInstance()->start(runnable: this); |
| 49 | return f; |
| 50 | } |
| 51 | |
| 52 | void run() override |
| 53 | { |
| 54 | Index index = m_data->createIndex(dataFile: m_dataFile); |
| 55 | reportResult(result: index); |
| 56 | reportFinished(result: nullptr); |
| 57 | } |
| 58 | |
| 59 | private: |
| 60 | KCharSelectData *const m_data; |
| 61 | const QByteArray m_dataFile; |
| 62 | }; |
| 63 | |
| 64 | // clang-format off |
// Short names of the leading consonants, indexed by LIndex (0..18).
// Entry 11 is intentionally empty.
static const char JAMO_L_TABLE[][4] = {
    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S",
    "SS", "", "J", "JJ", "C", "K", "T", "P", "H",
};

// Short names of the vowels, indexed by VIndex (0..20).
static const char JAMO_V_TABLE[][4] = {
    "A", "AE", "YA", "YAE", "EO", "E", "YEO",
    "YE", "O", "WA", "WAE", "OE", "YO", "U",
    "WEO", "WE", "WI", "YU", "EU", "YI", "I",
};

// Short names of the trailing consonants, indexed by TIndex (0..27).
// Entry 0 is empty: the syllable has no trailing consonant.
static const char JAMO_T_TABLE[][4] = {
    "", "G", "GG", "GS", "N", "NJ", "NH",
    "D", "L", "LG", "LM", "LB", "LS", "LT",
    "LP", "LH", "M", "B", "BS", "S", "SS",
    "NG", "J", "C", "K", "T", "P", "H",
};
| 81 | // clang-format on |
| 82 | |
| 83 | bool KCharSelectData::openDataFile() |
| 84 | { |
| 85 | if (!dataFile.isEmpty()) { |
| 86 | return true; |
| 87 | } else { |
| 88 | QFile file(QStringLiteral(":/kf6/kcharselect/kcharselect-data" )); |
| 89 | file.open(flags: QIODevice::ReadOnly); |
| 90 | dataFile = file.readAll(); |
| 91 | file.close(); |
| 92 | if (dataFile.size() < 40) { |
| 93 | dataFile.clear(); |
| 94 | return false; |
| 95 | } |
| 96 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 97 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 20); |
| 98 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 24); |
| 99 | uint blocks = (offsetEnd - offsetBegin) / 4; |
| 100 | if (blocks <= 167) { // maximum possible number of blocks in BMP |
| 101 | // no remapping |
| 102 | remapType = -1; |
| 103 | } else if (blocks >= 174 && blocks <= 180) { |
| 104 | // remapping introduced in 5.25 |
| 105 | remapType = 0; |
| 106 | } else { |
| 107 | // unknown remapping, abort |
| 108 | dataFile.clear(); |
| 109 | return false; |
| 110 | } |
| 111 | futureIndex = (new RunIndexCreation(this, dataFile))->start(); |
| 112 | return true; |
| 113 | } |
| 114 | } |
| 115 | |
| 116 | // Temporary remapping code points <-> 16 bit database codes |
| 117 | // See kcharselect-generate-datafile.py for details |
| 118 | |
| 119 | quint16 KCharSelectData::mapCodePointToDataBase(uint code) const |
| 120 | { |
| 121 | if (remapType == 0) { |
| 122 | if (code >= 0xE000 && code <= 0xEFFF) { |
| 123 | return 0xFFFF; |
| 124 | } |
| 125 | if (code >= 0xF000 && code <= 0xFFFF) { |
| 126 | return code - 0x1000; |
| 127 | } |
| 128 | if (code >= 0x1F000 && code <= 0x1FFFF) { |
| 129 | return code - 0x10000; |
| 130 | } |
| 131 | } |
| 132 | if (code >= 0x10000) { |
| 133 | return 0xFFFF; |
| 134 | } |
| 135 | return code; |
| 136 | } |
| 137 | |
| 138 | uint KCharSelectData::mapDataBaseToCodePoint(quint16 code) const |
| 139 | { |
| 140 | if (remapType == 0) { |
| 141 | if (code >= 0xE000 && code <= 0xEFFF) { |
| 142 | return code + 0x1000; |
| 143 | } |
| 144 | if (code >= 0xF000) { |
| 145 | return code + 0x10000; |
| 146 | } |
| 147 | } |
| 148 | return code; |
| 149 | } |
| 150 | |
| 151 | quint32 KCharSelectData::getDetailIndex(uint c) const |
| 152 | { |
| 153 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 154 | // Convert from little-endian, so that this code works on PPC too. |
| 155 | // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286 |
| 156 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 12); |
| 157 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 16); |
| 158 | |
| 159 | int min = 0; |
| 160 | int mid; |
| 161 | int max = ((offsetEnd - offsetBegin) / 27) - 1; |
| 162 | |
| 163 | quint16 unicode = mapCodePointToDataBase(code: c); |
| 164 | if (unicode == 0xFFFF) { |
| 165 | return 0; |
| 166 | } |
| 167 | |
| 168 | static quint16 most_recent_searched; |
| 169 | static quint32 most_recent_result; |
| 170 | |
| 171 | if (unicode == most_recent_searched) { |
| 172 | return most_recent_result; |
| 173 | } |
| 174 | |
| 175 | most_recent_searched = unicode; |
| 176 | |
| 177 | while (max >= min) { |
| 178 | mid = (min + max) / 2; |
| 179 | const quint16 midUnicode = qFromLittleEndian<quint16>(src: data + offsetBegin + mid * 27); |
| 180 | if (unicode > midUnicode) { |
| 181 | min = mid + 1; |
| 182 | } else if (unicode < midUnicode) { |
| 183 | max = mid - 1; |
| 184 | } else { |
| 185 | most_recent_result = offsetBegin + mid * 27; |
| 186 | |
| 187 | return most_recent_result; |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | most_recent_result = 0; |
| 192 | return 0; |
| 193 | } |
| 194 | |
| 195 | QString KCharSelectData::formatCode(uint code, int length, const QString &prefix, int base) |
| 196 | { |
| 197 | QString s = QString::number(code, base).toUpper(); |
| 198 | while (s.size() < length) { |
| 199 | s.prepend(c: QLatin1Char('0')); |
| 200 | } |
| 201 | s.prepend(s: prefix); |
| 202 | return s; |
| 203 | } |
| 204 | |
| 205 | QList<uint> KCharSelectData::blockContents(int block) |
| 206 | { |
| 207 | if (!openDataFile()) { |
| 208 | return QList<uint>(); |
| 209 | } |
| 210 | |
| 211 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 212 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 20); |
| 213 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 24); |
| 214 | |
| 215 | int max = ((offsetEnd - offsetBegin) / 4) - 1; |
| 216 | |
| 217 | QList<uint> res; |
| 218 | |
| 219 | if (block > max) { |
| 220 | return res; |
| 221 | } |
| 222 | |
| 223 | quint16 unicodeBegin = qFromLittleEndian<quint16>(src: data + offsetBegin + block * 4); |
| 224 | quint16 unicodeEnd = qFromLittleEndian<quint16>(src: data + offsetBegin + block * 4 + 2); |
| 225 | |
| 226 | while (unicodeBegin < unicodeEnd) { |
| 227 | res.append(t: mapDataBaseToCodePoint(code: unicodeBegin)); |
| 228 | unicodeBegin++; |
| 229 | } |
| 230 | res.append(t: mapDataBaseToCodePoint(code: unicodeBegin)); // Be careful when unicodeEnd==0xffff |
| 231 | |
| 232 | return res; |
| 233 | } |
| 234 | |
| 235 | QList<int> KCharSelectData::sectionContents(int section) |
| 236 | { |
| 237 | section -= 1; |
| 238 | if (!openDataFile()) { |
| 239 | return QList<int>(); |
| 240 | } |
| 241 | |
| 242 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 243 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 28); |
| 244 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 32); |
| 245 | |
| 246 | int max = ((offsetEnd - offsetBegin) / 4) - 1; |
| 247 | |
| 248 | QList<int> res; |
| 249 | |
| 250 | if (section > max) { |
| 251 | return res; |
| 252 | } |
| 253 | |
| 254 | for (int i = 0; i <= max; i++) { |
| 255 | const quint16 currSection = qFromLittleEndian<quint16>(src: data + offsetBegin + i * 4); |
| 256 | if (currSection == section || section < 0) { |
| 257 | res.append(t: qFromLittleEndian<quint16>(src: data + offsetBegin + i * 4 + 2)); |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | return res; |
| 262 | } |
| 263 | |
| 264 | QStringList KCharSelectData::sectionList() |
| 265 | { |
| 266 | if (!openDataFile()) { |
| 267 | return QStringList(); |
| 268 | } |
| 269 | |
| 270 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 271 | const quint32 stringBegin = qFromLittleEndian<quint32>(src: udata + 24); |
| 272 | const quint32 stringEnd = qFromLittleEndian<quint32>(src: udata + 28); |
| 273 | |
| 274 | const char *data = dataFile.constData(); |
| 275 | QStringList list; |
| 276 | quint32 i = stringBegin; |
| 277 | list.append(t: QCoreApplication::translate(context: "KCharSelectData" , key: "All" , disambiguation: "KCharSelect section name" )); |
| 278 | while (i < stringEnd) { |
| 279 | list.append(t: QCoreApplication::translate(context: "KCharSelectData" , key: data + i, disambiguation: "KCharSelect section name" )); |
| 280 | i += qstrlen(str: data + i) + 1; |
| 281 | } |
| 282 | |
| 283 | return list; |
| 284 | } |
| 285 | |
| 286 | QString KCharSelectData::block(uint c) |
| 287 | { |
| 288 | return blockName(index: blockIndex(c)); |
| 289 | } |
| 290 | |
| 291 | QString KCharSelectData::section(uint c) |
| 292 | { |
| 293 | return sectionName(index: sectionIndex(block: blockIndex(c))); |
| 294 | } |
| 295 | |
| 296 | QString KCharSelectData::name(uint c) |
| 297 | { |
| 298 | if (!openDataFile()) { |
| 299 | return QString(); |
| 300 | } |
| 301 | |
| 302 | if ((c & 0xFFFE) == 0xFFFE || (c >= 0xFDD0 && c <= 0xFDEF)) { |
| 303 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<noncharacter>" ); |
| 304 | } else if ((c >= 0x3400 && c <= 0x4DBF) || (c >= 0x4E00 && c <= 0x9FFF) || (c >= 0x20000 && c <= 0x2F7FF)) { |
| 305 | return QLatin1String("CJK UNIFIED IDEOGRAPH-" ) + formatCode(code: c, length: 4, prefix: QString()); |
| 306 | } else if (c >= 0xAC00 && c <= 0xD7AF) { |
| 307 | /* compute hangul syllable name as per UAX #15 */ |
| 308 | int SIndex = c - SBase; |
| 309 | int LIndex; |
| 310 | int VIndex; |
| 311 | int TIndex; |
| 312 | |
| 313 | if (SIndex < 0 || SIndex >= SCount) { |
| 314 | return QString(); |
| 315 | } |
| 316 | |
| 317 | LIndex = SIndex / NCount; |
| 318 | VIndex = (SIndex % NCount) / TCount; |
| 319 | TIndex = SIndex % TCount; |
| 320 | |
| 321 | return QLatin1String("HANGUL SYLLABLE " ) + QLatin1String(JAMO_L_TABLE[LIndex]) + QLatin1String(JAMO_V_TABLE[VIndex]) |
| 322 | + QLatin1String(JAMO_T_TABLE[TIndex]); |
| 323 | } else if (c >= 0xD800 && c <= 0xDB7F) { |
| 324 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<Non Private Use High Surrogate>" ); |
| 325 | } else if (c >= 0xDB80 && c <= 0xDBFF) { |
| 326 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<Private Use High Surrogate>" ); |
| 327 | } else if (c >= 0xDC00 && c <= 0xDFFF) { |
| 328 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<Low Surrogate>" ); |
| 329 | } else if ((c >= 0xE000 && c <= 0xF8FF) || c >= 0xF0000) { |
| 330 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<Private Use>" ); |
| 331 | } else if ((c >= 0xF900 && c <= 0xFAFF) || (c >= 0x2F800 && c <= 0x2FFFF)) { |
| 332 | return QLatin1String("CJK COMPATIBILITY IDEOGRAPH-" ) + formatCode(code: c, length: 4, prefix: QString()); |
| 333 | } |
| 334 | quint16 unicode = mapCodePointToDataBase(code: c); |
| 335 | if (unicode == 0xFFFF) { |
| 336 | return QLatin1String("NON-BMP-CHARACTER-" ) + formatCode(code: c, length: 4, prefix: QString()); |
| 337 | } else { |
| 338 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 339 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 4); |
| 340 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 8); |
| 341 | |
| 342 | int min = 0; |
| 343 | int mid; |
| 344 | int max = ((offsetEnd - offsetBegin) / 6) - 1; |
| 345 | QString s; |
| 346 | |
| 347 | while (max >= min) { |
| 348 | mid = (min + max) / 2; |
| 349 | const quint16 midUnicode = qFromLittleEndian<quint16>(src: data + offsetBegin + mid * 6); |
| 350 | if (unicode > midUnicode) { |
| 351 | min = mid + 1; |
| 352 | } else if (unicode < midUnicode) { |
| 353 | max = mid - 1; |
| 354 | } else { |
| 355 | quint32 offset = qFromLittleEndian<quint32>(src: data + offsetBegin + mid * 6 + 2); |
| 356 | s = QString::fromUtf8(utf8: dataFile.constData() + offset + 1); |
| 357 | break; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | if (s.isNull()) { |
| 362 | return QCoreApplication::translate(context: "KCharSelectData" , key: "<not assigned>" ); |
| 363 | } else { |
| 364 | return s; |
| 365 | } |
| 366 | } |
| 367 | } |
| 368 | |
| 369 | int KCharSelectData::blockIndex(uint c) |
| 370 | { |
| 371 | if (!openDataFile()) { |
| 372 | return 0; |
| 373 | } |
| 374 | |
| 375 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 376 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 20); |
| 377 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 24); |
| 378 | const quint16 unicode = mapCodePointToDataBase(code: c); |
| 379 | if (unicode == 0xFFFF) { |
| 380 | return 0; |
| 381 | } |
| 382 | |
| 383 | int max = ((offsetEnd - offsetBegin) / 4) - 1; |
| 384 | |
| 385 | int i = 0; |
| 386 | |
| 387 | while (unicode > qFromLittleEndian<quint16>(src: data + offsetBegin + i * 4 + 2) && i < max) { |
| 388 | i++; |
| 389 | } |
| 390 | |
| 391 | return i; |
| 392 | } |
| 393 | |
| 394 | int KCharSelectData::sectionIndex(int block) |
| 395 | { |
| 396 | if (!openDataFile()) { |
| 397 | return 0; |
| 398 | } |
| 399 | |
| 400 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 401 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 28); |
| 402 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 32); |
| 403 | |
| 404 | int max = ((offsetEnd - offsetBegin) / 4) - 1; |
| 405 | |
| 406 | for (int i = 0; i <= max; i++) { |
| 407 | if (qFromLittleEndian<quint16>(src: data + offsetBegin + i * 4 + 2) == block) { |
| 408 | return qFromLittleEndian<quint16>(src: data + offsetBegin + i * 4) + 1; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | return 0; |
| 413 | } |
| 414 | |
| 415 | QString KCharSelectData::blockName(int index) |
| 416 | { |
| 417 | if (!openDataFile()) { |
| 418 | return QString(); |
| 419 | } |
| 420 | |
| 421 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 422 | const quint32 stringBegin = qFromLittleEndian<quint32>(src: udata + 16); |
| 423 | const quint32 stringEnd = qFromLittleEndian<quint32>(src: udata + 20); |
| 424 | |
| 425 | quint32 i = stringBegin; |
| 426 | int currIndex = 0; |
| 427 | |
| 428 | const char *data = dataFile.constData(); |
| 429 | while (i < stringEnd && currIndex < index) { |
| 430 | i += qstrlen(str: data + i) + 1; |
| 431 | currIndex++; |
| 432 | } |
| 433 | |
| 434 | return QCoreApplication::translate(context: "KCharSelectData" , key: data + i, disambiguation: "KCharselect unicode block name" ); |
| 435 | } |
| 436 | |
| 437 | QString KCharSelectData::sectionName(int index) |
| 438 | { |
| 439 | if (index == 0) { |
| 440 | return QCoreApplication::translate(context: "KCharSelectData" , key: "All" , disambiguation: "KCharselect unicode section name" ); |
| 441 | } |
| 442 | if (!openDataFile()) { |
| 443 | return QString(); |
| 444 | } |
| 445 | |
| 446 | index -= 1; |
| 447 | |
| 448 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 449 | const quint32 stringBegin = qFromLittleEndian<quint32>(src: udata + 24); |
| 450 | const quint32 stringEnd = qFromLittleEndian<quint32>(src: udata + 28); |
| 451 | |
| 452 | quint32 i = stringBegin; |
| 453 | int currIndex = 0; |
| 454 | |
| 455 | const char *data = dataFile.constData(); |
| 456 | while (i < stringEnd && currIndex < index) { |
| 457 | i += qstrlen(str: data + i) + 1; |
| 458 | currIndex++; |
| 459 | } |
| 460 | |
| 461 | return QCoreApplication::translate(context: "KCharSelectData" , key: data + i, disambiguation: "KCharselect unicode section name" ); |
| 462 | } |
| 463 | |
| 464 | QStringList KCharSelectData::aliases(uint c) |
| 465 | { |
| 466 | if (!openDataFile()) { |
| 467 | return QStringList(); |
| 468 | } |
| 469 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 470 | const int detailIndex = getDetailIndex(c); |
| 471 | if (detailIndex == 0) { |
| 472 | return QStringList(); |
| 473 | } |
| 474 | |
| 475 | const quint8 count = *(quint8 *)(udata + detailIndex + 6); |
| 476 | quint32 offset = qFromLittleEndian<quint32>(src: udata + detailIndex + 2); |
| 477 | |
| 478 | QStringList aliases; |
| 479 | aliases.reserve(asize: count); |
| 480 | |
| 481 | const char *data = dataFile.constData(); |
| 482 | for (int i = 0; i < count; i++) { |
| 483 | aliases.append(t: QString::fromUtf8(utf8: data + offset)); |
| 484 | offset += qstrlen(str: data + offset) + 1; |
| 485 | } |
| 486 | return aliases; |
| 487 | } |
| 488 | |
| 489 | QStringList KCharSelectData::notes(uint c) |
| 490 | { |
| 491 | if (!openDataFile()) { |
| 492 | return QStringList(); |
| 493 | } |
| 494 | const int detailIndex = getDetailIndex(c); |
| 495 | if (detailIndex == 0) { |
| 496 | return QStringList(); |
| 497 | } |
| 498 | |
| 499 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 500 | const quint8 count = *(quint8 *)(udata + detailIndex + 11); |
| 501 | quint32 offset = qFromLittleEndian<quint32>(src: udata + detailIndex + 7); |
| 502 | |
| 503 | QStringList notes; |
| 504 | notes.reserve(asize: count); |
| 505 | |
| 506 | const char *data = dataFile.constData(); |
| 507 | for (int i = 0; i < count; i++) { |
| 508 | notes.append(t: QString::fromUtf8(utf8: data + offset)); |
| 509 | offset += qstrlen(str: data + offset) + 1; |
| 510 | } |
| 511 | |
| 512 | return notes; |
| 513 | } |
| 514 | |
| 515 | QList<uint> KCharSelectData::seeAlso(uint c) |
| 516 | { |
| 517 | if (!openDataFile()) { |
| 518 | return QList<uint>(); |
| 519 | } |
| 520 | const int detailIndex = getDetailIndex(c); |
| 521 | if (detailIndex == 0) { |
| 522 | return QList<uint>(); |
| 523 | } |
| 524 | |
| 525 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 526 | const quint8 count = *(quint8 *)(udata + detailIndex + 26); |
| 527 | quint32 offset = qFromLittleEndian<quint32>(src: udata + detailIndex + 22); |
| 528 | |
| 529 | QList<uint> seeAlso; |
| 530 | seeAlso.reserve(asize: count); |
| 531 | |
| 532 | for (int i = 0; i < count; i++) { |
| 533 | seeAlso.append(t: mapDataBaseToCodePoint(code: qFromLittleEndian<quint16>(src: udata + offset))); |
| 534 | offset += 2; |
| 535 | } |
| 536 | |
| 537 | return seeAlso; |
| 538 | } |
| 539 | |
| 540 | QStringList KCharSelectData::equivalents(uint c) |
| 541 | { |
| 542 | if (!openDataFile()) { |
| 543 | return QStringList(); |
| 544 | } |
| 545 | const int detailIndex = getDetailIndex(c); |
| 546 | if (detailIndex == 0) { |
| 547 | return QStringList(); |
| 548 | } |
| 549 | |
| 550 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 551 | const quint8 count = *(quint8 *)(udata + detailIndex + 21); |
| 552 | quint32 offset = qFromLittleEndian<quint32>(src: udata + detailIndex + 17); |
| 553 | |
| 554 | QStringList equivalents; |
| 555 | equivalents.reserve(asize: count); |
| 556 | |
| 557 | const char *data = dataFile.constData(); |
| 558 | for (int i = 0; i < count; i++) { |
| 559 | equivalents.append(t: QString::fromUtf8(utf8: data + offset)); |
| 560 | offset += qstrlen(str: data + offset) + 1; |
| 561 | } |
| 562 | |
| 563 | return equivalents; |
| 564 | } |
| 565 | |
| 566 | QStringList KCharSelectData::approximateEquivalents(uint c) |
| 567 | { |
| 568 | if (!openDataFile()) { |
| 569 | return QStringList(); |
| 570 | } |
| 571 | const int detailIndex = getDetailIndex(c); |
| 572 | if (detailIndex == 0) { |
| 573 | return QStringList(); |
| 574 | } |
| 575 | |
| 576 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 577 | const quint8 count = *(quint8 *)(udata + detailIndex + 16); |
| 578 | quint32 offset = qFromLittleEndian<quint32>(src: udata + detailIndex + 12); |
| 579 | |
| 580 | QStringList approxEquivalents; |
| 581 | approxEquivalents.reserve(asize: count); |
| 582 | |
| 583 | const char *data = dataFile.constData(); |
| 584 | for (int i = 0; i < count; i++) { |
| 585 | approxEquivalents.append(t: QString::fromUtf8(utf8: data + offset)); |
| 586 | offset += qstrlen(str: data + offset) + 1; |
| 587 | } |
| 588 | |
| 589 | return approxEquivalents; |
| 590 | } |
| 591 | |
| 592 | QList<uint> KCharSelectData::decomposition(uint c) |
| 593 | { |
| 594 | // for now, only decompose Hangul Syllable into Hangul Jamo |
| 595 | uint SIndex = c - SBase; |
| 596 | if (SIndex >= SCount) { |
| 597 | return QList<uint>(); |
| 598 | } |
| 599 | |
| 600 | uint L = LBase + SIndex / NCount; // Choseong |
| 601 | uint V = VBase + (SIndex % NCount) / TCount; // Jungseong |
| 602 | uint T = TBase + SIndex % TCount; // Jongsung |
| 603 | QList<uint> jamoList; |
| 604 | jamoList.append(t: L); |
| 605 | jamoList.append(t: V); |
| 606 | if (T != TBase) { |
| 607 | jamoList.append(t: T); |
| 608 | } |
| 609 | return jamoList; |
| 610 | } |
| 611 | |
| 612 | QStringList KCharSelectData::unihanInfo(uint c) |
| 613 | { |
| 614 | if (!openDataFile()) { |
| 615 | return QStringList(); |
| 616 | } |
| 617 | |
| 618 | quint16 unicode = mapCodePointToDataBase(code: c); |
| 619 | if (unicode == 0xFFFF) { |
| 620 | return QStringList(); |
| 621 | } |
| 622 | |
| 623 | const char *data = dataFile.constData(); |
| 624 | const uchar *udata = reinterpret_cast<const uchar *>(data); |
| 625 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: udata + 36); |
| 626 | const quint32 offsetEnd = dataFile.size(); |
| 627 | |
| 628 | int min = 0; |
| 629 | int mid; |
| 630 | int max = ((offsetEnd - offsetBegin) / 30) - 1; |
| 631 | |
| 632 | while (max >= min) { |
| 633 | mid = (min + max) / 2; |
| 634 | const quint16 midUnicode = qFromLittleEndian<quint16>(src: udata + offsetBegin + mid * 30); |
| 635 | if (unicode > midUnicode) { |
| 636 | min = mid + 1; |
| 637 | } else if (unicode < midUnicode) { |
| 638 | max = mid - 1; |
| 639 | } else { |
| 640 | QStringList res; |
| 641 | res.reserve(asize: 7); |
| 642 | for (int i = 0; i < 7; i++) { |
| 643 | quint32 offset = qFromLittleEndian<quint32>(src: udata + offsetBegin + mid * 30 + 2 + i * 4); |
| 644 | if (offset != 0) { |
| 645 | res.append(t: QString::fromUtf8(utf8: data + offset)); |
| 646 | } else { |
| 647 | res.append(t: QString()); |
| 648 | } |
| 649 | } |
| 650 | return res; |
| 651 | } |
| 652 | } |
| 653 | |
| 654 | return QStringList(); |
| 655 | } |
| 656 | |
| 657 | QChar::Category KCharSelectData::category(uint c) |
| 658 | { |
| 659 | if (!openDataFile()) { |
| 660 | return QChar::category(ucs4: c); |
| 661 | } |
| 662 | |
| 663 | ushort unicode = mapCodePointToDataBase(code: c); |
| 664 | if (unicode == 0xFFFF) { |
| 665 | return QChar::category(ucs4: c); |
| 666 | } |
| 667 | |
| 668 | const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 669 | const quint32 offsetBegin = qFromLittleEndian<quint32>(src: data + 4); |
| 670 | const quint32 offsetEnd = qFromLittleEndian<quint32>(src: data + 8); |
| 671 | |
| 672 | int min = 0; |
| 673 | int mid; |
| 674 | int max = ((offsetEnd - offsetBegin) / 6) - 1; |
| 675 | |
| 676 | while (max >= min) { |
| 677 | mid = (min + max) / 2; |
| 678 | const quint16 midUnicode = qFromLittleEndian<quint16>(src: data + offsetBegin + mid * 6); |
| 679 | if (unicode > midUnicode) { |
| 680 | min = mid + 1; |
| 681 | } else if (unicode < midUnicode) { |
| 682 | max = mid - 1; |
| 683 | } else { |
| 684 | quint32 offset = qFromLittleEndian<quint32>(src: data + offsetBegin + mid * 6 + 2); |
| 685 | uchar categoryCode = *(data + offset); |
| 686 | Q_ASSERT(categoryCode > 0); |
| 687 | categoryCode--; /* Qt5 changed QChar::Category enum to start from 0 instead of 1 |
| 688 | See QtBase commit d17c76feee9eece4 */ |
| 689 | return QChar::Category(categoryCode); |
| 690 | } |
| 691 | } |
| 692 | |
| 693 | return QChar::category(ucs4: c); |
| 694 | } |
| 695 | |
| 696 | bool KCharSelectData::isPrint(uint c) |
| 697 | { |
| 698 | QChar::Category cat = category(c); |
| 699 | return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned); |
| 700 | } |
| 701 | |
| 702 | bool KCharSelectData::isDisplayable(uint c) |
| 703 | { |
| 704 | // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames. |
| 705 | // They should be seen as non-printable characters, as trying to display them leads |
| 706 | // to a crash caused by a Qt "noBlockInString" assertion. |
| 707 | if (c == 0xFDD0 || c == 0xFDD1) { |
| 708 | return false; |
| 709 | } |
| 710 | |
| 711 | return !isIgnorable(c) && isPrint(c); |
| 712 | } |
| 713 | |
| 714 | bool KCharSelectData::isIgnorable(uint c) |
| 715 | { |
| 716 | /* |
| 717 | * According to the Unicode standard, Default Ignorable Code Points |
| 718 | * should be ignored unless explicitly supported. For example, U+202E |
| 719 | * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying |
| 720 | * it gives the undesired effect of all text being turned RTL. We do not |
| 721 | * have a way to "explicitly" support it, so we will treat it as |
| 722 | * non-printable. |
| 723 | * |
| 724 | * There is a list of these on |
| 725 | * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the |
| 726 | * property Default_Ignorable_Code_Point. |
| 727 | */ |
| 728 | |
| 729 | // NOTE: not very nice to hardcode these here; is it worth it to modify |
| 730 | // the binary data file to hold them? |
| 731 | // clang-format off |
| 732 | return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 || |
| 733 | c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) || |
| 734 | (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) || |
| 735 | (c >= 0x2060 && c <= 0x206F) || c == 0x3164 || |
| 736 | (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 || |
| 737 | (c >= 0xFFF0 && c <= 0xFFF8); |
| 738 | // clang-format on |
| 739 | } |
| 740 | |
| 741 | bool KCharSelectData::isCombining(uint c) |
| 742 | { |
| 743 | return section(c) == QCoreApplication::translate(context: "KCharSelectData" , key: "Combining Diacritics" , disambiguation: "KCharSelect section name" ); |
| 744 | // FIXME: this is an imperfect test. There are many combining characters |
| 745 | // that are outside of this section. See Grapheme_Extend in |
| 746 | // http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt |
| 747 | } |
| 748 | |
| 749 | QString KCharSelectData::display(uint c, const QFont &font) |
| 750 | { |
| 751 | if (!isDisplayable(c)) { |
| 752 | return QLatin1String("<b>" ) + QCoreApplication::translate(context: "KCharSelectData" , key: "Non-printable" ) + QLatin1String("</b>" ); |
| 753 | } else { |
| 754 | QString s = QLatin1String("<font size=\"+4\" face=\"" ) + font.family() + QLatin1String("\">" ); |
| 755 | if (isCombining(c)) { |
| 756 | s += displayCombining(c); |
| 757 | } else { |
| 758 | s += QLatin1String("&#" ) + QString::number(c) + QLatin1Char(';'); |
| 759 | } |
| 760 | s += QLatin1String("</font>" ); |
| 761 | return s; |
| 762 | } |
| 763 | } |
| 764 | |
| 765 | QString KCharSelectData::displayCombining(uint c) |
| 766 | { |
| 767 | /* |
| 768 | * The purpose of this is to make it easier to see how a combining |
| 769 | * character affects the text around it. |
| 770 | * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose, |
| 771 | * as seen in pdfs from Unicode, but there seem to be a lot of alignment |
| 772 | * problems with that. |
| 773 | * |
| 774 | * Eventually, it would be nice to determine whether the character |
| 775 | * combines to the left or to the right, etc. |
| 776 | */ |
| 777 | QString s = QLatin1String(" &#" ) + QString::number(c) + QLatin1String("; " ) + QLatin1String(" (ab&#" ) + QString::number(c) + QLatin1String(";c)" ); |
| 778 | return s; |
| 779 | } |
| 780 | |
| 781 | QString KCharSelectData::categoryText(QChar::Category category) |
| 782 | { |
| 783 | switch (category) { |
| 784 | case QChar::Other_Control: |
| 785 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Other, Control" ); |
| 786 | case QChar::Other_Format: |
| 787 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Other, Format" ); |
| 788 | case QChar::Other_NotAssigned: |
| 789 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Other, Not Assigned" ); |
| 790 | case QChar::Other_PrivateUse: |
| 791 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Other, Private Use" ); |
| 792 | case QChar::Other_Surrogate: |
| 793 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Other, Surrogate" ); |
| 794 | case QChar::Letter_Lowercase: |
| 795 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Letter, Lowercase" ); |
| 796 | case QChar::Letter_Modifier: |
| 797 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Letter, Modifier" ); |
| 798 | case QChar::Letter_Other: |
| 799 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Letter, Other" ); |
| 800 | case QChar::Letter_Titlecase: |
| 801 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Letter, Titlecase" ); |
| 802 | case QChar::Letter_Uppercase: |
| 803 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Letter, Uppercase" ); |
| 804 | case QChar::Mark_SpacingCombining: |
| 805 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Mark, Spacing Combining" ); |
| 806 | case QChar::Mark_Enclosing: |
| 807 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Mark, Enclosing" ); |
| 808 | case QChar::Mark_NonSpacing: |
| 809 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Mark, Non-Spacing" ); |
| 810 | case QChar::Number_DecimalDigit: |
| 811 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Number, Decimal Digit" ); |
| 812 | case QChar::Number_Letter: |
| 813 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Number, Letter" ); |
| 814 | case QChar::Number_Other: |
| 815 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Number, Other" ); |
| 816 | case QChar::Punctuation_Connector: |
| 817 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Connector" ); |
| 818 | case QChar::Punctuation_Dash: |
| 819 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Dash" ); |
| 820 | case QChar::Punctuation_Close: |
| 821 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Close" ); |
| 822 | case QChar::Punctuation_FinalQuote: |
| 823 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Final Quote" ); |
| 824 | case QChar::Punctuation_InitialQuote: |
| 825 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Initial Quote" ); |
| 826 | case QChar::Punctuation_Other: |
| 827 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Other" ); |
| 828 | case QChar::Punctuation_Open: |
| 829 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Punctuation, Open" ); |
| 830 | case QChar::Symbol_Currency: |
| 831 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Symbol, Currency" ); |
| 832 | case QChar::Symbol_Modifier: |
| 833 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Symbol, Modifier" ); |
| 834 | case QChar::Symbol_Math: |
| 835 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Symbol, Math" ); |
| 836 | case QChar::Symbol_Other: |
| 837 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Symbol, Other" ); |
| 838 | case QChar::Separator_Line: |
| 839 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Separator, Line" ); |
| 840 | case QChar::Separator_Paragraph: |
| 841 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Separator, Paragraph" ); |
| 842 | case QChar::Separator_Space: |
| 843 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Separator, Space" ); |
| 844 | default: |
| 845 | return QCoreApplication::translate(context: "KCharSelectData" , key: "Unknown" ); |
| 846 | } |
| 847 | } |
| 848 | |
| 849 | QList<uint> KCharSelectData::find(const QString &needle) |
| 850 | { |
| 851 | QSet<uint> result; |
| 852 | |
| 853 | QList<uint> returnRes; |
| 854 | QString simplified = needle.length() > 1 ? needle.simplified() : needle; |
| 855 | QStringList searchStrings; |
| 856 | |
| 857 | static const QRegularExpression octalExp(QStringLiteral("^\\\\[0-7][0-7\\\\]*$" )); |
| 858 | if (octalExp.match(subject: simplified).hasMatch()) { |
| 859 | // search for C octal escaped UTF-8 |
| 860 | QByteArray utf8; |
| 861 | int byte = -1; |
| 862 | for (int i = 0; i <= simplified.length(); ++i) { |
| 863 | int c = simplified.at(i).unicode(); |
| 864 | if (c >= '0' && c <= '7') { |
| 865 | byte = 8 * byte + c - '0'; |
| 866 | } else if (byte == -1) { |
| 867 | byte = 0; |
| 868 | } else if (byte >= 0x00 && byte <= 0xFF) { |
| 869 | utf8.append(c: (char)byte); |
| 870 | byte = 0; |
| 871 | } |
| 872 | } |
| 873 | simplified = QString::fromUtf8(ba: utf8); |
| 874 | } |
| 875 | |
| 876 | if (simplified.length() <= 2) { |
| 877 | QList<uint> ucs4 = simplified.toUcs4(); |
| 878 | if (ucs4.size() == 1) { |
| 879 | // search for hex representation of the character |
| 880 | searchStrings = QStringList(formatCode(code: ucs4.at(i: 0))); |
| 881 | } else { |
| 882 | searchStrings = splitString(s: simplified); |
| 883 | } |
| 884 | } else { |
| 885 | searchStrings = splitString(s: simplified); |
| 886 | } |
| 887 | |
| 888 | if (searchStrings.isEmpty()) { |
| 889 | return returnRes; |
| 890 | } |
| 891 | |
| 892 | static const QRegularExpression hexExp(QStringLiteral("^(?:|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4,5})$" )); |
| 893 | for (const QString &s : std::as_const(t&: searchStrings)) { |
| 894 | const QRegularExpressionMatch match = hexExp.match(subject: s); |
| 895 | if (match.hasMatch()) { |
| 896 | const QString cap = match.captured(nth: 1); |
| 897 | returnRes.append(t: cap.toInt(ok: nullptr, base: 16)); |
| 898 | // search for "1234" instead of "0x1234" |
| 899 | if (s.length() == 6 || s.length() == 7) { |
| 900 | searchStrings[searchStrings.indexOf(str: s)] = cap; |
| 901 | } |
| 902 | } |
| 903 | // try to parse string as decimal number |
| 904 | bool ok; |
| 905 | int unicode = s.toInt(ok: &ok); |
| 906 | if (ok && unicode >= 0 && unicode <= QChar::LastValidCodePoint) { |
| 907 | returnRes.append(t: unicode); |
| 908 | } |
| 909 | } |
| 910 | |
| 911 | bool firstSubString = true; |
| 912 | for (const QString &s : std::as_const(t&: searchStrings)) { |
| 913 | QSet<uint> partResult = getMatchingChars(s: s.toLower()); |
| 914 | if (firstSubString) { |
| 915 | result = partResult; |
| 916 | firstSubString = false; |
| 917 | } else { |
| 918 | result = result.intersect(other: partResult); |
| 919 | } |
| 920 | } |
| 921 | |
| 922 | // remove results found by matching the code point to prevent duplicate results |
| 923 | // while letting these characters stay at the beginning |
| 924 | for (uint c : std::as_const(t&: returnRes)) { |
| 925 | result.remove(value: c); |
| 926 | } |
| 927 | |
| 928 | QList<uint> sortedResult; |
| 929 | sortedResult.reserve(asize: result.count()); |
| 930 | for (auto c : std::as_const(t&: result)) { |
| 931 | sortedResult.append(t: c); |
| 932 | } |
| 933 | std::sort(first: sortedResult.begin(), last: sortedResult.end()); |
| 934 | |
| 935 | returnRes += sortedResult; |
| 936 | return returnRes; |
| 937 | } |
| 938 | |
| 939 | QSet<uint> KCharSelectData::getMatchingChars(const QString &s) |
| 940 | { |
| 941 | if (dataFile.isEmpty()) { |
| 942 | return QSet<uint>(); |
| 943 | } |
| 944 | futureIndex.waitForFinished(); |
| 945 | const Index index = futureIndex.result(); |
| 946 | Index::const_iterator pos = index.lowerBound(key: s); |
| 947 | QSet<uint> result; |
| 948 | |
| 949 | while (pos != index.constEnd() && pos.key().startsWith(s)) { |
| 950 | for (quint16 c : pos.value()) { |
| 951 | result.insert(value: mapDataBaseToCodePoint(code: c)); |
| 952 | } |
| 953 | ++pos; |
| 954 | } |
| 955 | |
| 956 | return result; |
| 957 | } |
| 958 | |
| 959 | QStringList KCharSelectData::splitString(const QString &s) |
| 960 | { |
| 961 | QStringList result; |
| 962 | int start = 0; |
| 963 | int end = 0; |
| 964 | int length = s.length(); |
| 965 | while (end < length) { |
| 966 | while (end < length && (s[end].isLetterOrNumber() || s[end] == QLatin1Char('+'))) { |
| 967 | end++; |
| 968 | } |
| 969 | if (start != end) { |
| 970 | result.append(t: s.mid(position: start, n: end - start)); |
| 971 | } |
| 972 | start = end; |
| 973 | while (end < length && !(s[end].isLetterOrNumber() || s[end] == QLatin1Char('+'))) { |
| 974 | end++; |
| 975 | start++; |
| 976 | } |
| 977 | } |
| 978 | return result; |
| 979 | } |
| 980 | |
| 981 | void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString &s) |
| 982 | { |
| 983 | const QStringList strings = splitString(s); |
| 984 | for (const QString &s : strings) { |
| 985 | (*index)[s.toLower()].append(t: unicode); |
| 986 | } |
| 987 | } |
| 988 | |
| 989 | Index KCharSelectData::createIndex(const QByteArray &dataFile) |
| 990 | { |
| 991 | Index i; |
| 992 | |
| 993 | // character names |
| 994 | const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData()); |
| 995 | const char *data = dataFile.constData(); |
| 996 | const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(src: udata + 4); |
| 997 | const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(src: udata + 8); |
| 998 | |
| 999 | int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1; |
| 1000 | |
| 1001 | for (int pos = 0; pos <= max; pos++) { |
| 1002 | const quint16 unicode = qFromLittleEndian<quint16>(src: udata + nameOffsetBegin + pos * 6); |
| 1003 | quint32 offset = qFromLittleEndian<quint32>(src: udata + nameOffsetBegin + pos * 6 + 2); |
| 1004 | appendToIndex(index: &i, unicode, s: QString::fromUtf8(utf8: data + offset + 1)); |
| 1005 | } |
| 1006 | |
| 1007 | // details |
| 1008 | const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(src: udata + 12); |
| 1009 | const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(src: udata + 16); |
| 1010 | |
| 1011 | max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1; |
| 1012 | |
| 1013 | for (int pos = 0; pos <= max; pos++) { |
| 1014 | const quint16 unicode = qFromLittleEndian<quint16>(src: udata + detailsOffsetBegin + pos * 27); |
| 1015 | |
| 1016 | // aliases |
| 1017 | const quint8 aliasCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 6); |
| 1018 | quint32 aliasOffset = qFromLittleEndian<quint32>(src: udata + detailsOffsetBegin + pos * 27 + 2); |
| 1019 | |
| 1020 | for (int j = 0; j < aliasCount; j++) { |
| 1021 | appendToIndex(index: &i, unicode, s: QString::fromUtf8(utf8: data + aliasOffset)); |
| 1022 | aliasOffset += qstrlen(str: data + aliasOffset) + 1; |
| 1023 | } |
| 1024 | |
| 1025 | // notes |
| 1026 | const quint8 notesCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 11); |
| 1027 | quint32 notesOffset = qFromLittleEndian<quint32>(src: udata + detailsOffsetBegin + pos * 27 + 7); |
| 1028 | |
| 1029 | for (int j = 0; j < notesCount; j++) { |
| 1030 | appendToIndex(index: &i, unicode, s: QString::fromUtf8(utf8: data + notesOffset)); |
| 1031 | notesOffset += qstrlen(str: data + notesOffset) + 1; |
| 1032 | } |
| 1033 | |
| 1034 | // approximate equivalents |
| 1035 | const quint8 apprCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 16); |
| 1036 | quint32 apprOffset = qFromLittleEndian<quint32>(src: udata + detailsOffsetBegin + pos * 27 + 12); |
| 1037 | |
| 1038 | for (int j = 0; j < apprCount; j++) { |
| 1039 | appendToIndex(index: &i, unicode, s: QString::fromUtf8(utf8: data + apprOffset)); |
| 1040 | apprOffset += qstrlen(str: data + apprOffset) + 1; |
| 1041 | } |
| 1042 | |
| 1043 | // equivalents |
| 1044 | const quint8 equivCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 21); |
| 1045 | quint32 equivOffset = qFromLittleEndian<quint32>(src: udata + detailsOffsetBegin + pos * 27 + 17); |
| 1046 | |
| 1047 | for (int j = 0; j < equivCount; j++) { |
| 1048 | appendToIndex(index: &i, unicode, s: QString::fromUtf8(utf8: data + equivOffset)); |
| 1049 | equivOffset += qstrlen(str: data + equivOffset) + 1; |
| 1050 | } |
| 1051 | |
| 1052 | // see also - convert to string (hex) |
| 1053 | const quint8 seeAlsoCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 26); |
| 1054 | quint32 seeAlsoOffset = qFromLittleEndian<quint32>(src: udata + detailsOffsetBegin + pos * 27 + 22); |
| 1055 | |
| 1056 | for (int j = 0; j < seeAlsoCount; j++) { |
| 1057 | quint16 seeAlso = qFromLittleEndian<quint16>(src: udata + seeAlsoOffset); |
| 1058 | appendToIndex(index: &i, unicode, s: formatCode(code: seeAlso, length: 4, prefix: QString())); |
| 1059 | equivOffset += qstrlen(str: data + equivOffset) + 1; |
| 1060 | } |
| 1061 | } |
| 1062 | |
| 1063 | // unihan data |
| 1064 | // temporary disabled due to the huge amount of data |
| 1065 | // const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36); |
| 1066 | // const quint32 unihanOffsetEnd = dataFile.size(); |
| 1067 | // max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1; |
| 1068 | // |
| 1069 | // for (int pos = 0; pos <= max; pos++) { |
| 1070 | // const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30); |
| 1071 | // for(int j = 0; j < 7; j++) { |
| 1072 | // quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4); |
| 1073 | // if(offset != 0) { |
| 1074 | // appendToIndex(&i, unicode, QString::fromUtf8(data + offset)); |
| 1075 | // } |
| 1076 | // } |
| 1077 | // } |
| 1078 | |
| 1079 | return i; |
| 1080 | } |
| 1081 | |