| 1 | /* |
| 2 | This file is part of the KDE libraries |
| 3 | |
| 4 | SPDX-FileCopyrightText: 2017 Forrest Smith <forrestthewoods@gmail.com> |
| 5 | SPDX-FileCopyrightText: 2021 Waqar Ahmed <waqar.17a@gmail.com> |
| 6 | |
| 7 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 8 | */ |
| 9 | #ifndef KFUZZYMATCHER_H |
| 10 | #define KFUZZYMATCHER_H |
| 11 | |
| 12 | #include <kcoreaddons_export.h> |
| 13 | |
| 14 | #include <QtContainerFwd> |
| 15 | #include <QtGlobal> |
| 16 | |
| 17 | class QString; |
| 18 | class QStringView; |
| 19 | |
| 20 | /*! |
| 21 | * \namespace KFuzzyMatcher |
| 22 | * \inmodule KCoreAddons |
| 23 | * |
| 24 | * \brief This namespace contains functions for fuzzy matching a list of strings |
| 25 | * against a pattern. |
| 26 | * |
| 27 | * This code is ported to Qt from lib_fts: |
| 28 | * https://github.com/forrestthewoods/lib_fts |
| 29 | * which tries to replicate SublimeText like fuzzy matching. |
| 30 | * |
| 31 | * \note |
| 32 | * All character matches will happen sequentially. That means that this function is not |
| 33 | * typo tolerant i.e., "gti" will not match "git", but "gt" will. All methods in here are |
| 34 | * stateless i.e., the input string will not be modified. Also note that strings in all the |
| 35 | * functions in this namespace will be matched case-insensitively. |
| 36 | * |
| 37 | * Limitations: |
| 38 | * \list |
| 39 | * \li Currently this will match only strings with length < 256 correctly. This is because we |
| 40 | * intend on matching a pattern against words / short strings and not paragraphs. |
| 41 | * \li No more than 256 matches will happen. |
| 42 | * \endlist |
| 43 | * |
| 44 | * If you are using this with QSortFilterProxyModel, you need to override both |
| 45 | * QSortFilterProxyModel::lessThan and QSortFilterProxyModel::filterAcceptsRow. |
| 46 | * A simple example: |
| 47 | * |
| 48 | * \code |
| 49 | * bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override |
| 50 | * { |
| 51 | * int score = 0; |
| 52 | * const auto idx = sourceModel()->index(sourceRow, 0, sourceParent); |
| 53 | * const auto actionName = idx.data().toString().splitRef(QLatin1Char(':')).at(1); |
| 54 | * const bool res = kfts::fuzzy_match_sequential(m_pattern, actionName, score); |
| 55 | * // store the score in the source model |
| 56 | * sourceModel()->setData(idx, score, ScoreRole); |
| 57 | * return res; |
| 58 | * } |
| 59 | * |
| 60 | * bool lessThan(const QModelIndex &sourceLeft, const QModelIndex &sourceRight) const override |
| 61 | * { |
| 62 | * // use the score here to sort |
| 63 | * const int l = sourceLeft.data(ScoreRole).toInt(); |
| 64 | * const int r = sourceRight.data(ScoreRole).toInt(); |
| 65 | * return l < r; |
| 66 | * } |
| 67 | * \endcode |
| 68 | * |
| 69 | * Additionally you must not use invalidateFilter() if you go with the above approach. Instead |
| 70 | * use beginResetModel()/endResetModel(): |
| 71 | * |
| 72 | * \code |
| 73 | * Q_SLOT void setFilterString(const QString &string) |
| 74 | * { |
| 75 | * beginResetModel(); |
| 76 | * m_pattern = string; |
| 77 | * endResetModel(); |
| 78 | * } |
| 79 | * \endcode |
| 80 | * |
| 81 | * \brief Namespace for fuzzy matching of strings. |
| 82 | */ |
| 83 | namespace KFuzzyMatcher |
| 84 | { |
| 85 | /*! |
| 86 | * \class KFuzzyMatcher::Result |
| 87 | * \inmodule KCoreAddons |
| 88 | * \brief The result of a fuzzy match. |
| 89 | */ |
| 90 | struct KCOREADDONS_EXPORT Result { |
| 91 | /*! |
| 92 | * \variable KFuzzyMatcher::Result::score |
| 93 | * \brief The score of this match. This can be negative. If matched is \c false |
| 94 | * then the score will be zero. |
| 95 | */ |
| 96 | int score = 0; |
| 97 | /*! |
| 98 | * \variable KFuzzyMatcher::Result::matched |
| 99 | * \brief \c true if match was successful |
| 100 | */ |
| 101 | bool matched = false; |
| 102 | }; |
| 103 | |
| 104 | /*! |
| 105 | * \struct KFuzzyMatcher::Range |
| 106 | * \inmodule KCoreAddons |
| 107 | * \brief A range representing a matched sequence in a string. |
| 108 | * |
| 109 | * \since 5.84 |
| 110 | */ |
| 111 | struct KCOREADDONS_EXPORT Range { |
| 112 | /*! \variable KFuzzyMatcher::Range::start |
| 113 | * \brief The start of the range |
| 114 | */ |
| 115 | int start; |
| 116 | /*! \variable KFuzzyMatcher::Range::length |
| 117 | * \brief The length of the range |
| 118 | */ |
| 119 | int length; |
| 120 | }; |
| 121 | |
| 122 | /*! |
| 123 | * \brief The type of matches to consider when requesting for ranges. |
| 124 | * \sa matchedRanges |
| 125 | * |
| 126 | * \value FullyMatched We want ranges only where the pattern fully matches the user supplied string |
| 127 | * \value All We want ranges for all matches, even if the pattern partially matched the user supplied string |
| 128 | * |
| 129 | * \since 5.84 |
| 130 | */ |
| 131 | enum class RangeType : unsigned char { FullyMatched, All }; |
| 132 | |
| 133 | /*! |
| 134 | * \brief Simple fuzzy matching of chars in \a pattern with chars in \a str |
| 135 | * sequentially. If there is a match, it will return true and false otherwise. |
| 136 | * There is no scoring. You should use this if score is not important for you |
| 137 | * and only matching is important. |
| 138 | * |
| 139 | * If \a pattern is empty, the function will return \c true |
| 140 | * |
| 141 | * \a pattern to search for. For e.g., text entered by a user to filter a |
| 142 | * list |
| 143 | * |
| 144 | * \a str the current string from your list of strings |
| 145 | * |
| 146 | * Returns \c true on sucessful match |
| 147 | * |
| 148 | * \since 5.79 |
| 149 | */ |
| 150 | KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str); |
| 151 | |
| 152 | /*! |
| 153 | * \brief This is the main function which does scored fuzzy matching. |
| 154 | * |
| 155 | * The return value of this function contains Result#score which should be used to |
| 156 | * sort the results. Without sorting of the results this function won't very effective. |
| 157 | * |
| 158 | * If \a pattern is empty, the function will return \c true |
| 159 | * |
| 160 | * \a pattern to search for. For e.g., text entered by a user to filter a |
| 161 | * list or model |
| 162 | * |
| 163 | * \a str the current string from your list of strings |
| 164 | * |
| 165 | * Returns a Result type with score of this match and whether the match was |
| 166 | * successful. If there is no match, score is zero. If the match is successful, |
| 167 | * score must be used to sort the results. |
| 168 | * |
| 169 | * \since 5.79 |
| 170 | */ |
| 171 | KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str); |
| 172 | |
| 173 | /*! |
| 174 | * \brief A function which returns the positions + lengths where the \a pattern matched |
| 175 | * inside the \a str. The resulting ranges can then be utilized to show the user where |
| 176 | * the matches occurred. Example: |
| 177 | * |
| 178 | * \code |
| 179 | * String: hello |
| 180 | * Pattern: Hlo |
| 181 | * |
| 182 | * Output: [Range{0, 1}, Range{3, 2}] |
| 183 | * \endcode |
| 184 | * |
| 185 | * In the above example \c "Hlo" matched inside the string \c "hello" in two places i.e., |
| 186 | * position 0 and position 3. At position 0 it matched 'h', and at position 3 it |
| 187 | * matched 'lo'. |
| 188 | * |
| 189 | * The ranges themeselves can't do much so you will have to make the result useful |
| 190 | * in your own way. Some possible uses can be: |
| 191 | * \list |
| 192 | * \li Transform the result into a vector of \c QTextLayout::FormatRange and then paint |
| 193 | * them in the view |
| 194 | * \li Use the result to transform the string into html, for example conver the string from |
| 195 | * above example to "\<b\>H\</b\>el\<b\>lo\</b\>, and then use \c QTextDocument |
| 196 | * to paint it into your view. |
| 197 | * \endlist |
| 198 | * |
| 199 | * Example with the first method: |
| 200 | * \code |
| 201 | * auto ranges = KFuzzyMatcher::matchedRanges(pattern, string); |
| 202 | * QList<QTextLayout::FormatRange> out; |
| 203 | * std::transform(ranges.begin(), ranges.end(), std::back_inserter(out), [](const KFuzzyMatcher::Range &fr){ |
| 204 | * return QTextLayout::FormatRange{fr.start, fr.length, QTextCharFormat()}; |
| 205 | * }); |
| 206 | * |
| 207 | * QTextLayout layout(text, font); |
| 208 | * layout.beginLayout(); |
| 209 | * QTextLine line = layout.createLine(); |
| 210 | * //... |
| 211 | * layout.endLayout(); |
| 212 | * |
| 213 | * layout.setFormats(layout.formats() + out); |
| 214 | * layout.draw(painter, position); |
| 215 | * \endcode |
| 216 | * |
| 217 | * If \a pattern is empty, the function will return an empty vector |
| 218 | * |
| 219 | * if \a type is \c RangeType::All, the function will try to get ranges even if |
| 220 | * the pattern didn't fully match. For example: |
| 221 | * \code |
| 222 | * pattern: "git" |
| 223 | * string: "gti" |
| 224 | * RangeType: All |
| 225 | * |
| 226 | * Output: [Range{0, 1}, Range{2, 1}] |
| 227 | * \endcode |
| 228 | * |
| 229 | * \a pattern to search for. For e.g., text entered by a user to filter a |
| 230 | * list or model |
| 231 | * |
| 232 | * \a str the current string from your list of strings |
| 233 | * |
| 234 | * \a type whether to consider ranges from full matches only or all matches including partial matches |
| 235 | * |
| 236 | * Returns A vector of ranges containing positions and lengths where the pattern |
| 237 | * matched. If there was no match, the vector will be empty |
| 238 | * |
| 239 | * \since 5.84 |
| 240 | */ |
| 241 | KCOREADDONS_EXPORT QList<KFuzzyMatcher::Range> matchedRanges(QStringView pattern, QStringView str, RangeType type = RangeType::FullyMatched); |
| 242 | |
| 243 | } // namespace KFuzzyMatcher |
| 244 | |
| 245 | Q_DECLARE_TYPEINFO(KFuzzyMatcher::Range, Q_PRIMITIVE_TYPE); |
| 246 | |
| 247 | #endif // KFUZZYMATCHER_H |
| 248 | |