| 1 | //===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===// | 
| 2 | // | 
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| 6 | // | 
| 7 | //===----------------------------------------------------------------------===// | 
| 8 | // | 
| 9 | // This file implements a POSIX regular expression matcher.  Both Basic and | 
| 10 | // Extended POSIX regular expressions (ERE) are supported.  EREs were extended | 
| 11 | // to support backreferences in matches. | 
| 12 | // This implementation also supports matching strings with embedded NUL chars. | 
| 13 | // | 
| 14 | //===----------------------------------------------------------------------===// | 
| 15 |  | 
| 16 | #ifndef LLVM_SUPPORT_REGEX_H | 
| 17 | #define LLVM_SUPPORT_REGEX_H | 
| 18 |  | 
| 19 | #include "llvm/ADT/BitmaskEnum.h" | 
| 20 | #include <string> | 
| 21 |  | 
| 22 | struct llvm_regex; | 
| 23 |  | 
| 24 | namespace llvm { | 
| 25 |   class StringRef; | 
| 26 |   template<typename T> class SmallVectorImpl; | 
| 27 |  | 
| 28 |   class Regex { | 
| 29 |   public: | 
| 30 |     enum RegexFlags : unsigned { | 
| 31 |       NoFlags = 0, | 
| 32 |       /// Compile for matching that ignores upper/lower case distinctions. | 
| 33 |       IgnoreCase = 1, | 
| 34 |       /// Compile for newline-sensitive matching. With this flag '[^' bracket | 
| 35 |       /// expressions and '.' never match newline. A ^ anchor matches the | 
| 36 |       /// null string after any newline in the string in addition to its normal | 
| 37 |       /// function, and the $ anchor matches the null string before any | 
| 38 |       /// newline in the string in addition to its normal function. | 
| 39 |       Newline = 2, | 
| 40 |       /// By default, the POSIX extended regular expression (ERE) syntax is | 
| 41 |       /// assumed. Pass this flag to turn on basic regular expressions (BRE) | 
| 42 |       /// instead. | 
| 43 |       BasicRegex = 4, | 
| 44 |  | 
| 45 |       LLVM_MARK_AS_BITMASK_ENUM(BasicRegex) | 
| 46 |     }; | 
| 47 |  | 
| 48 |     Regex(); | 
| 49 |     /// Compiles the given regular expression \p Regex. | 
| 50 |     /// | 
| 51 |     /// \param Regex - referenced string is no longer needed after this | 
| 52 |     /// constructor does finish.  Only its compiled form is kept stored. | 
| 53 |     Regex(StringRef Regex, RegexFlags Flags = NoFlags); | 
| 54 |     Regex(StringRef Regex, unsigned Flags); | 
| 55 |     Regex(const Regex &) = delete; | 
| 56 |     Regex &operator=(Regex regex) { | 
| 57 |       std::swap(a&: preg, b&: regex.preg); | 
| 58 |       std::swap(a&: error, b&: regex.error); | 
| 59 |       return *this; | 
| 60 |     } | 
| 61 |     Regex(Regex &®ex); | 
| 62 |     ~Regex(); | 
| 63 |  | 
| 64 |     /// isValid - returns the error encountered during regex compilation, if | 
| 65 |     /// any. | 
| 66 |     bool isValid(std::string &Error) const; | 
| 67 |     bool isValid() const { return !error; } | 
| 68 |  | 
| 69 |     /// getNumMatches - In a valid regex, return the number of parenthesized | 
| 70 |     /// matches it contains.  The number filled in by match will include this | 
| 71 |     /// many entries plus one for the whole regex (as element 0). | 
| 72 |     unsigned getNumMatches() const; | 
| 73 |  | 
| 74 |     /// matches - Match the regex against a given \p String. | 
| 75 |     /// | 
| 76 |     /// \param Matches - If given, on a successful match this will be filled in | 
| 77 |     /// with references to the matched group expressions (inside \p String), | 
| 78 |     /// the first group is always the entire pattern. | 
| 79 |     /// | 
| 80 |     /// \param Error - If non-null, any errors in the matching will be recorded | 
| 81 |     /// as a non-empty string. If there is no error, it will be an empty string. | 
| 82 |     /// | 
| 83 |     /// This returns true on a successful match. | 
| 84 |     bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr, | 
| 85 |                std::string *Error = nullptr) const; | 
| 86 |  | 
| 87 |     /// sub - Return the result of replacing the first match of the regex in | 
| 88 |     /// \p String with the \p Repl string. Backreferences like "\0" in the | 
| 89 |     /// replacement string are replaced with the appropriate match substring. | 
| 90 |     /// | 
| 91 |     /// Note that the replacement string has backslash escaping performed on | 
| 92 |     /// it. Invalid backreferences are ignored (replaced by empty strings). | 
| 93 |     /// | 
| 94 |     /// \param Error If non-null, any errors in the substitution (invalid | 
| 95 |     /// backreferences, trailing backslashes) will be recorded as a non-empty | 
| 96 |     /// string. If there is no error, it will be an empty string. | 
| 97 |     std::string sub(StringRef Repl, StringRef String, | 
| 98 |                     std::string *Error = nullptr) const; | 
| 99 |  | 
| 100 |     /// If this function returns true, ^Str$ is an extended regular | 
| 101 |     /// expression that matches Str and only Str. | 
| 102 |     static bool isLiteralERE(StringRef Str); | 
| 103 |  | 
| 104 |     /// Turn String into a regex by escaping its special characters. | 
| 105 |     static std::string escape(StringRef String); | 
| 106 |  | 
| 107 |   private: | 
| 108 |     struct llvm_regex *preg; | 
| 109 |     int error; | 
| 110 |   }; | 
| 111 | } | 
| 112 |  | 
| 113 | #endif // LLVM_SUPPORT_REGEX_H | 
| 114 |  |