| 1 | /* This is JavaScriptCore's variant of the PCRE library. While this library | 
| 2 | started out as a copy of PCRE, many of the features of PCRE have been | 
| 3 | removed. This library now supports only the regular expression features | 
| 4 | required by the JavaScript language specification, and has only the functions | 
| 5 | needed by JavaScriptCore and the rest of WebKit. | 
| 6 |  | 
| 7 |                  Originally written by Philip Hazel | 
| 8 |            Copyright (c) 1997-2006 University of Cambridge | 
| 9 |     Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | 
| 10 |  | 
| 11 | ----------------------------------------------------------------------------- | 
| 12 | Redistribution and use in source and binary forms, with or without | 
| 13 | modification, are permitted provided that the following conditions are met: | 
| 14 |  | 
| 15 |     * Redistributions of source code must retain the above copyright notice, | 
| 16 |       this list of conditions and the following disclaimer. | 
| 17 |  | 
| 18 |     * Redistributions in binary form must reproduce the above copyright | 
| 19 |       notice, this list of conditions and the following disclaimer in the | 
| 20 |       documentation and/or other materials provided with the distribution. | 
| 21 |  | 
| 22 |     * Neither the name of the University of Cambridge nor the names of its | 
| 23 |       contributors may be used to endorse or promote products derived from | 
| 24 |       this software without specific prior written permission. | 
| 25 |  | 
| 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
| 27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
| 30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
| 31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
| 32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
| 33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
| 34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
| 35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
| 36 | POSSIBILITY OF SUCH DAMAGE. | 
| 37 | ----------------------------------------------------------------------------- | 
| 38 | */ | 
| 39 |  | 
| 40 | /* This module contains an internal function that is used to match an extended | 
| 41 | class (one that contains characters whose values are > 255). */ | 
| 42 |  | 
| 43 | #include "config.h" | 
| 44 | #include "pcre_internal.h" | 
| 45 |  | 
| 46 | /************************************************* | 
| 47 | *       Match character against an XCLASS        * | 
| 48 | *************************************************/ | 
| 49 |  | 
/* jsc_pcre_xclass(), defined below after the UTF-8 decoding helper, is called
to match a character against an extended class that might contain values > 255.

Arguments:
  c           the character
  data        points to the flag byte of the XCLASS data

Returns:      true if character matches, else false
*/
| 59 |  | 
| 60 | /* Get the next UTF-8 character, advancing the pointer. This is called when we | 
| 61 |  know we are in UTF-8 mode. */ | 
| 62 |  | 
| 63 | static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr) | 
| 64 | { | 
| 65 |     c = *subjectPtr++; | 
| 66 |     if ((c & 0xc0) == 0xc0) { | 
| 67 |         int gcaa = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ | 
| 68 |         int gcss = 6 * gcaa; | 
| 69 |         c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss; | 
| 70 |         while (gcaa-- > 0) { | 
| 71 |             gcss -= 6; | 
| 72 |             c |= (*subjectPtr++ & 0x3f) << gcss; | 
| 73 |         } | 
| 74 |     } | 
| 75 | } | 
| 76 |  | 
| 77 | bool jsc_pcre_xclass(int c, const unsigned char* data) | 
| 78 | { | 
| 79 |     bool negated = (*data & XCL_NOT); | 
| 80 |      | 
| 81 |     /* Character values < 256 are matched against a bitmap, if one is present. If | 
| 82 |      not, we still carry on, because there may be ranges that start below 256 in the | 
| 83 |      additional data. */ | 
| 84 |      | 
| 85 |     if (c < 256) { | 
| 86 |         if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) | 
| 87 |             return !negated;   /* char found */ | 
| 88 |     } | 
| 89 |      | 
| 90 |     /* First skip the bit map if present. Then match against the list of Unicode | 
| 91 |      properties or large chars or ranges that end with a large char. We won't ever | 
| 92 |      encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ | 
| 93 |      | 
| 94 |     if ((*data++ & XCL_MAP) != 0) | 
| 95 |         data += 32; | 
| 96 |      | 
| 97 |     int t; | 
| 98 |     while ((t = *data++) != XCL_END) { | 
| 99 |         if (t == XCL_SINGLE) { | 
| 100 |             int x; | 
| 101 |             getUTF8CharAndAdvancePointer(c&: x, subjectPtr&: data); | 
| 102 |             if (c == x) | 
| 103 |                 return !negated; | 
| 104 |         } | 
| 105 |         else if (t == XCL_RANGE) { | 
| 106 |             int x, y; | 
| 107 |             getUTF8CharAndAdvancePointer(c&: x, subjectPtr&: data); | 
| 108 |             getUTF8CharAndAdvancePointer(c&: y, subjectPtr&: data); | 
| 109 |             if (c >= x && c <= y) | 
| 110 |                 return !negated; | 
| 111 |         } | 
| 112 |     } | 
| 113 |      | 
| 114 |     return negated;   /* char did not match */ | 
| 115 | } | 
| 116 |  |