| 1 | /* | 
| 2 |  * Copyright (C) 2009 Apple Inc. All rights reserved. | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions | 
| 6 |  * are met: | 
| 7 |  * 1. Redistributions of source code must retain the above copyright | 
| 8 |  *    notice, this list of conditions and the following disclaimer. | 
| 9 |  * 2. Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  * | 
| 13 |  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 
| 14 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 15 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
| 16 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR | 
| 17 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| 18 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| 19 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 20 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
| 21 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
| 22 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
| 23 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  | 
| 24 |  */ | 
| 25 |  | 
| 26 | #ifndef UStringImpl_h | 
| 27 | #define UStringImpl_h | 
| 28 |  | 
| 29 | #include <limits> | 
| 30 | #include <wtf/CrossThreadRefCounted.h> | 
| 31 | #include <wtf/OwnFastMallocPtr.h> | 
| 32 | #include <wtf/PossiblyNull.h> | 
| 33 | #include <wtf/StringHashFunctions.h> | 
| 34 | #include <wtf/Vector.h> | 
| 35 | #include <wtf/unicode/Unicode.h> | 
| 36 |  | 
| 37 | namespace JSC { | 
| 38 |  | 
| 39 | class IdentifierTable; | 
| 40 |    | 
| 41 | typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar; | 
| 42 |  | 
| 43 | class UStringImpl : Noncopyable { | 
| 44 | public: | 
| 45 |     template<size_t inlineCapacity> | 
| 46 |     static PassRefPtr<UStringImpl> adopt(Vector<UChar, inlineCapacity>& vector) | 
| 47 |     { | 
| 48 |         if (unsigned length = vector.size()) | 
| 49 |             return adoptRef(p: new UStringImpl(vector.releaseBuffer(), length, BufferOwned)); | 
| 50 |         return &empty(); | 
| 51 |     } | 
| 52 |  | 
| 53 |     static PassRefPtr<UStringImpl> create(const UChar* buffer, int length) | 
| 54 |     { | 
| 55 |         UChar* newBuffer; | 
| 56 |         if (PassRefPtr<UStringImpl> impl = tryCreateUninitialized(length, output&: newBuffer)) { | 
| 57 |             copyChars(destination: newBuffer, source: buffer, numCharacters: length); | 
| 58 |             return impl; | 
| 59 |         } | 
| 60 |         return &null(); | 
| 61 |     } | 
| 62 |  | 
| 63 |     static PassRefPtr<UStringImpl> create(PassRefPtr<UStringImpl> rep, int offset, int length) | 
| 64 |     { | 
| 65 |         ASSERT(rep); | 
| 66 |         rep->checkConsistency(); | 
| 67 |         return adoptRef(p: new UStringImpl(rep->m_data + offset, length, rep->bufferOwnerString())); | 
| 68 |     } | 
| 69 |  | 
| 70 |     static PassRefPtr<UStringImpl> create(PassRefPtr<SharedUChar> sharedBuffer, UChar* buffer, int length) | 
| 71 |     { | 
| 72 |         return adoptRef(p: new UStringImpl(buffer, length, sharedBuffer)); | 
| 73 |     } | 
| 74 |  | 
| 75 |     static PassRefPtr<UStringImpl> createUninitialized(unsigned length, UChar*& output) | 
| 76 |     { | 
| 77 |         if (!length) { | 
| 78 |             output = 0; | 
| 79 |             return &empty(); | 
| 80 |         } | 
| 81 |  | 
| 82 |         if (length > ((std::numeric_limits<size_t>::max() - sizeof(UStringImpl)) / sizeof(UChar))) | 
| 83 |             CRASH(); | 
| 84 |         UStringImpl* resultImpl = static_cast<UStringImpl*>(fastMalloc(sizeof(UChar) * length + sizeof(UStringImpl))); | 
| 85 |         output = reinterpret_cast<UChar*>(resultImpl + 1); | 
| 86 |         return adoptRef(p: new(resultImpl) UStringImpl(output, length, BufferInternal)); | 
| 87 |     } | 
| 88 |  | 
| 89 |     static PassRefPtr<UStringImpl> tryCreateUninitialized(unsigned length, UChar*& output) | 
| 90 |     { | 
| 91 |         if (!length) { | 
| 92 |             output = 0; | 
| 93 |             return &empty(); | 
| 94 |         } | 
| 95 |  | 
| 96 |         if (length > ((std::numeric_limits<size_t>::max() - sizeof(UStringImpl)) / sizeof(UChar))) | 
| 97 |             return 0; | 
| 98 |         UStringImpl* resultImpl; | 
| 99 |         if (!tryFastMalloc(n: sizeof(UChar) * length + sizeof(UStringImpl)).getValue(data&: resultImpl)) | 
| 100 |             return 0; | 
| 101 |         output = reinterpret_cast<UChar*>(resultImpl + 1); | 
| 102 |         return adoptRef(p: new(resultImpl) UStringImpl(output, length, BufferInternal)); | 
| 103 |     } | 
| 104 |  | 
| 105 |     SharedUChar* sharedBuffer(); | 
| 106 |     UChar* data() const { return m_data; } | 
| 107 |     int size() const { return m_length; } | 
| 108 |     size_t cost() | 
| 109 |     { | 
| 110 |         // For substrings, return the cost of the base string. | 
| 111 |         if (bufferOwnership() == BufferSubstring) | 
| 112 |             return m_bufferSubstring->cost(); | 
| 113 |  | 
| 114 |         if (m_refCountAndFlags & s_refCountFlagHasReportedCost) | 
| 115 |             return 0; | 
| 116 |         m_refCountAndFlags |= s_refCountFlagHasReportedCost; | 
| 117 |         return m_length; | 
| 118 |     } | 
| 119 |     unsigned hash() const { if (!m_hash) m_hash = computeHash(s: data(), length: m_length); return m_hash; } | 
| 120 |     unsigned existingHash() const { ASSERT(m_hash); return m_hash; } // fast path for Identifiers | 
| 121 |     void setHash(unsigned hash) { ASSERT(hash == computeHash(data(), m_length)); m_hash = hash; } // fast path for Identifiers | 
| 122 |     bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; } | 
| 123 |     void setIsIdentifier(bool isIdentifier) | 
| 124 |     { | 
| 125 |         if (isIdentifier) | 
| 126 |             m_refCountAndFlags |= s_refCountFlagIsIdentifier; | 
| 127 |         else | 
| 128 |             m_refCountAndFlags &= ~s_refCountFlagIsIdentifier; | 
| 129 |     } | 
| 130 |  | 
| 131 |     UStringImpl* ref() { m_refCountAndFlags += s_refCountIncrement; return this; } | 
| 132 |     ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & s_refCountMask)) delete this; } | 
| 133 |  | 
| 134 |     static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) | 
| 135 |     { | 
| 136 |         if (numCharacters <= s_copyCharsInlineCutOff) { | 
| 137 |             for (unsigned i = 0; i < numCharacters; ++i) | 
| 138 |                 destination[i] = source[i]; | 
| 139 |         } else | 
| 140 |             memcpy(dest: destination, src: source, n: numCharacters * sizeof(UChar)); | 
| 141 |     } | 
| 142 |  | 
| 143 |     static unsigned computeHash(const UChar* s, int length) { ASSERT(length >= 0); return WTF::stringHash(data: s, length); } | 
| 144 |     static unsigned computeHash(const char* s, int length) { ASSERT(length >= 0); return WTF::stringHash(data: s, length); } | 
| 145 |     static unsigned computeHash(const char* s) { return WTF::stringHash(data: s); } | 
| 146 |  | 
| 147 |     static UStringImpl& null() { return *s_null; } | 
| 148 |     static UStringImpl& empty() { return *s_empty; } | 
| 149 |  | 
| 150 |     ALWAYS_INLINE void checkConsistency() const | 
| 151 |     { | 
| 152 |         // There is no recursion of substrings. | 
| 153 |         ASSERT(bufferOwnerString()->bufferOwnership() != BufferSubstring); | 
| 154 |         // Static strings cannot be put in identifier tables, because they are globally shared. | 
| 155 |         ASSERT(!isStatic() || !isIdentifier()); | 
| 156 |     } | 
| 157 |  | 
| 158 | private: | 
| 159 |     enum BufferOwnership { | 
| 160 |         BufferInternal, | 
| 161 |         BufferOwned, | 
| 162 |         BufferSubstring, | 
| 163 |         BufferShared, | 
| 164 |     }; | 
| 165 |  | 
| 166 |     // For SmallStringStorage, which allocates an array and uses an in-place new. | 
| 167 |     UStringImpl() { } | 
| 168 |  | 
| 169 |     // Used to construct normal strings with an internal or external buffer. | 
| 170 |     UStringImpl(UChar* data, int length, BufferOwnership ownership) | 
| 171 |         : m_data(data) | 
| 172 |         , m_buffer(0) | 
| 173 |         , m_length(length) | 
| 174 |         , m_refCountAndFlags(s_refCountIncrement | ownership) | 
| 175 |         , m_hash(0) | 
| 176 |     { | 
| 177 |         ASSERT((ownership == BufferInternal) || (ownership == BufferOwned)); | 
| 178 |         checkConsistency(); | 
| 179 |     } | 
| 180 |  | 
| 181 |     // Used to construct static strings, which have an special refCount that can never hit zero. | 
| 182 |     // This means that the static string will never be destroyed, which is important because | 
| 183 |     // static strings will be shared across threads & ref-counted in a non-threadsafe manner. | 
| 184 |     enum StaticStringConstructType { ConstructStaticString }; | 
| 185 |     UStringImpl(UChar* data, int length, StaticStringConstructType) | 
| 186 |         : m_data(data) | 
| 187 |         , m_buffer(0) | 
| 188 |         , m_length(length) | 
| 189 |         , m_refCountAndFlags(s_refCountFlagStatic | BufferOwned) | 
| 190 |         , m_hash(0) | 
| 191 |     { | 
| 192 |         checkConsistency(); | 
| 193 |     } | 
| 194 |  | 
| 195 |     // Used to create new strings that are a substring of an existing string. | 
| 196 |     UStringImpl(UChar* data, int length, PassRefPtr<UStringImpl> base) | 
| 197 |         : m_data(data) | 
| 198 |         , m_bufferSubstring(base.releaseRef()) | 
| 199 |         , m_length(length) | 
| 200 |         , m_refCountAndFlags(s_refCountIncrement | BufferSubstring) | 
| 201 |         , m_hash(0) | 
| 202 |     { | 
| 203 |         // Do use static strings as a base for substrings; UntypedPtrAndBitfield assumes | 
| 204 |         // that all pointers will be at least 8-byte aligned, we cannot guarantee that of | 
| 205 |         // UStringImpls that are not heap allocated. | 
| 206 |         ASSERT(m_bufferSubstring->size()); | 
| 207 |         ASSERT(!m_bufferSubstring->isStatic()); | 
| 208 |         checkConsistency(); | 
| 209 |     } | 
| 210 |  | 
| 211 |     // Used to construct new strings sharing an existing shared buffer. | 
| 212 |     UStringImpl(UChar* data, int length, PassRefPtr<SharedUChar> sharedBuffer) | 
| 213 |         : m_data(data) | 
| 214 |         , m_bufferShared(sharedBuffer.releaseRef()) | 
| 215 |         , m_length(length) | 
| 216 |         , m_refCountAndFlags(s_refCountIncrement | BufferShared) | 
| 217 |         , m_hash(0) | 
| 218 |     { | 
| 219 |         checkConsistency(); | 
| 220 |     } | 
| 221 |  | 
| 222 | #if OS(SOLARIS) && COMPILER(SUNCC) | 
| 223 | public: // Otherwise the compiler complains about operator new not being accessible. | 
| 224 | #endif | 
| 225 | #if COMPILER(WINSCW) || COMPILER(XLC) | 
| 226 |     void* operator new(size_t size) { return Noncopyable::operator new(size); } | 
| 227 | #else | 
| 228 |     using Noncopyable::operator new; | 
| 229 | #endif | 
| 230 | #if OS(SOLARIS) && COMPILER(SUNCC) | 
| 231 | private: | 
| 232 | #endif | 
| 233 |     void* operator new(size_t, void* p) { return p; } | 
| 234 |  | 
| 235 |     ~UStringImpl(); | 
| 236 |  | 
| 237 |     // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. | 
| 238 |     static const int s_minLengthToShare = 10; | 
| 239 |     static const unsigned s_copyCharsInlineCutOff = 20; | 
| 240 |     // We initialize and increment/decrement the refCount for all normal (non-static) strings by the value 2. | 
| 241 |     // We initialize static strings with an odd number (specifically, 1), such that the refCount cannot reach zero. | 
| 242 |     static const unsigned s_refCountMask = 0xFFFFFFF0; | 
| 243 |     static const int s_refCountIncrement = 0x20; | 
| 244 |     static const int s_refCountFlagStatic = 0x10; | 
| 245 |     static const unsigned s_refCountFlagHasReportedCost = 0x8; | 
| 246 |     static const unsigned s_refCountFlagIsIdentifier = 0x4; | 
| 247 |     static const unsigned s_refCountMaskBufferOwnership = 0x3; | 
| 248 |  | 
| 249 |     UStringImpl* bufferOwnerString() { return (bufferOwnership() == BufferSubstring) ? m_bufferSubstring :  this; } | 
| 250 |     const UStringImpl* bufferOwnerString() const { return (bufferOwnership() == BufferSubstring) ? m_bufferSubstring :  this; } | 
| 251 |     SharedUChar* baseSharedBuffer(); | 
| 252 |     unsigned bufferOwnership() const { return m_refCountAndFlags & s_refCountMaskBufferOwnership; } | 
| 253 |     bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; } | 
| 254 |  | 
| 255 |     // unshared data | 
| 256 |     UChar* m_data; | 
| 257 |     union { | 
| 258 |         void* m_buffer; | 
| 259 |         UStringImpl* m_bufferSubstring; | 
| 260 |         SharedUChar* m_bufferShared; | 
| 261 |     }; | 
| 262 |     int m_length; | 
| 263 |     unsigned m_refCountAndFlags; | 
| 264 |     mutable unsigned m_hash; | 
| 265 |  | 
| 266 |     JS_EXPORTDATA static UStringImpl* s_null; | 
| 267 |     JS_EXPORTDATA static UStringImpl* s_empty; | 
| 268 |  | 
| 269 |     friend class JIT; | 
| 270 |     friend class SmallStringsStorage; | 
| 271 |     friend void initializeUString(); | 
| 272 | }; | 
| 273 |  | 
| 274 | bool equal(const UStringImpl*, const UStringImpl*); | 
| 275 |  | 
| 276 | } | 
| 277 |  | 
| 278 | #endif | 
| 279 |  |