diff options
Diffstat (limited to 'src/dutil/dictutil.cpp')
| -rw-r--r-- | src/dutil/dictutil.cpp | 769 |
1 files changed, 769 insertions, 0 deletions
diff --git a/src/dutil/dictutil.cpp b/src/dutil/dictutil.cpp new file mode 100644 index 00000000..1f0f9e43 --- /dev/null +++ b/src/dutil/dictutil.cpp | |||
| @@ -0,0 +1,769 @@ | |||
| 1 | // Copyright (c) .NET Foundation and contributors. All rights reserved. Licensed under the Microsoft Reciprocal License. See LICENSE.TXT file in the project root for full license information. | ||
| 2 | |||
| 3 | #include "precomp.h" | ||
| 4 | |||
| 5 | // These should all be primes, and spaced reasonably apart (currently each is about 4x the last) | ||
| 6 | const DWORD MAX_BUCKET_SIZES[] = { | ||
| 7 | 503, | ||
| 8 | 2017, | ||
| 9 | 7937, | ||
| 10 | 32779, | ||
| 11 | 131111, | ||
| 12 | 524341, | ||
| 13 | 2097709, | ||
| 14 | 8390857, | ||
| 15 | 33563437, | ||
| 16 | 134253719, | ||
| 17 | 537014927, | ||
| 18 | 2148059509 | ||
| 19 | }; | ||
| 20 | |||
| 21 | // However many items are in the cab, let's keep the buckets at least 8 times that to avoid collisions | ||
| 22 | #define MAX_BUCKETS_TO_ITEMS_RATIO 8 | ||
| 23 | |||
| 24 | enum DICT_TYPE | ||
| 25 | { | ||
| 26 | DICT_INVALID = 0, | ||
| 27 | DICT_EMBEDDED_KEY = 1, | ||
| 28 | DICT_STRING_LIST = 2 | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct STRINGDICT_STRUCT | ||
| 32 | { | ||
| 33 | DICT_TYPE dtType; | ||
| 34 | |||
| 35 | // Optional flags to control the behavior of the dictionary. | ||
| 36 | DICT_FLAG dfFlags; | ||
| 37 | |||
| 38 | // Index into MAX_BUCKET_SIZES (array of primes), representing number of buckets we've allocated | ||
| 39 | DWORD dwBucketSizeIndex; | ||
| 40 | |||
| 41 | // Number of items currently stored in the dict buckets | ||
| 42 | DWORD dwNumItems; | ||
| 43 | |||
| 44 | // Byte offset of key within bucket value, for collision checking - see | ||
| 45 | // comments above DictCreateEmbeddedKey() implementation for further details | ||
| 46 | size_t cByteOffset; | ||
| 47 | |||
| 48 | // The actual stored buckets | ||
| 49 | void **ppvBuckets; | ||
| 50 | |||
| 51 | // The actual stored items in the order they were added (used for auto freeing or enumerating) | ||
| 52 | void **ppvItemList; | ||
| 53 | |||
| 54 | // Pointer to the array of items, so the caller is free to resize the array of values out from under us without harm | ||
| 55 | void **ppvValueArray; | ||
| 56 | }; | ||
| 57 | |||
| 58 | const int STRINGDICT_HANDLE_BYTES = sizeof(STRINGDICT_STRUCT); | ||
| 59 | |||
| 60 | static HRESULT StringHash( | ||
| 61 | __in const STRINGDICT_STRUCT *psd, | ||
| 62 | __in DWORD dwNumBuckets, | ||
| 63 | __in_z LPCWSTR pszString, | ||
| 64 | __out LPDWORD pdwHash | ||
| 65 | ); | ||
| 66 | static BOOL IsMatchExact( | ||
| 67 | __in const STRINGDICT_STRUCT *psd, | ||
| 68 | __in DWORD dwMatchIndex, | ||
| 69 | __in_z LPCWSTR wzOriginalString | ||
| 70 | ); | ||
| 71 | static HRESULT GetValue( | ||
| 72 | __in const STRINGDICT_STRUCT *psd, | ||
| 73 | __in_z LPCWSTR pszString, | ||
| 74 | __out_opt void **ppvValue | ||
| 75 | ); | ||
| 76 | static HRESULT GetInsertIndex( | ||
| 77 | __in const STRINGDICT_STRUCT *psd, | ||
| 78 | __in DWORD dwBucketCount, | ||
| 79 | __in void **ppvBuckets, | ||
| 80 | __in_z LPCWSTR pszString, | ||
| 81 | __out DWORD *pdwOutput | ||
| 82 | ); | ||
| 83 | static HRESULT GetIndex( | ||
| 84 | __in const STRINGDICT_STRUCT *psd, | ||
| 85 | __in_z LPCWSTR pszString, | ||
| 86 | __out DWORD *pdwOutput | ||
| 87 | ); | ||
| 88 | static LPCWSTR GetKey( | ||
| 89 | __in const STRINGDICT_STRUCT *psd, | ||
| 90 | __in void *pvValue | ||
| 91 | ); | ||
| 92 | static HRESULT GrowDictionary( | ||
| 93 | __inout STRINGDICT_STRUCT *psd | ||
| 94 | ); | ||
| 95 | // These 2 helper functions allow us to safely handle dictutil consumers resizing | ||
| 96 | // the value array by storing "offsets" instead of raw void *'s in our buckets. | ||
| 97 | static void * TranslateOffsetToValue( | ||
| 98 | __in const STRINGDICT_STRUCT *psd, | ||
| 99 | __in void *pvValue | ||
| 100 | ); | ||
| 101 | static void * TranslateValueToOffset( | ||
| 102 | __in const STRINGDICT_STRUCT *psd, | ||
| 103 | __in void *pvValue | ||
| 104 | ); | ||
| 105 | |||
| 106 | // The dict will store a set of keys (as wide-char strings) and a set of values associated with those keys (as void *'s). | ||
| 107 | // However, to support collision checking, the key needs to be represented in the "value" object (pointed to | ||
| 108 | // by the void *). The "stByteOffset" parameter tells this dict the byte offset of the "key" string pointer | ||
| 109 | // within the "value" object. Use the offsetof() macro to fill this out. | ||
| 110 | // The "ppvArray" parameter gives dictutil the address of your value array. If you provide this parameter, | ||
| 111 | // dictutil will remember all pointer values provided as "offsets" against this array. It is only necessary to provide | ||
| 112 | // this parameter to dictutil if it is possible you will realloc the array. | ||
| 113 | // | ||
| 114 | // Use DictAddValue() and DictGetValue() with this dictionary type. | ||
| 115 | extern "C" HRESULT DAPI DictCreateWithEmbeddedKey( | ||
| 116 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
| 117 | __in DWORD dwNumExpectedItems, | ||
| 118 | __in_opt void **ppvArray, | ||
| 119 | __in size_t cByteOffset, | ||
| 120 | __in DICT_FLAG dfFlags | ||
| 121 | ) | ||
| 122 | { | ||
| 123 | HRESULT hr = S_OK; | ||
| 124 | |||
| 125 | ExitOnNull(psdHandle, hr, E_INVALIDARG, "Handle not specified while creating dict"); | ||
| 126 | |||
| 127 | // Allocate the handle | ||
| 128 | *psdHandle = static_cast<STRINGDICT_HANDLE>(MemAlloc(sizeof(STRINGDICT_STRUCT), FALSE)); | ||
| 129 | ExitOnNull(*psdHandle, hr, E_OUTOFMEMORY, "Failed to allocate dictionary object"); | ||
| 130 | |||
| 131 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(*psdHandle); | ||
| 132 | |||
| 133 | // Fill out the new handle's values | ||
| 134 | psd->dtType = DICT_EMBEDDED_KEY; | ||
| 135 | psd->dfFlags = dfFlags; | ||
| 136 | psd->cByteOffset = cByteOffset; | ||
| 137 | psd->dwBucketSizeIndex = 0; | ||
| 138 | psd->dwNumItems = 0; | ||
| 139 | psd->ppvItemList = NULL; | ||
| 140 | psd->ppvValueArray = ppvArray; | ||
| 141 | |||
| 142 | // Make psd->dwBucketSizeIndex point to the appropriate spot in the prime | ||
| 143 | // array based on expected number of items and items to buckets ratio | ||
| 144 | // Careful: the "-1" in "countof(MAX_BUCKET_SIZES)-1" ensures we don't end | ||
| 145 | // this loop past the end of the array! | ||
| 146 | while (psd->dwBucketSizeIndex < (countof(MAX_BUCKET_SIZES)-1) && | ||
| 147 | MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] < dwNumExpectedItems * MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 148 | { | ||
| 149 | ++psd->dwBucketSizeIndex; | ||
| 150 | } | ||
| 151 | |||
| 152 | // Finally, allocate our initial buckets | ||
| 153 | psd->ppvBuckets = static_cast<void**>(MemAlloc(sizeof(void *) * MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], TRUE)); | ||
| 154 | ExitOnNull(psd->ppvBuckets, hr, E_OUTOFMEMORY, "Failed to allocate buckets for dictionary"); | ||
| 155 | |||
| 156 | LExit: | ||
| 157 | return hr; | ||
| 158 | } | ||
| 159 | |||
| 160 | // The dict will store a set of keys, with no values associated with them. Use DictAddKey() and DictKeyExists() with this dictionary type. | ||
| 161 | extern "C" HRESULT DAPI DictCreateStringList( | ||
| 162 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
| 163 | __in DWORD dwNumExpectedItems, | ||
| 164 | __in DICT_FLAG dfFlags | ||
| 165 | ) | ||
| 166 | { | ||
| 167 | HRESULT hr = S_OK; | ||
| 168 | |||
| 169 | ExitOnNull(psdHandle, hr, E_INVALIDARG, "Handle not specified while creating dict"); | ||
| 170 | |||
| 171 | // Allocate the handle | ||
| 172 | *psdHandle = static_cast<STRINGDICT_HANDLE>(MemAlloc(sizeof(STRINGDICT_STRUCT), FALSE)); | ||
| 173 | ExitOnNull(*psdHandle, hr, E_OUTOFMEMORY, "Failed to allocate dictionary object"); | ||
| 174 | |||
| 175 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(*psdHandle); | ||
| 176 | |||
| 177 | // Fill out the new handle's values | ||
| 178 | psd->dtType = DICT_STRING_LIST; | ||
| 179 | psd->dfFlags = dfFlags; | ||
| 180 | psd->cByteOffset = 0; | ||
| 181 | psd->dwBucketSizeIndex = 0; | ||
| 182 | psd->dwNumItems = 0; | ||
| 183 | psd->ppvItemList = NULL; | ||
| 184 | psd->ppvValueArray = NULL; | ||
| 185 | |||
| 186 | // Make psd->dwBucketSizeIndex point to the appropriate spot in the prime | ||
| 187 | // array based on expected number of items and items to buckets ratio | ||
| 188 | // Careful: the "-1" in "countof(MAX_BUCKET_SIZES)-1" ensures we don't end | ||
| 189 | // this loop past the end of the array! | ||
| 190 | while (psd->dwBucketSizeIndex < (countof(MAX_BUCKET_SIZES)-1) && | ||
| 191 | MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] < dwNumExpectedItems * MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 192 | { | ||
| 193 | ++psd->dwBucketSizeIndex; | ||
| 194 | } | ||
| 195 | |||
| 196 | // Finally, allocate our initial buckets | ||
| 197 | psd->ppvBuckets = static_cast<void**>(MemAlloc(sizeof(void *) * MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], TRUE)); | ||
| 198 | ExitOnNull(psd->ppvBuckets, hr, E_OUTOFMEMORY, "Failed to allocate buckets for dictionary"); | ||
| 199 | |||
| 200 | LExit: | ||
| 201 | return hr; | ||
| 202 | } | ||
| 203 | |||
| 204 | extern "C" HRESULT DAPI DictCreateStringListFromArray( | ||
| 205 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
| 206 | __in_ecount(cStringArray) const LPCWSTR* rgwzStringArray, | ||
| 207 | __in const DWORD cStringArray, | ||
| 208 | __in DICT_FLAG dfFlags | ||
| 209 | ) | ||
| 210 | { | ||
| 211 | HRESULT hr = S_OK; | ||
| 212 | STRINGDICT_HANDLE sd = NULL; | ||
| 213 | |||
| 214 | hr = DictCreateStringList(&sd, cStringArray, dfFlags); | ||
| 215 | ExitOnFailure(hr, "Failed to create the string dictionary."); | ||
| 216 | |||
| 217 | for (DWORD i = 0; i < cStringArray; ++i) | ||
| 218 | { | ||
| 219 | const LPCWSTR wzKey = rgwzStringArray[i]; | ||
| 220 | |||
| 221 | hr = DictKeyExists(sd, wzKey); | ||
| 222 | if (E_NOTFOUND != hr) | ||
| 223 | { | ||
| 224 | ExitOnFailure(hr, "Failed to check the string dictionary."); | ||
| 225 | } | ||
| 226 | else | ||
| 227 | { | ||
| 228 | hr = DictAddKey(sd, wzKey); | ||
| 229 | ExitOnFailure(hr, "Failed to add \"%ls\" to the string dictionary.", wzKey); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | *psdHandle = sd; | ||
| 234 | sd = NULL; | ||
| 235 | |||
| 236 | LExit: | ||
| 237 | ReleaseDict(sd); | ||
| 238 | |||
| 239 | return hr; | ||
| 240 | } | ||
| 241 | |||
| 242 | extern "C" HRESULT DAPI DictCompareStringListToArray( | ||
| 243 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdStringList, | ||
| 244 | __in_ecount(cStringArray) const LPCWSTR* rgwzStringArray, | ||
| 245 | __in const DWORD cStringArray | ||
| 246 | ) | ||
| 247 | { | ||
| 248 | HRESULT hr = S_OK; | ||
| 249 | |||
| 250 | for (DWORD i = 0; i < cStringArray; ++i) | ||
| 251 | { | ||
| 252 | hr = DictKeyExists(sdStringList, rgwzStringArray[i]); | ||
| 253 | if (E_NOTFOUND != hr) | ||
| 254 | { | ||
| 255 | ExitOnFailure(hr, "Failed to check the string dictionary."); | ||
| 256 | ExitFunction1(hr = S_OK); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | ExitFunction1(hr = HRESULT_FROM_WIN32(ERROR_NO_MATCH)); | ||
| 261 | |||
| 262 | LExit: | ||
| 263 | return hr; | ||
| 264 | } | ||
| 265 | |||
| 266 | // Todo: Dict should resize itself when (number of items) exceeds (number of buckets / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 267 | extern "C" HRESULT DAPI DictAddKey( | ||
| 268 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle, | ||
| 269 | __in_z LPCWSTR pszString | ||
| 270 | ) | ||
| 271 | { | ||
| 272 | HRESULT hr = S_OK; | ||
| 273 | DWORD dwIndex = 0; | ||
| 274 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
| 275 | |||
| 276 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while adding value to dict"); | ||
| 277 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while adding value to dict"); | ||
| 278 | |||
| 279 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
| 280 | { | ||
| 281 | hr = E_INVALIDARG; | ||
| 282 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
| 283 | } | ||
| 284 | |||
| 285 | if (DICT_STRING_LIST != psd->dtType) | ||
| 286 | { | ||
| 287 | hr = E_INVALIDARG; | ||
| 288 | ExitOnFailure(hr, "Tried to add key without value to wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
| 289 | } | ||
| 290 | |||
| 291 | if ((psd->dwNumItems + 1) >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 292 | { | ||
| 293 | hr = GrowDictionary(psd); | ||
| 294 | if (HRESULT_FROM_WIN32(ERROR_DATABASE_FULL) == hr) | ||
| 295 | { | ||
| 296 | // If we fail to proactively grow the dictionary, don't fail unless the dictionary is completely full | ||
| 297 | if (psd->dwNumItems < MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
| 298 | { | ||
| 299 | hr = S_OK; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | ExitOnFailure(hr, "Failed to grow dictionary"); | ||
| 303 | } | ||
| 304 | |||
| 305 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], psd->ppvBuckets, pszString, &dwIndex); | ||
| 306 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
| 307 | |||
| 308 | hr = MemEnsureArraySize(reinterpret_cast<void **>(&(psd->ppvItemList)), psd->dwNumItems + 1, sizeof(void *), 1000); | ||
| 309 | ExitOnFailure(hr, "Failed to resize list of items in dictionary"); | ||
| 310 | ++psd->dwNumItems; | ||
| 311 | |||
| 312 | hr = StrAllocString(reinterpret_cast<LPWSTR *>(&(psd->ppvBuckets[dwIndex])), pszString, 0); | ||
| 313 | ExitOnFailure(hr, "Failed to allocate copy of string"); | ||
| 314 | |||
| 315 | psd->ppvItemList[psd->dwNumItems-1] = psd->ppvBuckets[dwIndex]; | ||
| 316 | |||
| 317 | LExit: | ||
| 318 | return hr; | ||
| 319 | } | ||
| 320 | |||
| 321 | // Todo: Dict should resize itself when (number of items) exceeds (number of buckets / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 322 | extern "C" HRESULT DAPI DictAddValue( | ||
| 323 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle, | ||
| 324 | __in void *pvValue | ||
| 325 | ) | ||
| 326 | { | ||
| 327 | HRESULT hr = S_OK; | ||
| 328 | void *pvOffset = NULL; | ||
| 329 | LPCWSTR wzKey = NULL; | ||
| 330 | DWORD dwIndex = 0; | ||
| 331 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
| 332 | |||
| 333 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while adding value to dict"); | ||
| 334 | ExitOnNull(pvValue, hr, E_INVALIDARG, "Value not specified while adding value to dict"); | ||
| 335 | |||
| 336 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
| 337 | { | ||
| 338 | hr = E_INVALIDARG; | ||
| 339 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
| 340 | } | ||
| 341 | |||
| 342 | if (DICT_EMBEDDED_KEY != psd->dtType) | ||
| 343 | { | ||
| 344 | hr = E_INVALIDARG; | ||
| 345 | ExitOnFailure(hr, "Tried to add key/value pair to wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
| 346 | } | ||
| 347 | |||
| 348 | wzKey = GetKey(psd, pvValue); | ||
| 349 | ExitOnNull(wzKey, hr, E_INVALIDARG, "String not specified while adding value to dict"); | ||
| 350 | |||
| 351 | if ((psd->dwNumItems + 1) >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
| 352 | { | ||
| 353 | hr = GrowDictionary(psd); | ||
| 354 | if (HRESULT_FROM_WIN32(ERROR_DATABASE_FULL) == hr && psd->dwNumItems + 1 ) | ||
| 355 | { | ||
| 356 | // If we fail to proactively grow the dictionary, don't fail unless the dictionary is completely full | ||
| 357 | if (psd->dwNumItems < MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
| 358 | { | ||
| 359 | hr = S_OK; | ||
| 360 | } | ||
| 361 | } | ||
| 362 | ExitOnFailure(hr, "Failed to grow dictionary"); | ||
| 363 | } | ||
| 364 | |||
| 365 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], psd->ppvBuckets, wzKey, &dwIndex); | ||
| 366 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
| 367 | |||
| 368 | hr = MemEnsureArraySize(reinterpret_cast<void **>(&(psd->ppvItemList)), psd->dwNumItems + 1, sizeof(void *), 1000); | ||
| 369 | ExitOnFailure(hr, "Failed to resize list of items in dictionary"); | ||
| 370 | ++psd->dwNumItems; | ||
| 371 | |||
| 372 | pvOffset = TranslateValueToOffset(psd, pvValue); | ||
| 373 | psd->ppvBuckets[dwIndex] = pvOffset; | ||
| 374 | psd->ppvItemList[psd->dwNumItems-1] = pvOffset; | ||
| 375 | |||
| 376 | LExit: | ||
| 377 | return hr; | ||
| 378 | } | ||
| 379 | |||
| 380 | extern "C" HRESULT DAPI DictGetValue( | ||
| 381 | __in_bcount(STRINGDICT_HANDLE_BYTES) C_STRINGDICT_HANDLE sdHandle, | ||
| 382 | __in_z LPCWSTR pszString, | ||
| 383 | __out void **ppvValue | ||
| 384 | ) | ||
| 385 | { | ||
| 386 | HRESULT hr = S_OK; | ||
| 387 | |||
| 388 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
| 389 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
| 390 | |||
| 391 | const STRINGDICT_STRUCT *psd = static_cast<const STRINGDICT_STRUCT *>(sdHandle); | ||
| 392 | |||
| 393 | if (DICT_EMBEDDED_KEY != psd->dtType) | ||
| 394 | { | ||
| 395 | hr = E_INVALIDARG; | ||
| 396 | ExitOnFailure(hr, "Tried to lookup value in wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
| 397 | } | ||
| 398 | |||
| 399 | hr = GetValue(psd, pszString, ppvValue); | ||
| 400 | if (E_NOTFOUND == hr) | ||
| 401 | { | ||
| 402 | ExitFunction(); | ||
| 403 | } | ||
| 404 | ExitOnFailure(hr, "Failed to call internal GetValue()"); | ||
| 405 | |||
| 406 | LExit: | ||
| 407 | return hr; | ||
| 408 | } | ||
| 409 | |||
| 410 | extern "C" HRESULT DAPI DictKeyExists( | ||
| 411 | __in_bcount(STRINGDICT_HANDLE_BYTES) C_STRINGDICT_HANDLE sdHandle, | ||
| 412 | __in_z LPCWSTR pszString | ||
| 413 | ) | ||
| 414 | { | ||
| 415 | HRESULT hr = S_OK; | ||
| 416 | |||
| 417 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
| 418 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
| 419 | |||
| 420 | const STRINGDICT_STRUCT *psd = static_cast<const STRINGDICT_STRUCT *>(sdHandle); | ||
| 421 | |||
| 422 | // This works with either type of dictionary | ||
| 423 | hr = GetValue(psd, pszString, NULL); | ||
| 424 | if (E_NOTFOUND == hr) | ||
| 425 | { | ||
| 426 | ExitFunction(); | ||
| 427 | } | ||
| 428 | ExitOnFailure(hr, "Failed to call internal GetValue()"); | ||
| 429 | |||
| 430 | LExit: | ||
| 431 | return hr; | ||
| 432 | } | ||
| 433 | |||
| 434 | extern "C" void DAPI DictDestroy( | ||
| 435 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle | ||
| 436 | ) | ||
| 437 | { | ||
| 438 | DWORD i; | ||
| 439 | |||
| 440 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
| 441 | |||
| 442 | if (DICT_STRING_LIST == psd->dtType) | ||
| 443 | { | ||
| 444 | for (i = 0; i < psd->dwNumItems; ++i) | ||
| 445 | { | ||
| 446 | ReleaseStr(reinterpret_cast<LPWSTR>(psd->ppvItemList[i])); | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 450 | ReleaseMem(psd->ppvItemList); | ||
| 451 | ReleaseMem(psd->ppvBuckets); | ||
| 452 | ReleaseMem(psd); | ||
| 453 | } | ||
| 454 | |||
| 455 | static HRESULT StringHash( | ||
| 456 | __in const STRINGDICT_STRUCT *psd, | ||
| 457 | __in DWORD dwNumBuckets, | ||
| 458 | __in_z LPCWSTR pszString, | ||
| 459 | __out DWORD *pdwHash | ||
| 460 | ) | ||
| 461 | { | ||
| 462 | HRESULT hr = S_OK; | ||
| 463 | LPCWSTR wzKey = NULL; | ||
| 464 | LPWSTR sczNewKey = NULL; | ||
| 465 | DWORD result = 0; | ||
| 466 | |||
| 467 | if (DICT_FLAG_CASEINSENSITIVE & psd->dfFlags) | ||
| 468 | { | ||
| 469 | hr = StrAllocStringToUpperInvariant(&sczNewKey, pszString, 0); | ||
| 470 | ExitOnFailure(hr, "Failed to convert the string to upper-case."); | ||
| 471 | |||
| 472 | wzKey = sczNewKey; | ||
| 473 | } | ||
| 474 | else | ||
| 475 | { | ||
| 476 | wzKey = pszString; | ||
| 477 | } | ||
| 478 | |||
| 479 | while (*wzKey) | ||
| 480 | { | ||
| 481 | result = ~(*wzKey++ * 509) + result * 65599; | ||
| 482 | } | ||
| 483 | |||
| 484 | *pdwHash = result % dwNumBuckets; | ||
| 485 | |||
| 486 | LExit: | ||
| 487 | ReleaseStr(sczNewKey); | ||
| 488 | |||
| 489 | return hr; | ||
| 490 | } | ||
| 491 | |||
| 492 | static BOOL IsMatchExact( | ||
| 493 | __in const STRINGDICT_STRUCT *psd, | ||
| 494 | __in DWORD dwMatchIndex, | ||
| 495 | __in_z LPCWSTR wzOriginalString | ||
| 496 | ) | ||
| 497 | { | ||
| 498 | LPCWSTR wzMatchString = GetKey(psd, TranslateOffsetToValue(psd, psd->ppvBuckets[dwMatchIndex])); | ||
| 499 | DWORD dwFlags = 0; | ||
| 500 | |||
| 501 | if (DICT_FLAG_CASEINSENSITIVE & psd->dfFlags) | ||
| 502 | { | ||
| 503 | dwFlags |= NORM_IGNORECASE; | ||
| 504 | } | ||
| 505 | |||
| 506 | if (CSTR_EQUAL == ::CompareStringW(LOCALE_INVARIANT, dwFlags, wzOriginalString, -1, wzMatchString, -1)) | ||
| 507 | { | ||
| 508 | return TRUE; | ||
| 509 | } | ||
| 510 | |||
| 511 | return FALSE; | ||
| 512 | } | ||
| 513 | |||
| 514 | static HRESULT GetValue( | ||
| 515 | __in const STRINGDICT_STRUCT *psd, | ||
| 516 | __in_z LPCWSTR pszString, | ||
| 517 | __out_opt void **ppvValue | ||
| 518 | ) | ||
| 519 | { | ||
| 520 | HRESULT hr = S_OK; | ||
| 521 | DWORD dwOriginalIndexCandidate = 0; | ||
| 522 | void *pvCandidateValue = NULL; | ||
| 523 | DWORD dwIndex = 0; | ||
| 524 | |||
| 525 | ExitOnNull(psd, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
| 526 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
| 527 | |||
| 528 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
| 529 | { | ||
| 530 | hr = E_INVALIDARG; | ||
| 531 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
| 532 | } | ||
| 533 | |||
| 534 | hr = StringHash(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], pszString, &dwOriginalIndexCandidate); | ||
| 535 | ExitOnFailure(hr, "Failed to hash the string."); | ||
| 536 | |||
| 537 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
| 538 | |||
| 539 | pvCandidateValue = TranslateOffsetToValue(psd, psd->ppvBuckets[dwIndexCandidate]); | ||
| 540 | |||
| 541 | // If no match exists in the dict | ||
| 542 | if (NULL == pvCandidateValue) | ||
| 543 | { | ||
| 544 | if (NULL != ppvValue) | ||
| 545 | { | ||
| 546 | *ppvValue = NULL; | ||
| 547 | } | ||
| 548 | ExitFunction1(hr = E_NOTFOUND); | ||
| 549 | } | ||
| 550 | |||
| 551 | hr = GetIndex(psd, pszString, &dwIndex); | ||
| 552 | if (E_NOTFOUND == hr) | ||
| 553 | { | ||
| 554 | ExitFunction(); | ||
| 555 | } | ||
| 556 | ExitOnFailure(hr, "Failed to find index to get"); | ||
| 557 | |||
| 558 | if (NULL != ppvValue) | ||
| 559 | { | ||
| 560 | *ppvValue = TranslateOffsetToValue(psd, psd->ppvBuckets[dwIndex]); | ||
| 561 | } | ||
| 562 | |||
| 563 | LExit: | ||
| 564 | if (FAILED(hr) && NULL != ppvValue) | ||
| 565 | { | ||
| 566 | *ppvValue = NULL; | ||
| 567 | } | ||
| 568 | |||
| 569 | return hr; | ||
| 570 | } | ||
| 571 | |||
| 572 | static HRESULT GetInsertIndex( | ||
| 573 | __in const STRINGDICT_STRUCT *psd, | ||
| 574 | __in DWORD dwBucketCount, | ||
| 575 | __in void **ppvBuckets, | ||
| 576 | __in_z LPCWSTR pszString, | ||
| 577 | __out DWORD *pdwOutput | ||
| 578 | ) | ||
| 579 | { | ||
| 580 | HRESULT hr = S_OK; | ||
| 581 | DWORD dwOriginalIndexCandidate = 0; | ||
| 582 | |||
| 583 | hr = StringHash(psd, dwBucketCount, pszString, &dwOriginalIndexCandidate); | ||
| 584 | ExitOnFailure(hr, "Failed to hash the string."); | ||
| 585 | |||
| 586 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
| 587 | |||
| 588 | // If we collide, keep iterating forward from our intended position, even wrapping around to zero, until we find an empty bucket | ||
| 589 | #pragma prefast(push) | ||
| 590 | #pragma prefast(disable:26007) | ||
| 591 | while (NULL != ppvBuckets[dwIndexCandidate]) | ||
| 592 | #pragma prefast(pop) | ||
| 593 | { | ||
| 594 | ++dwIndexCandidate; | ||
| 595 | |||
| 596 | // If we got to the end of the array, wrap around to zero index | ||
| 597 | if (dwIndexCandidate >= dwBucketCount) | ||
| 598 | { | ||
| 599 | dwIndexCandidate = 0; | ||
| 600 | } | ||
| 601 | |||
| 602 | // If we wrapped all the way back around to our original index, the dict is full - throw an error | ||
| 603 | if (dwIndexCandidate == dwOriginalIndexCandidate) | ||
| 604 | { | ||
| 605 | // The dict table is full - this error seems to be a reasonably close match | ||
| 606 | hr = HRESULT_FROM_WIN32(ERROR_DATABASE_FULL); | ||
| 607 | ExitOnRootFailure(hr, "Failed to add item '%ls' to dict table because dict table is full of items", pszString); | ||
| 608 | } | ||
| 609 | } | ||
| 610 | |||
| 611 | *pdwOutput = dwIndexCandidate; | ||
| 612 | |||
| 613 | LExit: | ||
| 614 | return hr; | ||
| 615 | } | ||
| 616 | |||
| 617 | static HRESULT GetIndex( | ||
| 618 | __in const STRINGDICT_STRUCT *psd, | ||
| 619 | __in_z LPCWSTR pszString, | ||
| 620 | __out DWORD *pdwOutput | ||
| 621 | ) | ||
| 622 | { | ||
| 623 | HRESULT hr = S_OK; | ||
| 624 | DWORD dwOriginalIndexCandidate = 0; | ||
| 625 | |||
| 626 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
| 627 | { | ||
| 628 | hr = E_INVALIDARG; | ||
| 629 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
| 630 | } | ||
| 631 | |||
| 632 | hr = StringHash(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], pszString, &dwOriginalIndexCandidate); | ||
| 633 | ExitOnFailure(hr, "Failed to hash the string."); | ||
| 634 | |||
| 635 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
| 636 | |||
| 637 | while (!IsMatchExact(psd, dwIndexCandidate, pszString)) | ||
| 638 | { | ||
| 639 | ++dwIndexCandidate; | ||
| 640 | |||
| 641 | // If we got to the end of the array, wrap around to zero index | ||
| 642 | if (dwIndexCandidate >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
| 643 | { | ||
| 644 | dwIndexCandidate = 0; | ||
| 645 | } | ||
| 646 | |||
| 647 | // If no match exists in the dict | ||
| 648 | if (NULL == psd->ppvBuckets[dwIndexCandidate]) | ||
| 649 | { | ||
| 650 | ExitFunction1(hr = E_NOTFOUND); | ||
| 651 | } | ||
| 652 | |||
| 653 | // If we wrapped all the way back around to our original index, the dict is full and we found nothing, so return as such | ||
| 654 | if (dwIndexCandidate == dwOriginalIndexCandidate) | ||
| 655 | { | ||
| 656 | ExitFunction1(hr = E_NOTFOUND); | ||
| 657 | } | ||
| 658 | } | ||
| 659 | |||
| 660 | *pdwOutput = dwIndexCandidate; | ||
| 661 | |||
| 662 | LExit: | ||
| 663 | return hr; | ||
| 664 | } | ||
| 665 | |||
| 666 | static LPCWSTR GetKey( | ||
| 667 | __in const STRINGDICT_STRUCT *psd, | ||
| 668 | __in void *pvValue | ||
| 669 | ) | ||
| 670 | { | ||
| 671 | const BYTE *lpByte = reinterpret_cast<BYTE *>(pvValue); | ||
| 672 | |||
| 673 | if (DICT_EMBEDDED_KEY == psd->dtType) | ||
| 674 | { | ||
| 675 | void *pvKey = reinterpret_cast<void *>(reinterpret_cast<BYTE *>(pvValue) + psd->cByteOffset); | ||
| 676 | |||
| 677 | #pragma prefast(push) | ||
| 678 | #pragma prefast(disable:26010) | ||
| 679 | return *(reinterpret_cast<LPCWSTR *>(pvKey)); | ||
| 680 | #pragma prefast(pop) | ||
| 681 | } | ||
| 682 | else | ||
| 683 | { | ||
| 684 | return (reinterpret_cast<LPCWSTR>(lpByte)); | ||
| 685 | } | ||
| 686 | } | ||
| 687 | |||
| 688 | static HRESULT GrowDictionary( | ||
| 689 | __inout STRINGDICT_STRUCT *psd | ||
| 690 | ) | ||
| 691 | { | ||
| 692 | HRESULT hr = S_OK; | ||
| 693 | DWORD dwInsertIndex = 0; | ||
| 694 | LPCWSTR wzKey = NULL; | ||
| 695 | DWORD dwNewBucketSizeIndex = 0; | ||
| 696 | size_t cbAllocSize = 0; | ||
| 697 | void **ppvNewBuckets = NULL; | ||
| 698 | |||
| 699 | dwNewBucketSizeIndex = psd->dwBucketSizeIndex + 1; | ||
| 700 | |||
| 701 | if (dwNewBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
| 702 | { | ||
| 703 | ExitFunction1(hr = HRESULT_FROM_WIN32(ERROR_DATABASE_FULL)); | ||
| 704 | } | ||
| 705 | |||
| 706 | hr = ::SizeTMult(sizeof(void *), MAX_BUCKET_SIZES[dwNewBucketSizeIndex], &cbAllocSize); | ||
| 707 | ExitOnFailure(hr, "Overflow while calculating allocation size to grow dictionary"); | ||
| 708 | |||
| 709 | ppvNewBuckets = static_cast<void**>(MemAlloc(cbAllocSize, TRUE)); | ||
| 710 | ExitOnNull(ppvNewBuckets, hr, E_OUTOFMEMORY, "Failed to allocate %u buckets while growing dictionary", MAX_BUCKET_SIZES[dwNewBucketSizeIndex]); | ||
| 711 | |||
| 712 | for (DWORD i = 0; i < psd->dwNumItems; ++i) | ||
| 713 | { | ||
| 714 | wzKey = GetKey(psd, TranslateOffsetToValue(psd, psd->ppvItemList[i])); | ||
| 715 | ExitOnNull(wzKey, hr, E_INVALIDARG, "String not specified in existing dict value"); | ||
| 716 | |||
| 717 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[dwNewBucketSizeIndex], ppvNewBuckets, wzKey, &dwInsertIndex); | ||
| 718 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
| 719 | |||
| 720 | ppvNewBuckets[dwInsertIndex] = psd->ppvItemList[i]; | ||
| 721 | } | ||
| 722 | |||
| 723 | psd->dwBucketSizeIndex = dwNewBucketSizeIndex; | ||
| 724 | ReleaseMem(psd->ppvBuckets); | ||
| 725 | psd->ppvBuckets = ppvNewBuckets; | ||
| 726 | ppvNewBuckets = NULL; | ||
| 727 | |||
| 728 | LExit: | ||
| 729 | ReleaseMem(ppvNewBuckets); | ||
| 730 | |||
| 731 | return hr; | ||
| 732 | } | ||
| 733 | |||
| 734 | static void * TranslateOffsetToValue( | ||
| 735 | __in const STRINGDICT_STRUCT *psd, | ||
| 736 | __in void *pvValue | ||
| 737 | ) | ||
| 738 | { | ||
| 739 | if (NULL == pvValue) | ||
| 740 | { | ||
| 741 | return NULL; | ||
| 742 | } | ||
| 743 | |||
| 744 | // All offsets are stored as (real offset + 1), so subtract 1 to get back to the real value | ||
| 745 | if (NULL != psd->ppvValueArray) | ||
| 746 | { | ||
| 747 | return reinterpret_cast<void *>(reinterpret_cast<DWORD_PTR>(pvValue) + reinterpret_cast<DWORD_PTR>(*psd->ppvValueArray) - 1); | ||
| 748 | } | ||
| 749 | else | ||
| 750 | { | ||
| 751 | return pvValue; | ||
| 752 | } | ||
| 753 | } | ||
| 754 | |||
| 755 | static void * TranslateValueToOffset( | ||
| 756 | __in const STRINGDICT_STRUCT *psd, | ||
| 757 | __in void *pvValue | ||
| 758 | ) | ||
| 759 | { | ||
| 760 | if (NULL != psd->ppvValueArray) | ||
| 761 | { | ||
| 762 | // 0 has a special meaning - we don't want offset 0 into the array to have NULL for the offset - so add 1 to avoid this issue | ||
| 763 | return reinterpret_cast<void *>(reinterpret_cast<DWORD_PTR>(pvValue) - reinterpret_cast<DWORD_PTR>(*psd->ppvValueArray) + 1); | ||
| 764 | } | ||
| 765 | else | ||
| 766 | { | ||
| 767 | return pvValue; | ||
| 768 | } | ||
| 769 | } | ||
