diff options
Diffstat (limited to 'src/dutil/dictutil.cpp')
-rw-r--r-- | src/dutil/dictutil.cpp | 769 |
1 files changed, 769 insertions, 0 deletions
diff --git a/src/dutil/dictutil.cpp b/src/dutil/dictutil.cpp new file mode 100644 index 00000000..1f0f9e43 --- /dev/null +++ b/src/dutil/dictutil.cpp | |||
@@ -0,0 +1,769 @@ | |||
1 | // Copyright (c) .NET Foundation and contributors. All rights reserved. Licensed under the Microsoft Reciprocal License. See LICENSE.TXT file in the project root for full license information. | ||
2 | |||
3 | #include "precomp.h" | ||
4 | |||
5 | // These should all be primes, and spaced reasonably apart (currently each is about 4x the last) | ||
6 | const DWORD MAX_BUCKET_SIZES[] = { | ||
7 | 503, | ||
8 | 2017, | ||
9 | 7937, | ||
10 | 32779, | ||
11 | 131111, | ||
12 | 524341, | ||
13 | 2097709, | ||
14 | 8390857, | ||
15 | 33563437, | ||
16 | 134253719, | ||
17 | 537014927, | ||
18 | 2148059509 | ||
19 | }; | ||
20 | |||
21 | // However many items are in the cab, let's keep the buckets at least 8 times that to avoid collisions | ||
22 | #define MAX_BUCKETS_TO_ITEMS_RATIO 8 | ||
23 | |||
24 | enum DICT_TYPE | ||
25 | { | ||
26 | DICT_INVALID = 0, | ||
27 | DICT_EMBEDDED_KEY = 1, | ||
28 | DICT_STRING_LIST = 2 | ||
29 | }; | ||
30 | |||
31 | struct STRINGDICT_STRUCT | ||
32 | { | ||
33 | DICT_TYPE dtType; | ||
34 | |||
35 | // Optional flags to control the behavior of the dictionary. | ||
36 | DICT_FLAG dfFlags; | ||
37 | |||
38 | // Index into MAX_BUCKET_SIZES (array of primes), representing number of buckets we've allocated | ||
39 | DWORD dwBucketSizeIndex; | ||
40 | |||
41 | // Number of items currently stored in the dict buckets | ||
42 | DWORD dwNumItems; | ||
43 | |||
44 | // Byte offset of key within bucket value, for collision checking - see | ||
45 | // comments above DictCreateEmbeddedKey() implementation for further details | ||
46 | size_t cByteOffset; | ||
47 | |||
48 | // The actual stored buckets | ||
49 | void **ppvBuckets; | ||
50 | |||
51 | // The actual stored items in the order they were added (used for auto freeing or enumerating) | ||
52 | void **ppvItemList; | ||
53 | |||
54 | // Pointer to the array of items, so the caller is free to resize the array of values out from under us without harm | ||
55 | void **ppvValueArray; | ||
56 | }; | ||
57 | |||
58 | const int STRINGDICT_HANDLE_BYTES = sizeof(STRINGDICT_STRUCT); | ||
59 | |||
60 | static HRESULT StringHash( | ||
61 | __in const STRINGDICT_STRUCT *psd, | ||
62 | __in DWORD dwNumBuckets, | ||
63 | __in_z LPCWSTR pszString, | ||
64 | __out LPDWORD pdwHash | ||
65 | ); | ||
66 | static BOOL IsMatchExact( | ||
67 | __in const STRINGDICT_STRUCT *psd, | ||
68 | __in DWORD dwMatchIndex, | ||
69 | __in_z LPCWSTR wzOriginalString | ||
70 | ); | ||
71 | static HRESULT GetValue( | ||
72 | __in const STRINGDICT_STRUCT *psd, | ||
73 | __in_z LPCWSTR pszString, | ||
74 | __out_opt void **ppvValue | ||
75 | ); | ||
76 | static HRESULT GetInsertIndex( | ||
77 | __in const STRINGDICT_STRUCT *psd, | ||
78 | __in DWORD dwBucketCount, | ||
79 | __in void **ppvBuckets, | ||
80 | __in_z LPCWSTR pszString, | ||
81 | __out DWORD *pdwOutput | ||
82 | ); | ||
83 | static HRESULT GetIndex( | ||
84 | __in const STRINGDICT_STRUCT *psd, | ||
85 | __in_z LPCWSTR pszString, | ||
86 | __out DWORD *pdwOutput | ||
87 | ); | ||
88 | static LPCWSTR GetKey( | ||
89 | __in const STRINGDICT_STRUCT *psd, | ||
90 | __in void *pvValue | ||
91 | ); | ||
92 | static HRESULT GrowDictionary( | ||
93 | __inout STRINGDICT_STRUCT *psd | ||
94 | ); | ||
95 | // These 2 helper functions allow us to safely handle dictutil consumers resizing | ||
96 | // the value array by storing "offsets" instead of raw void *'s in our buckets. | ||
97 | static void * TranslateOffsetToValue( | ||
98 | __in const STRINGDICT_STRUCT *psd, | ||
99 | __in void *pvValue | ||
100 | ); | ||
101 | static void * TranslateValueToOffset( | ||
102 | __in const STRINGDICT_STRUCT *psd, | ||
103 | __in void *pvValue | ||
104 | ); | ||
105 | |||
106 | // The dict will store a set of keys (as wide-char strings) and a set of values associated with those keys (as void *'s). | ||
107 | // However, to support collision checking, the key needs to be represented in the "value" object (pointed to | ||
108 | // by the void *). The "stByteOffset" parameter tells this dict the byte offset of the "key" string pointer | ||
109 | // within the "value" object. Use the offsetof() macro to fill this out. | ||
110 | // The "ppvArray" parameter gives dictutil the address of your value array. If you provide this parameter, | ||
111 | // dictutil will remember all pointer values provided as "offsets" against this array. It is only necessary to provide | ||
112 | // this parameter to dictutil if it is possible you will realloc the array. | ||
113 | // | ||
114 | // Use DictAddValue() and DictGetValue() with this dictionary type. | ||
115 | extern "C" HRESULT DAPI DictCreateWithEmbeddedKey( | ||
116 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
117 | __in DWORD dwNumExpectedItems, | ||
118 | __in_opt void **ppvArray, | ||
119 | __in size_t cByteOffset, | ||
120 | __in DICT_FLAG dfFlags | ||
121 | ) | ||
122 | { | ||
123 | HRESULT hr = S_OK; | ||
124 | |||
125 | ExitOnNull(psdHandle, hr, E_INVALIDARG, "Handle not specified while creating dict"); | ||
126 | |||
127 | // Allocate the handle | ||
128 | *psdHandle = static_cast<STRINGDICT_HANDLE>(MemAlloc(sizeof(STRINGDICT_STRUCT), FALSE)); | ||
129 | ExitOnNull(*psdHandle, hr, E_OUTOFMEMORY, "Failed to allocate dictionary object"); | ||
130 | |||
131 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(*psdHandle); | ||
132 | |||
133 | // Fill out the new handle's values | ||
134 | psd->dtType = DICT_EMBEDDED_KEY; | ||
135 | psd->dfFlags = dfFlags; | ||
136 | psd->cByteOffset = cByteOffset; | ||
137 | psd->dwBucketSizeIndex = 0; | ||
138 | psd->dwNumItems = 0; | ||
139 | psd->ppvItemList = NULL; | ||
140 | psd->ppvValueArray = ppvArray; | ||
141 | |||
142 | // Make psd->dwBucketSizeIndex point to the appropriate spot in the prime | ||
143 | // array based on expected number of items and items to buckets ratio | ||
144 | // Careful: the "-1" in "countof(MAX_BUCKET_SIZES)-1" ensures we don't end | ||
145 | // this loop past the end of the array! | ||
146 | while (psd->dwBucketSizeIndex < (countof(MAX_BUCKET_SIZES)-1) && | ||
147 | MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] < dwNumExpectedItems * MAX_BUCKETS_TO_ITEMS_RATIO) | ||
148 | { | ||
149 | ++psd->dwBucketSizeIndex; | ||
150 | } | ||
151 | |||
152 | // Finally, allocate our initial buckets | ||
153 | psd->ppvBuckets = static_cast<void**>(MemAlloc(sizeof(void *) * MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], TRUE)); | ||
154 | ExitOnNull(psd->ppvBuckets, hr, E_OUTOFMEMORY, "Failed to allocate buckets for dictionary"); | ||
155 | |||
156 | LExit: | ||
157 | return hr; | ||
158 | } | ||
159 | |||
160 | // The dict will store a set of keys, with no values associated with them. Use DictAddKey() and DictKeyExists() with this dictionary type. | ||
161 | extern "C" HRESULT DAPI DictCreateStringList( | ||
162 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
163 | __in DWORD dwNumExpectedItems, | ||
164 | __in DICT_FLAG dfFlags | ||
165 | ) | ||
166 | { | ||
167 | HRESULT hr = S_OK; | ||
168 | |||
169 | ExitOnNull(psdHandle, hr, E_INVALIDARG, "Handle not specified while creating dict"); | ||
170 | |||
171 | // Allocate the handle | ||
172 | *psdHandle = static_cast<STRINGDICT_HANDLE>(MemAlloc(sizeof(STRINGDICT_STRUCT), FALSE)); | ||
173 | ExitOnNull(*psdHandle, hr, E_OUTOFMEMORY, "Failed to allocate dictionary object"); | ||
174 | |||
175 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(*psdHandle); | ||
176 | |||
177 | // Fill out the new handle's values | ||
178 | psd->dtType = DICT_STRING_LIST; | ||
179 | psd->dfFlags = dfFlags; | ||
180 | psd->cByteOffset = 0; | ||
181 | psd->dwBucketSizeIndex = 0; | ||
182 | psd->dwNumItems = 0; | ||
183 | psd->ppvItemList = NULL; | ||
184 | psd->ppvValueArray = NULL; | ||
185 | |||
186 | // Make psd->dwBucketSizeIndex point to the appropriate spot in the prime | ||
187 | // array based on expected number of items and items to buckets ratio | ||
188 | // Careful: the "-1" in "countof(MAX_BUCKET_SIZES)-1" ensures we don't end | ||
189 | // this loop past the end of the array! | ||
190 | while (psd->dwBucketSizeIndex < (countof(MAX_BUCKET_SIZES)-1) && | ||
191 | MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] < dwNumExpectedItems * MAX_BUCKETS_TO_ITEMS_RATIO) | ||
192 | { | ||
193 | ++psd->dwBucketSizeIndex; | ||
194 | } | ||
195 | |||
196 | // Finally, allocate our initial buckets | ||
197 | psd->ppvBuckets = static_cast<void**>(MemAlloc(sizeof(void *) * MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], TRUE)); | ||
198 | ExitOnNull(psd->ppvBuckets, hr, E_OUTOFMEMORY, "Failed to allocate buckets for dictionary"); | ||
199 | |||
200 | LExit: | ||
201 | return hr; | ||
202 | } | ||
203 | |||
204 | extern "C" HRESULT DAPI DictCreateStringListFromArray( | ||
205 | __out_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE* psdHandle, | ||
206 | __in_ecount(cStringArray) const LPCWSTR* rgwzStringArray, | ||
207 | __in const DWORD cStringArray, | ||
208 | __in DICT_FLAG dfFlags | ||
209 | ) | ||
210 | { | ||
211 | HRESULT hr = S_OK; | ||
212 | STRINGDICT_HANDLE sd = NULL; | ||
213 | |||
214 | hr = DictCreateStringList(&sd, cStringArray, dfFlags); | ||
215 | ExitOnFailure(hr, "Failed to create the string dictionary."); | ||
216 | |||
217 | for (DWORD i = 0; i < cStringArray; ++i) | ||
218 | { | ||
219 | const LPCWSTR wzKey = rgwzStringArray[i]; | ||
220 | |||
221 | hr = DictKeyExists(sd, wzKey); | ||
222 | if (E_NOTFOUND != hr) | ||
223 | { | ||
224 | ExitOnFailure(hr, "Failed to check the string dictionary."); | ||
225 | } | ||
226 | else | ||
227 | { | ||
228 | hr = DictAddKey(sd, wzKey); | ||
229 | ExitOnFailure(hr, "Failed to add \"%ls\" to the string dictionary.", wzKey); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | *psdHandle = sd; | ||
234 | sd = NULL; | ||
235 | |||
236 | LExit: | ||
237 | ReleaseDict(sd); | ||
238 | |||
239 | return hr; | ||
240 | } | ||
241 | |||
242 | extern "C" HRESULT DAPI DictCompareStringListToArray( | ||
243 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdStringList, | ||
244 | __in_ecount(cStringArray) const LPCWSTR* rgwzStringArray, | ||
245 | __in const DWORD cStringArray | ||
246 | ) | ||
247 | { | ||
248 | HRESULT hr = S_OK; | ||
249 | |||
250 | for (DWORD i = 0; i < cStringArray; ++i) | ||
251 | { | ||
252 | hr = DictKeyExists(sdStringList, rgwzStringArray[i]); | ||
253 | if (E_NOTFOUND != hr) | ||
254 | { | ||
255 | ExitOnFailure(hr, "Failed to check the string dictionary."); | ||
256 | ExitFunction1(hr = S_OK); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | ExitFunction1(hr = HRESULT_FROM_WIN32(ERROR_NO_MATCH)); | ||
261 | |||
262 | LExit: | ||
263 | return hr; | ||
264 | } | ||
265 | |||
266 | // Todo: Dict should resize itself when (number of items) exceeds (number of buckets / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
267 | extern "C" HRESULT DAPI DictAddKey( | ||
268 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle, | ||
269 | __in_z LPCWSTR pszString | ||
270 | ) | ||
271 | { | ||
272 | HRESULT hr = S_OK; | ||
273 | DWORD dwIndex = 0; | ||
274 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
275 | |||
276 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while adding value to dict"); | ||
277 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while adding value to dict"); | ||
278 | |||
279 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
280 | { | ||
281 | hr = E_INVALIDARG; | ||
282 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
283 | } | ||
284 | |||
285 | if (DICT_STRING_LIST != psd->dtType) | ||
286 | { | ||
287 | hr = E_INVALIDARG; | ||
288 | ExitOnFailure(hr, "Tried to add key without value to wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
289 | } | ||
290 | |||
291 | if ((psd->dwNumItems + 1) >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
292 | { | ||
293 | hr = GrowDictionary(psd); | ||
294 | if (HRESULT_FROM_WIN32(ERROR_DATABASE_FULL) == hr) | ||
295 | { | ||
296 | // If we fail to proactively grow the dictionary, don't fail unless the dictionary is completely full | ||
297 | if (psd->dwNumItems < MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
298 | { | ||
299 | hr = S_OK; | ||
300 | } | ||
301 | } | ||
302 | ExitOnFailure(hr, "Failed to grow dictionary"); | ||
303 | } | ||
304 | |||
305 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], psd->ppvBuckets, pszString, &dwIndex); | ||
306 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
307 | |||
308 | hr = MemEnsureArraySize(reinterpret_cast<void **>(&(psd->ppvItemList)), psd->dwNumItems + 1, sizeof(void *), 1000); | ||
309 | ExitOnFailure(hr, "Failed to resize list of items in dictionary"); | ||
310 | ++psd->dwNumItems; | ||
311 | |||
312 | hr = StrAllocString(reinterpret_cast<LPWSTR *>(&(psd->ppvBuckets[dwIndex])), pszString, 0); | ||
313 | ExitOnFailure(hr, "Failed to allocate copy of string"); | ||
314 | |||
315 | psd->ppvItemList[psd->dwNumItems-1] = psd->ppvBuckets[dwIndex]; | ||
316 | |||
317 | LExit: | ||
318 | return hr; | ||
319 | } | ||
320 | |||
321 | // Todo: Dict should resize itself when (number of items) exceeds (number of buckets / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
322 | extern "C" HRESULT DAPI DictAddValue( | ||
323 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle, | ||
324 | __in void *pvValue | ||
325 | ) | ||
326 | { | ||
327 | HRESULT hr = S_OK; | ||
328 | void *pvOffset = NULL; | ||
329 | LPCWSTR wzKey = NULL; | ||
330 | DWORD dwIndex = 0; | ||
331 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
332 | |||
333 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while adding value to dict"); | ||
334 | ExitOnNull(pvValue, hr, E_INVALIDARG, "Value not specified while adding value to dict"); | ||
335 | |||
336 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
337 | { | ||
338 | hr = E_INVALIDARG; | ||
339 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
340 | } | ||
341 | |||
342 | if (DICT_EMBEDDED_KEY != psd->dtType) | ||
343 | { | ||
344 | hr = E_INVALIDARG; | ||
345 | ExitOnFailure(hr, "Tried to add key/value pair to wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
346 | } | ||
347 | |||
348 | wzKey = GetKey(psd, pvValue); | ||
349 | ExitOnNull(wzKey, hr, E_INVALIDARG, "String not specified while adding value to dict"); | ||
350 | |||
351 | if ((psd->dwNumItems + 1) >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex] / MAX_BUCKETS_TO_ITEMS_RATIO) | ||
352 | { | ||
353 | hr = GrowDictionary(psd); | ||
354 | if (HRESULT_FROM_WIN32(ERROR_DATABASE_FULL) == hr && psd->dwNumItems + 1 ) | ||
355 | { | ||
356 | // If we fail to proactively grow the dictionary, don't fail unless the dictionary is completely full | ||
357 | if (psd->dwNumItems < MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
358 | { | ||
359 | hr = S_OK; | ||
360 | } | ||
361 | } | ||
362 | ExitOnFailure(hr, "Failed to grow dictionary"); | ||
363 | } | ||
364 | |||
365 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], psd->ppvBuckets, wzKey, &dwIndex); | ||
366 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
367 | |||
368 | hr = MemEnsureArraySize(reinterpret_cast<void **>(&(psd->ppvItemList)), psd->dwNumItems + 1, sizeof(void *), 1000); | ||
369 | ExitOnFailure(hr, "Failed to resize list of items in dictionary"); | ||
370 | ++psd->dwNumItems; | ||
371 | |||
372 | pvOffset = TranslateValueToOffset(psd, pvValue); | ||
373 | psd->ppvBuckets[dwIndex] = pvOffset; | ||
374 | psd->ppvItemList[psd->dwNumItems-1] = pvOffset; | ||
375 | |||
376 | LExit: | ||
377 | return hr; | ||
378 | } | ||
379 | |||
380 | extern "C" HRESULT DAPI DictGetValue( | ||
381 | __in_bcount(STRINGDICT_HANDLE_BYTES) C_STRINGDICT_HANDLE sdHandle, | ||
382 | __in_z LPCWSTR pszString, | ||
383 | __out void **ppvValue | ||
384 | ) | ||
385 | { | ||
386 | HRESULT hr = S_OK; | ||
387 | |||
388 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
389 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
390 | |||
391 | const STRINGDICT_STRUCT *psd = static_cast<const STRINGDICT_STRUCT *>(sdHandle); | ||
392 | |||
393 | if (DICT_EMBEDDED_KEY != psd->dtType) | ||
394 | { | ||
395 | hr = E_INVALIDARG; | ||
396 | ExitOnFailure(hr, "Tried to lookup value in wrong dictionary type! This dictionary type is: %d", psd->dtType); | ||
397 | } | ||
398 | |||
399 | hr = GetValue(psd, pszString, ppvValue); | ||
400 | if (E_NOTFOUND == hr) | ||
401 | { | ||
402 | ExitFunction(); | ||
403 | } | ||
404 | ExitOnFailure(hr, "Failed to call internal GetValue()"); | ||
405 | |||
406 | LExit: | ||
407 | return hr; | ||
408 | } | ||
409 | |||
410 | extern "C" HRESULT DAPI DictKeyExists( | ||
411 | __in_bcount(STRINGDICT_HANDLE_BYTES) C_STRINGDICT_HANDLE sdHandle, | ||
412 | __in_z LPCWSTR pszString | ||
413 | ) | ||
414 | { | ||
415 | HRESULT hr = S_OK; | ||
416 | |||
417 | ExitOnNull(sdHandle, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
418 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
419 | |||
420 | const STRINGDICT_STRUCT *psd = static_cast<const STRINGDICT_STRUCT *>(sdHandle); | ||
421 | |||
422 | // This works with either type of dictionary | ||
423 | hr = GetValue(psd, pszString, NULL); | ||
424 | if (E_NOTFOUND == hr) | ||
425 | { | ||
426 | ExitFunction(); | ||
427 | } | ||
428 | ExitOnFailure(hr, "Failed to call internal GetValue()"); | ||
429 | |||
430 | LExit: | ||
431 | return hr; | ||
432 | } | ||
433 | |||
434 | extern "C" void DAPI DictDestroy( | ||
435 | __in_bcount(STRINGDICT_HANDLE_BYTES) STRINGDICT_HANDLE sdHandle | ||
436 | ) | ||
437 | { | ||
438 | DWORD i; | ||
439 | |||
440 | STRINGDICT_STRUCT *psd = static_cast<STRINGDICT_STRUCT *>(sdHandle); | ||
441 | |||
442 | if (DICT_STRING_LIST == psd->dtType) | ||
443 | { | ||
444 | for (i = 0; i < psd->dwNumItems; ++i) | ||
445 | { | ||
446 | ReleaseStr(reinterpret_cast<LPWSTR>(psd->ppvItemList[i])); | ||
447 | } | ||
448 | } | ||
449 | |||
450 | ReleaseMem(psd->ppvItemList); | ||
451 | ReleaseMem(psd->ppvBuckets); | ||
452 | ReleaseMem(psd); | ||
453 | } | ||
454 | |||
455 | static HRESULT StringHash( | ||
456 | __in const STRINGDICT_STRUCT *psd, | ||
457 | __in DWORD dwNumBuckets, | ||
458 | __in_z LPCWSTR pszString, | ||
459 | __out DWORD *pdwHash | ||
460 | ) | ||
461 | { | ||
462 | HRESULT hr = S_OK; | ||
463 | LPCWSTR wzKey = NULL; | ||
464 | LPWSTR sczNewKey = NULL; | ||
465 | DWORD result = 0; | ||
466 | |||
467 | if (DICT_FLAG_CASEINSENSITIVE & psd->dfFlags) | ||
468 | { | ||
469 | hr = StrAllocStringToUpperInvariant(&sczNewKey, pszString, 0); | ||
470 | ExitOnFailure(hr, "Failed to convert the string to upper-case."); | ||
471 | |||
472 | wzKey = sczNewKey; | ||
473 | } | ||
474 | else | ||
475 | { | ||
476 | wzKey = pszString; | ||
477 | } | ||
478 | |||
479 | while (*wzKey) | ||
480 | { | ||
481 | result = ~(*wzKey++ * 509) + result * 65599; | ||
482 | } | ||
483 | |||
484 | *pdwHash = result % dwNumBuckets; | ||
485 | |||
486 | LExit: | ||
487 | ReleaseStr(sczNewKey); | ||
488 | |||
489 | return hr; | ||
490 | } | ||
491 | |||
492 | static BOOL IsMatchExact( | ||
493 | __in const STRINGDICT_STRUCT *psd, | ||
494 | __in DWORD dwMatchIndex, | ||
495 | __in_z LPCWSTR wzOriginalString | ||
496 | ) | ||
497 | { | ||
498 | LPCWSTR wzMatchString = GetKey(psd, TranslateOffsetToValue(psd, psd->ppvBuckets[dwMatchIndex])); | ||
499 | DWORD dwFlags = 0; | ||
500 | |||
501 | if (DICT_FLAG_CASEINSENSITIVE & psd->dfFlags) | ||
502 | { | ||
503 | dwFlags |= NORM_IGNORECASE; | ||
504 | } | ||
505 | |||
506 | if (CSTR_EQUAL == ::CompareStringW(LOCALE_INVARIANT, dwFlags, wzOriginalString, -1, wzMatchString, -1)) | ||
507 | { | ||
508 | return TRUE; | ||
509 | } | ||
510 | |||
511 | return FALSE; | ||
512 | } | ||
513 | |||
514 | static HRESULT GetValue( | ||
515 | __in const STRINGDICT_STRUCT *psd, | ||
516 | __in_z LPCWSTR pszString, | ||
517 | __out_opt void **ppvValue | ||
518 | ) | ||
519 | { | ||
520 | HRESULT hr = S_OK; | ||
521 | DWORD dwOriginalIndexCandidate = 0; | ||
522 | void *pvCandidateValue = NULL; | ||
523 | DWORD dwIndex = 0; | ||
524 | |||
525 | ExitOnNull(psd, hr, E_INVALIDARG, "Handle not specified while searching dict"); | ||
526 | ExitOnNull(pszString, hr, E_INVALIDARG, "String not specified while searching dict"); | ||
527 | |||
528 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
529 | { | ||
530 | hr = E_INVALIDARG; | ||
531 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
532 | } | ||
533 | |||
534 | hr = StringHash(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], pszString, &dwOriginalIndexCandidate); | ||
535 | ExitOnFailure(hr, "Failed to hash the string."); | ||
536 | |||
537 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
538 | |||
539 | pvCandidateValue = TranslateOffsetToValue(psd, psd->ppvBuckets[dwIndexCandidate]); | ||
540 | |||
541 | // If no match exists in the dict | ||
542 | if (NULL == pvCandidateValue) | ||
543 | { | ||
544 | if (NULL != ppvValue) | ||
545 | { | ||
546 | *ppvValue = NULL; | ||
547 | } | ||
548 | ExitFunction1(hr = E_NOTFOUND); | ||
549 | } | ||
550 | |||
551 | hr = GetIndex(psd, pszString, &dwIndex); | ||
552 | if (E_NOTFOUND == hr) | ||
553 | { | ||
554 | ExitFunction(); | ||
555 | } | ||
556 | ExitOnFailure(hr, "Failed to find index to get"); | ||
557 | |||
558 | if (NULL != ppvValue) | ||
559 | { | ||
560 | *ppvValue = TranslateOffsetToValue(psd, psd->ppvBuckets[dwIndex]); | ||
561 | } | ||
562 | |||
563 | LExit: | ||
564 | if (FAILED(hr) && NULL != ppvValue) | ||
565 | { | ||
566 | *ppvValue = NULL; | ||
567 | } | ||
568 | |||
569 | return hr; | ||
570 | } | ||
571 | |||
572 | static HRESULT GetInsertIndex( | ||
573 | __in const STRINGDICT_STRUCT *psd, | ||
574 | __in DWORD dwBucketCount, | ||
575 | __in void **ppvBuckets, | ||
576 | __in_z LPCWSTR pszString, | ||
577 | __out DWORD *pdwOutput | ||
578 | ) | ||
579 | { | ||
580 | HRESULT hr = S_OK; | ||
581 | DWORD dwOriginalIndexCandidate = 0; | ||
582 | |||
583 | hr = StringHash(psd, dwBucketCount, pszString, &dwOriginalIndexCandidate); | ||
584 | ExitOnFailure(hr, "Failed to hash the string."); | ||
585 | |||
586 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
587 | |||
588 | // If we collide, keep iterating forward from our intended position, even wrapping around to zero, until we find an empty bucket | ||
589 | #pragma prefast(push) | ||
590 | #pragma prefast(disable:26007) | ||
591 | while (NULL != ppvBuckets[dwIndexCandidate]) | ||
592 | #pragma prefast(pop) | ||
593 | { | ||
594 | ++dwIndexCandidate; | ||
595 | |||
596 | // If we got to the end of the array, wrap around to zero index | ||
597 | if (dwIndexCandidate >= dwBucketCount) | ||
598 | { | ||
599 | dwIndexCandidate = 0; | ||
600 | } | ||
601 | |||
602 | // If we wrapped all the way back around to our original index, the dict is full - throw an error | ||
603 | if (dwIndexCandidate == dwOriginalIndexCandidate) | ||
604 | { | ||
605 | // The dict table is full - this error seems to be a reasonably close match | ||
606 | hr = HRESULT_FROM_WIN32(ERROR_DATABASE_FULL); | ||
607 | ExitOnRootFailure(hr, "Failed to add item '%ls' to dict table because dict table is full of items", pszString); | ||
608 | } | ||
609 | } | ||
610 | |||
611 | *pdwOutput = dwIndexCandidate; | ||
612 | |||
613 | LExit: | ||
614 | return hr; | ||
615 | } | ||
616 | |||
617 | static HRESULT GetIndex( | ||
618 | __in const STRINGDICT_STRUCT *psd, | ||
619 | __in_z LPCWSTR pszString, | ||
620 | __out DWORD *pdwOutput | ||
621 | ) | ||
622 | { | ||
623 | HRESULT hr = S_OK; | ||
624 | DWORD dwOriginalIndexCandidate = 0; | ||
625 | |||
626 | if (psd->dwBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
627 | { | ||
628 | hr = E_INVALIDARG; | ||
629 | ExitOnFailure(hr, "Invalid dictionary - bucket size index is out of range"); | ||
630 | } | ||
631 | |||
632 | hr = StringHash(psd, MAX_BUCKET_SIZES[psd->dwBucketSizeIndex], pszString, &dwOriginalIndexCandidate); | ||
633 | ExitOnFailure(hr, "Failed to hash the string."); | ||
634 | |||
635 | DWORD dwIndexCandidate = dwOriginalIndexCandidate; | ||
636 | |||
637 | while (!IsMatchExact(psd, dwIndexCandidate, pszString)) | ||
638 | { | ||
639 | ++dwIndexCandidate; | ||
640 | |||
641 | // If we got to the end of the array, wrap around to zero index | ||
642 | if (dwIndexCandidate >= MAX_BUCKET_SIZES[psd->dwBucketSizeIndex]) | ||
643 | { | ||
644 | dwIndexCandidate = 0; | ||
645 | } | ||
646 | |||
647 | // If no match exists in the dict | ||
648 | if (NULL == psd->ppvBuckets[dwIndexCandidate]) | ||
649 | { | ||
650 | ExitFunction1(hr = E_NOTFOUND); | ||
651 | } | ||
652 | |||
653 | // If we wrapped all the way back around to our original index, the dict is full and we found nothing, so return as such | ||
654 | if (dwIndexCandidate == dwOriginalIndexCandidate) | ||
655 | { | ||
656 | ExitFunction1(hr = E_NOTFOUND); | ||
657 | } | ||
658 | } | ||
659 | |||
660 | *pdwOutput = dwIndexCandidate; | ||
661 | |||
662 | LExit: | ||
663 | return hr; | ||
664 | } | ||
665 | |||
666 | static LPCWSTR GetKey( | ||
667 | __in const STRINGDICT_STRUCT *psd, | ||
668 | __in void *pvValue | ||
669 | ) | ||
670 | { | ||
671 | const BYTE *lpByte = reinterpret_cast<BYTE *>(pvValue); | ||
672 | |||
673 | if (DICT_EMBEDDED_KEY == psd->dtType) | ||
674 | { | ||
675 | void *pvKey = reinterpret_cast<void *>(reinterpret_cast<BYTE *>(pvValue) + psd->cByteOffset); | ||
676 | |||
677 | #pragma prefast(push) | ||
678 | #pragma prefast(disable:26010) | ||
679 | return *(reinterpret_cast<LPCWSTR *>(pvKey)); | ||
680 | #pragma prefast(pop) | ||
681 | } | ||
682 | else | ||
683 | { | ||
684 | return (reinterpret_cast<LPCWSTR>(lpByte)); | ||
685 | } | ||
686 | } | ||
687 | |||
688 | static HRESULT GrowDictionary( | ||
689 | __inout STRINGDICT_STRUCT *psd | ||
690 | ) | ||
691 | { | ||
692 | HRESULT hr = S_OK; | ||
693 | DWORD dwInsertIndex = 0; | ||
694 | LPCWSTR wzKey = NULL; | ||
695 | DWORD dwNewBucketSizeIndex = 0; | ||
696 | size_t cbAllocSize = 0; | ||
697 | void **ppvNewBuckets = NULL; | ||
698 | |||
699 | dwNewBucketSizeIndex = psd->dwBucketSizeIndex + 1; | ||
700 | |||
701 | if (dwNewBucketSizeIndex >= countof(MAX_BUCKET_SIZES)) | ||
702 | { | ||
703 | ExitFunction1(hr = HRESULT_FROM_WIN32(ERROR_DATABASE_FULL)); | ||
704 | } | ||
705 | |||
706 | hr = ::SizeTMult(sizeof(void *), MAX_BUCKET_SIZES[dwNewBucketSizeIndex], &cbAllocSize); | ||
707 | ExitOnFailure(hr, "Overflow while calculating allocation size to grow dictionary"); | ||
708 | |||
709 | ppvNewBuckets = static_cast<void**>(MemAlloc(cbAllocSize, TRUE)); | ||
710 | ExitOnNull(ppvNewBuckets, hr, E_OUTOFMEMORY, "Failed to allocate %u buckets while growing dictionary", MAX_BUCKET_SIZES[dwNewBucketSizeIndex]); | ||
711 | |||
712 | for (DWORD i = 0; i < psd->dwNumItems; ++i) | ||
713 | { | ||
714 | wzKey = GetKey(psd, TranslateOffsetToValue(psd, psd->ppvItemList[i])); | ||
715 | ExitOnNull(wzKey, hr, E_INVALIDARG, "String not specified in existing dict value"); | ||
716 | |||
717 | hr = GetInsertIndex(psd, MAX_BUCKET_SIZES[dwNewBucketSizeIndex], ppvNewBuckets, wzKey, &dwInsertIndex); | ||
718 | ExitOnFailure(hr, "Failed to get index to insert into"); | ||
719 | |||
720 | ppvNewBuckets[dwInsertIndex] = psd->ppvItemList[i]; | ||
721 | } | ||
722 | |||
723 | psd->dwBucketSizeIndex = dwNewBucketSizeIndex; | ||
724 | ReleaseMem(psd->ppvBuckets); | ||
725 | psd->ppvBuckets = ppvNewBuckets; | ||
726 | ppvNewBuckets = NULL; | ||
727 | |||
728 | LExit: | ||
729 | ReleaseMem(ppvNewBuckets); | ||
730 | |||
731 | return hr; | ||
732 | } | ||
733 | |||
734 | static void * TranslateOffsetToValue( | ||
735 | __in const STRINGDICT_STRUCT *psd, | ||
736 | __in void *pvValue | ||
737 | ) | ||
738 | { | ||
739 | if (NULL == pvValue) | ||
740 | { | ||
741 | return NULL; | ||
742 | } | ||
743 | |||
744 | // All offsets are stored as (real offset + 1), so subtract 1 to get back to the real value | ||
745 | if (NULL != psd->ppvValueArray) | ||
746 | { | ||
747 | return reinterpret_cast<void *>(reinterpret_cast<DWORD_PTR>(pvValue) + reinterpret_cast<DWORD_PTR>(*psd->ppvValueArray) - 1); | ||
748 | } | ||
749 | else | ||
750 | { | ||
751 | return pvValue; | ||
752 | } | ||
753 | } | ||
754 | |||
755 | static void * TranslateValueToOffset( | ||
756 | __in const STRINGDICT_STRUCT *psd, | ||
757 | __in void *pvValue | ||
758 | ) | ||
759 | { | ||
760 | if (NULL != psd->ppvValueArray) | ||
761 | { | ||
762 | // 0 has a special meaning - we don't want offset 0 into the array to have NULL for the offset - so add 1 to avoid this issue | ||
763 | return reinterpret_cast<void *>(reinterpret_cast<DWORD_PTR>(pvValue) - reinterpret_cast<DWORD_PTR>(*psd->ppvValueArray) + 1); | ||
764 | } | ||
765 | else | ||
766 | { | ||
767 | return pvValue; | ||
768 | } | ||
769 | } | ||