diff options
| author | tedu <> | 2015-11-18 16:46:49 +0000 |
|---|---|---|
| committer | tedu <> | 2015-11-18 16:46:49 +0000 |
| commit | 080af0bdc45f736632124c830741da029bea8c74 (patch) | |
| tree | 2a0553470ab33150698fc5e0e663e542744960b5 /src/lib/libc/stdlib/icdb.c | |
| parent | f550e8b0135984c31e6bfd14fb53e03d6b739c6f (diff) | |
| download | openbsd-080af0bdc45f736632124c830741da029bea8c74.tar.gz openbsd-080af0bdc45f736632124c830741da029bea8c74.tar.bz2 openbsd-080af0bdc45f736632124c830741da029bea8c74.zip | |
Add icdb, the internal c database. A simpler replacement for the old
Berzerkeley DB code.
Diffstat (limited to '')
| -rw-r--r-- | src/lib/libc/stdlib/icdb.c | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/src/lib/libc/stdlib/icdb.c b/src/lib/libc/stdlib/icdb.c new file mode 100644 index 0000000000..077c59a1e4 --- /dev/null +++ b/src/lib/libc/stdlib/icdb.c | |||
| @@ -0,0 +1,367 @@ | |||
| 1 | /* $OpenBSD: icdb.c,v 1.1 2015/11/18 16:46:49 tedu Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | #include <fcntl.h> | ||
| 18 | #include <stdint.h> | ||
| 19 | #include <stdio.h> | ||
| 20 | #include <stddef.h> | ||
| 21 | #include <stdlib.h> | ||
| 22 | #include <string.h> | ||
| 23 | #include <unistd.h> | ||
| 24 | #include <icdb.h> | ||
| 25 | |||
| 26 | #include <sys/mman.h> | ||
| 27 | #include <sys/stat.h> | ||
| 28 | |||
| 29 | #include <siphash.h> | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Creating a new icdb: icdb_new | ||
| 33 | * Opening existing icdb: icdb_open | ||
| 34 | * | ||
| 35 | * Adding new entries: icdb_add | ||
| 36 | * Adding entries does not update the disk or indices. | ||
| 37 | * | ||
| 38 | * Save to disk: icdb_save | ||
| 39 | * Update indices: icdb_rehash | ||
| 40 | * icdb_save will call rehash. | ||
| 41 | * | ||
| 42 | * Change an existing entry: icdb_update | ||
| 43 | * Changing entries does write to disk. | ||
| 44 | * | ||
| 45 | * Find an entry: icdb_lookup | ||
| 46 | * Looking up an entry is only defined when the indices are synced. | ||
| 47 | * | ||
| 48 | * Close and free resources: icdb_close | ||
| 49 | */ | ||
| 50 | |||
| 51 | /* | ||
| 52 | * There are two major modes of operation. | ||
| 53 | * | ||
| 54 | * Existing databases use the mmap codepath. The entire database is mapped | ||
| 55 | * into the address space for quick access. Individual entries may be updated, | ||
| 56 | * but no new entries added. | ||
| 57 | * | ||
| 58 | * New databases use malloc backed memory instead. The database may be saved | ||
| 59 | * with icdb_save. It should be saved to a new file to avoid corrupting any | ||
| 60 | * open databases in other processes. | ||
| 61 | */ | ||
| 62 | |||
| 63 | /* | ||
| 64 | * An icdb has the following format: | ||
| 65 | * struct icbinfo header | ||
| 66 | * indexes [ uint32_t * indexsize * nkeys ] | ||
| 67 | * entries [ entrysize * nentries ] | ||
| 68 | * | ||
| 69 | * To find an entry in the file, the user specifies which key to use. | ||
| 70 | * The key is hashed and looked up in the index. The index contains the | ||
| 71 | * position of the entry in the entries array. -1 identifies not found. | ||
| 72 | * Chaining is done by rehashing the hash. All keys are fixed size byte arrays. | ||
| 73 | */ | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Header info for icdb. This struct is stored on disk. | ||
| 77 | */ | ||
| 78 | struct icdbinfo { | ||
| 79 | uint32_t magic; /* magic */ | ||
| 80 | uint32_t nentries; /* number of entries stored */ | ||
| 81 | uint32_t entrysize; /* size of each entry */ | ||
| 82 | uint32_t indexsize; /* number of entries in hash index */ | ||
| 83 | uint32_t nkeys; /* number of keys defined */ | ||
| 84 | uint32_t keysize[8]; /* size of each key */ | ||
| 85 | uint32_t keyoffset[8]; /* offset of each key in entry */ | ||
| 86 | SIPHASH_KEY siphashkey; /* random hash key */ | ||
| 87 | }; | ||
| 88 | |||
| 89 | /* | ||
| 90 | * In memory representation with auxiliary data. | ||
| 91 | * idxdata and entries will be written to disk after info. | ||
| 92 | */ | ||
| 93 | struct icdb { | ||
| 94 | struct icdbinfo *info; | ||
| 95 | void *idxdata[8]; | ||
| 96 | void *entries; | ||
| 97 | size_t maplen; | ||
| 98 | uint32_t allocated; | ||
| 99 | int fd; | ||
| 100 | }; | ||
| 101 | |||
| 102 | const uint32_t magic = 0x1ca9d0b7; | ||
| 103 | |||
| 104 | static uint32_t | ||
| 105 | roundup(uint32_t num) | ||
| 106 | { | ||
| 107 | uint32_t r = 2; | ||
| 108 | |||
| 109 | while (r < num * 3 / 2) | ||
| 110 | r *= 2; | ||
| 111 | return r; | ||
| 112 | } | ||
| 113 | |||
| 114 | struct icdb * | ||
| 115 | icdb_new(uint32_t nentries, uint32_t entrysize, | ||
| 116 | uint32_t nkeys, uint32_t *keysizes, uint32_t *keyoffsets) | ||
| 117 | { | ||
| 118 | struct icdb *db; | ||
| 119 | struct icdbinfo *info; | ||
| 120 | int i; | ||
| 121 | |||
| 122 | if (entrysize == 0 || entrysize > 1048576) | ||
| 123 | return NULL; | ||
| 124 | if (nkeys > 8) | ||
| 125 | return NULL; | ||
| 126 | |||
| 127 | if (!(db = calloc(1, sizeof(*db)))) | ||
| 128 | return NULL; | ||
| 129 | if (!(info = calloc(1, sizeof(*info)))) { | ||
| 130 | free(db); | ||
| 131 | return NULL; | ||
| 132 | } | ||
| 133 | db->info = info; | ||
| 134 | db->fd = -1; | ||
| 135 | info->magic = magic; | ||
| 136 | if (nentries) | ||
| 137 | if ((db->entries = reallocarray(NULL, nentries, entrysize))) | ||
| 138 | db->allocated = nentries; | ||
| 139 | info->entrysize = entrysize; | ||
| 140 | info->nkeys = nkeys; | ||
| 141 | for (i = 0; i < nkeys; i++) { | ||
| 142 | info->keysize[i] = keysizes[i]; | ||
| 143 | info->keyoffset[i] = keyoffsets[i]; | ||
| 144 | } | ||
| 145 | return db; | ||
| 146 | } | ||
| 147 | |||
| 148 | struct icdb * | ||
| 149 | icdb_open(const char *name, int flags) | ||
| 150 | { | ||
| 151 | struct icdb *db = NULL; | ||
| 152 | struct icdbinfo *info; | ||
| 153 | struct stat sb; | ||
| 154 | uint8_t *ptr = MAP_FAILED; | ||
| 155 | uint32_t baseoff, indexsize, idxmask, idxlen; | ||
| 156 | int fd, i; | ||
| 157 | |||
| 158 | if ((fd = open(name, flags | O_CLOEXEC)) == -1) | ||
| 159 | return NULL; | ||
| 160 | if (fstat(fd, &sb) != 0) | ||
| 161 | goto fail; | ||
| 162 | ptr = mmap(NULL, sb.st_size, PROT_READ | | ||
| 163 | ((flags & O_RDWR) ? PROT_WRITE : 0), MAP_SHARED, fd, 0); | ||
| 164 | if (ptr == MAP_FAILED) | ||
| 165 | goto fail; | ||
| 166 | info = (struct icdbinfo *)ptr; | ||
| 167 | if (info->magic != magic) | ||
| 168 | goto fail; | ||
| 169 | |||
| 170 | if (!(db = calloc(1, sizeof(*db)))) | ||
| 171 | goto fail; | ||
| 172 | db->info = info; | ||
| 173 | |||
| 174 | indexsize = info->indexsize; | ||
| 175 | idxmask = indexsize - 1; | ||
| 176 | idxlen = indexsize * sizeof(uint32_t); | ||
| 177 | baseoff = sizeof(*info) + idxlen * info->nkeys; | ||
| 178 | |||
| 179 | for (i = 0; i < info->nkeys; i++) | ||
| 180 | db->idxdata[i] = ptr + sizeof(*info) + i * idxlen; | ||
| 181 | db->entries = ptr + baseoff; | ||
| 182 | db->maplen = sb.st_size; | ||
| 183 | db->fd = fd; | ||
| 184 | return db; | ||
| 185 | |||
| 186 | fail: | ||
| 187 | if (ptr != MAP_FAILED) | ||
| 188 | munmap(ptr, sb.st_size); | ||
| 189 | if (fd != -1) | ||
| 190 | close(fd); | ||
| 191 | free(db); | ||
| 192 | return NULL; | ||
| 193 | } | ||
| 194 | |||
| 195 | int | ||
| 196 | icdb_get(struct icdb *db, void *entry, uint32_t idx) | ||
| 197 | { | ||
| 198 | uint32_t entrysize = db->info->entrysize; | ||
| 199 | |||
| 200 | memcpy(entry, (uint8_t *)db->entries + idx * entrysize, entrysize); | ||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | int | ||
| 205 | icdb_lookup(struct icdb *db, int keynum, const void *key, void *entry, uint32_t *idxp) | ||
| 206 | { | ||
| 207 | struct icdbinfo *info = db->info; | ||
| 208 | uint32_t entrysize = info->entrysize; | ||
| 209 | uint32_t offset; | ||
| 210 | uint64_t hash; | ||
| 211 | uint32_t indexsize, idxmask, idxlen; | ||
| 212 | uint32_t *idxdata; | ||
| 213 | |||
| 214 | indexsize = info->indexsize; | ||
| 215 | idxmask = indexsize - 1; | ||
| 216 | idxlen = indexsize * sizeof(uint32_t); | ||
| 217 | |||
| 218 | idxdata = db->idxdata[keynum]; | ||
| 219 | |||
| 220 | hash = SipHash24(&info->siphashkey, key, info->keysize[keynum]); | ||
| 221 | while ((offset = idxdata[hash & idxmask]) != -1) { | ||
| 222 | if (icdb_get(db, entry, offset) != 0) | ||
| 223 | return -1; | ||
| 224 | if (memcmp((uint8_t *)entry + info->keyoffset[keynum], key, | ||
| 225 | info->keysize[keynum]) == 0) { | ||
| 226 | if (idxp) | ||
| 227 | *idxp = offset; | ||
| 228 | return 0; | ||
| 229 | } | ||
| 230 | hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); | ||
| 231 | } | ||
| 232 | return 1; | ||
| 233 | } | ||
| 234 | |||
| 235 | int | ||
| 236 | icdb_nentries(struct icdb *db) | ||
| 237 | { | ||
| 238 | return db->info->nentries; | ||
| 239 | } | ||
| 240 | |||
| 241 | const void * | ||
| 242 | icdb_entries(struct icdb *db) | ||
| 243 | { | ||
| 244 | return db->entries; | ||
| 245 | } | ||
| 246 | |||
| 247 | int | ||
| 248 | icdb_update(struct icdb *db, const void *entry, int offset) | ||
| 249 | { | ||
| 250 | struct icdbinfo *info = db->info; | ||
| 251 | uint32_t entrysize = info->entrysize; | ||
| 252 | uint32_t baseoff; | ||
| 253 | uint32_t indexsize, idxmask, idxlen; | ||
| 254 | |||
| 255 | indexsize = info->indexsize; | ||
| 256 | idxmask = indexsize - 1; | ||
| 257 | idxlen = indexsize * sizeof(uint32_t); | ||
| 258 | baseoff = sizeof(*info) + idxlen * info->nkeys; | ||
| 259 | |||
| 260 | memcpy((uint8_t *)db->entries + offset * entrysize, | ||
| 261 | entry, entrysize); | ||
| 262 | if (db->fd != -1) | ||
| 263 | msync(db->entries + offset * entrysize, entrysize, MS_SYNC); | ||
| 264 | return 0; | ||
| 265 | } | ||
| 266 | |||
| 267 | int | ||
| 268 | icdb_add(struct icdb *db, const void *entry) | ||
| 269 | { | ||
| 270 | struct icdbinfo *info = db->info; | ||
| 271 | size_t entrysize = info->entrysize; | ||
| 272 | |||
| 273 | if (db->allocated == info->nentries) { | ||
| 274 | void *p; | ||
| 275 | size_t amt = db->allocated ? db->allocated * 2 : 63; | ||
| 276 | if (!(p = reallocarray(db->entries, amt, entrysize))) | ||
| 277 | return -1; | ||
| 278 | db->allocated = amt; | ||
| 279 | db->entries = p; | ||
| 280 | } | ||
| 281 | memcpy((uint8_t *)db->entries + info->nentries * entrysize, | ||
| 282 | entry, entrysize); | ||
| 283 | info->nentries++; | ||
| 284 | return 0; | ||
| 285 | } | ||
| 286 | |||
| 287 | int | ||
| 288 | icdb_rehash(struct icdb *db) | ||
| 289 | { | ||
| 290 | struct icdbinfo *info = db->info; | ||
| 291 | uint32_t entrysize = info->entrysize; | ||
| 292 | uint32_t indexsize, idxmask, idxlen; | ||
| 293 | int i, j; | ||
| 294 | |||
| 295 | indexsize = info->indexsize = roundup(info->nentries); | ||
| 296 | idxmask = indexsize - 1; | ||
| 297 | idxlen = sizeof(uint32_t) * indexsize; | ||
| 298 | |||
| 299 | arc4random_buf(&info->siphashkey, sizeof(info->siphashkey)); | ||
| 300 | |||
| 301 | for (i = 0; i < info->nkeys; i++) { | ||
| 302 | uint32_t *idxdata = reallocarray(db->idxdata[i], | ||
| 303 | indexsize, sizeof(uint32_t)); | ||
| 304 | if (!idxdata) | ||
| 305 | return -1; | ||
| 306 | memset(idxdata, 0xff, idxlen); | ||
| 307 | db->idxdata[i] = idxdata; | ||
| 308 | } | ||
| 309 | for (j = 0; j < info->nentries; j++) { | ||
| 310 | for (i = 0; i < info->nkeys; i++) { | ||
| 311 | uint32_t *idxdata = db->idxdata[i]; | ||
| 312 | uint64_t hash = SipHash24(&info->siphashkey, | ||
| 313 | (uint8_t *)db->entries + j * entrysize + | ||
| 314 | info->keyoffset[i], info->keysize[i]); | ||
| 315 | while (idxdata[hash & idxmask] != -1) | ||
| 316 | hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); | ||
| 317 | idxdata[hash & idxmask] = j; | ||
| 318 | } | ||
| 319 | } | ||
| 320 | return 0; | ||
| 321 | } | ||
| 322 | |||
| 323 | int | ||
| 324 | icdb_save(struct icdb *db, int fd) | ||
| 325 | { | ||
| 326 | struct icdbinfo *info = db->info; | ||
| 327 | uint32_t entrysize = info->entrysize; | ||
| 328 | uint32_t indexsize, idxlen; | ||
| 329 | int i; | ||
| 330 | |||
| 331 | if (icdb_rehash(db) != 0) | ||
| 332 | return -1; | ||
| 333 | |||
| 334 | indexsize = info->indexsize; | ||
| 335 | idxlen = sizeof(uint32_t) * indexsize; | ||
| 336 | |||
| 337 | if (ftruncate(fd, 0) != 0) | ||
| 338 | return -1; | ||
| 339 | if (write(fd, info, sizeof(*info)) != sizeof(*info)) | ||
| 340 | return -1; | ||
| 341 | for (i = 0; i < info->nkeys; i++) { | ||
| 342 | if (write(fd, db->idxdata[i], idxlen) != idxlen) | ||
| 343 | return -1; | ||
| 344 | } | ||
| 345 | if (write(fd, db->entries, info->nentries * entrysize) != | ||
| 346 | info->nentries * entrysize) | ||
| 347 | return -1; | ||
| 348 | return 0; | ||
| 349 | } | ||
| 350 | |||
| 351 | int | ||
| 352 | icdb_close(struct icdb *db) | ||
| 353 | { | ||
| 354 | int i; | ||
| 355 | |||
| 356 | if (db->fd == -1) { | ||
| 357 | for (i = 0; i < db->info->nkeys; i++) | ||
| 358 | free(db->idxdata[i]); | ||
| 359 | free(db->entries); | ||
| 360 | free(db->info); | ||
| 361 | } else { | ||
| 362 | munmap(db->info, db->maplen); | ||
| 363 | close(db->fd); | ||
| 364 | } | ||
| 365 | free(db); | ||
| 366 | return 0; | ||
| 367 | } | ||
