diff options
Diffstat (limited to 'uid_db.c')
-rw-r--r-- | uid_db.c | 597 |
1 files changed, 597 insertions, 0 deletions
diff --git a/uid_db.c b/uid_db.c new file mode 100644 index 00000000..fdc35b73 --- /dev/null +++ b/uid_db.c @@ -0,0 +1,597 @@ +/* + POP3 UID db + + Copyright (c) 2010 MAD Partners, Ltd. (rweikusat@mssgmbh.com) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ + +/* Have Solaris expose ffs() from strings.h: */ +#define __EXTENSIONS__ +#define _XOPEN_SOURCE 700 + +/* includes */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> /* ffs() lives here - needs #define on Solaris. */ + +#include "xmalloc.h" +#include "uid_db.h" + +/* constants */ +enum { + MIN_RECORDS = 16 /* arbitrary */ +}; + +/* types */ +struct pat_node { + struct pat_node *ptrs_[3]; + + /* + The bit mask is stored in the nodes (as opposed to the + offset, which is (re-)calculated on demand) because + calculating the mask is a non-trivial operation (at + least on x86). + */ + unsigned bit_ndx, bit_mask; + + struct uid_db_record *rec; +}; + +/* + The idea behind this is that the 'left' pointer of + a node is accessible as ptrs(np)[-1] and the right + one a ptrs(np)[1]. This implies that no separate codepaths + for 'symmetric left- and right-cases' are needed. +*/ +#define ptrs(np) ((np)->ptrs_ + 1) + +/* routines */ +/** various helpers */ +static inline unsigned bit_ofs(unsigned bit_ndx) +{ + return bit_ndx >> 3; +} + +static inline unsigned bit_mask(unsigned bit_ndx) +{ + return 1 << (bit_ndx & 7); +} + +/** PATRICIA trie insertion */ +/*** walkers */ +static struct pat_node *walk_down(struct uid_db *db, struct uid_db_record *rec, + struct pat_node ***edgep, struct pat_node **parentp) +{ + /* + Find the pat node whose id is 'most similar' to the id + stored in rec->id. Return a pointer to this node. + 'parentp' and 'edgep' are output-only parameters which + will point to the parent of returned node and to the edge + pointer going from the parent to the returned node after + the call has returned. + + This routine is intended for inserts only. + */ + struct pat_node *cur, **edge; + unsigned bit_ndx, v = 0, ofs; + + cur = db->pat_root; + ofs = -1; + do { + bit_ndx = cur->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < rec->id_len ? rec->id[ofs] : 0; + } + + edge = ptrs(cur) + (v & cur->bit_mask ? 1 : -1); + } while ((cur = *edge) && cur->bit_ndx > bit_ndx); + + *parentp = + (struct pat_node *) + ((unsigned char *)edge - (v & bit_mask(bit_ndx) ? + offsetof(struct pat_node, ptrs_) + 2 * sizeof(struct pat_node *) + : offsetof(struct pat_node, ptrs_))); + *edgep = edge; + return cur; +} + +static inline struct pat_node *walk_up(unsigned diff_ndx, struct pat_node **parent) +{ + /* + Walk the chain of parent pointers starting with *parent until a node + is found whose parent has a bit_ndx smaller than diff_ndx. Return + a pointer to this node and update *parent to point to its parent. + */ + struct pat_node *p, *np; + + np = *parent; + + while ((p = *ptrs(np)) && p->bit_ndx > diff_ndx) + np = p; + + *parent = p; + return np; +} + +/*** bit fiddling */ +static inline unsigned first_set_bit_in_char(unsigned v) +{ + return ffs(v) - 1; +} + +static int find_first_diff_bit(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + /* + Return the bit_ndx of the first differing bit in + r0->id and r1->id or -1 if the strings are identical. + */ + struct uid_db_record const *long_id; + unsigned ofs, max; + unsigned char v; + + max = r0->id_len; + if (max > r1->id_len) { + max = r1->id_len; + long_id = r0; + } else + long_id = r1; + + ofs = 0; + do + v = r0->id[ofs] ^ r1->id[ofs]; + while (!v && ++ofs < max); + + if (!v) { + if (r0->id_len == r1->id_len) return -1; + v = long_id->id[ofs]; + } + + return first_set_bit_in_char(v) + ofs * 8; +} + +static inline unsigned bit_set(unsigned bit_ndx, struct uid_db_record const *rec) +{ + /* + Return non-zero if the bit corresponding with bit_ndx is set + in rec->id + */ + unsigned ofs; + + ofs = bit_ofs(bit_ndx); + if (ofs >= rec->id_len) return 0; + return rec->id[ofs] & bit_mask(bit_ndx); +} + +/*** node allocation */ +static struct pat_node *get_pat_node(struct uid_db_record *rec) +{ + /* + Allocate a pat_node, set its rec pointer to rec and the + next pointer of rec to NULL. Return pointer to the pat_node. + */ + struct pat_node *np; + + np = (struct pat_node *)xmalloc(sizeof(*np)); + np->rec = rec; + rec->next = NULL; + return np; +} + +static struct pat_node *get_standalone_node(struct uid_db_record *rec) +{ + /* + Return a pat_node suitable for being inserted on the 'left edge' + of the trie, ie either linked to a node whose left pointer was zero + or being inserted as root node into an empty trie. The bit_ndx of + the pat_node is set to the index corresponding with the highest + set bit in rec->id. + + NB: This is a bad choice when UIDs share a common prefix because + this implies that the root node will cause a bit to be tested which + is non-zero in all other nodes, adding a theoretically redundant + level to the trie. This is (to the best of my knowledge) un- + fortunately unavoidable if nodes with different key lengths need + to be supported. + */ + struct pat_node *np; + + np = get_pat_node(rec); + np->bit_ndx = first_set_bit_in_char(*rec->id); + np->bit_mask = bit_mask(np->bit_ndx); + return np; +} + +/*** various helpers */ +#if 0 +static inline int record_id_equal(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + return + r0->id_len == r1->id_len + && memcmp(r0->id, r1->id, r0->id_len) == 0; +} +#endif + +static struct uid_db_record *append_to_list(struct uid_db_record **recp, + struct uid_db_record *rec) +{ + /* + Append the uid_db_record pointed to by rec to the uid_db_record + list accessible as *recp and return rec. + */ + while (*recp) recp = &(*recp)->next; + *recp = rec; + + rec->next = NULL; + return rec; +} + +/*** insert routine */ +static struct uid_db_record *pat_insert(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert the record pointed to by rec in the (potentially empty) + PATRICIA trie pointed to by db->pat_root and return rec. + */ + struct pat_node *np, *closest, *parent, **edge; + int me, bit_ndx; + + if (!db->pat_root) { + np = get_standalone_node(rec); + ptrs(np)[-1] = *ptrs(np) = NULL; + ptrs(np)[1] = np; + + db->pat_root = np; + return rec; + } + + closest = walk_down(db, rec, &edge, &parent); + + if (closest) { + bit_ndx = find_first_diff_bit(closest->rec, rec); + if (bit_ndx < 0) + return append_to_list(&closest->rec->next, rec); + + np = get_pat_node(rec); + np->bit_ndx = bit_ndx; + np->bit_mask = bit_mask(bit_ndx); + } else + np = get_standalone_node(rec); + + if (parent->bit_ndx > np->bit_ndx) { + closest = walk_up(np->bit_ndx, &parent); + + if (!parent) edge = &db->pat_root; + else edge = ptrs(parent)[-1] == closest ? + ptrs(parent) - 1 : ptrs(parent) + 1; + *ptrs(closest) = np; + } + + *edge = np; + *ptrs(np) = parent; + + me = bit_set(np->bit_ndx, rec) ? 1 : -1; + ptrs(np)[me] = np; + ptrs(np)[-me] = closest; + + return rec; +} + +/** general db insertion */ +static struct uid_db_record *get_uid_db_record(char const *id, unsigned status) +{ + /* + Allocate a uid_db_record structure and set its id pointer to a + dynamically allocated copy of id. The status member of the + new record is set to status and its message number to zero (invalid). + A pointer to it is then returned. + */ + struct uid_db_record *rec; + size_t id_len; + + rec = (struct uid_db_record *)xmalloc(sizeof(*rec)); + + id_len = strlen(id); + rec->id = (char *)memcpy(xmalloc(id_len + 1), id, id_len + 1); + rec->id_len = id_len; + rec->status = status; + rec->num = 0; + + return rec; +} + +static void insert_into_records(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert rec into the records array of the uid_db pointed + to by db. The array is grown as necessary and the + corresponding state variables of the db are updated + accordingly. The pos member of rec is set to its position + in the array. + */ + unsigned next, want; + + next = db->records_next; + + if (next == db->records_max) { + want = db->records_max *= 2; + db->records = (struct uid_db_record **)xrealloc(db->records, want * sizeof(rec)); + } + + rec->pos = next; + db->records[next] = rec; + db->records_next = next + 1; +} + +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status) +{ + /* + Create an uid_db_record whose id is id and whose status is + status and insert it into the uid_db pointed to by db. + Return a pointer to the newly created record. + */ + struct uid_db_record *rec; + + rec = get_uid_db_record(id, status); + insert_into_records(db, rec); + return pat_insert(db, rec); +} + +/** message number index */ +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num) +{ + /* + Set the message number of the record pointed to by rec to num + and insert it into the num_ndx of the uid_db pointed to by db + at position corresponding with num. The num_ndx lookup array + is grown as needed. Message numbers are expected to 'generally' + be recorded in ascending order and hence, no provisions are + made to deal with the potentially quadratic complexity of + inserting a sequence of numbers into an array such that it + needs to be grown continuously. + */ + struct num_ndx *num_ndx; + unsigned have, want; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value > num) { + have = num_ndx->pos_0_value - num_ndx->end_value + 1; + want = num_ndx->pos_0_value - num + 1; + num_ndx->end_value = num; + + num_ndx->records = (struct uid_db_record **)xrealloc(num_ndx->records, want * sizeof(rec)); + do num_ndx->records[--want] = NULL; while (want > have); + } + + num_ndx->records[uid_db_num_ofs(num_ndx, num)] = rec; +} + +void reset_uid_db_nums(struct uid_db *db) +{ + /* + Reset the message numbers of all uid_db_records stored + in the uid_db pointed to by db. The corresponding num_ndx + lookup array is afterwards freed and the num_ndx end_value + adjusted in order to indicate an 'empty' message number + index. + */ + struct uid_db_record **rec; + struct num_ndx *num_ndx; + unsigned ndx; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value < num_ndx->pos_0_value) { + ndx = num_ndx->pos_0_value - num_ndx->end_value; + while (ndx) { + rec = num_ndx->records + --ndx; + if (*rec) (*rec)->num = 0; + } + + num_ndx->end_value = num_ndx->pos_0_value + 1; + + free(num_ndx->records); + num_ndx->records = NULL; + } +} + +/** search routines */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id) +{ + /* + Search for an uid_db_record whose id is id in the uid_db pointed + to by db and return a pointer to it or NULL if no such record was + found. + */ + struct pat_node *np; + struct uid_db_record *rec; + unsigned v = 0, bit_ndx, ofs; + size_t len; + + np = db->pat_root; + if (np) { + len = strlen(id); + ofs = -1; + do { + bit_ndx = np->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < len ? id[ofs] : 0; + } + + np = ptrs(np)[v & np->bit_mask ? 1 : -1]; + } while (np && np->bit_ndx > bit_ndx); + + if (!np) return NULL; + + rec = np->rec; + return rec->id_len == len && memcmp(id, rec->id, len) == 0 ? + rec : NULL; + } + + return NULL; +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id) +{ + /* + Return a pointer to the 'last' (insert order) uid_db_record + contained in the uid_db pointed to by db whose id is id or + NULL if no such record exists. + */ + struct uid_db_record *rec; + + rec = find_uid_by_id(db, id); + if (!rec) return NULL; + + while (rec->next) rec = rec->next; + return rec; +} + +/** destruction */ +static void free_uid_list(struct uid_db_record *rec) +{ + if (!rec) return; + + /* + Free the list of uid_db_records starting with + the record pointed to by rec. + */ + if (rec->next) free_uid_list(rec->next); + + xfree(rec->id); + xfree(rec); +} + +static void free_pat_trie(struct pat_node *np) +{ + /* + Free the PATRCIA-trie pointed to by np and all + uid_db_records contained in it. + + The algorithm implemented below is: + + 1. Load the left pointer of the node pointed to by + np into next. + + 2. If the result is not NULL, + 2a) Set the left pointer to NULL. + 2b) Goto 1 if next points to a child of np. + + 3. Load the right pointer of the node pointed to by + np into next. + + 4. If the result is not NULL, + 4a) Set the right pointer to NULL. + 4b) Goto 1 id next points to a child of np. + + 5. Load next with the parent pointer of np. + + 6. Free np->rec and np. + + 7. Set np to next and goto 1 if it is not null. + */ + struct pat_node *next; + + do { + next = ptrs(np)[-1]; + if (next) { + ptrs(np)[-1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = ptrs(np)[1]; + if (next) { + ptrs(np)[1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = *ptrs(np); + + free_uid_list(np->rec); + free(np); + } while ((np = next)); +} + +void free_uid_db(struct uid_db *db) +{ + /* + Free all dynamically allocated memory of the uid_db + pointed to by db. The structure is not reinitialized. + */ + if (db->pat_root) free_pat_trie(db->pat_root); + + xfree(db->records); + xfree(db->num_ndx.records); +} + +/** various public interfaces */ +void init_uid_db(struct uid_db *db) +{ + /* + Initialize the uid_db structure pointed to by db 'properly' + such that it represents an empty database. An array of + size MIN_RECORDS is allocated and assigned to db->records. + */ + struct num_ndx *num_ndx; + + db->pat_root = NULL; + + db->records = (struct uid_db_record **)xmalloc(MIN_RECORDS * sizeof(*db->records)); + db->records_max = MIN_RECORDS; + db->records_next = 0; + + num_ndx = &db->num_ndx; + num_ndx->pos_0_value = num_ndx->end_value = -1; + num_ndx->records = NULL; +} + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1) +{ + struct uid_db tmp; + + tmp = *db_0; + *db_0 = *db_1; + *db_1 = tmp; +} + +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg) +{ + /* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. + */ + struct uid_db_record **recs; + unsigned ndx, max; + int rc; + + rc = 0; + ndx = 0; + max = db->records_next; + recs = db->records; + while (ndx < max && (rc = r(recs[ndx], arg)) == 0) + ++ndx; + + return rc; +} |