diff options
author | Matthias Andree <matthias.andree@gmx.de> | 2010-05-24 22:29:53 +0200 |
---|---|---|
committer | Matthias Andree <matthias.andree@gmx.de> | 2016-12-11 22:05:40 +0100 |
commit | cdbac47f9d811eb077c405ceecc6f84a0f8504af (patch) | |
tree | f0a0b16e3c92d70f756ec3e2a2c05956d3224686 | |
parent | b1e81d36e5a324d1e5257de396a3ff8ff99f71ca (diff) | |
download | fetchmail-cdbac47f9d811eb077c405ceecc6f84a0f8504af.tar.gz fetchmail-cdbac47f9d811eb077c405ceecc6f84a0f8504af.tar.bz2 fetchmail-cdbac47f9d811eb077c405ceecc6f84a0f8504af.zip |
UIDL database speedup with Patricia trees.
Import Rainer Weikusat's code that uses Patricia trees instead of linear
lists.
Snapshot Rainer's patch 2010-05-24 19:30:42
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | fetchmail.c | 22 | ||||
-rw-r--r-- | fetchmail.h | 5 | ||||
-rw-r--r-- | pop3.c | 117 | ||||
-rw-r--r-- | uid.c | 180 | ||||
-rw-r--r-- | uid_db.c | 588 | ||||
-rw-r--r-- | uid_db.h | 141 |
7 files changed, 927 insertions, 128 deletions
diff --git a/Makefile.am b/Makefile.am index 3c1a7aab..26c80fd9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -66,7 +66,7 @@ fetchmail_SOURCES= fetchmail.h getopt.h \ driver.c transact.c sink.c smtp.c \ idlist.c uid.c mxget.c md5ify.c cram.c gssapi.c \ opie.c interface.c netrc.c \ - unmime.c conf.c checkalias.c \ + unmime.c conf.c checkalias.c uid_db.h uid_db.c\ lock.h lock.c \ rcfile_l.l rcfile_y.y \ ucs/norm_charmap.c ucs/norm_charmap.h diff --git a/fetchmail.c b/fetchmail.c index 8b0a5c3d..bcd13a50 100644 --- a/fetchmail.c +++ b/fetchmail.c @@ -1597,6 +1597,14 @@ static int query_host(struct query *ctl) return(st); } +static int print_id_of(struct uid_db_record *rec, void *unused) +{ + (void)unused; + + printf("\t%s\n", rec->id); + return 0; +} + static void dump_params (struct runctl *runp, struct query *querylist, flag implicit) /* display query parameters in English */ @@ -2000,20 +2008,14 @@ static void dump_params (struct runctl *runp, if (ctl->server.protocol > P_POP2 && MAILBOX_PROTOCOL(ctl)) { - if (!ctl->oldsaved) + int count; + + if (!(count = uid_db_n_records(&ctl->oldsaved))) printf(GT_(" No UIDs saved from this host.\n")); else { - struct idlist *idp; - int count = 0; - - for (idp = ctl->oldsaved; idp; idp = idp->next) - ++count; - printf(GT_(" %d UIDs saved.\n"), count); - if (outlevel >= O_VERBOSE) - for (idp = ctl->oldsaved; idp; idp = idp->next) - printf("\t%s\n", idp->id); + traverse_uid_db(&ctl->oldsaved, print_id_of, NULL); } } diff --git a/fetchmail.h b/fetchmail.h index f14d51d4..98f07742 100644 --- a/fetchmail.h +++ b/fetchmail.h @@ -39,6 +39,8 @@ struct addrinfo; # include "trio/trio.h" #endif +#include "uid_db.h" + /* We need this for strstr */ #if !defined(HAVE_STRSTR) && !defined(strstr) char *strstr(const char *, const char *); @@ -389,8 +391,7 @@ struct query int smtp_socket; /* socket descriptor for SMTP connection */ unsigned int uid; /* UID of user to deliver to */ struct idlist *skipped; /* messages skipped on the mail server */ - struct idlist *oldsaved, *newsaved; - struct idlist **oldsavedend; + struct uid_db oldsaved, newsaved; char lastdigest[DIGESTLEN]; /* last MD5 hash seen on this connection */ char *folder; /* folder currently being polled */ @@ -21,6 +21,7 @@ #include "fetchmail.h" #include "socket.h" #include "i18n.h" +#include "uid_db.h" #ifdef OPIE_ENABLE #ifdef __cplusplus @@ -821,20 +822,20 @@ static int pop3_fastuidl( int sock, struct query *ctl, unsigned int count, int last_nr = count + 1; while (first_nr < last_nr - 1) { - struct idlist *newl; + struct uid_db_record *rec; try_nr = (first_nr + last_nr) / 2; if ((ok = pop3_getuidl(sock, try_nr, id, sizeof(id))) != 0) return ok; - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = newl->val.status.mark; + flag mark = rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%u) was deleted, but is still present!\n"), id, try_nr); /* just mark it as seen now! */ - newl->val.status.mark = mark = UID_SEEN; + rec->status = mark = UID_SEEN; } /* narrow the search region! */ @@ -848,7 +849,7 @@ static int pop3_fastuidl( int sock, struct query *ctl, unsigned int count, int first_nr = try_nr; /* save the number */ - newl->val.status.num = try_nr; + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } else { @@ -857,8 +858,8 @@ static int pop3_fastuidl( int sock, struct query *ctl, unsigned int count, int last_nr = try_nr; /* save it */ - savep = save_str(savep ? &savep : &ctl->oldsaved, id, UID_UNSEEN); - savep->val.status.num = try_nr; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } } if (outlevel >= O_DEBUG && last_nr <= count) @@ -886,34 +887,38 @@ static int pop3_slowuidl( int sock, struct query *ctl, int *countp, int *newp) * of messages to get. * + Otherwise run a binary search to determine the last known message */ + struct uid_db_record *rec; int ok, nolinear = 0; - int first_nr, list_len, try_id, try_nr, add_id; + int first_nr, n_recs, try_id, try_nr, add_id; int num; char id [IDLEN+1]; if ((ok = pop3_gettopid(sock, 1, id, sizeof(id))) != 0) return ok; - if( ( first_nr = str_nr_in_list(&ctl->oldsaved, id) ) == -1 ) { + rec = first_uid_in_db(&ctl->oldsaved, id); + if (!rec) { /* the first message is unknown -> all messages are new */ *newp = *countp; return 0; } + first_nr = rec->pos; /* check where we expect the latest known message */ - list_len = count_list( &ctl->oldsaved ); - try_id = list_len - first_nr; /* -1 + 1 */ + n_recs = uid_db_n_records(&ctl->oldsaved); + try_id = n_recs - first_nr; /* -1 + 1 */ if( try_id > 1 ) { if( try_id <= *countp ) { if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - - try_nr = str_nr_last_in_list(&ctl->oldsaved, id); + + rec = last_uid_in_db(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } else { try_id = *countp+1; try_nr = -1; } - if( try_nr != list_len -1 ) { + if( try_nr != n_recs -1 ) { /* some messages inbetween have been deleted... */ if( try_nr == -1 ) { nolinear = 1; @@ -931,7 +936,9 @@ static int pop3_slowuidl( int sock, struct query *ctl, int *countp, int *newp) if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - try_nr = str_nr_in_list(&ctl->oldsaved, id); + + rec = find_uid_by_id(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } if( try_nr == -1 ) { try_id--; @@ -944,17 +951,16 @@ static int pop3_slowuidl( int sock, struct query *ctl, int *countp, int *newp) } } /* the first try_id messages are known -> copy them to the newsaved list */ - for( num = first_nr; num < list_len; num++ ) + for( num = first_nr; num < n_recs; num++ ) { - struct idlist *newl = save_str(&ctl->newsaved, - str_from_nr_list(&ctl->oldsaved, num), - UID_UNSEEN); - newl->val.status.num = num - first_nr + 1; + rec = find_uid_by_pos(&ctl->oldsaved, num); + + rec = uid_db_insert(&ctl->newsaved, rec->id, rec->status); + set_uid_db_num(&ctl->newsaved, rec, num - first_nr + 1); } if( nolinear ) { - free_str_list(&ctl->oldsaved); - ctl->oldsaved = 0; + clear_uid_db(&ctl->oldsaved); last = try_id; } @@ -973,7 +979,7 @@ static int pop3_getrange(int sock, (void)folder; /* Ensure that the new list is properly empty */ - ctl->newsaved = (struct idlist *)NULL; + clear_uid_db(&ctl->newsaved); #ifdef MBOX /* Alain Knaff suggests this, but it's not RFC standard */ @@ -1008,6 +1014,9 @@ static int pop3_getrange(int sock, int fastuidl; char id [IDLEN+1]; + set_uid_db_num_pos_0(&ctl->oldsaved, *countp); + set_uid_db_num_pos_0(&ctl->newsaved, *countp); + /* should we do fast uidl this time? */ fastuidl = ctl->fastuidl; if (*countp > 7 && /* linear search is better if there are few mails! */ @@ -1068,14 +1077,13 @@ static int pop3_getrange(int sock, if (parseuid(buf, &unum, id, sizeof(id)) == PS_SUCCESS) { - struct idlist *old; + struct uid_db_record *old_rec, *new_rec; - newl = save_str(newl ? &newl : &ctl->newsaved, id, UID_UNSEEN); - newl->val.status.num = unum; + new_rec = uid_db_insert(&ctl->newsaved, id, UID_UNSEEN); - if ((old = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((old_rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = old->val.status.mark; + flag mark = old_rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { /* XXX FIXME: switch 3 occurrences from @@ -1085,9 +1093,9 @@ static int pop3_getrange(int sock, if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%d) was deleted, but is still present!\n"), id, (int)unum); /* just mark it as seen now! */ - old->val.status.mark = mark = UID_SEEN; + old_rec->status = mark = UID_SEEN; } - newl->val.status.mark = mark; + new_rec->status = mark; if (mark == UID_UNSEEN) { (*newp)++; @@ -1104,10 +1112,14 @@ static int pop3_getrange(int sock, * swap the lists (say, due to socket error), * the same mail will not be downloaded again. */ - old = save_str(&ctl->oldsaved, id, UID_UNSEEN); + old_rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + } /* save the number */ - old->val.status.num = unum; + if (new_rec->status == UID_UNSEEN || !ctl->keep) { + set_uid_db_num(&ctl->oldsaved, old_rec, unum); + set_uid_db_num(&ctl->newsaved, new_rec, unum); + } } else return PS_ERROR; } /* multi-line loop for UIDL reply */ @@ -1176,8 +1188,9 @@ static int pop3_getsizes(int sock, int count, int *sizes) static int pop3_is_old(int sock, struct query *ctl, int num) /* is the given message old? */ { - struct idlist *newl; - if (!ctl->oldsaved) + struct uid_db_record *rec; + + if (!uid_db_n_records(&ctl->oldsaved)) return (num <= last); else if (dofastuidl) { @@ -1188,30 +1201,31 @@ static int pop3_is_old(int sock, struct query *ctl, int num) /* in fast uidl, we manipulate the old list only! */ - if ((newl = id_find(&ctl->oldsaved, num))) + if ((rec = find_uid_by_num(&ctl->oldsaved, num))) { /* we already have the id! */ - return(newl->val.status.mark != UID_UNSEEN); + return(rec->status != UID_UNSEEN); } /* get the uidl first! */ if (pop3_getuidl(sock, num, id, sizeof(id)) != PS_SUCCESS) return(TRUE); - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) { + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { /* we already have the id! */ - newl->val.status.num = num; - return(newl->val.status.mark != UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(rec->status != UID_UNSEEN); } /* save it */ - newl = save_str(&ctl->oldsaved, id, UID_UNSEEN); - newl->val.status.num = num; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(FALSE); + } else { + rec = find_uid_by_num(&ctl->newsaved, num); + return !rec || rec->status != UID_UNSEEN; } - else - return ((newl = id_find(&ctl->newsaved, num)) != NULL && - newl->val.status.mark != UID_UNSEEN); } #ifdef UNUSED @@ -1330,28 +1344,31 @@ static int pop3_fetch(int sock, struct query *ctl, int number, int *lenp) static void mark_uid_seen(struct query *ctl, int number) /* Tell the UID code we've seen this. */ { - struct idlist *sdp; + struct uid_db_record *rec; - if ((sdp = id_find(&ctl->newsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->newsaved, number))) + rec->status = UID_SEEN; /* mark it as seen in oldsaved also! In case, we do not swap the lists * (say, due to socket error), the same mail will not be downloaded * again. */ - if ((sdp = id_find(&ctl->oldsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->oldsaved, number))) + rec->status = UID_SEEN; } static int pop3_delete(int sock, struct query *ctl, int number) /* delete a given message */ { + struct uid_db_record *rec; int ok; mark_uid_seen(ctl, number); /* actually, mark for deletion -- doesn't happen until QUIT time */ ok = gen_transact(sock, "DELE %d", number); if (ok != PS_SUCCESS) return(ok); - delete_str(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + + rec = find_uid_by_num(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + rec->status = UID_DELETED; return(PS_SUCCESS); } @@ -108,6 +108,19 @@ int dofastuidl = 0; static struct idlist *scratchlist; /** Read saved IDs from \a idfile and attach to each host in \a hostlist. */ +static int dump_saved_uid(struct uid_db_record *rec, void *unused) +{ + char *t; + + (void)unused; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s", t); + free(t); + + return 0; +} + void initialize_saved_lists(struct query *hostlist, const char *idfile) { struct stat statbuf; @@ -117,9 +130,9 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) /* make sure lists are initially empty */ for (ctl = hostlist; ctl; ctl = ctl->next) { ctl->skipped = (struct idlist *)NULL; - ctl->oldsaved = (struct idlist *)NULL; - ctl->newsaved = (struct idlist *)NULL; - ctl->oldsavedend = &ctl->oldsaved; + + init_uid_db(&ctl->oldsaved); + init_uid_db(&ctl->newsaved); } errno = 0; @@ -156,10 +169,10 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) while (fgets(buf, POPBUFSIZE, tmpfp) != (char *)NULL) { /* - * At this point, we assume the bug has two fields -- a user@host + * At this point, we assume the bug has two fields -- a user@host * part, and an ID part. Either field may contain spurious @ signs. - * The previous version of this code presumed one could split at - * the rightmost '@'. This is not correct, as InterMail puts an + * The previous version of this code presumed one could split at + * the rightmost '@'. This is not correct, as InterMail puts an * '@' in the UIDL. */ @@ -173,7 +186,7 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) * instead of a Message-ID, as GMX's (www.gmx.net) POP3 * StreamProxy V1.0 does. * - * this is one other trick. The userhost part + * this is one other trick. The userhost part * may contain ' ' in the user part, at least in * the lotus notes case. * So we start looking for the '@' after which the @@ -189,7 +202,7 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) if ((*delimp1 != ' ') && (*delimp1 != '\t')) break; - /* + /* * It should be safe to assume that id starts after * the " " - after all, we're writing the " " * ourselves in write_saved_lists() :-) @@ -212,15 +225,15 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) *atsign = '\0'; host = atsign + 1; - /* find proper list and save it */ + /* find uidl db and save it */ for (ctl = hostlist; ctl; ctl = ctl->next) { if (strcasecmp(host, ctl->server.queryname) == 0 && strcasecmp(user, ctl->remotename) == 0) { - save_str(&ctl->oldsaved, id, UID_SEEN); + uid_db_insert(&ctl->oldsaved, id, UID_SEEN); break; } } - /* + /* * If it's not in a host we're querying, * save it anyway. Otherwise we'd lose UIDL * information any time we queried an explicit @@ -246,16 +259,14 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) for (ctl = hostlist; ctl; ctl = ctl->next) { - report_build(stdout, GT_("Old UID list from %s:"), + report_build(stdout, GT_("Old UID list from %s:"), ctl->server.pollname); - idp = ctl->oldsaved; - if (!idp) + + if (!uid_db_n_records(&ctl->oldsaved)) report_build(stdout, GT_(" <empty>")); - else for (idp = ctl->oldsaved; idp; idp = idp->next) { - char *t = sdump(idp->id, strlen(idp->id)-1); - report_build(stdout, " %s\n", t); - free(t); - } + else + traverse_uid_db(&ctl->oldsaved, dump_saved_uid, NULL); + report_complete(stdout, "\n"); } @@ -273,13 +284,18 @@ void initialize_saved_lists(struct query *hostlist, const char *idfile) /** Assert that all UIDs marked deleted in query \a ctl have actually been expunged. */ -void expunge_uids(struct query *ctl) +static int mark_as_expunged_if(struct uid_db_record *rec, void *unused) { - struct idlist *idl; + (void)unused; - for (idl = dofastuidl ? ctl->oldsaved : ctl->newsaved; idl; idl = idl->next) - if (idl->val.status.mark == UID_DELETED) - idl->val.status.mark = UID_EXPUNGED; + if (rec->status == UID_DELETED) rec->status = UID_EXPUNGED; + return 0; +} + +void expunge_uids(struct query *ctl) +{ + traverse_uid_db(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, + mark_as_expunged_if, NULL); } static const char *str_uidmark(int mark) @@ -303,30 +319,46 @@ static const char *str_uidmark(int mark) } } -static void dump_list(const struct idlist *idp) +static int dump_uid_db_record(struct uid_db_record *rec, void *arg) { - if (!idp) { + unsigned *n_recs; + char *t; + + n_recs = arg; + --*n_recs; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s = %s%s", t, str_uidmark(rec->status), *n_recs ? "," : ""); + free(t); + + return 0; +} + +static void dump_uid_db(struct uid_db *db) +{ + unsigned n_recs; + + n_recs = uid_db_n_records(db); + if (!n_recs) { report_build(stdout, GT_(" <empty>")); - } else while (idp) { - char *t = sdump(idp->id, strlen(idp->id)); - report_build(stdout, " %s = %s%s", t, str_uidmark(idp->val.status.mark), idp->next ? "," : ""); - free(t); - idp = idp->next; + return; } + + traverse_uid_db(db, dump_uid_db_record, &n_recs); } /* finish a query */ -void uid_swap_lists(struct query *ctl) +void uid_swap_lists(struct query *ctl) { /* debugging code */ if (outlevel >= O_DEBUG) { if (dofastuidl) { report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); } else { report_build(stdout, GT_("New UID list from %s:"), ctl->server.pollname); - dump_list(ctl->newsaved); + dump_uid_db(&ctl->newsaved); } report_complete(stdout, "\n"); } @@ -347,15 +379,10 @@ void uid_swap_lists(struct query *ctl) * with UIDLs from that account in .fetchids, there is no way for * them to ever get garbage-collected. */ - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { - /* old state of mailbox may now be irrelevant */ - struct idlist *temp = ctl->oldsaved; - if (outlevel >= O_DEBUG) - report(stdout, GT_("swapping UID lists\n")); - ctl->oldsaved = ctl->newsaved; - ctl->newsaved = (struct idlist *) NULL; - free_str_list(&temp); + swap_uid_db_data(&ctl->newsaved, &ctl->oldsaved); + clear_uid_db(&ctl->newsaved); } /* in fast uidl, there is no need to swap lists: the old state of * mailbox cannot be discarded! */ @@ -372,29 +399,53 @@ void uid_discard_new_list(struct query *ctl) /* this is now a merged list! the mails which were seen in this * poll are marked here. */ report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); report_complete(stdout, "\n"); } - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { /* new state of mailbox is not reliable */ if (outlevel >= O_DEBUG) report(stdout, GT_("discarding new UID list\n")); - free_str_list(&ctl->newsaved); - ctl->newsaved = (struct idlist *) NULL; + clear_uid_db(&ctl->newsaved); } } /** Reset the number associated with each id */ void uid_reset_num(struct query *ctl) { - struct idlist *idp; - for (idp = ctl->oldsaved; idp; idp = idp->next) - idp->val.status.num = 0; + reset_uid_db_nums(&ctl->oldsaved); } /** Write list of seen messages, at end of run. */ +static int count_seen_deleted(struct uid_db_record *rec, void *arg) +{ + if (rec->status == UID_SEEN || rec->status == UID_DELETED) + ++*(long *)arg; + return 0; +} + +struct write_saved_info { + struct query *ctl; + FILE *fp; +}; + +static int write_uid_db_record(struct uid_db_record *rec, void *arg) +{ + struct write_saved_info *info; + int rc; + + if (!(rec->status == UID_SEEN || rec->status == UID_DELETED)) + return 0; + + info = arg; + rc = fprintf(info->fp, "%s@%s %s\n", + info->ctl->remotename, info->ctl->server.queryname, + rec->id); + return rc < 0 ? -1 : 0; +} + void write_saved_lists(struct query *hostlist, const char *idfile) { long idcount; @@ -404,12 +455,8 @@ void write_saved_lists(struct query *hostlist, const char *idfile) /* if all lists are empty, nuke the file */ idcount = 0; - for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - idcount++; - } + for (ctl = hostlist; ctl; ctl = ctl->next) + traverse_uid_db(&ctl->oldsaved, count_seen_deleted, &idcount); /* either nuke the file or write updated last-seen IDs */ if (!idcount && !scratchlist) @@ -428,19 +475,22 @@ void write_saved_lists(struct query *hostlist, const char *idfile) report(stdout, GT_("Writing fetchids file.\n")); (void)unlink(newnam); /* remove file/link first */ if ((tmpfp = fopen(newnam, "w")) != (FILE *)NULL) { + struct write_saved_info info; int errflg = 0; + + info.fp = tmpfp; + for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - if (fprintf(tmpfp, "%s@%s %s\n", - ctl->remotename, ctl->server.queryname, idp->id) < 0) { - int e = errno; - report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); - errflg = 1; - goto bailout; - } + info.ctl = ctl; + + if (traverse_uid_db(&ctl->oldsaved, write_uid_db_record, &info) < 0) { + int e = errno; + report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); + errflg = 1; + goto bailout; + } } + for (idp = scratchlist; idp; idp = idp->next) if (EOF == fputs(idp->id, tmpfp)) { int e = errno; diff --git a/uid_db.c b/uid_db.c new file mode 100644 index 00000000..9648e01a --- /dev/null +++ b/uid_db.c @@ -0,0 +1,588 @@ +/* + POP3 UID db + + Copyright (c) 2010 MAD Partners, Ltd. (rweikusat@mssgmbh.com) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ + +/* includes */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" +#include "uid_db.h" + +/* constants */ +enum { + MIN_RECORDS = 16 /* arbitrary */ +}; + +/* types */ +struct pat_node { + struct pat_node *ptrs_[3]; + + /* + The bit mask is stored in the nodes (as opposed to the + offset, which is (re-)calculated on demand) because + calculating the mask is a non-trivial operation (at + least on x86). + */ + unsigned bit_ndx, bit_mask; + + struct uid_db_record *rec; +}; + +/* + The idea behind this is that the 'left' pointer of + a node is accessible as ptrs(np)[-1] and the right + one a ptrs(np)[1]. This implies that no separate codepaths + for 'symmetric left- and right-cases' are needed. +*/ +#define ptrs(np) ((np)->ptrs_ + 1) + +/* routines */ +/** various helpers */ +static inline unsigned bit_ofs(unsigned bit_ndx) +{ + return bit_ndx >> 3; +} + +static inline unsigned bit_mask(unsigned bit_ndx) +{ + return 1 << (bit_ndx & 7); +} + +/** PATRICIA trie insertion */ +/*** walkers */ +static struct pat_node *walk_down(struct uid_db *db, struct uid_db_record *rec, + struct pat_node ***edgep, struct pat_node **parentp) +{ + /* + Find the pat node whose id is 'most similar' to the id + stored in rec->id. Return a pointer to this node. + 'parentp' and 'edgep' are output-only parameters which + will point to the parent of returned node and to the edge + pointer going from the parent to the returned node after + the call has returned. + + This routine is intended for inserts only. + */ + struct pat_node *cur, **edge; + unsigned bit_ndx, v, ofs; + + cur = db->pat_root; + ofs = -1; + do { + bit_ndx = cur->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < rec->id_len ? rec->id[ofs] : 0; + } + + edge = ptrs(cur) + (v & cur->bit_mask ? 1 : -1); + } while ((cur = *edge) && cur->bit_ndx > bit_ndx); + + *parentp = + (struct pat_node *) + ((unsigned char *)edge - (v & bit_mask(bit_ndx) ? + offsetof(struct pat_node, ptrs_[2]) + : offsetof(struct pat_node, ptrs_[0]))); + *edgep = edge; + return cur; +} + +static inline struct pat_node *walk_up(unsigned diff_ndx, struct pat_node **parent) +{ + /* + Walk the chain of parent pointers starting with *parent until a node + is found whose parent has a bit_ndx smaller than diff_ndx. Return + a pointer to this node and update *parent to point to its parent. + */ + struct pat_node *p, *np; + + np = *parent; + + while ((p = *ptrs(np)) && p->bit_ndx > diff_ndx) + np = p; + + *parent = p; + return np; +} + +/*** bit fiddling */ +static inline unsigned first_set_bit_in_char(unsigned v) +{ + return ffs(v) - 1; +} + +static int find_first_diff_bit(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + /* + Return the bit_ndx of the first differing bit in + r0->id and r1->id or -1 if the strings are identical. + */ + struct uid_db_record const *long_id; + unsigned ofs, max; + unsigned char v; + + max = r0->id_len; + if (max > r1->id_len) { + max = r1->id_len; + long_id = r0; + } else + long_id = r1; + + ofs = 0; + do + v = r0->id[ofs] ^ r1->id[ofs]; + while (!v && ++ofs < max); + + if (!v) { + if (r0->id_len == r1->id_len) return -1; + v = long_id->id[ofs]; + } + + return first_set_bit_in_char(v) + ofs * 8; +} + +static inline unsigned bit_set(unsigned bit_ndx, struct uid_db_record const *rec) +{ + /* + Return non-zero if the bit corresponding with bit_ndx is set + in rec->id + */ + unsigned ofs; + + ofs = bit_ofs(bit_ndx); + if (ofs >= rec->id_len) return 0; + return rec->id[ofs] & bit_mask(bit_ndx); +} + +/*** node allocation */ +static struct pat_node *get_pat_node(struct uid_db_record *rec) +{ + /* + Allocate a pat_node, set its rec pointer to rec and the + next pointer of rec to NULL. Return pointer to the pat_node. + */ + struct pat_node *np; + + np = xmalloc(sizeof(*np)); + np->rec = rec; + rec->next = NULL; + return np; +} + +static struct pat_node *get_standalone_node(struct uid_db_record *rec) +{ + /* + Return a pat_node suitable for being inserted on the 'left edge' + of the trie, ie either linked to a node whose left pointer was zero + or being inserted as root node into an empty trie. The bit_ndx of + the pat_node is set to the index corresponding with the highest + set bit in rec->id. + + NB: This is a bad choice when UIDs share a common prefix because + this implies that the root node will cause a bit to be tested which + is non-zero in all other nodes, adding a theoretically redundant + level to the trie. This is (to the best of my knowledge) un- + fortunately unavoidable if nodes with different key lengths need + to be supported. + */ + struct pat_node *np; + + np = get_pat_node(rec); + np->bit_ndx = first_set_bit_in_char(*rec->id); + np->bit_mask = bit_mask(np->bit_ndx); + return np; +} + +/*** various helpers */ +static inline int record_id_equal(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + return + r0->id_len == r1->id_len + && memcmp(r0->id, r1->id, r0->id_len) == 0; +} + +static struct uid_db_record *append_to_list(struct uid_db_record **recp, + struct uid_db_record *rec) +{ + /* + Append the uid_db_record pointed to by rec to the uid_db_record + list accessible as *recp and return rec. + */ + while (*recp) recp = &(*recp)->next; + *recp = rec; + + rec->next = NULL; + return rec; +} + +/*** insert routine */ +static struct uid_db_record *pat_insert(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert the record pointed to by rec in the (potentially empty) + PATRICIA trie pointed to by db->pat_root and return rec. + */ + struct pat_node *np, *closest, *parent, **edge; + int me, bit_ndx; + + if (!db->pat_root) { + np = get_standalone_node(rec); + ptrs(np)[-1] = *ptrs(np) = NULL; + ptrs(np)[1] = np; + + db->pat_root = np; + return rec; + } + + closest = walk_down(db, rec, &edge, &parent); + + if (closest) { + bit_ndx = find_first_diff_bit(closest->rec, rec); + if (bit_ndx < 0) + return append_to_list(&closest->rec->next, rec); + + np = get_pat_node(rec); + np->bit_ndx = bit_ndx; + np->bit_mask = bit_mask(bit_ndx); + } else + np = get_standalone_node(rec); + + if (parent->bit_ndx > np->bit_ndx) { + closest = walk_up(np->bit_ndx, &parent); + + if (!parent) edge = &db->pat_root; + else edge = ptrs(parent)[-1] == closest ? + ptrs(parent) - 1 : ptrs(parent) + 1; + *ptrs(closest) = np; + } + + *edge = np; + *ptrs(np) = parent; + + me = bit_set(np->bit_ndx, rec) ? 1 : -1; + ptrs(np)[me] = np; + ptrs(np)[-me] = closest; + + return rec; +} + +/** general db insertion */ +static struct uid_db_record *get_uid_db_record(char const *id, unsigned status) +{ + /* + Allocate a uid_db_record structure and set its id pointer to a + dynamically allocated copy of id. The status member of the + new record is set to status and its message number to zero (invalid). + A pointer to it is then returned. + */ + struct uid_db_record *rec; + size_t id_len; + + rec = xmalloc(sizeof(*rec)); + + id_len = strlen(id); + rec->id = memcpy(xmalloc(id_len + 1), id, id_len + 1); + rec->id_len = id_len; + rec->status = status; + rec->num = 0; + + return rec; +} + +static void insert_into_records(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert rec into the records array of the uid_db pointed + to by db. The array is grown as necessary and the + corresponding state variables of the db are updated + accordingly. The pos member of rec is set to its position + in the array. + */ + unsigned next, want; + + next = db->records_next; + + if (next == db->records_max) { + want = db->records_max *= 2; + db->records = xrealloc(db->records, want * sizeof(rec)); + } + + rec->pos = next; + db->records[next] = rec; + db->records_next = next + 1; +} + +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status) +{ + /* + Create an uid_db_record whose id is id and whose status is + status and insert it into the uid_db pointed to by db. + Return a pointer to the newly created record. + */ + struct uid_db_record *rec; + + rec = get_uid_db_record(id, status); + insert_into_records(db, rec); + return pat_insert(db, rec); +} + +/** message number index */ +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num) +{ + /* + Set the message number of the record pointed to by rec to num + and insert it into the num_ndx of the uid_db pointed to by db + at position corresponding with num. The num_ndx lookup array + is grown as needed. Message numbers are expected to 'generally' + be recorded in ascending order and hence, no provisions are + made to deal with the potentially quadratic complexity of + inserting a sequence of numbers into an array such that it + needs to be grown continuously. + */ + struct num_ndx *num_ndx; + unsigned have, want; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value > num) { + have = num_ndx->pos_0_value - num_ndx->end_value + 1; + want = num_ndx->pos_0_value - num + 1; + num_ndx->end_value = num; + + num_ndx->records = xrealloc(num_ndx->records, want * sizeof(rec)); + do num_ndx->records[--want] = NULL; while (want > have); + } + + num_ndx->records[uid_db_num_ofs(num_ndx, num)] = rec; +} + +void reset_uid_db_nums(struct uid_db *db) +{ + /* + Reset the message numbers of all uid_db_records stored + in the uid_db pointed to by db. The corresponding num_ndx + lookup array is afterwards freed and the num_ndx end_value + adjusted in order to indicate an 'empty' message number + index. + */ + struct uid_db_record **rec; + struct num_ndx *num_ndx; + unsigned ndx; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value < num_ndx->pos_0_value) { + ndx = num_ndx->pos_0_value - num_ndx->end_value; + while (ndx) { + rec = num_ndx->records + --ndx; + if (*rec) (*rec)->num = 0; + } + + num_ndx->end_value = num_ndx->pos_0_value + 1; + + free(num_ndx->records); + num_ndx->records = NULL; + } +} + +/** search routines */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id) +{ + /* + Search for an uid_db_record whose id is id in the uid_db pointed + to by db and return a pointer to it or NULL if no such record was + found. + */ + struct pat_node *np; + struct uid_db_record *rec; + unsigned v, bit_ndx, ofs; + size_t len; + + np = db->pat_root; + if (np) { + len = strlen(id); + ofs = -1; + do { + bit_ndx = np->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < len ? id[ofs] : 0; + } + + np = ptrs(np)[v & np->bit_mask ? 1 : -1]; + } while (np && np->bit_ndx > bit_ndx); + + if (!np) return NULL; + + rec = np->rec; + return rec->id_len == len && memcmp(id, rec->id, len) == 0 ? + rec : NULL; + } + + return NULL; +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id) +{ + /* + Return a pointer to the 'last' (insert order) uid_db_record + contained in the uid_db pointed to by db whose id is id or + NULL if no such record exists. + */ + struct uid_db_record *rec; + + rec = find_uid_by_id(db, id); + if (!rec) return NULL; + + while (rec->next) rec = rec->next; + return rec; +} + +/** destruction */ +static void free_uid_list(struct uid_db_record *rec) +{ + /* + Free the list of uid_db_records starting with + the record pointed to by rec. + */ + if (rec->next) free_uid_list(rec->next); + + xfree(rec->id); + xfree(rec); +} + +static void free_pat_trie(struct pat_node *np) +{ + /* + Free the PATRCIA-trie pointed to by np and all + uid_db_records contained in it. + + The algorithm implemented below is: + + 1. Load the left pointer of the node pointed to by + np into next. + + 2. If the result is not NULL, + 2a) Set the left pointer to NULL. + 2b) Goto 1 if next points to a child of np. + + 3. Load the right pointer of the node pointed to by + np into next. + + 4. If the result is not NULL, + 4a) Set the right pointer to NULL. + 4b) Goto 1 id next points to a child of np. + + 5. Load next with the parent pointer of np. + + 6. Free np->rec and np. + + 7. Set np to next and goto 1 if it is not null. + */ + struct pat_node *next; + + do { + next = ptrs(np)[-1]; + if (next) { + ptrs(np)[-1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = ptrs(np)[1]; + if (next) { + ptrs(np)[1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = *ptrs(np); + + free_uid_list(np->rec); + free(np); + } while ((np = next)); +} + +void free_uid_db(struct uid_db *db) +{ + /* + Free all dynamically allocated memory of the uid_db + pointed to by db. The structure is not reinitialized. + */ + if (db->pat_root) free_pat_trie(db->pat_root); + + xfree(db->records); + xfree(db->num_ndx.records); +} + +/** various public interfaces */ +void init_uid_db(struct uid_db *db) +{ + /* + Initialize the uid_db structure pointed to by db 'properly' + such that it represents an empty database. An array of + size MIN_RECORDS is allocated and assigned to db->records. + */ + struct num_ndx *num_ndx; + + db->pat_root = NULL; + + db->records = xmalloc(MIN_RECORDS * sizeof(*db->records)); + db->records_max = MIN_RECORDS; + db->records_next = 0; + + num_ndx = &db->num_ndx; + num_ndx->pos_0_value = num_ndx->end_value = -1; + num_ndx->records = NULL; +} + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1) +{ + struct uid_db tmp; + + tmp = *db_0; + *db_0 = *db_1; + *db_1 = tmp; +} + +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg) +{ + /* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. + */ + struct uid_db_record **recs; + unsigned ndx, max; + int rc; + + rc = 0; + ndx = 0; + max = db->records_next; + recs = db->records; + while (ndx < max && (rc = r(recs[ndx], arg)) == 0) + ++ndx; + + return rc; +} diff --git a/uid_db.h b/uid_db.h new file mode 100644 index 00000000..f76f740f --- /dev/null +++ b/uid_db.h @@ -0,0 +1,141 @@ +/* + POP3 UID database + + Copyright (c) 2010 MAD Partners, Ltd. (rweikusat@mssgmbh.com) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ +#ifndef fetchmail_uid_db_h +#define fetchmail_uid_db_h + +/* includes */ +#include <stddef.h> + +/* types */ +struct uid_db_record { + char *id; + size_t id_len; + + /* + num - message number assigned by server + status - message status (eg seen, deleted, ...) + pos - position in record list + */ + unsigned num, status, pos; + + struct uid_db_record *next; +}; + +struct num_ndx { + /* + Used to find uid records by message number. + + pos_0_value - highest message number + end_value - lowest known message number + + Grows downwards because the fastuidl-code may record + message numbers in non-ascending order but the + lookup array should ideally only be large enough to + store pointers to interesting ('new') messages. + */ + struct uid_db_record **records; + unsigned pos_0_value, end_value; +}; + +struct uid_db +{ + struct pat_node *pat_root; + + struct uid_db_record **records; + unsigned records_max, records_next; + + struct num_ndx num_ndx; +}; + +typedef int uid_db_traversal_routine(struct uid_db_record *, void *); + +/* routines */ +/** initialization/ finalization */ +void init_uid_db(struct uid_db *db); + +void free_uid_db(struct uid_db *db); + +static inline void clear_uid_db(struct uid_db *db) +{ + free_uid_db(db); + init_uid_db(db); +} + +/** message number index handling */ +static inline unsigned uid_db_num_ofs(struct num_ndx const *num_ndx, unsigned num) +{ + return num_ndx->pos_0_value - num; +} + +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num); + +static inline void set_uid_db_num_pos_0(struct uid_db *db, unsigned num) +{ + db->num_ndx.pos_0_value = num; + db->num_ndx.end_value = num + 1; +} + +void reset_uid_db_nums(struct uid_db *db); + +/** various uidl db manipulatiors */ +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status); + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1); + +/** search routines */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id); + +static inline struct uid_db_record * +find_uid_by_num(struct uid_db *db, unsigned num) +{ + struct num_ndx *num_ndx; + + num_ndx = &db->num_ndx; + return num >= num_ndx->end_value ? + num_ndx->records[uid_db_num_ofs(num_ndx, num)] : NULL; +} + +static inline struct uid_db_record * +find_uid_by_pos(struct uid_db *db, unsigned pos) +{ + return pos < db->records_next ? db->records[pos] : NULL; +} + +static inline struct uid_db_record * +first_uid_in_db(struct uid_db *db, char const *id) +{ + return find_uid_by_id(db, id); +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id); + +/** various accessors */ +static inline unsigned uid_db_n_records(struct uid_db const *db) +{ + return db->records_next; +} + +/* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. +*/ +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg); + +#endif |