diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | conf.c | 2 | ||||
-rw-r--r-- | driver.c | 215 | ||||
-rw-r--r-- | etrn.c | 1 | ||||
-rw-r--r-- | fetchmail.c | 25 | ||||
-rw-r--r-- | fetchmail.h | 14 | ||||
-rw-r--r-- | fetchmail.man | 29 | ||||
-rwxr-xr-x | fetchmailconf | 13 | ||||
-rw-r--r-- | imap.c | 32 | ||||
-rw-r--r-- | odmr.c | 1 | ||||
-rw-r--r-- | options.c | 14 | ||||
-rw-r--r-- | pop2.c | 1 | ||||
-rw-r--r-- | pop3.c | 203 | ||||
-rw-r--r-- | rcfile_l.l | 2 | ||||
-rw-r--r-- | rcfile_y.y | 4 | ||||
-rw-r--r-- | uid.c | 103 |
16 files changed, 539 insertions, 121 deletions
@@ -14,6 +14,7 @@ option. * Smash all NULs out of headers right after the socket read. * Dup-killer code now keys on an MD5 hash of the raw headers. +* Sunil Shetye's patches to break up fetching of sizes and UIDLs. fetchmail-6.2.4 (Wed Aug 13 04:27:35 EDT 2003), 22625 lines: @@ -376,6 +376,8 @@ void dump_config(struct runctl *runp, struct query *querylist) numdump("limit", ctl->limit); numdump("warnings", ctl->warnings); numdump("fetchlimit", ctl->fetchlimit); + numdump("fetchsizelimit", ctl->fetchsizelimit); + numdump("fastuidl", ctl->fastuidl); numdump("batchlimit", ctl->batchlimit); #ifdef SSL_ENABLE booldump("ssl", ctl->use_ssl); @@ -415,11 +415,55 @@ static void mark_oversized(struct query *ctl, int num, int size) } static int fetch_messages(int mailserver_socket, struct query *ctl, - int count, int *msgsizes, int *msgcodes, int maxfetch, + int count, int *msgsizes, int maxfetch, int *fetches, int *dispatches, int *deletions) /* fetch messages in lockstep mode */ { - int num, err, len; + flag force_retrieval; + int num, firstnum = 1, lastnum = 0, err, len; + int fetchsizelimit = ctl->fetchsizelimit; + int msgsize; + + if (ctl->server.base_protocol->getpartialsizes && NUM_NONZERO(fetchsizelimit)) + { + /* for POP3, we can get the size of one mail only! Unfortunately, this + * protocol specific test cannot be done elsewhere as the protocol + * could be "auto". */ + if (ctl->server.protocol == P_POP3) + fetchsizelimit = 1; + + /* Time to allocate memory to store the sizes */ + xalloca(msgsizes, int *, sizeof(int) * fetchsizelimit); + } + + /* + * What forces this code is that in POP2 and + * IMAP2bis you can't fetch a message without + * having it marked `seen'. In POP3 and IMAP4, on the + * other hand, you can (peek_capable is set by + * each driver module to convey this; it's not a + * method constant because of the difference between + * IMAP2bis and IMAP4, and because POP3 doesn't peek + * if fetchall is on). + * + * The result of being unable to peek is that if there's + * any kind of transient error (DNS lookup failure, or + * sendmail refusing delivery due to process-table limits) + * the message will be marked "seen" on the server without + * having been delivered. This is not a big problem if + * fetchmail is running in foreground, because the user + * will see a "skipped" message when it next runs and get + * clued in. + * + * But in daemon mode this leads to the message + * being silently ignored forever. This is not + * acceptable. + * + * We compensate for this by checking the error + * count from the previous pass and forcing all + * messages to be considered new if it's nonzero. + */ + force_retrieval = !peek_capable && (ctl->errcount > 0); for (num = 1; num <= count; num++) { @@ -427,30 +471,82 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, flag suppress_forward = FALSE; flag suppress_readbody = FALSE; flag retained = FALSE; + int msgcode = MSGLEN_UNKNOWN; - if (msgcodes[num-1] < 0) + /* check if the message is old + * Note: the size of the message may not be known here */ + if (ctl->fetchall || force_retrieval) + ; + else if (ctl->server.base_protocol->is_old && (ctl->server.base_protocol->is_old)(mailserver_socket,ctl,num)) + msgcode = MSGLEN_OLD; + if (msgcode == MSGLEN_OLD) { - if ((msgcodes[num-1] == MSGLEN_TOOLARGE) && !check_only) - { - mark_oversized(ctl, num, msgsizes[num-1]); - suppress_delete = TRUE; - } /* To avoid flooding the syslog when using --keep, * report "Skipped message" only when: * 1) --verbose is on, or - * 2) fetchmail does not use syslog, or - * 3) the message was skipped for some other - * reason than being old. + * 2) fetchmail does not use syslog */ if ( (outlevel >= O_VERBOSE) || - (outlevel > O_SILENT && (!run.use_syslog || msgcodes[num-1] != MSGLEN_OLD)) + (outlevel > O_SILENT && !run.use_syslog) ) { report_build(stdout, + GT_("skipping message %s@%s:%d"), + ctl->remotename, ctl->server.truename, num); + } + + goto flagthemail; + } + + if (ctl->server.base_protocol->getpartialsizes && NUM_NONZERO(fetchsizelimit) && + lastnum < num) + { + /* Instead of getting the sizes of all mails at the start, we get + * the sizes in blocks of fetchsizelimit. This leads to better + * performance when there are too many mails (say, 10000) in + * the mailbox and either we are not getting all the mails at + * one go (--fetchlimit 100) or there is a frequent socket + * error while just getting the sizes of all mails! */ + + int i; + int oldstage = stage; + firstnum = num; + lastnum = num + fetchsizelimit - 1; + if (lastnum > count) + lastnum = count; + for (i = 0; i < fetchsizelimit; i++) + msgsizes[i] = 0; + + stage = STAGE_GETSIZES; + err = (ctl->server.base_protocol->getpartialsizes)(mailserver_socket, num, lastnum, msgsizes); + if (err != 0) + return err; + stage = oldstage; + } + + msgsize = msgsizes ? msgsizes[num-firstnum] : 0; + + /* check if the message is oversized */ + if (NUM_NONZERO(ctl->limit) && (msgsize > ctl->limit)) + msgcode = MSGLEN_TOOLARGE; +/* else if (msgsize == 512) + msgcode = MSGLEN_OLD; (hmh) sample code to skip message */ + + if (msgcode < 0) + { + if ((msgcode == MSGLEN_TOOLARGE) && !check_only) + { + mark_oversized(ctl, num, msgsize); + suppress_delete = TRUE; + } + if (outlevel > O_SILENT) + { + /* old messages are already handled above */ + report_build(stdout, GT_("skipping message %s@%s:%d (%d octets)"), ctl->remotename, ctl->server.truename, num, - msgsizes[num-1]); - switch (msgcodes[num-1]) + msgsize); + switch (msgcode) { case MSGLEN_INVALID: /* @@ -483,7 +579,7 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, report(stdout, GT_("couldn't fetch headers, message %s@%s:%d (%d octets)\n"), ctl->remotename, ctl->server.truename, num, - msgsizes[num-1]); + msgsize); continue; } else if (err != 0) @@ -492,7 +588,7 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, /* -1 means we didn't see a size in the response */ if (len == -1) { - len = msgsizes[num - 1]; + len = msgsize; wholesize = TRUE; } @@ -515,7 +611,7 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, * Read the message headers and ship them to the * output sink. */ - err = readheaders(mailserver_socket, len, msgsizes[num-1], + err = readheaders(mailserver_socket, len, msgsize, ctl, num, /* pass the suppress_readbody flag only if the underlying * protocol does not fetch the body separately */ @@ -573,7 +669,7 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, * string. This violates RFC2060. */ if (len == -1) - len = msgsizes[num-1] - msgblk.msglen; + len = msgsize - msgblk.msglen; if (outlevel > O_SILENT && !wholesize) report_complete(stdout, GT_(" (%d body octets) "), len); @@ -630,13 +726,13 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, * QUALCOMM server (at least) seems to be * reporting the on-disk size correctly. */ - if (msgblk.msglen != msgsizes[num-1]) + if (msgblk.msglen != msgsize) { if (outlevel >= O_DEBUG) report(stdout, GT_("message %s@%s:%d was not the expected length (%d actual != %d expected)\n"), ctl->remotename, ctl->server.truename, num, - msgblk.msglen, msgsizes[num-1]); + msgblk.msglen, msgsize); } /* end-of-message processing starts here */ @@ -649,6 +745,7 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, (*fetches)++; } +flagthemail: /* * At this point in flow of control, either * we've bombed on a protocol error or had @@ -668,9 +765,8 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, } else if (ctl->server.base_protocol->delete && !suppress_delete - && ((msgcodes[num-1] >= 0 && !ctl->keep) - || (msgcodes[num-1] == MSGLEN_OLD|| - msgcodes[num-1] == MSGLEN_TOOLARGE) && ctl->flush)) + && ((msgcode >= 0 && !ctl->keep) + || (msgcode == MSGLEN_OLD && ctl->flush))) { (*deletions)++; if (outlevel > O_SILENT) @@ -689,14 +785,14 @@ static int fetch_messages(int mailserver_socket, struct query *ctl, * 3) the message was skipped for some other * reason than just being old. */ - (outlevel > O_SILENT && (!run.use_syslog || msgcodes[num-1] != MSGLEN_OLD)) + (outlevel > O_SILENT && (!run.use_syslog || msgcode != MSGLEN_OLD)) ) report_complete(stdout, GT_(" not flushed\n")); /* maybe we mark this message as seen now? */ if (ctl->server.base_protocol->mark_seen && !suppress_delete - && (msgcodes[num-1] >= 0 && ctl->keep)) + && (msgcode >= 0 && ctl->keep)) { err = (ctl->server.base_protocol->mark_seen)(mailserver_socket, ctl, num); if (err != 0) @@ -827,7 +923,6 @@ const int maxfetch; /* maximum number of messages to fetch */ char buf[MSGBUFSIZE+1], *realhost; int count, new, bytes, deletions = 0; int *msgsizes = (int *)NULL; - int *msgcodes = (int *)NULL; #if INET6_ENABLE int fetches, dispatches, oldphase; #else /* INET6_ENABLE */ @@ -1302,37 +1397,7 @@ is restored.")); } else if (count > 0) { - flag force_retrieval; - int i, num; - - /* - * What forces this code is that in POP2 and - * IMAP2bis you can't fetch a message without - * having it marked `seen'. In POP3 and IMAP4, on the - * other hand, you can (peek_capable is set by - * each driver module to convey this; it's not a - * method constant because of the difference between - * IMAP2bis and IMAP4, and because POP3 doesn't peek - * if fetchall is on). - * - * The result of being unable to peek is that if there's - * any kind of transient error (DNS lookup failure, or - * sendmail refusing delivery due to process-table limits) - * the message will be marked "seen" on the server without - * having been delivered. This is not a big problem if - * fetchmail is running in foreground, because the user - * will see a "skipped" message when it next runs and get - * clued in. - * - * But in daemon mode this leads to the message - * being silently ignored forever. This is not - * acceptable. - * - * We compensate for this by checking the error - * count from the previous pass and forcing all - * messages to be considered new if it's nonzero. - */ - force_retrieval = !peek_capable && (ctl->errcount > 0); + int i; /* * Don't trust the message count passed by the server. @@ -1347,23 +1412,23 @@ is restored.")); return(PS_PROTOCOL); } - /* OK, we're going to gather size info next */ - xalloca(msgsizes, int *, sizeof(int) * count); - xalloca(msgcodes, int *, sizeof(int) * count); - for (i = 0; i < count; i++) { - msgsizes[i] = 0; - msgcodes[i] = MSGLEN_UNKNOWN; - } - /* * We need the size of each message before it's * loaded in order to pass it to the ESMTP SIZE * option. If the protocol has a getsizes method, * we presume this means it doesn't get reliable * sizes from message fetch responses. + * + * If the protocol supports getting sizes of subset of + * messages, we skip this step now. */ - if (proto->getsizes) + if (proto->getsizes && + !(proto->getpartialsizes && NUM_NONZERO(ctl->fetchsizelimit))) { + xalloca(msgsizes, int *, sizeof(int) * count); + for (i = 0; i < count; i++) + msgsizes[i] = 0; + stage = STAGE_GETSIZES; err = (proto->getsizes)(mailserver_socket, count, msgsizes); if (err != 0) @@ -1377,25 +1442,12 @@ is restored.")); } } - /* mark some messages not to be retrieved */ - for (num = 1; num <= count; num++) - { - if (NUM_NONZERO(ctl->limit) && (msgsizes[num-1] > ctl->limit)) - msgcodes[num-1] = MSGLEN_TOOLARGE; - else if (ctl->fetchall || force_retrieval) - continue; - else if (ctl->server.base_protocol->is_old && (ctl->server.base_protocol->is_old)(mailserver_socket,ctl,num)) - msgcodes[num-1] = MSGLEN_OLD; -/* else if (msgsizes[num-1] == 512) - msgcodes[num-1] = MSGLEN_OLD; (hmh) sample code to skip message */ - } - /* read, forward, and delete messages */ stage = STAGE_FETCH; /* fetch in lockstep mode */ err = fetch_messages(mailserver_socket, ctl, - count, msgsizes, msgcodes, + count, msgsizes, maxfetch, &fetches, &dispatches, &deletions); if (err) @@ -1565,7 +1617,8 @@ const struct method *proto; /* protocol method table */ * If no expunge limit or we do expunges within the driver, * then just do one session, passing in any fetchlimit. */ - if (proto->retry || !NUM_SPECIFIED(ctl->expunge)) + if ((ctl->keep && !ctl->flush) || + proto->retry || !NUM_SPECIFIED(ctl->expunge)) return(do_session(ctl, proto, NUM_VALUE_OUT(ctl->fetchlimit))); /* * There's an expunge limit, and it isn't handled in the driver itself. @@ -135,6 +135,7 @@ const static struct method etrn = NULL, /* no need to get authentication */ etrn_getrange, /* initialize message sending */ NULL, /* we cannot get a list of sizes */ + NULL, /* we cannot get a list of sizes of subsets */ NULL, /* how do we tell a message is old? */ NULL, /* no way to fetch headers */ NULL, /* no way to fetch body */ diff --git a/fetchmail.c b/fetchmail.c index 6b26a6b2..2901bb67 100644 --- a/fetchmail.c +++ b/fetchmail.c @@ -641,13 +641,20 @@ int main(int argc, char **argv) continue; #endif /* (defined(linux) && !INET6_ENABLE) || defined(__FreeBSD__) */ + dofastuidl = 0; /* this is reset in the driver if required */ + querystatus = query_host(ctl); + if (NUM_NONZERO(ctl->fastuidl)) + ctl->fastuidlcount = (ctl->fastuidlcount + 1) % ctl->fastuidl; #ifdef POP3_ENABLE /* leave the UIDL state alone if there have been any errors */ if (!check_only && ((querystatus==PS_SUCCESS) || (querystatus==PS_NOMAIL) || (querystatus==PS_MAXFETCH))) uid_swap_lists(ctl); + else + uid_discard_new_list(ctl); + uid_reset_num(ctl); #endif /* POP3_ENABLE */ if (querystatus == PS_SUCCESS) @@ -872,6 +879,8 @@ static void optmerge(struct query *h2, struct query *h1, int force) FLAG_MERGE(limit); FLAG_MERGE(warnings); FLAG_MERGE(fetchlimit); + FLAG_MERGE(fetchsizelimit); + FLAG_MERGE(fastuidl); FLAG_MERGE(batchlimit); #ifdef SSL_ENABLE FLAG_MERGE(use_ssl); @@ -910,6 +919,8 @@ static int load_params(int argc, char **argv, int optind) def_opts.warnings = WARNING_INTERVAL; def_opts.remotename = user; def_opts.listener = SMTP_MODE; + def_opts.fetchsizelimit = 100; + def_opts.fastuidl = 10; /* get the location of rcfile */ rcfiledir[0] = 0; @@ -1632,6 +1643,20 @@ static void dump_params (struct runctl *runp, ctl->fetchlimit, ctl->fetchlimit); else if (outlevel >= O_VERBOSE) printf(GT_(" No received-message limit (--fetchlimit 0).\n")); + if (NUM_NONZERO(ctl->fetchsizelimit)) + printf(GT_(" Fetch message size limit is %d (--fetchsizelimit %d).\n"), + ctl->fetchsizelimit, ctl->fetchsizelimit); + else if (outlevel >= O_VERBOSE) + printf(GT_(" No fetch message size limit (--fetchsizelimit 0).\n")); + if (NUM_NONZERO(ctl->fastuidl) && MAILBOX_PROTOCOL(ctl)) + { + if (ctl->fastuidl == 1) + printf(GT_(" Do binary search of UIDs during each poll (--fastuidl 1).\n")); + else + printf(GT_(" Do binary search of UIDs during %d out of %d polls (--fastuidl %d).\n"), ctl->fastuidl - 1, ctl->fastuidl, ctl->fastuidl); + } + else if (outlevel >= O_VERBOSE) + printf(GT_(" Do linear search of UIDs during each poll (--fastuidl 0).\n")); if (NUM_NONZERO(ctl->batchlimit)) printf(GT_(" SMTP message batch limit is %d.\n"), ctl->batchlimit); else if (outlevel >= O_VERBOSE) diff --git a/fetchmail.h b/fetchmail.h index 6665ab06..ac89f42c 100644 --- a/fetchmail.h +++ b/fetchmail.h @@ -147,7 +147,7 @@ struct idlist { struct { - short num; + int num; flag mark; /* UID-index information */ #define UID_UNSEEN 0 /* hasn't been seen */ #define UID_SEEN 1 /* seen, but not deleted */ @@ -182,6 +182,8 @@ struct method /* describe methods for protocol state machine */ /* get message range to fetch */ int (*getsizes)(int, int, int *); /* get sizes of messages */ + int (*getpartialsizes)(int, int, int, int *); + /* get sizes of subset of messages */ int (*is_old)(int, struct query *, int); /* check for old message */ int (*fetch_headers)(int, struct query *, int, int *); @@ -289,6 +291,9 @@ struct query int limit; /* limit size of retrieved messages */ int warnings; /* size warning interval */ int fetchlimit; /* max # msgs to get in single poll */ + int fetchsizelimit; /* max # msg sizes to get in a request */ + int fastuidl; /* do binary search for new UIDLs? */ + int fastuidlcount; /* internal count for frequency of binary search */ int batchlimit; /* max # msgs to pass in single SMTP session */ int expunge; /* max # msgs to pass between expunges */ flag use_ssl; /* use SSL encrypted session */ @@ -511,6 +516,8 @@ unsigned char *reply_hack(unsigned char *, const unsigned char *, int *); unsigned char *nxtaddr(const unsigned char *); /* uid.c: UID support */ +extern int dofastuidl; + void initialize_saved_lists(struct query *, const char *); struct idlist *save_str(struct idlist **, const char *, flag); void free_str_list(struct idlist **); @@ -518,17 +525,20 @@ struct idlist *copy_str_list(struct idlist *idl); void save_str_pair(struct idlist **, const char *, const char *); void free_str_pair_list(struct idlist **); int delete_str(struct idlist **, long); -int str_in_list(struct idlist **, const char *, const flag); +struct idlist *str_in_list(struct idlist **, const char *, const flag); int str_nr_in_list(struct idlist **, const char *); int str_nr_last_in_list(struct idlist **, const char *); void str_set_mark( struct idlist **, const char *, const flag); int count_list( struct idlist **idl ); char *str_from_nr_list( struct idlist **idl, long number ); char *str_find(struct idlist **, long); +struct idlist *id_find(struct idlist **idl, long); char *idpair_find(struct idlist **, const char *); void append_str_list(struct idlist **, struct idlist **); void expunge_uids(struct query *); void uid_swap_lists(struct query *); +void uid_discard_new_list(struct query *ctl); +void uid_reset_num(struct query *ctl); void write_saved_lists(struct query *, const char *); /* rcfile_y.y */ diff --git a/fetchmail.man b/fetchmail.man index 06dc9b41..8d29fefb 100644 --- a/fetchmail.man +++ b/fetchmail.man @@ -487,6 +487,29 @@ poll. By default there is no limit. An explicit --fetchlimit of 0 overrides any limits set in your run control file. This option does not work with ETRN or ODMR. .TP +.B \-\-fetchsizelimit <number> +(Keyword: fetchsizelimit) +Limit the number of sizes of messages accepted from a given server in +a single transaction. This option is useful in reducing the delay in +downloading the first mail when there are too many mails in the +mailbox. By default, the limit is 100. If set to 0, sizes of all +messages are downloaded at the start. +This option does not work with ETRN or ODMR. For POP3, the only valid +non-zero value is 1. +.TP +.B \-\-fastuidl <number> +(Keyword: fastuidl) +Do a binary instead of linear search for the first unseen UID. Binary +search avoids downloading the UIDs of all mails. This saves time +(especially in daemon mode) where downloading the same set of UIDs in +each poll is a waste of bandwidth. The number `n' indicates how rarely +a linear search should be done. In daemon mode, linear search is used +once followed by binary searches in `n-1' polls if `n' is greater than +1; binary search is always used if `n' is 1; linear search is always +used if `n' is 0. In non-daemon mode, binary search is used if `n' is +1; otherwise linear search is used. +This option works with POP3 only. +.TP .B \-e <count> | \-\-expunge <count> (keyword: expunge) Arrange for deletions to be made final after a given number of @@ -1479,6 +1502,12 @@ T} fetchlimit -B T{ Max # messages to fetch in single connect T} +fetchsizelimit \& T{ +Max # message sizes to fetch in single transaction +T} +fastuidl \& T{ +Use binary search for first unseen message (POP3 only) +T} expunge -e T{ Perform an expunge on every #th message (IMAP and POP3 only) T} diff --git a/fetchmailconf b/fetchmailconf index 88564c56..ef41a5df 100755 --- a/fetchmailconf +++ b/fetchmailconf @@ -250,6 +250,8 @@ class User: self.limit = 0 # Message size limit self.warnings = 3600 # Size warning interval (see tunable.h) self.fetchlimit = 0 # Max messages fetched per batch + self.fetchsizelimit = 100 # Max message sizes fetched per transaction + self.fastuidl = 10 # Do fast uidl 9 out of 10 times self.batchlimit = 0 # Max message forwarded per batch self.expunge = 0 # Interval between expunges (IMAP) self.ssl = 0 # Enable Seccure Socket Layer @@ -287,6 +289,8 @@ class User: ('limit', 'Int'), ('warnings', 'Int'), ('fetchlimit', 'Int'), + ('fetchsizelimit', 'Int'), + ('fastuidl', 'Int'), ('batchlimit', 'Int'), ('expunge', 'Int'), ('ssl', 'Boolean'), @@ -347,6 +351,10 @@ class User: res = res + " warnings " + `self.warnings` if self.fetchlimit != UserDefaults.fetchlimit: res = res + " fetchlimit " + `self.fetchlimit` + if self.fetchsizelimit != UserDefaults.fetchsizelimit: + res = res + " fetchsizelimit " + `self.fetchsizelimit` + if self.fastuidl != UserDefaults.fastuidl: + res = res + " fastuidl " + `self.fastuidl` if self.batchlimit != UserDefaults.batchlimit: res = res + " batchlimit " + `self.batchlimit` if self.ssl and self.ssl != UserDefaults.ssl: @@ -1701,6 +1709,11 @@ class UserEdit(Frame, MyWidget): self.warnings, '30').pack(side=TOP, fill=X) LabeledEntry(limwin, 'Max messages to fetch per poll:', self.fetchlimit, '30').pack(side=TOP, fill=X) + LabeledEntry(limwin, 'Max message sizes to fetch per transaction:', + self.fetchsizelimit, '30').pack(side=TOP, fill=X) + if self.parent.server.protocol not in ('ETRN', 'ODMR'): + LabeledEntry(limwin, 'Use fast UIDL:', + self.fastuidl, '30').pack(side=TOP, fill=X) LabeledEntry(limwin, 'Max messages to forward per poll:', self.batchlimit, '30').pack(side=TOP, fill=X) if self.parent.server.protocol not in ('ETRN', 'ODMR'): @@ -785,8 +785,8 @@ static int imap_getrange(int sock, return(PS_SUCCESS); } -static int imap_getsizes(int sock, int count, int *sizes) -/* capture the sizes of all messages */ +static int imap_getpartialsizes(int sock, int first, int last, int *sizes) +/* capture the sizes of messages #first-#last */ { char buf [MSGBUFSIZE+1]; @@ -824,14 +824,15 @@ static int imap_getsizes(int sock, int count, int *sizes) * on the fact that the sizes array has been preinitialized with a * known-bad size value. */ - /* if fetchall is specified, startcount is 1; - * else if there is new mail, startcount is first unseen message; - * else startcount is greater than count. - */ - if (count == startcount) - gen_send(sock, "FETCH %d RFC822.SIZE", count); - else if (count > startcount) - gen_send(sock, "FETCH %d:%d RFC822.SIZE", startcount, count); + + /* expunges change the fetch numbers */ + first -= expunged; + last -= expunged; + + if (last == first) + gen_send(sock, "FETCH %d RFC822.SIZE", last); + else if (last > first) + gen_send(sock, "FETCH %d:%d RFC822.SIZE", first, last); else /* no unseen messages! */ return(PS_SUCCESS); for (;;) @@ -852,8 +853,8 @@ static int imap_getsizes(int sock, int count, int *sizes) break; else if (sscanf(buf, "* %u FETCH (RFC822.SIZE %u)", &num, &size) == 2) { - if (num > 0 && num <= count) - sizes[num - 1] = size; + if (num >= first && num <= last) + sizes[num - first] = size; else report(stderr, "Warning: ignoring bogus data for message sizes returned by the server.\n"); } @@ -862,6 +863,12 @@ static int imap_getsizes(int sock, int count, int *sizes) return(PS_SUCCESS); } +static int imap_getsizes(int sock, int count, int *sizes) +/* capture the sizes of all messages */ +{ + return imap_getpartialsizes(sock, 1, count, sizes); +} + static int imap_is_old(int sock, struct query *ctl, int number) /* is the given message old? */ { @@ -1119,6 +1126,7 @@ const static struct method imap = imap_getauth, /* get authorization */ imap_getrange, /* query range of messages */ imap_getsizes, /* get sizes of messages (used for ESMTP SIZE option) */ + imap_getpartialsizes, /* get sizes of subset of messages (used for ESMTP SIZE option) */ imap_is_old, /* no UID check */ imap_fetch_headers, /* request given message headers */ imap_fetch_body, /* request given message body */ @@ -224,6 +224,7 @@ const static struct method odmr = NULL, /* no need to get authentication */ odmr_getrange, /* initialize message sending */ NULL, /* we cannot get a list of sizes */ + NULL, /* we cannot get a list of sizes of subsets */ NULL, /* how do we tell a message is old? */ NULL, /* no way to fetch headers */ NULL, /* no way to fetch body */ @@ -86,6 +86,8 @@ #define LA_SSLFINGERPRINT 60 #endif +#define LA_FETCHSIZELIMIT 61 +#define LA_FASTUIDL 62 /* options still left: CDgGhHjJoORwWxXYz */ static const char *shortoptions = @@ -140,6 +142,8 @@ static const struct option longoptions[] = { {"batchlimit",required_argument, (int *) 0, LA_BATCHLIMIT }, {"fetchlimit",required_argument, (int *) 0, LA_FETCHLIMIT }, + {"fetchsizelimit",required_argument, (int *) 0, LA_FETCHSIZELIMIT }, + {"fastuidl", required_argument, (int *) 0, LA_FASTUIDL }, {"expunge", required_argument, (int *) 0, LA_EXPUNGE }, {"mda", required_argument, (int *) 0, LA_MDA }, {"bsmtp", required_argument, (int *) 0, LA_BSMTP }, @@ -529,6 +533,14 @@ struct query *ctl; /* option record to be initialized */ c = xatoi(optarg, &errflag); ctl->fetchlimit = NUM_VALUE_IN(c); break; + case LA_FETCHSIZELIMIT: + c = xatoi(optarg, &errflag); + ctl->fetchsizelimit = NUM_VALUE_IN(c); + break; + case LA_FASTUIDL: + c = xatoi(optarg, &errflag); + ctl->fastuidl = NUM_VALUE_IN(c); + break; case 'e': case LA_EXPUNGE: c = xatoi(optarg, &errflag); @@ -705,6 +717,8 @@ struct query *ctl; /* option record to be initialized */ P(GT_(" -Z, --antispam, set antispam response values\n")); P(GT_(" -b, --batchlimit set batch limit for SMTP connections\n")); P(GT_(" -B, --fetchlimit set fetch limit for server connections\n")); + P(GT_(" --fetchsizelimit set fetch message size limit\n")); + P(GT_(" --fastuidl do a binary search for UIDLs\n")); P(GT_(" -e, --expunge set max deletions between expunges\n")); P(GT_(" -m, --mda set MDA to use for forwarding\n")); P(GT_(" --bsmtp set output BSMTP file\n")); @@ -141,6 +141,7 @@ const static struct method pop2 = pop2_getauth, /* get authorization */ pop2_getrange, /* query range of messages */ NULL, /* no way to get sizes */ + NULL, /* no way to get sizes of subsets */ NULL, /* messages are always new */ pop2_fetch, /* request given message */ NULL, /* no way to fetch body alone */ @@ -613,6 +613,77 @@ static int pop3_gettopid( int sock, int num , char *id) return 0; } +static int pop3_getuidl( int sock, int num , char *id) +{ + int ok; + char buf [POPBUFSIZE+1]; + gen_send(sock, "UIDL %d", num); + if ((ok = pop3_ok(sock, buf)) != 0) + return(ok); + if (sscanf(buf, "%d %s", &num, id) != 2) + return(PS_PROTOCOL); + return(PS_SUCCESS); +} + +static int pop3_fastuidl( int sock, struct query *ctl, unsigned int count, int *newp) +{ + int ok; + unsigned int first_nr, last_nr, try_nr; + char id [IDLEN+1]; + + first_nr = 0; + last_nr = count + 1; + while (first_nr < last_nr - 1) + { + struct idlist *new; + + try_nr = (first_nr + last_nr) / 2; + if( (ok = pop3_getuidl( sock, try_nr, id )) != 0 ) + return ok; + if ((new = str_in_list(&ctl->oldsaved, id, FALSE))) + { + flag mark = new->val.status.mark; + if (mark == UID_DELETED || mark == UID_EXPUNGED) + { + if (outlevel >= O_VERBOSE) + report(stderr, GT_("id=%s (num=%d) was deleted, but is still present!\n"), id, try_nr); + /* just mark it as seen now! */ + new->val.status.mark = mark = UID_SEEN; + } + + /* narrow the search region! */ + if (mark == UID_UNSEEN) + { + if (outlevel >= O_DEBUG) + report(stdout, GT_("%u is unseen\n"), try_nr); + last_nr = try_nr; + } + else + first_nr = try_nr; + + /* save the number */ + new->val.status.num = try_nr; + } + else + { + if (outlevel >= O_DEBUG) + report(stdout, GT_("%u is unseen\n"), try_nr); + last_nr = try_nr; + + /* save it */ + new = save_str(&ctl->oldsaved, id, UID_UNSEEN); + new->val.status.num = try_nr; + } + } + if (outlevel >= O_DEBUG && last_nr <= count) + report(stdout, GT_("%u is first unseen\n"), last_nr); + + /* update last! */ + *newp = count - first_nr; + last = first_nr; + return 0; +} + static int pop3_slowuidl( int sock, struct query *ctl, int *countp, int *newp) { /* This approach tries to get the message headers from the @@ -737,8 +808,23 @@ static int pop3_getrange(int sock, *newp = -1; if (*countp > 0 && !ctl->fetchall) { + int fastuidl; char id [IDLEN+1]; + /* should we do fast uidl this time? */ + fastuidl = ctl->fastuidl; + if (*countp > 7 && /* linear search is better if there are few mails! */ + !ctl->flush && /* with flush, it is safer to disable fastuidl */ + NUM_NONZERO (fastuidl)) + { + if (fastuidl == 1) + dofastuidl = 1; + else + dofastuidl = ctl->fastuidlcount != 0; + } + else + dofastuidl = 0; + if (!ctl->server.uidl) { gen_send(sock, "LAST"); ok = pop3_ok(sock, buf); @@ -755,6 +841,8 @@ static int pop3_getrange(int sock, } else { + if (dofastuidl) + return(pop3_fastuidl( sock, ctl, *countp, newp)); /* grab the mailbox's UID list */ if ((ok = gen_transact(sock, "UIDL")) != 0) { @@ -776,17 +864,41 @@ static int pop3_getrange(int sock, break; else if (sscanf(buf, "%d %s", &num, id) == 2) { - struct idlist *new; + struct idlist *old, *new; new = save_str(&ctl->newsaved, id, UID_UNSEEN); new->val.status.num = num; - if (str_in_list(&ctl->oldsaved, id, FALSE)) { - new->val.status.mark = UID_SEEN; - str_set_mark(&ctl->oldsaved, id, UID_SEEN); + if ((old = str_in_list(&ctl->oldsaved, id, FALSE))) + { + flag mark = old->val.status.mark; + if (mark == UID_DELETED || mark == UID_EXPUNGED) + { + if (outlevel >= O_VERBOSE) + report(stderr, GT_("id=%s (num=%d) was deleted, but is still present!\n"), id, num); + /* just mark it as seen now! */ + old->val.status.mark = mark = UID_SEEN; + } + new->val.status.mark = mark; + if (mark == UID_UNSEEN) + { + (*newp)++; + if (outlevel >= O_DEBUG) + report(stdout, GT_("%u is unseen\n"), num); + } } else + { (*newp)++; + if (outlevel >= O_DEBUG) + report(stdout, GT_("%u is unseen\n"), num); + /* add it to oldsaved also! In case, we do not + * swap the lists (say, due to socket error), + * the same mail will not be downloaded again. + */ + old = save_str(&ctl->oldsaved, id, UID_UNSEEN); + old->val.status.num = num; + } } } } @@ -796,6 +908,32 @@ static int pop3_getrange(int sock, return(PS_SUCCESS); } +static int pop3_getpartialsizes(int sock, int first, int last, int *sizes) +/* capture the size of message #first */ +{ + int ok; + char buf [POPBUFSIZE+1]; + unsigned int num, size; + + /* for POP3, we can get the size of one mail only! */ + if (first != last) + { + report(stderr, "cannot get a range of message sizes (%d-%d).\n", first, last); + return(PS_PROTOCOL); + } + gen_send(sock, "LIST %d", first); + if ((ok = pop3_ok(sock, buf)) != 0) + return(ok); + if (sscanf(buf, "%u %u", &num, &size) == 2) { + if (num == first) + sizes[0] = size; + else + /* warn about possible attempt to induce buffer overrun */ + report(stderr, "Warning: ignoring bogus data for message sizes returned by server.\n"); + } + return(ok); +} + static int pop3_getsizes(int sock, int count, int *sizes) /* capture the sizes of all messages */ { @@ -829,11 +967,42 @@ static int pop3_getsizes(int sock, int count, int *sizes) static int pop3_is_old(int sock, struct query *ctl, int num) /* is the given message old? */ { + struct idlist *new; if (!ctl->oldsaved) return (num <= last); + else if (dofastuidl) + { + char id [IDLEN+1]; + + if (num <= last) + return(TRUE); + + /* in fast uidl, we manipulate the old list only! */ + + if ((new = id_find(&ctl->oldsaved, num))) + { + /* we already have the id! */ + return(new->val.status.mark != UID_UNSEEN); + } + + /* get the uidl first! */ + if (pop3_getuidl(sock, num, id) != PS_SUCCESS) + return(TRUE); + + if ((new = str_in_list(&ctl->oldsaved, id, FALSE))) { + /* we already have the id! */ + new->val.status.num = num; + return(new->val.status.mark != UID_UNSEEN); + } + + /* save it */ + new = save_str(&ctl->oldsaved, id, UID_UNSEEN); + new->val.status.num = num; + return(FALSE); + } else - return (str_in_list(&ctl->oldsaved, - str_find(&ctl->newsaved, num), FALSE)); + return ((new = id_find(&ctl->newsaved, num)) != NULL && + new->val.status.mark != UID_UNSEEN); } #ifdef UNUSED @@ -944,17 +1113,16 @@ static int pop3_fetch(int sock, struct query *ctl, int number, int *lenp) static void mark_uid_seen(struct query *ctl, int number) /* Tell the UID code we've seen this. */ { - if (ctl->newsaved) - { - struct idlist *sdp; + struct idlist *sdp; - for (sdp = ctl->newsaved; sdp; sdp = sdp->next) - if (sdp->val.status.num == number) - { - sdp->val.status.mark = UID_SEEN; - save_str(&ctl->oldsaved, sdp->id,UID_SEEN); - } - } + if ((sdp = id_find(&ctl->newsaved, number))) + sdp->val.status.mark = UID_SEEN; + /* mark it as seen in oldsaved also! In case, we do not swap the lists + * (say, due to socket error), the same mail will not be downloaded + * again. + */ + if ((sdp = id_find(&ctl->oldsaved, number))) + sdp->val.status.mark = UID_SEEN; } static int pop3_delete(int sock, struct query *ctl, int number) @@ -966,7 +1134,7 @@ static int pop3_delete(int sock, struct query *ctl, int number) ok = gen_transact(sock, "DELE %d", number); if (ok != PS_SUCCESS) return(ok); - delete_str(&ctl->newsaved, number); + delete_str(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); return(PS_SUCCESS); } @@ -1023,6 +1191,7 @@ const static struct method pop3 = pop3_getauth, /* get authorization */ pop3_getrange, /* query range of messages */ pop3_getsizes, /* we can get a list of sizes */ + pop3_getpartialsizes, /* we can get the size of 1 mail */ pop3_is_old, /* how do we tell a message is old? */ pop3_fetch, /* request given message */ NULL, /* no way to fetch body alone */ @@ -124,6 +124,8 @@ plugin { return PLUGIN; } plugout { return PLUGOUT; } batchlimit { return BATCHLIMIT; } fetchlimit { return FETCHLIMIT; } +fetchsizelimit { return FETCHSIZELIMIT; } +fastuidl { return FASTUIDL; } expunge { return EXPUNGE; } properties { return PROPERTIES; } @@ -67,7 +67,7 @@ extern char * yytext; %token SMTPADDRESS SMTPNAME SPAMRESPONSE PRECONNECT POSTCONNECT LIMIT WARNINGS %token NETSEC INTERFACE MONITOR PLUGIN PLUGOUT %token IS HERE THERE TO MAP WILDCARD -%token BATCHLIMIT FETCHLIMIT EXPUNGE PROPERTIES +%token BATCHLIMIT FETCHLIMIT FETCHSIZELIMIT FASTUIDL EXPUNGE PROPERTIES %token SET LOGFILE DAEMON SYSLOG IDFILE INVISIBLE POSTMASTER BOUNCEMAIL %token SPAMBOUNCE SHOWDOTS %token <proto> PROTO AUTHTYPE @@ -374,6 +374,8 @@ user_option : TO localnames HERE | LIMIT NUMBER {current.limit = NUM_VALUE_IN($2);} | WARNINGS NUMBER {current.warnings = NUM_VALUE_IN($2);} | FETCHLIMIT NUMBER {current.fetchlimit = NUM_VALUE_IN($2);} + | FETCHSIZELIMIT NUMBER {current.fetchsizelimit = NUM_VALUE_IN($2);} + | FASTUIDL NUMBER {current.fastuidl = NUM_VALUE_IN($2);} | BATCHLIMIT NUMBER {current.batchlimit = NUM_VALUE_IN($2);} | EXPUNGE NUMBER {current.expunge = NUM_VALUE_IN($2);} @@ -63,9 +63,45 @@ * be picked up by the next run. If there are no un-expunged * messages, the file is deleted. * + * One disadvantage of UIDL is that all the UIDs have to be downloaded + * before a search for new messages can be done. Typically, new messages + * are appended to mailboxes. Hence, downloading all UIDs just to download + * a few new mails is a waste of bandwidth. If new messages are always at + * the end of the mailbox, fast UIDL will decrease the time required to + * download new mails. + * + * During fast UIDL, the UIDs of all messages are not downloaded! The first + * unseen message is searched for by using a binary search on UIDs. UIDs + * after the first unseen message are downloaded as and when needed. + * + * The advantages of fast UIDL are (this is noticeable only when the + * mailbox has too many mails): + * + * - There is no need to download the UIDs of all mails right at the start. + * - There is no need to save all the UIDs in memory separately in + * `newsaved' list. + * - There is no need to download the UIDs of seen mail (except for the + * first binary search). + * - The first new mail is downloaded considerably faster. + * + * The disadvantages are: + * + * - Since all UIDs are not downloaded, it is not possible to swap old and + * new list. The current state of the mailbox is essentially a merged state + * of old and new mails. + * - If an intermediate mail has been temporarily refused (say, due to 4xx + * code from the smtp server), this mail may not get downloaded. + * - If 'flush' is used, such intermediate mails will also get deleted. + * + * The first two disadvantages can be overcome by doing a linear search + * once in a while (say, every 10th poll). Also, with flush, fast UIDL + * should be disabled. + * * Note: some comparisons (those used for DNS address lists) are caseblind! */ +int dofastuidl = 0; + /* UIDs associated with un-queried hosts */ static struct idlist *scratchlist; @@ -305,20 +341,20 @@ void free_str_pair_list(struct idlist **idl) } #endif -int str_in_list(struct idlist **idl, const char *str, const flag caseblind) +struct idlist *str_in_list(struct idlist **idl, const char *str, const flag caseblind) /* is a given ID in the given list? (comparison may be caseblind) */ { struct idlist *walk; if (caseblind) { for( walk = *idl; walk; walk = walk->next ) if( strcasecmp( str, (char *)walk->id) == 0 ) - return 1; + return walk; } else { for( walk = *idl; walk; walk = walk->next ) if( strcmp( str, (char *)walk->id) == 0 ) - return 1; + return walk; } - return 0; + return NULL; } int str_nr_in_list( struct idlist **idl, const char *str ) @@ -389,6 +425,16 @@ char *str_find(struct idlist **idl, long number) return(str_find(&(*idl)->next, number)); } +struct idlist *id_find(struct idlist **idl, long number) +/* return the id of the given number in the given list. */ +{ + struct idlist *idp; + for (idp = *idl; idp; idp = idp->next) + if (idp->val.status.num == number) + return(idp); + return(0); +} + char *idpair_find(struct idlist **idl, const char *id) /* return the id of the given id in the given list (caseblind comparison) */ { @@ -449,7 +495,7 @@ void expunge_uids(struct query *ctl) { struct idlist *idl; - for (idl = ctl->newsaved; idl; idl = idl->next) + for (idl = dofastuidl ? ctl->oldsaved : ctl->newsaved; idl; idl = idl->next) if (idl->val.status.mark == UID_DELETED) idl->val.status.mark = UID_EXPUNGED; } @@ -462,8 +508,11 @@ void uid_swap_lists(struct query *ctl) { struct idlist *idp; - report_build(stdout, GT_("New UID list from %s:"), ctl->server.pollname); - for (idp = ctl->newsaved; idp; idp = idp->next) + if (dofastuidl) + report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); + else + report_build(stdout, GT_("New UID list from %s:"), ctl->server.pollname); + for (idp = dofastuidl ? ctl->oldsaved : ctl->newsaved; idp; idp = idp->next) report_build(stdout, " %s = %d", idp->id, idp->val.status.mark); if (!idp) report_build(stdout, GT_(" <empty>")); @@ -495,10 +544,48 @@ void uid_swap_lists(struct query *ctl) ctl->oldsaved = ctl->newsaved; ctl->newsaved = (struct idlist *) NULL; } - else if (outlevel >= O_DEBUG) + /* in fast uidl, there is no need to swap lists: the old state of + * mailbox cannot be discarded! */ + else if (outlevel >= O_DEBUG && !dofastuidl) report(stdout, GT_("not swapping UID lists, no UIDs seen this query\n")); } +void uid_discard_new_list(struct query *ctl) +/* finish a query which had errors */ +{ + /* debugging code */ + if (ctl->server.uidl && outlevel >= O_DEBUG) + { + struct idlist *idp; + + /* this is now a merged list! the mails which were seen in this + * poll are marked here. */ + report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); + for (idp = ctl->oldsaved; idp; idp = idp->next) + report_build(stdout, " %s = %d", idp->id, idp->val.status.mark); + if (!idp) + report_build(stdout, GT_(" <empty>")); + report_complete(stdout, "\n"); + } + + if (ctl->newsaved) + { + /* new state of mailbox is not reliable */ + if (outlevel >= O_DEBUG) + report(stdout, GT_("discarding new UID list\n")); + free_str_list(&ctl->newsaved); + ctl->newsaved = (struct idlist *) NULL; + } +} + +void uid_reset_num(struct query *ctl) +/* reset the number associated with each id */ +{ + struct idlist *idp; + for (idp = ctl->oldsaved; idp; idp = idp->next) + idp->val.status.num = 0; +} + void write_saved_lists(struct query *hostlist, const char *idfile) /* perform end-of-run write of seen-messages list */ { |