# Add undocumented SCAN extension to Cyrus to keep PINE/ALPINE happy Index: imap/Makefile.in =================================================================== RCS file: /cvs/src/cyrus/imap/Makefile.in,v retrieving revision 1.191 diff -u -d -r1.191 Makefile.in --- imap/Makefile.in 18 Oct 2007 18:48:02 -0000 1.191 +++ imap/Makefile.in 3 Jan 2008 15:19:13 -0000 @@ -99,7 +99,8 @@ LOBJS= append.o mailbox.o mboxlist.o mupdate-client.o mboxname.o message.o \ global.o imap_err.o mupdate_err.o proc.o setproctitle.o \ convert_code.o duplicate.o saslclient.o saslserver.o signals.o \ - annotate.o search_engines.o squat.o squat_internal.o mbdump.o \ + annotate.o search_engines.o scan_engines.o \ + squat.o squat_internal.o mbdump.o \ imapparse.o telemetry.o user.o notify.o protocol.o idle.o quota_db.o \ sync_log.o $(SEEN) mboxkey.o backend.o tls.o message_guid.o Index: imap/imapd.c =================================================================== RCS file: /cvs/src/cyrus/imap/imapd.c,v retrieving revision 1.537 diff -u -d -r1.537 imapd.c --- imap/imapd.c 27 Nov 2007 15:31:28 -0000 1.537 +++ imap/imapd.c 3 Jan 2008 15:19:14 -0000 @@ -233,6 +233,7 @@ void cmd_reconstruct(const char *tag, const char *name, int recursive); void cmd_find(char *tag, char *namespace, char *pattern); void cmd_list(char *tag, int listopts, char *reference, char *pattern); +void cmd_scan(char *tag, char *reference, char *pattern, char *content); void cmd_changesub(char *tag, char *namespace, char *name, int add); void cmd_getacl(const char *tag, const char *name); void cmd_listrights(char *tag, char *name, char *identifier); @@ -301,6 +302,7 @@ static int mailboxdata(char *name, int matchlen, int maycreate, void *rock); static int listdata(char *name, int matchlen, int maycreate, void *rock); +static int scandata(char *name, int matchlen, int maycreate, void *rock); static void mstringdata(char *cmd, char *name, int matchlen, int maycreate, int listopts); @@ -1731,6 +1733,18 @@ snmp_increment(STATUS_COUNT, 1); } + else if (!strcmp(cmd.s, "Scan")) { + c = getastring(imapd_in, imapd_out, &arg1); + if (c != ' ') goto missingargs; + c = getastring(imapd_in, imapd_out, &arg2); + if (c != ' ') goto missingargs; + c = getastring(imapd_in, imapd_out, &arg3); + if (c == '\r') c = prot_getc(imapd_in); + if (c != '\n') goto extraargs; + cmd_scan(tag.s, arg1.s, arg2.s, arg3.s); + + snmp_increment(SCAN_COUNT, 1); + } else goto badcmd; break; @@ -2594,6 +2608,7 @@ if (idle_enabled()) { prot_printf(imapd_out, " IDLE"); } + prot_printf(imapd_out, " SCAN"); #ifdef ENABLE_LISTEXT prot_printf(imapd_out, " LISTEXT LIST-SUBSCRIBED"); @@ -5879,6 +5894,91 @@ } /* + * Perform a SCAN command (ghastly undocumented IMAP extension!) + */ +void cmd_scan(char *tag, char *reference, char *pattern, char *contents) +{ + char *buf = NULL; + int patlen = 0; + int reflen = 0; + static int ignorereference = 0; + clock_t start = clock(); + char mytime[100]; + int (*findall)(struct namespace *namespace, const char *pattern, + int isadmin, char *userid, + struct auth_state *auth_state, int (*proc)(), + void *rock); + + /* Ignore the reference argument? + (the behavior in 1.5.10 & older) */ + if (ignorereference == 0) { + ignorereference = config_getswitch(IMAPOPT_IGNOREREFERENCE); + } + + /* Reset state in mstringdata */ + mstringdata(NULL, NULL, 0, 0, 0); + + if (!pattern[0]) { + /* Special case: query top-level hierarchy separator */ + prot_printf(imapd_out, "* LIST (\\Noselect) \"%c\" \"\"\r\n", + imapd_namespace.hier_sep); + } else { + /* Do we need to concatenate fields? */ + if (!ignorereference || pattern[0] == imapd_namespace.hier_sep) { + /* Either + * - name begins with dot + * - we're configured to honor the reference argument */ + + /* Allocate a buffer, figure out how to stick the arguments + together, do it, then do that instead of using pattern. */ + patlen = strlen(pattern); + reflen = strlen(reference); + + buf = xmalloc(patlen + reflen + 1); + buf[0] = '\0'; + + if (*reference) { + /* check for LIST A. .B, change to LIST "" A.B */ + if (reference[reflen-1] == imapd_namespace.hier_sep && + pattern[0] == imapd_namespace.hier_sep) { + reference[--reflen] = '\0'; + } + strcpy(buf, reference); + } + strcat(buf, pattern); + pattern = buf; + } + + /* Translate any separators in pattern */ + mboxname_hiersep_tointernal(&imapd_namespace, pattern, 0); + + /* Check to see if we should only list the personal namespace */ + if (!strcmp(pattern, "*") && + config_getswitch(IMAPOPT_FOOLSTUPIDCLIENTS)) { + if (buf) free(buf); + buf = xstrdup("INBOX*"); + pattern = buf; + findall = mboxlist_findall; + } + else { + findall = imapd_namespace.mboxlist_findall; + } + + (*findall)(&imapd_namespace, pattern, + imapd_userisadmin, imapd_userid, imapd_authstate, + scandata, &contents); + + scandata((char *)0, 0, 0, &contents); + + if (buf) free(buf); + } + snprintf(mytime, sizeof(mytime), "%2.3f", + (clock() - start) / (double) CLOCKS_PER_SEC); + prot_printf(imapd_out, "%s OK %s (%s secs %d calls)\r\n", tag, + error_message(IMAP_OK_COMPLETED), mytime, mstringdatacalls); +} + +/* * Perform a SUBSCRIBE (add is nonzero) or * UNSUBSCRIBE (add is zero) command */ @@ -9393,6 +9493,52 @@ return 0; } +/* + * Issue a SCAN untagged response + */ +static int scandata(char *name, int matchlen, int maycreate, void *rock) +{ + char mailboxname[MAX_MAILBOX_NAME+1]; + char *contents = *((char **)rock); + static struct mailbox mailbox; + int r = 0; + int doclose = 0; + + if (name == NULL) { + /* Forces flush for any cached data */ + mstringdata("LIST", name, matchlen, maycreate, 0); + return(0); + } + + /* YYY Better way of doing this? Surely can't be the only place in + the code which has to fight silly INBOX. semi-internal notation */ + if (!strcasecmp(name, "INBOX")) + snprintf(mailboxname, sizeof(mailboxname), "user.%s", imapd_userid); + else if (!strncasecmp(name, "INBOX.", 6)) + snprintf(mailboxname, sizeof(mailboxname), + "user.%s.%s", imapd_userid, name+6); + + if (!r) r = mailbox_open_header(mailboxname, imapd_authstate, &mailbox); + + if (!r) { + doclose = 1; + r = mailbox_open_index(&mailbox); + } + + if (!r && !(mailbox.myrights & ACL_READ)) { + r = (imapd_userisadmin || (mailbox.myrights & ACL_LOOKUP)) ? + IMAP_PERMISSION_DENIED : IMAP_MAILBOX_NONEXISTENT; + } + + if (!r && (index_scan(&mailbox, contents) > 0)) + mstringdata("LIST", name, matchlen, maycreate, 0); + + if (doclose) + mailbox_close(&mailbox); + + return(0); +} + /* Reset the given sasl_conn_t to a sane state */ static int reset_saslconn(sasl_conn_t **conn) { Index: imap/imapd.h =================================================================== RCS file: /cvs/src/cyrus/imap/imapd.h,v retrieving revision 1.67 diff -u -d -r1.67 imapd.h --- imap/imapd.h 26 Oct 2007 15:31:40 -0000 1.67 +++ imap/imapd.h 3 Jan 2008 15:19:14 -0000 @@ -255,6 +255,7 @@ extern int index_search(struct mailbox *mailbox, struct searchargs *searchargs, int usinguid); extern int find_thread_algorithm(char *arg); +extern int index_scan(struct mailbox *mailbox, char *contents); extern int index_sort(struct mailbox *mailbox, struct sortcrit *sortcrit, struct searchargs *searchargs, int usinguid); extern int index_thread(struct mailbox *mailbox, int algorithm, Index: imap/index.c =================================================================== RCS file: /cvs/src/cyrus/imap/index.c,v retrieving revision 1.238 diff -u -d -r1.238 index.c --- imap/index.c 11 Dec 2007 12:00:13 -0000 1.238 +++ imap/index.c 3 Jan 2008 15:19:15 -0000 @@ -72,6 +72,7 @@ #include "message.h" #include "parseaddr.h" #include "search_engines.h" +#include "scan_engines.h" #include "seen.h" #include "strhash.h" #include "stristr.h" @@ -1034,6 +1035,87 @@ return r; } +static int +index_scan_work(const char *s, unsigned long len, + const char *match, unsigned long min) +{ + while (len > min) { + if (!strncasecmp(s, match, min)) + return(1); + s++; + len--; + } + return(0); +} + +/* + * Guts of the SCAN command, lifted from _index_search() + * + * Returns number of hits + */ +int +index_scan(mailbox, contents) +struct mailbox *mailbox; +char *contents; +{ + unsigned *msgno_list; + unsigned msgno; + struct mapfile msgfile; + int n = 0; + int listindex; + int listcount; + struct searchargs searchargs; + struct strlist strlist; + struct index_record record; + unsigned long length; + + if (!(contents && contents[0])) + return(0); + + if (mailbox->exists <= 0) + return 0; + + length = strlen(contents); + + memset(&searchargs, 0, sizeof(struct searchargs)); + searchargs.text = &strlist; + + /* Charset 3 is ISO-8859-1 */ + strlist.s = charset_convert(contents, 3, NULL, 0); + strlist.p = charset_compilepat(strlist.s); + strlist.next = NULL; + + msgno_list = (unsigned *) xmalloc(mailbox->exists * sizeof(unsigned)); + + listcount = scan_prefilter_messages(msgno_list, mailbox, &searchargs); + + for (listindex = 0; listindex < listcount; listindex++) { + msgno = msgno_list[listindex]; + + if (mailbox_read_index_record(mailbox, msgno, &record)) + continue; + + msgfile.base = 0; + msgfile.size = 0; + + if (mailbox_map_message(mailbox, record.uid, + &msgfile.base, &msgfile.size)) + continue; + + if (index_scan_work(msgfile.base, msgfile.size, contents, length)) + n++; + + mailbox_unmap_message(mailbox, record.uid, + &msgfile.base, &msgfile.size); + } + + free(strlist.s); + free(strlist.p); + free(msgno_list); + + return(n); +} + /* * Guts of the SEARCH command. * Index: imap/scan_engines.c =================================================================== RCS file: imap/scan_engines.c diff -N imap/scan_engines.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ imap/scan_engines.c 3 Jan 2008 15:19:15 -0000 @@ -0,0 +1,358 @@ +/* scan_engines.c -- Prefiltering routines for SCAN */ +/* Largely lifted from ./search_engines.c */ + +/* DPC DANGER DANGER DANGER DPC + * + * Several references to imapd_exists in search_engines.c need to be + * mailbox->exists in this file. Otherwise everything compiles okay, but + * you end up with: + * + * a) No results if no mailbox selected (imapd_exists == -1) or small + * b) Trampling over unalloced memory when imapd_exists > mailbox->exists + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "imapd.h" +#include "global.h" +#include "xmalloc.h" +#include "xstrlcpy.h" +#include "xstrlcat.h" + +#include "squat.h" + +static unsigned +my_getuid(struct mailbox *mailbox, unsigned long index) +{ + struct index_record record; + + if (mailbox_read_index_record(mailbox, index, &record)) + return(0); + + return(record.uid); +} + +static unsigned +my_finduid(struct mailbox *mailbox, unsigned uid) +{ + int low=1, high=mailbox->exists, mid = 1; + unsigned miduid; + struct index_record record; + + while (low <= high) { + mid = (high - low)/2 + low; + + if (mailbox_read_index_record(mailbox, mid, &record)) + return(-1); + + miduid = record.uid; + + if (miduid == uid) { + return mid; + } + else if (miduid > uid) { + high = mid - 1; + } + else { + low = mid + 1; + } + } + return high; + +} + +typedef struct { + unsigned char* vector; + struct mailbox* mailbox; + char const* part_types; + int found_validity; +} SquatSearchResult; + +static int +vector_len(struct mailbox *mailbox) { + return ((mailbox->exists) >> 3) + 1; +} + +/* The document name is of the form + +pnnn.vvv + +Where p is a part_type character (denoting which segment of the message +is represented by the document), nnn is the UID of the message, and vvv +is the UID validity value. + +This function parses the document name and returns the message +UID only if the name has the right part type and it corresponds +to a real message UID. +*/ +static int parse_doc_name(struct mailbox *mailbox, + SquatSearchResult* r, char const* doc_name) { + int ch = doc_name[0]; + char const* t = r->part_types; + unsigned doc_UID; + unsigned index; + + if (ch == 'v' && strncmp(doc_name, "validity.", 9) == 0) { + if ((unsigned)atoi(doc_name + 9) == r->mailbox->uidvalidity) { + r->found_validity = 1; + } + return -1; + } + + /* make sure that the document part type is one of the ones we're + accepting */ + while (*t != 0 && *t != ch) { + t++; + } + if (*t == 0) { + return -1; + } + + doc_UID = atoi(++doc_name); + while ((*doc_name >= '0' && *doc_name <= '9') || *doc_name == '-') { + ++doc_name; + } + if (*doc_name != 0) { + return -1; + } + + /* now we need to convert the UID to the message sequence number */ + index = my_finduid(mailbox, doc_UID); + if (index < 1 || index > mailbox->exists || + doc_UID != my_getuid(mailbox, index)) { + return(-1); + } + return(index); +} + +static int +drop_indexed_docs(void* closure, SquatListDoc const* doc) +{ + SquatSearchResult* r = (SquatSearchResult*)closure; + int doc_ID = parse_doc_name(r->mailbox, r, doc->doc_name); + + if (doc_ID >= 0) { + unsigned char* vect = r->vector; + vect[doc_ID >> 3] &= ~(1 << (doc_ID & 0x7)); + } + return SQUAT_CALLBACK_CONTINUE; +} + +static int +fill_with_hits(void* closure, char const* doc) +{ + SquatSearchResult* r = (SquatSearchResult*)closure; + int doc_ID = parse_doc_name(r->mailbox, r, doc); + + if (doc_ID >= 0) { + unsigned char* vect = r->vector; + vect[doc_ID >> 3] |= 1 << (doc_ID & 0x7); + } + return SQUAT_CALLBACK_CONTINUE; +} + +static int +search_strlist(SquatSearchIndex* index, struct mailbox* mailbox, + unsigned char* output, unsigned char* tmp, struct strlist* strs, + char const* part_types) +{ + SquatSearchResult r; + int i; + int len = vector_len(mailbox); + + r.part_types = part_types; + r.vector = tmp; + r.mailbox = mailbox; + while (strs != NULL) { + char const* s = strs->s; + + memset(tmp, 0, len); + if (squat_search_execute(index, s, strlen(s), fill_with_hits, &r) + != SQUAT_OK) { + syslog(LOG_DEBUG, "SQUAT string list search failed on string %s " + "with part types %s", s, part_types); + return 0; + } + for (i = 0; i < len; i++) { + output[i] &= tmp[i]; + } + + strs = strs->next; + } + return 1; +} + +static unsigned char* search_squat_do_query(SquatSearchIndex* index, + struct mailbox* mailbox, struct searchargs* args) { + int vlen = vector_len(mailbox); + unsigned char* vect = xmalloc(vlen); + unsigned char* t_vect = xmalloc(vlen); + unsigned char* result = vect; + struct searchsub* sub; + + memset(vect, 255, vlen); + + if (!(search_strlist(index, mailbox, vect, t_vect, args->to, "t") + && search_strlist(index, mailbox, vect, t_vect, args->from, "f") + && search_strlist(index, mailbox, vect, t_vect, args->cc, "c") + && search_strlist(index, mailbox, vect, t_vect, args->bcc, "b") + && search_strlist(index, mailbox, vect, t_vect, args->subject, "s") + && search_strlist(index, mailbox, vect, t_vect, args->header_name, "h") + && search_strlist(index, mailbox, vect, t_vect, args->header, "h") + && search_strlist(index, mailbox, vect, t_vect, args->body, "m") + && search_strlist(index, mailbox, vect, t_vect, args->text, "mh"))) { + result = NULL; + goto cleanup; + } + + sub = args->sublist; + while (sub != NULL) { + if (args->sublist->sub2 == NULL) { + /* do nothing; because our search is conservative (may include false + positives) we can't compute the NOT (since the result might include + false negatives, which we do not allow) */ + /* Note that it's OK to do nothing. We'll just be returning more + false positives. */ + } else { + unsigned char* sub1_vect = + search_squat_do_query(index, mailbox, args->sublist->sub1); + unsigned char* sub2_vect; + int i; + + if (sub1_vect == NULL) { + result = NULL; + goto cleanup; + } + + sub2_vect + = search_squat_do_query(index, mailbox, args->sublist->sub2); + + if (sub2_vect == NULL) { + result = NULL; + free(sub1_vect); + goto cleanup; + } + + for (i = 0; i < vlen; i++) { + vect[i] &= sub1_vect[i] | sub2_vect[i]; + } + + free(sub1_vect); + free(sub2_vect); + } + + sub = sub->next; + } + + cleanup: + free(t_vect); + if (result != vect) { + free(vect); + } + + return result; +} + +static int search_squat(unsigned *msg_list, struct mailbox *mailbox, + struct searchargs *searchargs) +{ + char index_file_name[MAX_MAILBOX_PATH+1], *path; + int fd; + SquatSearchIndex* index; + unsigned char* msg_vector; + int result; + + path = mailbox->mpath && + (config_metapartition_files & IMAP_ENUM_METAPARTITION_FILES_SQUAT) ? + mailbox->mpath : mailbox->path; + strlcpy(index_file_name, path, sizeof(index_file_name)); + strlcat(index_file_name, FNAME_SQUAT_INDEX, sizeof(index_file_name)); + if ((fd = open(index_file_name, O_RDONLY)) < 0) { + syslog(LOG_DEBUG, "SQUAT failed to open index file"); + return -1; /* probably not found. Just bail */ + } + if ((index = squat_search_open(fd)) == NULL) { + syslog(LOG_DEBUG, "SQUAT failed to open index"); + close(fd); + return -1; + } + if ((msg_vector = search_squat_do_query(index, mailbox, searchargs)) + == NULL) { + result = -1; + } else { + unsigned i; + unsigned vlen = vector_len(mailbox); + unsigned char* unindexed_vector = xmalloc(vlen); + SquatSearchResult r; + + memset(unindexed_vector, 255, vlen); + r.vector = unindexed_vector; + r.mailbox = mailbox; + r.part_types = "tfcbsmh"; + r.found_validity = 0; + if (squat_search_list_docs(index, drop_indexed_docs, &r) + != SQUAT_OK) { + syslog(LOG_DEBUG, "SQUAT failed to get list of indexed documents"); + result = -1; + } else if (!r.found_validity) { + syslog(LOG_DEBUG, "SQUAT didn't find validity record"); + result = -1; + } else { + /* Add in any unindexed messages. They must be searched manually. */ + for (i = 0; i < vlen; i++) { + msg_vector[i] |= unindexed_vector[i]; + } + + result = 0; + for (i = 1; i <= mailbox->exists; i++) { + if ((msg_vector[i >> 3] & (1 << (i & 7))) != 0) { + msg_list[result] = i; + result++; + } + } + } + free(msg_vector); + free(unindexed_vector); + } + squat_search_close(index); + close(fd); + return result; +} + +int +scan_prefilter_messages(unsigned* msg_list, struct mailbox *mailbox, + struct searchargs *searchargs) +{ + unsigned i; + int count; + + if (SQUAT_ENGINE) { + count = search_squat(msg_list, mailbox, searchargs); + if (count >= 0) { + syslog(LOG_DEBUG, "SQUAT returned %d messages", count); + return count; + } else { + /* otherwise, we failed for some reason, so do the default */ + syslog(LOG_DEBUG, "SQUAT failed"); + } + } + + /* Just put in all possible messages. This falls back to Cyrus' default + search. */ + for (i = 0; i < mailbox->exists; i++) { + msg_list[i] = i + 1; + } + return(mailbox->exists); +} + Index: imap/scan_engines.h =================================================================== RCS file: imap/scan_engines.h diff -N imap/scan_engines.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ imap/scan_engines.h 3 Jan 2008 15:19:15 -0000 @@ -0,0 +1,17 @@ +/* scan_engines.h -- Prefiltering routines for SCAN */ +/* Lifted from ./search_engines.h */ + +#ifndef INCLUDED_SCAN_ENGINES_H +#define INCLUDED_SCAN_ENGINES_H + +#include "imapd.h" + +/* Fill the msg_list with a list of message IDs which could match the + * searchargs. + * Return the number of message IDs inserted. + */ +extern int scan_prefilter_messages(unsigned* msg_list, + struct mailbox *mailbox, + struct searchargs *searchargs); + +#endif