# Add following uuid_mode options to Cyrus: # none, prealloc, shortmd5, md5, shortsha1 diff -udNr cyrus-imapd-2.3.8/imap/append.c cyrus-imapd/imap/append.c --- cyrus-imapd-2.3.8/imap/append.c 2007-02-05 18:41:45.000000000 +0000 +++ cyrus-imapd/imap/append.c 2007-07-12 11:40:12.000000000 +0100 @@ -408,6 +408,7 @@ char stagedir[MAX_MAILBOX_PATH+1], stagefile[MAX_MAILBOX_PATH+1]; FILE *f; int r; + enum enum_value config_uuidmode = config_getenum(IMAPOPT_UUID_MODE); assert(mailboxname != NULL); assert(stagep != NULL); @@ -416,8 +417,10 @@ stage->parts = xzmalloc(5 * (MAX_MAILBOX_PATH+1) * sizeof(char)); stage->partend = stage->parts + 5 * (MAX_MAILBOX_PATH+1) * sizeof(char); - /* Assign new, shared MessageID */ - message_uuid_assign(&stage->uuid); + if (config_uuidmode == IMAP_ENUM_UUID_MODE_PREALLOC) { + /* Assign UUID from per process pool if configured (schema 1) */ + message_uuid_assign(&stage->uuid); + } snprintf(stage->fname, sizeof(stage->fname), "%d-%d-%d", (int) getpid(), (int) internaldate, msgnum); @@ -472,7 +475,7 @@ FILE *destfile; int i, r; int userflag, emptyflag; - + enum enum_value config_uuidmode = config_getenum(IMAPOPT_UUID_MODE); /* for staging */ char stagefile[MAX_MAILBOX_PATH+1]; int sflen; @@ -640,8 +643,10 @@ } } } - /* Copy Message UUID from stage */ - message_uuid_copy(&message_index.uuid, &stage->uuid); + /* Copy UUID from per process pool if configured (schema 1) */ + if (config_uuidmode == IMAP_ENUM_UUID_MODE_PREALLOC) { + message_uuid_copy(&message_index.uuid, &stage->uuid); + } /* Write out index file entry */ r = mailbox_append_index(mailbox, &message_index, @@ -704,6 +709,7 @@ FILE *destfile; int i, r; int userflag, emptyflag; + enum enum_value config_uuidmode = config_getenum(IMAPOPT_UUID_MODE); assert(mailbox->format == MAILBOX_FORMAT_NORMAL); assert(size != 0); @@ -801,8 +807,14 @@ } } - /* Assign new Message-UUID */ - message_uuid_assign(&message_index.uuid); + /* NB: append_fromstream() only used by nntpd. 
Everything else goes via + * append_newstage() [allocates common UUID] and append_fromstage() + * [copies UUID that was previously defined for stage file] */ + + /* Assign UUID from per process pool if configured (schema 1) */ + if (config_uuidmode == IMAP_ENUM_UUID_MODE_PREALLOC) { + message_uuid_assign(&message_index.uuid); + } /* Write out index file entry; if we abort later, it's not important */ @@ -1004,7 +1016,7 @@ message_index[msg].uid); } - /* Assign messageID for this message */ + /* Message is a copy, so reuse the existing UUID */ message_uuid_copy(&message_index[msg].uuid, &copymsg[msg].uuid); } diff -udNr cyrus-imapd-2.3.8/imap/mailbox.c cyrus-imapd/imap/mailbox.c --- cyrus-imapd-2.3.8/imap/mailbox.c 2007-02-05 18:41:47.000000000 +0000 +++ cyrus-imapd/imap/mailbox.c 2007-07-11 18:03:03.000000000 +0100 @@ -1521,7 +1521,7 @@ { unsigned long exists; unsigned msgno; - bit32 oldstart_offset, oldrecord_size, recsize_diff; + bit32 oldstart_offset, oldrecord_size; char buf[INDEX_HEADER_SIZE > INDEX_RECORD_SIZE ? 
INDEX_HEADER_SIZE : INDEX_RECORD_SIZE]; char *bufp; @@ -1530,13 +1530,16 @@ bit32 numansweredflag = 0; bit32 numdeletedflag = 0; bit32 numflaggedflag = 0; + int old_minor_version = 0; /* Copy existing header so we can upgrade it */ memcpy(buf, index_base, INDEX_HEADER_SIZE); exists = ntohl(*((bit32 *)(buf+OFFSET_EXISTS))); - if (ntohl(*((bit32 *)(buf+OFFSET_MINOR_VERSION))) <= 5) { + old_minor_version = ntohl(*((bit32 *)(buf+OFFSET_MINOR_VERSION))); + + if (old_minor_version <= 5) { quota_offset = sizeof(bit32); /* upgrade quota to 64-bits (bump existing fields) */ memmove(buf+OFFSET_QUOTA_MAILBOX_USED, buf+OFFSET_QUOTA_MAILBOX_USED64, @@ -1545,7 +1548,7 @@ *((bit32 *)(buf+OFFSET_QUOTA_MAILBOX_USED64)) = htonl(0); } - if (ntohl(*((bit32 *)(buf+OFFSET_MINOR_VERSION))) < 8) { + if (old_minor_version < 8) { #ifdef HAVE_LONG_LONG_INT *((bit64 *)(buf+OFFSET_HIGHESTMODSEQ_64)) = htonll(1); #else @@ -1564,7 +1567,14 @@ /* save old record_size; change record_size */ oldrecord_size = ntohl(*((bit32 *)(buf+OFFSET_RECORD_SIZE))); *((bit32 *)(buf+OFFSET_RECORD_SIZE)) = htonl(INDEX_RECORD_SIZE); - recsize_diff = INDEX_RECORD_SIZE - oldrecord_size; + + if (oldrecord_size > INDEX_RECORD_SIZE) { + char *err = xmalloc(MAX_MAILBOX_NAME+128); + snprintf(err, MAX_MAILBOX_NAME+128, + "Mailbox %s needs reconstruct: Record size %d > %d", + mailbox->name, oldrecord_size, INDEX_RECORD_SIZE); + fatal(err, EC_SOFTWARE); + } /* upgrade other fields as necessary */ if (oldstart_offset < OFFSET_POP3_LAST_LOGIN-quota_offset+sizeof(bit32)) { @@ -1607,9 +1617,7 @@ fwrite(buf, 1, INDEX_HEADER_SIZE, newindex); /* Write the rest of new index */ - memset(buf, 0, INDEX_RECORD_SIZE); for (msgno = 1; msgno <= exists; msgno++) { - /* Write the existing (old) part of the index record */ bufp = (char *) (index_base + oldstart_offset + (msgno - 1)*oldrecord_size); @@ -1621,30 +1629,56 @@ if (sysflags & FLAG_FLAGGED) numflaggedflag++; } - fwrite(bufp, oldrecord_size, 1, newindex); + if (old_minor_version == 
MAILBOX_MINOR_VERSION) { + /* Just copy the original data as is */ + fwrite(bufp, INDEX_RECORD_SIZE, 1, newindex); + continue; + } - if (recsize_diff) { - /* We need to upgrade the index record to include new fields. */ + /* We need to upgrade the index record to include new fields. */ + memset(buf, 0, INDEX_RECORD_SIZE); + memcpy(buf, bufp, oldrecord_size); - /* Currently, this means adding a content_lines placeholder. - * We use BIT32_MAX rather than 0, since a message body can - * be empty. We'll calculate the actual value on demand. - */ - if (oldrecord_size < OFFSET_CONTENT_LINES+sizeof(bit32)) { - *((bit32 *)(buf+OFFSET_CONTENT_LINES)) = htonl(BIT32_MAX); - } + /* Extending UUIDs from 12 to 20 bytes in the middle of the + * index_record means that we really need to use old_minor_version + * rather than oldrecord_size for everything after OFFSET_MESSAGE_UUID. + * + * Use it for content_lines/cache_version above for consistency. + */ - /* Set the initial cache version to 0, that is, with the old - * format of the cached headers */ - if (oldrecord_size < OFFSET_CACHE_VERSION+sizeof(bit32)) { - *((bit32 *)(buf+OFFSET_CACHE_VERSION)) = htonl(0); - } + /* Content_lines added with minor version 5 */ + /* We use BIT32_MAX rather than 0, since a message body can + * be empty. We'll calculate the actual value on demand. + */ + if (old_minor_version < 5) { + *((bit32 *)(buf+OFFSET_CONTENT_LINES)) = htonl(BIT32_MAX); + } - /* Reset undefined MessageUUIDs to NULL value (slow copy) */ - if (oldrecord_size < OFFSET_MESSAGE_UUID+MESSAGE_UUID_PACKED_SIZE) - memset(buf+OFFSET_MESSAGE_UUID, 0, MESSAGE_UUID_PACKED_SIZE); + /* Cache_version added with minor version 6 */ + /* Set the initial cache version to 0, that is, with the old + * format of the cached headers */ + if (old_minor_version < 6) { + *((bit32 *)(buf+OFFSET_CACHE_VERSION)) = htonl(0); + } - /* Set the initial modseq to 1 */ + if (old_minor_version < 7) { + /* 12 byte UUIDs added with minor version 7. 
Default to NIL UUID */ + memset(buf+OFFSET_MESSAGE_UUID, 0, MESSAGE_UUID_PACKED_SIZE); + } else if (old_minor_version < 10) { + /* UUIDs extended from 12 to 20 bytes with minor version 10 */ + void *src = (buf+OFFSET_MESSAGE_UUID)+12; + void *dst = (buf+OFFSET_MESSAGE_UUID)+20; + int len = INDEX_RECORD_SIZE - (OFFSET_MESSAGE_UUID+20); + + /* Move everything after MESSAGE_UUID up by 8 bytes */ + memmove(dst, src, len); + /* Pad new extended UUID with zeros */ + memset(src, 0, 8); + } + + /* modseq added with minor version 8 */ + /* Set the initial modseq to 1 */ + if (old_minor_version < 8) { if (oldrecord_size < OFFSET_MODSEQ+4) { #ifdef HAVE_LONG_LONG_INT *((bit64 *)(buf+OFFSET_MODSEQ_64)) = htonll(1); @@ -1653,9 +1687,8 @@ *((bit32 *)(buf+OFFSET_MODSEQ)) = htonl(1); #endif } - - fwrite(buf+oldrecord_size, recsize_diff, 1, newindex); } + fwrite(buf, INDEX_RECORD_SIZE, 1, newindex); } if (calculate_flagcounts) { diff -udNr cyrus-imapd-2.3.8/imap/mailbox.h cyrus-imapd/imap/mailbox.h --- cyrus-imapd-2.3.8/imap/mailbox.h 2006-11-30 17:11:19.000000000 +0000 +++ cyrus-imapd/imap/mailbox.h 2007-07-11 11:45:55.000000000 +0100 @@ -86,7 +86,7 @@ #define MAILBOX_FORMAT_NORMAL 0 #define MAILBOX_FORMAT_NETNEWS 1 -#define MAILBOX_MINOR_VERSION 9 +#define MAILBOX_MINOR_VERSION 10 #define MAILBOX_CACHE_MINOR_VERSION 2 #define FNAME_HEADER "/cyrus.header" diff -udNr cyrus-imapd-2.3.8/imap/message.c cyrus-imapd/imap/message.c --- cyrus-imapd-2.3.8/imap/message.c 2007-02-05 18:41:47.000000000 +0000 +++ cyrus-imapd/imap/message.c 2007-07-12 08:57:57.000000000 +0100 @@ -75,6 +75,10 @@ #include "global.h" #include "retry.h" +#include "md5global.h" +#include "md5.h" +#include "sha1.h" + /* Message being parsed */ struct msg { const char *base; @@ -137,6 +141,12 @@ * Cached headers. Only filled in at top-level */ struct ibuf cacheheaders; + + /* MD5 UUID. Only filled in at top level */ + unsigned char msg_md5[16]; + + /* SHA1 UUID. 
Only filled in at top level */ + unsigned char msg_sha1[20]; }; /* List of Content-type parameters */ @@ -425,6 +435,17 @@ return 0; } +/* Small utility routine to compute message MD5 sum */ +static void +message_md5_calculate(unsigned char *md5, /* 16 bytes */ + unsigned char *source, unsigned len) +{ + MD5_CTX ctx; + + MD5Init(&ctx); + MD5Update(&ctx, source, len); + MD5Final(md5, &ctx); +} /* * Parse the message at 'msg_base' of length 'msg_len' in 'mailbox'. @@ -436,6 +457,7 @@ struct body *body) { struct msg msg; + enum enum_value config_uuidmode = config_getenum(IMAPOPT_UUID_MODE); msg.base = msg_base; msg.len = msg_len; @@ -445,6 +467,15 @@ message_parse_body(&msg, MAILBOX_FORMAT_NORMAL, body, DEFAULT_CONTENT_TYPE, (struct boundary *)0); + if ((config_uuidmode == IMAP_ENUM_UUID_MODE_SHORTMD5) || + (config_uuidmode == IMAP_ENUM_UUID_MODE_MD5)) { + message_md5_calculate(&body->msg_md5[0], + (unsigned char *)msg_base, msg_len); + } + if (config_uuidmode == IMAP_ENUM_UUID_MODE_SHORTSHA1) { + sha1((unsigned char *)msg_base, msg_len, &body->msg_sha1[0]); + } + return 0; } @@ -530,6 +561,7 @@ struct body *body; { int n; + enum enum_value config_uuidmode = config_getenum(IMAPOPT_UUID_MODE); message_index->sentdate = message_parse_date(body->date, 0); message_index->size = body->header_size + body->content_size; @@ -548,6 +580,21 @@ return IMAP_IOERROR; } + /* Copy in MD5 UUID unless UUID already assigned to the message + * (allows parent to decide which source of UUIDs to use) + */ + if ((config_uuidmode == IMAP_ENUM_UUID_MODE_SHORTMD5) && + message_uuid_isnull(&message_index->uuid)) { + message_uuid_shortmd5(&message_index->uuid, &body->msg_md5[0]); + } + if ((config_uuidmode == IMAP_ENUM_UUID_MODE_MD5) && + message_uuid_isnull(&message_index->uuid)) { + message_uuid_md5(&message_index->uuid, &body->msg_md5[0]); + } + if ((config_uuidmode == IMAP_ENUM_UUID_MODE_SHORTSHA1) && + message_uuid_isnull(&message_index->uuid)) { + 
message_uuid_shortsha1(&message_index->uuid, &body->msg_sha1[0]); + } return 0; } diff -udNr cyrus-imapd-2.3.8/imap/reconstruct.c cyrus-imapd/imap/reconstruct.c --- cyrus-imapd-2.3.8/imap/reconstruct.c 2007-02-05 18:41:48.000000000 +0000 +++ cyrus-imapd/imap/reconstruct.c 2007-07-11 11:33:06.000000000 +0100 @@ -993,6 +993,7 @@ } } + /* NB: message_create_record will reconstruct MD5/SHA1 UUID if NULL */ if (((r = message_parse_file(msgfile, NULL, NULL, &body)) != 0) || ((r = message_create_record(&mailbox, &message_index, body)) != 0)) { fclose(msgfile); diff -udNr cyrus-imapd-2.3.8/lib/imapoptions cyrus-imapd/lib/imapoptions --- cyrus-imapd-2.3.8/lib/imapoptions 2007-02-07 18:58:07.000000000 +0000 +++ cyrus-imapd/lib/imapoptions 2007-07-12 11:39:17.000000000 +0100 @@ -1014,6 +1014,9 @@ mailbox hierarchy. The default is to use the netnews separator character '.'. */ +{ "uuid_mode", "prealloc", ENUM("none", "prealloc", "shortmd5", "md5", "shortsha1")} +/* The way to calculate message UUIDs: none (no UUID is assigned), prealloc (allocated from a per process pool), shortmd5 (first 11 bytes of the message MD5), md5 (full message MD5) or shortsha1 (first 19 bytes of the message SHA1). */ + { "virtdomains", "off", ENUM("off", "userid", "on") } /* Enable virtual domain support. If enabled, the user's domain will be determined by splitting a fully qualified userid at the last '@' diff -udNr cyrus-imapd-2.3.8/lib/message_uuid.c cyrus-imapd/lib/message_uuid.c --- cyrus-imapd-2.3.8/lib/message_uuid.c 2006-11-30 17:11:22.000000000 +0000 +++ cyrus-imapd/lib/message_uuid.c 2007-07-12 12:10:13.000000000 +0100 @@ -54,11 +54,14 @@ /* Private interface */ -/* 96-bit UUID allocation space divided into 256 possible schemas based +/* 160-bit UUID allocation space divided into 256 possible schemas based * on first byte. Currently two UUID schemas defined: * * Schema 0 => NULL values. * Schema 1 => UUIDs allocated by master process in 2^24 bit chunks. 
+ * Schema 2 => First 11 bytes from MD5(msg body) + * Schema 3 => MD5(msg body) [16 bytes] + * Schema 4 => First 19 bytes from SHA1(msg body) */ static int schema = 0; @@ -381,6 +384,48 @@ return(1); } +/* message_uuid_shortmd5() *********************************************** + * + * Generate schema 2 (shortmd5) UUID from message MD5 + * + ************************************************************************/ + +void +message_uuid_shortmd5(struct message_uuid *uuid, unsigned char *md5) +{ + memset(&uuid->value[0], 0, MESSAGE_UUID_SIZE); + uuid->value[0] = 2; + memcpy(&uuid->value[1], md5, 11); +} + +/* message_uuid_md5() **************************************************** + * + * Generate schema 3 (md5) UUID from message MD5 + * + ************************************************************************/ + +void +message_uuid_md5(struct message_uuid *uuid, unsigned char *md5) +{ + memset(&uuid->value[0], 0, MESSAGE_UUID_SIZE); + uuid->value[0] = 3; + memcpy(&uuid->value[1], md5, 16); +} + +/* message_uuid_shortsha1() ********************************************** + * + * Generate schema 4 (shortsha1) UUID from message SHA1 + * + ************************************************************************/ + +void +message_uuid_shortsha1(struct message_uuid *uuid, unsigned char *sha1) +{ + memset(&uuid->value[0], 0, MESSAGE_UUID_SIZE); + uuid->value[0] = 4; + memcpy(&uuid->value[1], sha1, 19); +} + /* Routines for manipulating packed values */ /* message_uuid_pack() *************************************************** diff -udNr cyrus-imapd-2.3.8/lib/message_uuid.h cyrus-imapd/lib/message_uuid.h --- cyrus-imapd-2.3.8/lib/message_uuid.h 2006-11-30 17:11:22.000000000 +0000 +++ cyrus-imapd/lib/message_uuid.h 2007-07-12 08:54:37.000000000 +0100 @@ -1,9 +1,9 @@ #ifndef MESSAGE_UUID_H /* Public interface */ -#define MESSAGE_UUID_SIZE (12) /* Size of UUID byte sequence */ -#define MESSAGE_UUID_PACKED_SIZE (12) /* Size on disk */ -#define MESSAGE_UUID_TEXT_SIZE (24) /* UUID 
as hex */ +#define MESSAGE_UUID_SIZE (20) /* Size of UUID byte sequence */ +#define MESSAGE_UUID_PACKED_SIZE (20) /* Size on disk */ +#define MESSAGE_UUID_TEXT_SIZE (40) /* UUID as hex */ struct message_uuid { unsigned char value[MESSAGE_UUID_SIZE]; /* Matches packed encoding */ @@ -52,6 +52,18 @@ /* Routines for manipulating packed values */ +void +message_uuid_shortmd5(struct message_uuid *uuid, unsigned char *md5); + /* Generate schema 2 UUID from first 11 bytes of message MD5 */ + +void +message_uuid_md5(struct message_uuid *uuid, unsigned char *md5); + /* Generate schema 3 UUID from full 16 byte message MD5 */ + +void +message_uuid_shortsha1(struct message_uuid *uuid, unsigned char *sha1); + /* Generate schema 4 UUID from first 19 bytes of message SHA1 */ + int message_uuid_pack(struct message_uuid *uuid, char *packed); /* Store Message UID as packed sequence (MESSAGE_UUID_PACKED_SIZE) diff -udNr cyrus-imapd-2.3.8/master/message_uuid_master.c cyrus-imapd/master/message_uuid_master.c --- cyrus-imapd-2.3.8/master/message_uuid_master.c 2006-11-30 17:11:23.000000000 +0000 +++ cyrus-imapd/master/message_uuid_master.c 2007-07-12 12:19:32.000000000 +0100 @@ -95,9 +95,11 @@ * (also allows for overflow from 32 bit time_t if ever an issue). * 3->6 32 bit counter initialised as time that master starts * 7->8 16 bit process counter for UUID range. - * 9-11 24 bit counter for UUID prefix within child process. + * 9-11 24 bit counter for UUID prefix within child process. * (means max 1048576 messages per child process) * + * 12-19: Set to be zero. + * * Numbers stored big-endian. 
*/ @@ -158,6 +160,7 @@ s[10] = (uuid_private.count & 0x00ff00) >> 8; s[11] = (uuid_private.count & 0x0000ff); + memset(&s[12], 0, 8); return(1); } diff -udNr cyrus-imapd-2.3.8/master/message_uuid_master.h cyrus-imapd/master/message_uuid_master.h --- cyrus-imapd-2.3.8/master/message_uuid_master.h 2006-11-30 17:11:23.000000000 +0000 +++ cyrus-imapd/master/message_uuid_master.h 2007-07-12 12:16:21.000000000 +0100 @@ -2,9 +2,9 @@ #define MASTER_UUID_FILE "master_uuid" -#define MESSAGE_UUID_SIZE (12) /* Size of UUID byte sequence */ -#define MESSAGE_UUID_PACKED_SIZE (12) /* Size on disk */ -#define MESSAGE_UUID_TEXT_SIZE (24) /* UUID as hex */ +#define MESSAGE_UUID_SIZE (20) /* Size of UUID byte sequence */ +#define MESSAGE_UUID_PACKED_SIZE (20) /* Size on disk */ +#define MESSAGE_UUID_TEXT_SIZE (40) /* UUID as hex */ struct message_uuid { unsigned char value[MESSAGE_UUID_SIZE]; /* Matches packed encoding */