From a373e9e6f8b26e700257de062f04f13ddfb96a5f Mon Sep 17 00:00:00 2001 From: Aurélien Bompard Date: Thu, 1 Aug 2013 12:20:57 +0000 Subject: Store the user_id directly in the email table --- kittystore/scripts.py | 22 +++++++++++++++++----- kittystore/storm/model.py | 20 +------------------- kittystore/storm/schema/__init__.py | 27 ++++++--------------------- kittystore/storm/schema/patch_10.py | 33 +++++++++------------------------ kittystore/storm/search.py | 4 ++-- kittystore/storm/store.py | 27 +++++++++------------------ 6 files changed, 44 insertions(+), 89 deletions(-) diff --git a/kittystore/scripts.py b/kittystore/scripts.py index 4ae2690..3930330 100644 --- a/kittystore/scripts.py +++ b/kittystore/scripts.py @@ -85,12 +85,24 @@ def updatedb(): ## More complex post-update actions: # Fill in the user_id from Mailman - from kittystore.storm.model import UserAddress, Email - if store.db.find(UserAddress).count() == 0: - for address in store.db.find(Email.sender_email - ).config(distinct=True): - store._store_mailman_user(address) + from kittystore.storm.model import Email + user_ids = store.db.find(Email.user_id).config(distinct=True) + if user_ids.count() <= 1 and user_ids.one() is None: + print "Updating user_id fields from Mailman, this can take some time..." + emails = store.db.find(Email) + emails_total = emails.count() + user_id_cache = {} # speed up the lookup process + for num, email in enumerate(emails): + if email.sender_email in user_id_cache: + email.user_id = user_id_cache[email.sender_email] + else: + email.user_id = store._store_mailman_user(email.sender_email) + user_id_cache[email.sender_email] = email.user_id + if (num+1) % 10 == 0: + sys.stdout.write("\r%s/%s" % (num+1, emails_total)) + sys.stdout.flush() store.commit() + print " ...done!" diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py index c827640..0b1a48a 100644 --- a/kittystore/storm/model.py +++ b/kittystore/storm/model.py @@ -63,6 +63,7 @@ class Email(Storm): message_id = Unicode() sender_name = Unicode() sender_email = Unicode() + user_id = Unicode() subject = Unicode() content = Unicode() date = DateTime() @@ -89,8 +90,6 @@ class Email(Storm): ("EmailFull.list_name", "EmailFull.message_id")) full = Proxy(full_email, "EmailFull.full") mlist = Reference(list_name, "List.name") - _user_address = Reference(sender_email, "UserAddress.address") - user_id = Proxy(_user_address, "UserAddress.user_id") def __init__(self, list_name, message_id): self.list_name = unicode(list_name) @@ -263,20 +262,3 @@ class Category(Storm): def __init__(self, name): self.name = unicode(name) - - -class UserAddress(Storm): - """ - The link between an email address and the Mailman user UUID. - """ - __storm_table__ = "user_address" - __storm_primary__ = "user_id", "address" - - user_id = Unicode() - address = Unicode() - emails = ReferenceSet(address, Email.sender_email, - order_by=Email.date) - - def __init__(self, user_id, address): - self.user_id = unicode(user_id) - self.address = unicode(address) diff --git a/kittystore/storm/schema/__init__.py b/kittystore/storm/schema/__init__.py index 89f1df6..017accd 100644 --- a/kittystore/storm/schema/__init__.py +++ b/kittystore/storm/schema/__init__.py @@ -27,6 +27,7 @@ CREATES = { message_id VARCHAR(255) NOT NULL, sender_name VARCHAR(255) NOT NULL, sender_email VARCHAR(255) NOT NULL, + user_id VARCHAR(255), subject TEXT NOT NULL, content TEXT NOT NULL, date DATETIME NOT NULL, @@ -61,23 +62,17 @@ CREATES = { PRIMARY KEY (list_name, message_id, counter), FOREIGN KEY (list_name, message_id) REFERENCES email(list_name, message_id) ON DELETE CASCADE - );""", """ - CREATE TABLE "user_address" ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) );""", 'CREATE INDEX "ix_email_list_name" ON "email" (list_name);', 'CREATE INDEX "ix_email_date" ON "email" (date);', 'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" (list_name, message_id_hash);', 'CREATE INDEX "ix_email_sender_email" ON "email" (sender_email);', + 'CREATE INDEX "ix_email_user_id" ON "email" (user_id);', 'CREATE INDEX "ix_email_subject" ON "email" (subject);', 'CREATE INDEX "ix_email_thread_id" ON "email" (thread_id);', 'CREATE INDEX "ix_email_thread_order" ON "email" (thread_order);', 'CREATE INDEX "ix_thread_date_active" ON "thread" (date_active);', 'CREATE UNIQUE INDEX "ix_category_name" ON "category" (name);', - 'CREATE INDEX "ix_user_address_user_id" ON "user_address" (user_id);', - 'CREATE UNIQUE INDEX "ix_user_address_address" ON "user_address" (address);', ], "postgres": [ """ @@ -115,6 +110,7 @@ CREATES = { message_id VARCHAR(255) NOT NULL, sender_name VARCHAR(255) NOT NULL, sender_email VARCHAR(255) NOT NULL, + user_id VARCHAR(255), subject TEXT NOT NULL, content TEXT NOT NULL, date TIMESTAMP WITHOUT TIME ZONE NOT NULL, @@ -149,23 +145,17 @@ CREATES = { PRIMARY KEY (list_name, message_id, counter), FOREIGN KEY (list_name, message_id) REFERENCES email(list_name, message_id) ON DELETE CASCADE - );""", """ - CREATE TABLE "user_address" ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) );""", 'CREATE INDEX "ix_email_list_name" ON "email" USING btree (list_name);', 'CREATE INDEX "ix_email_date" ON "email" USING btree (date);', 'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" USING btree (list_name, message_id_hash);', 'CREATE INDEX "ix_email_sender_email" ON "email" USING btree (sender_email);', + 'CREATE INDEX "ix_email_user_id" ON "email" USING btree (user_id);', 'CREATE INDEX "ix_email_subject" ON "email" USING btree (subject);', 'CREATE INDEX "ix_email_thread_id" ON "email" USING btree (thread_id);', 'CREATE INDEX "ix_email_thread_order" ON "email" USING btree (thread_order);', 'CREATE INDEX "ix_thread_date_active" ON "thread" USING btree (date_active);', 'CREATE UNIQUE INDEX "ix_category_name" ON "category" USING btree (name);', - 'CREATE INDEX "ix_user_address_user_id" ON "user_address" USING btree (user_id);', - 'CREATE UNIQUE INDEX "ix_user_address_address" ON "user_address" USING btree (address);', ], "mysql": [ """ @@ -193,6 +183,7 @@ CREATES = { message_id VARCHAR(255) NOT NULL, sender_name VARCHAR(255) NOT NULL COLLATE utf8_general_ci, sender_email VARCHAR(255) NOT NULL, + user_id VARCHAR(255), subject TEXT NOT NULL COLLATE utf8_general_ci, content TEXT NOT NULL COLLATE utf8_general_ci, date DATETIME NOT NULL, @@ -227,23 +218,17 @@ CREATES = { PRIMARY KEY (list_name, message_id, counter), FOREIGN KEY (list_name, message_id) REFERENCES email(list_name, message_id) ON DELETE CASCADE - );""", """ - CREATE TABLE `user_address` ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) );""", 'CREATE INDEX `ix_email_list_name` ON `email` (list_name);', 'CREATE INDEX `ix_email_date` ON `email` (date);', 'CREATE UNIQUE INDEX `ix_email_list_name_message_id_hash` ON `email` (list_name, message_id_hash);', 'CREATE INDEX `ix_email_sender_email` ON `email` (sender_email(255));', + 'CREATE INDEX `ix_email_user_id` ON `email` (user_id(255));', 'CREATE INDEX `ix_email_subject` ON `email` (subject(255));', 'CREATE INDEX `ix_email_thread_id` ON `email` (thread_id);', 'CREATE INDEX `ix_email_thread_order` ON `email` (thread_order);', 'CREATE INDEX `ix_thread_date_active` ON `thread` (date_active);', 'CREATE UNIQUE INDEX `ix_category_name` ON `category` (name);', - 'CREATE INDEX `ix_user_address_user_id` ON `user_address` (user_id);', - 'CREATE UNIQUE INDEX `ix_user_address_address` ON `user_address` (address);', ], } diff --git a/kittystore/storm/schema/patch_10.py b/kittystore/storm/schema/patch_10.py index f04f8e8..dc41cbe 100644 --- a/kittystore/storm/schema/patch_10.py +++ b/kittystore/storm/schema/patch_10.py @@ -6,32 +6,17 @@ from . import get_db_type SQL = { - "sqlite": [""" - CREATE TABLE "user_address" ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) - );""", - 'CREATE INDEX "ix_user_address_user_id" ON "user_address" (user_id);', - 'CREATE UNIQUE INDEX "ix_user_address_address" ON "user_address" (address);', + "sqlite": [ + 'ALTER TABLE "email" ADD COLUMN user_id VARCHAR(255);', + 'CREATE INDEX "ix_email_user_id" ON "email" (user_id);', ], - "postgres": [""" - CREATE TABLE "user_address" ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) - );""", - 'CREATE INDEX "ix_user_address_user_id" ON "user_address" USING btree (user_id);', - 'CREATE UNIQUE INDEX "ix_user_address_address" ON "user_address" USING btree (address);', + "postgres": [ + 'ALTER TABLE "email" ADD COLUMN user_id VARCHAR(255);', + 'CREATE INDEX "ix_email_user_id" ON "email" USING btree (user_id);', ], - "mysql": [""" - CREATE TABLE `user_address` ( - user_id VARCHAR(255) NOT NULL, - address VARCHAR(255) NOT NULL, - PRIMARY KEY (user_id, address) - );""", - 'CREATE INDEX `ix_user_address_user_id` ON `user_address` (user_id);', - 'CREATE UNIQUE INDEX `ix_user_address_address` ON `user_address` (address);', + "mysql": [ + 'ALTER TABLE `email` ADD COLUMN user_id VARCHAR(255);', + 'CREATE INDEX `ix_email_user_id` ON `email` (user_id(255));', ], } diff --git a/kittystore/storm/search.py b/kittystore/storm/search.py index 4a7b1fc..3098aa6 100644 --- a/kittystore/storm/search.py +++ b/kittystore/storm/search.py @@ -32,7 +32,7 @@ def email_to_search_doc(email): "list_name": email.list_name, "message_id": email.message_id, "sender": u"%s %s" % (email.sender_name, email.sender_email), - "user_id": email.user_id or "", + "user_id": email.user_id, "subject": email.subject, "content": email.content, "date": email.date, # UTC @@ -147,7 +147,7 @@ class SearchEngine(object): def upgrade(self, store): """Upgrade the schema""" - if "user_id" not in self.index.schema or True: + if "user_id" not in self.index.schema: print "Rebuilding the search index to include the new user_id field..." shutil.rmtree(self.location) self._index = None diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py index c41292a..dd70bc0 100644 --- a/kittystore/storm/store.py +++ b/kittystore/storm/store.py @@ -32,8 +32,7 @@ from kittystore.scrub import Scrubber from kittystore.utils import get_ref_and_thread_id from kittystore.analysis import compute_thread_order_and_depth -from .model import (List, Email, Attachment, Thread, EmailFull, Category, - UserAddress) +from .model import List, Email, Attachment, Thread, EmailFull, Category class StormStore(object): @@ -143,7 +142,7 @@ class StormStore(object): email.content, attachments = scrubber.scrub() # store the Mailman user - self._store_mailman_user(email.sender_email) + email.user_id = self._store_mailman_user(email.sender_email) #category = 'Question' # TODO: enum + i18n ? #if ('agenda' in message.get('Subject', '').lower() or @@ -183,11 +182,7 @@ class StormStore(object): if self.debug: print "Can't get the user from Mailman: %s" % e else: - user_already_there = self.db.find(UserAddress, - address=address).count() - if not user_already_there: - user = UserAddress(mm_user.user_id, address) - self.db.add(user) + return unicode(mm_user.user_id) def attach_to_thread(self, email, thread): @@ -572,28 +567,24 @@ class StormStore(object): """ Returns a user's first post on a list """ result = self.db.find(Email, And( Email.list_name == unicode(list_name), - Email.sender_email == UserAddress.address, - UserAddress.user_id == unicode(user_id), + Email.user_id == unicode(user_id), )).order_by(Email.archived_date ).config(limit=1).one() return result def get_sender_name(self, user_id): """ Returns a user's fullname when given his user_id """ - result = self.db.find(Email.sender_name, And( - Email.sender_email == UserAddress.address, - UserAddress.user_id == unicode(user_id), - )).config(limit=1).one() + result = self.db.find(Email.sender_name, + Email.user_id == unicode(user_id) + ).config(limit=1).one() return result def get_message_hashes_by_user_id(self, user_id, list_name=None): """ Returns a user's email hashes """ if list_name is None: - clause = And(Email.sender_email == UserAddress.address, - UserAddress.user_id == unicode(user_id)) + clause = Email.user_id == unicode(user_id) else: - clause = And(Email.sender_email == UserAddress.address, - UserAddress.user_id == unicode(user_id), + clause = And(Email.user_id == unicode(user_id), Email.list_name == unicode(list_name)) result = self.db.find(Email.message_id_hash, clause) return list(result) -- cgit