summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAurélien Bompard <aurelien@bompard.org>2012-11-09 13:51:18 +0100
committerAurélien Bompard <aurelien@bompard.org>2012-11-09 13:51:18 +0100
commit6a1eaf953aedfb2de06ef43119741d612a6f779b (patch)
tree061ab301091bd7c7a3e856b392c703e21e473f80
parentff7b371366aaf51302c5f021af5d5e2bda02565c (diff)
downloadkittystore-6a1eaf953aedfb2de06ef43119741d612a6f779b.tar.gz
kittystore-6a1eaf953aedfb2de06ef43119741d612a6f779b.tar.xz
kittystore-6a1eaf953aedfb2de06ef43119741d612a6f779b.zip
Add a thread object in the model
New object in the model: the Thread. It starts by having an active_date, corresponding to the date of the last email in the thread. Later on, it will also hold the category, tags, maybe ratings, etc. A schema migration is provided, upgrade should be automatic.
-rw-r--r--kittystore/storm/model.py71
-rw-r--r--kittystore/storm/schema/__init__.py30
-rw-r--r--kittystore/storm/schema/patch_2.py62
-rw-r--r--kittystore/storm/store.py114
4 files changed, 208 insertions, 69 deletions
diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py
index e87a1fd..d87be6d 100644
--- a/kittystore/storm/model.py
+++ b/kittystore/storm/model.py
@@ -15,7 +15,8 @@ license.
import datetime
from zope.interface import implements
-from storm.locals import Unicode, RawStr, Int, ReferenceSet
+from storm.locals import Unicode, RawStr, Int, ReferenceSet, Reference
+from storm.expr import Desc
from mailman.interfaces.messages import IMessage
from kittystore.utils import get_message_id_hash
@@ -94,6 +95,71 @@ class Attachment(object):
encoding = Unicode()
size = Int()
content = RawStr()
+ # reference to the email
+ email = Reference((list_name, message_id),
+ (Email.list_name, Email.message_id))
+
+
+class Thread(object):
+ """
+ A thread of archived email, from a mailing-list. It is identified by both
+ the list name and the thread id.
+ """
+
+ __storm_table__ = "thread"
+ __storm_primary__ = "list_name", "thread_id"
+
+ list_name = Unicode()
+ thread_id = Unicode()
+ date_active = DateTime()
+ emails = ReferenceSet(
+ (list_name, thread_id),
+ (Email.list_name, Email.thread_id),
+ order_by=Email.date
+ )
+ _starting_email = None
+
+ def __init__(self, list_name, thread_id, date_active=None):
+ self.list_name = unicode(list_name)
+ self.thread_id = unicode(thread_id)
+ self.date_active = date_active
+
+ @property
+ def starting_email(self):
+ """Return (and cache) the email starting this thread"""
+ if self._starting_email is None:
+ self._starting_email = self.emails.find(Email.in_reply_to == None).one()
+ if self._starting_email is None:
+ # probably a partial import, we don't have the real first email
+ self._starting_email = self.emails.order_by(Email.date).first()
+ return self._starting_email
+
+ @property
+ def last_email(self):
+ return self.emails.order_by(Desc(Email.date)).first()
+
+ @property
+ def participants(self):
+ """Set of email senders in this thread"""
+ p = []
+ for sender in self.emails.find().config(distinct=True
+ ).values(Email.sender_name):
+ p.append(sender)
+ return p
+
+ @property
+ def email_ids(self):
+ return list(self.emails.find().values(Email.message_id))
+
+ def __len__(self):
+ return self.emails.count()
+
+ def __storm_pre_flush__(self):
+ """Auto-set the active date from the last email in thread"""
+ if self.date_active is not None:
+ return
+ self.date_active = list(self.emails.order_by(Desc(Email.date)
+ ).config(limit=1).values(Email.date))[0]
# References
@@ -103,4 +169,7 @@ Email.attachments = ReferenceSet(
Email.message_id),
(Attachment.list_name,
Attachment.message_id),
+ order_by=Attachment.counter
)
+Email.thread = Reference((Email.list_name, Email.thread_id),
+ (Thread.list_name, Thread.thread_id))
diff --git a/kittystore/storm/schema/__init__.py b/kittystore/storm/schema/__init__.py
index e32085e..50fda8d 100644
--- a/kittystore/storm/schema/__init__.py
+++ b/kittystore/storm/schema/__init__.py
@@ -9,6 +9,12 @@ CREATES = {
display_name TEXT,
PRIMARY KEY (name)
);""", """
+ CREATE TABLE "thread" (
+ list_name VARCHAR(255) NOT NULL,
+ thread_id VARCHAR(255) NOT NULL,
+ date_active DATETIME NOT NULL,
+ PRIMARY KEY (list_name, thread_id)
+ );""", """
CREATE TABLE "email" (
list_name VARCHAR(255) NOT NULL,
message_id VARCHAR(255) NOT NULL,
@@ -22,7 +28,9 @@ CREATES = {
thread_id VARCHAR(255) NOT NULL,
"full" BLOB NOT NULL,
archived_date DATETIME DEFAULT CURRENT_TIMESTAMP,
- PRIMARY KEY (list_name, message_id)
+ PRIMARY KEY (list_name, message_id),
+ FOREIGN KEY (list_name, thread_id)
+ REFERENCES thread(list_name, thread_id) ON DELETE CASCADE
);""", """
CREATE TABLE "attachment" (
list_name VARCHAR(255) NOT NULL,
@@ -33,13 +41,16 @@ CREATES = {
name VARCHAR(255),
size INTEGER NOT NULL,
content BLOB NOT NULL,
- PRIMARY KEY (list_name, message_id, counter)
+ PRIMARY KEY (list_name, message_id, counter),
+ FOREIGN KEY (list_name, message_id)
+ REFERENCES email(list_name, message_id) ON DELETE CASCADE
);""",
'CREATE INDEX "ix_email_list_name" ON "email" (list_name);',
'CREATE INDEX "ix_email_date" ON "email" (date);',
'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" (list_name, message_id_hash);',
'CREATE INDEX "ix_email_subject" ON "email" (subject);',
'CREATE INDEX "ix_email_thread_id" ON "email" (thread_id);',
+ 'CREATE INDEX "ix_thread_date_active" ON "thread" (date_active);',
],
"postgres": [ """
@@ -48,6 +59,12 @@ CREATES = {
display_name TEXT,
PRIMARY KEY (name)
);""", """
+ CREATE TABLE "thread" (
+ list_name VARCHAR(255) NOT NULL,
+ thread_id VARCHAR(255) NOT NULL,
+ date_active TIMESTAMP WITH TIME ZONE NOT NULL,
+ PRIMARY KEY (list_name, thread_id)
+ );""", """
CREATE TABLE "email" (
list_name VARCHAR(255) NOT NULL,
message_id VARCHAR(255) NOT NULL,
@@ -61,7 +78,9 @@ CREATES = {
thread_id VARCHAR(255) NOT NULL,
"full" BYTEA NOT NULL,
archived_date TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
- PRIMARY KEY (list_name, message_id)
+ PRIMARY KEY (list_name, message_id),
+ FOREIGN KEY (list_name, thread_id)
+ REFERENCES thread(list_name, thread_id) ON DELETE CASCADE
);""", """
CREATE TABLE "attachment" (
list_name VARCHAR(255) NOT NULL,
@@ -72,13 +91,16 @@ CREATES = {
name VARCHAR(255),
size INTEGER NOT NULL,
content BYTEA NOT NULL,
- PRIMARY KEY (list_name, message_id, counter)
+ PRIMARY KEY (list_name, message_id, counter),
+ FOREIGN KEY (list_name, message_id)
+ REFERENCES email(list_name, message_id) ON DELETE CASCADE
);""",
'CREATE INDEX "ix_email_list_name" ON "email" USING btree (list_name);',
'CREATE INDEX "ix_email_date" ON "email" USING btree (date);',
'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" USING btree (list_name, message_id_hash);',
'CREATE INDEX "ix_email_subject" ON "email" USING btree (subject);',
'CREATE INDEX "ix_email_thread_id" ON "email" USING btree (thread_id);',
+ 'CREATE INDEX "ix_thread_date_active" ON "thread" USING btree (date_active);',
],
}
diff --git a/kittystore/storm/schema/patch_2.py b/kittystore/storm/schema/patch_2.py
new file mode 100644
index 0000000..8a96df1
--- /dev/null
+++ b/kittystore/storm/schema/patch_2.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from storm.expr import And
+from storm.locals import Desc
+
+from . import get_db_type
+from kittystore.storm.model import Thread, Email
+
+
+SQL = {
+ "sqlite": [ """
+ CREATE TABLE "thread" (
+ list_name VARCHAR(255) NOT NULL,
+ thread_id VARCHAR(255) NOT NULL,
+ date_active DATETIME NOT NULL,
+ PRIMARY KEY (list_name, thread_id)
+ );""",
+ 'CREATE INDEX "ix_thread_date_active" ON "thread" (date_active);',
+ ],
+ "postgres": [ """
+ CREATE TABLE "thread" (
+ list_name VARCHAR(255) NOT NULL,
+ thread_id VARCHAR(255) NOT NULL,
+ date_active TIMESTAMP WITH TIME ZONE NOT NULL,
+ PRIMARY KEY (list_name, thread_id)
+ );""",
+ 'CREATE INDEX "ix_thread_date_active" ON "thread" USING btree (date_active);',
+ ],
+ }
+
+
+def apply(store):
+ """Add the thread table"""
+ dbtype = get_db_type(store)
+ for statement in SQL[dbtype]:
+ store.execute(statement)
+ for email in store.find(Email, Email.in_reply_to == None):
+ thread = Thread(email.list_name, email.thread_id)
+ store.add(thread)
+ store.flush()
+ for email in store.find(Email):
+ # in case of partial imports, some threads are missing their original
+ # email (the one without an in-reply-to header)
+ thread_count = store.find(Thread, And(
+ Thread.list_name == email.list_name,
+ Thread.thread_id == email.thread_id,
+ )).count()
+ if thread_count == 0:
+ # this email has no associated thread, create it
+ thread = Thread(email.list_name, email.thread_id)
+ store.add(thread)
+ store.flush()
+ if dbtype == "postgres":
+ store.execute('ALTER TABLE email '
+ 'ADD FOREIGN KEY (list_name, thread_id) '
+ 'REFERENCES thread(list_name, thread_id) ON DELETE CASCADE;')
+ store.execute('ALTER TABLE attachment '
+ 'ADD FOREIGN KEY (list_name, message_id) '
+ 'REFERENCES email(list_name, message_id) ON DELETE CASCADE;')
+ store.commit()
diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py
index b51e2c9..4e91346 100644
--- a/kittystore/storm/store.py
+++ b/kittystore/storm/store.py
@@ -28,7 +28,7 @@ from mailman.interfaces.messages import IMessageStore
from storm.locals import Desc
from storm.expr import And, Or
-from .model import List, Email, Attachment
+from .model import List, Email, Attachment, Thread
class StormStore(object):
@@ -99,8 +99,10 @@ class StormStore(object):
return email.message_id_hash
# Find thread id
+ new_thread = False
ref, thread_id = get_ref_and_thread_id(message, list_name, self)
if thread_id is None:
+ new_thread = True
# make up the thread_id if not found
thread_id = email.message_id_hash
email.thread_id = thread_id
@@ -108,8 +110,8 @@ class StormStore(object):
from_name, from_email = parseaddr(message['From'])
from_name = header_to_unicode(from_name)
- email.sender_name = from_name
- email.sender_email = unicode(from_email)
+ email.sender_name = from_name.strip()
+ email.sender_email = unicode(from_email).strip()
email.subject = header_to_unicode(message.get('Subject'))
email.full = message.as_string() # Before scrubbing
scrubber = Scrubber(list_name, message, self)
@@ -125,6 +127,16 @@ class StormStore(object):
# # i18n!
# category = 'Agenda'
+ if new_thread:
+ thread = Thread(list_name, thread_id, email.date)
+ else:
+ thread = self.db.find(Thread, And(
+ Thread.list_name == list_name,
+ Thread.thread_id == thread_id,
+ )).one()
+ thread.date_active = email.date
+ self.db.add(thread)
+
self.db.add(email)
self.flush()
return email.message_id_hash
@@ -152,6 +164,13 @@ class StormStore(object):
if msg is None:
raise MessageNotFound(list_name, message_id)
self.db.delete(msg)
+ # Remove the thread if necessary
+ thread = self.db.find(Thread, And(
+ Thread.list_name == msg.list_name,
+ Thread.thread_id == msg.thread_id,
+ )).one()
+ if len(thread.emails) == 0:
+ self.db.delete(thread)
self.flush()
def get_list_size(self, list_name):
@@ -297,9 +316,8 @@ class StormStore(object):
"""
return list(self.db.find(List.name).order_by(List.name))
- def get_messages(self, list_name, start, end, threads=False):
- """ Return all emails between two given dates, optionnaly selecting
- only the thread-starting ones.
+ def get_messages(self, list_name, start, end):
+ """ Return all emails between two given dates.
:param list_name: The name of the mailing list in which these emails
should be searched.
@@ -309,20 +327,29 @@ class StormStore(object):
the interval to query.
:returns: The list of messages.
"""
- conditions = [
- Email.list_name == unicode(list_name),
- Email.date >= start,
- Email.date < end,
- ]
- if threads:
- # Beginning of thread == No 'References' header
- conditions.append(Email.in_reply_to == None)
- emails = self.db.find(Email, And(*conditions)
- ).order_by(Desc(Email.date))
+ emails = self.db.find(Email, And(
+ Email.list_name == unicode(list_name),
+ Email.date >= start,
+ Email.date < end,
+ )).order_by(Desc(Email.date))
return list(emails)
+ def get_thread(self, list_name, thread_id):
+ """ Return the specified thread.
+
+ :param list_name: The name of the mailing list in which this email
+ should be searched.
+ :param thread_id: The thread_id as used in the web-pages. Used here to
+ uniquely identify the thread in the database.
+ :returns: The thread object.
+ """
+ return self.db.find(Thread, And(
+ Thread.list_name == unicode(list_name),
+ Thread.thread_id == unicode(thread_id)
+ )).one()
+
def get_threads(self, list_name, start, end):
- """ Return all the thread-starting emails between two given dates.
+ """ Return all the threads active between two given dates.
:param list_name: The name of the mailing list in which this email
should be searched.
@@ -332,7 +359,12 @@ class StormStore(object):
the interval to query.
:returns: The list of thread-starting messages.
"""
- return self.get_messages(list_name, start, end, threads=True)
+ threads = self.db.find(Thread, And(
+ Thread.list_name == unicode(list_name),
+ Thread.date_active >= start,
+ Thread.date_active < end,
+ )).order_by(Desc(Thread.date_active))
+ return list(threads)
def get_start_date(self, list_name):
""" Get the date of the first archived email in a list.
@@ -349,52 +381,6 @@ class StormStore(object):
else:
return None
- def get_messages_in_thread(self, list_name, thread_id):
- """ Return all the emails present in a thread. This thread
- is uniquely identified by its thread_id.
-
- :param list_name: The name of the mailing list in which this email
- should be searched.
- :param thread_id: The thread_id as used in the web-pages. Used here to
- uniquely identify the thread in the database.
- :returns: The list of messages in the thread.
- """
- emails = self.db.find(Email, And(
- Email.list_name == unicode(list_name),
- Email.thread_id == unicode(thread_id),
- )).order_by(Email.date)
- return list(emails)
-
- def get_thread_length(self, list_name, thread_id):
- """ Return the number of email present in a thread. This thread
- is uniquely identified by its thread_id.
-
- :param list_name: The name of the mailing list to query.
- :param thread_id: The unique identifier of the thread as specified in
- the database.
- :returns: The number of messages in the thread.
- :rtype: int
- """
- return self.db.find(Email, And(
- Email.list_name == unicode(list_name),
- Email.thread_id == unicode(thread_id),
- )).count()
-
- def get_thread_participants(self, list_name, thread_id):
- """ Return the list of participant in a thread. This thread
- is uniquely identified by its thread_id.
-
- :param list_name: The name of the mailing list to query.
- :param thread_id: The unique identifier of the thread as specified in
- the database.
- :return: The list of message sender names in the thread.
- """
- participants = self.db.find(Email.sender_name, And(
- Email.list_name == unicode(list_name),
- Email.thread_id == unicode(thread_id),
- )).config(distinct=True)
- return list(participants)
-
def get_thread_neighbors(self, list_name, thread_id):
""" Return the previous and the next threads of the specified thread,
in date order. The returned objects are the emails starting the