summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org>, 2013-01-09 15:23:46 +0100
committer: Aurélien Bompard <aurelien@bompard.org>, 2013-01-09 15:23:46 +0100
commit: 9dcdaaee79418541514aa6aaac6d7400f792b731 (patch)
tree: 7bca0a7b250441979a85135b58845b3b642c0389
parent: 9c2a8559164b2360a4234c59cc567b20afe29af6 (diff)
download: kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.tar.gz
download: kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.tar.xz
download: kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.zip
Import the thread sorting code from HyperKitty
-rw-r--r-- kittystore/analysis.py | 47
-rw-r--r-- kittystore/storm/model.py | 7
-rw-r--r-- kittystore/storm/schema/__init__.py | 9
-rw-r--r-- kittystore/storm/schema/patch_5.py | 37
-rw-r--r-- kittystore/storm/store.py | 4
-rw-r--r-- kittystore/test/test_analysis.py | 129
6 files changed, 232 insertions, 1 deletions
diff --git a/kittystore/analysis.py b/kittystore/analysis.py
new file mode 100644
index 0000000..0f49b78
--- /dev/null
+++ b/kittystore/analysis.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2012 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""
+Analysis of messages or threads of messages
+
+Author: Aurelien Bompard <abompard@fedoraproject.org>
+"""
+
+
+import networkx as nx
+
+
+def compute_thread_order_and_depth(thread):
+    """Compute the reply-tree position of every email in a thread.
+
+    The emails of ``thread`` are arranged in a tree by following their
+    ``in_reply_to`` attribute, and the tree is walked depth-first starting
+    from ``thread.starting_email``.  Each visited email gets two attributes
+    set in place:
+
+    - ``thread_order``: its rank in the depth-first walk (0 for the first);
+    - ``thread_depth``: its nesting level below the root it was reached from.
+
+    Replies to the same message are visited in the order in which they
+    appear in ``thread.emails``.
+
+    Emails that cannot be reached from the starting email (their parent is
+    not part of the thread, or they are caught in a reply loop) are not left
+    with stale values: they are walked afterwards as extra roots at depth 0,
+    continuing the same ``thread_order`` sequence.
+
+    :param thread: object exposing ``emails`` (an iterable of objects with
+        ``message_id`` and ``in_reply_to`` attributes) and
+        ``starting_email`` (one of those emails, or None).
+    :returns: None, the emails are modified in place.
+    """
+    emails = {}    # message_id -> email object
+    replies = {}   # message_id -> message ids of its direct replies
+    arrival = []   # message ids in the order given by thread.emails
+    for email in thread.emails:
+        emails[email.message_id] = email
+        arrival.append(email.message_id)
+        if email.in_reply_to is not None:
+            # Appending while iterating keeps the replies of a message in
+            # the same relative order as in thread.emails.
+            replies.setdefault(email.in_reply_to, []).append(
+                email.message_id)
+
+    # Candidate roots, by decreasing priority:
+    # 1. the starting email of the thread;
+    # 2. emails whose parent is unknown to this thread (true orphans);
+    # 3. every email, to catch what is left (members of reply loops).
+    # Candidates that were already visited are skipped during the walk.
+    roots = []
+    starting_email = thread.starting_email
+    if starting_email is not None:
+        roots.append(starting_email.message_id)
+    roots.extend(msgid for msgid in arrival
+                 if emails[msgid].in_reply_to not in emails)
+    roots.extend(arrival)
+
+    visited = set()
+    order = 0
+    for root in roots:
+        if root not in emails:
+            continue
+        # Explicit stack instead of recursion: very deep threads must not
+        # hit the interpreter's recursion limit.
+        pending = [(root, 0)]
+        while pending:
+            msgid, depth = pending.pop()
+            if msgid in visited:
+                # Already positioned: reply loop or lower-priority root.
+                continue
+            visited.add(msgid)
+            email = emails[msgid]
+            email.thread_depth = depth
+            email.thread_order = order
+            order += 1
+            # Pushed in reverse so that the first reply is popped first.
+            for reply_id in reversed(replies.get(msgid, ())):
+                pending.append((reply_id, depth + 1))
diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py
index 10dfe1a..f0596ea 100644
--- a/kittystore/storm/model.py
+++ b/kittystore/storm/model.py
@@ -74,6 +74,8 @@ class Email(Storm):
message_id_hash = Unicode()
thread_id = Unicode()
archived_date = DateTime(default_factory=datetime.datetime.now)
+ thread_depth = Int(default=0)
+ thread_order = Int(default=0)
# path is required by IMessage, but it makes no sense here
path = None
# References
@@ -152,6 +154,11 @@ class Thread(Storm):
(Email.list_name, Email.thread_id),
order_by=Email.date
)
+ emails_by_reply = ReferenceSet(
+ (list_name, thread_id),
+ (Email.list_name, Email.thread_id),
+ order_by=Email.thread_order
+ )
_starting_email = None
def __init__(self, list_name, thread_id, date_active=None):
diff --git a/kittystore/storm/schema/__init__.py b/kittystore/storm/schema/__init__.py
index 76c53f8..eebae1c 100644
--- a/kittystore/storm/schema/__init__.py
+++ b/kittystore/storm/schema/__init__.py
@@ -27,6 +27,8 @@ CREATES = {
in_reply_to VARCHAR(255), -- How about replies from another list ?
message_id_hash VARCHAR(255) NOT NULL,
thread_id VARCHAR(255) NOT NULL,
+ thread_order INTEGER NOT NULL DEFAULT 0,
+ thread_depth INTEGER NOT NULL DEFAULT 0,
archived_date DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (list_name, message_id),
FOREIGN KEY (list_name, thread_id)
@@ -58,6 +60,7 @@ CREATES = {
'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" (list_name, message_id_hash);',
'CREATE INDEX "ix_email_subject" ON "email" (subject);',
'CREATE INDEX "ix_email_thread_id" ON "email" (thread_id);',
+ 'CREATE INDEX "ix_email_thread_order" ON "email" (thread_order);',
'CREATE INDEX "ix_thread_date_active" ON "thread" (date_active);',
],
@@ -85,6 +88,8 @@ CREATES = {
in_reply_to VARCHAR(255), -- How about replies from another list ?
message_id_hash VARCHAR(255) NOT NULL,
thread_id VARCHAR(255) NOT NULL,
+ thread_order INTEGER NOT NULL DEFAULT 0,
+ thread_depth INTEGER NOT NULL DEFAULT 0,
archived_date TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (list_name, message_id),
FOREIGN KEY (list_name, thread_id)
@@ -116,6 +121,7 @@ CREATES = {
'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" USING btree (list_name, message_id_hash);',
'CREATE INDEX "ix_email_subject" ON "email" USING btree (subject);',
'CREATE INDEX "ix_email_thread_id" ON "email" USING btree (thread_id);',
+ 'CREATE INDEX "ix_email_thread_order" ON "email" USING btree (thread_order);',
'CREATE INDEX "ix_thread_date_active" ON "thread" USING btree (date_active);',
],
@@ -143,6 +149,8 @@ CREATES = {
in_reply_to VARCHAR(255), -- How about replies from another list ?
message_id_hash VARCHAR(255) NOT NULL,
thread_id VARCHAR(255) NOT NULL,
+ thread_order INTEGER NOT NULL DEFAULT 0,
+ thread_depth INTEGER NOT NULL DEFAULT 0,
archived_date DATETIME,
PRIMARY KEY (list_name, message_id),
FOREIGN KEY (list_name, thread_id)
@@ -174,6 +182,7 @@ CREATES = {
'CREATE UNIQUE INDEX `ix_email_list_name_message_id_hash` ON `email` (list_name, message_id_hash);',
'CREATE INDEX `ix_email_subject` ON `email` (subject(255));',
'CREATE INDEX `ix_email_thread_id` ON `email` (thread_id);',
+ 'CREATE INDEX `ix_email_thread_order` ON `email` (thread_order);',
'CREATE INDEX `ix_thread_date_active` ON `thread` (date_active);',
],
diff --git a/kittystore/storm/schema/patch_5.py b/kittystore/storm/schema/patch_5.py
new file mode 100644
index 0000000..1ce101b
--- /dev/null
+++ b/kittystore/storm/schema/patch_5.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from . import get_db_type
+from kittystore.storm.model import Thread
+from kittystore.analysis import compute_thread_order_and_depth
+
+
+# Schema statements for this patch, keyed by database type (the key is the
+# value returned by get_db_type() in apply() below). For each backend: add
+# the thread_order and thread_depth integer columns to the email table,
+# then index thread_order.
+SQL = {
+ "sqlite": [
+ 'ALTER TABLE "email" ADD COLUMN "thread_order" INTEGER NOT NULL DEFAULT 0;',
+ 'ALTER TABLE "email" ADD COLUMN "thread_depth" INTEGER NOT NULL DEFAULT 0;',
+ 'CREATE INDEX "ix_email_thread_order" ON "email" (thread_order);',
+ ],
+ "postgres": [
+ 'ALTER TABLE "email" ADD COLUMN "thread_order" INTEGER NOT NULL DEFAULT 0;',
+ 'ALTER TABLE "email" ADD COLUMN "thread_depth" INTEGER NOT NULL DEFAULT 0;',
+ 'CREATE INDEX "ix_email_thread_order" ON "email" USING btree (thread_order);',
+ ],
+ # MySQL quotes identifiers with backticks instead of double quotes.
+ "mysql": [
+ 'ALTER TABLE `email` ADD COLUMN `thread_order` INTEGER NOT NULL DEFAULT 0;',
+ 'ALTER TABLE `email` ADD COLUMN `thread_depth` INTEGER NOT NULL DEFAULT 0;',
+ 'CREATE INDEX `ix_email_thread_order` ON `email` (thread_order);',
+ ],
+ }
+
+
+def apply(store):
+    """Create the thread_order/thread_depth columns, then fill them in.
+
+    The statements matching the database type of ``store`` are executed
+    first; afterwards the position of every email is computed for each
+    existing thread, and the whole change is committed.
+
+    :param store: the store to upgrade.
+    """
+    # Schema change: pick the statements written for this backend.
+    schema_statements = SQL[get_db_type(store)]
+    for sql in schema_statements:
+        store.execute(sql)
+    # Data migration: compute order and depth thread by thread.
+    existing_threads = store.find(Thread)
+    for current in existing_threads:
+        compute_thread_order_and_depth(current)
+        store.add(current)
+    store.commit()
diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py
index 4e444cd..701deef 100644
--- a/kittystore/storm/store.py
+++ b/kittystore/storm/store.py
@@ -28,6 +28,7 @@ from kittystore.utils import parseaddr, parsedate
from kittystore.utils import header_to_unicode
from kittystore.scrub import Scrubber
from kittystore.utils import get_ref_and_thread_id
+from kittystore.analysis import compute_thread_order_and_depth
from .model import List, Email, Attachment, Thread, EmailFull
@@ -152,9 +153,10 @@ class StormStore(object):
self.db.add(email)
self.db.add(email_full)
- self.flush()
+ compute_thread_order_and_depth(thread)
for attachment in attachments:
self.add_attachment(list_name, msg_id, *attachment)
+ self.flush()
return email.message_id_hash
def delete_message(self, message_id):
diff --git a/kittystore/test/test_analysis.py b/kittystore/test/test_analysis.py
new file mode 100644
index 0000000..bd6cef2
--- /dev/null
+++ b/kittystore/test/test_analysis.py
@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=R0904,C0103
+# - Too many public methods
+# - Invalid name XXX (should match YYY)
+
+import unittest
+from datetime import datetime
+
+from mailman.email.message import Message
+
+from kittystore.storm import get_storm_store
+from kittystore.storm.model import Email, Thread
+from kittystore.analysis import compute_thread_order_and_depth
+
+from kittystore.test import FakeList
+
+
+def make_fake_email(num=1, list_name="example-list", date=None):
+    """Build an Email whose text fields are all derived from ``num``.
+
+    :param num: number used in the message id, thread id, sender, subject
+        and content.
+    :param list_name: name of the list the email belongs to.
+    :param date: date of the email; the current time is used when None.
+    :returns: the new Email instance (it is not added to any store here).
+    """
+    fake = Email(list_name, "<msg%d>" % num)
+    # (attribute, template) pairs, each template is filled in with num.
+    text_fields = (
+        ("thread_id", u"<msg%d>"),
+        ("sender_name", u"sender%d"),
+        ("sender_email", u"sender%d@example.com"),
+        ("subject", u"subject %d"),
+        ("content", u"message %d"),
+    )
+    for attribute, template in text_fields:
+        setattr(fake, attribute, template % num)
+    fake.date = datetime.now() if date is None else date
+    fake.timezone = 0
+    return fake
+
+
+class TestThreadOrderDepth(unittest.TestCase):
+    """Tests of the thread_order and thread_depth values of emails."""
+
+    def setUp(self):
+        # Every test gets its own store opened on the "sqlite:" URL.
+        self.store = get_storm_store("sqlite:")
+
+    def tearDown(self):
+        store = self.store
+        store.flush()
+        store.rollback()
+        store.close()
+
+    def _assert_positions(self, expected):
+        """Check (email, thread_order, thread_depth) triples one by one."""
+        for email, order, depth in expected:
+            self.assertEqual(email.thread_order, order)
+            self.assertEqual(email.thread_depth, depth)
+
+    def test_simple_thread(self):
+        # Smallest possible tree: a message and a single reply to it
+        thread = Thread("example-list", "<msg1>")
+        self.store.db.add(thread)
+        first = make_fake_email(1)
+        first.thread_order = first.thread_depth = 42
+        self.store.db.add(first)
+        reply = make_fake_email(2)
+        reply.thread_id = u"<msg1>"
+        reply.in_reply_to = u"<msg1>"
+        reply.thread_order = reply.thread_depth = 42
+        self.store.db.add(reply)
+        self.store.flush()
+        compute_thread_order_and_depth(thread)
+        self._assert_positions([
+            (first, 0, 0),
+            (reply, 1, 1),
+        ])
+
+    def test_classical_thread(self):
+        # msg1
+        # |-msg2
+        # | `-msg4
+        # `-msg3
+        thread = Thread("example-list", "<msg1>")
+        self.store.db.add(thread)
+        emails = [make_fake_email(num) for num in range(1, 5)]
+        msg1, msg2, msg3, msg4 = emails
+        # Everything but the first message must be moved to its thread
+        for email in (msg2, msg3, msg4):
+            email.thread_id = u"<msg1>"
+        # Reply tree
+        msg2.in_reply_to = u"<msg1>"
+        msg3.in_reply_to = u"<msg1>"
+        msg4.in_reply_to = u"<msg2>"
+        # Start from wrong values to prove that they get overwritten
+        for email in emails:
+            email.thread_order = 42
+            email.thread_depth = 42
+        for email in emails:
+            self.store.db.add(email)
+        self.store.flush()
+        compute_thread_order_and_depth(thread)
+        self._assert_positions([
+            (msg1, 0, 0),
+            (msg2, 1, 1),
+            (msg3, 3, 1),
+            (msg4, 2, 2),
+        ])
+
+    def test_add_in_classical_thread(self):
+        # msg1
+        # |-msg2
+        # | `-msg4
+        # `-msg3
+        ml = FakeList("example-list")
+        # Message number -> Message-ID of the message it replies to
+        parents = {2: "<msg1>", 3: "<msg1>", 4: "<msg2>"}
+        incoming = []
+        for num in range(1, 5):
+            msg = Message()
+            msg["From"] = "sender%d@example.com" % num
+            msg["Message-ID"] = "<msg%d>" % num
+            msg.set_payload("message %d" % num)
+            if num in parents:
+                msg["In-Reply-To"] = parents[num]
+            incoming.append(msg)
+        for msg in incoming:
+            self.store.add_to_list(ml, msg)
+        # Read the emails back from the store before checking them
+        msg1, msg2, msg3, msg4 = [
+            self.store.get_message_by_id_from_list(
+                "example-list", "msg%d" % num)
+            for num in range(1, 5)
+        ]
+        self._assert_positions([
+            (msg1, 0, 0),
+            (msg2, 1, 1),
+            (msg3, 3, 1),
+            (msg4, 2, 2),
+        ])