diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2013-01-09 15:23:46 +0100 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2013-01-09 15:23:46 +0100 |
commit | 9dcdaaee79418541514aa6aaac6d7400f792b731 (patch) | |
tree | 7bca0a7b250441979a85135b58845b3b642c0389 /kittystore | |
parent | 9c2a8559164b2360a4234c59cc567b20afe29af6 (diff) | |
download | kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.tar.gz kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.tar.xz kittystore-9dcdaaee79418541514aa6aaac6d7400f792b731.zip |
Import the thread sorting code from HyperKitty
Diffstat (limited to 'kittystore')
-rw-r--r-- | kittystore/analysis.py | 47 | ||||
-rw-r--r-- | kittystore/storm/model.py | 7 | ||||
-rw-r--r-- | kittystore/storm/schema/__init__.py | 9 | ||||
-rw-r--r-- | kittystore/storm/schema/patch_5.py | 37 | ||||
-rw-r--r-- | kittystore/storm/store.py | 4 | ||||
-rw-r--r-- | kittystore/test/test_analysis.py | 129 |
6 files changed, 232 insertions, 1 deletion
diff --git a/kittystore/analysis.py b/kittystore/analysis.py new file mode 100644 index 0000000..0f49b78 --- /dev/null +++ b/kittystore/analysis.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2011-2012 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +""" +Analysis of messages or threads of messages + +Author: Aurelien Bompard <abompard@fedoraproject.org> +""" + + +import networkx as nx + + +def compute_thread_order_and_depth(thread): + graph = nx.DiGraph() + thread_pos = {"d": 0, "o": 0} # depth, order + def walk_successors(msgid): + obj = graph.node[msgid]["obj"] + obj.thread_depth = thread_pos["d"] + obj.thread_order = thread_pos["o"] + thread_pos["d"] += 1 + thread_pos["o"] += 1 + for succ in sorted(graph.successors(msgid), + key=lambda m: graph.node[m]["num"]): + walk_successors(succ) + thread_pos["d"] -= 1 + for index, email in enumerate(thread.emails): + graph.add_node(email.message_id, num=index, obj=email) + if email.in_reply_to is not None: + graph.add_edge(email.in_reply_to, email.message_id) + walk_successors(thread.starting_email.message_id) diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py index 10dfe1a..f0596ea 100644 --- a/kittystore/storm/model.py +++ b/kittystore/storm/model.py @@ -74,6 +74,8 @@ class 
Email(Storm): message_id_hash = Unicode() thread_id = Unicode() archived_date = DateTime(default_factory=datetime.datetime.now) + thread_depth = Int(default=0) + thread_order = Int(default=0) # path is required by IMessage, but it makes no sense here path = None # References @@ -152,6 +154,11 @@ class Thread(Storm): (Email.list_name, Email.thread_id), order_by=Email.date ) + emails_by_reply = ReferenceSet( + (list_name, thread_id), + (Email.list_name, Email.thread_id), + order_by=Email.thread_order + ) _starting_email = None def __init__(self, list_name, thread_id, date_active=None): diff --git a/kittystore/storm/schema/__init__.py b/kittystore/storm/schema/__init__.py index 76c53f8..eebae1c 100644 --- a/kittystore/storm/schema/__init__.py +++ b/kittystore/storm/schema/__init__.py @@ -27,6 +27,8 @@ CREATES = { in_reply_to VARCHAR(255), -- How about replies from another list ? message_id_hash VARCHAR(255) NOT NULL, thread_id VARCHAR(255) NOT NULL, + thread_order INTEGER NOT NULL DEFAULT 0, + thread_depth INTEGER NOT NULL DEFAULT 0, archived_date DATETIME DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (list_name, message_id), FOREIGN KEY (list_name, thread_id) @@ -58,6 +60,7 @@ CREATES = { 'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" (list_name, message_id_hash);', 'CREATE INDEX "ix_email_subject" ON "email" (subject);', 'CREATE INDEX "ix_email_thread_id" ON "email" (thread_id);', + 'CREATE INDEX "ix_email_thread_order" ON "email" (thread_order);', 'CREATE INDEX "ix_thread_date_active" ON "thread" (date_active);', ], @@ -85,6 +88,8 @@ CREATES = { in_reply_to VARCHAR(255), -- How about replies from another list ? 
message_id_hash VARCHAR(255) NOT NULL, thread_id VARCHAR(255) NOT NULL, + thread_order INTEGER NOT NULL DEFAULT 0, + thread_depth INTEGER NOT NULL DEFAULT 0, archived_date TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (list_name, message_id), FOREIGN KEY (list_name, thread_id) @@ -116,6 +121,7 @@ CREATES = { 'CREATE UNIQUE INDEX "ix_email_list_name_message_id_hash" ON "email" USING btree (list_name, message_id_hash);', 'CREATE INDEX "ix_email_subject" ON "email" USING btree (subject);', 'CREATE INDEX "ix_email_thread_id" ON "email" USING btree (thread_id);', + 'CREATE INDEX "ix_email_thread_order" ON "email" USING btree (thread_order);', 'CREATE INDEX "ix_thread_date_active" ON "thread" USING btree (date_active);', ], @@ -143,6 +149,8 @@ CREATES = { in_reply_to VARCHAR(255), -- How about replies from another list ? message_id_hash VARCHAR(255) NOT NULL, thread_id VARCHAR(255) NOT NULL, + thread_order INTEGER NOT NULL DEFAULT 0, + thread_depth INTEGER NOT NULL DEFAULT 0, archived_date DATETIME, PRIMARY KEY (list_name, message_id), FOREIGN KEY (list_name, thread_id) @@ -174,6 +182,7 @@ CREATES = { 'CREATE UNIQUE INDEX `ix_email_list_name_message_id_hash` ON `email` (list_name, message_id_hash);', 'CREATE INDEX `ix_email_subject` ON `email` (subject(255));', 'CREATE INDEX `ix_email_thread_id` ON `email` (thread_id);', + 'CREATE INDEX `ix_email_thread_order` ON `email` (thread_order);', 'CREATE INDEX `ix_thread_date_active` ON `thread` (date_active);', ], diff --git a/kittystore/storm/schema/patch_5.py b/kittystore/storm/schema/patch_5.py new file mode 100644 index 0000000..1ce101b --- /dev/null +++ b/kittystore/storm/schema/patch_5.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from . 
import get_db_type +from kittystore.storm.model import Thread +from kittystore.analysis import compute_thread_order_and_depth + + +SQL = { + "sqlite": [ + 'ALTER TABLE "email" ADD COLUMN "thread_order" INTEGER NOT NULL DEFAULT 0;', + 'ALTER TABLE "email" ADD COLUMN "thread_depth" INTEGER NOT NULL DEFAULT 0;', + 'CREATE INDEX "ix_email_thread_order" ON "email" (thread_order);', + ], + "postgres": [ + 'ALTER TABLE "email" ADD COLUMN "thread_order" INTEGER NOT NULL DEFAULT 0;', + 'ALTER TABLE "email" ADD COLUMN "thread_depth" INTEGER NOT NULL DEFAULT 0;', + 'CREATE INDEX "ix_email_thread_order" ON "email" USING btree (thread_order);', + ], + "mysql": [ + 'ALTER TABLE `email` ADD COLUMN `thread_order` INTEGER NOT NULL DEFAULT 0;', + 'ALTER TABLE `email` ADD COLUMN `thread_depth` INTEGER NOT NULL DEFAULT 0;', + 'CREATE INDEX `ix_email_thread_order` ON `email` (thread_order);', + ], + } + + +def apply(store): + """Add the thread_order and thread_depth columns and populate them""" + dbtype = get_db_type(store) + for statement in SQL[dbtype]: + store.execute(statement) + for thread in store.find(Thread): + compute_thread_order_and_depth(thread) + store.add(thread) + store.commit() diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py index 4e444cd..701deef 100644 --- a/kittystore/storm/store.py +++ b/kittystore/storm/store.py @@ -28,6 +28,7 @@ from kittystore.utils import parseaddr, parsedate from kittystore.utils import header_to_unicode from kittystore.scrub import Scrubber from kittystore.utils import get_ref_and_thread_id +from kittystore.analysis import compute_thread_order_and_depth from .model import List, Email, Attachment, Thread, EmailFull @@ -152,9 +153,10 @@ class StormStore(object): self.db.add(email) self.db.add(email_full) - self.flush() + compute_thread_order_and_depth(thread) for attachment in attachments: self.add_attachment(list_name, msg_id, *attachment) + self.flush() return email.message_id_hash def delete_message(self, message_id): diff 
--git a/kittystore/test/test_analysis.py b/kittystore/test/test_analysis.py new file mode 100644 index 0000000..bd6cef2 --- /dev/null +++ b/kittystore/test/test_analysis.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# pylint: disable=R0904,C0103 +# - Too many public methods +# - Invalid name XXX (should match YYY) + +import unittest +from datetime import datetime + +from mailman.email.message import Message + +from kittystore.storm import get_storm_store +from kittystore.storm.model import Email, Thread +from kittystore.analysis import compute_thread_order_and_depth + +from kittystore.test import FakeList + + +def make_fake_email(num=1, list_name="example-list", date=None): + msg = Email(list_name, "<msg%d>" % num) + msg.thread_id = u"<msg%d>" % num + msg.sender_name = u"sender%d" % num + msg.sender_email = u"sender%d@example.com" % num + msg.subject = u"subject %d" % num + msg.content = u"message %d" % num + if date is None: + msg.date = datetime.now() + else: + msg.date = date + msg.timezone = 0 + return msg + + +class TestThreadOrderDepth(unittest.TestCase): + + def setUp(self): + self.store = get_storm_store("sqlite:") + + def tearDown(self): + self.store.flush() + self.store.rollback() + self.store.close() + + def test_simple_thread(self): + # A basic thread: msg2 replies to msg1 + thread = Thread("example-list", "<msg1>") + self.store.db.add(thread) + msg1 = make_fake_email(1) + msg1.thread_order = msg1.thread_depth = 42 + self.store.db.add(msg1) + msg2 = make_fake_email(2) + msg2.thread_id = u"<msg1>" + msg2.in_reply_to = u"<msg1>" + msg2.thread_order = msg2.thread_depth = 42 + self.store.db.add(msg2) + self.store.flush() + compute_thread_order_and_depth(thread) + self.assertEqual(msg1.thread_order, 0) + self.assertEqual(msg1.thread_depth, 0) + self.assertEqual(msg2.thread_order, 1) + self.assertEqual(msg2.thread_depth, 1) + + def test_classical_thread(self): + # msg1 + # |-msg2 + # | `-msg4 + # `-msg3 + thread = Thread("example-list", "<msg1>") + 
self.store.db.add(thread) + msg1 = make_fake_email(1) + msg2 = make_fake_email(2) + msg3 = make_fake_email(3) + msg4 = make_fake_email(4) + # All in the same thread + msg2.thread_id = msg3.thread_id = msg4.thread_id = u"<msg1>" + # Set up the reply tree + msg2.in_reply_to = msg3.in_reply_to = u"<msg1>" + msg4.in_reply_to = u"<msg2>" + # Init with false values + msg1.thread_order = msg1.thread_depth = \ + msg2.thread_order = msg2.thread_depth = \ + msg3.thread_order = msg3.thread_depth = \ + msg4.thread_order = msg4.thread_depth = 42 + self.store.db.add(msg1) + self.store.db.add(msg2) + self.store.db.add(msg3) + self.store.db.add(msg4) + self.store.flush() + compute_thread_order_and_depth(thread) + self.assertEqual(msg1.thread_order, 0) + self.assertEqual(msg1.thread_depth, 0) + self.assertEqual(msg2.thread_order, 1) + self.assertEqual(msg2.thread_depth, 1) + self.assertEqual(msg3.thread_order, 3) + self.assertEqual(msg3.thread_depth, 1) + self.assertEqual(msg4.thread_order, 2) + self.assertEqual(msg4.thread_depth, 2) + + def test_add_in_classical_thread(self): + # msg1 + # |-msg2 + # | `-msg4 + # `-msg3 + ml = FakeList("example-list") + msgs = [] + for num in range(1, 5): + msg = Message() + msg["From"] = "sender%d@example.com" % num + msg["Message-ID"] = "<msg%d>" % num + msg.set_payload("message %d" % num) + msgs.append(msg) + msgs[1]["In-Reply-To"] = "<msg1>" + msgs[2]["In-Reply-To"] = "<msg1>" + msgs[3]["In-Reply-To"] = "<msg2>" + for msg in msgs: + self.store.add_to_list(ml, msg) + msgs = [] + for num in range(1, 5): + msg = self.store.get_message_by_id_from_list( + "example-list", "msg%d" % num) + msgs.append(msg) + msg1, msg2, msg3, msg4 = msgs + self.assertEqual(msg1.thread_order, 0) + self.assertEqual(msg1.thread_depth, 0) + self.assertEqual(msg2.thread_order, 1) + self.assertEqual(msg2.thread_depth, 1) + self.assertEqual(msg3.thread_order, 3) + self.assertEqual(msg3.thread_depth, 1) + self.assertEqual(msg4.thread_order, 2) + 
self.assertEqual(msg4.thread_depth, 2) |