diff options
| author | Aurélien Bompard <aurelien@bompard.org> | 2013-11-08 15:21:46 +0100 |
|---|---|---|
| committer | Aurélien Bompard <aurelien@bompard.org> | 2013-11-08 16:35:10 +0100 |
| commit | 7260a8b8c0ee8fd9fd848e8c35869679e3c88056 (patch) | |
| tree | ac53de657588e103a6688f8cea1002ee68ca83de | |
| parent | 3d58de2ee6dec074eb8de867584ba30867a54c22 (diff) | |
| download | kittystore-7260a8b8c0ee8fd9fd848e8c35869679e3c88056.tar.gz kittystore-7260a8b8c0ee8fd9fd848e8c35869679e3c88056.tar.xz kittystore-7260a8b8c0ee8fd9fd848e8c35869679e3c88056.zip | |
Cache some values in the database
| -rw-r--r-- | kittystore/caching/__init__.py | 66 | ||||
| -rw-r--r-- | kittystore/caching/email.py | 44 | ||||
| -rw-r--r-- | kittystore/caching/mlist.py | 66 | ||||
| -rw-r--r-- | kittystore/import.py | 13 | ||||
| -rw-r--r-- | kittystore/scripts.py | 49 | ||||
| -rw-r--r-- | kittystore/storm/__init__.py | 6 | ||||
| -rw-r--r-- | kittystore/storm/model.py | 5 | ||||
| -rw-r--r-- | kittystore/storm/schema/__init__.py | 9 | ||||
| -rw-r--r-- | kittystore/storm/schema/patch_13.py | 35 | ||||
| -rw-r--r-- | kittystore/storm/search.py | 2 | ||||
| -rw-r--r-- | kittystore/storm/store.py | 38 | ||||
| -rw-r--r-- | kittystore/test/__init__.py | 11 | ||||
| -rw-r--r-- | kittystore/test/test_caching.py | 116 | ||||
| -rw-r--r-- | kittystore/test/test_storm_store.py | 21 | ||||
| -rw-r--r-- | kittystore/utils.py | 4 | ||||
| -rwxr-xr-x | setup.py | 1 |
16 files changed, 408 insertions, 78 deletions
diff --git a/kittystore/caching/__init__.py b/kittystore/caching/__init__.py new file mode 100644 index 0000000..a67202c --- /dev/null +++ b/kittystore/caching/__init__.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +""" +Some data is cached in the database for faster access. This module re-computes +these values, for example in a periodic manner. +""" + +import datetime +from urllib2 import HTTPError +from pkg_resources import resource_listdir +import mailmanclient + + +class CachedValue(object): + + def on_new_message(self, store, mlist, message): + pass + + def on_new_thread(self, store, mlist, thread): + pass + + def refresh(self, store): + pass + + def _get_mailman_client(self, settings): + try: + mm_client = mailmanclient.Client('%s/3.0' % + settings.MAILMAN_REST_SERVER, + settings.MAILMAN_API_USER, + settings.MAILMAN_API_PASS) + except (HTTPError, mailmanclient.MailmanConnectionError): + raise HTTPError + return mm_client + + +class CacheManager(object): + + _cached_values = [] + auto_refresh = True + _last_refresh = None + + def discover(self): + """ + Discover subclasses of CachedValue. This only search direct submodules + of kittystore.caching. + """ + submodules = [ f[:-3] for f in resource_listdir("kittystore.caching", "") + if f.endswith(".py") and f != "__init__.py" ] + for submod_name in submodules: + __import__("kittystore.caching.%s" % submod_name) + self._cached_values = [ C() for C in CachedValue.__subclasses__() ] + + def on_new_message(self, store, mlist, message): + if self.auto_refresh and datetime.date.today() != self._last_refresh: + return self.refresh(store) # Refresh at least once a day + for cval in self._cached_values: + cval.on_new_message(store, mlist, message) + + def on_new_thread(self, store, mlist, thread): + for cval in self._cached_values: + cval.on_new_thread(store, mlist, thread) + + def refresh(self, store): + for cval in self._cached_values: + cval.refresh(store) + self._last_refresh = datetime.date.today() diff --git a/kittystore/caching/email.py b/kittystore/caching/email.py new file mode 100644 index 0000000..6e7ba77 --- /dev/null +++ b/kittystore/caching/email.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Cached values concerning mailing-lists +""" + +from urllib2 import HTTPError +import mailmanclient + +from kittystore.caching import CachedValue + + +class MailmanUserCache(CachedValue): + + _mm_client = None + _user_id_cache = {} + + def _get_mailman_client(self, settings): + """Only instanciate the mailman client once""" + if self._mm_client is None: + self._mm_client = CachedValue._get_mailman_client(self, settings) + return self._mm_client + + def _get_user_id(self, store, message): + address = message.sender_email + if address not in self._user_id_cache: + mm_client = self._get_mailman_client(store.settings) + mm_user = mm_client.get_user(address) + self._user_id_cache[address] = unicode(mm_user.user_id) + return self._user_id_cache[address] + + def on_new_message(self, store, mlist, message): + try: + message.user_id = self._get_user_id(store, message) + except (HTTPError, mailmanclient.MailmanConnectionError): + return # Can't refresh at this time + + def refresh(self, store): + # XXX: Storm-specific + from kittystore.storm.model import Email + try: + for message in store.db.find(Email, Email.user_id == None): + message.user_id = self._get_user_id(store, message) + except (HTTPError, mailmanclient.MailmanConnectionError): + return # Can't refresh at this time diff --git a/kittystore/caching/mlist.py b/kittystore/caching/mlist.py new file mode 100644 index 0000000..1d0808b --- /dev/null +++ b/kittystore/caching/mlist.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +""" +Cached values concerning mailing-lists +""" + +import datetime +from urllib2 import HTTPError + +import mailmanclient + +from kittystore.caching import CachedValue +from kittystore.utils import daterange + + +class ListPropertiesCache(CachedValue): + + props = ("display_name", "description", "subject_prefix", "archive_policy") + + def on_new_message(self, store, mlist, message): + l = store.get_list(mlist.fqdn_listname) + for propname in self.props: + setattr(l, propname, getattr(mlist, propname)) + + def refresh(self, store): + try: + mm_client = self._get_mailman_client(store.settings) + except HTTPError: + return # Can't refresh at this time + for list_name in store.get_list_names(): + try: + mm_mlist = mm_client.get_list(list_name) + except (HTTPError, mailmanclient.MailmanConnectionError): + continue + if mm_mlist: + self.on_new_message(store, mm_mlist, None) + + +class ListActivityCache(CachedValue): + """ + Refresh the recent_participants_count and recent_threads_count properties. + """ + + def _refresh_list(self, store, mlist): + # Get stats for last 30 days + today = datetime.datetime.utcnow() + #today -= datetime.timedelta(days=400) #debug + # the upper boundary is excluded in the search, add one day + end_date = today + datetime.timedelta(days=1) + begin_date = end_date - datetime.timedelta(days=32) + days = daterange(begin_date, end_date) + # now compute the values + threads = store.get_threads(list_name=mlist.name, + start=begin_date, end=end_date) + participants = set() + for thread in threads: + participants.update(thread.participants) + mlist.recent_participants_count = len(participants) + mlist.recent_threads_count = len(threads) + + def on_new_message(self, store, mlist, message): + l = store.get_list(mlist.fqdn_listname) + self._refresh_list(store, l) + + def refresh(self, store): + for mlist in store.get_lists(): + self._refresh_list(store, mlist) diff --git a/kittystore/import.py b/kittystore/import.py index c72c431..63423f2 100644 --- a/kittystore/import.py +++ b/kittystore/import.py @@ -41,6 +41,7 @@ import mailmanclient from mailman.interfaces.archiver import ArchivePolicy from storm.exceptions import DatabaseError from kittystore.scripts import get_store_from_options, StoreFromOptionsError +from kittystore.test import FakeList PREFIX_RE = re.compile("^\[([\w\s_-]+)\] ") @@ -90,18 +91,8 @@ def awarify(date): class DownloadError(Exception): pass -class DummyMailingList(object): - # pylint: disable=R0903 - # (Too few public methods) - def __init__(self, address): - self.fqdn_listname = unicode(address) - self.display_name = None - self.subject_prefix = None - self.archive_policy = ArchivePolicy.public - - def get_mailinglist(list_name, settings, opts): - mlist = DummyMailingList(list_name) + mlist = FakeList(list_name) try: mm_client = mailmanclient.Client('%s/3.0' % settings.MAILMAN_REST_SERVER, diff --git a/kittystore/scripts.py b/kittystore/scripts.py index 3930330..6e0f33a 100644 --- a/kittystore/scripts.py +++ b/kittystore/scripts.py @@ -81,28 +81,39 @@ def updatedb(): "ORDER BY version DESC LIMIT 1" ))[0][0] print "Done, the current schema version is %d." % version + print "Refreshing the cache, this can take some time..." + store.refresh_cache() + store.commit() + print " ...done!" ## More complex post-update actions: + # (none yet) - # Fill in the user_id from Mailman - from kittystore.storm.model import Email - user_ids = store.db.find(Email.user_id).config(distinct=True) - if user_ids.count() <= 1 and user_ids.one() is None: - print "Updating user_id fields from Mailman, this can take some time..." - emails = store.db.find(Email) - emails_total = emails.count() - user_id_cache = {} # speed up the lookup process - for num, email in enumerate(emails): - if email.sender_email in user_id_cache: - email.user_id = user_id_cache[email.sender_email] - else: - email.user_id = store._store_mailman_user(email.sender_email) - user_id_cache[email.sender_email] = email.user_id - if (num+1) % 10 == 0: - sys.stdout.write("\r%s/%s" % (num+1, emails_total)) - sys.stdout.flush() - store.commit() - print " ...done!" + + +# +# Manual cache refresh +# + +def cache_refresh(): + parser = OptionParser(usage="%prog -s settings_module") + parser.add_option("-s", "--settings", + help="the Python path to a Django-like settings module") + parser.add_option("-p", "--pythonpath", + help="a directory to add to the Python path") + parser.add_option("-d", "--debug", action="store_true", + help="show SQL queries") + opts, args = parser.parse_args() + if args: + parser.error("no arguments allowed.") + print 'Refreshing the cache...' + try: + store = get_store_from_options(opts) + except (StoreFromOptionsError, AttributeError), e: + parser.error(e.args[0]) + store.refresh_cache() + store.commit() + print " ...done!" diff --git a/kittystore/storm/__init__.py b/kittystore/storm/__init__.py index 2ea90ee..3736ad7 100644 --- a/kittystore/storm/__init__.py +++ b/kittystore/storm/__init__.py @@ -13,6 +13,7 @@ from .model import List, Email from . import schema from .store import StormStore from .search import SearchEngine +from kittystore.caching import CacheManager class ThreadSafeStorePool(object): @@ -50,7 +51,10 @@ def create_store(settings, debug): search_index = SearchEngine(search) else: search_index = None - return StormStore(store, search_index, settings, debug) + cache_manager = CacheManager() + cache_manager.discover() + return StormStore(store, search_index, settings, cache_manager, + debug=debug) def get_storm_store(settings, debug=False): diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py index 29cd546..338cfb2 100644 --- a/kittystore/storm/model.py +++ b/kittystore/storm/model.py @@ -40,13 +40,18 @@ class List(Storm): """ An archived mailing-list. """ + # When updating this model, remember to update the fake version + # in test/__init__.py __storm_table__ = "list" name = Unicode(primary=True) display_name = Unicode() + description = Unicode() subject_prefix = Unicode() archive_policy = Enum(ArchivePolicy) + recent_participants_count = Int() # cache + recent_threads_count = Int() # cache def __init__(self, name): self.name = unicode(name) diff --git a/kittystore/storm/schema/__init__.py b/kittystore/storm/schema/__init__.py index 8177707..73f9861 100644 --- a/kittystore/storm/schema/__init__.py +++ b/kittystore/storm/schema/__init__.py @@ -7,8 +7,11 @@ CREATES = { CREATE TABLE "list" ( name VARCHAR(255) NOT NULL, display_name TEXT, + description TEXT, subject_prefix TEXT, archive_policy INTEGER, + recent_participants_count INTEGER, + recent_threads_count INTEGER, PRIMARY KEY (name) );""", """ CREATE TABLE "category" ( @@ -81,8 +84,11 @@ CREATES = { CREATE TABLE "list" ( name VARCHAR(255) NOT NULL, display_name TEXT, + description TEXT, subject_prefix TEXT, archive_policy INTEGER, + recent_participants_count INTEGER, + recent_threads_count INTEGER, PRIMARY KEY (name) );""", """ CREATE TABLE "category" ( @@ -166,8 +172,11 @@ CREATES = { CREATE TABLE `list` ( name VARCHAR(255) NOT NULL, display_name TEXT, + description TEXT, subject_prefix TEXT, archive_policy INTEGER, + recent_participants_count INTEGER, + recent_threads_count INTEGER, PRIMARY KEY (name) );""", """ CREATE TABLE `category` ( diff --git a/kittystore/storm/schema/patch_13.py b/kittystore/storm/schema/patch_13.py new file mode 100644 index 0000000..731d81e --- /dev/null +++ b/kittystore/storm/schema/patch_13.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from . import get_db_type + + +SQL = { + "sqlite": [ + 'ALTER TABLE "list" ADD COLUMN description TEXT;', + 'ALTER TABLE "list" ADD COLUMN recent_participants_count INTEGER;', + 'ALTER TABLE "list" ADD COLUMN recent_threads_count INTEGER;', + ], + "postgres": [ + 'ALTER TABLE "list" ADD COLUMN description TEXT;', + 'ALTER TABLE "list" ADD COLUMN recent_participants_count INTEGER;', + 'ALTER TABLE "list" ADD COLUMN recent_threads_count INTEGER;', + ], + "mysql": [ + 'ALTER TABLE `list` ADD COLUMN description TEXT;', + 'ALTER TABLE `list` ADD COLUMN recent_participants_count INTEGER;', + 'ALTER TABLE `list` ADD COLUMN recent_threads_count INTEGER;', + ], + } + + +def apply(store): + """ + Add the description, recent_participants_count and recent_threads_count + columns. + """ + dbtype = get_db_type(store) + for statement in SQL[dbtype]: + store.execute(statement) + store.commit() diff --git a/kittystore/storm/search.py b/kittystore/storm/search.py index d3e5f35..5534348 100644 --- a/kittystore/storm/search.py +++ b/kittystore/storm/search.py @@ -58,7 +58,7 @@ class SearchEngine(object): stem_ana = StemmingAnalyzer() return Schema( list_name=ID(stored=True), - message_id=ID(stored=True), + message_id=ID(stored=True, unique=True), sender=TEXT(field_boost=1.5), user_id=TEXT, subject=TEXT(field_boost=2.0, analyzer=stem_ana), diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py index b640229..0d291ce 100644 --- a/kittystore/storm/store.py +++ b/kittystore/storm/store.py @@ -43,7 +43,7 @@ class StormStore(object): implements(IMessageStore) - def __init__(self, db, search_index, settings, debug=False): + def __init__(self, db, search_index, settings, cache_manager=None, debug=False): """ Constructor. Create the session using the engine defined in the url. @@ -54,6 +54,7 @@ class StormStore(object): self.debug = debug self.search_index = search_index self.settings = settings + self._cache_manager = cache_manager # IMessageStore methods @@ -95,10 +96,7 @@ class StormStore(object): if l is None: l = List(list_name) self.db.add(l) - l.display_name = mlist.display_name - l.subject_prefix = mlist.subject_prefix - l.archive_policy = mlist.archive_policy - if l.archive_policy == ArchivePolicy.never: + if mlist.archive_policy == ArchivePolicy.never: print "Archiving disabled by list policy for %s" % list_name return None if not message.has_key("Message-Id"): @@ -157,9 +155,6 @@ class StormStore(object): # warning: scrubbing modifies the msg in-place email.content, attachments = scrubber.scrub() - # store the Mailman user - email.user_id = self._store_mailman_user(email.sender_email) - #category = 'Question' # TODO: enum + i18n ? #if ('agenda' in message.get('Subject', '').lower() or # 'reminder' in message.get('Subject', '').lower()): @@ -184,20 +179,13 @@ class StormStore(object): # search indexing if self.search_index is not None: self.search_index.add(email) - return email.message_id_hash + # caching + if self._cache_manager is not None: + self._cache_manager.on_new_message(self, mlist, email) + if new_thread: + self._cache_manager.on_new_thread(self, mlist, thread) - def _store_mailman_user(self, address): - try: - mm_client = mailmanclient.Client('%s/3.0' % - self.settings.MAILMAN_REST_SERVER, - self.settings.MAILMAN_API_USER, - self.settings.MAILMAN_API_PASS) - mm_user = mm_client.get_user(address) - except (HTTPError, mailmanclient.MailmanConnectionError), e: - if self.debug: - print "Can't get the user from Mailman: %s" % e - else: - return unicode(mm_user.user_id) + return email.message_id_hash def attach_to_thread(self, email, thread): @@ -689,3 +677,11 @@ class StormStore(object): def rollback(self): self.db.rollback() + + + # Caching + + def refresh_cache(self): + if self._cache_manager is None: + return + self._cache_manager.refresh(self) diff --git a/kittystore/test/__init__.py b/kittystore/test/__init__.py index 0270307..72e9b24 100644 --- a/kittystore/test/__init__.py +++ b/kittystore/test/__init__.py @@ -13,11 +13,14 @@ get_test_file.__test__ = False class FakeList(object): # pylint: disable=R0903 # (Too few public methods) + display_name = None + description = None + subject_prefix = None + archive_policy = ArchivePolicy.public + recent_participants_count = 0 + recent_threads_count = 0 def __init__(self, name): - self.fqdn_listname = name - self.display_name = None - self.subject_prefix = None - self.archive_policy = ArchivePolicy.public + self.fqdn_listname = unicode(name) class SettingsModule: KITTYSTORE_URL = "sqlite:" diff --git a/kittystore/test/test_caching.py b/kittystore/test/test_caching.py new file mode 100644 index 0000000..24b7555 --- /dev/null +++ b/kittystore/test/test_caching.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# pylint: disable=R0904,C0103 +# - Too many public methods +# - Invalid name XXX (should match YYY) + +import unittest +import datetime + +from mock import Mock +from mailman.email.message import Message +from mailman.interfaces.archiver import ArchivePolicy + +from kittystore import get_store +from kittystore.caching import CacheManager +from kittystore.caching.mlist import ListPropertiesCache +from kittystore.test import get_test_file, FakeList, SettingsModule + + +class CacheManagerTestCase(unittest.TestCase): + + def setUp(self): + self.cm = CacheManager() + self.cm.auto_refresh = False + + def test_discover(self): + self.cm.discover() + self.assertNotEqual(len(self.cm._cached_values), 0) + + def test_old_refresh(self): + self.cm.auto_refresh = True + cv = Mock() + self.cm._cached_values = [cv] + yesterday = datetime.date.today() - datetime.timedelta(days=1) + self.cm._last_refresh = yesterday + self.cm.on_new_message(None, None, None) + self.assertEqual(self.cm._last_refresh, datetime.date.today()) + self.assertTrue(cv.refresh.called) + self.assertFalse(cv.on_new_message.called) + + def test_on_new_message(self): + msg = Message() + msg["From"] = "dummy@example.com" + msg["Message-ID"] = "<dummy>" + msg.set_payload("Dummy message") + ml = FakeList("example-list") + self.cm.on_new_message = Mock() + self.cm.on_new_thread = Mock() + store = get_store(SettingsModule()) + store._cache_manager = self.cm + try: + store.add_to_list(ml, msg) + finally: + store.close() + self.assertTrue(self.cm.on_new_message.called) + self.assertTrue(self.cm.on_new_thread.called) + + def test_no_new_thread(self): + ml = FakeList("example-list") + msg1 = Message() + msg1["From"] = "dummy@example.com" + msg1["Message-ID"] = "<dummy1>" + msg1.set_payload("Dummy message") + msg2 = Message() + msg2["From"] = "dummy@example.com" + msg2["Message-ID"] = "<dummy2>" + msg2["In-Reply-To"] = "<dummy1>" + msg2.set_payload("Dummy message") + self.cm.on_new_message = Mock() + self.cm.on_new_thread = Mock() + store = get_store(SettingsModule()) + store._cache_manager = self.cm + try: + store.add_to_list(ml, msg1) + store.add_to_list(ml, msg2) + finally: + store.close() + self.assertEqual(self.cm.on_new_message.call_count, 2) + self.assertEqual(self.cm.on_new_thread.call_count, 1) + + +class ListCacheTestCase(unittest.TestCase): + + def setUp(self): + self.store = get_store(SettingsModule()) + self.store._cache_manager.auto_refresh = False + + def tearDown(self): + self.store.close() + + def test_properties_on_new_message(self): + #updater = ListPropertiesCache() + ml = FakeList("example-list") + ml.display_name = u"name 1" + ml.subject_prefix = u"[prefix 1]" + ml.description = u"desc 1" + msg = Message() + msg["From"] = "dummy@example.com" + msg["Message-ID"] = "<dummy>" + msg.set_payload("Dummy message") + self.store.add_to_list(ml, msg) + ml_db = self.store.get_lists()[0] + self.assertEqual(ml_db.display_name, "name 1") + self.assertEqual(ml_db.subject_prefix, "[prefix 1]") + ml.display_name = u"name 2" + ml.subject_prefix = u"[prefix 2]" + ml.description = u"desc 2" + ml.archive_policy = ArchivePolicy.private + msg.replace_header("Message-ID", "<dummy2>") + self.store.add_to_list(ml, msg) + #ml_db = self.store.db.find(List).one() + self.assertEqual(ml_db.display_name, "name 2") + self.assertEqual(ml_db.subject_prefix, "[prefix 2]") + self.assertEqual(ml_db.description, "desc 2") + self.assertEqual(ml_db.archive_policy, ArchivePolicy.private) + + diff --git a/kittystore/test/test_storm_store.py b/kittystore/test/test_storm_store.py index 585001a..4c2101c 100644 --- a/kittystore/test/test_storm_store.py +++ b/kittystore/test/test_storm_store.py @@ -93,27 +93,6 @@ class TestStormStore(unittest.TestCase): self.assertEqual(self.store.db.find(Email).count(), 1) self.assertEqual(self.store.db.find(Attachment).count(), 1) - def test_update_list(self): - """List records must be updated when changed in Mailman""" - msg = Message() - msg["From"] = "dummy@example.com" - msg["Message-ID"] = "<dummy>" - msg.set_payload("Dummy message") - ml = FakeList("example-list") - ml.display_name = u"name 1" - ml.subject_prefix = u"[prefix 1]" - self.store.add_to_list(ml, msg) - ml_db = self.store.db.find(List).one() - self.assertEqual(ml_db.display_name, "name 1") - self.assertEqual(ml_db.subject_prefix, "[prefix 1]") - ml.display_name = u"name 2" - ml.subject_prefix = u"[prefix 2]" - self.store.add_to_list(ml, msg) - ml_db = self.store.db.find(List).one() - self.assertEqual(ml_db.display_name, "name 2") - self.assertEqual(ml_db.subject_prefix, "[prefix 2]") - - def test_thread_neighbors(self): ml = FakeList("example-list") # Create 3 threads diff --git a/kittystore/utils.py b/kittystore/utils.py index 59fd94a..ce7c1b3 100644 --- a/kittystore/utils.py +++ b/kittystore/utils.py @@ -145,3 +145,7 @@ def get_ref_and_thread_id(message, list_name, store): thread_id = unicode(ref_msg.thread_id) return ref_id, thread_id + +def daterange(start_date, end_date): + for n in range(int((end_date - start_date).days)): + yield start_date + timedelta(n) @@ -49,6 +49,7 @@ setup( 'kittystore-import = kittystore.import:main', 'kittystore-updatedb = kittystore.scripts:updatedb', 'kittystore-download21 = kittystore.scripts:dl_archives', + 'kittystore-refresh-cache = kittystore.scripts:cache_refresh', ], }, ) |
