summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org> 2013-11-28 10:59:58 +0100
committer: Aurélien Bompard <aurelien@bompard.org> 2013-11-28 10:59:58 +0100
commit: 38136fd2fdda860915dc8573f14229bd4b42ab94 (patch)
tree: 299f865b4e140b8a1b2191d331090c662176bf55
parent: 2046c4530ca421f6f7819ed4aac81f6eb6d6b1a8 (diff)
download: kittystore-38136fd2fdda860915dc8573f14229bd4b42ab94.tar.gz
kittystore-38136fd2fdda860915dc8573f14229bd4b42ab94.tar.xz
kittystore-38136fd2fdda860915dc8573f14229bd4b42ab94.zip
Improvements in the caching system
-rw-r--r-- kittystore/caching/email.py | 30
-rw-r--r-- kittystore/caching/thread.py | 1
-rw-r--r-- kittystore/import.py | 5
3 files changed, 29 insertions, 7 deletions
diff --git a/kittystore/caching/email.py b/kittystore/caching/email.py
index 527d1e3..697afea 100644
--- a/kittystore/caching/email.py
+++ b/kittystore/caching/email.py
@@ -25,8 +25,15 @@ class MailmanUser(CachedValue):
address = message.sender_email
if address not in self._user_id_cache:
mm_client = self._get_mailman_client(store.settings)
- mm_user = mm_client.get_user(address)
- self._user_id_cache[address] = unicode(mm_user.user_id)
+ try:
+ mm_user = mm_client.get_user(address)
+ except HTTPError, e:
+ if e.code == 404:
+ self._user_id_cache[address] = None
+ else:
+ raise
+ else:
+ self._user_id_cache[address] = unicode(mm_user.user_id)
return self._user_id_cache[address]
def on_new_message(self, store, mlist, message):
@@ -50,12 +57,23 @@ class MailmanUser(CachedValue):
return # Can't update at this time
def refresh(self, store):
+ # There can be millions of emails; process them in smaller chunks to
+ # avoid exhausting memory
+ print "Getting missing email user ids from Mailman"
# XXX: Storm-specific
from kittystore.storm.model import Email
+ buffer_size = 50000
+ prev_count = store.db.find(Email, Email.user_id == None).count()
try:
- for num, message in enumerate(store.db.find(Email, Email.user_id == None)):
- message.user_id = self._get_user_id(store, message)
- if num % 1000 == 0:
- store.commit() # otherwise we'll blow up the memory
+ while True:
+ for message in store.db.find(Email,
+ Email.user_id == None)[:buffer_size]:
+ message.user_id = self._get_user_id(store, message)
+ store.commit()
+ count = store.db.find(Email, Email.user_id == None).count()
+ if count == 0 or count == prev_count:
+ break # done, or no improvement (former members)
+ prev_count = count
+ print "%d emails left to refresh" % count
except (HTTPError, mailmanclient.MailmanConnectionError):
return # Can't refresh at this time
diff --git a/kittystore/caching/thread.py b/kittystore/caching/thread.py
index 62edb5d..6fdb712 100644
--- a/kittystore/caching/thread.py
+++ b/kittystore/caching/thread.py
@@ -18,6 +18,7 @@ class ThreadStats(CachedValue):
len(message.thread.participants)
def refresh(self, store):
+ print "Refreshing thread statistics"
# XXX: Storm-specific
from kittystore.storm.model import Thread
for num, thread in enumerate(store.db.find(Thread)):
diff --git a/kittystore/import.py b/kittystore/import.py
index a184ebe..3605446 100644
--- a/kittystore/import.py
+++ b/kittystore/import.py
@@ -283,6 +283,8 @@ def parse_args():
help="show a whole lot more of output")
parser.add_option("--no-download", action="store_true",
help="don't download attachments")
+ parser.add_option("--no-refresh", action="store_true",
+ help="don't refresh the cache after importing")
parser.add_option("-D", "--duplicates", action="store_true",
help="do not skip duplicate emails (same Message-ID header), "
"import them with a different Message-ID")
@@ -324,5 +326,6 @@ def main():
if opts.verbose:
print ' %s emails are stored into the database' \
% store.get_list_size(opts.list_name)
- store.refresh_cache(full=True)
+ if not opts.no_refresh:
+ store.refresh_cache(full=True)
store.commit()