From 089f72b237ea4cd8711e6ded643f43687fd39490 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Sun, 18 Mar 2012 09:14:05 +0100 Subject: Start implementing the archiver with mongo in the background --- README | 1 + lib/mongo.py | 83 +++++++++++++++++ settings.py | 3 +- static/css/stats.css | 16 ++-- static/css/style.css | 27 +++++- templates/base2.html | 6 +- templates/index2.html | 9 +- templates/month_view2.html | 110 +++++++++++++--------- templates/recent_activities.html | 193 ++++++++++++++++++++++----------------- templatetags/__init__.py | 0 templatetags/poll_extras.py | 31 +++++++ urls.py | 10 +- views/mockup.py | 11 ++- views/pages.py | 127 ++++++++++++-------------- 14 files changed, 408 insertions(+), 219 deletions(-) create mode 100644 lib/mongo.py create mode 100644 templatetags/__init__.py create mode 100644 templatetags/poll_extras.py diff --git a/README b/README index 288462d..1acdb67 100644 --- a/README +++ b/README @@ -22,6 +22,7 @@ source bin/activate # Install django and dependencies easy_install django easy_install bunch +easy_install urlgrabber # Install notmuch -- these are bindings that come with the notmuch C library # The easiest way is probably to install them for your OS vendor and then # symlink them into the virtualenv similar to this: diff --git a/lib/mongo.py b/lib/mongo.py new file mode 100644 index 0000000..b8f3b2f --- /dev/null +++ b/lib/mongo.py @@ -0,0 +1,83 @@ +#-*- coding: utf-8 -*- + +import pymongo +from bunch import Bunch + +connection = pymongo.Connection('localhost', 27017) + +def get_emails_thread(table, start_email, thread): + db = connection[table] + db.mails.create_index('Date') + db.mails.ensure_index('Date') + db.mails.create_index('In-Reply-To') + db.mails.ensure_index('In-Reply-To') + db.mails.create_index('Message-ID') + db.mails.ensure_index('Message-ID') + for el in db.mails.find({'In-Reply-To': start_email['Message-ID']}, + sort=[('Date', pymongo.DESCENDING)]): + thread.append(el) + get_emails_thread(el, thread) + return thread + + +def get_archives(table, start, end): + db = connection[table] + db.mails.create_index('Date') + db.mails.ensure_index('Date') + db.mails.create_index('In-Reply-To') + db.mails.ensure_index('In-Reply-To') + # Beginning of thread == No 'In-Reply-To' header + archives = [] + for el in db.mails.find({'In-Reply-To': {'$exists':False}, + "Date": {"$gte": start, "$lt": end}}, + sort=[('Date', pymongo.DESCENDING)]): + archives.append(el) + return archives + + +def get_thread(table, start, end): + db = connection[table] + db.mails.create_index('Date') + db.mails.ensure_index('Date') + db.mails.create_index('In-Reply-To') + db.mails.ensure_index('In-Reply-To') + # Beginning of thread == No 'In-Reply-To' header + archives = Bunch() + for el in db.mails.find({'In-Reply-To': {'$exists':False}, + "Date": {"$gte": start, "$lt": end}}, + sort=[('Date', pymongo.DESCENDING)]): + thread = get_emails_thread(el, [el]) + #print el['Subject'], len(thread) + archives[el['Subject']] = thread + return archives + + +def get_thread_length(table, thread_id): + db = connection[table] + db.mails.create_index('Thread-ID') + db.mails.ensure_index('Thread-ID') + return db.mails.find({'Thread-ID': thread_id}).count() + + +def get_thread_participants(table, thread_id): + db = connection[table] + db.mails.create_index('Thread-ID') + db.mails.ensure_index('Thread-ID') + authors = set() + for mail in db.mails.find({'Thread-ID': thread_id}): + authors.add(mail['From']) + return len(authors) + +def get_archives_length(table): + db = connection[table] + archives = {} + for entry in db.mails.find(): + date = entry['Date'] + if date.year in archives: + archives[date.year].add(date.month) + else: + archives[date.year] = set([date.month]) + for key in archives: + archives[key] = list(archives[key]) + return archives + diff --git a/settings.py b/settings.py index 1faf4c2..0dd291e 100644 --- a/settings.py +++ b/settings.py @@ -109,7 +109,7 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.messages.middleware.MessageMiddleware', ) -ROOT_URLCONF = 'hyperkitty.urls' +ROOT_URLCONF = 'urls' TEMPLATE_DIRS = ( # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". @@ -129,6 +129,7 @@ INSTALLED_APPS = ( # 'django.contrib.admin', # Uncomment the next line to enable admin documentation: # 'django.contrib.admindocs', + 'trunk', ) # A sample logging configuration. The only tangible logging diff --git a/static/css/stats.css b/static/css/stats.css index cd39079..6a1d2ec 100644 --- a/static/css/stats.css +++ b/static/css/stats.css @@ -26,29 +26,31 @@ h2 { /* The content section of the page */ .content { - width: 90%; + width: 1024px; margin: auto; } #top_discussion { - width: 45%; + width: 40%; margin-right: 22px; + margin-left: 11%; } #discussion_by_topic { - width: 45%; + width: 40%; margin-top: 20px; margin-right: 22px; + margin-left: 11%; } #most_active { float: right; - width: 45%; + width: 40%; } #discussion_marker { float: right; - width: 45%; + width: 40%; margin-top: 20px; } @@ -75,7 +77,7 @@ h2 { } .thread_stats ul li { - margin-right:20px; + margin-right:10px; } .category { @@ -110,7 +112,7 @@ h2 { .maker_id, .marker_name{ font-weight: bold; - font-size: 125%; + font-size: 115%; vertical-align: top; padding-right: 20px; } diff --git a/static/css/style.css b/static/css/style.css index 00e46d8..631684f 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -204,6 +204,31 @@ a { margin: auto; } +#recent_activities{ + width: 90%; + margin-top: 20px; + margin-right: 20px; + float: right; +} + +#archives{ + width: 8%; + margin-top: 20px; + float: left; +/* + margin-right: 2px; +*/ +} + +#archives ul { + padding: 0; + margin: 0; +} + +#archives li { + list-style-type: none; +} + /* Thread list */ .thread_title { @@ -237,7 +262,7 @@ a { /* Part containing the body of the mail which can be shown/hidden */ .expander { - width: 768px; + width: 665px; background-image: linear-gradient(bottom, rgb(236,236,236) 11%, rgb(255,255,255) 100%); background-image: -o-linear-gradient(bottom, rgb(236,236,236) 11%, rgb(255,255,255) 100%); background-image: -moz-linear-gradient(bottom, rgb(236,236,236) 11%, rgb(255,255,255) 100%); diff --git a/templates/base2.html b/templates/base2.html index 8490bc6..c477a99 100644 --- a/templates/base2.html +++ b/templates/base2.html @@ -21,7 +21,11 @@ {% block header %}
{% if month_participants and month_discussions %} diff --git a/templates/index2.html b/templates/index2.html index 440e66f..ac2e3c7 100644 --- a/templates/index2.html +++ b/templates/index2.html @@ -3,11 +3,14 @@ {% block title %}{{ app_name }}{% endblock %} {% block content %} +

+ These are the list available on this server: +

diff --git a/templates/month_view2.html b/templates/month_view2.html index b006d35..b763a4d 100644 --- a/templates/month_view2.html +++ b/templates/month_view2.html @@ -1,4 +1,5 @@ {% extends "base2.html" %} +{% load poll_extras %} {% block title %}{{ app_name }}{% endblock %} @@ -17,59 +18,78 @@ {% block content %} - {% for email in threads %} - -
-
- {{email.title}} - {{email.age}} ago -
-
- {% if email.category_tag %} - - {% else %} -
- {{email.category}} +
+ {% for email in threads %} + +
+
+ {{email.Subject}} + {{email.Date}}
- {% endif %} -
- {% if email.avatar %} - avatar
+
+ {% if email.category_tag %} + + {% else %} + {% endif %} - {{email.author}} +
+ {% if email.avatar %} + avatar
+ {% endif %} + {{email.From}} +
+
+ + {{email.Content}} + +
-
- - {{email.body}} - +
+
    +
  • Tags:
  • + {% for tag in email.tags %} +
  • {{tag}}
  • + {% endfor %} +
+
    +
  • {{email.participants}} participants
  • +
  • {{email.answers}} comments
  • +
+
-
-
    -
  • Tags:
  • - {% for tag in email.tags %} -
  • {{tag}}
  • + + {% endfor %} +
+
+ {% for key, value in archives_length|sort %} +

{{ key }}

+
+ -
    -
  • {{email.participants|length}} participants
  • -
  • {{email.answers|length}} comments
  • -
-
+ {% endfor %}
- - {% endfor %} + {% endblock %} diff --git a/templates/recent_activities.html b/templates/recent_activities.html index 84dd366..52a3968 100644 --- a/templates/recent_activities.html +++ b/templates/recent_activities.html @@ -1,99 +1,128 @@ -{% extends "base.html" %} +{% extends "base2.html" %} +{% load poll_extras %} {% block title %} {{ app_name }} {% endblock %} {% block additional_headers %} + + + + + {% endblock %} {% block content %} -
-

Recently active discussions

- {% for email in top_threads %} - -
- #{{forloop.counter}} - {{email.title}} -
- -
-
- - {% endfor %} -
+
+
+

Recently active discussions

+ {% for email in most_active_threads %} + +
+ #{{forloop.counter}} + {{email.Subject}} +
+ +
+
+ + {% endfor %} +
-
-

Top discussions the last 30 days

- {% for email in most_active_threads %} - -
- #{{forloop.counter}} - {{email.title}} -
- -
-
- - {% endfor %} -
-
-

Prominent discussion maker

- {% for author in top_author %} - -
-
#{{forloop.counter}}
-
- {% if author.avatar %} - avatar - {% endif %} -
-
- {{author.name}}
- +{{author.kudos}} kudos -
-
- - {% endfor %} +
+

Top discussions the last 30 days

+ {% for email in top_threads %} + +
+ #{{forloop.counter}} + {{email.Subject}} +
+ +
+
+ + {% endfor %} +
-

Tag cloud

+
+

Prominent discussion maker

+ {% for author in top_author %} + +
+
#{{forloop.counter}}
+
+ {% if author.avatar %} + avatar + {% endif %} +
+
+ {{author.name}}
+ +{{author.kudos}} kudos +
+
+ + {% endfor %} + +

Tag cloud

+
+ +
+

Discussion by topic the last 30 days

+ {% for category, thread in threads_per_category.items %} +
+

{{category}}

+
    + {% for email in thread %} +
  • {{email.title}}
  • + {% endfor %} +
+
+ {% endfor %} +
- -
-

Discussion by topic the last 30 days

- {% for category, thread in threads_per_category.items %} +
+ {% for key, value in archives_length|sort %} +

{{ key }}

-

{{category}}

-
diff --git a/templatetags/__init__.py b/templatetags/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/templatetags/poll_extras.py b/templatetags/poll_extras.py new file mode 100644 index 0000000..f57a4a7 --- /dev/null +++ b/templatetags/poll_extras.py @@ -0,0 +1,31 @@ +from django import template +from django.utils.datastructures import SortedDict + +register = template.Library() + +@register.filter(name='sort') +def listsort(value): + if isinstance(value, dict): + new_dict = SortedDict() + key_list = value.keys() + key_list.sort() + key_list.reverse() + for key in key_list: + values = value[key] + values.sort() + values.reverse() + new_dict[key] = values + return new_dict.items() + elif isinstance(value, list): + new_list = list(value) + new_list.sort() + return new_list + else: + return value + listsort.is_safe = True + +@register.filter(name="tomonth") +def to_month(value): + months = ('January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', 'December') + return months[value -1] diff --git a/urls.py b/urls.py index 2a035ac..3746d3e 100644 --- a/urls.py +++ b/urls.py @@ -15,13 +15,15 @@ urlpatterns = patterns('', url(r'^2$', 'views.pages.index'), url(r'^2/$', 'views.pages.index'), # This will be the new archives page - url(r'^2/archives/(?P.*@.*)/(?P\d{4})/(?P\d{2})/$', 'views.pages.archives'), - url(r'^2/archives/(?P.*@.*)/(?P\d{4})/(?P\d{2})$', 'views.pages.archives'), + url(r'^2/archives/(?P.*@.*)/(?P\d{4})/(?P\d\d?)/$', 'views.pages.archives'), + url(r'^2/archives/(?P.*@.*)/(?P\d{4})/(?P\d\d?)$', 'views.pages.archives'), url(r'^2/archives/(?P.*@.*)/$', 'views.pages.archives'), url(r'^2/archives/(?P.*@.*)$', 'views.pages.archives'), # This will be the new recent page - url(r'^2/recent/(?P.*@.*)/$', 'views.pages.recent'), - url(r'^2/recent/(?P.*@.*)$', 'views.pages.recent'), + url(r'^2/list$', 'views.pages.index'), + url(r'^2/list/$', 'views.pages.index'), + url(r'^2/list/(?P.*@.*)/$', 'views.pages.list'), + url(r'^2/list/(?P.*@.*)$', 'views.pages.list'), # Search url(r'^2/search$', 'views.pages.search'), url(r'^2/search/(?P.*@.*)$', 'views.pages.search_keyword'), diff --git a/views/mockup.py b/views/mockup.py index 096c58c..170c7be 100644 --- a/views/mockup.py +++ b/views/mockup.py @@ -13,14 +13,15 @@ from django import forms from django.http import HttpResponse, HttpResponseRedirect from django.template import RequestContext, loader from django.conf import settings -import notmuch -import urlgrabber +#import notmuch +#import urlgrabber -from hyperkitty.lib.mockup import generate_random_thread, generate_top_author, \ +from lib.mockup import generate_random_thread, generate_top_author, \ generate_thread_per_category, get_email_tag -from hyperkitty.lib import gravatar_url -from lib.notmuch import get_ro_db +from lib import gravatar_url + +#from lib.notmuch import get_ro_db # Move this into settings.py ARCHIVE_DIR = '/home/toshio/mm3/mailman/var/archives/hyperkitty/' diff --git a/views/pages.py b/views/pages.py index 850ccc4..fcd1ce3 100644 --- a/views/pages.py +++ b/views/pages.py @@ -11,15 +11,15 @@ from django import forms from django.http import HttpResponse, HttpResponseRedirect from django.template import RequestContext, loader from django.conf import settings -import urlgrabber +#import urlgrabber -from hyperkitty.lib.mockup import generate_thread_per_category, generate_top_author +from bunch import Bunch -from lib.notmuch import get_thread_info, get_ro_db +from lib.mockup import generate_thread_per_category, generate_top_author -# Move this into settings.py -ARCHIVE_DIR = '/home/toshio/mm3/mailman/var/archives/hyperkitty/' +from lib import mongo +# Move this into settings.py MONTH_PARTICIPANTS = 284 MONTH_DISCUSSIONS = 82 logger = logging.getLogger(__name__) @@ -32,14 +32,14 @@ class SearchForm(forms.Form): ) ) - def index(request): t = loader.get_template('index2.html') search_form = SearchForm(auto_id=False) base_url = settings.MAILMAN_API_URL % { 'username': settings.MAILMAN_USER, 'password': settings.MAILMAN_PASS} - data = json.load(urlgrabber.urlopen(urljoin(base_url, 'lists'))) - list_data = sorted(data['entries'], key=lambda elem: (elem['mail_host'], elem['list_name'])) + #data = json.load(urlgrabber.urlopen(urljoin(base_url, 'lists'))) + #list_data = sorted(data['entries'], key=lambda elem: (elem['mail_host'], elem['list_name'])) + list_data = ['devel@fp.o', 'packaging@fp.o'] c = RequestContext(request, { 'app_name': settings.APP_NAME, 'lists': list_data, @@ -54,56 +54,40 @@ def archives(request, mlist_fqdn, year=None, month=None): if year or month: try: begin_date = datetime(int(year), int(month), 1) - end_date = begin_date + timedelta(days=32) + end_date = datetime(int(year), int(month) +1, 1) month_string = begin_date.strftime('%B %Y') except ValueError, err: logger.error('Wrong format given for the date') if not end_date: - end_date = datetime.utcnow() - begin_date = end_date - timedelta(days=32) + today = datetime.utcnow() + begin_date = datetime(today.year, today.month, 1) + end_date = datetime(today.year, today.month+1, 1) month_string = 'Past thirty days' - begin_timestamp = timegm(begin_date.timetuple()) - end_timestamp = timegm(end_date.timetuple()) list_name = mlist_fqdn.split('@')[0] search_form = SearchForm(auto_id=False) t = loader.get_template('month_view2.html') - try: - db = get_ro_db(os.path.join(ARCHIVE_DIR, mlist_fqdn)) - except IOError: - logger.error('No archive for mailing list %s' % mlist_fqdn) - return + print begin_date, end_date + threads = mongo.get_archives(list_name, start=begin_date, + end=end_date) - msgs = db.create_query('%s..%s' % (begin_timestamp, end_timestamp)).search_messages() participants = set() - discussions = set() - for msg in msgs: - message = json.loads(msg.format_message_as_json()) + cnt = 0 + for msg in threads: + msg = Bunch(msg) # Statistics on how many participants and threads this month - participants.add(message['headers']['From']) - discussions.add(msg.get_thread_id()) + participants.add(msg['From']) + msg.participants = mongo.get_thread_participants(list_name, + msg['Thread-ID']) + msg.answers = mongo.get_thread_length(list_name, + msg['Thread-ID']) + threads[cnt] = msg + cnt = cnt + 1 - # Collect data about each thread - threads = [] - for thread_id in discussions: - # Note: can't use tuple() due to a bug in notmuch - thread = [thread for thread in db.create_query('thread:%s' % thread_id).search_threads()] - if len(thread) != 1: - logger.warning('Unknown thread_id %(thread)s from %(mlist)s:' - ' %(start)s-%(end)s' % { - 'thread': thread_id, 'mlist': mlist_fqdn, - 'start': begin_timestamp, 'end': end_timestamp}) - continue - thread = thread[0] - thread_info = get_thread_info(thread) - threads.append(thread_info) - - # For threads, we need to have threads ordered by - # youngest to oldest with the oldest message within thread - threads.sort(key=lambda entry: entry.most_recent, reverse=True) + archives_length = mongo.get_archives_length(list_name) c = RequestContext(request, { 'app_name': settings.APP_NAME, @@ -112,47 +96,49 @@ def archives(request, mlist_fqdn, year=None, month=None): 'search_form': search_form['keyword'], 'month': month_string, 'month_participants': len(participants), - 'month_discussions': len(discussions), + 'month_discussions': len(threads), 'threads': threads, + 'archives_length': archives_length, }) return HttpResponse(t.render(c)) -def recent(request, mlist_fqdn): +def list(request, mlist_fqdn=None): + if not mlist_fqdn: + return HttpResponseRedirect('/2/') t = loader.get_template('recent_activities.html') search_form = SearchForm(auto_id=False) list_name = mlist_fqdn.split('@')[0] # Get stats for last 30 days - end_date = datetime.utcnow() + today = datetime.utcnow() + end_date = datetime(today.year, today.month, today.day) begin_date = end_date - timedelta(days=32) - begin_timestamp = timegm(begin_date.timetuple()) - end_timestamp = timegm(end_date.timetuple()) - try: - db = get_ro_db(os.path.join(ARCHIVE_DIR, mlist_fqdn)) - except IOError: - logger.error('No archive for mailing list %s' % mlist_fqdn) - return + print begin_date, end_date + threads = mongo.get_archives(table=list_name,start=begin_date, + end=end_date) - msgs = db.create_query('%s..%s' % (begin_timestamp, end_timestamp)).search_messages() participants = set() - discussions = set() - for msg in msgs: - message = json.loads(msg.format_message_as_json()) + cnt = 0 + for msg in threads: + msg = Bunch(msg) # Statistics on how many participants and threads this month - participants.add(message['headers']['From']) - discussions.add(msg.get_thread_id()) - - thread_query = db.create_query('%s..%s' % (begin_timestamp, end_timestamp)).search_threads() - top_threads = [] - for thread in thread_query: - thread_info = get_thread_info(thread) - top_threads.append(thread_info) - # top threads are the ones with the most posts - top_threads.sort(key=lambda entry: len(entry.answers), reverse=True) + participants.add(msg['From']) + msg.participants = mongo.get_thread_participants(list_name, + msg['Thread-ID']) + msg.answers = mongo.get_thread_length(list_name, + msg['Thread-ID']) + threads[cnt] = msg + cnt = cnt + 1 + print len(threads) + + # top threads are the one with the most answers + top_threads = sorted(threads, key=lambda entry: entry.answers, reverse=True) # active threads are the ones that have the most recent posting - active_threads = sorted(top_threads, key=lambda entry: entry.most_recent, reverse=True) + active_threads = sorted(threads, key=lambda entry: entry.Date, reverse=True) + + archives_length = mongo.get_archives_length(list_name) # top authors are the ones that have the most kudos. How do we determine # that? Most likes for their post? @@ -169,11 +155,12 @@ def recent(request, mlist_fqdn): 'search_form': search_form['keyword'], 'month': 'Recent activity', 'month_participants': len(participants), - 'month_discussions': len(discussions), - 'top_threads': top_threads, - 'most_active_threads': active_threads, + 'month_discussions': len(threads), + 'top_threads': top_threads[:5], + 'most_active_threads': active_threads[:5], 'top_author': authors, 'threads_per_category': threads_per_category, + 'archives_length': archives_length, }) return HttpResponse(t.render(c)) -- cgit