summaryrefslogtreecommitdiffstats
path: root/lib/mongo.py
blob: e429eeaeb42d6fa1793ed88f2c95e9e35c2c0258 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#-*- coding: utf-8 -*-

import pymongo
import re
from bunch import Bunch

# Module-wide MongoDB connection, created when this module is imported and
# shared by every helper below (server on localhost, port 27017).
connection = pymongo.Connection('localhost', 27017)

def get_emails_thread(table, start_email, thread):
    """Recursively collect the mails that reference ``start_email``.

    Every document of the ``mails`` collection whose ``References`` field
    matches the ``Message-ID`` of ``start_email`` (case-insensitively) is
    appended to ``thread``, iterating by descending ``Date``, and is then
    itself searched for mails referencing it.

    :arg table: name of the database looked up on the module ``connection``.
    :arg start_email: mail document; only its ``Message-ID`` key is read.
    :arg thread: list the matching mails are appended to (mutated in place).
    :return: the same ``thread`` list that was passed in.
    """
    db = connection[table]
    for field in ('Date', 'In-Reply-To', 'Message-ID'):
        db.mails.create_index(field)
        db.mails.ensure_index(field)
    # Message-IDs routinely contain regex metacharacters ('.', '+', '$', ...);
    # escape the value so it is matched literally instead of as a pattern.
    regex = '.*%s.*' % re.escape(start_email['Message-ID'])
    replies = db.mails.find(
        {'References': re.compile(regex, re.IGNORECASE)},
        sort=[('Date', pymongo.DESCENDING)])
    # NOTE(review): a mail whose References names several ancestors is
    # presumably appended once per ancestor visited -- confirm whether such
    # duplicates are intended.
    for el in replies:
        thread.append(el)
        # The database name must be forwarded: this function takes three
        # arguments, and omitting it raised a TypeError on the first reply.
        get_emails_thread(table, el, thread)
    return thread


def get_archives(table, start, end):
    """Return the thread-starting mails dated within ``[start, end)``.

    A mail counts as the beginning of a thread when it has no ``References``
    field at all.

    :arg table: name of the database looked up on the module ``connection``.
    :arg start: inclusive lower bound compared against the ``Date`` field.
    :arg end: exclusive upper bound compared against the ``Date`` field.
    :return: list of matching documents, sorted by descending ``Date``.
    """
    mails = connection[table].mails
    for field in ('Date', 'References'):
        mails.create_index(field)
        mails.ensure_index(field)
    thread_starts = {
        'References': {'$exists': False},
        'Date': {'$gte': start, '$lt': end},
    }
    cursor = mails.find(thread_starts, sort=[('Date', pymongo.DESCENDING)])
    return list(cursor)


def get_thread(table, start, end):
    """Return the threads started within ``[start, end)``, keyed by subject.

    A mail counts as the beginning of a thread when it has no ``References``
    field. For each such mail the thread is built by ``get_emails_thread``,
    seeded with the starting mail itself.

    :arg table: name of the database looked up on the module ``connection``.
    :arg start: inclusive lower bound compared against the ``Date`` field.
    :arg end: exclusive upper bound compared against the ``Date`` field.
    :return: a ``Bunch`` mapping each starting mail's ``Subject`` to the list
        of mails of its thread. Threads sharing a ``Subject`` overwrite one
        another; the one iterated last is kept.
    """
    db = connection[table]
    for field in ('Date', 'References'):
        db.mails.create_index(field)
        db.mails.ensure_index(field)
    archives = Bunch()
    starters = db.mails.find(
        {'References': {'$exists': False},
         'Date': {'$gte': start, '$lt': end}},
        sort=[('Date', pymongo.DESCENDING)])
    for el in starters:
        # get_emails_thread expects (table, start_email, thread); the
        # database name was previously left out, which raised a TypeError.
        archives[el['Subject']] = get_emails_thread(table, el, [el])
    return archives


def get_thread_length(table, thread_id):
    """Count the mails whose ``Thread-ID`` field equals ``thread_id``.

    :arg table: name of the database looked up on the module ``connection``.
    :arg thread_id: value matched against the ``Thread-ID`` field.
    :return: the count reported by the query cursor.
    """
    mails = connection[table].mails
    mails.create_index('Thread-ID')
    mails.ensure_index('Thread-ID')
    matching = mails.find({'Thread-ID': thread_id})
    return matching.count()


def get_thread_participants(table, thread_id):
    """Count the distinct ``From`` values among the mails of a thread.

    :arg table: name of the database looked up on the module ``connection``.
    :arg thread_id: value matched against the ``Thread-ID`` field.
    :return: number of different ``From`` values found.
    """
    mails = connection[table].mails
    mails.create_index('Thread-ID')
    mails.ensure_index('Thread-ID')
    senders = set(
        message['From']
        for message in mails.find({'Thread-ID': thread_id}))
    return len(senders)


def get_archives_length(table):
    """Map every year found in the ``Date`` field to its months.

    The distinct ``Date`` values of the ``mails`` collection are grouped by
    their ``year`` attribute; the ``month`` attributes seen for a year are
    de-duplicated.

    :arg table: name of the database looked up on the module ``connection``.
    :return: dict of ``year -> list of months`` (the lists are not sorted).
    """
    mails = connection[table].mails
    mails.create_index('Date')
    mails.ensure_index('Date')
    months_by_year = {}
    for stamp in mails.distinct('Date'):
        months_by_year.setdefault(stamp.year, set()).add(stamp.month)
    # Callers receive plain lists rather than sets.
    return dict(
        (year, list(months)) for year, months in months_by_year.items())


def search_archives(table, query):
    """Run ``query`` against the mails and return at most 50 results.

    Before querying, an index is requested on ``Date`` and on every
    top-level field named in ``query``.

    :arg table: name of the database looked up on the module ``connection``.
    :arg query: query document passed unchanged to ``find``; its top-level
        keys are iterated to decide which fields to index.
    :return: cursor over the matches, sorted by descending ``Date`` and
        limited to 50 documents.
    """
    db = connection[table]
    db.mails.create_index('Date')
    db.mails.ensure_index('Date')
    for el in query:
        field = str(el)
        # Top-level operators such as '$or' / '$and' are not document
        # fields; asking for an index on them fails, so skip them.
        if field.startswith('$'):
            continue
        db.mails.create_index(field)
        db.mails.ensure_index(field)
    return db.mails.find(query,
        sort=[('Date', pymongo.DESCENDING)]).limit(50)