1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
#-*- coding: utf-8 -*-
import pymongo
import re
from bunch import Bunch
# Module-level MongoDB connection to localhost:27017, opened when this module
# is imported and reused by the query helpers in this file.
# NOTE(review): later PyMongo releases replace `Connection` with
# `MongoClient` -- confirm the installed version before touching this line.
connection = pymongo.Connection('localhost', 27017)
def get_emails_thread(table, start_email, thread):
    """Collect the mails that reply, directly or not, to ``start_email``.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        start_email: Mail document whose ``'Message-ID'`` is searched for in
            the ``'References'`` field of the other mails.
        thread: List the matching mails are appended to. It is modified in
            place; callers usually seed it with ``[start_email]``.

    Returns:
        The same ``thread`` list, extended depth first with every mail found.
        Mails already present in ``thread`` are not added a second time.
    """
    db = connection[table]
    # The indexes only need to be requested once per call, not once per
    # recursion level, so they are set up here and the recursion happens in
    # the helper below.
    for field in ('Date', 'In-Reply-To', 'Message-ID'):
        db.mails.create_index(field)
        db.mails.ensure_index(field)

    # A 'References' header normally lists every ancestor of a mail, so the
    # same mail is matched again at each level of the walk. Track the
    # documents already collected to avoid duplicates and endless recursion.
    seen = set(mail.get('_id') for mail in thread)
    seen.add(start_email.get('_id'))
    _collect_replies(db, start_email, thread, seen)
    return thread


def _collect_replies(db, start_email, thread, seen):
    """Append unseen mails referencing ``start_email`` to ``thread``."""
    # Message-IDs contain regex metacharacters ('.', '+', '$', ...); escape
    # the ID so it is matched literally.
    pattern = re.compile('.*%s.*' % re.escape(start_email['Message-ID']),
                         re.IGNORECASE)
    for el in db.mails.find({'References': pattern},
                            sort=[('Date', pymongo.DESCENDING)]):
        if el['_id'] in seen:
            continue
        seen.add(el['_id'])
        thread.append(el)
        _collect_replies(db, el, thread, seen)
def get_archives(table, start, end):
    """Return the thread-starting mails dated from ``start`` up to ``end``.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        start: Inclusive lower bound compared against the ``'Date'`` field.
        end: Exclusive upper bound compared against the ``'Date'`` field.

    Returns:
        A list of mail documents that have no ``'References'`` field, sorted
        by ``'Date'`` in descending order.
    """
    mails = connection[table].mails
    for field in ('Date', 'References'):
        mails.create_index(field)
        mails.ensure_index(field)

    # Beginning of thread == no 'References' header.
    criteria = {
        'References': {'$exists': False},
        "Date": {"$gte": start, "$lt": end},
    }
    newest_first = [('Date', pymongo.DESCENDING)]
    return list(mails.find(criteria, sort=newest_first))
def get_thread(table, start, end):
    """Return the threads started from ``start`` up to ``end``, by subject.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        start: Inclusive lower bound compared against the ``'Date'`` field.
        end: Exclusive upper bound compared against the ``'Date'`` field.

    Returns:
        A ``Bunch`` mapping the ``'Subject'`` of each thread-starting mail to
        the list built by ``get_emails_thread`` (the starting mail followed
        by the mails found for it).
    """
    db = connection[table]
    for field in ('Date', 'References'):
        db.mails.create_index(field)
        db.mails.ensure_index(field)

    # Beginning of thread == no 'References' header.
    archives = Bunch()
    for el in db.mails.find({'References': {'$exists': False},
                             "Date": {"$gte": start, "$lt": end}},
                            sort=[('Date', pymongo.DESCENDING)]):
        # get_emails_thread takes (table, start_email, thread); the database
        # name has to be passed along with the seed list.
        # NOTE: threads sharing a subject overwrite each other here; with the
        # newest-first sort, the oldest thread is the one that is kept.
        archives[el['Subject']] = get_emails_thread(table, el, [el])
    return archives
def get_thread_length(table, thread_id):
    """Count the mails whose ``'Thread-ID'`` equals ``thread_id``.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        thread_id: Value matched against the ``'Thread-ID'`` field.

    Returns:
        The number of matching mails, as reported by the cursor's ``count()``.
    """
    mails = connection[table].mails
    mails.create_index('Thread-ID')
    mails.ensure_index('Thread-ID')
    matching = mails.find({'Thread-ID': thread_id})
    return matching.count()
def get_thread_participants(table, thread_id):
    """Count the distinct ``'From'`` values among the mails of a thread.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        thread_id: Value matched against the ``'Thread-ID'`` field.

    Returns:
        The number of different ``'From'`` values found in the matching mails.

    Raises:
        KeyError: If a matching mail has no ``'From'`` field.
    """
    mails = connection[table].mails
    mails.create_index('Thread-ID')
    mails.ensure_index('Thread-ID')
    senders = set(mail['From']
                  for mail in mails.find({'Thread-ID': thread_id}))
    return len(senders)
def get_archives_length(table):
    """Map each year to the months in which at least one mail is dated.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.

    Returns:
        A dict keyed by year whose values are lists of month numbers, taken
        from the distinct ``'Date'`` values of the collection. The months
        come out of a set, so their order in each list is not defined.
    """
    mails = connection[table].mails
    mails.create_index('Date')
    mails.ensure_index('Date')

    months_by_year = {}
    for date in mails.distinct('Date'):
        months_by_year.setdefault(date.year, set()).add(date.month)

    # Hand back plain lists rather than sets.
    return dict((year, list(months))
                for year, months in months_by_year.items())
def search_archives(table, query):
    """Run ``query`` against the mails and return the 50 newest matches.

    Args:
        table: Name of the database to query (looked up as
            ``connection[table]``); mails are read from its ``mails``
            collection.
        query: Mapping passed unchanged to ``find``. Each of its keys is also
            used, converted with ``str``, as the name of an index to create.

    Returns:
        A cursor over the matching mails, sorted by ``'Date'`` in descending
        order and limited to 50 documents.
    """
    mails = connection[table].mails

    def _index(field):
        # Request the index both ways, exactly as the other helpers do.
        mails.create_index(field)
        mails.ensure_index(field)

    _index('Date')
    # NOTE(review): every top-level key is indexed as a field name, which
    # would include operator keys such as '$or' -- confirm callers only pass
    # plain field names.
    for key in query:
        _index(str(key))

    newest_first = [('Date', pymongo.DESCENDING)]
    matches = mails.find(query, sort=newest_first)
    return matches.limit(50)
|