-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathlemma_counts.py
32 lines (28 loc) · 1.13 KB
/
lemma_counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import collections
import os.path
class LemmaCounts:
    """Lemma frequency tallies, aggregated overall, per author and per work.

    Counts are accumulated from one or more files via `load`; each call adds
    to the existing tallies rather than replacing them.
    """

    def __init__(self):
        # Total count of each lemma across everything loaded so far.
        self.all = collections.Counter()
        # author_id -> Counter of lemmas for that author.
        self.by_author = collections.defaultdict(collections.Counter)
        # work_id (`author:work`) -> Counter of lemmas for that work.
        self.by_work = collections.defaultdict(collections.Counter)

    def load(self, filename):
        """
        Add the counts found in `filename` to the tallies.

        Each non-blank line holds three whitespace-separated fields:
        a work id of the form `author:work`, a lemma, and an integer count.
        Blank lines are skipped. A malformed line raises ValueError.
        """
        # Decode explicitly as UTF-8 so that non-ASCII lemmas are read the
        # same way regardless of the platform's locale encoding.
        with open(filename, encoding="utf-8") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                work_id, lemma, count = fields
                # The work id must contain exactly one ":"; the part before
                # it identifies the author.
                author_id, _ = work_id.split(":")
                count = int(count)
                self.all[lemma] += count
                self.by_author[author_id][lemma] += count
                self.by_work[work_id][lemma] += count

    def get_counts(self, prefix=None):
        """
        no prefix returns a Counter of all lemmas
        author prefix like `0540` will return Counter of lemmas in that author
        work prefix like `0540:001` will return Counter of lemmas in that work

        An unknown author or work yields an empty Counter. For known
        prefixes the returned Counter is the internal one, not a copy.
        """
        if prefix is None:
            return self.all
        # `.get` is used instead of indexing so that looking up an unknown
        # prefix does not insert an empty entry into the defaultdict.
        if ":" not in prefix:
            return self.by_author.get(prefix, collections.Counter())
        return self.by_work.get(prefix, collections.Counter())