-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathAutoCompModule.py
More file actions
123 lines (112 loc) · 4.46 KB
/
Copy pathAutoCompModule.py
File metadata and controls
123 lines (112 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pymongo, os, sys, re
from pymongo import Connection
from mongoHelper import Helper
# Multiplier applied to trigram grades returned by AutoCompModule.suggest2.
weight3 = 50


class AutoCompModule:
    """Auto-completion module backed by a MongoDB server.

    Holds three collections:
        dict    - the number of appearances of x in the learned text
        dictBy2 - the number of appearances of (x, y) in the learned text
        dictBy3 - the number of appearances of (x, y, z) in the learned text

    The suggestion methods query documents by the keys "first", "second",
    "third" and rank them by "grade".
    """

    # Tokens that *start* with one of these characters reset the word context.
    # NOTE(review): "!-@" inside the class is a character range (0x21-0x40), so
    # it also matches digits and "+ / < = >". Kept as-is to preserve what gets
    # learned; escape the "-" if only the listed punctuation was intended.
    _CONTEXT_BREAK = re.compile(r"[.,\"\(\);:%?!-@#$^&*\{\[\}\]\']")

    def __init__(self, DBName):
        """Connect to the server and bind the three collections of ``DBName``."""
        # NOTE(review): credentials are hard-coded in the URI; consider moving
        # them to configuration. ``Connection`` is the legacy PyMongo client.
        connect = 'mongodb://project:project1234@yeda.cs.technion.ac.il/'
        self.conn = Connection(connect + DBName)
        database = self.conn[DBName]
        self.dict = database['dict']
        self.dictBy2 = database['dictBy2']
        self.dictBy3 = database['dictBy3']
        self.name = DBName
        self.helper = Helper()

    def dropDicts(self, DBName):
        """Drop the database ``DBName``, deleting all of its data."""
        self.conn.drop_database(DBName)

    def learnSingle(self, fileName):
        """Learn word, pair and triple counts from a single UTF-8 text file.

        ``pprev``, ``prev`` and ``word`` are the three last seen words, where
        ``word`` is the current one. The context is reset at the start of every
        line and whenever a token matches ``_CONTEXT_BREAK``. Counts are
        accumulated through ``self.helper.insert``.
        """
        h = self.helper
        is_break = self._CONTEXT_BREAK.match
        with open(fileName, encoding='utf-8') as source:
            for line in source:
                pprev = prev = None
                for word in line.split():
                    if is_break(word):
                        pprev = prev = None
                        continue
                    h.insert(h.dict1, word)
                    if prev is not None:
                        h.insert(h.dict2, (prev, word))
                        # pprev is only ever set from a non-None prev.
                        if pprev is not None:
                            h.insert(h.dict3, (pprev, prev, word))
                    pprev = prev
                    prev = word

    def learn(self, inputDir):
        """Learn from every file in ``inputDir`` and store the result in the DB.

        Files are processed in sorted name order with ``learnSingle`` while a
        percentage is printed as progress. If ``inputDir`` is not a directory,
        "ERROR!!" is printed and nothing is learned or written.
        """
        # Validate before listing: os.listdir raises on a non-directory, which
        # would make the error branch unreachable.
        if not os.path.isdir(inputDir):
            print("ERROR!!")
            return
        file_names = sorted(os.listdir(inputDir))
        size = len(file_names)
        for i, f in enumerate(file_names, start=1):
            self.learnSingle(os.path.join(inputDir, f))
            print(str(int((i * 100) / size)) + "%", end="\r", flush=True)
        print("SUCCESS LEARNING FINISH")

        helper = self.helper
        helper.dictsToDbList()
        batches = (
            (self.dict, helper.list1),
            (self.dictBy2, helper.list2),
            (self.dictBy3, helper.list3),
        )
        for collection, documents in batches:
            # Assumes the helper lists are plain sequences of documents; an
            # empty batch is skipped because bulk-inserting nothing is an
            # error in PyMongo -- TODO confirm against the Helper class.
            if documents:
                collection.insert(documents)

    def suggest(self, pprev=None, prev=None):
        """Suggest the next word.

        Returns a pair ``(by_prev, by_pprev_and_prev)``: the most likely word
        following ``prev`` alone, and the most likely word following the pair
        ``(pprev, prev)``. Each element is None when its lookup has no match or
        when the words it needs were not given. Ties on grade are broken
        alphabetically.
        """
        if prev is None:
            return None, None

        pair = self.dictBy2.find_one(
            {"first": prev}, sort=[("grade", -1), ("second", 1)])
        by_prev = pair["second"] if pair is not None else None
        if pprev is None:
            return by_prev, None

        triple = self.dictBy3.find_one(
            {"first": pprev, "second": prev},
            sort=[("grade", -1), ("third", 1)])
        # A trigram miss must not discard the bigram suggestion.
        by_both = triple["third"] if triple is not None else None
        return by_prev, by_both

    @staticmethod
    def _take(cursor, limit):
        """Return at most ``limit`` leading documents of ``cursor``."""
        # zip stops as soon as range() is exhausted, so nothing is taken for
        # limit <= 0 and no extra document is consumed.
        return [document for _, document in zip(range(limit), cursor)]

    def suggest2(self, pprev=None, prev=None, x=5):
        """Suggest up to ``x`` next words together with their grades.

        Returns a pair ``(by_prev, by_pprev_and_prev)``. Each element is either
        None or a list of ``[grade, word]`` entries, best first; grades in the
        second list are multiplied by ``weight3``. ``(None, None)`` is returned
        when ``prev`` is None or has no match.
        """
        if prev is None:
            return None, None

        pairs = self._take(
            self.dictBy2.find({"first": prev}).sort(
                [('grade', -1), ('second', 1)]),
            x)
        if not pairs:
            return None, None
        res1 = [[a["grade"], a["second"]] for a in pairs]
        if pprev is None:
            return res1, None

        # Tie-break on 'third': 'second' is fixed by the query itself.
        triples = self._take(
            self.dictBy3.find({"first": pprev, "second": prev}).sort(
                [('grade', -1), ('third', 1)]),
            x)
        if not triples:
            return res1, None
        return res1, [[weight3 * a["grade"], a["third"]] for a in triples]