2 files changed, 19 insertions, 3 deletions
diff --git a/silpa/modules/ngram/ml.txt b/silpa/modules/ngram/ml.txt
new file mode 100644
index 0000000..4c48980
--- /dev/null
+++ b/silpa/modules/ngram/ml.txt
@@ -0,0 +1 @@
+കടലില്‍ ജീവിക്കുന്ന ഒരു സസ്തനിയാണ് നീലത്തിമിംഗലം. ബലീന്‍ തിമിംഗലങ്ങളുടെ ഒരു ഉപജാതിയാണിവ. ലോകത്ത് ഇന്നുവരെയുള്ളതില്‍ ഏറ്റവും വലിയ ജീവിയായി കണക്കാക്കപ്പെടുന്ന നീലത്തിമിംഗലങ്ങള്‍ക്ക് 33 മീ. നീളവും 181 മെട്രിക് ടണിലധികം ഭാരവും ഉണ്ടാകാം. നീണ്ട ശരീരപ്രകൃതിയുള്ള നീലത്തിമിംഗലങ്ങളുടെ ശരീരം നീലകലര്‍ന്ന ചാരനിറത്തോടെയാണുണ്ടാവുക, ശരീരത്തിനടിഭാഗത്തേക്ക് നിറം കുറവായിരിക്കും. നീലത്തിമിംഗലങ്ങള്‍ക്ക് കുറഞ്ഞത് മൂന്നുപജാതികളെങ്കിലും ഉണ്ടെന്നു കരുതുന്നു.
diff --git a/silpa/modules/ngram/visualizer.py b/silpa/modules/ngram/visualizer.py
index 0debe28..d46baeb 100644
--- a/silpa/modules/ngram/visualizer.py
+++ b/silpa/modules/ngram/visualizer.py
@@ -26,10 +26,12 @@ import codecs
 class NGramVisualizer:
 	depth=0
 	def loadCorpus(self, corpus_file_name):	
+		limiters = [".","!","?",",",";"]
 		graph_dict = dict()
 		line = []
 		line_number = 0
 		rule_number = 0
+		corpus=""
 		corpus_file = codecs. open(corpus_file_name,encoding='utf-8', errors='ignore')
 		while 1:
 			line_number = line_number +1 
@@ -42,10 +44,22 @@ class NGramVisualizer:
 			line = text.strip()
 			if(line == ""):
 				  continue 
+			corpus=corpus+" "+line
+		sentences=[]
+		sentence = ""
+		start = 0
+		for index in range(0,len(corpus)):
+			for delimit in limiters:
+				if corpus[index] == delimit:
+					sentence = corpus[start:index]
+					sentences.append(sentence)
+					start = index+1
+		for line in sentences:
 			words=line.split(" ")
 			word_count=len(words)
 			prev_word=""
 			for word in words:
+				#print word
 				word=word.strip()
 				if(prev_word==""):
 					prev_word=word	
@@ -56,7 +70,8 @@ class NGramVisualizer:
 					else:
 						graph_dict[prev_word]=word
 					prev_word=word	
-			prev_word=""		
+			prev_word=""
+
 		return graph_dict
 	def generate_full_graph(self, start_word, graph_dict,outputimage):
 		
@@ -95,8 +110,8 @@ class NGramVisualizer:
 		
 if __name__ == "__main__":
 	ngv=NGramVisualizer () 
-	graph_dict=ngv.loadCorpus ("hi.txt")
+	graph_dict=ngv.loadCorpus ("ml.txt")
 	graph=pydot.Dot()
-	graph=ngv.generate_graph(graph_dict, graph,u"भारत")
+	graph=ngv.generate_graph(graph_dict, graph,u"നീലത്തിമിംഗലങ്ങള്‍ക്ക്")
 	print graph.to_string().encode("utf-8")
 	#graph.write("ngvgraph-hi.png","dot", "raw" )