summaryrefslogtreecommitdiffstats
path: root/payyans-doc-converter/payyans-doc-converter.py
blob: 24498a09258f6b0b0289a3d4d80dd3f570d30268 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
#
# Copyright (C) 2009 Rajeesh K Nambiar <rajeeshknambiar@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#       
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#       
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys
import os
from optparse import OptionParser

# import the oorunner helper module we've written
import oorunner
# Payyans
from payyans import Payyans

class OOWrapper:
	'''Drive an OpenOffice instance to export a document as plain text and
	hand that text to Payyans for ASCII <-> Unicode conversion.

	Attributes set by the methods below:
	  desktop  - desktop object returned by oorunner.OORunner().connect()
	  document - component loaded by convertDocToText(), None otherwise
	  infile   - absolute path of the document being converted
	  textfile - absolute path of the exported .txt file
	  outfile  - absolute path of the Payyans output file
	'''

	# Known OpenOffice installations as (program directory, directory added
	# to sys.path so that uno can be imported) pairs. The first pair whose
	# program directory exists is used.
	_OO_PATHS = (
		('/usr/lib64/ooo-2.0/program',   '/usr/lib64/ooo-2.0/program'),
		('/opt/openoffice.org3/program', '/opt/openoffice.org/basis3.0/program'),
	)

	def __init__(self):
		'''Find OpenOffice, start it and connect to its desktop object.'''
		# Initialise every attribute up front so closeOffice() never hits a
		# missing attribute, whatever was (or was not) loaded before.
		self.desktop  = None
		self.document = None
		self.infile   = None
		self.outfile  = None
		self.textfile = None

		# Find OpenOffice.
		for program_path, lib_path in self._OO_PATHS:
			if not os.path.exists(program_path):
				continue
			# Add to path so we can find uno.
			if lib_path not in sys.path:
				sys.path.insert(0, lib_path)
				# This is required for loadComponentFromURL to work properly.
				# Assigning through os.environ also performs the putenv() and
				# keeps Python's own view of the environment in sync.
				os.environ['URE_BOOTSTRAP'] = 'vnd.sun.star.pathname:' + program_path + '/fundamentalrc'
			break

		# start the openoffice instance
		oor = oorunner.OORunner()
		# get the central desktop object
		self.desktop = oor.connect()

	def createTextFilter(self):
		'''Return the PropertyValue that selects the "Text" export filter.'''
		# NOTE(review): uno looks unused, but it is presumably imported first
		# because the com.sun.star.* import below depends on it - keep it.
		import uno
		from com.sun.star.beans import PropertyValue
		text_filter       = PropertyValue()
		text_filter.Name  = "FilterName"
		text_filter.Value = "Text"
		return text_filter

	def convertDocToText(self, docFile):
		''' Convert the Document file to Text format

		@docFile : path of the document to convert

		The text is stored next to the input file, with the extension
		replaced by ".txt"; that path is kept in self.textfile.
		OpenOffice is closed afterwards, whether the export worked or not.
		Raises SystemExit if the input file does not exist or cannot be
		loaded by OpenOffice.
		'''
		self.infile = os.path.abspath(docFile)
		if not os.path.exists(self.infile):
			raise SystemExit("Input file doesn't exist")

		# Imported here rather than at module level: __init__ is what puts
		# OpenOffice's Python modules on sys.path.
		import uno

		(fname, ext) = os.path.splitext(self.infile)
		self.textfile = fname + ".txt"
		try:
			# systemPathToFileUrl escapes spaces and other special characters,
			# which a plain "file://" + path concatenation does not.
			self.document = self.desktop.loadComponentFromURL(
				uno.systemPathToFileUrl(self.infile), "_blank", 0, ())
			if self.document is None:
				raise SystemExit("Couldn't load input file in OpenOffice")
			self.document.storeAsURL(
				uno.systemPathToFileUrl(self.textfile), (self.createTextFilter(),))
		finally:
			# Never leave the OpenOffice instance running behind us.
			self.closeOffice()

	def closeOffice(self):
		'''Dispose of the loaded document, if any, and terminate OpenOffice.'''
		try:
			# Close the document
			if self.document is not None:
				self.document.dispose()
				self.document = None
		finally:
			# Close the OpenOffice desktop even if disposing the document failed
			self.desktop.terminate()

	def covertDocWithPayyans(self, inFile, mapFile, outFile, direction):
		''' Call Payyans to do the actual conversion

		@inFile    : document to convert (exported to text first)
		@mapFile   : mapping file passed to Payyans
		@outFile   : file name passed to Payyans as its output
		@direction : a2u/u2a for ASCII-to-Unicode and vice versa; any value
		             other than "a2u" is treated as "u2a"
		'''
		self.convertDocToText(inFile)
		self.outfile = os.path.abspath(outFile)
		p = Payyans(self.textfile, self.outfile, os.path.abspath(mapFile))
		if not p:
			raise SystemExit("Couldn't create Payyan instance")
		if direction == "a2u":
			p.ascii2unicode()
		else:
			p.unicode2ascii()

	# Correctly spelled name; the original (misspelled) one is kept above so
	# existing callers continue to work.
	convertDocWithPayyans = covertDocWithPayyans


def parseArguments(argv=None):
	'''Parse and validate the command line options.

	@argv : list of arguments without the program name; None makes
	        OptionParser read sys.argv[1:]

	Returns the tuple (infile, mapfile, outfile, direction); the three file
	names are absolute paths.
	Raises SystemExit if the input or mapping file does not exist, if no
	output file was given, or if the direction is not 'a2u' or 'u2a'.
	'''
	# No positional argument is read, so the usage line lists the options.
	usage = "usage: %prog -i INPUT -o OUTPUT -m MAPPING -d a2u|u2a"
	parser = OptionParser(usage)
	parser.add_option("-i", "--input-file", dest="input_filename",   help="the input file in ascii format")
	parser.add_option("-o", "--output-file", dest="output_filename",   help="the output file name")
	parser.add_option("-d", "--direction", dest="direction", help="'a2u': Ascii to Unicode, 'u2a': Unicode to Ascii")
	parser.add_option("-m", "--mapping-file", dest="mapping_filename", help="the ascii to unicode mapping file name")
	(options, args) = parser.parse_args(argv)

	# An option that was not given stays "" and fails the checks below.
	infile = outfile = mapfile = ""
	if options.input_filename:
		infile = os.path.abspath(options.input_filename)
	if options.output_filename:
		outfile = os.path.abspath(options.output_filename)
	if options.mapping_filename:
		mapfile = os.path.abspath(options.mapping_filename)
	direction = options.direction

	if not os.path.exists(infile):
		raise SystemExit("Error : Input file doesn't exist")
	if not os.path.exists(mapfile):
		raise SystemExit("Error : Mapping file doesn't exist")
	if direction not in ('a2u', 'u2a'):
		raise SystemExit("Error :Direction should be either 'a2u' or 'u2a'")
	# Without this check an omitted -o would later be resolved to the
	# current directory by os.path.abspath("").
	if not outfile:
		raise SystemExit("Error : Output file not specified")
	return (infile, mapfile, outfile, direction)


def main(argv=None):
	'''Command line entry point: validate the options, then convert.'''
	(infile, mapfile, outfile, direction) = parseArguments(argv)
	app = OOWrapper()
	app.covertDocWithPayyans(infile, mapfile, outfile, direction)


if __name__ == "__main__":
	main()