summaryrefslogtreecommitdiffstats
path: root/input-methods/sulekha/Transliteration.c
diff options
context:
space:
mode:
Diffstat (limited to 'input-methods/sulekha/Transliteration.c')
-rw-r--r--input-methods/sulekha/Transliteration.c772
1 files changed, 0 insertions, 772 deletions
diff --git a/input-methods/sulekha/Transliteration.c b/input-methods/sulekha/Transliteration.c
deleted file mode 100644
index b653daa..0000000
--- a/input-methods/sulekha/Transliteration.c
+++ /dev/null
@@ -1,772 +0,0 @@
-/* Transliteration.c
- *
- * Copyright (C) 2007-2008
- * Santhosh Thottingal<santhosh00@gmail.com>,
- * Swathanthra Malayalam Computing.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include<stdio.h>
-#include<string.h>
-
-#include<stdlib.h>
-#include "Transliteration.h"
-/*
-Transliterate a Dhvani phonetic string to Malayalam.
-Algorithm:
-1) A vowel at the 0th position is printed as the independent vowel letter.
-2) A vowel in the middle or at the end of the string is printed as its vowel sign, except for a plain 'a', for which nothing is printed.
-3) Every consonant is printed as the corresponding Malayalam character.
-4) If a phonetic character is not recognised, '?' is printed.
-5) Octal C escape sequences are used to write the Unicode Malayalam strings.
-*/
-
-/*
- * Command-line driver: transliterates argv[1] with transliterate_ml () and
- * prints the result followed by a newline.
- *
- * When no word is given, a usage line is printed and the process exits with
- * status 0 (unchanged from the previous behaviour).
- *
- * Returns 0 on success, 1 if transliterate_ml () returned NULL.
- */
-int
-main1 (int argc, const char *argv[])
-{
-  const char *word;
-  char *transliterated_word;
-
-  /* argc may legally be 0, in which case argv[0] is NULL. */
-  if (argc < 2)
-    {
-      printf ("Usage: %s word\n",
-              (argc > 0 && argv[0] != NULL) ? argv[0] : "transliterate");
-      exit (0);
-    }
-  word = argv[1];
-  /* transliterate_ml () is declared with a non-const pointer; the cast makes
-     the dropped qualifier explicit. */
-  transliterated_word =
-    transliterate_ml ((gchar *) word, 0, (int) strlen (word));
-  if (transliterated_word == NULL)
-    {
-      return 1;
-    }
-  printf ("%s\n", transliterated_word);
-  /* The result is heap-allocated by transliterate_ml (); release it. */
-  free (transliterated_word);
-  return 0;
-}
-
-/* Virama (chandrakkala); written between two consonants to join them. */
-static const char ml_virama[] = "\340\265\215";
-
-/* Zero-width joiner; consonant + virama + ZWJ is how a chillu is written. */
-static const char ml_zwj[] = "\342\200\215";
-
-/* Worst-case number of output bytes produced for one input character:
-   a chillu is consonant + virama + ZWJ, three bytes each. */
-enum { ML_MAX_BYTES_PER_CHAR = 9 };
-
-/* Append FIRST, a virama and SECOND to OUT (a two-consonant conjunct). */
-static void
-ml_put_conjunct (char *out, const char *first, const char *second)
-{
-  strcat (out, first);
-  strcat (out, ml_virama);
-  strcat (out, second);
-}
-
-/* Non-zero when C is one of the vowels that keep a following n/N a full
-   consonant instead of a chillu.
-   NOTE(review): 'u'/'U' are not in the list, so "nu" yields a chillu
-   followed by a u sign -- looks unintended, confirm before changing. */
-static int
-ml_blocks_chillu (char c)
-{
-  /* strchr () would also match the terminator, so exclude '\0' first. */
-  return c != '\0' && strchr ("aAeEiIoO", c) != NULL;
-}
-
-/* Letter for the consonants that have no special cases, or NULL when C is
-   not one of them. */
-static const char *
-ml_simple_consonant (char c)
-{
-  switch (c)
-    {
-    case 'p': return "\340\264\252";
-    case 'f': return "\340\264\253";
-    case 'b': return "\340\264\254";
-    case 'y': return "\340\264\257";
-    case 'r': return "\340\264\260";
-    case 'l': return "\340\264\262";
-    case 'v': return "\340\264\265";
-    case '$': return "\340\264\266";
-    case 's': return "\340\264\270";
-    case 'S': return "\340\264\267";
-    case 'h': return "\340\264\271";
-    case 'L': return "\340\264\263";
-    case 'z': return "\340\264\264";
-    case 'R': return "\340\264\261";
-    default:  return NULL;
-    }
-}
-
-/*
- * Transliterate the characters phonetic_string[start] .. phonetic_string[end-1]
- * to a Malayalam string written as UTF-8 bytes.
- *
- * Rules:
- *  - a vowel at the first position of the range is written as the vowel
- *    letter, anywhere else as the vowel sign (a plain 'a' has no sign);
- *  - consonants are written as letters; a doubled consonant is joined with a
- *    virama, and several consonants change when followed by 'h';
- *  - a character that is not recognised is written as '?'.
- *
- * Returns a newly malloc ()ed, NUL-terminated string that the caller must
- * free (), or NULL when the range is invalid (end < start), when
- * phonetic_string is NULL for a non-empty range, or when allocation fails.
- *
- * Differences from the previous revision:
- *  - the buffer is sized for the worst case plus the terminator (it used to
- *    be length * 4 bytes, which chillus and conjuncts overflowed);
- *  - look-ahead never reads at or beyond END, and the 'm' rule never reads
- *    before START;
- *  - "first position" means START for vowels too (it was index 0), matching
- *    the consonant rules;
- *  - the stray space before the word-initial 'o' letter is gone;
- *  - the result is no longer printed to stdout as a side effect.
- */
-char *
-transliterate_ml (gchar *phonetic_string, int start, int end)
-{
-  char *ml_string;
-  int length = end - start;
-  int i;
-
-  if (length < 0 || (length > 0 && phonetic_string == NULL))
-    {
-      return NULL;
-    }
-  ml_string = malloc ((size_t) length * ML_MAX_BYTES_PER_CHAR + 1);
-  if (ml_string == NULL)
-    {
-      return NULL;
-    }
-  ml_string[0] = '\0';
-
-  for (i = start; i < end; i++)
-    {
-      const char cur = phonetic_string[i];
-      /* '\0' stands for "no next character inside the range". */
-      const char next = (i + 1 < end) ? phonetic_string[i + 1] : '\0';
-      const int at_start = (i == start);
-      const int doubled = (next == cur);
-      const char *letter;
-
-      /* A branch that consumes NEXT as part of a digraph does an extra i++. */
-      switch (cur)
-        {
-          /* ---- Vowels ---- */
-        case 'a':
-          if (next == 'a')
-            {                   /* aa: as in aana / kaazhcha */
-              strcat (ml_string, at_start ? "\340\264\206" : "\340\264\276");
-              i++;
-            }
-          else if (next == 'i')
-            {                   /* ai: as in airaavatham / kaitha */
-              /* NOTE(review): the word-initial form is U+0D02 (anusvara),
-                 not the AI letter "\340\264\220" -- confirm intent. */
-              strcat (ml_string, at_start ? "\340\264\202" : "\340\265\210");
-              i++;
-            }
-          else if (next == 'u')
-            {                   /* au: as in aushadham / kauravar */
-              strcat (ml_string, at_start ? "\340\264\224" : "\340\265\227");
-              i++;
-            }
-          else if (at_start)
-            {                   /* a letter, as in amaram */
-              strcat (ml_string, "\340\264\205");
-            }
-          /* A plain 'a' inside a word is inherent in the consonant. */
-          break;
-        case 'A':
-          strcat (ml_string, at_start ? "\340\264\206" : "\340\264\276");
-          break;
-        case 'i':
-          if (next == 'i' || next == 'e')
-            {                   /* ii / ie */
-              strcat (ml_string, at_start ? "\340\264\210" : "\340\265\200");
-              i++;
-            }
-          else
-            {                   /* i */
-              strcat (ml_string, at_start ? "\340\264\207" : "\340\264\277");
-            }
-          break;
-        case 'I':
-          strcat (ml_string, at_start ? "\340\264\210" : "\340\265\200");
-          break;
-        case 'u':
-          if (next == 'u' || next == 'o')
-            {                   /* uu / uo */
-              strcat (ml_string, at_start ? "\340\264\212" : "\340\265\202");
-              i++;
-            }
-          else
-            {                   /* u */
-              strcat (ml_string, at_start ? "\340\264\211" : "\340\265\201");
-            }
-          break;
-        case 'U':
-          strcat (ml_string, at_start ? "\340\264\212" : "\340\265\202");
-          break;
-        case '^':
-          strcat (ml_string, at_start ? "\340\264\213" : "\340\265\203");
-          break;
-        case 'e':
-          if (at_start)
-            {
-              if (next == 'e' || next == 'a')
-                {               /* ee / ea letter */
-                  strcat (ml_string, "\340\264\217");
-                  i++;
-                }
-              else
-                {               /* e letter */
-                  strcat (ml_string, "\340\264\207");
-                }
-            }
-          else if (next == 'e')
-            {                   /* ee sign */
-              strcat (ml_string, "\340\265\200");
-              i++;
-            }
-          else
-            {                   /* e sign */
-              strcat (ml_string, "\340\265\206");
-            }
-          break;
-        case 'E':
-          strcat (ml_string, at_start ? "\340\264\217" : "\340\265\207");
-          break;
-        case 'o':
-          if (at_start)
-            {
-              /* NOTE(review): by code point "oo" maps to U+0D12 and "o" to
-                 U+0D13, which looks swapped -- confirm before changing. */
-              if (next == 'o')
-                {               /* oo letter */
-                  strcat (ml_string, "\340\264\222");
-                  i++;
-                }
-              else
-                {               /* o letter */
-                  strcat (ml_string, "\340\264\223");
-                }
-            }
-          else if (next == 'o')
-            {                   /* oo sign */
-              strcat (ml_string, "\340\265\213");
-              i++;
-            }
-          else if (next == 'u')
-            {                   /* ou sign */
-              strcat (ml_string, "\340\265\214");
-              i++;
-            }
-          else
-            {                   /* o sign */
-              strcat (ml_string, "\340\265\212");
-            }
-          break;
-        case 'O':
-          strcat (ml_string, at_start ? "\340\264\223" : "\340\265\213");
-          break;
-        case '`':
-          strcat (ml_string, at_start ? "\340\264\224" : "\340\265\227");
-          break;
-        case '~':              /* chandrakkala */
-          strcat (ml_string, ml_virama);
-          break;
-        case ':':              /* Ah */
-          strcat (ml_string, "\340\264\203");
-          break;
-
-          /* ---- Consonants ---- */
-        case 'k':
-          if (doubled)
-            {                   /* kk */
-              ml_put_conjunct (ml_string, "\340\264\225", "\340\264\225");
-              i++;
-            }
-          else if (next == 'h')
-            {                   /* kh */
-              strcat (ml_string, "\340\264\226");
-              i++;
-            }
-          else
-            {                   /* just a k */
-              strcat (ml_string, "\340\264\225");
-            }
-          break;
-        case 'K':
-          if (at_start)
-            {                   /* probably a name or place starting with K */
-              strcat (ml_string, "\340\264\225");
-            }
-          else if (doubled)
-            {                   /* KK */
-              ml_put_conjunct (ml_string, "\340\264\225", "\340\264\225");
-              i++;
-            }
-          else
-            {                   /* Kh as in Khalid, or a lone K */
-              strcat (ml_string, "\340\264\226");
-              if (next == 'h')
-                {
-                  i++;
-                }
-            }
-          break;
-        case 'g':
-          if (doubled)
-            {                   /* gg */
-              ml_put_conjunct (ml_string, "\340\264\227", "\340\264\227");
-              i++;
-            }
-          else if (next == 'h')
-            {                   /* gh */
-              strcat (ml_string, "\340\264\230");
-              i++;
-            }
-          else
-            {                   /* just a g, as in gaanam */
-              strcat (ml_string, "\340\264\227");
-            }
-          break;
-        case 'G':
-          if (at_start)
-            {                   /* probably a name or place starting with G */
-              strcat (ml_string, "\340\264\227");
-            }
-          else
-            {                   /* Gh, or a lone G */
-              strcat (ml_string, "\340\264\230");
-              if (next == 'h')
-                {
-                  i++;
-                }
-            }
-          break;
-        case 'c':
-        case 'C':
-          if (doubled)
-            {                   /* cc / CC is written as kk */
-              ml_put_conjunct (ml_string, "\340\264\225", "\340\264\225");
-              i++;
-            }
-          else if (next == 'h' || (cur == 'C' && next == 'H'))
-            {
-              if (at_start)
-                {               /* ch as in chaaya */
-                  strcat (ml_string, "\340\264\232");
-                }
-              else
-                {
-                  /* Inside a word "ch" usually means chcha, as in pacha;
-                     this is a guess that a spell checker may correct. */
-                  ml_put_conjunct (ml_string, "\340\264\232", "\340\264\232");
-                }
-              i++;
-            }
-          else if (at_start)
-            {                   /* start of a name, as in cibu */
-              strcat (ml_string, "\340\264\270");
-            }
-          else
-            {                   /* just a k/c */
-              strcat (ml_string, "\340\264\226");
-            }
-          break;
-        case 'j':
-        case 'J':
-          if (doubled)
-            {                   /* jj */
-              ml_put_conjunct (ml_string, "\340\264\234", "\340\264\234");
-              i++;
-            }
-          else if (next == 'h')
-            {                   /* jh */
-              strcat (ml_string, "\340\264\235");
-              i++;
-            }
-          else
-            {                   /* just a j, as in janam */
-              strcat (ml_string, "\340\264\234");
-            }
-          break;
-        case 't':
-        case 'T':
-          if (doubled)
-            {                   /* tt */
-              ml_put_conjunct (ml_string, "\340\264\237", "\340\264\237");
-              i++;
-            }
-          else if (next == 'h' || next == 'H')
-            {                   /* th */
-              strcat (ml_string, "\340\264\244");
-              i++;
-            }
-          else
-            {
-              /* Few words start with t; a leading t is most probably tha. */
-              strcat (ml_string, at_start ? "\340\264\244" : "\340\264\237");
-            }
-          break;
-        case 'd':
-          if (doubled)
-            {                   /* dd */
-              ml_put_conjunct (ml_string, "\340\264\246", "\340\264\246");
-              i++;
-            }
-          else if (next == 'h' || next == 'H')
-            {                   /* dh */
-              strcat (ml_string, "\340\264\247");
-              i++;
-            }
-          else
-            {                   /* da */
-              strcat (ml_string, "\340\264\246");
-            }
-          break;
-        case 'D':
-          if (doubled)
-            {                   /* DD */
-              ml_put_conjunct (ml_string, "\340\264\246", "\340\264\246");
-              i++;
-            }
-          else if (next == 'h' || next == 'H')
-            {                   /* Dh as in viDhi */
-              strcat (ml_string, "\340\264\242");
-              i++;
-            }
-          else
-            {                   /* Da */
-              strcat (ml_string, at_start ? "\340\264\246" : "\340\264\241");
-            }
-          break;
-        case 'N':
-          if (doubled)
-            {                   /* NN */
-              ml_put_conjunct (ml_string, "\340\264\243", "\340\264\243");
-              i++;
-            }
-          else if (at_start)
-            {
-              strcat (ml_string, "\340\264\250");
-            }
-          else
-            {
-              strcat (ml_string, "\340\264\243");
-              if (!ml_blocks_chillu (next))
-                {               /* chillu N */
-                  strcat (ml_string, ml_virama);
-                  strcat (ml_string, ml_zwj);
-                }
-            }
-          break;
-        case 'n':
-          if (doubled || next == 'j')
-            {
-              /* nn, and nj as in njaan.
-                 NOTE(review): "nj" emits the same conjunct as "nn" rather
-                 than the nya letter "\340\264\236" -- confirm intent. */
-              ml_put_conjunct (ml_string, "\340\264\250", "\340\264\250");
-              i++;
-            }
-          else
-            {
-              strcat (ml_string, "\340\264\250");
-              if (!at_start && !ml_blocks_chillu (next))
-                {               /* chillu n */
-                  strcat (ml_string, ml_virama);
-                  strcat (ml_string, ml_zwj);
-                }
-            }
-          break;
-        case 'B':
-          strcat (ml_string, "\340\264\255");
-          break;
-        case 'm':
-          {
-            /* '\0' when there is no previous character inside the range. */
-            const char prev = (i > start) ? phonetic_string[i - 1] : '\0';
-
-            if (i == end - 1 && cur != prev)
-              {                 /* end of word: most probably an anuswaram */
-                strcat (ml_string, "\340\264\202");
-              }
-            else
-              {
-                strcat (ml_string, "\340\264\256");
-              }
-            if (doubled)
-              {                 /* mm: the second m is emitted next round */
-                strcat (ml_string, ml_virama);
-              }
-          }
-          break;
-        default:
-          letter = ml_simple_consonant (cur);
-          if (letter == NULL)
-            {                   /* not recognised */
-              strcat (ml_string, "?");
-            }
-          else
-            {
-              strcat (ml_string, letter);
-              if (doubled)
-                {
-                  /* Doubled consonant: add a virama here; the second
-                     letter is emitted by the next iteration. */
-                  strcat (ml_string, ml_virama);
-                }
-            }
-          break;
-        }
-    }
-  return ml_string;
-}