diff options
Diffstat (limited to 'input-methods/sulekha/Transliteration.c')
-rw-r--r-- | input-methods/sulekha/Transliteration.c | 772 |
1 files changed, 772 insertions, 0 deletions
diff --git a/input-methods/sulekha/Transliteration.c b/input-methods/sulekha/Transliteration.c new file mode 100644 index 0000000..b653daa --- /dev/null +++ b/input-methods/sulekha/Transliteration.c @@ -0,0 +1,772 @@ +/* Transliteration.c + * + * Copyright (C) 2007-2008 + * Santhosh Thottingal<santhosh00@gmail.com>, + * Swathanthra Malayalam Computing. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include<stdio.h> +#include<string.h> + +#include<stdlib.h> +#include "Transliteration.h" +/* +Transliterate the a dhvani phonetic string to malayalam. +Algorithm: +1)For all vowels, if it is at the 0th position print as it is +2)If the vowel is in between/end of the string print the sign of the vowel except for A +3)For all consonants print the malayalam character +4)If a phonetic character is not identified print '?' +5)Octal C escaped strings are used for printing the Unicode Malayalam string +*/ + +main1 (int argc, const char *argv[]) +{ + const char *word; + const char *transliterated_word; + int word_length = 0; + if (argc == 1) + { + printf ("Usage: %s word\n", argv[0]); + exit (0); + } + word = argv[1]; + transliterated_word = transliterate_ml (word, 0, strlen (word)); +// printf ("%s\n",word); + printf ("%s\n", transliterated_word); + return 0; +} + +char * +transliterate_ml (gchar *phonetic_string, int start, int end) +{ + char *ml_string=NULL; + int length = 0; + int i = start; + length = end - start; + ml_string = (char *) malloc (length * 4 * sizeof (char)); + ml_string[0]='\0'; + while (i < end) + { + + switch (phonetic_string[i]) + { + //Vowels + case 'a': + if (i == 0) //first letter, use swaram as such + { // as in amaram + + if (i < end && phonetic_string[i + 1] == 'a') + { // as in aana + strcat (ml_string, "\340\264\206"); //aa letter + i++; + } + else if (i < end && phonetic_string[i + 1] == 'i') + { // as in airaavatham + strcat (ml_string, "\340\264\202"); //ai letter + i++; + } + else if (i < end && phonetic_string[i + 1] == 'u') + { // as in airaavatham + strcat (ml_string, "\340\264\224"); //au aushadham + i++; + } + else + { + strcat (ml_string, "\340\264\205"); //a letter + } + } + else + { //swara chihnam + if (i < end && phonetic_string[i + 1] == 'a') //aa sign + { // as in kaazhcha + strcat (ml_string, "\340\264\276"); + i++; + } + else if (i < end && phonetic_string[i + 1] == 'i') + { // as in kaitha + strcat (ml_string, "\340\265\210"); //ai sign + i++; + } + else if (i < end && phonetic_string[i + 1] == 'u') + { // as in kauravar + strcat (ml_string, "\340\265\227"); //au sign + i++; + } + } + break; + + case 'A': + (i == 0) ? strcat (ml_string, "\340\264\206") : strcat (ml_string, + "\340\264\276"); + break; + case 'i': + if (i == 0) + { + if (i < end + && ((phonetic_string[i + 1] == 'i') + || (phonetic_string[i + 1] == 'e'))) + { //ii/ee letter + strcat (ml_string, "\340\264\210"); + i++; + } + else + { + strcat (ml_string, "\340\264\207"); // i/e letter + } + } + else + { + if (i < end && ((phonetic_string[i + 1] == 'i') || (phonetic_string[i + 1] == 'e'))) //ii/ee sign + { + strcat (ml_string, "\340\265\200"); + i++; + } + else + { + strcat (ml_string, "\340\264\277"); // i/e sign + } + } + break; + case 'I': + (i == 0) ? strcat (ml_string, "\340\264\210") : strcat (ml_string, + "\340\265\200"); + break; + case 'u': + if (i == 0) + { + if (i < end + && ((phonetic_string[i + 1] == 'o') + || (phonetic_string[i + 1] == 'u'))) + { //ii/ee letter + strcat (ml_string, "\340\264\212"); + i++; + } + else + { + strcat (ml_string, "\340\264\211"); // u letter + } + } + else + { + if (i < end && ((phonetic_string[i + 1] == 'u') || (phonetic_string[i + 1] == 'o'))) //ii/ee sign + { + strcat (ml_string, "\340\265\202"); //uu sign + i++; + } + else + { + strcat (ml_string, "\340\265\201"); // u sign + } + } + break; + case 'U': + (i == 0) ? strcat (ml_string, "\340\264\212") : strcat (ml_string, + "\340\265\202"); + break; + case '^': + (i == 0) ? strcat (ml_string, "\340\264\213") : strcat (ml_string, + "\340\265\203"); + break; + case 'e': + if (i == 0) + { + if (i < end + && ((phonetic_string[i + 1] == 'e') + || (phonetic_string[i + 1] == 'a'))) + { //ii/ee letter + strcat (ml_string, "\340\264\217"); + i++; + } + else + { + strcat (ml_string, "\340\264\207"); // e letter + } + } + else + { + if (i < end && ((phonetic_string[i + 1] == 'e'))) //e/ee sign + { + strcat (ml_string, "\340\265\200"); //ee sign + i++; + } + else + { + strcat (ml_string, "\340\265\206"); // u sign + } + } + break; + case 'E': + (i == 0) ? strcat (ml_string, "\340\264\217") : strcat (ml_string, + "\340\265\207"); + break; +// case '@': //ai +// (i == 0) ? strcat (ml_string, "\340\264\220") : strcat (ml_string, +// "\340\265\210"); +// break; + case 'o': + if (i == 0) + { + if (i < end && ((phonetic_string[i + 1] == 'o'))) + { //oo letter + strcat (ml_string, "\340\264\222"); + i++; + } + else + { + strcat (ml_string, " \340\264\223"); // o letter + } + } + else + { + if (i < end && ((phonetic_string[i + 1] == 'o'))) //oo sign + { + strcat (ml_string, "\340\265\213"); //oo sign + i++; + } + else if (i < end && ((phonetic_string[i + 1] == 'u'))) //ou sign + { + strcat (ml_string, "\340\265\214"); //ou sign + i++; + } + else + { + strcat (ml_string, "\340\265\212"); // o sign + } + } + break; + case 'O': + (i == 0) ? strcat (ml_string, "\340\264\223") : strcat (ml_string, + "\340\265\213"); + break; + case '`': + (i == 0) ? strcat (ml_string, "\340\264\224") : strcat (ml_string, + "\340\265\227"); + break; +// case '.': //am +// strcat (ml_string, "\340\264\202"); +// break; + case '~': //chandrakkala + strcat (ml_string, "\340\265\215"); + break; + case ':': //Ah + strcat (ml_string, "\340\264\203"); + break; + //Consonants + case 'k': + + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\264\225"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\225"); + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //kh + strcat (ml_string, "\340\264\226"); + i++; + } + else + { + strcat (ml_string, "\340\264\225"); //just a k + + } + break; + case 'K': + if (i == start) + { //start of a word- may be a Name.Place..starting with K + strcat (ml_string, "\340\264\225"); //just a k + } + + else if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //KK + strcat (ml_string, "\340\264\225"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\225"); + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //Kh as in Khalid + strcat (ml_string, "\340\264\226"); + i++; + } + else + { + strcat (ml_string, "\340\264\226"); //just a k + + } + + break; + case 'g': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\264\227"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\227"); + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //gh + strcat (ml_string, "\340\264\230"); + i++; + } + else + { + strcat (ml_string, "\340\264\227"); //just a g as in gaanam + + } + break; + case 'G': + + if (i == start) + { //start of a word- may be a Name.Place..starting with G + strcat (ml_string, "\340\264\227"); //just a g + } + + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //Kh as in Gha + strcat (ml_string, "\340\264\230"); + i++; + } + else + { + strcat (ml_string, "\340\264\230"); //just a G + + } + break; + case 'c': + + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //kk/cc + strcat (ml_string, "\340\264\225"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\225"); + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { + if (i == start) + { + //Ch as in chaaya + strcat (ml_string, "\340\264\232"); + i++; + + } + else + { +//most of the time the ch in side the manglish means chcha as an pacha *wild guess. leaving this to aspell :) +//let he decides + strcat (ml_string, "\340\264\232"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\232"); + i++; + } + } + else if (i == start && (phonetic_string[i + 1] != 'h')) + { //start of a word- may be a Name. as in cibu + strcat (ml_string, "\340\264\270"); //just a c/k + } + else + { + strcat (ml_string, "\340\264\226"); //just a k/c + + } + + break; + case 'C': + + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //kk/CC + strcat (ml_string, "\340\264\225"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\225"); + i++; + } + else if (i < end + && ((phonetic_string[i + 1] == 'h') + || (phonetic_string[i + 1] == 'H'))) + { + if (i == start) + { + //Ch as in chaaya + strcat (ml_string, "\340\264\232"); + i++; + + } + else + { +//most of the time the ch in side the manglish means chcha as an pacha *wild guess. leaving this to aspell :) +//let he decides + strcat (ml_string, "\340\264\232"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\232"); + i++; + } + } + else if (i == start && (phonetic_string[i + 1] != 'h')) + { //start of a word- may be a Name. as in cibu + strcat (ml_string, "\340\264\270"); //just a c/k + } + else + { + strcat (ml_string, "\340\264\226"); //just a k/c + + } + + break; + case 'j': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //jja + strcat (ml_string, "\340\264\234"); //ja + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\234"); //ja + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //jh + strcat (ml_string, "\340\264\235"); + i++; + } + else + { + strcat (ml_string, "\340\264\234"); //just a j as in janam + + } + break; + case 'J': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //jja + strcat (ml_string, "\340\264\234"); //ja + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\234"); //ja + i++; + } + else if (i < end && (phonetic_string[i + 1] == 'h')) + { //jh + strcat (ml_string, "\340\264\235"); + i++; + } + else + { + strcat (ml_string, "\340\264\234"); //just a j as in janam + } + break; + case 't': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //tta + strcat (ml_string, "\340\264\237"); //ja + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\237"); //ja + i++; + } + else if (i < end + && ((phonetic_string[i + 1] == 'h') + || (phonetic_string[i + 1] == 'H'))) + { //th + strcat (ml_string, "\340\264\244"); + i++; + } + else + { + //usually the words starting with t is very less. A t in the starting is tha most probably. + if (i == start) + { + strcat (ml_string, "\340\264\244"); //tha + } + else + { + strcat (ml_string, "\340\264\237"); //just a t + } + } + break; + case 'T': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //tta + strcat (ml_string, "\340\264\237"); //ja + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\237"); //ja + i++; + } + else if (i < end + && ((phonetic_string[i + 1] == 'h') + || (phonetic_string[i + 1] == 'H'))) + { //th + strcat (ml_string, "\340\264\244"); + i++; + } + else + { + //usually the words starting with t is very less. A t in the starting is tha most probably. + if (i == start) + { + strcat (ml_string, "\340\264\244"); //tha + } + else + { + strcat (ml_string, "\340\264\237"); //just a t + } + } + break; + case 'd': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //dda + strcat (ml_string, "\340\264\246"); //da + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\246"); //da + i++; + } + else if (i < end + && ((phonetic_string[i + 1] == 'h') + || (phonetic_string[i + 1] == 'H'))) + { //th + strcat (ml_string, "\340\264\247"); + i++; + } + else + { + strcat (ml_string, "\340\264\246"); //da + } + break; + case 'D': + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama //dda + strcat (ml_string, "\340\264\246"); //Da + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\246"); //Da + i++; + + } + else if (i < end + && ((phonetic_string[i + 1] == 'h') + || (phonetic_string[i + 1] == 'H'))) + { //Dh as in viDhi + strcat (ml_string, "\340\264\242"); + i++; + } + else + { + if (i == start) + { + strcat (ml_string, "\340\264\246"); //Da + } + else + { + strcat (ml_string, "\340\264\241"); //Da + } + } + break; + case 'N': + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\264\243"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\243"); + i++; + } + else if (i == start) + { + strcat (ml_string, "\340\264\250"); + } + else + { + + if (((phonetic_string[i + 1] == 'a') ||(phonetic_string[i + 1] == 'A')||(phonetic_string[i + 1] == 'e')||(phonetic_string[i + 1] == 'E')||(phonetic_string[i + 1] == 'i')||(phonetic_string[i + 1] == 'I')||(phonetic_string[i + 1] == 'o')||(phonetic_string[i + 1] == 'O'))) + { + strcat (ml_string, "\340\264\243"); + } + else if((i < end )) + { + + //chillu N + strcat (ml_string, "\340\264\243"); + strcat (ml_string, "\340\265\215"); //virama + strcat (ml_string, "\342\200\215"); //zwj + } + } + //words ending with N chillu is less + break; + + case 'n': + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\264\250"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\250"); + i++; + } + else if (i < end && (phonetic_string[i + 1] =='j')) + { //nja as in njaan + strcat (ml_string, "\340\264\250"); + strcat (ml_string, "\340\265\215"); + strcat (ml_string, "\340\264\250"); + i++; + } + + else if (i == start) + { + strcat (ml_string, "\340\264\250"); + } + else + { + + if (((phonetic_string[i + 1] == 'a') ||(phonetic_string[i + 1] == 'A')||(phonetic_string[i + 1] == 'e')||(phonetic_string[i + 1] == 'E')||(phonetic_string[i + 1] == 'i')||(phonetic_string[i + 1] == 'I')||(phonetic_string[i + 1] == 'o')||(phonetic_string[i + 1] == 'O'))) + { + strcat (ml_string, "\340\264\250"); + } + else if((i < end )) + { + + //chillu n + strcat (ml_string, "\340\264\250"); + strcat (ml_string, "\340\265\215"); //virama + strcat (ml_string, "\342\200\215"); //zwj + } + } + //words ending with N chillu is less + break; + case 'p': + strcat (ml_string, "\340\264\252"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'f': + strcat (ml_string, "\340\264\253"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'b': + strcat (ml_string, "\340\264\254"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'B': + strcat (ml_string, "\340\264\255"); + break; + case 'm': + if (i == end - 1 && phonetic_string[i] != phonetic_string[i - 1]) //end of word. most probably it is an anuswaram + { + strcat (ml_string, "\340\264\202"); + + } + else + { + strcat (ml_string, "\340\264\256"); + } + + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'y': + strcat (ml_string, "\340\264\257"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'r': + strcat (ml_string, "\340\264\260"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'l': + strcat (ml_string, "\340\264\262"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'v': + strcat (ml_string, "\340\264\265"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case '$': + strcat (ml_string, "\340\264\266"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 's': + strcat (ml_string, "\340\264\270"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'S': + strcat (ml_string, "\340\264\267"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'h': + strcat (ml_string, "\340\264\271"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'L': + strcat (ml_string, "\340\264\263"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'z': + strcat (ml_string, "\340\264\264"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + case 'R': + strcat (ml_string, "\340\264\261"); + if (i < end && (phonetic_string[i + 1] == phonetic_string[i])) + { //koottaxaram - put a virama + strcat (ml_string, "\340\265\215"); + } + break; + default: + strcat (ml_string, "?"); //Not recognized + break; + } + i++; + } +printf("%s\n",ml_string); + return ml_string; +} |