From a84534956250badc05e9b190f1559309591c4f15 Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Sat, 15 Aug 2009 14:21:12 +0530 Subject: The patterns for all languages made compatible with tex rules for hyphenation Don't break on either side of zwj/zwnj for all languages LEFTHYPHENMIN and RIGHTHYPHENMIN properties removed. It can be configured from applications --- hyphenation/ChangeLog | 19 +++-- hyphenation/hyph_bn_IN.dic | 184 ++++++++++++++++++++++++++++-------------- hyphenation/hyph_gu_IN.dic | 184 ++++++++++++++++++++++++++---------------- hyphenation/hyph_hi_IN.dic | 193 +++++++++++++++++++++++++++----------------- hyphenation/hyph_kn_IN.dic | 192 +++++++++++++++++++++++++++---------------- hyphenation/hyph_ml_IN.dic | 193 +++++++++++++++++++++++++++----------------- hyphenation/hyph_mr_IN.dic | 193 +++++++++++++++++++++++++++----------------- hyphenation/hyph_or_IN.dic | 177 ++++++++++++++++++++++++---------------- hyphenation/hyph_pa_IN.dic | 175 +++++++++++++++++++++++++--------------- hyphenation/hyph_ta_IN.dic | 139 ++++++++++++++++++-------------- hyphenation/hyph_te_IN.dic | 197 ++++++++++++++++++++++++++++----------------- 11 files changed, 1145 insertions(+), 701 deletions(-) diff --git a/hyphenation/ChangeLog b/hyphenation/ChangeLog index 3854d2b..28fae7f 100644 --- a/hyphenation/ChangeLog +++ b/hyphenation/ChangeLog @@ -1,14 +1,19 @@ ChangeLog -------------- +2009-08-13: Version 0.4 + * The patterns for all languages made compatible with tex rules for hyphenation + * Don't break on either side of zwj/zwnj for all languages + * LEFTHYPHENMIN and RIGHTHYPHENMIN properties removed. 
It can be configured from applications + 2009-08-12: Version 0.3 - * Removed unnecessary zwj/zwnj rules - * Added LEFTHYPHENMIN and RIGHTHYPHENMIN for rules - * Added Marathi Rules + * Removed unnecessary zwj/zwnj rules + * Added LEFTHYPHENMIN and RIGHTHYPHENMIN for rules + * Added Marathi Rules 2009-05-12: Version 0.2 - * Vowel sign + [anuswaram|visargam|chandrabindu] fix for Bengali as reported by Runa B. - * Vowel sign + [visargam] fix for Malayalam + * Vowel sign + [anuswaram|visargam|chandrabindu] fix for Bengali as reported by Runa B. + * Vowel sign + [visargam] fix for Malayalam 2008-12-14: Version 0.1 - * Initial version - * Hyphenation Patterns for 9 Languages + * Initial version + * Hyphenation Patterns for 9 Languages diff --git a/hyphenation/hyph_bn_IN.dic b/hyphenation/hyph_bn_IN.dic index 942c68f..45ed3ba 100755 --- a/hyphenation/hyph_bn_IN.dic +++ b/hyphenation/hyph_bn_IN.dic @@ -16,65 +16,125 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -অ1 -আ1 -ই1 -ঈ1 -উ1 -ঊ1 -ঋ1 -এ1 -ঐ1 -ঔ1 -া1 -ি1 -ী1 -ু1 -ে1 -ো1 -ৈ1 -ৌ1 -ৗ1 -্2 -2ঃ1 -2ং1 -2ঁ1 -1ন -ন্2 -1র -র্2 -1ল -ল্2 -্2 -1ণ -ণ্2 -1ক -1গ -1খ -1ঘ -1ঙ -1চ -1ছ -1জ -1ঝ -1ঞ -1ট -1ঠ -1ড -1ঢ -1ত -1থ -1দ -1ধ -1প -1ফ -1ব -1ভ -1ম -1য -1শ -1ষ -1স -1হ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1অ1 +1আ1 +1ই1 +1ঈ1 +1উ1 +1ঊ1 +1ঋ1 +1ৠ1 +1ঌ1 +1ৡ1 +1এ1 +1ঐ1 +1ও1 +1ঔ1 +% Break after any dependent vowel, but not before. +2া1 +2ি1 +2ী1 +2ু1 +2ূ1 +2ৃ1 +2ৄ1 +2ৢ1 +2ৣ1 +2ে1 +2ৈ1 +2ো1 +2ৌ1 +% Break before or after any consonant. +1ক1 +1খ1 +1গ1 +1ঘ1 +1ঙ1 +1চ1 +1ছ1 +1জ1 +1ঝ1 +1ঞ1 +1ট1 +1ঠ1 +1ড1 +1ড়1 +1ঢ1 +1ঢ়1 +1ণ1 +1ত1 +1থ1 +1দ1 +1ধ1 +1ন1 +1প1 +1ফ1 +1ব1 +1ভ1 +1ম1 +1য1 +1য়1 +1র1 +1ল1 +1শ1 +1ষ1 +1স1 +1হ1 +% Do not break after khanda ta. 
+1ৎ2 +% Do not break before a final consonant or conjunct. +2ক্. +2খ্. +2গ্. +2ঘ্. +2ঙ্. +2চ্. +2ছ্. +2জ্. +2ঝ্. +2ঞ্. +2ট্. +2ঠ্. +2ড্. +2ড়্. +2ঢ্. +2ঢ়্. +2ণ্. +2ত্. +2থ্. +2দ্. +2ধ্. +2ন্. +2প্. +2ফ্. +2ব্. +2ভ্. +2ম্. +2য্. +2য়্. +2র্. +2ল্. +2শ্. +2ষ্. +2স্. +2হ্. +2র্ক. +2র্ট. +2র্ত. +2র্প. +% Do not break before chandrabindu, anusvara, visarga, avagraha, +% nukta and au length mark. +2ঁ +2ং +2ঃ +2ঽ +2় +2ৗ +% Do not break either side of virama (may be within conjunct). +2্2 diff --git a/hyphenation/hyph_gu_IN.dic b/hyphenation/hyph_gu_IN.dic index 5702d4e..d42149f 100755 --- a/hyphenation/hyph_gu_IN.dic +++ b/hyphenation/hyph_gu_IN.dic @@ -1,5 +1,5 @@ UTF-8 -% Hyphenation for Gujarati +% Hyphenation for Gujarati % Copyright (C) 2008-2009 Santhosh Thottingal % % This library is free software; you can redistribute it and/or @@ -16,72 +16,116 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -અ1 -આ1 -ઇ1 -ઈ1 -ઉ1 -ઊ1 -ઋ1 -એ1 -ઐ1 -ઔ1 -ા1 -િ1 -ી1 -ુ1 -ૂ1 -ૃ1 -ે1 -ો1 -ૈ1 -ૌ1 -્2 -ઃ1 -ં1 -1ન -ન્2 -2ન્‍ -1ર -ર્2 -2ર્‍ -1લ -લ્2 -2લ્‍ -1ળ -ળ્2 -2ળ્‍ -1ણ -ણ્2 -2ણ્‍ -1ક -1ગ -1ખ -1ઘ -1ઙ -1ચ -1છ -1જ -1ઝ -1ઞ -1ટ -1ઠ -1ડ -1ઢ -1ત -1થ -1દ -1ધ -1પ -1ફ -1બ -1ભ -1મ -1ય -1વ -1શ -1ષ -1સ -1હ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1અ1 +1આ1 +1ઇ1 +1ઈ1 +1ઉ1 +1ઊ1 +1ઋ1 +1ૠ1 +1એ1 +1ઐ1 +1ઓ1 +1ઔ1 +% Break after any dependent vowel but not before. +2ા1 +2િ1 +2ી1 +2ુ1 +2ૂ1 +2ૃ1 +2ૄ1 +2ૢ1 +2ૣ1 +2ે1 +2ૈ1 +2ો1 +2ૌ1 +% Break before or after any consonant. +1ક1 +1ખ1 +1ગ1 +1ઘ1 +1ઙ1 +1ચ1 +1છ1 +1જ1 +1ઝ1 +1ઞ1 +1ટ1 +1ઠ1 +1ડ1 +1ઢ1 +1ણ1 +1ત1 +1થ1 +1દ1 +1ધ1 +1ન1 +1પ1 +1ફ1 +1બ1 +1ભ1 +1મ1 +1ય1 +1ર1 +1લ1 +1ળ1 +1વ1 +1શ1 +1ષ1 +1સ1 +1હ1 +% Do not break before a final consonant or conjunct. +2ક્. +2ખ્. +2ગ્. +2ઘ્. +2ઙ્. +2ચ્. +2છ્. +2જ્. +2ઝ્. 
+2ઞ્. +2ટ્. +2ઠ્. +2ડ્. +2ઢ્. +2ણ્. +2ત્. +2થ્. +2દ્. +2ધ્. +2ન્. +2પ્. +2ફ્. +2બ્. +2ભ્. +2મ્. +2ય્. +2ર્. +2લ્. +2ળ્. +2વ્. +2શ્. +2ષ્. +2સ્. +2હ્. +2ર્ક્. +2ર્ટ્. +2ર્ત્. +2ર્પ્. +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ઁ +2ઃ +2ઽ +% Do not break either side of virama (may be within conjunct). +2્2 diff --git a/hyphenation/hyph_hi_IN.dic b/hyphenation/hyph_hi_IN.dic index b6fc5a2..985eeaf 100755 --- a/hyphenation/hyph_hi_IN.dic +++ b/hyphenation/hyph_hi_IN.dic @@ -16,78 +16,121 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -अ1 -आ1 -इ1 -ई1 -उ1 -ऊ1 -ऋ1 -ऎ1 -ए1 -ऐ1 -ऒ1 -औ1 -ा1 -ि1 -ी1 -ु1 -ू1 -ृ1 -ॆ1 -े1 -ॊ1 -ो1 -ै1 -ौ1 -्2 -ः1 -ं1 -1न -न्2 -2न्‍ -1र -र्2 -2र्‍ -1ल -ल्2 -2ल्‍ -1ळ -ळ्2 -2ळ्‍ -1ण -ण्2 -2ण्‍ -1क -1ग -1ख -1घ -1ङ -1च -1छ -1ज -1झ -1ञ -1ट -1ठ -1ड -1ढ -1त -1थ -1द -1ध -1प -1फ -1ब -1भ -1म -1य -1व -1श -1ष -1स -1ह -1ऴ -1ऱ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1अ1 +1आ1 +1इ1 +1ई1 +1उ1 +1ऊ1 +1ऋ1 +1ॠ1 +1ऌ1 +1ॡ1 +1ए1 +1ऐ1 +1ओ1 +1औ1 +% Break after any dependent vowel but not before. +2ा1 +2ि1 +2ी1 +2ु1 +2ू1 +2ृ1 +2ॄ1 +2ॢ1 +2ॣ1 +2े1 +2ै1 +2ो1 +2ौ1 +% Break before or after any consonant. +1क1 +1ख1 +1ग1 +1घ1 +1ङ1 +1च1 +1छ1 +1ज1 +1झ1 +1ञ1 +1ट1 +1ठ1 +1ड1 +1ढ1 +1ण1 +1त1 +1थ1 +1द1 +1ध1 +1न1 +1प1 +1फ1 +1ब1 +1भ1 +1म1 +1य1 +1र1 +1ल1 +1ळ1 +1व1 +1श1 +1ष1 +1स1 +1ह1 +% Do not break before a final consonant or conjunct. +2क्. +2ख्. +2ग्. +2घ्. +2ङ्. +2च्. +2छ्. +2ज्. +2झ्. +2ञ्. +2ट्. +2ठ्. +2ड्. +2ढ्. +2ण्. +2त्. +2थ्. +2द्. +2ध्. +2न्. +2प्. +2फ्. +2ब्. +2भ्. +2म्. +2य्. +2र्. +2ल्. +2ळ्. +2व्. +2श्. +2ष्. +2स्. +2ह्. +2र्क्. +2र्ट्. +2र्त्. +2र्प्. +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. 
+2ँ +2ं +2ः +2ऽ +2॑ +2॒ +% Do not break either side of virama (may be within conjunct). +2्2 diff --git a/hyphenation/hyph_kn_IN.dic b/hyphenation/hyph_kn_IN.dic index b230487..ebd0b04 100755 --- a/hyphenation/hyph_kn_IN.dic +++ b/hyphenation/hyph_kn_IN.dic @@ -16,73 +16,125 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -ಅ1 -ಆ1 -ಇ1 -ಈ1 -ಉ1 -ಊ1 -ಋ1 -ಎ1 -ಏ1 -ಐ1 -ಒ1 -ಔ1 -ಾ1 -ಿ1 -ೀ1 -ು1 -ೂ1 -ೃ1 -ೆ1 -ೇ1 -ೊ1 -ೋ1 -ೈ1 -ೌ1 -್2 -2ಃ1 -2ಂ1 -1ನ -ನ್2 -1ರ -ರ್2 -2ರ್‍ -1ಲ -ಲ್2 -1ಳ -ಳ್2 -1ಣ -ಣ್2 -1ಕ -1ಗ -1ಖ -1ಘ -1ಙ -1ಚ -1ಛ -1ಜ -1ಝ -1ಞ -1ಟ -1ಠ -1ಡ -1ಢ -1ತ -1ಥ -1ದ -1ಧ -1ಪ -1ಫ -1ಬ -1ಭ -1ಮ -1ಯ -1ವ -1ಶ -1ಷ -1ಸ -1ಹ -1ಱ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1ಅ1 +1ಆ1 +1ಇ1 +1ಈ1 +1ಉ1 +1ಊ1 +1ಋ1 +1ೠ1 +1ಌ1 +1ೡ1 +1ಎ1 +1ಏ1 +1ಐ1 +1ಒ1 +1ಓ1 +1ಔ1 +% Break after any dependent vowel, but not before. +2ಾ1 +2ಿ1 +2ೀ1 +2ು1 +2ೂ1 +2ೃ1 +2ೄ1 +2ೆ1 +2ೇ1 +2ೈ1 +2ೊ1 +2ೋ1 +2ೌ1 +% Break before or after any consonant. +1ಕ1 +1ಖ1 +1ಗ1 +1ಘ1 +1ಙ1 +1ಚ1 +1ಛ1 +1ಜ1 +1ಝ1 +1ಞ1 +1ಟ1 +1ಠ1 +1ಡ1 +1ಢ1 +1ಣ1 +1ತ1 +1ಥ1 +1ದ1 +1ಧ1 +1ನ1 +1ಪ1 +1ಫ1 +1ಬ1 +1ಭ1 +1ಮ1 +1ಯ1 +1ರ1 +1ಱ1 % can occur in Sanskrit? +1ಲ1 +1ಳ1 +1ೞ1 % can occur in Sanskrit? +1ವ1 +1ಶ1 +1ಷ1 +1ಸ1 +1ಹ1 +% Do not break before a final consonant or conjunct. +2ಕ್. +2ಖ್. +2ಗ್. +2ಘ್. +2ಙ್. +2ಚ್. +2ಛ್. +2ಜ್. +2ಝ್. +2ಞ್. +2ಟ್. +2ಠ್. +2ಡ್. +2ಢ್. +2ಣ್. +2ತ್. +2ಥ್. +2ದ್. +2ಧ್. +2ನ್. +2ಪ್. +2ಫ್. +2ಬ್. +2ಭ್. +2ಮ್. +2ಯ್. +2ರ್. +2ಱ್. +2ಲ್. +2ಳ್. +2ವ್. +2ಶ್. +2ಷ್. +2ಸ್. +2ಹ್. +2ರ್ಕ. +2ರ್ಟ. +2ರ್ತ. +2ರ್ಪ. +% Do not break before anusvara, visarga, avagraha, +% length mark and ai length mark. +2ಂ +2ಃ +2ಽ +2ೕ +2ೖ +% Do not break either side of virama (may be within conjunct). 
+2್2 diff --git a/hyphenation/hyph_ml_IN.dic b/hyphenation/hyph_ml_IN.dic index 72f9fb0..5836fe4 100755 --- a/hyphenation/hyph_ml_IN.dic +++ b/hyphenation/hyph_ml_IN.dic @@ -16,79 +16,126 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -അ1 -ആ1 -ഇ1 -ഈ1 -ഉ1 -ഊ1 -ഋ1 -എ1 -ഏ1 -ഐ1 -ഒ1 -ഔ1 -ാ1 -ി1 -ീ1 -ു1 -ൂ1 -ൃ1 -െ1 -േ1 -ൊ1 -ോ1 -ൈ1 -ൌ1 -ൗ1 -്2 -2ഃ1 -2ം1 -1ന -ന്2 -2ന്‍ -1ര -ര്2 -2ര്‍ -1ല -ല്2 +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1അ1 +1ആ1 +1ഇ1 +1ഈ1 +1ഉ1 +1ഊ1 +1ഋ1 +1ൠ1 +1ഌ1 +1ൡ1 +1എ1 +1ഏ1 +1ഐ1 +1ഒ1 +1ഓ1 +1ഔ1 +% Break after any dependent vowel, but not before. +2ാ1 +2ി1 +2ീ1 +2ു1 +2ൂ1 +2ൃ1 +2െ1 +2േ1 +2ൈ1 +2ൊ1 +2ോ1 +2ൌ1 +% Break before or after any consonant. +1ക1 +1ഖ1 +1ഗ1 +1ഘ1 +1ങ1 +1ച1 +1ഛ1 +1ജ1 +1ഝ1 +1ഞ1 +1ട1 +1ഠ1 +1ഡ1 +1ഢ1 +1ണ1 +1ത1 +1ഥ1 +1ദ1 +1ധ1 +1ന1 +1പ1 +1ഫ1 +1ബ1 +1ഭ1 +1മ1 +1യ1 +1ര1 +1റ1 +1ല1 +1ള1 +1ഴ1 +1വ1 +1ശ1 +1ഷ1 +1സ1 +1ഹ1 +% Do not break before a final consonant or conjunct. +2ക്. +2ഖ്. +2ഗ്. +2ഘ്. +2ങ്. +2ച്. +2ഛ്. +2ജ്. +2ഝ്. +2ഞ്. +2ട്. +2ഠ്. +2ഡ്. +2ഢ്. +2ണ്. +2ത്. +2ഥ്. +2ദ്. +2ധ്. +2ന്. +2പ്. +2ഫ്. +2ബ്. +2ഭ്. +2മ്. +2യ്. +2ര്. +2റ്. +2ല്. +2ള്. +2ഴ്. +2വ്. +2ശ്. +2ഷ്. +2സ്. +2ഹ്. +% Do not break before anusvara, visarga and length mark. +2ം +2ഃ +2ൗ +% Do not break either side of virama (may be within conjunct). 
+2്2 +% Do not break left side of chillu 2ല്‍ -1ള -ള്2 2ള്‍ -1ണ -ണ്2 +2ന്‍ 2ണ്‍ -1ക -1ഗ -1ഖ -1ഘ -1ങ -1ച -1ഛ -1ജ -1ഝ -1ഞ -1ട -1ഠ -1ഡ -1ഢ -1ത -1ഥ -1ദ -1ധ -1പ -1ഫ -1ബ -1ഭ -1മ -1യ -1വ -1ശ -1ഷ -1സ -1ഹ -1ഴ -1റ +2ര്‍ +2ക്‍ + diff --git a/hyphenation/hyph_mr_IN.dic b/hyphenation/hyph_mr_IN.dic index 3edd148..e0eb801 100755 --- a/hyphenation/hyph_mr_IN.dic +++ b/hyphenation/hyph_mr_IN.dic @@ -16,78 +16,121 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -अ1 -आ1 -इ1 -ई1 -उ1 -ऊ1 -ऋ1 -ऎ1 -ए1 -ऐ1 -ऒ1 -औ1 -ा1 -ि1 -ी1 -ु1 -ू1 -ृ1 -ॆ1 -े1 -ॊ1 -ो1 -ै1 -ौ1 -्2 -ः1 -ं1 -1न -न्2 -2न्‍ -1र -र्2 -2र्‍ -1ल -ल्2 -2ल्‍ -1ळ -ळ्2 -2ळ्‍ -1ण -ण्2 -2ण्‍ -1क -1ग -1ख -1घ -1ङ -1च -1छ -1ज -1झ -1ञ -1ट -1ठ -1ड -1ढ -1त -1थ -1द -1ध -1प -1फ -1ब -1भ -1म -1य -1व -1श -1ष -1स -1ह -1ऴ -1ऱ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1अ1 +1आ1 +1इ1 +1ई1 +1उ1 +1ऊ1 +1ऋ1 +1ॠ1 +1ऌ1 +1ॡ1 +1ए1 +1ऐ1 +1ओ1 +1औ1 +% Break after any dependent vowel but not before. +2ा1 +2ि1 +2ी1 +2ु1 +2ू1 +2ृ1 +2ॄ1 +2ॢ1 +2ॣ1 +2े1 +2ै1 +2ो1 +2ौ1 +% Break before or after any consonant. +1क1 +1ख1 +1ग1 +1घ1 +1ङ1 +1च1 +1छ1 +1ज1 +1झ1 +1ञ1 +1ट1 +1ठ1 +1ड1 +1ढ1 +1ण1 +1त1 +1थ1 +1द1 +1ध1 +1न1 +1प1 +1फ1 +1ब1 +1भ1 +1म1 +1य1 +1र1 +1ल1 +1ळ1 +1व1 +1श1 +1ष1 +1स1 +1ह1 +% Do not break before a final consonant or conjunct. +2क्. +2ख्. +2ग्. +2घ्. +2ङ्. +2च्. +2छ्. +2ज्. +2झ्. +2ञ्. +2ट्. +2ठ्. +2ड्. +2ढ्. +2ण्. +2त्. +2थ्. +2द्. +2ध्. +2न्. +2प्. +2फ्. +2ब्. +2भ्. +2म्. +2य्. +2र्. +2ल्. +2ळ्. +2व्. +2श्. +2ष्. +2स्. +2ह्. +2र्क्. +2र्ट्. +2र्त्. +2र्प्. +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ँ +2ं +2ः +2ऽ +2॑ +2॒ +% Do not break either side of virama (may be within conjunct). 
+2्2 diff --git a/hyphenation/hyph_or_IN.dic b/hyphenation/hyph_or_IN.dic index c865b7b..09e1fbf 100755 --- a/hyphenation/hyph_or_IN.dic +++ b/hyphenation/hyph_or_IN.dic @@ -16,71 +16,112 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -ଅ1 -ଆ1 -ଇ1 -ଈ1 -ଉ1 -ଊ1 -ଋ1 -ଏ1 -ଐ1 -ଔ1 -ା1 -ି1 -ୀ1 -ୁ1 -େ1 -ୋ1 -ୈ1 -ୌ1 -ୗ1 -୍2 -ଃ1 -ଂ1 -1ନ -ନ୍2 -2ନ୍‍ -1ର -ର୍2 -2ର୍‍ -1ଲ -ଲ୍2 -2ଲ୍‍ -1ଳ -ଳ୍2 -2ଳ୍‍ -1ଣ -ଣ୍2 -2ଣ୍‍ -1କ -1ଗ -1ଖ -1ଘ -1ଙ -1ଚ -1ଛ -1ଜ -1ଝ -1ଞ -1ଟ -1ଠ -1ଡ -1ଢ -1ତ -1ଥ -1ଦ -1ଧ -1ପ -1ଫ -1ବ -1ଭ -1ମ -1ଯ -1ଵ -1ଶ -1ଷ -1ସ -1ହ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1ଅ1 +1ଆ1 +1ଇ1 +1ଈ1 +1ଉ1 +1ଊ1 +1ଋ1 +1ୠ1 +1ଌ1 +1ୡ1 +1ଏ1 +1ଐ1 +1ଓ1 +1ଔ1 +% Break after any dependent vowel, but not before. +2ା1 +2ି1 +2ୀ1 +2ୁ1 +2ୂ1 +2ୃ1 +2େ1 +2ୈ1 +2ୋ1 +2ୌ1 +% Break before or after any consonant. +1କ1 +1ଖ1 +1ଗ1 +1ଘ1 +1ଙ1 +1ଚ1 +1ଛ1 +1ଜ1 +1ଝ1 +1ଞ1 +1ଟ1 +1ଠ1 +1ଡ1 +1ଢ1 +1ଣ1 +1ତ1 +1ଥ1 +1ଦ1 +1ଧ1 +1ନ1 +1ପ1 +1ଫ1 +1ବ1 +1ଭ1 +1ମ1 +1ଯ1 +1ର1 +1ଲ1 +1ଳ1 +1ଵ1 +1ଶ1 +1ଷ1 +1ସ1 +1ହ1 +% Do not break before a final consonant or conjunct. +2କ୍. +2ଖ୍. +2ଗ୍. +2ଘ୍. +2ଙ୍. +2ଚ୍. +2ଛ୍. +2ଜ୍. +2ଝ୍. +2ଞ୍. +2ଟ୍. +2ଠ୍. +2ଡ୍. +2ଢ୍. +2ଣ୍. +2ତ୍. +2ଥ୍. +2ଦ୍. +2ଧ୍. +2ନ୍. +2ପ୍. +2ଫ୍. +2ବ୍. +2ଭ୍. +2ମ୍. +2ଯ୍. +2ର୍. +2୍. +2ଲ୍. +2ଳ୍. +2୍. +2ଵ୍. +2ଶ୍. +2ଷ୍. +2ସ୍. +2ହ୍. +% Do not break before anusvara, visarga and length mark. +2ଂ +2ଃ +2ୗ +% Do not break either side of virama (may be within conjunct). 
+2୍2 diff --git a/hyphenation/hyph_pa_IN.dic b/hyphenation/hyph_pa_IN.dic index a6b791e..a722ecd 100755 --- a/hyphenation/hyph_pa_IN.dic +++ b/hyphenation/hyph_pa_IN.dic @@ -1,5 +1,5 @@ UTF-8 -% Hyphenation for Panjabi(Gurmughi) +% Hyphenation for Panjabi (Gurmukhi) % Copyright (C) 2008-2009 Santhosh Thottingal % % This library is free software; you can redistribute it and/or @@ -16,69 +16,110 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -ਅ1 -ਆ1 -ਇ1 -ਈ1 -ਉ1 -ਊ1 -ਏ1 -ਐ1 -ਔ1 -ਾ1 -ਿ1 -ੀ1 -ੁ1 -ੂ1 -ੇ1 -ੋ1 -ੈ1 -ੌ1 -੍2 -ਃ1 -ਂ1 -1ਨ -ਨ੍2 -2ਨ੍‍ -1ਰ -ਰ੍2 -2ਰ੍‍ -1ਲ -ਲ੍2 -2ਲ੍‍ -1ਲ਼ -ਲ਼੍2 -2ਲ਼੍‍ -1ਣ -ਣ੍2 -2ਣ੍‍ -1ਕ -1ਗ -1ਖ -1ਘ -1ਙ -1ਚ -1ਛ -1ਜ -1ਝ -1ਞ -1ਟ -1ਠ -1ਡ -1ਢ -1ਤ -1ਥ -1ਦ -1ਧ -1ਪ -1ਫ -1ਬ -1ਭ -1ਮ -1ਯ -1ਵ -1ਸ਼ -1ਸ -1ਹ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1ਅ1 +1ਆ1 +1ਇ1 +1ਈ1 +1ਉ1 +1ਊ1 +1ਏ1 +1ਐ1 +1ਓ1 +1ਔ1 +% Break after any dependent vowel but not before. +2ਾ1 +2ਿ1 +2ੀ1 +2ੁ1 +2ੂ1 +2ੇ1 +2ੈ1 +2ੋ1 +2ੌ1 +% Break before or after any consonant. +1ਕ1 +1ਖ1 +1ਗ1 +1ਘ1 +1ਙ1 +1ਚ1 +1ਛ1 +1ਜ1 +1ਝ1 +1ਞ1 +1ਟ1 +1ਠ1 +1ਡ1 +1ਢ1 +1ਣ1 +1ਤ1 +1ਥ1 +1ਦ1 +1ਧ1 +1ਨ1 +1ਪ1 +1ਫ1 +1ਬ1 +1ਭ1 +1ਮ1 +1ਯ1 +1ਰ1 +1ਲ1 +1ਲ਼1 +1ਵ1 +1ਸ਼1 +11 +1ਸ1 +1ਹ1 +% Do not break before a final consonant or conjunct. +2ਕ੍. +2ਖ੍. +2ਗ੍. +2ਘ੍. +2ਙ੍. +2ਚ੍. +2ਛ੍. +2ਜ੍. +2ਝ੍. +2ਞ੍. +2ਟ੍. +2ਠ੍. +2ਡ੍. +2ਢ੍. +2ਣ੍. +2ਤ੍. +2ਥ੍. +2ਦ੍. +2ਧ੍. +2ਨ੍. +2ਪ੍. +2ਫ੍. +2ਬ੍. +2ਭ੍. +2ਮ੍. +2ਯ੍. +2ਰ੍. +2ਲ੍. +2ਲ਼੍. +2ਵ੍. +2ਸ਼੍. +2੍. +2ਸ੍. +2ਹ੍. +2ਰ੍ਕ੍. +2ਰ੍ਟ੍. +2ਰ੍ਤ੍. +2ਰ੍ਪ੍. +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ਁ +2ਂ +2ਃ +% Do not break either side of virama (may be within conjunct). 
+2੍2 diff --git a/hyphenation/hyph_ta_IN.dic b/hyphenation/hyph_ta_IN.dic index dc7616b..9a5c889 100755 --- a/hyphenation/hyph_ta_IN.dic +++ b/hyphenation/hyph_ta_IN.dic @@ -16,62 +16,83 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -அ1 -ஆ1 -இ1 -ஈ1 -உ1 -ஊ1 -எ1 -ஏ1 -ஐ1 -ஒ1 -ஔ1 -ா1 -ி1 -ீ1 -ு1 -ூ1 -ெ1 -ே1 -ொ1 -ோ1 -ை1 -ௌ1 -ௗ1 -்2 -ஃ1 -ஂ1 -1ந -ந்2 -2ந்‍ -1ர -ர்2 -2ர்‍ -1ல -ல்2 -2ல்‍ -1ள -ள்2 -2ள்‍ -1ண -ண்2 -2ண்‍ -1க -1ங -1ச -1ஜ -1ஞ -1ட -1த -1ப -1ம -1ய -1வ -1ஷ -1ஸ -1ஹ -1ழ -1ற +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1அ1 +1ஆ1 +1இ1 +1ஈ1 +1உ1 +1ஊ1 +1எ1 +1ஏ1 +1ஐ1 +1ஒ1 +1ஓ1 +1ஔ1 +% Break after any dependent vowel, but not before. +2ா1 +2ி1 +2ீ1 +2ு1 +2ூ1 +2ெ1 +2ே1 +2ை1 +2ொ1 +2ோ1 +2ௌ1 +% Break before or after any consonant. +1க1 +1ங1 +1ச1 +1ஜ1 +1ஞ1 +1ட1 +1ண1 +1த1 +1ந1 +1ப1 +1ம1 +1ய1 +1ர1 +1ற1 +1ல1 +1ள1 +1ழ1 +1வ1 +1ஷ1 +1ஸ1 +1ஹ1 +% Do not break before a final consonant or conjunct. +2க். +2ங். +2ச். +2ஜ். +2ஞ். +2ட். +2ண். +2த். +2ந். +2ப். +2ம். +2ய். +2ர். +2ற். +2ல். +2ள். +2ழ். +2வ். +2ஷ். +2ஸ். +2ஹ். +% Do not break before anusvara, visarga and length mark. +2ஂ +2ஃ +2ௗ +% Do not break either side of virama (may be within conjunct). 
+2்2 diff --git a/hyphenation/hyph_te_IN.dic b/hyphenation/hyph_te_IN.dic index c7eb3ea..e96a51e 100755 --- a/hyphenation/hyph_te_IN.dic +++ b/hyphenation/hyph_te_IN.dic @@ -1,5 +1,5 @@ UTF-8 -% Hyphenation for Telugu +% Hyphenation for Telugu % Copyright (C) 2008-2009 Santhosh Thottingal % % This library is free software; you can redistribute it and/or @@ -16,77 +16,124 @@ UTF-8 % License along with this library; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % -LEFTHYPHENMIN 2 -RIGHTHYPHENMIN 2 -అ1 -ఆ1 -ఇ1 -ఈ1 -ఉ1 -ఊ1 -ఋ1 -ఎ1 -ఏ1 -ఐ1 -ఒ1 -ఔ1 -ా1 -ి1 -ీ1 -ు1 -ూ1 -ృ1 -ె1 -ే1 -ొ1 -ో1 -ై1 -ౌ1 -్2 -ః1 -ం1 -1న -న్2 -2న్‍ -1ర -ర్2 -2ర్‍ -1ల -ల్2 -2ల్‍ -1ళ -ళ్2 -2ళ్‍ -1ణ -ణ్2 -2ణ్‍ -1క -1గ -1ఖ -1ఘ -1ఙ -1చ -1ఛ -1జ -1ఝ -1ఞ -1ట -1ఠ -1డ -1ఢ -1త -1థ -1ద -1ధ -1ప -1ఫ -1బ -1భ -1మ -1య -1వ -1శ -1ష -1స -1హ -1ఱ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +2‍2 +2‌2 +% Break before or after any independent vowel. +1అ1 +1ఆ1 +1ఇ1 +1ఈ1 +1ఉ1 +1ఊ1 +1ఋ1 +1ౠ1 +1ఌ1 +1ౡ1 +1ఎ1 +1ఏ1 +1ఐ1 +1ఒ1 +1ఓ1 +1ఔ1 +% Break after any dependent vowel, but not before. +2ా1 +2ి1 +2ీ1 +2ు1 +2ూ1 +2ృ1 +2ౄ1 +2ె1 +2ే1 +2ై1 +2ొ1 +2ో1 +2ౌ1 +% Break before or after any consonant. +1క1 +1ఖ1 +1గ1 +1ఘ1 +1ఙ1 +1చ1 +1ఛ1 +1జ1 +1ఝ1 +1ఞ1 +1ట1 +1ఠ1 +1డ1 +1ఢ1 +1ణ1 +1త1 +1థ1 +1ద1 +1ధ1 +1న1 +1ప1 +1ఫ1 +1బ1 +1భ1 +1మ1 +1య1 +1ర1 +1ఱ1 % can occur in Sanskrit? +1ల1 +1ళ1 +1వ1 +1శ1 +1ష1 +1స1 +1హ1 +% Do not break before a final consonant or conjunct. +2క్. +2ఖ్. +2గ్. +2ఘ్. +2ఙ్. +2చ్. +2ఛ్. +2జ్. +2ఝ్. +2ఞ్. +2ట్. +2ఠ్. +2డ్. +2ఢ్. +2ణ్. +2త్. +2థ్. +2ద్. +2ధ్. +2న్. +2ప్. +2ఫ్. +2బ్. +2భ్. +2మ్. +2య్. +2ర్. +2ఱ్. +2ల్. +2ళ్. +2వ్. +2శ్. +2ష్. +2స్. +2హ్. +2ర్క్. +2ర్ట్. +2ర్త్. +2ర్ప్. +% Do not break before chandrabindu, anusvara, visarga, +% length mark and ai length mark. +2ఁ +2ం +2ః +2ౕ +2ౖ +% Do not break either side of virama (may be within conjunct). +2్2 -- cgit