summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- install/po/Makefile.in 33
-rwxr-xr-x tests/i18n.py (renamed from install/po/test_i18n.py) 179
-rw-r--r-- tests/test_ipalib/test_text.py 163
3 files changed, 228 insertions, 147 deletions
diff --git a/install/po/Makefile.in b/install/po/Makefile.in
index 4bee861a8..d65ba0c70 100644
--- a/install/po/Makefile.in
+++ b/install/po/Makefile.in
@@ -15,6 +15,7 @@ MSGINIT = @MSGINIT@
MSGMERGE = @MSGMERGE@
MSGCMP = @MSGCMP@
TX = @TX@
+IPA_TEST_I18N = ../../tests/i18n.py
DOMAIN = @GETTEXT_DOMAIN@
MSGMERGE_UPDATE = $(MSGMERGE) --update
@@ -129,7 +130,7 @@ update-pot:
echo "$(DOMAIN).pot unmodified" ; \
fi || :
@rm -f $(DOMAIN).pot.update $(DOMAIN).pot.update.tmp $(DOMAIN).pot.tmp
- ./test_i18n.py --show-strings --validate-pot $(DOMAIN).pot
+ $(IPA_TEST_I18N) --show-strings --validate-pot $(DOMAIN).pot
msg-stats:
@pot_count=`$(MSGFMT) --statistics $(DOMAIN).pot 2>&1 | \
@@ -169,38 +170,14 @@ distclean: clean
maintainer-clean: distclean
-# We test our translations by taking the original untranslated string
-# (e.g. msgid) and prepend a prefix character and then append a suffix
-# character. The test consists of asserting that the first character in the
-# translated string is the prefix, the last character in the translated string
-# is the suffix and the everything between the first and last character exactly
-# matches the original msgid.
-#
-# We use unicode characters not in the ascii character set for the prefix and
-# suffix to enhance the test. To make reading the translated string easier the
-# prefix is the unicode right pointing arrow and the suffix left pointing arrow,
-# thus the translated string looks like the original string enclosed in
-# arrows. In ASCII art the string "foo" would render as:
-# -->foo<--
-#
-# Unicode right pointing arrow: u'\u2192', utf-8 = '\xe2\x86\x92'
-# Unicode left pointing arrow: u'\u2190', utf-8 = '\xe2\x86\x90'
-#
-# The sed command below performs the prefix and suffix substitution.
-#
-# When msginit is invoked with an English target locale it copies the msgid
-# into the msgstr. This is an undocumented feature of msginit. Otherwise the
-# msgstr will be set to the empty string (i.e. untranslated). We depend on
-# the msgid being copied to the msgstr.
-
test:
- ./test_i18n.py --test-gettext
+ $(IPA_TEST_I18N) --test-gettext
validate-pot:
- ./test_i18n.py --show-strings --validate-pot $(DOMAIN).pot
+ $(IPA_TEST_I18N) --show-strings --validate-pot $(DOMAIN).pot
validate-po:
- ./test_i18n.py --show-strings --validate-po $(po_files)
+ $(IPA_TEST_I18N) --show-strings --validate-po $(po_files)
debug:
@echo Python potfiles:
diff --git a/install/po/test_i18n.py b/tests/i18n.py
index beb43ccaa..067bc5e39 100755
--- a/install/po/test_i18n.py
+++ b/tests/i18n.py
@@ -19,6 +19,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+# WARNING: Do not import ipa modules, this is also used as a
+# stand-alone script (invoked from install/po Makefile).
import optparse
import sys
import gettext
@@ -86,14 +88,108 @@ _shell_substitution_regexp = re.compile(r'\$(\s*)([({]?)(\s*)\w+(\s*)([)}]?)')
# group 4: whitespace between variable and ending delimiter
# group 5: ending delimiter
-# We do not permit anonymous substitutions in translation strings
-# (e.g. '%s occurred' % error) because they do not provide the
-# necessary context to translators, they would only see
-# '%s occurred'. Instead a keyword substitution should be used
-# (e.g. '%(error)s occurred' % {'error': error_message})
+# Matches a single printf(3) style conversion specification. Every
+# fragment is a raw string so the regex escapes (\d, \$, \., \*) reach
+# the re module untouched instead of relying on Python passing
+# unrecognised string escapes through. The group numbers noted on each
+# line are the ones parse_printf_fmt() reads.
+printf_fmt_re = re.compile(
+    r"%"                                        # start
+    r"(\d+\$)?"                                 # fmt_arg (group 1)
+    r"(([#0 +'I]|-(?!\d))*)"                    # flags (group 2)
+    r"(([+-]?([1-9][0-9]*)?)|(\*|\*\d+\$))?"    # width (group 4)
+    r"(\.((-?\d*)|(\*|)|(\*\d+\$)))?"           # precision (group 8)
+    r"(h|hh|l|ll|L|j|z|t)?"                     # length (group 13)
+    r"([diouxXeEfFgGaAcspnm%])")                # conversion (group 14)
-# Python anonymous format substitutions, e.g. %s, %d, %f, etc.
-python_anonymous_substitutions_regexp = re.compile(r'%[srduoxf]\b') # e.g. %s
+#-------------------------------------------------------------------------------
+
+def get_prog_langs(entry):
+    '''
+    Given an entry in a pot or po file return a set of the
+    programming languages it was found in. It needs to be a set
+    because the same msgid may appear in more than one file which may
+    be in different programming languages.
+
+    Note: One might think you could use the c-format etc. flags
+    attached to the entry to make this determination, but you can't.
+    Those flags refer to the style of the string not the programming
+    language it came from. Also the flags are often omitted and/or are
+    inaccurate.
+
+    For now we just look at the file extension. If we knew the path to
+    the file we could use other heuristics such as looking for the
+    shebang interpreter string.
+
+    The set of possible language types which might be returned are:
+
+    * c
+    * python
+
+    Occurrences whose file extension is not recognized contribute
+    nothing, so the returned set may be empty.
+    '''
+    result = set()
+
+    for location in entry.occurrences:
+        # Each occurrence is indexed as a sequence; item 0 is the file name.
+        filename = location[0]
+        ext = os.path.splitext(filename)[1]
+
+        if ext in ('.c', '.h', '.cxx', '.cpp', '.hxx'):
+            result.add('c')
+        # The trailing comma matters: ('.py') is just the string '.py',
+        # which turns "in" into a substring test that also accepts ''
+        # (files with no extension at all) and '.p'.
+        elif ext in ('.py',):
+            result.add('python')
+
+    return result
+
+def parse_printf_fmt(s):
+    '''
+    Parse a printf style format string and return a list of format
+    conversions found in the string.
+
+    Each conversion specification is introduced by the character %, and
+    ends with a conversion specifier. In between there may be (in this
+    order) zero or more flags, an optional minimum field width, an
+    optional precision and an optional length modifier. See "man 3
+    printf" for details.
+
+    Conversions whose specifier is % (a literal percent sign, normally
+    written %%) consume no argument and are not included in the result.
+
+    Each item in the returned list is a dict whose keys are the
+    sub-parts of a conversion specification. The key and values are:
+
+    fmt
+        The entire format conversion specification
+    fmt_arg
+        The positional index of the matching argument in the argument
+        list, e.g. %1$ indicates the first argument in the argument
+        list will be read for this conversion, excludes the leading %
+        but includes the trailing $, 1$ is the fmt_arg in %1$.
+    flags
+        The flag characters, e.g. 0 is the flag in %08d
+    width
+        The width field, e.g. 20 is the width in %20s
+    precision
+        The precision field, e.g. .2 is the precision in %8.2f
+    length
+        The length modifier field, e.g. l is the length modifier in %ld
+    conversion
+        The conversion specifier character, e.g. d is the conversion
+        specification character in %ld
+
+    If the part is not found in the format its value will be None.
+    '''
+
+    result = []
+
+    for match in printf_fmt_re.finditer(s):
+        conversion = match.group(14)
+
+        # Skip literal percent signs. Test the conversion character
+        # rather than comparing the whole match with "%%": the pattern
+        # also accepts flags/width before the final %, e.g. "% %",
+        # and those must not be counted as substitutions either.
+        if conversion == '%':
+            continue
+
+        result.append({
+            'fmt': match.group(0),
+            'fmt_arg': match.group(1),
+            # The flags and width groups can match the empty string;
+            # report that as "not present" (None).
+            'flags': match.group(2) or None,
+            'width': match.group(4) or None,
+            'precision': match.group(8),
+            'length': match.group(13),
+            'conversion': conversion,
+        })
+
+    return result
def validate_substitutions_match(s1, s2, s1_name='string1', s2_name='string2'):
'''
@@ -233,22 +329,35 @@ def validate_substitution_syntax(s, s_name='string'):
return errors
-def validate_anonymous_substitutions(s, s_name='string'):
+def validate_positional_substitutions(s, prog_langs, s_name='string'):
'''
- We do not permit multiple anonymous substitutions in translation
+ We do not permit multiple positional substitutions in translation
strings (e.g. '%s') because they do not allow translators to reorder the
wording. Instead keyword substitutions should be used when there are
more than one.
'''
errors = []
+ fmts = parse_printf_fmt(s)
+ n_fmts = len(fmts)
- matches = list(python_anonymous_substitutions_regexp.finditer(s))
-
- if len(matches) > 1:
- for match in python_anonymous_substitutions_regexp.finditer(s):
- errors.append("%s has anonymous substitution '%s', use keyword substitution instead" %
- (s_name, match.group(0)))
+ errors = []
+ if n_fmts > 1:
+ for i, fmt_parts in enumerate(fmts):
+ fmt = fmt_parts['fmt']
+ fmt_arg = fmt_parts['fmt_arg']
+ width = fmt_parts['width']
+
+ if width == '*':
+ errors.append("Error: * width arg in format '%s should be indexed" % fmt)
+
+ if fmt_arg is None:
+ if 'c' in prog_langs:
+ errors.append("%s format '%s' is positional, should use indexed argument" %
+ (s_name, fmt))
+ else:
+ errors.append("%s format '%s' is positional, should use keyword substitution" %
+ (s_name, fmt))
if errors:
if show_strings:
@@ -265,7 +374,7 @@ def validate_file(file_path, validation_mode):
* validate_substitutions_match()
* validate_substitution_syntax()
- * validate_anonymous_substitutions()
+ * validate_positional_substitutions()
Returns the number of entries with errors.
'''
@@ -290,7 +399,8 @@ def validate_file(file_path, validation_mode):
have_msgstr = msgstr.strip() != ''
if validation_mode == 'pot':
if have_msgid:
- errors = validate_anonymous_substitutions(msgid, 'msgid')
+ prog_langs = get_prog_langs(entry)
+ errors = validate_positional_substitutions(msgid, prog_langs, 'msgid')
entry_errors.extend(errors)
if validation_mode == 'po':
if have_msgid and have_msgstr:
@@ -387,23 +497,28 @@ def validate_unicode_edit(msgid, msgstr):
def test_translations(po_file, lang, domain, locale_dir):
- try:
+ # The test installs the test message catalog under the xh_ZA
+ # (i.e. South African Xhosa) language by default. It would be nice to
+ # use a dummy language not associated with any real language,
+ # but the setlocale function demands the locale be a valid
+ # known locale, South African Xhosa is a reasonable choice :)
- # The test installs the test message catalog under the xh_ZA
- # (e.g. Zambia Xhosa) language by default. It would be nice to
- # use a dummy language not associated with any real language,
- # but the setlocale function demands the locale be a valid
- # known locale, Zambia Xhosa is a reasonable choice :)
+ os.environ['LANG'] = lang
- os.environ['LANG'] = lang
+ # Create a gettext translation object specifying our domain as
+ # 'ipa' and the locale_dir as 'test_locale' (i.e. where to
+ # look for the message catalog). Then use that translation
+ # object to obtain the translation functions.
- # Create a gettext translation object specifying our domain as
- # 'ipa' and the locale_dir as 'test_locale' (i.e. where to
- # look for the message catalog). Then use that translation
- # object to obtain the translation functions.
+ t = gettext.translation(domain, locale_dir)
- t = gettext.translation(domain, locale_dir)
+ get_msgstr = t.ugettext
+ get_msgstr_plural = t.ungettext
+ return po_file_iterate(po_file, get_msgstr, get_msgstr_plural)
+
+def po_file_iterate(po_file, get_msgstr, get_msgstr_plural):
+ try:
# Iterate over the msgid's
if not os.path.isfile(po_file):
print >>sys.stderr, 'file does not exist "%s"' % (po_file)
@@ -422,8 +537,8 @@ def test_translations(po_file, lang, domain, locale_dir):
if entry.msgid_plural:
msgid = entry.msgid
msgid_plural = entry.msgid_plural
- msgstr = t.ungettext(msgid, msgid_plural, 1)
- msgstr_plural = t.ungettext(msgid, msgid_plural, 2)
+ msgstr = get_msgstr_plural(msgid, msgid_plural, 1)
+ msgstr_plural = get_msgstr_plural(msgid, msgid_plural, 2)
try:
n_translations += 1
@@ -448,7 +563,7 @@ def test_translations(po_file, lang, domain, locale_dir):
else:
msgid = entry.msgid
- msgstr = t.ugettext(msgid)
+ msgstr = get_msgstr(msgid)
try:
n_translations += 1
diff --git a/tests/test_ipalib/test_text.py b/tests/test_ipalib/test_text.py
index 1931ca4fe..9f60785ff 100644
--- a/tests/test_ipalib/test_text.py
+++ b/tests/test_ipalib/test_text.py
@@ -22,10 +22,13 @@ Test the `ipalib.text` module.
"""
import os
+import shutil
+import tempfile
import re
import nose
import locale
from tests.util import raises, assert_equal
+from tests.i18n import create_po, po_file_iterate
from ipalib.request import context
from ipalib import request
from ipalib import text
@@ -35,93 +38,6 @@ singular = '%(count)d goose makes a %(dish)s'
plural = '%(count)d geese make a %(dish)s'
-# Unicode right pointing arrow
-prefix = u'\u2192' # utf-8 == '\xe2\x86\x92'
-# Unicode left pointing arrow
-suffix = u'\u2190' # utf-8 == '\xe2\x86\x90'
-
-def get_msgid(po_file):
- 'Get the first non-empty msgid from the po file'
-
- msgid_re = re.compile(r'^\s*msgid\s+"(.+)"\s*$')
- f = open(po_file)
- for line in f.readlines():
- match = msgid_re.search(line)
- if match:
- msgid = match.group(1)
- f.close()
- return msgid
- f.close()
- raise ValueError('No msgid found in %s' % po_file)
-
-def test_gettext():
- '''
- Test gettext translation
-
- We test our translations by taking the original untranslated
- string (e.g. msgid) and prepend a prefix character and then append
- a suffix character. The test consists of asserting that the first
- character in the translated string is the prefix, the last
- character in the translated string is the suffix and the
- everything between the first and last character exactly matches
- the original msgid.
-
- We use unicode characters not in the ascii character set for the
- prefix and suffix to enhance the test. To make reading the
- translated string easier the prefix is the unicode right pointing
- arrow and the suffix left pointing arrow, thus the translated
- string looks like the original string enclosed in arrows. In ASCII
- art the string "foo" would render as: "-->foo<--"
- '''
-
- localedir='install/po/test_locale'
- test_file='install/po/test.po'
-
- lang = os.environ['LANG']
- os.environ['LANG'] = 'xh_ZA'
-
- # Tell gettext that our domain is 'ipa', that locale_dir is
- # 'test_locale' (i.e. where to look for the message catalog)
- _ = text.GettextFactory('ipa', localedir)
-
- # We need a translatable string to test with, read one from the
- # test po file
- if not file_exists(test_file):
- raise nose.SkipTest(
- 'Test language not available, run "make test_lang" in install/po'
- )
- msgid = get_msgid(test_file)
-
- # Get the localized instance of the msgid, it should be a Gettext
- # instance.
- localized = _(msgid)
- assert(isinstance(localized, text.Gettext))
-
- # Get the translated string from the Gettext instance by invoking
- # unicode on it.
- translated = unicode(localized)
-
- # Perform the verifications on the translated string.
-
- # Verify the first character is the test prefix
- assert(translated[0] == prefix)
-
- # Verify the last character is the test suffix
- assert(translated[-1] == suffix)
-
- # Verify everything between the first and last character is the
- # original untranslated string
- assert(translated[1:-1] == msgid)
-
- # Reset the language and assure we don't get the test values
- context.__dict__.clear()
- os.environ['LANG'] = lang
-
- translated = unicode(localized)
-
- assert(translated[0] != prefix)
- assert(translated[-1] != suffix)
-
def test_create_translation():
f = text.create_translation
key = ('foo', None)
@@ -129,6 +45,79 @@ def test_create_translation():
assert context.__dict__[key] is t
+class test_TestLang(object):
+    '''
+    Check the entries of a generated test message catalog using the
+    lazy translation factories in `ipalib.text`.
+
+    setUp asks create_po() to build a po file and a mo file for the test
+    language from the project's pot file, all inside a temporary
+    directory. The test then hands the po file to po_file_iterate()
+    together with translation callbacks built on GettextFactory and
+    NGettextFactory, and expects a result of 0.
+    '''
+
+    def setUp(self):
+        self.tmp_dir = None
+        self.saved_lang = None
+
+        self.lang = 'xh_ZA'
+        self.domain = 'ipa'
+
+        self.ipa_i18n_dir = os.path.join(os.path.dirname(__file__), '../../install/po')
+
+        self.pot_basename = '%s.pot' % self.domain
+        self.po_basename = '%s.po' % self.lang
+        self.mo_basename = '%s.mo' % self.domain
+
+        # LANG is not guaranteed to be set (e.g. minimal build
+        # environments); None records "was not set" so tearDown can
+        # remove it again instead of failing here with a KeyError.
+        self.saved_lang = os.environ.get('LANG')
+        self.tmp_dir = tempfile.mkdtemp()
+
+        # tearDown is not run when setUp raises (including SkipTest),
+        # so from here on clean up the temporary directory ourselves
+        # before propagating the exception.
+        try:
+            self.locale_dir = os.path.join(self.tmp_dir, 'test_locale')
+            self.msg_dir = os.path.join(self.locale_dir, self.lang, 'LC_MESSAGES')
+
+            if not os.path.exists(self.msg_dir):
+                os.makedirs(self.msg_dir)
+
+            self.pot_file = os.path.join(self.ipa_i18n_dir, self.pot_basename)
+            self.mo_file = os.path.join(self.msg_dir, self.mo_basename)
+            self.po_file = os.path.join(self.tmp_dir, self.po_basename)
+
+            # A true result from create_po() is treated as failure.
+            result = create_po(self.pot_file, self.po_file, self.mo_file)
+            if result:
+                raise nose.SkipTest('Unable to create po file "%s" & mo file "%s" from pot file "%s"' %
+                                    (self.po_file, self.mo_file, self.pot_file))
+
+            if not file_exists(self.po_file):
+                raise nose.SkipTest('Test po file unavailable, run "make test" in install/po')
+
+            if not file_exists(self.mo_file):
+                raise nose.SkipTest('Test mo file unavailable, run "make test" in install/po')
+        except BaseException:
+            self.tearDown()
+            raise
+
+        self.po_file_iterate = po_file_iterate
+
+    def tearDown(self):
+        # Put LANG back exactly as it was found: restore the old value,
+        # or drop the variable if it did not exist before the test.
+        if self.saved_lang is None:
+            os.environ.pop('LANG', None)
+        else:
+            os.environ['LANG'] = self.saved_lang
+
+        if self.tmp_dir is not None:
+            shutil.rmtree(self.tmp_dir)
+            # Makes a second call harmless (setUp calls this on failure).
+            self.tmp_dir = None
+
+    def test_test_lang(self):
+        print "test_test_lang"
+        # The test installs the test message catalog under the xh_ZA
+        # (i.e. South African Xhosa) language by default. It would be
+        # nice to use a dummy language not associated with any real
+        # language, but the setlocale function demands the locale be a
+        # valid known locale, South African Xhosa is a reasonable
+        # choice :)
+
+        os.environ['LANG'] = self.lang
+
+        # Wrap the ipalib.text factories, pointed at our temporary
+        # locale directory, in plain functions with the call signatures
+        # po_file_iterate() uses for its translation callbacks.
+
+        def get_msgstr(msg):
+            gt = text.GettextFactory(localedir=self.locale_dir)(msg)
+            return unicode(gt)
+
+        def get_msgstr_plural(singular, plural, count):
+            ng = text.NGettextFactory(localedir=self.locale_dir)(singular, plural, count)
+            return ng(count)
+
+        result = self.po_file_iterate(self.po_file, get_msgstr, get_msgstr_plural)
+        assert result == 0
+
+
class test_LazyText(object):
klass = text.LazyText