diff options
-rw-r--r-- | install/po/Makefile.in | 33 | ||||
-rwxr-xr-x | tests/i18n.py (renamed from install/po/test_i18n.py) | 179 | ||||
-rw-r--r-- | tests/test_ipalib/test_text.py | 163 |
3 files changed, 228 insertions, 147 deletions
diff --git a/install/po/Makefile.in b/install/po/Makefile.in index 4bee861a8..d65ba0c70 100644 --- a/install/po/Makefile.in +++ b/install/po/Makefile.in @@ -15,6 +15,7 @@ MSGINIT = @MSGINIT@ MSGMERGE = @MSGMERGE@ MSGCMP = @MSGCMP@ TX = @TX@ +IPA_TEST_I18N = ../../tests/i18n.py DOMAIN = @GETTEXT_DOMAIN@ MSGMERGE_UPDATE = $(MSGMERGE) --update @@ -129,7 +130,7 @@ update-pot: echo "$(DOMAIN).pot unmodified" ; \ fi || : @rm -f $(DOMAIN).pot.update $(DOMAIN).pot.update.tmp $(DOMAIN).pot.tmp - ./test_i18n.py --show-strings --validate-pot $(DOMAIN).pot + $(IPA_TEST_I18N) --show-strings --validate-pot $(DOMAIN).pot msg-stats: @pot_count=`$(MSGFMT) --statistics $(DOMAIN).pot 2>&1 | \ @@ -169,38 +170,14 @@ distclean: clean maintainer-clean: distclean -# We test our translations by taking the original untranslated string -# (e.g. msgid) and prepend a prefix character and then append a suffix -# character. The test consists of asserting that the first character in the -# translated string is the prefix, the last character in the translated string -# is the suffix and the everything between the first and last character exactly -# matches the original msgid. -# -# We use unicode characters not in the ascii character set for the prefix and -# suffix to enhance the test. To make reading the translated string easier the -# prefix is the unicode right pointing arrow and the suffix left pointing arrow, -# thus the translated string looks like the original string enclosed in -# arrows. In ASCII art the string "foo" would render as: -# -->foo<-- -# -# Unicode right pointing arrow: u'\u2192', utf-8 = '\xe2\x86\x92' -# Unicode left pointing arrow: u'\u2190', utf-8 = '\xe2\x86\x90' -# -# The sed command below performs the prefix and suffix substitution. -# -# When msginit is invoked with an English target locale it copies the msgid -# into the msgstr. This is an undocumented feature of msginit. Otherwise the -# msgstr will be set to the empty string (i.e. untranslated). 
We depend on -# the msgid being copied to the msgstr. - test: - ./test_i18n.py --test-gettext + $(IPA_TEST_I18N) --test-gettext validate-pot: - ./test_i18n.py --show-strings --validate-pot $(DOMAIN).pot + $(IPA_TEST_I18N) --show-strings --validate-pot $(DOMAIN).pot validate-po: - ./test_i18n.py --show-strings --validate-po $(po_files) + $(IPA_TEST_I18N) --show-strings --validate-po $(po_files) debug: @echo Python potfiles: diff --git a/install/po/test_i18n.py b/tests/i18n.py index beb43ccaa..067bc5e39 100755 --- a/install/po/test_i18n.py +++ b/tests/i18n.py @@ -19,6 +19,8 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # +# WARNING: Do not import ipa modules, this is also used as a +# stand-alone script (invoked from install/po Makefile). import optparse import sys import gettext @@ -86,14 +88,108 @@ _shell_substitution_regexp = re.compile(r'\$(\s*)([({]?)(\s*)\w+(\s*)([)}]?)') # group 4: whitespace between variable and ending delimiter # group 5: ending delimiter -# We do not permit anonymous substitutions in translation strings -# (e.g. '%s occurred' % error) because they do not provide the -# necessary context to translators, they would only see -# '%s occurred'. Instead a keyword substitution should be used -# (e.g. '%(error)s occurred' % {'error': error_message}) +printf_fmt_re = re.compile( + r"%" # start + "(\d+\$)?" # fmt_arg (group 1) + "(([#0 +'I]|-(?!\d))*)" # flags (group 2) + "(([+-]?([1-9][0-9]*)?)|(\*|\*\d+\$))?" # width (group 4) + "(\.((-?\d*)|(\*|)|(\*\d+\$)))?" # precision (group 8) + "(h|hh|l|ll|L|j|z|t)?" # length (group 13) + "([diouxXeEfFgGaAcspnm%])") # conversion (group 14) -# Python anonymous format substitutions, e.g. %s, %d, %f, etc. -python_anonymous_substitutions_regexp = re.compile(r'%[srduoxf]\b') # e.g. 
%s +#------------------------------------------------------------------------------- + +def get_prog_langs(entry): + ''' + Given an entry in a pot or po file return a set of the + programming languages it was found in. It needs to be a set + because the same msgid may appear in more than one file which may + be in different programming languages. + + Note: One might think you could use the c-format etc. flags + attached to the entry to make this determination, but you can't. Those + flags refer to the style of the string not the programming + language it came from. Also the flags are often omitted and/or are + inaccurate. + + For now we just look at the file extension. If we knew the path to + the file we could use other heuristics such as looking for the + shebang interpreter string. + + The set of possible language types which might be returned is: + + * c + * python + + ''' + result = set() + + for location in entry.occurrences: + filename = location[0] + ext = os.path.splitext(filename)[1] + + if ext in ('.c', '.h', '.cxx', '.cpp', '.hxx'): + result.add('c') + elif ext in ('.py'): + result.add('python') + + return result + +def parse_printf_fmt(s): + ''' + Parse a printf style format string and return a list of format + conversions found in the string. + + Each conversion specification is introduced by the character %, and + ends with a conversion specifier. In between there may be (in this + order) zero or more flags, an optional minimum field width, an + optional precision and an optional length modifier. See "man 3 + printf" for details. + + Each item in the returned list is a dict whose keys are the + sub-parts of a conversion specification. The key and values are: + + fmt + The entire format conversion specification + fmt_arg + The positional index of the matching argument in the argument + list, e.g. 
%1$ indicates the first argument in the argument list + will be read for this conversion, excludes the leading % but + includes the trailing $, 1$ is the fmt_arg in %1$. + flags + The flag characters, e.g. 0 is the flag in %08d + width + The width field, e.g. 20 is the width in %20s + precision + The precision field, e.g. .2 is the precision in %8.2f + length + The length modifier field, e.g. l is the length modifier in %ld + conversion + The conversion specifier character, e.g. d is the conversion + specification character in %ld + + If the part is not found in the format its value will be None. + ''' + + result = [] + + # get list of all matches, but skip escaped % + matches = [x for x in printf_fmt_re.finditer(s) if x.group(0) != "%%"] + + # build dict of each sub-part of the format, append to result + for match in matches: + parts = {} + parts['fmt'] = match.group(0) + parts['fmt_arg'] = match.group(1) + parts['flags'] = match.group(2) or None + parts['width'] = match.group(4) or None + parts['precision'] = match.group(8) + parts['length'] = match.group(13) + parts['conversion'] = match.group(14) + + result.append(parts) + + return result def validate_substitutions_match(s1, s2, s1_name='string1', s2_name='string2'): ''' @@ -233,22 +329,35 @@ def validate_substitution_syntax(s, s_name='string'): return errors -def validate_anonymous_substitutions(s, s_name='string'): +def validate_positional_substitutions(s, prog_langs, s_name='string'): ''' - We do not permit multiple anonymous substitutions in translation + We do not permit multiple positional substitutions in translation strings (e.g. '%s') because they do not allow translators to reorder the wording. Instead keyword substitutions should be used when there are more than one. 
''' errors = [] + fmts = parse_printf_fmt(s) + n_fmts = len(fmts) - matches = list(python_anonymous_substitutions_regexp.finditer(s)) - - if len(matches) > 1: - for match in python_anonymous_substitutions_regexp.finditer(s): - errors.append("%s has anonymous substitution '%s', use keyword substitution instead" % - (s_name, match.group(0))) + errors = [] + if n_fmts > 1: + for i, fmt_parts in enumerate(fmts): + fmt = fmt_parts['fmt'] + fmt_arg = fmt_parts['fmt_arg'] + width = fmt_parts['width'] + + if width == '*': + errors.append("Error: * width arg in format '%s should be indexed" % fmt) + + if fmt_arg is None: + if 'c' in prog_langs: + errors.append("%s format '%s' is positional, should use indexed argument" % + (s_name, fmt)) + else: + errors.append("%s format '%s' is positional, should use keyword substitution" % + (s_name, fmt)) if errors: if show_strings: @@ -265,7 +374,7 @@ def validate_file(file_path, validation_mode): * validate_substitutions_match() * validate_substitution_syntax() - * validate_anonymous_substitutions() + * validate_positional_substitutions() Returns the number of entries with errors. ''' @@ -290,7 +399,8 @@ def validate_file(file_path, validation_mode): have_msgstr = msgstr.strip() != '' if validation_mode == 'pot': if have_msgid: - errors = validate_anonymous_substitutions(msgid, 'msgid') + prog_langs = get_prog_langs(entry) + errors = validate_positional_substitutions(msgid, prog_langs, 'msgid') entry_errors.extend(errors) if validation_mode == 'po': if have_msgid and have_msgstr: @@ -387,23 +497,28 @@ def validate_unicode_edit(msgid, msgstr): def test_translations(po_file, lang, domain, locale_dir): - try: + # The test installs the test message catalog under the xh_ZA + # (i.e. South Africa Xhosa) language by default. 
It would be nice to + # use a dummy language not associated with any real language, + # but the setlocale function demands the locale be a valid + # known locale, South Africa Xhosa is a reasonable choice :) - # The test installs the test message catalog under the xh_ZA - # (e.g. Zambia Xhosa) language by default. It would be nice to - # use a dummy language not associated with any real language, - # but the setlocale function demands the locale be a valid - # known locale, Zambia Xhosa is a reasonable choice :) + os.environ['LANG'] = lang - os.environ['LANG'] = lang + # Create a gettext translation object specifying our domain as + # 'ipa' and the locale_dir as 'test_locale' (i.e. where to + # look for the message catalog). Then use that translation + # object to obtain the translation functions. - # Create a gettext translation object specifying our domain as - # 'ipa' and the locale_dir as 'test_locale' (i.e. where to - # look for the message catalog). Then use that translation - # object to obtain the translation functions. 
+ t = gettext.translation(domain, locale_dir) - t = gettext.translation(domain, locale_dir) + get_msgstr = t.ugettext + get_msgstr_plural = t.ungettext + return po_file_iterate(po_file, get_msgstr, get_msgstr_plural) + +def po_file_iterate(po_file, get_msgstr, get_msgstr_plural): + try: # Iterate over the msgid's if not os.path.isfile(po_file): print >>sys.stderr, 'file does not exist "%s"' % (po_file) @@ -422,8 +537,8 @@ def test_translations(po_file, lang, domain, locale_dir): if entry.msgid_plural: msgid = entry.msgid msgid_plural = entry.msgid_plural - msgstr = t.ungettext(msgid, msgid_plural, 1) - msgstr_plural = t.ungettext(msgid, msgid_plural, 2) + msgstr = get_msgstr_plural(msgid, msgid_plural, 1) + msgstr_plural = get_msgstr_plural(msgid, msgid_plural, 2) try: n_translations += 1 @@ -448,7 +563,7 @@ def test_translations(po_file, lang, domain, locale_dir): else: msgid = entry.msgid - msgstr = t.ugettext(msgid) + msgstr = get_msgstr(msgid) try: n_translations += 1 diff --git a/tests/test_ipalib/test_text.py b/tests/test_ipalib/test_text.py index 1931ca4fe..9f60785ff 100644 --- a/tests/test_ipalib/test_text.py +++ b/tests/test_ipalib/test_text.py @@ -22,10 +22,13 @@ Test the `ipalib.text` module. 
""" import os +import shutil +import tempfile import re import nose import locale from tests.util import raises, assert_equal +from tests.i18n import create_po, po_file_iterate from ipalib.request import context from ipalib import request from ipalib import text @@ -35,93 +38,6 @@ singular = '%(count)d goose makes a %(dish)s' plural = '%(count)d geese make a %(dish)s' -# Unicode right pointing arrow -prefix = u'\u2192' # utf-8 == '\xe2\x86\x92' -# Unicode left pointing arrow -suffix = u'\u2190' # utf-8 == '\xe2\x86\x90' - -def get_msgid(po_file): - 'Get the first non-empty msgid from the po file' - - msgid_re = re.compile(r'^\s*msgid\s+"(.+)"\s*$') - f = open(po_file) - for line in f.readlines(): - match = msgid_re.search(line) - if match: - msgid = match.group(1) - f.close() - return msgid - f.close() - raise ValueError('No msgid found in %s' % po_file) - -def test_gettext(): - ''' - Test gettext translation - - We test our translations by taking the original untranslated - string (e.g. msgid) and prepend a prefix character and then append - a suffix character. The test consists of asserting that the first - character in the translated string is the prefix, the last - character in the translated string is the suffix and the - everything between the first and last character exactly matches - the original msgid. - - We use unicode characters not in the ascii character set for the - prefix and suffix to enhance the test. To make reading the - translated string easier the prefix is the unicode right pointing - arrow and the suffix left pointing arrow, thus the translated - string looks like the original string enclosed in arrows. In ASCII - art the string "foo" would render as: "-->foo<--" - ''' - - localedir='install/po/test_locale' - test_file='install/po/test.po' - - lang = os.environ['LANG'] - os.environ['LANG'] = 'xh_ZA' - - # Tell gettext that our domain is 'ipa', that locale_dir is - # 'test_locale' (i.e. 
where to look for the message catalog) - _ = text.GettextFactory('ipa', localedir) - - # We need a translatable string to test with, read one from the - # test po file - if not file_exists(test_file): - raise nose.SkipTest( - 'Test language not available, run "make test_lang" in install/po' - ) - msgid = get_msgid(test_file) - - # Get the localized instance of the msgid, it should be a Gettext - # instance. - localized = _(msgid) - assert(isinstance(localized, text.Gettext)) - - # Get the translated string from the Gettext instance by invoking - # unicode on it. - translated = unicode(localized) - - # Perform the verifications on the translated string. - - # Verify the first character is the test prefix - assert(translated[0] == prefix) - - # Verify the last character is the test suffix - assert(translated[-1] == suffix) - - # Verify everything between the first and last character is the - # original untranslated string - assert(translated[1:-1] == msgid) - - # Reset the language and assure we don't get the test values - context.__dict__.clear() - os.environ['LANG'] = lang - - translated = unicode(localized) - - assert(translated[0] != prefix) - assert(translated[-1] != suffix) - def test_create_translation(): f = text.create_translation key = ('foo', None) @@ -129,6 +45,79 @@ def test_create_translation(): assert context.__dict__[key] is t +class test_TestLang(object): + def setUp(self): + self.tmp_dir = None + self.saved_lang = None + + self.lang = 'xh_ZA' + self.domain = 'ipa' + + self.ipa_i18n_dir = os.path.join(os.path.dirname(__file__), '../../install/po') + + self.pot_basename = '%s.pot' % self.domain + self.po_basename = '%s.po' % self.lang + self.mo_basename = '%s.mo' % self.domain + + self.tmp_dir = tempfile.mkdtemp() + self.saved_lang = os.environ['LANG'] + + self.locale_dir = os.path.join(self.tmp_dir, 'test_locale') + self.msg_dir = os.path.join(self.locale_dir, self.lang, 'LC_MESSAGES') + + if not os.path.exists(self.msg_dir): + 
os.makedirs(self.msg_dir) + + self.pot_file = os.path.join(self.ipa_i18n_dir, self.pot_basename) + self.mo_file = os.path.join(self.msg_dir, self.mo_basename) + self.po_file = os.path.join(self.tmp_dir, self.po_basename) + + result = create_po(self.pot_file, self.po_file, self.mo_file) + if result: + raise nose.SkipTest('Unable to create po file "%s" & mo file "%s" from pot file "%s"' % + (self.po_file, self.mo_file, self.pot_file)) + + if not file_exists(self.po_file): + raise nose.SkipTest('Test po file unavailable, run "make test" in install/po') + + if not file_exists(self.mo_file): + raise nose.SkipTest('Test mo file unavailable, run "make test" in install/po') + + self.po_file_iterate = po_file_iterate + + def tearDown(self): + if self.saved_lang is not None: + os.environ['LANG'] = self.saved_lang + + if self.tmp_dir is not None: + shutil.rmtree(self.tmp_dir) + + def test_test_lang(self): + print "test_test_lang" + # The test installs the test message catalog under the xh_ZA + # (i.e. South Africa Xhosa) language by default. It would be nice to + # use a dummy language not associated with any real language, + # but the setlocale function demands the locale be a valid + # known locale, South Africa Xhosa is a reasonable choice :) + + os.environ['LANG'] = self.lang + + # Create a gettext translation object specifying our domain as + # 'ipa' and the locale_dir as 'test_locale' (i.e. where to + # look for the message catalog). Then use that translation + # object to obtain the translation functions. + + def get_msgstr(msg): + gt = text.GettextFactory(localedir=self.locale_dir)(msg) + return unicode(gt) + + def get_msgstr_plural(singular, plural, count): + ng = text.NGettextFactory(localedir=self.locale_dir)(singular, plural, count) + return ng(count) + + result = self.po_file_iterate(self.po_file, get_msgstr, get_msgstr_plural) + assert result == 0 + class test_LazyText(object): klass = text.LazyText |