1818
1919
2020with imports_under_tool ("i18n" ):
21- from pygettext import parse_spec
21+ import pygettext
22+ from pygettext import make_escapes , parse_spec
2223
2324
2425def normalize_POT_file (pot ):
@@ -517,6 +518,110 @@ def test_parse_keyword_spec(self):
517518 self .assertEqual (str (cm .exception ), message )
518519
519520
class TestCharacterEscapes(unittest.TestCase):
    """Tests for pygettext character escaping with and without --escape."""

    # Pygettext always escapes the following characters:
    special_chars = {
        '\\': r'\\',
        '\t': r'\t',
        '\r': r'\r',
        '\n': r'\n',
        '\"': r'\"',
    }

    @classmethod
    def tearDownClass(cls):
        # Reset the global 'escapes' dict to the default
        make_escapes(pass_nonascii=True)

    def test_special_chars(self):
        # special_chars are always escaped regardless of the
        # --escape option
        for pass_nonascii in (True, False):
            make_escapes(pass_nonascii=pass_nonascii)
            with self.subTest(pass_nonascii=pass_nonascii):
                for char, expected in self.special_chars.items():
                    self.assertEqual(
                        pygettext.escape(char, encoding='utf-8'), expected)

    def _char_to_octal_escape(self, char):
        """Convert a character to its octal escape representation."""
        return r"\%03o" % ord(char)

    def _octal_escape_to_string(self, escaped):
        """Convert an octal escape representation to string.

        Every three-digit octal escape found in *escaped* is read as one
        byte; the resulting byte string is decoded as UTF-8.
        """
        octal_escapes = re.findall(r'\\([0-7]{3})', escaped)
        bytestr = bytes([int(n, 8) for n in octal_escapes])
        return bytestr.decode('utf-8')

    def _check_ascii_escapes(self, encoding):
        """Check escaping of chr(0)..chr(126), skipping special_chars.

        These expectations are the same whether or not --escape is used,
        so both test_not_escaped and test_escaped share this check.
        """
        for i in range(127):
            char = chr(i)
            if char in self.special_chars:
                continue
            if i < 32:
                # First 32 characters use octal escapes
                expected = self._char_to_octal_escape(char)
            else:
                # Characters 32-126 are not escaped
                expected = char
            self.assertEqual(pygettext.escape(char, encoding=encoding),
                             expected)

    def test_not_escaped(self):
        """
        Test escaping when --escape is not used.

        When --escape is not used, only some characters within the ASCII
        range are escaped. Characters >= 128 are not escaped.
        """
        # This is the same as invoking pygettext without
        # the --escape option (the default behavior).
        make_escapes(pass_nonascii=True)
        # The encoding option is not used when --escape is not passed
        encoding = 'foo'

        self._check_ascii_escapes(encoding)

        # chr(127) uses octal escape
        self.assertEqual(pygettext.escape(chr(127), encoding=encoding),
                         '\\177')

        # All characters >= 128 are not escaped
        for i in range(128, 256):
            char = chr(i)
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

    def test_escaped(self):
        """
        Test escaping when --escape is used.

        When --escape is used, all characters are escaped, including
        chr(127) and the non-ASCII characters >= 128.
        """
        make_escapes(pass_nonascii=False)
        encoding = 'utf-8'

        self._check_ascii_escapes(encoding)

        # Characters >= 127 are escaped; decoding the octal escapes as
        # UTF-8 bytes must give back the original character.
        for i in range(127, 256):
            char = chr(i)
            escaped = pygettext.escape(char, encoding=encoding)
            decoded_char = self._octal_escape_to_string(escaped)
            self.assertEqual(char, decoded_char)
623+
624+
520625def extract_from_snapshots ():
521626 snapshots = {
522627 'messages.py' : (),
@@ -526,6 +631,8 @@ def extract_from_snapshots():
526631 'custom_keywords.py' : ('--keyword=foo' , '--keyword=nfoo:1,2' ,
527632 '--keyword=pfoo:1c,2' ,
528633 '--keyword=npfoo:1c,2,3' , '--keyword=_:1,2' ),
634+ # Test escaping non-ASCII characters
635+ 'escapes.py' : ('--escape' ,),
529636 }
530637
531638 for filename , args in snapshots .items ():
0 commit comments