You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
267 lines
7.9 KiB
267 lines
7.9 KiB
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2011 Sphere Systems Ltd
|
|
# Author: Andrew Bird
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""Unittests for the gsm encoding/decoding module"""
|
|
|
|
import unittest
|
|
import messaging.sms.gsm0338 # imports GSM7 codec
|
|
|
|
# Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
|
# Expected behaviour of the codec, keyed by the Unicode character.
# Each value is (Unicode code point, GSM 03.38 code); a GSM code above 0xff
# is a two-byte sequence whose first byte is the 0x1b escape.
# Non-ASCII and control-character keys are written as escapes so that the key
# can be checked by eye against the code point in the first tuple element.
MAP = {
    # chr(0x0000): (0x0000, 0x00),  # Null -- entry left disabled
    "@": (0x0040, 0x00),
    "\u00a3": (0x00a3, 0x01),  # £
    "$": (0x0024, 0x02),
    "\u00a5": (0x00a5, 0x03),  # ¥
    "\u00e8": (0x00e8, 0x04),  # è
    "\u00e9": (0x00e9, 0x05),  # é
    "\u00f9": (0x00f9, 0x06),  # ù
    "\u00ec": (0x00ec, 0x07),  # ì
    "\u00f2": (0x00f2, 0x08),  # ò
    "\u00c7": (0x00c7, 0x09),  # Ç  LATIN CAPITAL LETTER C WITH CEDILLA
    "\n": (0x000a, 0x0a),  # Linefeed
    "\u00d8": (0x00d8, 0x0b),  # Ø
    "\u00f8": (0x00f8, 0x0c),  # ø
    "\r": (0x000d, 0x0d),  # Carriage return
    "\u00c5": (0x00c5, 0x0e),  # Å
    "\u00e5": (0x00e5, 0x0f),  # å
    "\u0394": (0x0394, 0x10),  # Δ
    "_": (0x005f, 0x11),
    "\u03a6": (0x03a6, 0x12),  # Φ
    "\u0393": (0x0393, 0x13),  # Γ
    "\u039b": (0x039b, 0x14),  # Λ
    "\u03a9": (0x03a9, 0x15),  # Ω
    "\u03a0": (0x03a0, 0x16),  # Π
    "\u03a8": (0x03a8, 0x17),  # Ψ
    "\u03a3": (0x03a3, 0x18),  # Σ
    "\u0398": (0x0398, 0x19),  # Θ
    "\u039e": (0x039e, 0x1a),  # Ξ
    # Escape to extension table (displayed as NBSP, on decode of invalid
    # escape sequence)
    "\u00a0": (0x00a0, 0x1b),
    "\u00c6": (0x00c6, 0x1c),  # Æ
    "\u00e6": (0x00e6, 0x1d),  # æ
    "\u00df": (0x00df, 0x1e),  # ß
    "\u00c9": (0x00c9, 0x1f),  # É
    " ": (0x0020, 0x20),
    "!": (0x0021, 0x21),
    '"': (0x0022, 0x22),
    "#": (0x0023, 0x23),
    "\u00a4": (0x00a4, 0x24),  # ¤
    "%": (0x0025, 0x25),
    "&": (0x0026, 0x26),
    "'": (0x0027, 0x27),
    "{": (0x007b, 0x1b28),
    "}": (0x007d, 0x1b29),
    "*": (0x002a, 0x2a),
    "+": (0x002b, 0x2b),
    ",": (0x002c, 0x2c),
    "-": (0x002d, 0x2d),
    ".": (0x002e, 0x2e),
    "\\": (0x005c, 0x1b2f),
    "0": (0x0030, 0x30),
    "1": (0x0031, 0x31),
    "2": (0x0032, 0x32),
    "3": (0x0033, 0x33),
    "4": (0x0034, 0x34),
    "5": (0x0035, 0x35),
    "6": (0x0036, 0x36),
    "7": (0x0037, 0x37),
    "8": (0x0038, 0x38),
    "9": (0x0039, 0x39),
    ":": (0x003a, 0x3a),
    ";": (0x003b, 0x3b),
    "[": (0x005b, 0x1b3c),
    "\f": (0x000c, 0x1b0a),  # Formfeed
    "]": (0x005d, 0x1b3e),
    "?": (0x003f, 0x3f),
    "|": (0x007c, 0x1b40),
    "A": (0x0041, 0x41),
    "B": (0x0042, 0x42),
    "C": (0x0043, 0x43),
    "D": (0x0044, 0x44),
    "E": (0x0045, 0x45),
    "F": (0x0046, 0x46),
    "G": (0x0047, 0x47),
    "H": (0x0048, 0x48),
    "I": (0x0049, 0x49),
    "J": (0x004a, 0x4a),
    "K": (0x004b, 0x4b),
    "L": (0x004c, 0x4c),
    "M": (0x004d, 0x4d),
    "N": (0x004e, 0x4e),
    "O": (0x004f, 0x4f),
    "P": (0x0050, 0x50),
    "Q": (0x0051, 0x51),
    "R": (0x0052, 0x52),
    "S": (0x0053, 0x53),
    "T": (0x0054, 0x54),
    "U": (0x0055, 0x55),
    "V": (0x0056, 0x56),
    "W": (0x0057, 0x57),
    "X": (0x0058, 0x58),
    "Y": (0x0059, 0x59),
    "Z": (0x005a, 0x5a),
    "\u00c4": (0x00c4, 0x5b),  # Ä
    "\u00d6": (0x00d6, 0x5c),  # Ö
    "\u00d1": (0x00d1, 0x5d),  # Ñ
    "\u00dc": (0x00dc, 0x5e),  # Ü
    "\u00a7": (0x00a7, 0x5f),  # §
    "\u00bf": (0x00bf, 0x60),  # ¿
    "a": (0x0061, 0x61),
    "b": (0x0062, 0x62),
    "c": (0x0063, 0x63),
    "d": (0x0064, 0x64),
    "\u20ac": (0x20ac, 0x1b65),  # €
    "f": (0x0066, 0x66),
    "g": (0x0067, 0x67),
    "h": (0x0068, 0x68),
    "<": (0x003c, 0x3c),
    "j": (0x006a, 0x6a),
    "k": (0x006b, 0x6b),
    "l": (0x006c, 0x6c),
    "m": (0x006d, 0x6d),
    "n": (0x006e, 0x6e),
    "~": (0x007e, 0x1b3d),
    "p": (0x0070, 0x70),
    "q": (0x0071, 0x71),
    "r": (0x0072, 0x72),
    "s": (0x0073, 0x73),
    "t": (0x0074, 0x74),
    ">": (0x003e, 0x3e),
    "v": (0x0076, 0x76),
    "i": (0x0069, 0x69),
    "x": (0x0078, 0x78),
    "^": (0x005e, 0x1b14),
    "z": (0x007a, 0x7a),
    "\u00e4": (0x00e4, 0x7b),  # ä
    "\u00f6": (0x00f6, 0x7c),  # ö
    "\u00f1": (0x00f1, 0x7d),  # ñ
    "\u00fc": (0x00fc, 0x7e),  # ü
    "\u00e0": (0x00e0, 0x7f),  # à
    "\u00a1": (0x00a1, 0x40),  # ¡
    "/": (0x002f, 0x2f),
    "o": (0x006f, 0x6f),
    "u": (0x0075, 0x75),
    "w": (0x0077, 0x77),
    "y": (0x0079, 0x79),
    "e": (0x0065, 0x65),
    "=": (0x003d, 0x3d),
    "(": (0x0028, 0x28),
    ")": (0x0029, 0x29),
}
|
|
|
|
# Greek capitals and the GSM code each is expected to encode to.
# Values are (Unicode code point, GSM code). The keys are spelled as escapes
# because the glyphs might look like Latin uppercase, but they aren't.
GREEK_MAP = {
    "\u0391": (0x0391, 0x41),  # GREEK CAPITAL LETTER ALPHA
    "\u0392": (0x0392, 0x42),  # GREEK CAPITAL LETTER BETA
    "\u0395": (0x0395, 0x45),  # GREEK CAPITAL LETTER EPSILON
    "\u0397": (0x0397, 0x48),  # GREEK CAPITAL LETTER ETA
    "\u0399": (0x0399, 0x49),  # GREEK CAPITAL LETTER IOTA
    "\u039a": (0x039a, 0x4b),  # GREEK CAPITAL LETTER KAPPA
    "\u039c": (0x039c, 0x4d),  # GREEK CAPITAL LETTER MU
    "\u039d": (0x039d, 0x4e),  # GREEK CAPITAL LETTER NU
    "\u039f": (0x039f, 0x4f),  # GREEK CAPITAL LETTER OMICRON
    "\u03a1": (0x03a1, 0x50),  # GREEK CAPITAL LETTER RHO
    "\u03a4": (0x03a4, 0x54),  # GREEK CAPITAL LETTER TAU
    "\u03a7": (0x03a7, 0x58),  # GREEK CAPITAL LETTER CHI
    "\u03a5": (0x03a5, 0x59),  # GREEK CAPITAL LETTER UPSILON
    "\u0396": (0x0396, 0x5a),  # GREEK CAPITAL LETTER ZETA
}
|
|
|
|
# One-way encoder quirk, as (Unicode code point, GSM code): small c with
# cedilla is expected to encode to 0x09.
QUIRK_MAP = {
    "\u00e7": (0x00e7, 0x09),  # ç  LATIN SMALL LETTER C WITH CEDILLA
}
|
|
|
|
# Sentinel the tests substitute when encoder output has an unexpected length,
# so a failure shows up as a value comparison rather than an exception.
BAD = -1
|
|
|
|
|
|
class TestEncodingFunctions(unittest.TestCase):
    """Check the ``gsm0338`` codec and ``is_gsm_text`` against the tables.

    ``MAP``, ``GREEK_MAP`` and ``QUIRK_MAP`` map a Unicode character to a
    ``(code point, GSM code)`` pair; the tests encode or decode each entry
    and compare the result with the table.
    """

    @staticmethod
    def _gsm_to_int(s_gsm, max_len=2):
        """Fold encoder output into a single integer.

        ``s_gsm`` is the ``bytes`` object returned by ``str.encode``.  One
        byte yields its value and two bytes yield ``(first << 8) + second``.
        Any other length (or more than ``max_len`` bytes) yields ``BAD`` so
        that the caller's assertion reports a value mismatch instead of an
        exception being raised here.
        """
        # Indexing bytes gives ints in Python 3, so ord(s_gsm[0]) would
        # raise TypeError; int.from_bytes handles both lengths uniformly.
        if 1 <= len(s_gsm) <= max_len:
            return int.from_bytes(s_gsm, 'big')
        return BAD

    @staticmethod
    def _int_to_gsm(i_gsm):
        """Return the one- or two-byte GSM sequence for table code ``i_gsm``.

        Codes up to 0xff become a single byte; larger codes become two
        big-endian bytes.  ``int.to_bytes`` raises ``OverflowError`` for a
        code above 0xffff rather than silently reusing an earlier value.
        """
        return i_gsm.to_bytes(1 if i_gsm <= 0xff else 2, 'big')

    def test_encoding_supported_unicode_gsm(self):
        """Every ``MAP`` character encodes to its tabulated GSM code."""
        for key, (_, expected) in MAP.items():
            # Use 'ignore' so that we see the code tested, not an exception
            s_gsm = key.encode('gsm0338', 'ignore')
            i_gsm = self._gsm_to_int(s_gsm)

            # We shouldn't generate an invalid escape sequence: NBSP is
            # expected to encode to neither one nor two bytes, hence BAD.
            if key == chr(0x00a0):
                self.assertEqual(BAD, i_gsm, 'key %r' % key)
            else:
                self.assertEqual(expected, i_gsm, 'key %r' % key)

    def test_encoding_supported_greek_unicode_gsm(self):
        """Every ``GREEK_MAP`` capital encodes to a single tabulated byte."""
        # Note: Conversion is one way, hence no corresponding decode test
        for key, (_, expected) in GREEK_MAP.items():
            # Use 'replace' so that we trigger the mapping
            s_gsm = key.encode('gsm0338', 'replace')
            # Only a single byte is acceptable here; anything else is BAD.
            i_gsm = self._gsm_to_int(s_gsm, max_len=1)
            self.assertEqual(expected, i_gsm, 'key %r' % key)

    def test_encoding_supported_quirk_unicode_gsm(self):
        """Every ``QUIRK_MAP`` character encodes to a single tabulated byte."""
        # Note: Conversion is one way, hence no corresponding decode test
        for key, (_, expected) in QUIRK_MAP.items():
            # Use 'replace' so that we trigger the mapping
            s_gsm = key.encode('gsm0338', 'replace')
            # Only a single byte is acceptable here; anything else is BAD.
            i_gsm = self._gsm_to_int(s_gsm, max_len=1)
            self.assertEqual(expected, i_gsm, 'key %r' % key)

    def test_decoding_supported_unicode_gsm(self):
        """Every ``MAP`` GSM code decodes back to its Unicode code point."""
        for key, (codepoint, i_gsm) in MAP.items():
            # The codec must be fed bytes: str has no .decode() in Python 3.
            s_gsm = self._int_to_gsm(i_gsm)
            s_unicode = s_gsm.decode('gsm0338', 'strict')
            self.assertEqual(codepoint, ord(s_unicode), 'key %r' % key)

    def test_is_gsm_text_true(self):
        """``is_gsm_text`` accepts every ``MAP`` character except NBSP."""
        for key in MAP:
            if key == chr(0x00a0):
                continue
            self.assertEqual(
                messaging.sms.gsm0338.is_gsm_text(key), True, 'key %r' % key)

    def test_is_gsm_text_false(self):
        """``is_gsm_text`` rejects NBSP and every BMP character not in MAP."""
        is_gsm_text = messaging.sms.gsm0338.is_gsm_text
        self.assertEqual(is_gsm_text(chr(0x00a0)), False)

        for i in range(1, 0xffff + 1):
            char = chr(i)
            if char in MAP:
                continue
            # Note: it's a little odd, but on error we want to see values
            if is_gsm_text(char) is not False:
                self.assertEqual(BAD, i)
|