djing/messaging/test/test_gsm_encoding.py


								# -*- coding: utf-8 -*-

								# Copyright (C) 2011  Sphere Systems Ltd

								# Author:  Andrew Bird

								#

								# This program is free software; you can redistribute it and/or modify

								# it under the terms of the GNU General Public License as published by

								# the Free Software Foundation; either version 2 of the License, or

								# (at your option) any later version.

								#

								# This program is distributed in the hope that it will be useful,

								# but WITHOUT ANY WARRANTY; without even the implied warranty of

								# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								# GNU General Public License for more details.

								#

								# You should have received a copy of the GNU General Public License along

								# with this program; if not, write to the Free Software Foundation, Inc.,

								# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

								"""Unittests for the gsm encoding/decoding module"""


								import unittest

								import messaging.sms.gsm0338  # imports GSM7 codec


								# Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
								#
								# Each key is a single Unicode character; each value is a tuple of
								# (Unicode code point, GSM 03.38 code).  GSM codes above 0xff are two-byte
								# sequences whose high byte is the 0x1b escape to the extension table
								# (e.g. '{' -> 0x1b28); the tests split them into a high and a low byte.

								MAP = {

								#    chr(0x0000): (0x0000, 0x00),  # Null -- entry disabled, so not exercised

								    '@': (0x0040, 0x00),

								    '£': (0x00a3, 0x01),

								    '$': (0x0024, 0x02),

								    '¥': (0x00a5, 0x03),

								    'è': (0x00e8, 0x04),

								    'é': (0x00e9, 0x05),

								    'ù': (0x00f9, 0x06),

								    'ì': (0x00ec, 0x07),

								    'ò': (0x00f2, 0x08),

								    'Ç': (0x00c7, 0x09),  #   LATIN CAPITAL LETTER C WITH CEDILLA

								    chr(0x000a): (0x000a, 0x0a),  # Linefeed

								    'Ø': (0x00d8, 0x0b),

								    'ø': (0x00f8, 0x0c),

								    chr(0x000d): (0x000d, 0x0d),  # Carriage return

								    'Å': (0x00c5, 0x0e),

								    'å': (0x00e5, 0x0f),

								    'Δ': (0x0394, 0x10),

								    '_': (0x005f, 0x11),

								    'Φ': (0x03a6, 0x12),

								    'Γ': (0x0393, 0x13),

								    'Λ': (0x039b, 0x14),

								    'Ω': (0x03a9, 0x15),

								    'Π': (0x03a0, 0x16),

								    'Ψ': (0x03a8, 0x17),

								    'Σ': (0x03a3, 0x18),

								    'Θ': (0x0398, 0x19),

								    'Ξ': (0x039e, 0x1a),

								    chr(0x00a0): (0x00a0, 0x1b),  #  Escape to extension table (displayed

								                                     #  as NBSP, on decode of invalid escape

								                                     #  sequence)

								    'Æ': (0x00c6, 0x1c),

								    'æ': (0x00e6, 0x1d),

								    'ß': (0x00df, 0x1e),

								    'É': (0x00c9, 0x1f),

								    ' ': (0x0020, 0x20),

								    '!': (0x0021, 0x21),

								    '"': (0x0022, 0x22),

								    '#': (0x0023, 0x23),

								    '¤': (0x00a4, 0x24),

								    '%': (0x0025, 0x25),

								    '&': (0x0026, 0x26),

								    '\'': (0x0027, 0x27),

								    '{': (0x007b, 0x1b28),

								    '}': (0x007d, 0x1b29),

								    '*': (0x002a, 0x2a),

								    '+': (0x002b, 0x2b),

								    ',': (0x002c, 0x2c),

								    '-': (0x002d, 0x2d),

								    '.': (0x002e, 0x2e),

								    '\\': (0x005c, 0x1b2f),

								    '0': (0x0030, 0x30),

								    '1': (0x0031, 0x31),

								    '2': (0x0032, 0x32),

								    '3': (0x0033, 0x33),

								    '4': (0x0034, 0x34),

								    '5': (0x0035, 0x35),

								    '6': (0x0036, 0x36),

								    '7': (0x0037, 0x37),

								    '8': (0x0038, 0x38),

								    '9': (0x0039, 0x39),

								    ':': (0x003a, 0x3a),

								    ';': (0x003b, 0x3b),

								    '[': (0x005b, 0x1b3c),

								    chr(0x000c): (0x000c, 0x1b0a),  # Formfeed

								    ']': (0x005d, 0x1b3e),

								    '?': (0x003f, 0x3f),

								    '|': (0x007c, 0x1b40),

								    'A': (0x0041, 0x41),

								    'B': (0x0042, 0x42),

								    'C': (0x0043, 0x43),

								    'D': (0x0044, 0x44),

								    'E': (0x0045, 0x45),

								    'F': (0x0046, 0x46),

								    'G': (0x0047, 0x47),

								    'H': (0x0048, 0x48),

								    'I': (0x0049, 0x49),

								    'J': (0x004a, 0x4a),

								    'K': (0x004b, 0x4b),

								    'L': (0x004c, 0x4c),

								    'M': (0x004d, 0x4d),

								    'N': (0x004e, 0x4e),

								    'O': (0x004f, 0x4f),

								    'P': (0x0050, 0x50),

								    'Q': (0x0051, 0x51),

								    'R': (0x0052, 0x52),

								    'S': (0x0053, 0x53),

								    'T': (0x0054, 0x54),

								    'U': (0x0055, 0x55),

								    'V': (0x0056, 0x56),

								    'W': (0x0057, 0x57),

								    'X': (0x0058, 0x58),

								    'Y': (0x0059, 0x59),

								    'Z': (0x005a, 0x5a),

								    'Ä': (0x00c4, 0x5b),

								    'Ö': (0x00d6, 0x5c),

								    'Ñ': (0x00d1, 0x5d),

								    'Ü': (0x00dc, 0x5e),

								    '§': (0x00a7, 0x5f),

								    '¿': (0x00bf, 0x60),

								    'a': (0x0061, 0x61),

								    'b': (0x0062, 0x62),

								    'c': (0x0063, 0x63),

								    'd': (0x0064, 0x64),

								    '€': (0x20ac, 0x1b65),

								    'f': (0x0066, 0x66),

								    'g': (0x0067, 0x67),

								    'h': (0x0068, 0x68),

								    '<': (0x003c, 0x3c),

								    'j': (0x006a, 0x6a),

								    'k': (0x006b, 0x6b),

								    'l': (0x006c, 0x6c),

								    'm': (0x006d, 0x6d),

								    'n': (0x006e, 0x6e),

								    '~': (0x007e, 0x1b3d),

								    'p': (0x0070, 0x70),

								    'q': (0x0071, 0x71),

								    'r': (0x0072, 0x72),

								    's': (0x0073, 0x73),

								    't': (0x0074, 0x74),

								    '>': (0x003e, 0x3e),

								    'v': (0x0076, 0x76),

								    'i': (0x0069, 0x69),

								    'x': (0x0078, 0x78),

								    '^': (0x005e, 0x1b14),

								    'z': (0x007a, 0x7a),

								    'ä': (0x00e4, 0x7b),

								    'ö': (0x00f6, 0x7c),

								    'ñ': (0x00f1, 0x7d),

								    'ü': (0x00fc, 0x7e),

								    'à': (0x00e0, 0x7f),

								    '¡': (0x00a1, 0x40),

								    '/': (0x002f, 0x2f),

								    'o': (0x006f, 0x6f),

								    'u': (0x0075, 0x75),

								    'w': (0x0077, 0x77),

								    'y': (0x0079, 0x79),

								    'e': (0x0065, 0x65),

								    '=': (0x003d, 0x3d),

								    '(': (0x0028, 0x28),

								    ')': (0x0029, 0x29),

								}


								# Greek capitals that merely look like Latin uppercase letters.  Keys are
								# built from explicit code points so they cannot be mistaken for (or
								# mistyped as) their Latin twins; each value is the tuple
								# (Unicode code point, GSM code of the Latin look-alike).
								GREEK_MAP = {
								    chr(code_point): (code_point, gsm_code)
								    for code_point, gsm_code in (
								        (0x0391, 0x41),  # GREEK CAPITAL LETTER ALPHA   -> 'A'
								        (0x0392, 0x42),  # GREEK CAPITAL LETTER BETA    -> 'B'
								        (0x0395, 0x45),  # GREEK CAPITAL LETTER EPSILON -> 'E'
								        (0x0397, 0x48),  # GREEK CAPITAL LETTER ETA     -> 'H'
								        (0x0399, 0x49),  # GREEK CAPITAL LETTER IOTA    -> 'I'
								        (0x039a, 0x4b),  # GREEK CAPITAL LETTER KAPPA   -> 'K'
								        (0x039c, 0x4d),  # GREEK CAPITAL LETTER MU      -> 'M'
								        (0x039d, 0x4e),  # GREEK CAPITAL LETTER NU      -> 'N'
								        (0x039f, 0x4f),  # GREEK CAPITAL LETTER OMICRON -> 'O'
								        (0x03a1, 0x50),  # GREEK CAPITAL LETTER RHO     -> 'P'
								        (0x03a4, 0x54),  # GREEK CAPITAL LETTER TAU     -> 'T'
								        (0x03a7, 0x58),  # GREEK CAPITAL LETTER CHI     -> 'X'
								        (0x03a5, 0x59),  # GREEK CAPITAL LETTER UPSILON -> 'Y'
								        (0x0396, 0x5a),  # GREEK CAPITAL LETTER ZETA    -> 'Z'
								    )
								}


								# 'ç' (LATIN SMALL LETTER C WITH CEDILLA) is expected to encode to 0x09,
								# the same GSM code that MAP lists for the capital 'Ç'.
								QUIRK_MAP = {chr(0x00e7): (0x00e7, 0x09)}

								# Stand-in value the tests compare against when an encode result has an
								# unexpected length, so a failure shows up as a value mismatch.
								BAD = -1


								class TestEncodingFunctions(unittest.TestCase):
								    """Check the ``gsm0338`` codec against the module-level mapping tables.

								    MAP, GREEK_MAP and QUIRK_MAP hold the expected
								    (Unicode code point, GSM code) pair for each character; the codec is
								    reached through ``str.encode`` / ``bytes.decode`` and the
								    ``is_gsm_text`` helper of ``messaging.sms.gsm0338``.
								    """

								    @staticmethod
								    def _gsm_to_int(encoded, max_len):
								        """Fold encoded GSM bytes into one integer, or BAD on odd lengths.

								        Args:
								            encoded: bytes returned by ``str.encode('gsm0338', ...)``.
								            max_len: longest acceptable result (1 for a plain GSM code,
								                2 to also allow a two-byte escape sequence).

								        Returns:
								            The big-endian integer value of ``encoded`` when it is 1 to
								            ``max_len`` bytes long, otherwise BAD so that the caller's
								            assertEqual reports a mismatch instead of raising.
								        """
								        # Indexing bytes yields ints on Python 3, so calling ord() on an
								        # element raises TypeError; int.from_bytes combines them directly.
								        if 1 <= len(encoded) <= max_len:
								            return int.from_bytes(encoded, 'big')
								        return BAD

								    def _check_replace_encoding(self, table):
								        """Assert every key of ``table`` encodes to its one-byte GSM code."""
								        for key, (_, expected) in table.items():
								            # Use 'replace' so that we trigger the mapping
								            s_gsm = key.encode('gsm0338', 'replace')
								            self.assertEqual(expected, self._gsm_to_int(s_gsm, 1),
								                             msg='key U+%04X' % ord(key))

								    def test_encoding_supported_unicode_gsm(self):
								        """Every MAP key encodes to the GSM code recorded for it."""
								        for key, (_, expected) in MAP.items():
								            # Use 'ignore' so that we see the code tested, not an exception
								            s_gsm = key.encode('gsm0338', 'ignore')
								            i_gsm = self._gsm_to_int(s_gsm, 2)

								            # We shouldn't generate an invalid escape sequence
								            if key == chr(0x00a0):
								                expected = BAD

								            self.assertEqual(expected, i_gsm,
								                             msg='key U+%04X' % ord(key))

								    def test_encoding_supported_greek_unicode_gsm(self):
								        """Greek look-alike capitals encode to their Latin GSM codes."""
								        # Note: Conversion is one way, hence no corresponding decode test
								        self._check_replace_encoding(GREEK_MAP)

								    def test_encoding_supported_quirk_unicode_gsm(self):
								        """Characters in QUIRK_MAP encode to their expected GSM codes."""
								        # Note: Conversion is one way, hence no corresponding decode test
								        self._check_replace_encoding(QUIRK_MAP)

								    def test_decoding_supported_unicode_gsm(self):
								        """Every GSM code in MAP decodes back to its Unicode code point."""
								        for code_point, i_gsm in MAP.values():
								            # The codec decodes bytes, not str: one byte for a plain GSM
								            # code, two (escape byte + code) for anything above 0xff.
								            s_gsm = i_gsm.to_bytes(1 if i_gsm <= 0xff else 2, 'big')

								            s_unicode = s_gsm.decode('gsm0338', 'strict')
								            self.assertEqual(code_point, ord(s_unicode),
								                             msg='GSM code 0x%x' % i_gsm)

								    def test_is_gsm_text_true(self):
								        """is_gsm_text() accepts every MAP character except the escape."""
								        for key in MAP:
								            if key == chr(0x00a0):
								                continue
								            self.assertEqual(messaging.sms.gsm0338.is_gsm_text(key), True)

								    def test_is_gsm_text_false(self):
								        """is_gsm_text() rejects NBSP and every BMP character not in MAP."""
								        self.assertEqual(
								            messaging.sms.gsm0338.is_gsm_text(chr(0x00a0)), False)

								        for i in range(1, 0xffff + 1):
								            char = chr(i)
								            if char in MAP:
								                continue
								            # Note: it's a little odd, but on error we want to see values
								            if messaging.sms.gsm0338.is_gsm_text(char) is not False:
								                self.assertEqual(BAD, i)