PageRenderTime 78ms CodeModel.GetById 26ms app.highlight 45ms RepoModel.GetById 1ms app.codeStats 0ms

/protocols/smpp/src/main/java/org/mobicents/protocols/smpp/encoding/DefaultAlphabetEncoding.java

http://mobicents.googlecode.com/
Java | 271 lines | 237 code | 6 blank | 28 comment | 0 complexity | 9b15cacfd66f132f43380443426581df MD5 | raw file
/*
 * JBoss, Home of Professional Open Source
 * Copyright 2011, Red Hat, Inc. and individual contributors
 * by the @authors tag. See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
 22
 23package org.mobicents.protocols.smpp.encoding;
 24
 25import java.io.ByteArrayOutputStream;
 26
 27/**
 28 * This class encodes and decodes Java Strings to and from the SMS default
 29 * alphabet. It also supports the default extension table. The default alphabet
 30 * and it's extension table is defined in GSM 03.38.
 31 * @version $Id: DefaultAlphabetEncoding.java 452 2009-01-15 16:56:36Z orank $
 32 */
 33public class DefaultAlphabetEncoding extends AlphabetEncoding {
 34    private static final int DCS = 0;
 35
 36    public static final int EXTENDED_ESCAPE = 0x1b;
 37
 38    /** Page break (extended table). */
 39    public static final int PAGE_BREAK = 0x0a;
 40
 41    protected final char[] CHAR_TABLE = {
 42        '@',      '\u00a3', '$',      '\u00a5', '\u00e8', '\u00e9', '\u00f9', '\u00ec',
 43        '\u00f2', '\u00c7', '\n',     '\u00d8', '\u00f8', '\r',     '\u00c5', '\u00e5',
 44        '\u0394', '_',      '\u03a6', '\u0393', '\u039b', '\u03a9', '\u03a0', '\u03a8',
 45        '\u03a3', '\u0398', '\u039e', ' ',      '\u00c6', '\u00e6', '\u00df', '\u00c9',
 46        ' ',      '!',      '"',      '#',      '\u00a4', '%',      '&',      '\'',
 47        '(',      ')',      '*',      '+',      ',',      '-',      '.',      '/',
 48        '0',      '1',      '2',      '3',      '4',      '5',      '6',      '7',
 49        '8',      '9',      ':',      ';',      '<',      '=',      '>',      '?',
 50        '\u00a1', 'A',      'B',      'C',      'D',      'E',      'F',      'G',
 51        'H',      'I',      'J',      'K',      'L',      'M',      'N',      'O',
 52        'P',      'Q',      'R',      'S',      'T',      'U',      'V',      'W',
 53        'X',      'Y',      'Z',      '\u00c4', '\u00d6', '\u00d1', '\u00dc', '\u00a7',
 54        '\u00bf', 'a',      'b',      'c',      'd',      'e',      'f',      'g',
 55        'h',      'i',      'j',      'k',      'l',      'm',      'n',      'o',
 56        'p',      'q',      'r',      's',      't',      'u',      'v',      'w',
 57        'x',      'y',      'z',      '\u00e4', '\u00f6', '\u00f1', '\u00fc', '\u00e0',
 58    };
 59
 60    /**
 61     * Extended character table. Characters in this table are accessed by the
 62     * 'escape' character in the base table. It is important that none of the
 63     * 'inactive' characters ever be matchable with a valid base-table
 64     * character as this breaks the encoding loop.
 65     * @see #EXTENDED_ESCAPE
 66     */
 67    protected final char[] EXT_CHAR_TABLE = {
 68            0, 0, 0, 0, 0, 0, 0, 0,
 69            0, 0, 0, 0, 0, 0, 0, 0,
 70            0, 0, 0, 0, '^', 0, 0, 0,
 71            0, 0, 0, 0, 0, 0, 0, 0,
 72            0, 0, 0, 0, 0, 0, 0, 0,
 73            '{', '}', 0, 0, 0, 0, 0, '\\',
 74            0, 0, 0, 0, 0, 0, 0, 0,
 75            0, 0, 0, 0, '[', '~', ']', 0,
 76            '|', 0, 0, 0, 0, 0, 0, 0,
 77            0, 0, 0, 0, 0, 0, 0, 0,
 78            0, 0, 0, 0, 0, 0, 0, 0,
 79            0, 0, 0, 0, 0, 0, 0, 0,
 80            0, 0, 0, 0, 0, '\u20ac', 0, 0,
 81            0, 0, 0, 0, 0, 0, 0, 0,
 82            0, 0, 0, 0, 0, 0, 0, 0,
 83            0, 0, 0, 0, 0, 0, 0, 0,
 84    };
 85
 86    /**
 87     * @see #setUnknownCharReplacement(int)
 88     */
 89    private int unknownCharReplacement = 0x3f;
 90    
 91    public DefaultAlphabetEncoding() {
 92        super(DCS);
 93    }
 94
 95    /**
 96     * Set the byte to use when there is no code point for a Unicode character.
 97     * This byte will be inserted into an encoded byte array if the String
 98     * being encoded contains a character that the GSM default alphabet
 99     * has no code point for. The default is to insert the code point for
100     * the '?' character - that is, byte 0x3f.
101     * @param unknownCharReplacement A code point for one of the characters
102     * in the basic character table.
103     * @throws IllegalArgumentException If <code>0 &lt; unknownCharReplacement
104     * &lt; 127</code> or <code>unknownCharReplacement</code> is <code>0x1b
105     * </code> (the extended escape character).
106     */
107    public void setUnknownCharReplacement(int unknownCharReplacement) {
108        if (unknownCharReplacement < 0 || unknownCharReplacement > 127
109                || unknownCharReplacement == EXTENDED_ESCAPE) {
110            throw new IllegalArgumentException(
111                    "Illegal replacement code point " + unknownCharReplacement);
112        }
113        this.unknownCharReplacement = unknownCharReplacement;
114    }
115    
116    /**
117     * Get the current code point in use for unknown characters.
118     * @return The current code point in use for unknown characters.
119     * @see #setUnknownCharReplacement(int)
120     */
121    public int getUnknownCharReplacement() {
122        return unknownCharReplacement;
123    }
124    
125    /**
126     * Decode an SMS default alphabet-encoded octet string into a Java String.
127     */
128    @Override
129    public String decode(byte[] data, int offset, int length) {
130        if (data == null) {
131            throw new NullPointerException("Data cannot be null");
132        }
133        char[] table = CHAR_TABLE;
134        StringBuffer buf = new StringBuffer();
135
136        for (int i = offset; i < (offset + length); i++) {
137            int code = (int) data[i] & 0x000000ff;
138            if (code == EXTENDED_ESCAPE) {
139                // take next char from extension table
140                table = EXT_CHAR_TABLE;
141            } else {
142                if (code >= table.length) {
143                    code = unknownCharReplacement;
144                }
145                buf.append(table[code]);
146                // Go back to the default table.
147                table = CHAR_TABLE;
148            }
149        }
150
151        return buf.toString();
152    }
153
154    /**
155     * Encode a Java String into a byte array using the SMS Default alphabet.
156     */
157    @Override
158    public byte[] encode(String s) {
159        if (s == null) {
160            return new byte[0];
161        }
162
163        char[] c = s.toCharArray();
164        ByteArrayOutputStream enc = new ByteArrayOutputStream(256);
165
166        for (int loop = 0; loop < c.length; loop++) {
167            int search = 0;
168            for (; search < CHAR_TABLE.length; search++) {
169                if (search == EXTENDED_ESCAPE) {
170                    continue;
171                }
172
173                if (c[loop] == CHAR_TABLE[search]) {
174                    enc.write((byte) search);
175                    break;
176               }
177
178                if (c[loop] == EXT_CHAR_TABLE[search]) {
179                    enc.write((byte) EXTENDED_ESCAPE);
180                    enc.write((byte) search);
181                    break;
182               }
183            }
184            if (search == CHAR_TABLE.length) {
185                enc.write((byte) unknownCharReplacement);
186            }
187        }
188
189        return enc.toByteArray();
190    }
191
192    public int getCharSize() {
193        return 7;
194    }
195
196    /**
197     * Pack a byte array according to the GSM bit-packing algorithm.
198     * The GSM specification defines a simple compression mechanism for its
199     * default alphabet to pack more message characters into a smaller space.
200     * Since the alphabet only contains 128 symbols, each one can be represented
201     * in 7 bits. The packing algorithm squeezes the bits for each symbol
202     * "down" into the preceeding byte (so bit 7 of the first byte actually
203     * contains bit 0 of the second symbol in a default alphabet string, bits
204     * 6 and 7 in the second byte contain bits 0 and 1 of the third symbol etc.)
205     * Since the maximum short message length is 140 <b>bytes</b>, you save
206     * one bit per byte using the default alphabet giving you a total of
207     * 140 + (140 / 8) = 160 characters to use. This is where the 160 character
208     * limit comes from in SMPP packets.
209     * <p>
210     * Having said all that, most SMSCs do <b>NOT</b> use the packing
211     * algorithm when communicating over TCP/IP. They either use a full
212     * 8-bit alphabet such as ASCII or Latin-1, or they accept the default
213     * alphabet in its unpacked form. As such, you will be unlikely to
214     * need this method.
215     * </p>
216     * @param unpacked The unpacked byte array. 
217     * @return A new byte array containing the bytes in their packed form.
218     */
219    public byte[] pack(byte[] unpacked) {
220        int packedLen = unpacked.length - (unpacked.length / 8);
221        byte[] packed = new byte[packedLen];
222        if (unpacked.length == 0) {
223            return packed;
224        }
225        for (int i = 0, j = -1; i < packed.length; i++, j++) {
226            int shiftRight = i % 7;
227            int shiftLeft = 8 - (shiftRight + 1);
228            if (shiftRight == 0) {
229                j++;
230            }
231            int b = ((int) unpacked[j] & 0xff) >>> shiftRight;
232            if (j + 1 < unpacked.length) {
233                b |= (((int) unpacked[j + 1]) & 0xff) << shiftLeft;
234            }
235            packed[i] = (byte) b;
236        }
237        return packed;
238    }
239
240    /**
241     * Unpack a byte array according to the GSM bit-packing algorithm.
242     * Read the full description in the documentation of the
243     * <code>pack</code> method.
244     * @see #pack(byte[])
245     * @param packed The packed byte array.
246     * @return A new byte array containing the unpacked bytes.
247     */
248    public byte[] unpack(byte[] packed) {
249        int unpackedLen = (packed.length * 8) / 7;
250        byte[] unpacked = new byte[unpackedLen];
251        if (packed.length == 0) {
252            return unpacked;
253        }
254        for (int i = 0, j = 0; i < packed.length; i++, j++) {
255            int shiftLeft = i % 7;
256            int shiftRight = 8 - shiftLeft;
257            if (shiftLeft == 0) {
258                unpacked[j] = (byte) ((int) packed[i] & 0x7f);
259            } else {
260                int b = ((int) packed[i - 1] & 0xff) >>> shiftRight;
261                b |= ((int) packed[i] << shiftLeft) & 0x7f;
262                unpacked[j] = (byte) b;
263                if (shiftLeft == 6) {
264                    j++;
265                    unpacked[j] = (byte) (((int) packed[i] & 0xff) >>> 1);
266                }
267            }
268        }
269        return unpacked;
270    }
271}