/protocols/smpp/src/main/java/org/mobicents/protocols/smpp/encoding/DefaultAlphabetEncoding.java
Java | 271 lines | 237 code | 6 blank | 28 comment | 0 complexity | 9b15cacfd66f132f43380443426581df MD5 | raw file
/*
 * JBoss, Home of Professional Open Source
 * Copyright 2011, Red Hat, Inc. and individual contributors
 * by the @authors tag. See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

package org.mobicents.protocols.smpp.encoding;

import java.io.ByteArrayOutputStream;

/**
 * This class encodes and decodes Java Strings to and from the SMS default
 * alphabet. It also supports the default extension table. The default alphabet
 * and its extension table are defined in GSM 03.38.
 * @version $Id: DefaultAlphabetEncoding.java 452 2009-01-15 16:56:36Z orank $
 */
public class DefaultAlphabetEncoding extends AlphabetEncoding {
    /** Value handed to the superclass constructor (presumably the SMPP data coding value - confirm against AlphabetEncoding). */
    private static final int DCS = 0;

    /** Code point in the base table that switches the next septet to the extension table. */
    public static final int EXTENDED_ESCAPE = 0x1b;

    /** Page break (extended table). */
    public static final int PAGE_BREAK = 0x0a;

    /**
     * Base character table, indexed by GSM code point (0-127). The entry at
     * {@link #EXTENDED_ESCAPE} is a placeholder: the encoder never emits it
     * and the decoder treats that code point as the escape.
     */
    protected final char[] CHAR_TABLE = {
        '@', '\u00a3', '$', '\u00a5', '\u00e8', '\u00e9', '\u00f9', '\u00ec',
        '\u00f2', '\u00c7', '\n', '\u00d8', '\u00f8', '\r', '\u00c5', '\u00e5',
        '\u0394', '_', '\u03a6', '\u0393', '\u039b', '\u03a9', '\u03a0', '\u03a8',
        '\u03a3', '\u0398', '\u039e', ' ', '\u00c6', '\u00e6', '\u00df', '\u00c9',
        ' ', '!', '"', '#', '\u00a4', '%', '&', '\'',
        '(', ')', '*', '+', ',', '-', '.', '/',
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', ':', ';', '<', '=', '>', '?',
        '\u00a1', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
        'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
        'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', '\u00c4', '\u00d6', '\u00d1', '\u00dc', '\u00a7',
        '\u00bf', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
        'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
        'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
        'x', 'y', 'z', '\u00e4', '\u00f6', '\u00f1', '\u00fc', '\u00e0',
    };

    /**
     * Extended character table. Characters in this table are accessed by the
     * 'escape' character in the base table. An entry of 0 marks an
     * 'inactive' position, i.e. a code point with no extension character;
     * such entries are never matched by the encoder and make the decoder
     * fall back to the base table.
     * @see #EXTENDED_ESCAPE
     */
    protected final char[] EXT_CHAR_TABLE = {
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, '^', 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        '{', '}', 0, 0, 0, 0, 0, '\\',
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, '[', '~', ']', 0,
        '|', 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, '\u20ac', 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
    };

    /**
     * @see #setUnknownCharReplacement(int)
     */
    private int unknownCharReplacement = 0x3f;

    public DefaultAlphabetEncoding() {
        super(DCS);
    }

    /**
     * Set the byte to use when there is no code point for a Unicode character.
     * This byte will be inserted into an encoded byte array if the String
     * being encoded contains a character that the GSM default alphabet
     * has no code point for. The default is to insert the code point for
     * the '?' character - that is, byte 0x3f.
     * @param unknownCharReplacement A code point for one of the characters
     * in the basic character table.
     * @throws IllegalArgumentException If {@code unknownCharReplacement} is
     * less than 0, greater than 127, or equal to {@code 0x1b} (the extended
     * escape character).
     */
    public void setUnknownCharReplacement(int unknownCharReplacement) {
        if (unknownCharReplacement < 0 || unknownCharReplacement > 127
                || unknownCharReplacement == EXTENDED_ESCAPE) {
            throw new IllegalArgumentException(
                    "Illegal replacement code point " + unknownCharReplacement);
        }
        this.unknownCharReplacement = unknownCharReplacement;
    }

    /**
     * Get the current code point in use for unknown characters.
     * @return The current code point in use for unknown characters.
     * @see #setUnknownCharReplacement(int)
     */
    public int getUnknownCharReplacement() {
        return unknownCharReplacement;
    }

    /**
     * Decode an SMS default alphabet-encoded octet string into a Java String.
     * Each byte is read as one unpacked code point. An
     * {@link #EXTENDED_ESCAPE} byte produces no output; it makes the next
     * non-escape byte resolve against the extension table. If that byte has
     * no extension-table entry, the base-table character is used instead.
     * Bytes above 127 decode to the character of the unknown-character
     * replacement code point. A trailing escape is silently dropped.
     * @param data The encoded bytes.
     * @param offset Index of the first byte to decode.
     * @param length Number of bytes to decode.
     * @return The decoded string.
     * @throws NullPointerException If <code>data</code> is null.
     */
    @Override
    public String decode(byte[] data, int offset, int length) {
        if (data == null) {
            throw new NullPointerException("Data cannot be null");
        }
        // Clamp the capacity hint so bad arguments still fail on the array
        // access below rather than on an oversized or negative allocation.
        int capacity = Math.max(0, Math.min(length, data.length));
        StringBuilder buf = new StringBuilder(capacity);
        boolean escaped = false;

        for (int i = offset; i < (offset + length); i++) {
            int code = data[i] & 0xff;
            if (code == EXTENDED_ESCAPE) {
                // Take the next character from the extension table.
                escaped = true;
            } else {
                buf.append(decodeSeptet(code, escaped));
                // Go back to the default table.
                escaped = false;
            }
        }

        return buf.toString();
    }

    /**
     * Resolve a single code point to a character. The extension table is
     * consulted only when <code>escaped</code> is set and the position is
     * active; otherwise the base table is used, with out-of-range codes
     * mapped to the unknown-character replacement.
     */
    private char decodeSeptet(int code, boolean escaped) {
        if (escaped && code < EXT_CHAR_TABLE.length && EXT_CHAR_TABLE[code] != 0) {
            return EXT_CHAR_TABLE[code];
        }
        if (code >= CHAR_TABLE.length) {
            code = unknownCharReplacement;
        }
        return CHAR_TABLE[code];
    }

    /**
     * Encode a Java String into a byte array using the SMS Default alphabet.
     * Base-table characters become one byte; extension-table characters
     * become {@link #EXTENDED_ESCAPE} followed by their code point. Any other
     * character (including NUL) is written as the unknown-character
     * replacement code point. The output is unpacked, one code point per byte.
     * @param s The string to encode.
     * @return The encoded bytes; an empty array if <code>s</code> is null.
     */
    @Override
    public byte[] encode(String s) {
        if (s == null) {
            return new byte[0];
        }

        ByteArrayOutputStream enc = new ByteArrayOutputStream(s.length() + 8);

        for (int pos = 0; pos < s.length(); pos++) {
            char ch = s.charAt(pos);
            int search = 0;
            for (; search < CHAR_TABLE.length; search++) {
                if (search == EXTENDED_ESCAPE) {
                    // The escape position is never a valid encoding target.
                    continue;
                }

                if (ch == CHAR_TABLE[search]) {
                    enc.write(search);
                    break;
                }

                char ext = EXT_CHAR_TABLE[search];
                // Skip inactive (0) entries so a NUL in the input cannot
                // match them and be emitted as a bogus escape sequence.
                if (ext != 0 && ch == ext) {
                    enc.write(EXTENDED_ESCAPE);
                    enc.write(search);
                    break;
                }
            }
            if (search == CHAR_TABLE.length) {
                enc.write(unknownCharReplacement);
            }
        }

        return enc.toByteArray();
    }

    public int getCharSize() {
        return 7;
    }

    /**
     * Pack a byte array according to the GSM bit-packing algorithm.
     * The GSM specification defines a simple compression mechanism for its
     * default alphabet to pack more message characters into a smaller space.
     * Since the alphabet only contains 128 symbols, each one can be represented
     * in 7 bits. The packing algorithm squeezes the bits for each symbol
     * "down" into the preceding byte (so bit 7 of the first byte actually
     * contains bit 0 of the second symbol in a default alphabet string, bits
     * 6 and 7 in the second byte contain bits 0 and 1 of the third symbol etc.)
     * Since the maximum short message length is 140 <b>bytes</b>, you save
     * one bit per byte using the default alphabet giving you a total of
     * 140 * 8 / 7 = 160 characters to use. This is where the 160 character
     * limit comes from in SMPP packets.
     * <p>
     * Having said all that, most SMSCs do <b>NOT</b> use the packing
     * algorithm when communicating over TCP/IP. They either use a full
     * 8-bit alphabet such as ASCII or Latin-1, or they accept the default
     * alphabet in its unpacked form. As such, you will be unlikely to
     * need this method.
     * </p>
     * <p>
     * Only the low 7 bits of each input byte are used; bit 7 is ignored.
     * </p>
     * @param unpacked The unpacked byte array.
     * @return A new byte array containing the bytes in their packed form.
     */
    public byte[] pack(byte[] unpacked) {
        // Every 8 septets fit in 7 octets, so one octet is saved per full
        // group of 8 input bytes.
        byte[] packed = new byte[unpacked.length - (unpacked.length / 8)];
        for (int i = 0; i < packed.length; i++) {
            // The 8th septet of each group is fully absorbed by the 7th
            // output octet, so the source index skips one every 7 outputs.
            int j = i + (i / 7);
            int shiftRight = i % 7;
            int b = (unpacked[j] & 0x7f) >>> shiftRight;
            if (j + 1 < unpacked.length) {
                // Fill the remaining high bits from the next septet.
                b |= (unpacked[j + 1] & 0x7f) << (7 - shiftRight);
            }
            packed[i] = (byte) b;
        }
        return packed;
    }

    /**
     * Unpack a byte array according to the GSM bit-packing algorithm.
     * Read the full description in the documentation of the
     * <code>pack</code> method.
     * <p>
     * The result always has <code>packed.length * 8 / 7</code> entries, so
     * every complete group of 7 input bytes yields 8 septets. A message of
     * 7 septets therefore unpacks with one extra trailing 0 septet, because
     * the packed form alone cannot distinguish padding from data.
     * </p>
     * @see #pack(byte[])
     * @param packed The packed byte array.
     * @return A new byte array containing the unpacked bytes.
     */
    public byte[] unpack(byte[] packed) {
        byte[] unpacked = new byte[(packed.length * 8) / 7];
        for (int i = 0; i < packed.length; i++) {
            // One extra septet is produced for every 7 octets consumed.
            int j = i + (i / 7);
            int shiftLeft = i % 7;
            int current = packed[i] & 0xff;
            if (shiftLeft == 0) {
                unpacked[j] = (byte) (current & 0x7f);
            } else {
                // Low bits come from the top of the previous octet, the
                // rest from the bottom of the current one.
                int carried = (packed[i - 1] & 0xff) >>> (8 - shiftLeft);
                unpacked[j] = (byte) (carried | ((current << shiftLeft) & 0x7f));
                if (shiftLeft == 6) {
                    // The 7th octet of a group also holds a whole septet
                    // in its upper 7 bits.
                    unpacked[j + 1] = (byte) (current >>> 1);
                }
            }
        }
        return unpacked;
    }
}