PageRenderTime 38ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/protocols/smpp/src/main/java/org/mobicents/protocols/smpp/encoding/DefaultAlphabetEncoding.java

http://mobicents.googlecode.com/
Java | 271 lines | 237 code | 6 blank | 28 comment | 0 complexity | 9b15cacfd66f132f43380443426581df MD5 | raw file
Possible License(s): LGPL-3.0, GPL-3.0, LGPL-2.1, GPL-2.0, CC-BY-SA-3.0, CC0-1.0, Apache-2.0, BSD-3-Clause
  1. /*
  2. * JBoss, Home of Professional Open Source
  3. * Copyright 2011, Red Hat, Inc. and individual contributors
  4. * by the @authors tag. See the copyright.txt in the distribution for a
  5. * full listing of individual contributors.
  6. *
  7. * This is free software; you can redistribute it and/or modify it
  8. * under the terms of the GNU Lesser General Public License as
  9. * published by the Free Software Foundation; either version 2.1 of
  10. * the License, or (at your option) any later version.
  11. *
  12. * This software is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this software; if not, write to the Free
  19. * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  20. * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
  21. */
  22. package org.mobicents.protocols.smpp.encoding;
  23. import java.io.ByteArrayOutputStream;
  24. /**
  25. * This class encodes and decodes Java Strings to and from the SMS default
  26. * alphabet. It also supports the default extension table. The default alphabet
  27. * and its extension table are defined in GSM 03.38.
  28. * @version $Id: DefaultAlphabetEncoding.java 452 2009-01-15 16:56:36Z orank $
  29. */
  30. public class DefaultAlphabetEncoding extends AlphabetEncoding {
  31. private static final int DCS = 0;
  32. public static final int EXTENDED_ESCAPE = 0x1b;
  33. /** Page break (extended table). */
  34. public static final int PAGE_BREAK = 0x0a;
  35. protected final char[] CHAR_TABLE = {
  36. '@', '\u00a3', '$', '\u00a5', '\u00e8', '\u00e9', '\u00f9', '\u00ec',
  37. '\u00f2', '\u00c7', '\n', '\u00d8', '\u00f8', '\r', '\u00c5', '\u00e5',
  38. '\u0394', '_', '\u03a6', '\u0393', '\u039b', '\u03a9', '\u03a0', '\u03a8',
  39. '\u03a3', '\u0398', '\u039e', ' ', '\u00c6', '\u00e6', '\u00df', '\u00c9',
  40. ' ', '!', '"', '#', '\u00a4', '%', '&', '\'',
  41. '(', ')', '*', '+', ',', '-', '.', '/',
  42. '0', '1', '2', '3', '4', '5', '6', '7',
  43. '8', '9', ':', ';', '<', '=', '>', '?',
  44. '\u00a1', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
  45. 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  46. 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
  47. 'X', 'Y', 'Z', '\u00c4', '\u00d6', '\u00d1', '\u00dc', '\u00a7',
  48. '\u00bf', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  49. 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  50. 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  51. 'x', 'y', 'z', '\u00e4', '\u00f6', '\u00f1', '\u00fc', '\u00e0',
  52. };
  53. /**
  54. * Extended character table. Characters in this table are accessed by the
  55. * 'escape' character in the base table. It is important that none of the
  56. * 'inactive' characters ever be matchable with a valid base-table
  57. * character as this breaks the encoding loop.
  58. * @see #EXTENDED_ESCAPE
  59. */
  60. protected final char[] EXT_CHAR_TABLE = {
  61. 0, 0, 0, 0, 0, 0, 0, 0,
  62. 0, 0, 0, 0, 0, 0, 0, 0,
  63. 0, 0, 0, 0, '^', 0, 0, 0,
  64. 0, 0, 0, 0, 0, 0, 0, 0,
  65. 0, 0, 0, 0, 0, 0, 0, 0,
  66. '{', '}', 0, 0, 0, 0, 0, '\\',
  67. 0, 0, 0, 0, 0, 0, 0, 0,
  68. 0, 0, 0, 0, '[', '~', ']', 0,
  69. '|', 0, 0, 0, 0, 0, 0, 0,
  70. 0, 0, 0, 0, 0, 0, 0, 0,
  71. 0, 0, 0, 0, 0, 0, 0, 0,
  72. 0, 0, 0, 0, 0, 0, 0, 0,
  73. 0, 0, 0, 0, 0, '\u20ac', 0, 0,
  74. 0, 0, 0, 0, 0, 0, 0, 0,
  75. 0, 0, 0, 0, 0, 0, 0, 0,
  76. 0, 0, 0, 0, 0, 0, 0, 0,
  77. };
  78. /**
  79. * @see #setUnknownCharReplacement(int)
  80. */
  81. private int unknownCharReplacement = 0x3f;
  82. public DefaultAlphabetEncoding() {
  83. super(DCS);
  84. }
  85. /**
  86. * Set the byte to use when there is no code point for a Unicode character.
  87. * This byte will be inserted into an encoded byte array if the String
  88. * being encoded contains a character that the GSM default alphabet
  89. * has no code point for. The default is to insert the code point for
  90. * the '?' character - that is, byte 0x3f.
  91. * @param unknownCharReplacement A code point for one of the characters
  92. * in the basic character table.
  93. * @throws IllegalArgumentException If <code>0 &lt; unknownCharReplacement
  94. * &lt; 127</code> or <code>unknownCharReplacement</code> is <code>0x1b
  95. * </code> (the extended escape character).
  96. */
  97. public void setUnknownCharReplacement(int unknownCharReplacement) {
  98. if (unknownCharReplacement < 0 || unknownCharReplacement > 127
  99. || unknownCharReplacement == EXTENDED_ESCAPE) {
  100. throw new IllegalArgumentException(
  101. "Illegal replacement code point " + unknownCharReplacement);
  102. }
  103. this.unknownCharReplacement = unknownCharReplacement;
  104. }
  105. /**
  106. * Get the current code point in use for unknown characters.
  107. * @return The current code point in use for unknown characters.
  108. * @see #setUnknownCharReplacement(int)
  109. */
  110. public int getUnknownCharReplacement() {
  111. return unknownCharReplacement;
  112. }
  113. /**
  114. * Decode an SMS default alphabet-encoded octet string into a Java String.
  115. */
  116. @Override
  117. public String decode(byte[] data, int offset, int length) {
  118. if (data == null) {
  119. throw new NullPointerException("Data cannot be null");
  120. }
  121. char[] table = CHAR_TABLE;
  122. StringBuffer buf = new StringBuffer();
  123. for (int i = offset; i < (offset + length); i++) {
  124. int code = (int) data[i] & 0x000000ff;
  125. if (code == EXTENDED_ESCAPE) {
  126. // take next char from extension table
  127. table = EXT_CHAR_TABLE;
  128. } else {
  129. if (code >= table.length) {
  130. code = unknownCharReplacement;
  131. }
  132. buf.append(table[code]);
  133. // Go back to the default table.
  134. table = CHAR_TABLE;
  135. }
  136. }
  137. return buf.toString();
  138. }
  139. /**
  140. * Encode a Java String into a byte array using the SMS Default alphabet.
  141. */
  142. @Override
  143. public byte[] encode(String s) {
  144. if (s == null) {
  145. return new byte[0];
  146. }
  147. char[] c = s.toCharArray();
  148. ByteArrayOutputStream enc = new ByteArrayOutputStream(256);
  149. for (int loop = 0; loop < c.length; loop++) {
  150. int search = 0;
  151. for (; search < CHAR_TABLE.length; search++) {
  152. if (search == EXTENDED_ESCAPE) {
  153. continue;
  154. }
  155. if (c[loop] == CHAR_TABLE[search]) {
  156. enc.write((byte) search);
  157. break;
  158. }
  159. if (c[loop] == EXT_CHAR_TABLE[search]) {
  160. enc.write((byte) EXTENDED_ESCAPE);
  161. enc.write((byte) search);
  162. break;
  163. }
  164. }
  165. if (search == CHAR_TABLE.length) {
  166. enc.write((byte) unknownCharReplacement);
  167. }
  168. }
  169. return enc.toByteArray();
  170. }
  171. public int getCharSize() {
  172. return 7;
  173. }
  174. /**
  175. * Pack a byte array according to the GSM bit-packing algorithm.
  176. * The GSM specification defines a simple compression mechanism for its
  177. * default alphabet to pack more message characters into a smaller space.
  178. * Since the alphabet only contains 128 symbols, each one can be represented
  179. * in 7 bits. The packing algorithm squeezes the bits for each symbol
  180. * "down" into the preceeding byte (so bit 7 of the first byte actually
  181. * contains bit 0 of the second symbol in a default alphabet string, bits
  182. * 6 and 7 in the second byte contain bits 0 and 1 of the third symbol etc.)
  183. * Since the maximum short message length is 140 <b>bytes</b>, you save
  184. * one bit per byte using the default alphabet giving you a total of
  185. * 140 + (140 / 8) = 160 characters to use. This is where the 160 character
  186. * limit comes from in SMPP packets.
  187. * <p>
  188. * Having said all that, most SMSCs do <b>NOT</b> use the packing
  189. * algorithm when communicating over TCP/IP. They either use a full
  190. * 8-bit alphabet such as ASCII or Latin-1, or they accept the default
  191. * alphabet in its unpacked form. As such, you will be unlikely to
  192. * need this method.
  193. * </p>
  194. * @param unpacked The unpacked byte array.
  195. * @return A new byte array containing the bytes in their packed form.
  196. */
  197. public byte[] pack(byte[] unpacked) {
  198. int packedLen = unpacked.length - (unpacked.length / 8);
  199. byte[] packed = new byte[packedLen];
  200. if (unpacked.length == 0) {
  201. return packed;
  202. }
  203. for (int i = 0, j = -1; i < packed.length; i++, j++) {
  204. int shiftRight = i % 7;
  205. int shiftLeft = 8 - (shiftRight + 1);
  206. if (shiftRight == 0) {
  207. j++;
  208. }
  209. int b = ((int) unpacked[j] & 0xff) >>> shiftRight;
  210. if (j + 1 < unpacked.length) {
  211. b |= (((int) unpacked[j + 1]) & 0xff) << shiftLeft;
  212. }
  213. packed[i] = (byte) b;
  214. }
  215. return packed;
  216. }
  217. /**
  218. * Unpack a byte array according to the GSM bit-packing algorithm.
  219. * Read the full description in the documentation of the
  220. * <code>pack</code> method.
  221. * @see #pack(byte[])
  222. * @param packed The packed byte array.
  223. * @return A new byte array containing the unpacked bytes.
  224. */
  225. public byte[] unpack(byte[] packed) {
  226. int unpackedLen = (packed.length * 8) / 7;
  227. byte[] unpacked = new byte[unpackedLen];
  228. if (packed.length == 0) {
  229. return unpacked;
  230. }
  231. for (int i = 0, j = 0; i < packed.length; i++, j++) {
  232. int shiftLeft = i % 7;
  233. int shiftRight = 8 - shiftLeft;
  234. if (shiftLeft == 0) {
  235. unpacked[j] = (byte) ((int) packed[i] & 0x7f);
  236. } else {
  237. int b = ((int) packed[i - 1] & 0xff) >>> shiftRight;
  238. b |= ((int) packed[i] << shiftLeft) & 0x7f;
  239. unpacked[j] = (byte) b;
  240. if (shiftLeft == 6) {
  241. j++;
  242. unpacked[j] = (byte) (((int) packed[i] & 0xff) >>> 1);
  243. }
  244. }
  245. }
  246. return unpacked;
  247. }
  248. }