PageRenderTime 59ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/org/apache/poi/hpsf/VariantSupport.java

https://github.com/minstrelsy/SimpleAndroidDocView
Java | 539 lines | 353 code | 27 blank | 159 comment | 17 complexity | 89a9616b84e878dcdfd285ef7159c220 MD5 | raw file
Possible License(s): Apache-2.0
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hpsf;
  16. import java.io.IOException;
  17. import java.io.OutputStream;
  18. import java.io.UnsupportedEncodingException;
  19. import java.util.Date;
  20. import java.util.LinkedList;
  21. import java.util.List;
  22. import org.apache.poi.util.POILogFactory;
  23. import org.apache.poi.util.POILogger;
  24. /**
  25. * <p>Supports reading and writing of variant data.</p>
  26. *
  27. * <p><strong>FIXME (3):</strong> Reading and writing should be made more
  28. * uniform than it is now. The following items should be resolved:
  29. *
  30. * <ul>
  31. *
  32. * <li><p>Reading requires a length parameter that is 4 bytes greater than the
  33. * actual data, because the variant type field is included. </p></li>
  34. *
  35. * <li><p>Reading reads from a byte array while writing writes to a byte array
  36. * output stream.</p></li>
  37. *
  38. * </ul>
  39. *
  40. * @author Rainer Klute <a
  41. * href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
  42. */
  43. public class VariantSupport extends Variant
  44. {
  45. private static POILogger logger = POILogFactory.getLogger(VariantSupport.class);
  46. private static boolean logUnsupportedTypes = false;
  47. /**
  48. * <p>Specifies whether warnings about unsupported variant types are to be
  49. * written to <code>System.err</code> or not.</p>
  50. *
  51. * @param logUnsupportedTypes If <code>true</code> warnings will be written,
  52. * if <code>false</code> they won't.
  53. */
  54. public static void setLogUnsupportedTypes(final boolean logUnsupportedTypes)
  55. {
  56. VariantSupport.logUnsupportedTypes = logUnsupportedTypes;
  57. }
  58. /**
  59. * <p>Checks whether logging of unsupported variant types warning is turned
  60. * on or off.</p>
  61. *
  62. * @return <code>true</code> if logging is turned on, else
  63. * <code>false</code>.
  64. */
  65. public static boolean isLogUnsupportedTypes()
  66. {
  67. return logUnsupportedTypes;
  68. }
  69. /**
  70. * <p>Keeps a list of the variant types an "unsupported" message has already
  71. * been issued for.</p>
  72. */
  73. protected static List<Long> unsupportedMessage;
  74. /**
  75. * <p>Writes a warning to <code>System.err</code> that a variant type is
  76. * unsupported by HPSF. Such a warning is written only once for each variant
  77. * type. Log messages can be turned on or off by </p>
  78. *
  79. * @param ex The exception to log
  80. */
  81. protected static void writeUnsupportedTypeMessage
  82. (final UnsupportedVariantTypeException ex)
  83. {
  84. if (isLogUnsupportedTypes())
  85. {
  86. if (unsupportedMessage == null)
  87. unsupportedMessage = new LinkedList<Long>();
  88. Long vt = Long.valueOf(ex.getVariantType());
  89. if (!unsupportedMessage.contains(vt))
  90. {
  91. logger.log( POILogger.ERROR, ex.getMessage());
  92. unsupportedMessage.add(vt);
  93. }
  94. }
  95. }
  96. /**
  97. * <p>HPSF is able to read these {@link Variant} types.</p>
  98. */
  99. final static public int[] SUPPORTED_TYPES = { Variant.VT_EMPTY,
  100. Variant.VT_I2, Variant.VT_I4, Variant.VT_I8, Variant.VT_R8,
  101. Variant.VT_FILETIME, Variant.VT_LPSTR, Variant.VT_LPWSTR,
  102. Variant.VT_CF, Variant.VT_BOOL };
  103. /**
  104. * <p>Checks whether HPSF supports the specified variant type. Unsupported
  105. * types should be implemented included in the {@link #SUPPORTED_TYPES}
  106. * array.</p>
  107. *
  108. * @see Variant
  109. * @param variantType the variant type to check
  110. * @return <code>true</code> if HPFS supports this type, else
  111. * <code>false</code>
  112. */
  113. public boolean isSupportedType(final int variantType)
  114. {
  115. for (int i = 0; i < SUPPORTED_TYPES.length; i++)
  116. if (variantType == SUPPORTED_TYPES[i])
  117. return true;
  118. return false;
  119. }
  120. /**
  121. * <p>Reads a variant type from a byte array.</p>
  122. *
  123. * @param src The byte array
  124. * @param offset The offset in the byte array where the variant starts
  125. * @param length The length of the variant including the variant type field
  126. * @param type The variant type to read
  127. * @param codepage The codepage to use for non-wide strings
  128. * @return A Java object that corresponds best to the variant field. For
  129. * example, a VT_I4 is returned as a {@link Long}, a VT_LPSTR as a
  130. * {@link String}.
  131. * @exception ReadingNotSupportedException if a property is to be written
  132. * who's variant type HPSF does not yet support
  133. * @exception UnsupportedEncodingException if the specified codepage is not
  134. * supported.
  135. * @see Variant
  136. */
  137. public static Object read( final byte[] src, final int offset,
  138. final int length, final long type, final int codepage )
  139. throws ReadingNotSupportedException, UnsupportedEncodingException
  140. {
  141. TypedPropertyValue typedPropertyValue = new TypedPropertyValue(
  142. (int) type, null );
  143. int unpadded;
  144. try
  145. {
  146. unpadded = typedPropertyValue.readValue( src, offset );
  147. }
  148. catch ( UnsupportedOperationException exc )
  149. {
  150. int propLength = Math.min( length, src.length - offset );
  151. final byte[] v = new byte[propLength];
  152. System.arraycopy( src, offset, v, 0, propLength );
  153. throw new ReadingNotSupportedException( type, v );
  154. }
  155. switch ( (int) type )
  156. {
  157. case Variant.VT_EMPTY:
  158. case Variant.VT_I4:
  159. case Variant.VT_I8:
  160. case Variant.VT_R8:
  161. /*
  162. * we have more property types that can be converted into Java
  163. * objects, but current API need to be preserved, and it returns
  164. * other types as byte arrays. In future major versions it shall be
  165. * changed -- sergey
  166. */
  167. return typedPropertyValue.getValue();
  168. case Variant.VT_I2:
  169. {
  170. /*
  171. * also for backward-compatibility with prev. versions of POI
  172. * --sergey
  173. */
  174. return Integer.valueOf( ( (Short) typedPropertyValue.getValue() )
  175. .intValue() );
  176. }
  177. case Variant.VT_FILETIME:
  178. {
  179. Filetime filetime = (Filetime) typedPropertyValue.getValue();
  180. return Util.filetimeToDate( (int) filetime.getHigh(),
  181. (int) filetime.getLow() );
  182. }
  183. case Variant.VT_LPSTR:
  184. {
  185. CodePageString string = (CodePageString) typedPropertyValue
  186. .getValue();
  187. return string.getJavaValue( codepage );
  188. }
  189. case Variant.VT_LPWSTR:
  190. {
  191. UnicodeString string = (UnicodeString) typedPropertyValue
  192. .getValue();
  193. return string.toJavaString();
  194. }
  195. case Variant.VT_CF:
  196. {
  197. // if(l1 < 0) {
  198. /**
  199. * YK: reading the ClipboardData packet (VT_CF) is not quite
  200. * correct. The size of the data is determined by the first four
  201. * bytes of the packet while the current implementation calculates
  202. * it in the Section constructor. Test files in Bugzilla 42726 and
  203. * 45583 clearly show that this approach does not always work. The
  204. * workaround below attempts to gracefully handle such cases instead
  205. * of throwing exceptions.
  206. *
  207. * August 20, 2009
  208. */
  209. // l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
  210. // }
  211. // final byte[] v = new byte[l1];
  212. // System.arraycopy(src, o1, v, 0, v.length);
  213. // value = v;
  214. // break;
  215. ClipboardData clipboardData = (ClipboardData) typedPropertyValue
  216. .getValue();
  217. return clipboardData.toByteArray();
  218. }
  219. case Variant.VT_BOOL:
  220. {
  221. VariantBool bool = (VariantBool) typedPropertyValue.getValue();
  222. return Boolean.valueOf( bool.getValue() );
  223. }
  224. default:
  225. {
  226. /*
  227. * it is not very good, but what can do without breaking current
  228. * API? --sergey
  229. */
  230. final byte[] v = new byte[unpadded];
  231. System.arraycopy( src, offset, v, 0, unpadded );
  232. throw new ReadingNotSupportedException( type, v );
  233. }
  234. }
  235. }
  236. /**
  237. * <p>Turns a codepage number into the equivalent character encoding's
  238. * name.</p>
  239. *
  240. * @param codepage The codepage number
  241. *
  242. * @return The character encoding's name. If the codepage number is 65001,
  243. * the encoding name is "UTF-8". All other positive numbers are mapped to
  244. * "cp" followed by the number, e.g. if the codepage number is 1252 the
  245. * returned character encoding name will be "cp1252".
  246. *
  247. * @exception UnsupportedEncodingException if the specified codepage is
  248. * less than zero.
  249. */
  250. public static String codepageToEncoding(final int codepage)
  251. throws UnsupportedEncodingException
  252. {
  253. if (codepage <= 0)
  254. throw new UnsupportedEncodingException
  255. ("Codepage number may not be " + codepage);
  256. switch (codepage)
  257. {
  258. case Constants.CP_UTF16:
  259. return "UTF-16";
  260. case Constants.CP_UTF16_BE:
  261. return "UTF-16BE";
  262. case Constants.CP_UTF8:
  263. return "UTF-8";
  264. case Constants.CP_037:
  265. return "cp037";
  266. case Constants.CP_GBK:
  267. return "GBK";
  268. case Constants.CP_MS949:
  269. return "ms949";
  270. case Constants.CP_WINDOWS_1250:
  271. return "windows-1250";
  272. case Constants.CP_WINDOWS_1251:
  273. return "windows-1251";
  274. case Constants.CP_WINDOWS_1252:
  275. return "windows-1252";
  276. case Constants.CP_WINDOWS_1253:
  277. return "windows-1253";
  278. case Constants.CP_WINDOWS_1254:
  279. return "windows-1254";
  280. case Constants.CP_WINDOWS_1255:
  281. return "windows-1255";
  282. case Constants.CP_WINDOWS_1256:
  283. return "windows-1256";
  284. case Constants.CP_WINDOWS_1257:
  285. return "windows-1257";
  286. case Constants.CP_WINDOWS_1258:
  287. return "windows-1258";
  288. case Constants.CP_JOHAB:
  289. return "johab";
  290. case Constants.CP_MAC_ROMAN:
  291. return "MacRoman";
  292. case Constants.CP_MAC_JAPAN:
  293. return "SJIS";
  294. case Constants.CP_MAC_CHINESE_TRADITIONAL:
  295. return "Big5";
  296. case Constants.CP_MAC_KOREAN:
  297. return "EUC-KR";
  298. case Constants.CP_MAC_ARABIC:
  299. return "MacArabic";
  300. case Constants.CP_MAC_HEBREW:
  301. return "MacHebrew";
  302. case Constants.CP_MAC_GREEK:
  303. return "MacGreek";
  304. case Constants.CP_MAC_CYRILLIC:
  305. return "MacCyrillic";
  306. case Constants.CP_MAC_CHINESE_SIMPLE:
  307. return "EUC_CN";
  308. case Constants.CP_MAC_ROMANIA:
  309. return "MacRomania";
  310. case Constants.CP_MAC_UKRAINE:
  311. return "MacUkraine";
  312. case Constants.CP_MAC_THAI:
  313. return "MacThai";
  314. case Constants.CP_MAC_CENTRAL_EUROPE:
  315. return "MacCentralEurope";
  316. case Constants.CP_MAC_ICELAND:
  317. return "MacIceland";
  318. case Constants.CP_MAC_TURKISH:
  319. return "MacTurkish";
  320. case Constants.CP_MAC_CROATIAN:
  321. return "MacCroatian";
  322. case Constants.CP_US_ACSII:
  323. case Constants.CP_US_ASCII2:
  324. return "US-ASCII";
  325. case Constants.CP_KOI8_R:
  326. return "KOI8-R";
  327. case Constants.CP_ISO_8859_1:
  328. return "ISO-8859-1";
  329. case Constants.CP_ISO_8859_2:
  330. return "ISO-8859-2";
  331. case Constants.CP_ISO_8859_3:
  332. return "ISO-8859-3";
  333. case Constants.CP_ISO_8859_4:
  334. return "ISO-8859-4";
  335. case Constants.CP_ISO_8859_5:
  336. return "ISO-8859-5";
  337. case Constants.CP_ISO_8859_6:
  338. return "ISO-8859-6";
  339. case Constants.CP_ISO_8859_7:
  340. return "ISO-8859-7";
  341. case Constants.CP_ISO_8859_8:
  342. return "ISO-8859-8";
  343. case Constants.CP_ISO_8859_9:
  344. return "ISO-8859-9";
  345. case Constants.CP_ISO_2022_JP1:
  346. case Constants.CP_ISO_2022_JP2:
  347. case Constants.CP_ISO_2022_JP3:
  348. return "ISO-2022-JP";
  349. case Constants.CP_ISO_2022_KR:
  350. return "ISO-2022-KR";
  351. case Constants.CP_EUC_JP:
  352. return "EUC-JP";
  353. case Constants.CP_EUC_KR:
  354. return "EUC-KR";
  355. case Constants.CP_GB2312:
  356. return "GB2312";
  357. case Constants.CP_GB18030:
  358. return "GB18030";
  359. case Constants.CP_SJIS:
  360. return "SJIS";
  361. default:
  362. return "cp" + codepage;
  363. }
  364. }
  365. /**
  366. * <p>Writes a variant value to an output stream. This method ensures that
  367. * always a multiple of 4 bytes is written.</p>
  368. *
  369. * <p>If the codepage is UTF-16, which is encouraged, strings
  370. * <strong>must</strong> always be written as {@link Variant#VT_LPWSTR}
  371. * strings, not as {@link Variant#VT_LPSTR} strings. This method ensure this
  372. * by converting strings appropriately, if needed.</p>
  373. *
  374. * @param out The stream to write the value to.
  375. * @param type The variant's type.
  376. * @param value The variant's value.
  377. * @param codepage The codepage to use to write non-wide strings
  378. * @return The number of entities that have been written. In many cases an
  379. * "entity" is a byte but this is not always the case.
  380. * @exception IOException if an I/O exceptions occurs
  381. * @exception WritingNotSupportedException if a property is to be written
  382. * who's variant type HPSF does not yet support
  383. */
  384. public static int write(final OutputStream out, final long type,
  385. final Object value, final int codepage)
  386. throws IOException, WritingNotSupportedException
  387. {
  388. int length = 0;
  389. switch ((int) type)
  390. {
  391. case Variant.VT_BOOL:
  392. {
  393. if ( ( (Boolean) value ).booleanValue() )
  394. {
  395. out.write( 0xff );
  396. out.write( 0xff );
  397. }
  398. else
  399. {
  400. out.write( 0x00 );
  401. out.write( 0x00 );
  402. }
  403. length += 2;
  404. break;
  405. }
  406. case Variant.VT_LPSTR:
  407. {
  408. CodePageString codePageString = new CodePageString( (String) value,
  409. codepage );
  410. length += codePageString.write( out );
  411. break;
  412. }
  413. case Variant.VT_LPWSTR:
  414. {
  415. final int nrOfChars = ( (String) value ).length() + 1;
  416. length += TypeWriter.writeUIntToStream( out, nrOfChars );
  417. char[] s = ( (String) value ).toCharArray();
  418. for ( int i = 0; i < s.length; i++ )
  419. {
  420. final int high = ( ( s[i] & 0x0000ff00 ) >> 8 );
  421. final int low = ( s[i] & 0x000000ff );
  422. final byte highb = (byte) high;
  423. final byte lowb = (byte) low;
  424. out.write( lowb );
  425. out.write( highb );
  426. length += 2;
  427. }
  428. // NullTerminator
  429. out.write( 0x00 );
  430. out.write( 0x00 );
  431. length += 2;
  432. break;
  433. }
  434. case Variant.VT_CF:
  435. {
  436. final byte[] b = (byte[]) value;
  437. out.write(b);
  438. length = b.length;
  439. break;
  440. }
  441. case Variant.VT_EMPTY:
  442. {
  443. length += TypeWriter.writeUIntToStream( out, Variant.VT_EMPTY );
  444. break;
  445. }
  446. case Variant.VT_I2:
  447. {
  448. length += TypeWriter.writeToStream( out,
  449. ( (Integer) value ).shortValue() );
  450. break;
  451. }
  452. case Variant.VT_I4:
  453. {
  454. if (!(value instanceof Integer))
  455. {
  456. throw new ClassCastException("Could not cast an object to "
  457. + Integer.class.toString() + ": "
  458. + value.getClass().toString() + ", "
  459. + value.toString());
  460. }
  461. length += TypeWriter.writeToStream(out,
  462. ((Integer) value).intValue());
  463. break;
  464. }
  465. case Variant.VT_I8:
  466. {
  467. length += TypeWriter.writeToStream(out, ((Long) value).longValue());
  468. break;
  469. }
  470. case Variant.VT_R8:
  471. {
  472. length += TypeWriter.writeToStream(out,
  473. ((Double) value).doubleValue());
  474. break;
  475. }
  476. case Variant.VT_FILETIME:
  477. {
  478. long filetime = Util.dateToFileTime((Date) value);
  479. int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
  480. int low = (int) (filetime & 0x00000000FFFFFFFFL);
  481. Filetime filetimeValue = new Filetime( low, high);
  482. length += filetimeValue.write( out );
  483. break;
  484. }
  485. default:
  486. {
  487. /* The variant type is not supported yet. However, if the value
  488. * is a byte array we can write it nevertheless. */
  489. if (value instanceof byte[])
  490. {
  491. final byte[] b = (byte[]) value;
  492. out.write(b);
  493. length = b.length;
  494. writeUnsupportedTypeMessage
  495. (new WritingNotSupportedException(type, value));
  496. }
  497. else
  498. throw new WritingNotSupportedException(type, value);
  499. break;
  500. }
  501. }
  502. /* pad values to 4-bytes */
  503. while ( ( length & 0x3 ) != 0 )
  504. {
  505. out.write( 0x00 );
  506. length++;
  507. }
  508. return length;
  509. }
  510. }