PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java

https://github.com/apache/pdfbox
Java | 685 lines | 315 code | 35 blank | 335 comment | 55 complexity | ae87515c470a9fa12219d94e961cd96c MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.pdfbox.util;
  18. import java.text.ParsePosition;
  19. import java.text.SimpleDateFormat;
  20. import java.util.Calendar;
  21. import java.util.Date;
  22. import java.util.GregorianCalendar;
  23. import java.util.Locale;
  24. import java.util.SimpleTimeZone;
  25. import java.util.TimeZone;
  26. import org.apache.pdfbox.cos.COSString;
  27. /*
  28. * Date format is described in PDF Reference 1.7 section 3.8.2
  29. * (www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf)
  30. * and also in PDF 32000-1:2008
  31. * (http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf)
  32. * although the latter inexplicably omits the trailing apostrophe.
  33. *
  34. * The interpretation of dates without timezones is unclear.
  35. * The code below assumes that such dates are in UTC+00 (aka GMT).
  36. * This is in keeping with the PDF Reference's assertion that:
  37. * numerical fields default to zero values.
  38. * However, the Reference does go on to make the cryptic remark:
  39. * If no UT information is specified, the relationship of the specified
  40. * time to UT is considered to be unknown. Whether or not the time
  41. * zone is known, the rest of the date should be specified in local time.
  42. * I understand this to refer to _creating_ a pdf date value. That is,
  43. * code that can get the wall clock time and cannot get the timezone
  44. * should write the wall clock time with a time zone of zero.
  45. * When _parsing_ a PDF date, the statement talks about "the rest of the date"
  46. * being local time, thus explicitly excluding the use of the local time
  47. * for the time zone.
  48. */
  49. /**
  50. * Converts dates to strings and back using the PDF date standard
  51. * in section 3.8.2 of PDF Reference 1.7.
  52. *
  53. * @author Ben Litchfield
  54. * @author Fred Hansen
  55. *
  56. * TODO Move members of this class elsewhere for shared use in pdfbox and xmpbox.
  57. */
  58. public final class DateConverter
  59. {
  60. private DateConverter()
  61. {
  62. }
  63. // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours
  64. private static final int MINUTES_PER_HOUR = 60;
  65. private static final int SECONDS_PER_MINUTE = 60;
  66. private static final int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE*1000;
  67. private static final int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE;
  68. private static final int HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE, DAY = 2*HALF_DAY;
  69. /*
  70. * The Date format is supposed to be the PDF_DATE_FORMAT, but other
  71. * forms appear. These lists offer alternatives to be tried
  72. * if parseBigEndianDate fails.
  73. *
  74. * The time zone offset generally trails the date string, so it is processed
  75. * separately with parseTZoffset. (This does not preclude having time
  76. * zones in the elements below; one does.)
  77. *
  78. * Alas, SimpleDateFormat is badly non-reentrant -- it modifies its
  79. * calendar field (PDFBox-402), so these lists are strings to create
  80. * SimpleDate format as needed.
  81. *
  82. * Some past entries have been elided because they duplicate existing
  83. * entries. See the API for SimpleDateFormat, which says
  84. * "For parsing, the number of pattern letters is ignored
  85. * unless it's needed to separate two adjacent fields."
  86. *
  87. * toCalendar(String, String[]) tests to see that the entire input text
  88. * has been consumed. Therefore the ordering of formats is important.
  89. * If one format begins with the entirety of another, the longer
  90. * must precede the other in the list.
  91. *
  92. * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh"
  93. * Where year is yy, four digit years are accepted
  94. * and two digit years are converted to four digits in the range
  95. * [thisyear-79...thisyear+20]
  96. */
  97. private static final String[] ALPHA_START_FORMATS =
  98. {
  99. "EEEE, dd MMM yy hh:mm:ss a",
  100. "EEEE, MMM dd, yy hh:mm:ss a",
  101. "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows
  102. "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465
  103. "EEEE MMM dd, yy HH:mm:ss", // ECMP5
  104. "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7
  105. "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant
  106. };
  107. private static final String[] DIGIT_START_FORMATS =
  108. {
  109. "dd MMM yy HH:mm:ss", // for 26 May 2000 11:25:00
  110. "dd MMM yy HH:mm", // for 26 May 2000 11:25
  111. "yyyy MMM d", // ambiguity resolved only by omitting time
  112. "yyyymmddhh:mm:ss", // test case "200712172:2:3"
  113. "H:m M/d/yy", // test case "9:47 5/12/2008"
  114. "M/d/yy HH:mm:ss",
  115. "M/d/yy HH:mm",
  116. "M/d/yy",
  117. // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss"
  118. // "yyyy MMM d HH:mm:ss",
  119. // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm" "M/d/yy",
  120. // (incoming digit strings do not mark themselves as y, m, or d!)
  121. // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170
  122. // "M/dd/yyyy hh:mm:ss",
  123. // "MM/d/yyyy hh:mm:ss",
  124. // "M/d/yyyy HH:mm:ss",
  125. // "M/dd/yyyy",
  126. // "MM/d/yyyy",
  127. // "M/d/yyyy",
  128. // "M/d/yyyy HH:mm:ss",
  129. // "M/d/yy HH:mm:ss",
  130. // subsumed by big-endian parse
  131. // "yyyy-MM-dd'T'HH:mm:ss",
  132. // "yyyy-MM-dd'T'HH:mm:ss",
  133. // "yyyymmdd hh:mm:ss",
  134. // "yyyymmdd",
  135. // "yyyymmddX''00''", // covers 24 cases
  136. // (orignally the above ended with '+00''00''';
  137. // the first apostrophe quoted the plus,
  138. // '' mapped to a single ', and the ''' was invalid)
  139. };
  140. /**
  141. * Converts a Calendar to a string formatted as:
  142. * D:yyyyMMddHHmmss#hh'mm' where # is Z, +, or -.
  143. *
  144. * @param cal The date to convert to a string. May be null.
  145. * The DST_OFFSET is included when computing the output time zone.
  146. *
  147. * @return The date as a String to be used in a PDF document,
  148. * or null if the cal value is null
  149. */
  150. public static String toString(Calendar cal)
  151. {
  152. if (cal == null)
  153. {
  154. return null;
  155. }
  156. String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) +
  157. cal.get(Calendar.DST_OFFSET), "'");
  158. return String.format("D:"
  159. + "%1$4tY%1$2tm%1$2td" // yyyyMMdd
  160. + "%1$2tH%1$2tM%1$2tS" // HHmmss
  161. + "%2$s" // time zone
  162. + "'", // trailing apostrophe
  163. cal, offset);
  164. }
  165. /**
  166. * Converts the date to ISO 8601 string format:
  167. * yyyy-mm-ddThh:MM:ss#hh:mm (where '#" is '+' or '-').
  168. *
  169. * @param cal The date to convert. Must not be null.
  170. * The DST_OFFSET is included in the output value.
  171. *
  172. * @return The date represented as an ISO 8601 string.
  173. */
  174. public static String toISO8601(Calendar cal)
  175. {
  176. String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) +
  177. cal.get(Calendar.DST_OFFSET), ":");
  178. return String.format(
  179. "%1$4tY" // yyyy
  180. + "-%1$2tm" // -mm (%tm adds one to cal month value)
  181. + "-%1$2td" // -dd (%tm adds one to cal month value)
  182. + "T" // T
  183. + "%1$2tH:%1$2tM:%1$2tS" // HHmmss
  184. + "%2$s", // time zone
  185. cal, offset);
  186. }
  187. /*
  188. * Constrain a timezone offset to the range [-11:59 thru +11:59].
  189. * by adding or subtracting multiples of a full day.
  190. */
  191. private static int restrainTZoffset(long proposedOffset)
  192. {
  193. proposedOffset = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY;
  194. // 0 <= proposedOffset < DAY
  195. proposedOffset = (proposedOffset - HALF_DAY) % HALF_DAY;
  196. // -HALF_DAY < proposedOffset < HALF_DAY
  197. return (int)proposedOffset;
  198. }
  199. /*
  200. * Formats a time zone offset as #hh^mm
  201. * where # is + or -, hh is hours, ^ is a separator, and mm is minutes.
  202. * Any separator may be specified by the second argument;
  203. * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF).
  204. * The returned value is constrained to the range -11:59 ... 11:59.
  205. * For offset of 0 millis, the String returned is "+00^00", never "Z".
  206. * To get a "general" offset in form GMT#hh:mm, write
  207. * "GMT"+DateConverter.formatTZoffset(offset, ":");
  208. *
  209. * Take thought in choosing the source for the millis value.
  210. * It can come from calendarValue.getTimeZone() or from
  211. * calendarValue.get(Calendar.ZONE_OFFSET). If a TimeZone was created
  212. * from a valid time zone ID, then it may have a daylight savings rule.
  213. * (As of July 4, 2013, the data base at http://www.iana.org/time-zones
  214. * recognized 629 time zone regions. But a TimeZone created as
  215. * new SimpleTimeZone(millisOffset, "ID"),
  216. * will not have a daylight savings rule. (Not even if there is a
  217. * known time zone with the given ID. To get the TimeZone named "xDT"
  218. * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT.
  219. *
  220. * When parsing PDF dates, the incoming values DOES NOT have a TIMEZONE value.
  221. * At most it has an OFFSET value like -04'00'. It is generally impossible to
  222. * determine what TIMEZONE corresponds to a given OFFSET. If the date is
  223. * in the summer when daylight savings is in effect, an offset of -0400
  224. * might correspond to any one of the 38 regions (of 53) with standard time
  225. * offset -0400 and no daylight saving. Or it might correspond to
  226. * any one of the 31 regions (out of 43) that observe daylight savings
  227. * and have standard time offset of -0500.
  228. *
  229. * If a Calendar has not been assigned a TimeZone with setTimeZone(),
  230. * it will have by default the local TIMEZONE, not just the OFFSET. In the
  231. * USA, this TimeZone will have a daylight savings rule.
  232. *
  233. * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs
  234. * from the offset in the TimeZone set by Calendar.setTimeZone(). Example:
  235. * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'.
  236. * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00'
  237. * calVal = new GregorianCalendar(); // TimeZone is the local default
  238. * calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR);
  239. * Four different offsets can be computed from calVal:
  240. * calVal.get(Calendar.ZONE_OFFSET) => -07:00
  241. * calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00
  242. * calVal.getTimeZone().getRawOffset() => -05:00
  243. * calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00
  244. *
  245. * Which is correct??? I dunno, though setTimeZone() does seem to affect
  246. * ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone
  247. * or ZONE_OFFSET has been set; both have been set by initialization code.
  248. * TimeZone is initialized to the local default time zone
  249. * and ZONE_OFFSET is set from it.
  250. *
  251. * My choice in this DateConverter class has been to set the
  252. * initial TimeZone of a GregorianCalendar to GMT. Thereafter
  253. * the TimeZone is modified with {@link #adjustTimeZoneNicely}.
  254. *
  255. * package-private for testing
  256. */
  257. static String formatTZoffset(long millis, String sep)
  258. {
  259. SimpleDateFormat sdf = new SimpleDateFormat("Z"); // #hhmm
  260. sdf.setTimeZone(new SimpleTimeZone(restrainTZoffset(millis),"unknown"));
  261. String tz = sdf.format(new Date());
  262. return tz.substring(0,3) + sep + tz.substring(3);
  263. }
  264. /*
  265. * Parses an integer from a string, starting at and advancing a ParsePosition.
  266. * Returns The integer that was at the given parse position, or the remedy value
  267. * if no digits were found.
  268. *
  269. * The ParsePosition will be incremented by the number of digits found, but no
  270. * more than maxlen. That is, the ParsePosition will advance across at most
  271. * maxlen initial digits in text. The error index is ignored and unchanged.
  272. *
  273. * maxlen is the maximum length of the integer to parse, usually 2, but 4 for
  274. * year fields. If the field of length maxlen begins with a digit, but contains
  275. * a non-digit, no error is signaled and the integer value is returned.
  276. */
  277. private static int parseTimeField(String text, ParsePosition where, int maxlen, int remedy)
  278. {
  279. if (text == null)
  280. {
  281. return remedy;
  282. }
  283. // it would seem that DecimalFormat.parse() would be simpler;
  284. // but that class blithely ignores setMaximumIntegerDigits
  285. int retval = 0;
  286. int index = where.getIndex();
  287. int limit = index + Math.min(maxlen, text.length()-index);
  288. for (; index < limit; index++)
  289. {
  290. // convert digit to integer
  291. int cval = text.charAt(index) - '0';
  292. // test to see if we got a digit
  293. if (cval < 0 || cval > 9)
  294. {
  295. // no digit at index
  296. break;
  297. }
  298. // append the digit to the return value
  299. retval = retval * 10 + cval;
  300. }
  301. if (index == where.getIndex())
  302. {
  303. return remedy;
  304. }
  305. where.setIndex(index);
  306. return retval;
  307. }
  308. /*
  309. * Advances the ParsePosition past any and all the characters that match
  310. * those in the optionals list. In particular, a space will skip all spaces.
  311. *
  312. * The start value is incremented by the number of optionals found. The error
  313. * index is ignored and unchanged.
  314. *
  315. * Returns the last non-space character passed over (even if space is not in
  316. * the optionals list.)
  317. */
  318. private static char skipOptionals(String text, ParsePosition where, String optionals)
  319. {
  320. char retval = ' ', currch;
  321. while (text != null && where.getIndex() < text.length() &&
  322. optionals.indexOf((currch = text.charAt(where.getIndex()))) >= 0)
  323. {
  324. retval = (currch != ' ') ? currch : retval;
  325. where.setIndex(where.getIndex() + 1);
  326. }
  327. return retval;
  328. }
  329. /*
  330. * If the victim string is at the given position in the text, this method
  331. * advances the position past that string.
  332. *
  333. * `where` is the initial position to look at. After return, this will have
  334. * been incremented by the length of the victim if it was found. The error
  335. * index is ignored and unchanged.
  336. */
  337. private static boolean skipString(String text, String victim, ParsePosition where)
  338. {
  339. if (text.startsWith(victim, where.getIndex()))
  340. {
  341. where.setIndex(where.getIndex()+victim.length());
  342. return true;
  343. }
  344. return false;
  345. }
  346. /*
  347. * Construct a new GregorianCalendar and set defaults.
  348. * Locale is ENGLISH.
  349. * TimeZone is "UTC" (zero offset and no DST).
  350. * Parsing is NOT lenient. Milliseconds are zero.
  351. *
  352. * package-private for testing
  353. */
  354. static GregorianCalendar newGreg()
  355. {
  356. GregorianCalendar retCal = new GregorianCalendar(Locale.ENGLISH);
  357. retCal.setTimeZone(new SimpleTimeZone(0, "UTC"));
  358. retCal.setLenient(false);
  359. retCal.set(Calendar.MILLISECOND, 0);
  360. return retCal;
  361. }
  362. /*
  363. * Install a TimeZone on a GregorianCalendar without changing the
  364. * hours value. A plain GregorianCalendat.setTimeZone()
  365. * adjusts the Calendar.HOUR value to compensate. This is *BAD*
  366. * (not to say *EVIL*) when we have already set the time.
  367. */
  368. private static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz)
  369. {
  370. cal.setTimeZone(tz);
  371. int offset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) /
  372. MILLIS_PER_MINUTE;
  373. cal.add(Calendar.MINUTE, -offset);
  374. }
  375. /*
  376. * Parses the end of a date string for a time zone and, if one is found,
  377. * sets the time zone of the GregorianCalendar. Otherwise the calendar
  378. * time zone is unchanged.
  379. *
  380. * The text is parsed as
  381. * (Z|GMT|UTC)? [+- ]* h [': ]? m '?
  382. * where the leading String is optional, h is two digits by default,
  383. * but may be a single digit if followed by one of space, apostrophe,
  384. * colon, or the end of string. Similarly, m is one or two digits.
  385. * This scheme accepts the format of PDF, RFC 822, and ISO8601.
  386. * If none of these applies (as for a time zone name), we try
  387. * TimeZone.getTimeZone().
  388. *
  389. * Scanning begins at where.index. After success, the returned index
  390. * is that of the next character after the recognized string.
  391. *
  392. * package-private for testing
  393. */
  394. static boolean parseTZoffset(String text, GregorianCalendar cal,
  395. ParsePosition initialWhere)
  396. {
  397. ParsePosition where = new ParsePosition(initialWhere.getIndex());
  398. TimeZone tz = new SimpleTimeZone(0, "GMT");
  399. int tzHours, tzMin;
  400. char sign = skipOptionals(text, where, "Z+- ");
  401. boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where) ||
  402. skipString(text, "UTC", where));
  403. sign = (!hadGMT) ? sign : skipOptionals(text, where, "+- ");
  404. tzHours = parseTimeField(text, where, 2, -999);
  405. skipOptionals(text, where, "\': ");
  406. tzMin = parseTimeField(text, where, 2, 0);
  407. skipOptionals(text, where, "\' ");
  408. if (tzHours != -999)
  409. {
  410. // we parsed a time zone in default format
  411. int hrSign = (sign == '-' ? -1 : 1);
  412. tz.setRawOffset(restrainTZoffset(hrSign * (tzHours * MILLIS_PER_HOUR + tzMin *
  413. (long) MILLIS_PER_MINUTE)));
  414. tz.setID("unknown");
  415. }
  416. else if ( ! hadGMT)
  417. {
  418. // try to process as a name; "GMT" or "UTC" has already been processed
  419. String tzText = text.substring(initialWhere.getIndex()).trim();
  420. tz = TimeZone.getTimeZone(tzText);
  421. // getTimeZone returns "GMT" for unknown ids
  422. if ("GMT".equals(tz.getID()))
  423. {
  424. // no timezone in text, cal amd initialWhere are unchanged
  425. return false;
  426. }
  427. else
  428. {
  429. // we got a tz by name; use it
  430. where.setIndex(text.length());
  431. }
  432. }
  433. adjustTimeZoneNicely(cal, tz);
  434. initialWhere.setIndex(where.getIndex());
  435. return true;
  436. }
  437. /*
  438. * Parses a big-endian date: year month day hour min sec.
  439. * The year must be four digits. Other fields may be adjacent
  440. * and delimited by length or they may follow appropriate delimiters.
  441. * year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction]
  442. * If any numeric field is omitted, all following fields must also be omitted.
  443. * No time zone is processed.
  444. *
  445. * Ambiguous dates can produce unexpected results. For example:
  446. * 1970 12 23:08 will parse as 1970 December 23 00:08:00
  447. *
  448. * The parse begins at `where, on return the index
  449. * is advanced to just beyond the last character processed.
  450. * The error index is ignored and unchanged.
  451. */
  452. private static GregorianCalendar parseBigEndianDate(String text,
  453. ParsePosition initialWhere)
  454. {
  455. ParsePosition where = new ParsePosition(initialWhere.getIndex());
  456. int year = parseTimeField(text, where, 4, 0);
  457. if (where.getIndex() != 4 + initialWhere.getIndex())
  458. {
  459. return null;
  460. }
  461. skipOptionals(text, where, "/- ");
  462. int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11
  463. skipOptionals(text, where, "/- ");
  464. int day = parseTimeField(text, where, 2, 1);
  465. skipOptionals(text, where, " T");
  466. int hour = parseTimeField(text, where, 2, 0);
  467. skipOptionals(text, where, ": ");
  468. int minute = parseTimeField(text, where, 2, 0);
  469. skipOptionals(text, where, ": ");
  470. int second = parseTimeField(text, where, 2, 0);
  471. char nextC = skipOptionals(text, where, ".");
  472. if (nextC == '.')
  473. {
  474. // fractions of a second: skip upto 19 digits
  475. parseTimeField(text, where, 19, 0);
  476. }
  477. GregorianCalendar dest = newGreg();
  478. try
  479. {
  480. dest.set(year, month, day, hour, minute, second);
  481. // trigger limit tests
  482. dest.getTimeInMillis();
  483. }
  484. catch (IllegalArgumentException ill)
  485. {
  486. return null;
  487. }
  488. initialWhere.setIndex(where.getIndex());
  489. skipOptionals(text, initialWhere, " ");
  490. // dest has at least a year value
  491. return dest;
  492. }
  493. /*
  494. * See if text can be parsed as a date according to any of a list of
  495. * formats. The time zone may be included as part of the format, or
  496. * omitted in favor of later testing for a trailing time zone.
  497. *
  498. * The parse starts at `where`, upon return it will have been
  499. * incremented to refer to the next non-space character after the date.
  500. * If no date was found, the value is unchanged.
  501. * The error index is ignored and unchanged.
  502. *
  503. * If there is a failure to find a date, or the GregorianCalendar
  504. * for the date that was found. Unless a time zone was
  505. * part of the format, the time zone will be GMT+0
  506. */
  507. private static GregorianCalendar parseSimpleDate(String text, String[] fmts,
  508. ParsePosition initialWhere)
  509. {
  510. for(String fmt : fmts)
  511. {
  512. ParsePosition where = new ParsePosition(initialWhere.getIndex());
  513. SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH);
  514. GregorianCalendar retCal = newGreg();
  515. sdf.setCalendar(retCal);
  516. if (sdf.parse(text, where) != null)
  517. {
  518. initialWhere.setIndex(where.getIndex());
  519. skipOptionals(text, initialWhere, " ");
  520. return retCal;
  521. }
  522. }
  523. return null;
  524. }
  525. /*
  526. * Parses a String to see if it begins with a date, and if so,
  527. * returns that date. The date must be strictly correct--no
  528. * field may exceed the appropriate limit.
  529. * (That is, the Calendar has setLenient(false).)
  530. * Skips initial spaces, but does NOT check for "D:"
  531. *
  532. * The scan first tries parseBigEndianDate and parseTZoffset
  533. * and then tries parseSimpleDate with appropriate formats,
  534. * again followed by parseTZoffset. If at any stage the entire
  535. * text is consumed, that date value is returned immediately.
  536. * Otherwise the date that consumes the longest initial part
  537. * of the text is returned.
  538. *
  539. * - PDF format dates are among those recognized by parseBigEndianDate.
  540. * - The formats tried are alphaStartFormats or digitStartFormat and
  541. * any listed in the value of moreFmts.
  542. */
  543. private static Calendar parseDate(String text, ParsePosition initialWhere)
  544. {
  545. if (text == null || text.isEmpty())
  546. {
  547. return null;
  548. }
  549. // remember longestr date string
  550. int longestLen = -999999;
  551. // theorem: the above value will never be used
  552. // proof: longestLen is only used if longestDate is not null
  553. GregorianCalendar longestDate = null; // null says no date found yet
  554. int whereLen; // tempcopy of where.getIndex()
  555. ParsePosition where = new ParsePosition(initialWhere.getIndex());
  556. // check for null (throws exception) and trim off surrounding spaces
  557. skipOptionals(text, where, " ");
  558. int startPosition = where.getIndex();
  559. // try big-endian parse
  560. GregorianCalendar retCal = parseBigEndianDate(text, where);
  561. // check for success and a timezone
  562. if (retCal != null && (where.getIndex() == text.length() ||
  563. parseTZoffset(text, retCal, where)))
  564. {
  565. // if text is fully consumed, return the date else remember it and its length
  566. whereLen = where.getIndex();
  567. if (whereLen == text.length())
  568. {
  569. initialWhere.setIndex(whereLen);
  570. return retCal;
  571. }
  572. longestLen = whereLen;
  573. longestDate = retCal;
  574. }
  575. // try one of the sets of standard formats
  576. where.setIndex(startPosition);
  577. String [] formats
  578. = Character.isDigit(text.charAt(startPosition))
  579. ? DIGIT_START_FORMATS
  580. : ALPHA_START_FORMATS;
  581. retCal = parseSimpleDate(text, formats, where);
  582. // check for success and a timezone
  583. if (retCal != null &&
  584. (where.getIndex() == text.length() ||
  585. parseTZoffset(text, retCal, where)))
  586. {
  587. // if text is fully consumed, return the date else remember it and its length
  588. whereLen = where.getIndex();
  589. if (whereLen == text.length())
  590. {
  591. initialWhere.setIndex(whereLen);
  592. return retCal;
  593. }
  594. if (whereLen > longestLen)
  595. {
  596. longestLen = whereLen;
  597. longestDate = retCal;
  598. }
  599. }
  600. if (longestDate != null)
  601. {
  602. initialWhere.setIndex(longestLen);
  603. return longestDate;
  604. }
  605. return retCal;
  606. }
  607. /**
  608. * Returns the Calendar for a given COS string containing a date,
  609. * or {@code null} if it cannot be parsed.
  610. *
  611. * The returned value will have 0 for DST_OFFSET.
  612. *
  613. * @param text A COS string containing a date.
  614. * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed.
  615. */
  616. public static Calendar toCalendar(COSString text)
  617. {
  618. if (text == null)
  619. {
  620. return null;
  621. }
  622. return toCalendar(text.getString());
  623. }
  624. /**
  625. * Returns the Calendar for a given string containing a date,
  626. * or {@code null} if it cannot be parsed.
  627. *
  628. * The returned value will have 0 for DST_OFFSET.
  629. *
  630. * @param text A COS string containing a date.
  631. * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed.
  632. */
  633. public static Calendar toCalendar(String text)
  634. {
  635. if (text == null || text.trim().isEmpty())
  636. {
  637. return null;
  638. }
  639. ParsePosition where = new ParsePosition(0);
  640. skipOptionals(text, where, " ");
  641. skipString(text, "D:", where);
  642. Calendar calendar = parseDate(text, where);
  643. if (calendar == null || where.getIndex() != text.length())
  644. {
  645. // the date string is invalid
  646. return null;
  647. }
  648. return calendar;
  649. }
  650. }