/components/com_citations/BibTex.php

https://bitbucket.org/osobh/invertnet · PHP · 1228 lines · 759 code · 32 blank · 437 comment · 233 complexity · 4f2e6e573d9c8fb45a1e91e6b8ef471b MD5 · raw file

  1. <?php
  2. /**
  3. * @package hubzero-cms
  4. * @author Shawn Rice <zooley@purdue.edu>
  5. * @copyright Copyright 2005-2011 Purdue University. All rights reserved.
  6. * @license http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
  7. *
  8. * Copyright 2005-2011 Purdue University. All rights reserved.
  9. *
  10. * This file is part of: The HUBzero(R) Platform for Scientific Collaboration
  11. *
  12. * The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
  13. * software: you can redistribute it and/or modify it under the terms of
  14. * the GNU Lesser General Public License as published by the Free Software
  15. * Foundation, either version 3 of the License, or (at your option) any
  16. * later version.
  17. *
  18. * HUBzero is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. *
  26. * HUBzero is a registered trademark of Purdue University.
  27. */
  28. // Check to ensure this file is included in Joomla!
  29. defined('_JEXEC') or die( 'Restricted access' );
  30. /* vim: set ts=4 sw=4: */
  31. /**
  32. * Class for working with BibTex data
  33. *
  34. * A class which provides common methods to access and
  35. * create Strings in BibTex format
  36. *
  37. * PHP versions 4 and 5
  38. *
  39. * LICENSE: This source file is subject to version 3.0 of the PHP license
  40. * that is available through the world-wide-web at the following URI:
  41. * http://www.php.net/license/3_0.txt. If you did not receive a copy of
  42. * the PHP License and are unable to obtain it through the web, please
  43. * send a note to license@php.net so we can mail you a copy immediately.
  44. *
  45. * @category Structures
  46. * @package Structures_BibTex
  47. * @author Elmar Pitschke <elmar.pitschke@gmx.de>
  48. * @copyright 1997-2005 The PHP Group
  49. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  50. * @version CVS: $Id: BibTex.php,v 1.15 2007/01/23 22:56:40 hugoki Exp $
  51. * @link http://pear.php.net/package/Structures_BibTex
  52. */
  53. require_once( JPATH_ROOT.DS.'libraries'.DS.'pear'.DS.'PEAR.php' );
  54. /**
  55. * Structures_BibTex
  56. *
  57. * A class which provides common methods to access and
  58. * create Strings in BibTex format.
  59. * Example 1: Parsing a BibTex File and returning the number of entries
  60. * <code>
  61. * $bibtex = new Structures_BibTex();
  62. * $ret = $bibtex->loadFile('foo.bib');
  63. * if (PEAR::isError($ret)) {
  64. * die($ret->getMessage());
  65. * }
  66. * $bibtex->parse();
  67. * print "There are ".$bibtex->amount()." entries";
  68. * </code>
  69. * Example 2: Parsing a BibTex File and getting all Titles
  70. * <code>
  71. * $bibtex = new Structures_BibTex();
  72. * $ret = $bibtex->loadFile('bibtex.bib');
  73. * if (PEAR::isError($ret)) {
  74. * die($ret->getMessage());
  75. * }
  76. * $bibtex->parse();
  77. * foreach ($bibtex->data as $entry) {
  78. * print $entry['title']."<br />";
  79. * }
  80. * </code>
  81. * Example 3: Adding an entry and printing it in BibTex Format
  82. * <code>
  83. * $bibtex = new Structures_BibTex();
  84. * $addarray = array();
  85. * $addarray['type'] = 'Article';
  86. * $addarray['cite'] = 'art2';
  87. * $addarray['title'] = 'Titel2';
  88. * $addarray['author'][0]['first'] = 'John';
  89. * $addarray['author'][0]['last'] = 'Doe';
  90. * $addarray['author'][1]['first'] = 'Jane';
  91. * $addarray['author'][1]['last'] = 'Doe';
  92. * $bibtex->addEntry($addarray);
  93. * print nl2br($bibtex->bibTex());
  94. * </code>
  95. *
  96. * @category Structures
  97. * @package Structures_BibTex
  98. * @author Elmar Pitschke <elmar.pitschke@gmx.de>
  99. * @copyright 1997-2005 The PHP Group
  100. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  101. * @version Release: @package_version@
  102. * @link http://pear.php.net/Structures/Structure_BibTex
  103. */
  104. class Structures_BibTex
  105. {
  /**
   * Parsed entries; parse() appends one array per entry
   *
   * @access public
   * @var array
   */
  var $data;
  /**
   * Raw BibTex text waiting to be parsed
   *
   * @access public
   * @var string
   */
  var $content;
  /**
   * Value delimiters, opening character => closing character
   *
   * @access private
   * @var array
   */
  var $_delimiters;
  /**
   * Warnings collected while parsing
   *
   * @access public
   * @var array
   */
  var $warnings;
  /**
   * Run-time configuration options
   *
   * @access private
   * @var array
   */
  var $_options;
  /**
   * RTF Format String
   *
   * @access public
   * @var string
   */
  var $rtfstring;
  /**
   * HTML Format String
   *
   * @access public
   * @var string
   */
  var $htmlstring;
  /**
   * Array with the "allowed" types
   *
   * @access public
   * @var array
   */
  var $allowedTypes;
  /**
   * Author Format String
   *
   * @access public
   * @var string
   */
  var $authorstring;
  /**
   * Position marker; loadFile() resets it to 0.
   * Declared explicitly so that loadFile() does not create it dynamically.
   *
   * @access private
   * @var int
   */
  var $_pos;
  /**
   * Previous position marker; loadFile() resets it to 0.
   * Declared explicitly so that loadFile() does not create it dynamically.
   *
   * @access private
   * @var int
   */
  var $_oldpos;
  /**
   * Constructor
   *
   * Sets up the delimiters, the default options, the output templates and
   * the list of accepted entry types, then applies the supplied options.
   *
   * @access public
   * @param array $options Option name => value pairs, each passed to setOption()
   * @return void
   */
  function __construct($options = array())
  {
      $this->_delimiters = array(
          '"' => '"',
          '{' => '}'
      );
      $this->data     = array();
      $this->content  = '';
      $this->warnings = array();
      $this->_options = array(
          'stripDelimiter'    => true,
          'validate'          => true,
          'unwrap'            => false,
          'wordWrapWidth'     => false,
          'wordWrapBreak'     => "\n",
          'wordWrapCut'       => 0,
          'removeCurlyBraces' => false,
          'extractAuthors'    => true,
      );
      foreach ($options as $option => $value) {
          // setOption() returns a PEAR_Error for unknown names; as before, it is ignored
          $this->setOption($option, $value);
      }
      $this->rtfstring  = 'AUTHORS, "{\b TITLE}", {\i JOURNAL}, YEAR';
      // The row used to end in "</th></td>", which left the <tr> unclosed
      $this->htmlstring = '<tr><td>AUTHORS, "<strong>TITLE</strong>", <em>JOURNAL</em>, VOLUME, PAGES, PUBLISHER, YEAR</td></tr>';
      $this->allowedTypes = array(
          'article',
          'book',
          'booklet',
          'conference',
          'inbook',
          'incollection',
          'inproceedings',
          'manual',
          'masterthesis',  // historical spelling, kept so existing data still validates
          'mastersthesis', // standard BibTex spelling
          'misc',
          'phdthesis',
          'proceedings',
          'techreport',
          'unpublished',
          'xarchive',
          'magazine',
          'patent appl',
          'chapter',
          'notes',
          'letter',
          'manuscript'
      );
      $this->authorstring = 'VON LAST JR, FIRST';
  }
  /**
   * PHP 4 style constructor
   *
   * Kept so that code calling this method by name keeps working. It must
   * stay below __construct() and only forwards to it.
   *
   * @access public
   * @param array $options Option name => value pairs, each passed to setOption()
   * @return void
   */
  function Structures_BibTex($options = array())
  {
      $this->__construct($options);
  }
  /**
   * Changes one run-time configuration option
   *
   * Only names that already exist in the option table are accepted.
   *
   * @access public
   * @param string $option name of the option
   * @param mixed $value new value of the option
   * @return mixed true on success, PEAR_Error if the option is unknown
   */
  function setOption($option, $value)
  {
      if (!array_key_exists($option, $this->_options)) {
          return PEAR::raiseError('Unknown option '.$option);
      }
      $this->_options[$option] = $value;
      return true;
  }
  /**
   * Reads a BibTex file into the content string
   *
   * The previous content is replaced and both position markers are reset to 0.
   *
   * @access public
   * @param string $filename Name of the file
   * @return mixed true on success, PEAR_Error if the file is missing or unreadable
   */
  function loadFile($filename)
  {
      if (!file_exists($filename)) {
          return PEAR::raiseError('Could not find file '.$filename);
      }
      $this->content = @file_get_contents($filename);
      if (false === $this->content) {
          return PEAR::raiseError('Could not open file '.$filename);
      }
      $this->_pos    = 0;
      $this->_oldpos = 0;
      return true;
  }
  /**
   * Appends BibTex text to the content string
   *
   * @access public
   * @param string $bibstring BibTex text to append
   * @return void
   */
  function addContent($bibstring)
  {
      $this->content = $this->content.$bibstring;
  }
  /**
   * Parses what is stored in content and clears the content if the parsing is successful.
   *
   * The content is scanned character by character. An entry starts at an '@'
   * seen outside any entry and ends when its braces are balanced again; each
   * completed entry is handed to _parseEntry(). Warnings and data from an
   * earlier run are discarded first.
   *
   * @access public
   * @return mixed true on success, PEAR_Error if the braces are unbalanced
   */
  function parse()
  {
      $this->warnings = array();
      $this->data     = array();
      $valid    = true;
      $open     = 0;     // current brace depth
      $entry    = false; // true while inside an entry
      $char     = '';
      $lastchar = '';
      $buffer   = '';
      // The content is not modified while scanning, so its length is taken once
      $length = strlen($this->content);
      for ($i = 0; $i < $length; $i++) {
          $char = substr($this->content, $i, 1);
          if ((0 != $open) && ('@' == $char)) {
              if (!$this->_checkAt($buffer)) {
                  $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer);
                  //To correct the data a closing brace is processed instead;
                  //stepping back makes the '@' itself be read again afterwards
                  $char = '}';
                  $i--;
              }
          }
          if ((0 == $open) && ('@' == $char)) { //The beginning of an entry
              $entry = true;
          } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Unescaped opening brace inside an entry
              $open++;
          } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Unescaped closing brace inside an entry
              $open--;
              if ($open < 0) { //More are closed than opened
                  $valid = false;
              }
              if (0 == $open) { //End of entry
                  $entry     = false;
                  $entrydata = $this->_parseEntry($buffer);
                  //An empty result means the entry is not supported or not correct;
                  //_parseEntry() is expected to have generated a warning already
                  if ($entrydata) {
                      $this->data[] = $entrydata;
                  }
                  $buffer = '';
              }
          }
          if ($entry) { //Inside entry
              $buffer .= $char;
          }
          $lastchar = $char;
      }
      //If open is one it may be possible that the last ending brace is missing
      if (1 == $open) {
          $entrydata = $this->_parseEntry($buffer);
          if (!$entrydata) {
              $valid = false;
          } else {
              $this->data[] = $entrydata;
              $buffer       = '';
              $open         = 0;
          }
      }
      //At this point the open should be zero
      if (0 != $open) {
          $valid = false;
      }
      //Are there multiple entries with the same cite?
      if ($this->_options['validate']) {
          $cites = array();
          foreach ($this->data as $parsed) {
              $cites[] = $parsed['cite'];
          }
          $unique = array_unique($cites);
          if (sizeof($cites) != sizeof($unique)) { //Some values have not been unique!
              $notuniques = array();
              //array_unique() keeps the keys of first occurrences, so every
              //key missing from $unique belongs to a repeated cite
              foreach ($cites as $index => $cite) {
                  if (!array_key_exists($index, $unique)) {
                      $notuniques[] = $cite;
                  }
              }
              $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',', $notuniques));
          }
      }
      if ($valid) {
          $this->content = '';
          return true;
      }
      return PEAR::raiseError('Unbalanced parenthesis');
  }
  /**
   * Extracting the data of one entry
   *
   * The parse function splits the content into its entries.
   * Then every entry is parsed by this function.
   * It parses the entry backwards:
   * the last usable '=' is searched and the value behind it is extracted,
   * then the last ',' before it is searched and the field name extracted,
   * and the entry is shortened accordingly.
   * After all field => value pairs are consumed the cite and the type are
   * extracted and, if the extractAuthors option is set, the authors are split.
   * A copy of the entry is kept while the validate option is set because the
   * warnings carry the whole entry text.
   * "@string" and "@preamble" entries are not supported; for them an empty
   * array is returned.
   *
   * @access private
   * @param string $entry The entry, without its final closing brace
   * @return array The representation of the entry, empty for unsupported entries
   */
  function _parseEntry($entry)
  {
      $entrycopy = '';
      if ($this->_options['validate']) {
          $entrycopy = $entry; //We need a copy for printing the warnings
      }
      $ret = array();
      if ('@string' == strtolower(substr($entry, 0, 7))) {
          //Strings are not yet supported!
          if ($this->_options['validate']) {
              $this->_generateWarning('STRING_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
          }
      } elseif ('@preamble' == strtolower(substr($entry, 0, 9))) {
          //Preamble not yet supported!
          if ($this->_options['validate']) {
              $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
          }
      } else {
          //Parsing all fields
          while (strrpos($entry, '=') !== false) {
              $position = strrpos($entry, '=');
              //Checking that the equal sign is not escaped and not inside an equation (for example in an abstract)
              $proceed = true;
              if (substr($entry, $position-1, 1) == '\\') {
                  $proceed = false;
              }
              if ($proceed) {
                  $proceed = $this->_checkEqualSign($entry, $position);
              }
              while (!$proceed) {
                  $substring = substr($entry, 0, $position);
                  $position  = strrpos($substring, '=');
                  if (false === $position) {
                      //No further '=' to try: none of the remaining ones separates
                      //a field from its value. Stop field parsing instead of
                      //searching forever.
                      break 2;
                  }
                  $proceed = true;
                  if (substr($entry, $position-1, 1) == '\\') {
                      $proceed = false;
                  }
                  if ($proceed) {
                      $proceed = $this->_checkEqualSign($entry, $position);
                  }
              }
              $value = trim(substr($entry, $position+1));
              $entry = substr($entry, 0, $position);
              if (',' == substr($value, -1)) {
                  $value = substr($value, 0, -1);
              }
              if ($this->_options['validate']) {
                  $this->_validateValue($value, $entrycopy);
              }
              if ($this->_options['stripDelimiter']) {
                  $value = $this->_stripDelimiter($value);
              }
              if ($this->_options['unwrap']) {
                  $value = $this->_unwrap($value);
              }
              if ($this->_options['removeCurlyBraces']) {
                  $value = $this->_removeCurlyBraces($value);
              }
              $position    = strrpos($entry, ',');
              $field       = strtolower(trim(substr($entry, $position+1)));
              $ret[$field] = $value;
              $entry       = substr($entry, 0, $position);
          }
          //Parsing cite and type; explode() replaces the removed split()
          $arr         = explode('{', $entry);
          $ret['cite'] = isset($arr[1]) ? trim($arr[1]) : '';
          $ret['type'] = strtolower(trim($arr[0]));
          if ('@' == substr($ret['type'], 0, 1)) {
              $ret['type'] = substr($ret['type'], 1);
          }
          if ($this->_options['validate']) {
              if (!$this->_checkAllowedType($ret['type'])) {
                  $this->_generateWarning('WARNING_NOT_ALLOWED_TYPE', $ret['type'], $entry.'}');
              }
          }
          //Handling the authors
          if (array_key_exists('author', $ret) && $this->_options['extractAuthors']) {
              $ret['author'] = $this->_extractAuthors($ret['author']);
          }
      }
      return $ret;
  }
  /**
   * Checking whether the position of the '=' is correct
   *
   * Sometimes there is a problem if a '=' is used inside a value (for example in an abstract).
   * The text from the end of the entry back to the '=' is scanned and the
   * unescaped braces are counted: if they balance, the '=' is outside braces.
   * If the entry ends with a double quote (optionally followed by a comma),
   * the '=' is only accepted when two unescaped double quotes are found
   * between the end of the entry and the '='.
   *
   * @access private
   * @param string $entry The text of the whole remaining entry
   * @param int $position Offset of the '=' under test
   * @return bool true if the '=' is correct, false if it belongs to the value
   */
  function _checkEqualSign($entry, $position)
  {
      //A false offset would turn "$i >= $position" into a boolean comparison
      //that never fails; as an integer the scans below always terminate
      $position = (int) $position;
      $length   = strlen($entry);
      $open     = 0;
      for ($i = $length - 1; $i >= $position; $i--) {
          //Offset 0 has no preceding character
          $precedingchar = ($i > 0) ? substr($entry, $i - 1, 1) : '';
          $char          = substr($entry, $i, 1);
          if (('{' == $char) && ('\\' != $precedingchar)) {
              $open++;
          }
          if (('}' == $char) && ('\\' != $precedingchar)) {
              $open--;
          }
      }
      if (0 != $open) {
          return false;
      }
      //There is still the possibility that the value is delimited by double quotes.
      //Then the braces may balance even if the '=' is inside the value.
      $entrycopy = trim($entry);
      $lastchar  = substr($entrycopy, -1);
      if (',' == $lastchar) {
          $lastchar = substr($entrycopy, -2, 1);
      }
      if ('"' != $lastchar) {
          return true;
      }
      //Searching backwards, both the closing and the opening quote have to
      //show up before the '=' is reached
      $found = 0;
      for ($i = $length - 1; $i >= $position; $i--) {
          $precedingchar = ($i > 0) ? substr($entry, $i - 1, 1) : '';
          $char          = substr($entry, $i, 1);
          if (('"' == $char) && ('\\' != $precedingchar)) {
              $found++;
          }
          if (2 == $found) {
              return true;
          }
      }
      return false;
  }
  /**
   * Tells whether an entry type is in the list of allowed types
   *
   * @access private
   * @param string $entry The type to look up
   * @return bool true if allowed, false otherwise
   */
  function _checkAllowedType($entry)
  {
      $index = array_search($entry, $this->allowedTypes);
      return (false !== $index);
  }
  /**
   * Checking whether an at is inside a value
   *
   * Sometimes an entry misses its closing brace. Then the at of the next entry seems to be
   * inside the entry. To tell the cases apart, the value after the last unescaped '=' of the
   * text read so far is examined: if it still has an open delimiter, the at belongs to it.
   *
   * @access private
   * @param string $entry The text of the entry until the at
   * @return bool true if the at is inside a value, false if the at is likely to begin the next entry
   */
  function _checkAt($entry)
  {
      $closing = array_values($this->_delimiters);
      //Getting the value (an at is only allowed in values)
      $position = strrpos($entry, '=');
      while ((false !== $position) && ($position > 0) && ('\\' == substr($entry, $position - 1, 1))) {
          //Escaped '=': continue with the one before it
          $position = strrpos(substr($entry, 0, $position), '=');
      }
      if (false === $position) {
          //No field separator at all, so there is no value the at could be part of
          return false;
      }
      $value    = trim(substr($entry, $position + 1));
      $open     = 0;     // depth of delimiters with distinct opening and closing characters
      $inquotes = false; // true while a delimiter that closes itself (the double quote) is open
      $lastchar = '';
      $length   = strlen($value);
      for ($i = 0; $i < $length; $i++) {
          //The value is scanned, not the file content
          $char = substr($value, $i, 1);
          if ('\\' != $lastchar) {
              if (isset($this->_delimiters[$char])) {
                  if ($this->_delimiters[$char] == $char) {
                      //Opening and closing character are the same, so depth counting
                      //cannot work: toggle instead, and only outside of braces
                      if (0 == $open) {
                          $inquotes = !$inquotes;
                      }
                  } else {
                      $open++;
                  }
              } elseif (in_array($char, $closing)) {
                  $open--;
              }
          }
          $lastchar = $char;
      }
      //An open delimiter means we are still inside the value
      return (($open > 0) || $inquotes);
  }
  /**
   * Removes enclosing delimiters
   *
   * As long as the text starts with an opening delimiter and ends with the
   * matching closing delimiter, both characters are cut off.
   *
   * @access private
   * @param string $entry The text the delimiters should be stripped from
   * @return string Stripped text
   */
  function _stripDelimiter($entry)
  {
      $openers = array_keys($this->_delimiters);
      for (;;) {
          $head = substr($entry, 0, 1);
          $tail = substr($entry, -1, 1);
          if (!in_array($head, $openers)) { //No opening delimiter in front
              break;
          }
          if ($tail != $this->_delimiters[$head]) { //The end does not match it
              break;
          }
          $entry = substr($entry, 1, -1);
      }
      return $entry;
  }
  /**
   * Collapses whitespace
   *
   * Every run of whitespace becomes a single blank; leading and trailing
   * whitespace is removed.
   *
   * @access private
   * @param string $entry The text to unwrap
   * @return string unwrapped text
   */
  function _unwrap($entry)
  {
      return trim(preg_replace('/\s+/', ' ', $entry));
  }
  /**
   * Wraps a text according to the wordWrap* options
   *
   * Empty values and values that are not strings are returned untouched.
   *
   * @access private
   * @param string $entry The text to wrap
   * @return string wrapped text
   */
  function _wordwrap($entry)
  {
      if (('' == $entry) || !is_string($entry)) {
          return $entry;
      }
      $width = $this->_options['wordWrapWidth'];
      $break = $this->_options['wordWrapBreak'];
      $cut   = $this->_options['wordWrapCut'];
      return wordwrap($entry, $width, $break, $cut);
  }
  /**
   * Extracting the authors
   *
   * The author field is split at " and ". Each author is then taken apart
   * into first, von, last and jr according to its form:
   * without a comma it is read as "First von Last",
   * with commas as "von Last, First" or "von Last, Jr, First".
   * The case of the words, as reported by _determineCase(), decides where
   * the von part starts and ends.
   *
   * @access private
   * @param string $entry The entry with the authors
   * @return array one array with the keys first, von, last and jr per author
   */
  function _extractAuthors($entry)
  {
      $entry = $this->_unwrap($entry);
      //The separator is a literal, so explode() replaces the removed split()
      $authorarray = explode(' and ', $entry);
      for ($i = 0; $i < sizeof($authorarray); $i++) {
          $author = trim($authorarray[$i]);
          $first  = '';
          $von    = '';
          $last   = '';
          $jr     = '';
          if (strpos($author, ',') === false) {
              //First form (First von Last): no commas; words are separated by blank or tilde
              $tmparray = preg_split('/ |~/', $author);
              $size     = sizeof($tmparray);
              if (1 == $size) { //There is only a last
                  $last = $tmparray[0];
              } elseif (2 == $size) { //There is a first and a last
                  $first = $tmparray[0];
                  $last  = $tmparray[1];
              } else {
                  $invon  = false;
                  $inlast = false;
                  for ($j = 0; $j < ($size - 1); $j++) {
                      if ($inlast) {
                          $last .= ' '.$tmparray[$j];
                      } elseif ($invon) {
                          $case = $this->_determineCase($tmparray[$j]);
                          if (PEAR::isError($case)) {
                              //The word is skipped
                          } elseif ((0 == $case) || (-1 == $case)) { //Change from von to last
                              //You only change when there is no more lower case there
                              $islast = true;
                              for ($k = ($j + 1); $k < ($size - 1); $k++) {
                                  $futurecase = $this->_determineCase($tmparray[$k]);
                                  //The result for the word ahead has to be checked, not $case
                                  if (!PEAR::isError($futurecase) && (0 == $futurecase)) {
                                      $islast = false;
                                  }
                              }
                              if ($islast) {
                                  $inlast = true;
                                  if (-1 == $case) { //Caseless belongs to the last
                                      $last .= ' '.$tmparray[$j];
                                  } else {
                                      $von .= ' '.$tmparray[$j];
                                  }
                              } else {
                                  $von .= ' '.$tmparray[$j];
                              }
                          } else {
                              $von .= ' '.$tmparray[$j];
                          }
                      } else {
                          $case = $this->_determineCase($tmparray[$j]);
                          if (PEAR::isError($case)) {
                              //The word is skipped
                          } elseif (0 == $case) { //Change from first to von
                              $invon = true;
                              $von  .= ' '.$tmparray[$j];
                          } else {
                              $first .= ' '.$tmparray[$j];
                          }
                      }
                  }
                  //The last word is always part of the last name
                  $last .= ' '.$tmparray[$size - 1];
              }
          } else { //Second and third form
              $tmparray = explode(',', $author);
              //The first part must contain von and last
              $vonlastarray = explode(' ', $tmparray[0]);
              $size         = sizeof($vonlastarray);
              if (1 == $size) { //Only one word -> got to be the last
                  $last = $vonlastarray[0];
              } else {
                  $inlast = false;
                  for ($j = 0; $j < ($size - 1); $j++) {
                      if ($inlast) {
                          $last .= ' '.$vonlastarray[$j];
                          continue;
                      }
                      $case = $this->_determineCase($vonlastarray[$j]);
                      //As before, a word whose case cannot be determined is treated like a non-lowercase word
                      if (PEAR::isError($case) || (0 != $case)) { //Change from von to last
                          $islast = true;
                          for ($k = ($j + 1); $k < ($size - 1); $k++) {
                              $futurecase = $this->_determineCase($vonlastarray[$k]);
                              if (!PEAR::isError($futurecase) && (0 == $futurecase)) {
                                  $islast = false;
                              }
                          }
                          if ($islast) {
                              $inlast = true;
                              $last  .= ' '.$vonlastarray[$j];
                          } else {
                              $von .= ' '.$vonlastarray[$j];
                          }
                      } else {
                          $von .= ' '.$vonlastarray[$j];
                      }
                  }
                  $last .= ' '.$vonlastarray[$size - 1];
              }
              //Three parts (two commas) mean the third form: the middle part is the jr
              if (3 == sizeof($tmparray)) {
                  $jr = $tmparray[1];
              }
              //Everything in the last part is first
              $first = $tmparray[sizeof($tmparray) - 1];
          }
          $authorarray[$i] = array(
              'first' => trim($first),
              'von'   => trim($von),
              'last'  => trim($last),
              'jr'    => trim($jr)
          );
      }
      return $authorarray;
  }
  /**
   * Case Determination according to the needs of BibTex
   *
   * To parse the Author(s) correctly a determination is needed
   * to get the Case of a word. The first ASCII letter that is not
   * enclosed in braces decides. There are three possible values:
   * - Upper Case (return value 1)
   * - Lower Case (return value 0)
   * - Caseless (return value -1), when no such letter exists
   *
   * @access private
   * @param string $word
   * @return int The Case or PEAR_Error if $word is not a string or is empty after trimming
   */
  function _determineCase($word)
  {
      //The type is checked before trim() is called, which only accepts strings
      $trimmedword = is_string($word) ? trim($word) : '';
      if ('' === $trimmedword) {
          return PEAR::raiseError('Could not determine case on word: '.(string)$word);
      }
      $openbrace = 0;
      //The scan is bounded by the trimmed word, which is the text being read
      $length = strlen($trimmedword);
      for ($i = 0; $i < $length; $i++) {
          $letter = substr($trimmedword, $i, 1);
          if ('{' === $letter) {
              $openbrace++;
          } elseif ('}' === $letter) {
              $openbrace--;
          } elseif (0 == $openbrace) {
              $ord = ord($letter);
              if (($ord >= 65) && ($ord <= 90)) { //A-Z
                  return 1;
              }
              if (($ord >= 97) && ($ord <= 122)) { //a-z
                  return 0;
              }
          }
      }
      return -1;
  }
  /**
   * Validation of a value
   *
   * Checks one field value for problems that do not break the parsing.
   * For every problem found a warning is appended to the warnings array:
   * one for each of the two patterns below that matches, and one if the
   * unescaped braces of the value do not balance.
   *
   * @access private
   * @param string $entry The value which should be validated
   * @param string $wholeentry The whole BibTex entry the value is part of
   * @return void
   */
  function _validateValue($entry, $wholeentry)
  {
      //Value enclosed by braces with an @ inside
      if (preg_match('/^{.*@.*}$/', $entry)) {
          $this->_generateWarning('WARNING_AT_IN_BRACES', $entry, $wholeentry);
      }
      //Value enclosed by double quotes with a further double quote inside
      if (preg_match('/^\".*\\".*\"$/', $entry)) {
          $this->_generateWarning('WARNING_ESCAPED_DOUBLE_QUOTE_INSIDE_DOUBLE_QUOTES', $entry, $wholeentry);
      }
      //Counting the unescaped braces
      $balance  = 0;
      $previous = '';
      $total    = strlen($entry);
      for ($idx = 0; $idx < $total; $idx++) {
          $current = substr($entry, $idx, 1);
          if ('\\' != $previous) {
              if ('{' == $current) {
                  $balance++;
              } elseif ('}' == $current) {
                  $balance--;
              }
          }
          $previous = $current;
      }
      if (0 != $balance) {
          $this->_generateWarning('WARNING_UNBALANCED_AMOUNT_OF_BRACES', $entry, $wholeentry);
      }
  }
  /**
   * Remove curly braces from a value
   *
   * Delimiters enclosing the whole value are taken off first and put back
   * at the end, so only braces inside the value are affected. A brace pair
   * is removed when neither brace is preceded by a backslash; the pattern
   * needs a character in front of the opening brace, so a pair at the very
   * start of the inner text is left alone.
   *
   * @access private
   * @param string $value The value in which curly braces are to be removed
   * @return string Value with removed curly braces
   */
  function _removeCurlyBraces($value)
  {
      //First we save the delimiters; they are read from $value itself
      $beginningdels = array_keys($this->_delimiters);
      $firstchar     = substr($value, 0, 1);
      $lastchar      = substr($value, -1, 1);
      $begin         = '';
      $end           = '';
      while (in_array($firstchar, $beginningdels)) { //The first character is an opening delimiter
          if ($lastchar == $this->_delimiters[$firstchar]) { //Matches the closing delimiter
              $begin .= $firstchar;
              //Prepended so that nested delimiters are reattached in mirrored order
              $end    = $lastchar.$end;
              $value  = substr($value, 1, -1);
          } else {
              break;
          }
          $firstchar = substr($value, 0, 1);
          $lastchar  = substr($value, -1, 1);
      }
      //Now we get rid of the curly braces
      $pattern     = '/([^\\\\])\{(.*?[^\\\\])\}/';
      $replacement = '$1$2';
      $value       = preg_replace($pattern, $replacement, $value);
      //Reattach delimiters
      return $begin.$value.$end;
  }
  /**
   * Appends a warning to the warnings array
   *
   * @access private
   * @param string $type The type of the warning
   * @param string $entry The line of the entry where the warning occurred
   * @param string $wholeentry OPTIONAL The whole entry where the warning occurred
   */
  function _generateWarning($type, $entry, $wholeentry='')
  {
      $this->warnings[] = array(
          'warning'    => $type,
          'entry'      => $entry,
          'wholeentry' => $wholeentry
      );
  }
  /**
   * Discards all collected warnings
   *
   * @access public
   * @return void
   */
  function clearWarnings()
  {
      $this->warnings = array();
  }
  /**
   * Tells whether at least one warning has been collected
   *
   * @access public
   * @return bool true if there is, false otherwise
   */
  function hasWarning()
  {
      return (count($this->warnings) > 0);
  }
  /**
   * Counts the stored BibTex entries
   *
   * @access public
   * @return int The number of entries in the data array
   */
  function amount()
  {
      $total = count($this->data);
      return $total;
  }
  /**
   * Returns the author formatted
   *
   * The placeholders VON, LAST, JR and FIRST of the authorstring are
   * replaced by the trimmed parts of the author; missing parts count as
   * empty. All placeholders are replaced in a single pass, so text that was
   * inserted for one placeholder is never mistaken for another one (a last
   * name containing "JR", for example).
   *
   * @access private
   * @param array $array Author array with the optional keys von, last, jr and first
   * @return string the formatted author string
   */
  function _formatAuthor($array)
  {
      $replacements = array();
      foreach (array('von', 'last', 'jr', 'first') as $part) {
          $replacements[strtoupper($part)] = array_key_exists($part, $array) ? trim($array[$part]) : '';
      }
      return trim(strtr($this->authorstring, $replacements));
  }
  /**
   * Converts the stored BibTex entries to a BibTex string.
   *
   * Every entry in $this->data is written as
   * "@type { cite," followed by one "key = {value}," line per field and a
   * final "author = {...}" line. The author is always the last field and
   * is emitted (empty) even when the entry has no author.
   *
   * Field values are passed through $this->_wordWrap() when the
   * 'wordWrapWidth' option is greater than 0. The 'cite', 'type' and
   * 'author' keys are not emitted by the field loop and are therefore not
   * wrapped either; 'author' in particular may be an array of author parts.
   *
   * @access public
   * @return string The BibTex string
   */
  function bibTex()
  {
      // Keys handled separately (intro line / trailing author line).
      $reserved = array('cite', 'type', 'author');

      $bibtex = '';
      foreach ($this->data as $entry) {
          //Intro
          $bibtex .= '@'.strtolower($entry['type']).' { '.$entry['cite'].",\n";

          //Other fields except author
          foreach ($entry as $key => $val) {
              // Decide first whether the field is emitted at all, so reserved
              // values (notably an author array) never reach _wordWrap().
              if (in_array((string) $key, $reserved, true)) {
                  continue;
              }
              if ($this->_options['wordWrapWidth'] > 0) {
                  $val = $this->_wordWrap($val);
              }
              $bibtex .= "\t".$key.' = {'.$val."},\n";
          }

          //Author
          $author = '';
          if (array_key_exists('author', $entry)) {
              if ($this->_options['extractAuthors'] && is_array($entry['author'])) {
                  // Format each author and join them with ' and '.
                  $formatted = array();
                  foreach ($entry['author'] as $authorentry) {
                      $formatted[] = $this->_formatAuthor($authorentry);
                  }
                  $author = join(' and ', $formatted);
              } else {
                  // Authors were not split into parts: use the value as stored.
                  $author = $entry['author'];
              }
          }
          $bibtex .= "\tauthor = {".$author."}\n";
          $bibtex .= "}\n\n";
      }
      return $bibtex;
  }
  /**
   * Adds a new BibTex entry to the data.
   *
   * The entry is appended to the end of $this->data as given; no
   * validation is performed here.
   *
   * @access public
   * @param array $newentry The new data to add
   * @return void
   */
  function addEntry($newentry)
  {
      // Append to the end of the entry list.
      $this->data[] = $newentry;
  }
  /**
   * Returns statistics about the stored entries.
   *
   * The result is a hash table whose keys are the entry types found in
   * $this->data and whose values are the number of entries of that type.
   *
   * @access public
   * @return array Hash table: entry type => number of entries
   */
  function getStatistic()
  {
      $counts = array();
      foreach ($this->data as $record) {
          $type = $record['type'];
          // First occurrence of this type: start its counter at zero.
          if (!array_key_exists($type, $counts)) {
              $counts[$type] = 0;
          }
          $counts[$type]++;
      }
      return $counts;
  }
  /**
   * Returns the stored data in RTF format.
   *
   * This is a deliberately simple export meant for a quick preview or for
   * sending someone a reference list in word/rtf format; fine formatting
   * should be left to BibTex. For every entry the template in
   * $this->rtfstring is used and the placeholders TITLE, JOURNAL, YEAR and
   * AUTHORS are replaced; override "rtfstring" to change the format.
   *
   * The placeholders are replaced in a single pass, so a value that itself
   * contains a placeholder word (e.g. a title containing "YEAR") is left
   * untouched.
   *
   * Entries with no title, journal, year and authors are skipped and
   * recorded as a WARNING_LINE_WAS_NOT_CONVERTED warning.
   *
   * @return string the RTF string
   */
  function rtf()
  {
      $ret = "{\\rtf\n";
      foreach ($this->data as $entry) {
          $title   = array_key_exists('title', $entry) ? $this->_unwrap($entry['title']) : '';
          $journal = array_key_exists('journal', $entry) ? $this->_unwrap($entry['journal']) : '';
          $year    = array_key_exists('year', $entry) ? $this->_unwrap($entry['year']) : '';

          $authors = '';
          if (array_key_exists('author', $entry)) {
              if ($this->_options['extractAuthors'] && is_array($entry['author'])) {
                  // Format each author and join them with ', '.
                  $formatted = array();
                  foreach ($entry['author'] as $authorentry) {
                      $formatted[] = $this->_formatAuthor($authorentry);
                  }
                  $authors = join(', ', $formatted);
              } else {
                  // Authors were not split into parts: use the value as stored.
                  $authors = $entry['author'];
              }
          }

          if ((''!=$title) || (''!=$journal) || (''!=$year) || (''!=$authors)) {
              // strtr() substitutes all placeholders in one scan of the
              // template and never re-examines inserted text.
              $line = strtr($this->rtfstring, array(
                  'TITLE'   => $title,
                  'JOURNAL' => $journal,
                  'YEAR'    => $year,
                  'AUTHORS' => $authors,
              ));
              $ret .= $line."\n\\par\n";
          } else {
              $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry,1));
          }
      }
      $ret .= '}';
      return $ret;
  }
  /**
   * Returns the stored data in HTML format.
   *
   * This is a deliberately simple export meant for a quick preview; fine
   * formatting should be left to BibTex. For every entry the template in
   * $this->htmlstring is used and its placeholders are replaced; override
   * "htmlstring" to change the format.
   *
   * TITLE, YEAR and AUTHORS are always replaced (with an empty string when
   * the field is missing). JOURNAL, PAGES, VOLUME and PUBLISHER are
   * replaced when the entry has the field; otherwise the template text
   * "<em>JOURNAL</em>,", "PAGES,", "VOLUME," or "PUBLISHER," is removed.
   *
   * All substitutions happen in a single pass over the template, so a
   * value that itself contains a placeholder word (e.g. a title containing
   * "PAGES," or "YEAR") is left untouched.
   *
   * Entries with no title, journal, year and authors are skipped and
   * recorded as a WARNING_LINE_WAS_NOT_CONVERTED warning.
   *
   * NOTE(review): values are inserted into the markup as-is, without HTML
   * escaping. If entries can come from untrusted input the caller-visible
   * output should be escaped — confirm where the data originates.
   *
   * @return string the HTML string
   */
  function html()
  {
      // Optional fields: entry key => placeholder in the template.
      $optionalFields = array(
          'pages'     => 'PAGES',
          'volume'    => 'VOLUME',
          'publisher' => 'PUBLISHER',
      );

      $ret = "<table border='0' padding='1px' class='prettytable'>";
      foreach ($this->data as $entry) {
          $title   = array_key_exists('title', $entry) ? $this->_unwrap($entry['title']) : '';
          $journal = array_key_exists('journal', $entry) ? $this->_unwrap($entry['journal']) : '';
          $year    = array_key_exists('year', $entry) ? $this->_unwrap($entry['year']) : '';

          // Present optional field: fill its placeholder.
          // Absent optional field: drop the placeholder with its trailing comma.
          $optional = array();
          foreach ($optionalFields as $field => $placeholder) {
              if (array_key_exists($field, $entry)) {
                  $optional[$placeholder] = $this->_unwrap($entry[$field]);
              } else {
                  $optional[$placeholder.','] = '';
              }
          }

          $authors = '';
          if (array_key_exists('author', $entry)) {
              if ($this->_options['extractAuthors'] && is_array($entry['author'])) {
                  // Format each author and join them with ', '.
                  $formatted = array();
                  foreach ($entry['author'] as $authorentry) {
                      $formatted[] = $this->_formatAuthor($authorentry);
                  }
                  $authors = join(', ', $formatted);
              } else {
                  // Authors were not split into parts: use the value as stored.
                  $authors = $entry['author'];
              }
          }

          if ((''!=$title) || (''!=$journal) || (''!=$year) || (''!=$authors)) {
              $replacements = array(
                  'TITLE'   => $title,
                  'YEAR'    => $year,
                  'AUTHORS' => $authors,
              );
              if (array_key_exists('journal', $entry)) {
                  $replacements['JOURNAL'] = $journal;
              } else {
                  $replacements['<em>JOURNAL</em>,'] = '';
              }
              $replacements = $replacements + $optional;

              // strtr() substitutes everything in one scan of the template
              // and never re-examines text it has already inserted.
              $ret .= strtr($this->htmlstring, $replacements);
          } else {
              $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry,1));
          }
      }
      $ret .= "</table>";
      return $ret;
  }
  1195. }
  1196. ?>