PageRenderTime 53ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/task/import/csvImportTask.class.php

https://github.com/cceh/atom
PHP | 1195 lines | 946 code | 123 blank | 126 comment | 109 complexity | f46a10c5e46844c4f6e39f6d8ddfe701 MD5 | raw file
Possible License(s): CC-BY-3.0, AGPL-3.0, MIT, ISC
  1. <?php
  2. /*
  3. * This file is part of the Access to Memory (AtoM) software.
  4. *
  5. * Access to Memory (AtoM) is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * Access to Memory (AtoM) is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with Access to Memory (AtoM). If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /**
  19. * Import csv data
  20. *
  21. * @package symfony
  22. * @subpackage task
  23. * @author Mike Cantelon <mike@artefactual.com>
  24. */
  25. class csvImportTask extends csvImportBaseTask
  26. {
  27. protected $namespace = 'csv';
  28. protected $name = 'import';
  29. protected $briefDescription = 'Import csv information object data';
  30. protected $detailedDescription = <<<EOF
  31. Import CSV data
  32. EOF;
  /**
   * Register the command-line options specific to this import task, after
   * letting the parent task register its own.
   *
   * Three options take an optional value (source-name, default-parent-slug,
   * default-legacy-parent-id) and two are plain flags
   * (skip-nested-set-build, index).
   *
   * @see sfTask
   */
  protected function configure()
  {
    parent::configure();

    // Options that may carry a value: option name => help text
    $valueOptions = array(
      'source-name' => 'Source name to use when inserting keymap entries.',
      'default-parent-slug' => 'Parent slug under which imported items, with no parent specified, will be added.',
      'default-legacy-parent-id' => 'Legacy parent ID under which imported items, with no parent specified, will be added.'
    );

    // Flag-style options that take no value: option name => help text
    $flagOptions = array(
      'skip-nested-set-build' => "Don't build the nested set upon import completion.",
      'index' => "Index for search during import."
    );

    // Build the definitions in the same order they were originally declared
    $definitions = array();

    foreach ($valueOptions as $optionName => $helpText)
    {
      $definitions[] = new sfCommandOption(
        $optionName,
        null,
        sfCommandOption::PARAMETER_OPTIONAL,
        $helpText
      );
    }

    foreach ($flagOptions as $optionName => $helpText)
    {
      $definitions[] = new sfCommandOption(
        $optionName,
        null,
        sfCommandOption::PARAMETER_NONE,
        $helpText
      );
    }

    $this->addOptions($definitions);
  }
  72. /**
  73. * @see sfTask
  74. */
  75. public function execute($arguments = array(), $options = array())
  76. {
  77. $configuration = ProjectConfiguration::getApplicationConfiguration('qubit', 'test', false);
  78. $sf_context = sfContext::createInstance($configuration);
  79. $this->validateOptions($options);
  80. $skipRows = ($options['skip-rows']) ? $options['skip-rows'] : 0;
  81. // source name can be specified so, if importing from multiple
  82. // sources, you can accommodate legacy ID collisions in files
  83. // you import from different places
  84. $sourceName = ($options['source-name'])
  85. ? $options['source-name']
  86. : basename($arguments['filename']);
  87. if (false === $fh = fopen($arguments['filename'], 'rb'))
  88. {
  89. throw new sfException('You must specify a valid filename');
  90. }
  91. $databaseManager = new sfDatabaseManager($this->configuration);
  92. $conn = $databaseManager->getDatabase('propel')->getConnection();
  93. // set default publication status
  94. $results = $conn->query('SELECT i18n.value
  95. FROM setting INNER JOIN setting_i18n i18n ON setting.id = i18n.id
  96. WHERE setting.name=\'defaultPubStatus\'');
  97. if ($results)
  98. {
  99. $defaultStatusId = $results->fetchColumn();
  100. }
  101. else
  102. {
  103. $defaultStatusId = QubitTerm::PUBLICATION_STATUS_PUBLISHED_ID;
  104. }
  105. // TODO: this may be unnecessary as it may now be part of Qubit trunk
  106. // create note term if it doesn't yet exist
  107. QubitFlatfileImport::createOrFetchTerm(
  108. QubitTaxonomy::NOTE_TYPE_ID,
  109. 'Language note'
  110. );
  111. QubitFlatfileImport::createOrFetchTerm(
  112. QubitTaxonomy::RAD_NOTE_ID,
  113. 'Cast note'
  114. );
  115. QubitFlatfileImport::createOrFetchTerm(
  116. QubitTaxonomy::RAD_NOTE_ID,
  117. 'Credits note'
  118. );
  119. QubitFlatfileImport::createOrFetchTerm(
  120. QubitTaxonomy::RAD_NOTE_ID,
  121. 'Signatures note'
  122. );
  123. // Load taxonomies into variables to avoid use of magic numbers
  124. $termData = QubitFlatfileImport::loadTermsFromTaxonomies(array(
  125. QubitTaxonomy::DESCRIPTION_STATUS_ID => 'descriptionStatusTypes',
  126. QubitTaxonomy::PUBLICATION_STATUS_ID => 'pubStatusTypes',
  127. QubitTaxonomy::DESCRIPTION_DETAIL_LEVEL_ID => 'levelOfDetailTypes',
  128. QubitTaxonomy::NOTE_TYPE_ID => 'noteTypes',
  129. QubitTaxonomy::RAD_NOTE_ID => 'radNoteTypes',
  130. QubitTaxonomy::RAD_TITLE_NOTE_ID => 'titleNoteTypes',
  131. QubitTaxonomy::MATERIAL_TYPE_ID => 'materialTypes',
  132. QubitTaxonomy::RIGHT_ACT_ID => 'copyrightActTypes',
  133. QubitTaxonomy::COPYRIGHT_STATUS_ID => 'copyrightStatusTypes',
  134. QubitTaxonomy::PHYSICAL_OBJECT_TYPE_ID => 'physicalObjectTypes'
  135. ));
  136. // Allow default parent ID to be overridden by CLI options
  137. if ($options['default-parent-slug'])
  138. {
  139. $defaultParentId = getIdCorrespondingToSlug($options['default-parent-slug']);
  140. if (!$options['quiet'])
  141. {
  142. print 'Parent ID of slug "'. $options['default-parent-slug'] .'" is '. $defaultParentId;
  143. }
  144. } else if($options['default-legacy-parent-id']) {
  145. // attempt to fetch keymap entry
  146. $keyMapEntry = QubitFlatfileImport::fetchKeymapEntryBySourceAndTargetName(
  147. $options['default-legacy-parent-id'],
  148. $sourceName,
  149. 'information_object'
  150. );
  151. if ($keyMapEntry)
  152. {
  153. $defaultParentId = $keyMapEntry->target_id;
  154. } else {
  155. throw new sfException('Could not find Qubit ID corresponding to legacy ID.');
  156. }
  157. print 'Using default parent ID '. $defaultParentId .' (legacy parent ID '. $options['default-legacy-parent-id'] .")\n";
  158. } else {
  159. $defaultParentId = QubitInformationObject::ROOT_ID;
  160. }
  161. // Define import
  162. $import = new QubitFlatfileImport(array(
  163. /* Pass context */
  164. 'context' => sfContext::createInstance($this->configuration),
  165. /* What type of object are we importing? */
  166. 'className' => 'QubitInformationObject',
  167. /* Allow silencing of progress info */
  168. 'displayProgress' => ($options['quiet']) ? false : true,
  169. /* How many rows should import until we display an import status update? */
  170. 'rowsUntilProgressDisplay' => $options['rows-until-update'],
  171. /* Where to log errors to */
  172. 'errorLog' => $options['error-log'],
  173. /* the status array is a place to put data that should be accessible
  174. from closure logic using the getStatus method */
  175. 'status' => array(
  176. 'options' => $options,
  177. 'sourceName' => $sourceName,
  178. 'defaultParentId' => $defaultParentId,
  179. 'copyrightStatusTypes' => $termData['copyrightStatusTypes'],
  180. 'copyrightActTypes' => $termData['copyrightActTypes'],
  181. 'defaultStatusId' => $defaultStatusId,
  182. 'descriptionStatusTypes' => $termData['descriptionStatusTypes'],
  183. 'pubStatusTypes' => $termData['pubStatusTypes'],
  184. 'levelOfDetailTypes' => $termData['levelOfDetailTypes'],
  185. 'materialTypes' => $termData['materialTypes'],
  186. 'physicalObjectTypes' => $termData['physicalObjectTypes']
  187. ),
  188. /* import columns that map directly to QubitInformationObject properties */
  189. 'standardColumns' => array(
  190. 'updatedAt',
  191. 'createdAt',
  192. 'accessConditions',
  193. 'accruals',
  194. 'acquisition',
  195. 'alternateTitle',
  196. 'appraisal',
  197. 'archivalHistory',
  198. 'arrangement',
  199. 'culture',
  200. 'descriptionIdentifier',
  201. 'extentAndMedium',
  202. 'findingAids',
  203. 'identifier',
  204. 'locationOfCopies',
  205. 'locationOfOriginals',
  206. 'physicalCharacteristics',
  207. 'relatedUnitsOfDescription',
  208. 'reproductionConditions',
  209. 'revisionHistory',
  210. 'rules',
  211. 'scopeAndContent',
  212. 'sources',
  213. 'title'
  214. ),
  215. /* import columns that should be redirected to QubitInformationObject
  216. properties (and optionally transformed)
  217. Example:
  218. 'columnMap' => array(
  219. 'Archival History' => 'archivalHistory',
  220. 'Revision history' => array(
  221. 'column' => 'revision',
  222. 'transformationLogic' => function(&$self, $text)
  223. {
  224. return $self->appendWithLineBreakIfNeeded(
  225. $self->object->revision,
  226. $text
  227. );
  228. }
  229. )
  230. ),
  231. */
  232. 'columnMap' => array(
  233. 'radEdition' => 'edition',
  234. 'institutionIdentifier' => 'institutionResponsibleIdentifier'
  235. ),
  236. /* import columns that can be added using the
  237. QubitInformationObject::addProperty method */
  238. 'propertyMap' => array(
  239. 'radOtherTitleInformation' => 'otherTitleInformation',
  240. 'radTitleStatementOfResponsibility' => 'titleStatementOfResponsibility',
  241. 'radStatementOfProjection' => 'statementOfProjection',
  242. 'radStatementOfCoordinates' => 'statementOfCoordinates',
  243. 'radStatementOfScaleArchitectural' => 'statementOfScaleArchitectural',
  244. 'radStatementOfScaleCartographic' => 'statementOfScaleCartographic',
  245. 'radPublishersSeriesNote' => 'noteOnPublishersSeries',
  246. 'radIssuingJurisdiction' => 'issuingJurisdictionAndDenomination',
  247. 'radEditionStatementOfResponsibility' => 'editionStatementOfResponsibility',
  248. 'radTitleProperOfPublishersSeries' => 'titleProperOfPublishersSeries',
  249. 'radParallelTitlesOfPublishersSeries' => 'parallelTitleOfPublishersSeries',
  250. 'radOtherTitleInformationOfPublishersSeries' => 'otherTitleInformationOfPublishersSeries',
  251. 'radStatementOfResponsibilityRelatingToPublishersSeries' => 'statementOfResponsibilityRelatingToPublishersSeries',
  252. 'radNumberingWithinPublishersSeries' => 'numberingWithinPublishersSeries',
  253. 'radStandardNumber' => 'standardNumber'
  254. ),
  255. /* import columns that can be added as QubitNote objects */
  256. 'noteMap' => array(
  257. 'languageNote' => array(
  258. 'typeId' => array_search('Language note', $termData['noteTypes'])
  259. ),
  260. 'publicationNote' => array(
  261. 'typeId' => array_search('Publication note', $termData['noteTypes'])
  262. ),
  263. 'generalNote' => array(
  264. 'typeId' => array_search('General note', $termData['noteTypes'])
  265. ),
  266. 'archivistNote' => array(
  267. 'typeId' => array_search("Archivist's note", $termData['noteTypes'])
  268. ),
  269. 'radNoteCast' => array(
  270. 'typeId' => array_search('Cast note', $termData['radNoteTypes'])
  271. ),
  272. 'radNoteCredits' => array(
  273. 'typeId' => array_search('Credits note', $termData['radNoteTypes'])
  274. ),
  275. 'radNoteSignaturesInscriptions' => array(
  276. 'typeId' => array_search('Signatures note', $termData['radNoteTypes'])
  277. ),
  278. 'radNoteConservation' => array(
  279. 'typeId' => array_search('Conservation', $termData['radNoteTypes'])
  280. ),
  281. 'radNoteGeneral' => array(
  282. 'typeId' => array_search('General note', $termData['radNoteTypes'])
  283. ),
  284. 'radNotePhysicalDescription' => array(
  285. 'typeId' => array_search('Physical description', $termData['radNoteTypes'])
  286. ),
  287. 'radNotePublishersSeries' => array(
  288. 'typeId' => array_search("Publisher's series", $termData['radNoteTypes'])
  289. ),
  290. 'radNoteRights' => array(
  291. 'typeId' => array_search('Rights', $termData['radNoteTypes'])
  292. ),
  293. 'radNoteAccompanyingMaterial' => array(
  294. 'typeId' => array_search('Accompanying material', $termData['radNoteTypes'])
  295. ),
  296. 'radNoteAlphaNumericDesignation' => array(
  297. 'typeId' => array_search('Alpha-numeric designations', $termData['radNoteTypes'])
  298. ),
  299. 'radNoteEdition' => array(
  300. 'typeId' => array_search('Edition', $termData['radNoteTypes'])
  301. ),
  302. 'radTitleStatementOfResponsibilityNote' => array(
  303. 'typeId' => array_search('Statements of responsibility', $termData['titleNoteTypes'])
  304. ),
  305. 'radTitleParallelTitles' => array(
  306. 'typeId' => array_search('Parallel titles and other title information', $termData['titleNoteTypes'])
  307. ),
  308. 'radTitleSourceOfTitleProper' => array(
  309. 'typeId' => array_search('Source of title proper', $termData['titleNoteTypes'])
  310. ),
  311. 'radTitleVariationsInTitle' => array(
  312. 'typeId' => array_search('Variations in title', $termData['titleNoteTypes'])
  313. ),
  314. 'radTitleAttributionsAndConjectures' => array(
  315. 'typeId' => array_search('Attributions and conjectures', $termData['titleNoteTypes'])
  316. ),
  317. 'radTitleContinues' => array(
  318. 'typeId' => array_search('Continuation of title', $termData['titleNoteTypes'])
  319. ),
  320. 'radTitleNoteContinuationOfTitle' => array(
  321. 'typeId' => array_search('Continuation of title', $termData['titleNoteTypes'])
  322. )
  323. ),
  324. /* these values get stored to the rowStatusVars array */
  325. 'variableColumns' => array(
  326. 'legacyId',
  327. 'parentId',
  328. 'copyrightStatus',
  329. 'copyrightExpires',
  330. 'copyrightHolder',
  331. 'qubitParentSlug',
  332. 'descriptionStatus',
  333. 'publicationStatus',
  334. 'levelOfDetail',
  335. 'repository',
  336. 'language',
  337. 'script',
  338. 'languageOfDescription',
  339. 'scriptOfDescription',
  340. 'physicalObjectName',
  341. 'physicalObjectLocation',
  342. 'physicalObjectType',
  343. 'physicalStorageLocation',
  344. 'digitalObjectPath',
  345. 'digitalObjectURI'
  346. ),
  347. /* these values get exploded and stored to the rowStatusVars array */
  348. 'arrayColumns' => array(
  349. 'accessionNumber' => '|',
  350. 'creators' => '|',
  351. 'creatorHistories' => '|',
  352. // TODO: the creatorDates* columns should be deprecated in favor of
  353. // a separate event import
  354. 'creatorDates' => '|',
  355. 'creatorDateNotes' => '|',
  356. 'creatorDatesStart' => '|',
  357. 'creatorDatesEnd' => '|',
  358. 'nameAccessPoints' => '|',
  359. 'nameAccessPointHistories' => '|',
  360. 'placeAccessPoints' => '|',
  361. 'placeAccessPointHistories' => '|', // not yet implemented
  362. 'subjectAccessPoints' => '|',
  363. 'subjectAccessPointScopes' => '|', // not yet implemented
  364. 'eventActors' => '|',
  365. 'eventTypes' => '|',
  366. 'eventPlaces' => '|',
  367. 'eventDates' => '|',
  368. 'eventStartDates' => '|',
  369. 'eventEndDates' => '|',
  370. 'eventDescriptions' => '|'
  371. ),
  /*
   * Import logic to execute before saving each information object.
   *
   * $self is the importer object the closure is invoked with (presumably the
   * QubitFlatfileImport instance this closure is registered on -- confirm).
   * Using the current row's values in $self->rowStatusVars, this sets on
   * $self->object: the repository, level of detail, serialized
   * language/script properties, description status, publication status and
   * parent ID.
   */
  'preSaveLogic' => function(&$self)
  {
    // True when the named column exists in the current row and is non-empty
    $hasValue = function($column) use ($self)
    {
      return isset($self->rowStatusVars[$column])
        && 0 < strlen($self->rowStatusVars[$column]);
    };

    // set repository
    if (
      isset($self->rowStatusVars['repository'])
      && $self->rowStatusVars['repository']
    )
    {
      $repository = $self->createOrFetchRepository($self->rowStatusVars['repository']);
      $self->object->repositoryId = $repository->id;
    }

    // set level of detail
    if ($hasValue('levelOfDetail'))
    {
      // Look up the *trimmed* value so stray padding in the CSV cell doesn't
      // defeat the match; a value that trims to nothing falls back to 'Full'
      $levelOfDetail = trim($self->rowStatusVars['levelOfDetail']);
      if (!$levelOfDetail)
      {
        $levelOfDetail = 'Full';
      }

      $levelOfDetailTermId = array_search(
        $levelOfDetail,
        $self->status['levelOfDetailTypes']
      );

      // array_search() returns false when nothing matches: never write that
      // into the ID column, warn instead (mirrors description status below)
      if (false !== $levelOfDetailTermId)
      {
        $self->object->descriptionDetailId = $levelOfDetailTermId;
      }
      else
      {
        printf("Row %s: Invalid level of detail '%s', ignoring\n",
          $self->getStatus('rows') + 1,
          $levelOfDetail);
      }
    }

    // store language-related properties as serialized data
    $languageProperties = array(
      'language',
      'script',
      'languageOfDescription',
      'scriptOfDescription'
    );

    foreach ($languageProperties as $serializeProperty)
    {
      if ($hasValue($serializeProperty))
      {
        // multiple values are pipe-delimited within the cell
        $data = explode('|', $self->rowStatusVars[$serializeProperty]);

        $self->object->addProperty(
          $serializeProperty,
          serialize($data)
        );
      }
    }

    // set description status
    if ($hasValue('descriptionStatus'))
    {
      $descriptionStatus = trim($self->rowStatusVars['descriptionStatus']);

      $statusTermId = array_search(
        $descriptionStatus,
        $self->status['descriptionStatusTypes']
      );

      if (false !== $statusTermId)
      {
        $self->object->descriptionStatusId = $statusTermId;
      }
      else
      {
        printf("Row %s: Invalid description status '%s', using null instead\n",
          $self->getStatus('rows') + 1,
          $descriptionStatus);
      }
    }

    // set publication status, falling back to the default status when the
    // column is empty or its value isn't a known publication status
    $pubStatusTermId = $self->status['defaultStatusId'];

    if ($hasValue('publicationStatus'))
    {
      $matchedStatusId = array_search_case_insensitive(
        $self->rowStatusVars['publicationStatus'],
        $self->status['pubStatusTypes']
      );

      if ($matchedStatusId)
      {
        $pubStatusTermId = $matchedStatusId;
      }
      else
      {
        print "Publication status: '". $self->rowStatusVars['publicationStatus'] ."' is invalid. Using default.\n";
      }
    }

    $self->object->setPublicationStatus($pubStatusTermId);

    // determine parent: an explicit slug wins, then a legacy parent ID
    // resolved through the keymap, then the default parent
    $hasParentSlug = isset($self->rowStatusVars['qubitParentSlug'])
      && $self->rowStatusVars['qubitParentSlug'];

    $hasLegacyParentId = isset($self->rowStatusVars['parentId'])
      && $self->rowStatusVars['parentId'];

    if ($hasParentSlug)
    {
      $parentId = getIdCorrespondingToSlug($self->rowStatusVars['qubitParentSlug']);
    }
    else if ($hasLegacyParentId)
    {
      $mapEntry = $self->fetchKeymapEntryBySourceAndTargetName(
        $self->rowStatusVars['parentId'],
        $self->getStatus('sourceName'),
        'information_object'
      );

      if ($mapEntry)
      {
        $parentId = $mapEntry->target_id;
      }
      else
      {
        // $parentId stays unset, so the object's parent is left untouched
        $error = 'For legacyId '
          . $self->rowStatusVars['legacyId']
          .' Could not find parentId '
          . $self->rowStatusVars['parentId']
          .' in key_map table';

        print $self->logError($error);
      }
    }
    else if (!isset($self->object->parentId))
    {
      // Don't overwrite valid parentId when adding an i18n row
      $parentId = $self->status['defaultParentId'];
    }

    if (isset($parentId))
    {
      $self->object->parentId = $parentId;
    }
  },
  486. /* import logic to execute after saving information object */
  487. 'postSaveLogic' => function(&$self)
  488. {
  489. if (!$self->object->id)
  490. {
  491. throw new sfException('Information object save failed');
  492. } else {
  493. // add keymap entry
  494. $keymap = new QubitKeymap;
  495. $keymap->sourceId = $self->rowStatusVars['legacyId'];
  496. $keymap->sourceName = $self->getStatus('sourceName');
  497. $keymap->targetId = $self->object->id;
  498. $keymap->targetName = 'information_object';
  499. $keymap->save();
  500. // add physical objects
  501. if (
  502. isset($self->rowStatusVars['physicalObjectName'])
  503. && $self->rowStatusVars['physicalObjectName']
  504. )
  505. {
  506. $names = explode('|', $self->rowStatusVars['physicalObjectName']);
  507. $locations = explode('|', $self->rowStatusVars['physicalObjectLocation']);
  508. $types = (isset($self->rowStatusVars['physicalObjectType']))
  509. ? explode('|', $self->rowStatusVars['physicalObjectType'])
  510. : array();
  511. foreach($names as $index => $name)
  512. {
  513. // if location column populated
  514. if ($self->rowStatusVars['physicalObjectLocation'])
  515. {
  516. // if current index applicable
  517. if (isset($locations[$index]))
  518. {
  519. $location = $locations[$index];
  520. } else {
  521. $location = $locations[0];
  522. }
  523. } else {
  524. $location = '';
  525. }
  526. // if object type column populated
  527. if ($self->rowStatusVars['physicalObjectType'])
  528. {
  529. // if current index applicable
  530. if (isset($types[$index]))
  531. {
  532. $type = $types[$index];
  533. } else {
  534. $type = $types[0];
  535. }
  536. } else {
  537. $type = 'Box';
  538. }
  539. $physicalObjectTypeId = array_search(
  540. $type,
  541. $self->getStatus('physicalObjectTypes')
  542. );
  543. // Create new physical object type if not found
  544. if ($physicalObjectTypeId === false)
  545. {
  546. $newType = new QubitTerm;
  547. $newType->name = $type;
  548. $newType->culture = isset($self->object->culture) ? $self->object->culture : 'en';
  549. $newType->taxonomyId = QubitTaxonomy::PHYSICAL_OBJECT_TYPE_ID;
  550. $newType->parentId = QubitTerm::ROOT_ID;
  551. $newType->save();
  552. $physicalObjectTypeId = $newType->id;
  553. }
  554. $container = $self->createOrFetchPhysicalObject(
  555. $name,
  556. $location,
  557. $physicalObjectTypeId
  558. );
  559. // associate container with information object
  560. $self->createRelation(
  561. $container->id,
  562. $self->object->id,
  563. QubitTerm::HAS_PHYSICAL_OBJECT_ID
  564. );
  565. }
  566. }
  567. // add subject access points
  568. $accessPointColumns = array(
  569. 'subjectAccessPoints' => QubitTaxonomy::SUBJECT_ID,
  570. 'placeAccessPoints' => QubitTaxonomy::PLACE_ID,
  571. );
  572. foreach($accessPointColumns as $columnName => $taxonomyId)
  573. {
  574. if (isset($self->rowStatusVars[$columnName]))
  575. {
  576. $index = 0;
  577. foreach($self->rowStatusVars[$columnName] as $subject)
  578. {
  579. if ($subject)
  580. {
  581. $scope = false;
  582. if (isset($self->rowStatusVars['subjectAccessPointScopes'][$index]))
  583. {
  584. $scope = $self->rowStatusVars['subjectAccessPointScopes'][$index];
  585. }
  586. $self->createAccessPoint($taxonomyId, $subject);
  587. if ($scope)
  588. {
  589. // get term ID
  590. $query = "SELECT t.id FROM term t \r
  591. INNER JOIN term_i18n i ON t.id=i.id \r
  592. WHERE i.name=? AND t.taxonomy_id=? AND culture='en'";
  593. $statement = QubitFlatfileImport::sqlQuery(
  594. $query,
  595. array($subject, $taxonomyId)
  596. );
  597. $result = $statement->fetch(PDO::FETCH_OBJ);
  598. if ($result)
  599. {
  600. $termId = $result->id;
  601. // check if a scope note already exists for this term
  602. $query = "SELECT n.id FROM note n INNER JOIN note_i18n i ON n.id=i.id WHERE n.object_id=? AND n.type_id=?";
  603. $statement = QubitFlatfileImport::sqlQuery(
  604. $query,
  605. array($termId, QubitTerm::SCOPE_NOTE_ID)
  606. );
  607. $result = $statement->fetch(PDO::FETCH_OBJ);
  608. if (!$result)
  609. {
  610. // add scope note if it doesn't exist
  611. $note = new QubitNote;
  612. $note->objectId = $termId;
  613. $note->typeId = QubitTerm::SCOPE_NOTE_ID;
  614. $note->content = $self->content($scope);
  615. $note->scope = 'QubitTerm'; # not sure if this is needed
  616. $note->save();
  617. }
  618. } else {
  619. throw new sfException('Could not find term "'. $subject .'"');
  620. }
  621. }
  622. }
  623. $index++;
  624. }
  625. }
  626. }
  627. // add name access points
  628. if (isset($self->rowStatusVars['nameAccessPoints']))
  629. {
  630. // add name access points
  631. $index = 0;
  632. foreach($self->rowStatusVars['nameAccessPoints'] as $name)
  633. {
  634. // skip blank names
  635. if ($name)
  636. {
  637. $actorOptions = array();
  638. if (isset($self->rowStatusVars['nameAccessPointHistories'][$index]))
  639. {
  640. $actorOptions['history'] = $self->rowStatusVars['nameAccessPointHistories'][$index];
  641. }
  642. if (isset($self->object->repositoryId))
  643. {
  644. $actorOptions['repositoryId'] = $self->object->repositoryId;
  645. }
  646. $actor = $self->createOrFetchActor($name, $actorOptions);
  647. $self->createRelation($self->object->id, $actor->id, QubitTerm::NAME_ACCESS_POINT_ID);
  648. }
  649. $index++;
  650. }
  651. }
  652. // add accessions
  653. if (
  654. isset($self->rowStatusVars['accessionNumber'])
  655. && count($self->rowStatusVars['accessionNumber'])
  656. )
  657. {
  658. foreach($self->rowStatusVars['accessionNumber'] as $accessionNumber)
  659. {
  660. // attempt to fetch keymap entry
  661. $accessionMapEntry = $self->fetchKeymapEntryBySourceAndTargetName(
  662. $accessionNumber,
  663. $self->getStatus('sourceName'),
  664. 'accession'
  665. );
  666. // if no entry found, create accession and entry
  667. if (!$accessionMapEntry)
  668. {
  669. print "\nCreating accession # ". $accessionNumber ."\n";
  670. // create new accession
  671. $accession = new QubitAccession;
  672. $accession->identifier = $accessionNumber;
  673. $accession->save();
  674. // create keymap entry for accession
  675. $keymap = new QubitKeymap;
  676. $keymap->sourceId = $accessionNumber;
  677. $keymap->sourceName = $self->getStatus('sourceName');
  678. $keymap->targetId = $accession->id;
  679. $keymap->targetName = 'accession';
  680. $keymap->save();
  681. $accessionId = $accession->id;
  682. } else {
  683. $accessionId = $accessionMapEntry->target_id;
  684. }
  685. print "\nAssociating accession # ". $accessionNumber ." with ". $self->object->title ."\n";
  686. // add relationship between information object and accession
  687. $self->createRelation($self->object->id, $accessionId, QubitTerm::ACCESSION_ID);
  688. }
  689. }
  690. // add material-related term relation
  691. if (isset($self->rowStatusVars['radGeneralMaterialDesignation']))
  692. {
  693. foreach($self->rowStatusVars['radGeneralMaterialDesignation'] as $material)
  694. {
  695. $self->createObjectTermRelation(
  696. $self->object->id,
  697. $material
  698. );
  699. }
  700. }
  701. // add copyright info
  702. // TODO: handle this via a separate import
  703. if (isset($self->rowStatusVars['copyrightStatus']) && $self->rowStatusVars['copyrightStatus'])
  704. {
  705. switch (strtolower($self->rowStatusVars['copyrightStatus']))
  706. {
  707. case 'under copyright':
  708. print "Adding rights for ". $self->object->title ."...\n";
  709. $rightsHolderId = false;
  710. $rightsHolderNames = explode('|', $self->rowStatusVars['copyrightHolder']);
  711. if ($self->rowStatusVars['copyrightExpires'])
  712. {
  713. $endDates = explode('|', $self->rowStatusVars['copyrightExpires']);
  714. }
  715. foreach($rightsHolderNames as $index => $rightsHolderName)
  716. {
  717. $rightsHolderName = ($rightsHolderName) ? $rightsHolderName : 'Unknown';
  718. $rightsHolder = $self->createOrFetchRightsHolder($rightsHolderName);
  719. $rightsHolderId = $rightsHolder->id;
  720. $rightsHolderName = trim(strtolower($rightsHolderName));
  721. if (
  722. $rightsHolderName == 'city of vancouver'
  723. || strpos($rightsHolderName, 'city of vancouver') === 0
  724. )
  725. {
  726. $restriction = 1;
  727. } else {
  728. $restriction = 0;
  729. }
  730. $rightAndRelation = array(
  731. 'restriction' => $restriction,
  732. 'basisId' => QubitTerm::RIGHT_BASIS_COPYRIGHT_ID,
  733. 'actId' => array_search(
  734. 'Replicate',
  735. $self->getStatus('copyrightActTypes')
  736. ),
  737. 'copyrightStatusId' => array_search(
  738. 'Under copyright',
  739. $self->getStatus('copyrightStatusTypes')
  740. )
  741. );
  742. if (isset($endDates))
  743. {
  744. // if rightsholder/expiry dates are paired, use
  745. // corresponding date ...otherwise just use the
  746. // first expiry date
  747. $rightAndRelation['endDate']
  748. = (count($endDates) == count($rightsHolderNames))
  749. ? $endDates[$index]
  750. : $endDates[0];
  751. if (!is_numeric($rightAndRelation['endDate']))
  752. {
  753. throw new sfException('Copyright expiry '. $rightAndRelation['endDate']
  754. .' is invalid.');
  755. }
  756. }
  757. if ($rightsHolderId) $rightAndRelation['rightsHolderId'] = $rightsHolderId;
  758. $self->createRightAndRelation($rightAndRelation);
  759. }
  760. break;
  761. case 'unknown':
  762. $rightsHolder = $self->createOrFetchRightsHolder('Unknown');
  763. $rightsHolderId = $rightsHolder->id;
  764. $rightAndRelation = array(
  765. 'rightsHolderId' => $rightsHolderId,
  766. 'restriction' => 0,
  767. 'basisId' => QubitTerm::RIGHT_BASIS_COPYRIGHT_ID,
  768. 'actId' => array_search(
  769. 'Replicate',
  770. $self->getStatus('copyrightActTypes')
  771. ),
  772. 'copyrightStatusId' => array_search(
  773. 'Unknown',
  774. $self->getStatus('copyrightStatusTypes')
  775. )
  776. );
  777. if ($self->rowStatusVars['copyrightExpires'])
  778. {
  779. $rightAndRelation['endDate'] = $self->rowStatusVars['copyrightExpires'];
  780. }
  781. $self->createRightAndRelation($rightAndRelation);
  782. break;
  783. case 'public domain':
  784. $rightAndRelation = array(
  785. 'restriction' => 1,
  786. 'basisId' => QubitTerm::RIGHT_BASIS_COPYRIGHT_ID,
  787. 'actId' => array_search(
  788. 'Replicate',
  789. $self->getStatus('copyrightActTypes')
  790. ),
  791. 'copyrightStatusId' => array_search(
  792. 'Public domain',
  793. $self->getStatus('copyrightStatusTypes')
  794. )
  795. );
  796. if ($self->rowStatusVars['copyrightExpires'])
  797. {
  798. $rightAndRelation['endDate'] = $self->rowStatusVars['copyrightExpires'];
  799. }
  800. $self->createRightAndRelation($rightAndRelation);
  801. break;
  802. default:
  803. throw new sfException('Copyright status "'
  804. . $self->rowStatusVars['copyrightStatus']
  805. .'" not handled: adjust script or import data');
  806. break;
  807. }
  808. }
  809. // add ad-hoc events
  810. if (isset($self->rowStatusVars['eventActors']))
  811. {
  812. foreach($self->rowStatusVars['eventActors'] as $index => $actor)
  813. {
  814. // initialize data that'll be used to create the event
  815. $eventData = array(
  816. 'actorName' => $actor
  817. );
  818. // define whether each event-related column's values go directly
  819. // into an event property or are put into a variable for further
  820. // processing
  821. $eventColumns = array(
  822. 'eventTypes' => array(
  823. 'variable' => 'eventType',
  824. 'requiredError' => 'You have populated the eventActors column but not the eventTypes column.'
  825. ),
  826. 'eventPlaces' => array('variable' => 'place'),
  827. 'eventDates' => array('property' => 'date'),
  828. 'eventStartDates' => array('property' => 'startDate'),
  829. 'eventEndDates' => array('property' => 'endDate'),
  830. 'eventDescriptions' => array('property' => 'description')
  831. );
  832. // handle each of the event-related columns
  833. $eventType = false;
  834. $place = false;
  835. foreach($eventColumns as $column => $definition)
  836. {
  837. if (isset($self->rowStatusVars[$column]))
  838. {
  839. $value
  840. = (count($self->rowStatusVars['eventActors']) == count($self->rowStatusVars[$column]))
  841. ? $self->rowStatusVars[$column][$index]
  842. : $self->rowStatusVars[$column][0];
  843. // allow column value(s) to set event property
  844. if (isset($definition['property']))
  845. {
  846. $eventData[($definition['property'])] = $value;
  847. }
  848. // allow column values(s) to set variable
  849. if (isset($definition['variable']))
  850. {
  851. $$definition['variable'] = $value;
  852. }
  853. } else if (isset($definition['requiredError'])) {
  854. throw new sfException('You have populated the eventActors column but not the eventTypes column.');
  855. }
  856. }
  857. // if an event type has been specified, attempt to create the event
  858. if ($eventType)
  859. {
  860. // do lookup of type ID
  861. $typeTerm = $self->createOrFetchTerm(QubitTaxonomy::EVENT_TYPE_ID, $eventType);
  862. $eventTypeId = $typeTerm->id;
  863. // create event
  864. $event = $self->createOrUpdateEvent($eventTypeId, $eventData);
  865. // create a place term if specified
  866. if ($place)
  867. {
  868. // create place
  869. $placeTerm = $self->createTerm(QubitTaxonomy::PLACE_ID, $place);
  870. $self->createObjectTermRelation($event->id, $placeTerm->id);
  871. }
  872. } else {
  873. throw new sfException('eventTypes column need to be populated.');
  874. }
  875. }
  876. }
  877. // add creators and create events
  878. $createEvents = array();
  879. if (isset($self->rowStatusVars['creators'])
  880. && count($self->rowStatusVars['creators']))
  881. {
  882. foreach($self->rowStatusVars['creators'] as $index => $creator)
  883. {
  884. // Init eventData array and add creator name
  885. $eventData = array('actorName' => $creator);
  886. setupEventDateData($self, $eventData, $index);
  887. // Add creator history if specified
  888. if(isset($self->rowStatusVars['creatorHistories'][$index]))
  889. {
  890. $eventData['actorHistory'] = $self->rowStatusVars['creatorHistories'][$index];
  891. }
  892. array_push($createEvents, $eventData);
  893. }
  894. }
  895. else if(
  896. isset($self->rowStatusVars['creatorDatesStart'])
  897. || isset($self->rowStatusVars['creatorDatesEnd'])
  898. ) {
  899. foreach($self->rowStatusVars['creatorDatesStart'] as $index => $date)
  900. {
  901. $eventData = array();
  902. setupEventDateData($self, $eventData, $index);
  903. array_push($createEvents, $eventData);
  904. }
  905. }
  906. else if(isset($self->rowStatusVars['creatorDates'])) {
  907. foreach($self->rowStatusVars['creatorDates'] as $index => $date)
  908. {
  909. $eventData = array();
  910. setupEventDateData($self, $eventData, $index);
  911. array_push($createEvents, $eventData);
  912. }
  913. }
  914. // create events, if any
  915. if (count($createEvents))
  916. {
  917. if ($self->rowStatusVars['culture'] != $self->object->sourceCulture)
  918. {
  919. // Add i18n data to existing event
  920. $sql = "SELECT id FROM event WHERE information_object_id = ? and type_id = ?;";
  921. $stmt = QubitFlatfileImport::sqlQuery($sql, array(
  922. $self->object->id,
  923. QubitTerm::CREATION_ID));
  924. $i = 0;
  925. while ($eventId = $stmt->fetchColumn())
  926. {
  927. $createEvents[$i++]['eventId'] = $eventId;
  928. }
  929. }
  930. foreach($createEvents as $eventData)
  931. {
  932. $event = $self->createOrUpdateEvent(
  933. QubitTerm::CREATION_ID,
  934. $eventData
  935. );
  936. }
  937. }
  938. // This will import only a single digital object;
  939. // if both a URI and path are provided, the former is preferred.
  940. if ($uri = $self->rowStatusVars['digitalObjectURI']) {
  941. $do = new QubitDigitalObject;
  942. try {
  943. $do->importFromURI($uri);
  944. $do->informationObject = $self->object;
  945. $do->save($conn);
  946. // importFromURI can raise if the download hits a timeout
  947. } catch (Exception $e) {
  948. $self->logError($e->getMessage());
  949. }
  950. } elseif ($path = $self->rowStatusVars['digitalObjectPath']) {
  951. if (false === $content = file_get_contents($path)) {
  952. $this->logError("Unable to read file: ".$path);
  953. } else {
  954. $do = new QubitDigitalObject;
  955. $do->assets[] = new QubitAsset($path, $content);
  956. $do->usageId = QubitTerm::MASTER_ID;
  957. $do->informationObject = $self->object;
  958. $do->save($conn);
  959. }
  960. }
  961. }
  962. }
  963. ));
  964. // allow search indexing to be enabled via a CLI option
  965. $import->searchIndexingDisabled = ($options['index']) ? false : true;
  966. // convert content with | characters to a bulleted list
  967. $import->contentFilterLogic = function($text)
  968. {
  969. return (substr_count($text, '|'))
  970. ? '* '. str_replace("|", "\n* ", $text)
  971. : $text;
  972. };
  973. $import->addColumnHandler('levelOfDescription', function(&$self, $data)
  974. {
  975. $self->object->setLevelOfDescriptionByName($data);
  976. });
  977. // map value to taxonomy term name and take note of taxonomy term's ID
  978. $import->addColumnHandler('radGeneralMaterialDesignation', function(&$self, $data)
  979. {
  980. if ($data)
  981. {
  982. $data = explode('|', $data);
  983. foreach ($data as $value)
  984. {
  985. $materialIndex = array_search(
  986. $value,
  987. $self->getStatus('materialTypes')
  988. );
  989. if (is_numeric($materialIndex))
  990. {
  991. $self->rowStatusVars['radGeneralMaterialDesignation'][] = $materialIndex;
  992. } else {
  993. throw new sfException('Invalid material type:'. $value);
  994. }
  995. }
  996. }
  997. });
  998. $import->csv($fh, $skipRows);
  999. // build nested set if desired
  1000. if (!$options['skip-nested-set-build'])
  1001. {
  1002. $buildNestedSet = new propelBuildNestedSetTask($this->dispatcher, $this->formatter);
  1003. $buildNestedSet->setCommandApplication($this->commandApplication);
  1004. $buildNestedSet->setConfiguration($this->configuration);
  1005. $ret = $buildNestedSet->run();
  1006. }
  1007. }
  1008. }
  1009. function array_search_case_insensitive($search, $array)
  1010. {
  1011. return array_search(strtolower($search), array_map('strtolower', $array));
  1012. }
  1013. function setupEventDateData(&$self, &$eventData, $index)
  1014. {
  1015. // add dates if specified
  1016. if (
  1017. isset($self->rowStatusVars['creatorDates'][$index])
  1018. || isset($self->rowStatusVars['creatorDatesStart'][$index])
  1019. )
  1020. {
  1021. // Start and end date
  1022. foreach(array(
  1023. 'creatorDatesEnd' => 'endDate',
  1024. 'creatorDatesStart' => 'startDate'
  1025. )
  1026. as $statusVar => $eventProperty
  1027. )
  1028. {
  1029. if (isset($self->rowStatusVars[$statusVar][$index])) {
  1030. $eventData[$eventProperty] = $self->rowStatusVars[$statusVar][$index] .'-00-00';
  1031. }
  1032. }
  1033. // Other date info
  1034. foreach(array(
  1035. 'creatorDateNotes' => 'description',
  1036. 'creatorDates' => 'date'
  1037. )
  1038. as $statusVar => $eventProperty
  1039. )
  1040. {
  1041. if (isset($self->rowStatusVars[$statusVar][$index]))
  1042. {
  1043. $eventData[$eventProperty] = $self->rowStatusVars[$statusVar][$index];
  1044. }
  1045. }
  1046. }
  1047. }
  1048. function getIdCorrespondingToSlug($slug)
  1049. {
  1050. $query = "SELECT object_id FROM slug WHERE slug=?";
  1051. $statement = QubitFlatfileImport::sqlQuery(
  1052. $query,
  1053. array($slug)
  1054. );
  1055. $result = $statement->fetch(PDO::FETCH_OBJ);
  1056. if ($result)
  1057. {
  1058. return $result->object_id;
  1059. } else {
  1060. throw new sfException('Could not find information object matching slug "'. $slug .'"');
  1061. }
  1062. }