/extensions.ext/wrapper/musicbrainz/musicbrainz.php

https://code.google.com/p/ontowiki/ · PHP · 434 lines · 268 code · 60 blank · 106 comment · 56 complexity · 3ffb9c91cf6ece1be47c43c232a5100e MD5 · raw file

  1. <?php
  2. require_once 'Erfurt/Wrapper.php';
  3. /**
  4. * Initial version of a wrapper for Musicbrainz.
  5. * Currently this is only a demo. It shows how a wrapper can handle data
  6. * itself, as well as querying the store and removing data.
  7. *
  8. * @category OntoWiki
  9. * @package OntoWiki_extensions_wrapper
  10. * @author Thomas König <koenig.thomas@googlemail.com>
  11. * @copyright Copyright (c) 2009 {@link http://aksw.org aksw}
  12. * @license http://opensource.org/licenses/gpl-license.php GNU General Public License (GPL)
  13. * @version ???
  14. */
  15. class MusicbrainzWrapper extends Erfurt_Wrapper
  16. {
  17. //URL
  18. //musicbrainz webpage
  19. //http://musicbrainz.org/artist/092ca127-2e07-4cbd-9cba-e412b4ddddd9.html --> artist
  20. //http://musicbrainz.org/release/71c3797a-52e2-4cc7-9d2d-711be98c321d.html --> album
  21. //http://musicbrainz.org/track/213ca793-fd94-49ad-93ef-fcd0e8520033.html --> track
  22. //rdf data (url to artist, artist name, date, type (single / group))
  23. //http://musicbrainz.org/mm-2.1/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9 --> * = artist / album / track
  24. //more rdf data, add /number (1, 2, 3, 4) at the end
  25. //http://musicbrainz.org/mm-2.1/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9/1 --> only url to artist
  26. //http://musicbrainz.org/mm-2.1/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9/2 --> same as without number
  27. //http://musicbrainz.org/mm-2.1/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9/3 --> artist, album: additional urls for albums/tracks; track: same as /2
  28. //http://musicbrainz.org/mm-2.1/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9/4 --> artist, album: additional info for every album/track; track: shows artist
  29. //redirect to musicbrainz page
  30. //http://musicbrainz.org/*/092ca127-2e07-4cbd-9cba-e412b4ddddd9 --> * = artist / album / track
  31. //var
  32. protected $_cachedData = array(); //cache
  33. protected $_pattern = null; //url pattern
  34. //---------------------------------------------------------------------------------------------
  35. public function getDescription()
  36. {
  37. return 'A simple wrapper for Musicbrainz.';
  38. }
  39. //---------------------------------------------------------------------------------------------
  40. public function getName()
  41. {
  42. return 'Musicbrainz';
  43. }
  44. //---------------------------------------------------------------------------------------------
  45. public function init($config)
  46. {
  47. parent::init($config);
  48. $this->_pattern = "/^http:\/\/musicbrainz.org\/(mm-2.1)?\/?(artist|track|release|album)\/([^.^\/]+).?(html|htm)?\/?\d?/";
  49. }
  50. //---------------------------------------------------------------------------------------------
  51. public function isHandled($uri, $graphUri)
  52. {
  53. if (preg_match($this->_pattern, $uri))
  54. return true;
  55. else
  56. return false;
  57. }
  58. //---------------------------------------------------------------------------------------------
  59. public function isAvailable($uri, $graphUri)
  60. {
  61. //check cache
  62. $id = $this->_cache->makeId($this, 'isAvailable', array($uri, $graphUri));
  63. $result = $this->_cache->load($id);
  64. if ($result !== false)
  65. {
  66. if (!isset($this->_cachedData[$graphUri]))
  67. {
  68. $this->_cachedData[$graphUri] = array($uri => $result['data']);
  69. }
  70. else
  71. {
  72. $this->_cachedData[$graphUri][$uri] = $result['data'];
  73. }
  74. return $result['value'];
  75. }
  76. //prepare arrays for http-output
  77. $retVal = false;
  78. $data = array();
  79. //check if uri is valid and then get http-output
  80. $match = array();
  81. if (preg_match($this->_pattern, $uri, $match))
  82. {
  83. //parts of the uri-pattern
  84. $complete_address = $match[0] != null ? $match[0] : "";
  85. $mm21 = $match[1] != null ? $match[1] : "";
  86. $artist_or_album_or_track = $match[2] != null ? $match[2] : "";
  87. $mbid = $match[3] != null ? $match[3] : "";
  88. $html = $match[4] != null ? $match[4] : "";
  89. //build uri
  90. if ($artist_or_album_or_track == "release") $artist_or_album_or_track = "album";
  91. $url1 = 'http://musicbrainz.org/mm-2.1/' . $artist_or_album_or_track . '/' . $mbid;
  92. //setup http client
  93. $client = Erfurt_App::getInstance()->getHttpClient($url1, array('maxredirects' => 5, 'timeout' => 30));
  94. //send request
  95. $response = $client->request();
  96. //get response
  97. if ($response->getStatus() === 200)
  98. {
  99. $result = $response->getBody();
  100. $data['status'] = $result;
  101. $retVal = true;
  102. }
  103. //Cache the retrieved data if possible.
  104. if (!isset($this->_cachedData[$graphUri]))
  105. {
  106. $this->_cachedData[$graphUri] = array($uri => $data);
  107. }
  108. else
  109. {
  110. $this->_cachedData[$graphUri][$uri] = $data;
  111. }
  112. //return data
  113. $cacheVal = array('value' => $retVal, 'data' => $data);
  114. $this->_cache->save($cacheVal, $id);
  115. return $retVal;
  116. }
  117. return $retVal;
  118. }
  119. //---------------------------------------------------------------------------------------------
  120. public function run($uri, $graphUri)
  121. {
  122. //load cache
  123. $id = $this->_cache->makeId($this, 'run', array($uri, $graphUri));
  124. $result = $this->_cache->load($id);
  125. if ($result !== false)
  126. {
  127. return $result;
  128. }
  129. //url was loaded before (and is available)
  130. if ($this->isAvailable($uri, $graphUri))
  131. {
  132. //read from cache
  133. $data = $this->_cachedData[$graphUri][$uri];
  134. $raw_data = $data['status'];
  135. //check if uri is valid
  136. $match = array();
  137. if (preg_match($this->_pattern, $uri, $match))
  138. {
  139. //parts of the uri-pattern
  140. $complete_address = $match[0] != null ? $match[0] : "";
  141. $mm21 = $match[1] != null ? $match[1] : "";
  142. $artist_or_album_or_track = $match[2] != null ? $match[2] : "";
  143. $mbid = $match[3] != null ? $match[3] : "";
  144. $html = $match[4] != null ? $match[4] : "";
  145. //build uri: rdf data
  146. if ($artist_or_album_or_track == "release") $artist_or_album_or_track = "album";
  147. $url1 = 'http://musicbrainz.org/mm-2.1/' . $artist_or_album_or_track . '/' . $mbid;
  148. //$uri = $url1;
  149. //build uri: webpage
  150. if ($artist_or_album_or_track == "album") $artist_or_album_or_track = "release";
  151. $musicbrainz_webpage = "http://musicbrainz.org/" . $artist_or_album_or_track . "/" . $mbid . ".html";
  152. //parsed a / an ...
  153. //=====================================================================================
  154. //... ARTIST
  155. if (strpos($url1, "artist") != null)
  156. {
  157. //use deep url for more information
  158. $url2 = $url1 . "/3";
  159. //setup http client
  160. $client = Erfurt_App::getInstance()->getHttpClient($url2, array('maxredirects' => 5, 'timeout' => 30) );
  161. //send request
  162. $response = $client->request();
  163. //get response
  164. if ($response->getStatus() === 200)
  165. {
  166. $raw_data = $response->getBody();
  167. }
  168. //parse rdf data
  169. preg_match('|<dc:title>([^"]+)</dc:title>|', $raw_data, $artist);
  170. preg_match('|<mm:beginDate>([^"]+)</mm:beginDate>|', $raw_data, $beginDate);
  171. preg_match('|<mm:artistType rdf:resource="([^"]+)"/>|', $raw_data, $artistType);
  172. preg_match('|<mm:albumList>(.+)</mm:albumList>|s', $raw_data, $albumRdf);
  173. preg_match_all('|<rdf:li rdf:resource="(.+)"/>|', $albumRdf[1], $albumUrls, PREG_PATTERN_ORDER);
  174. preg_match('|<dc:comment>(.+)</dc:comment>|', $raw_data, $comment);
  175. //create array
  176. $fullResult = array
  177. (
  178. 'status_codes' => array(Erfurt_Wrapper::NO_MODIFICATIONS, Erfurt_Wrapper::RESULT_HAS_ADD),
  179. 'status_description' => 'Musicbrainz artist data found',
  180. 'add' => array
  181. (
  182. $uri => array
  183. (
  184. //foaf:name (artist name)
  185. 'http://xmlns.com/foaf/0.1/name' => array(array('value' => utf8_encode($artist[1]), 'type' => 'literal')),
  186. //mo:musicbrainz (url)
  187. 'http://purl.org/ontology/mo/musicbrainz' => array(array('value' => $musicbrainz_webpage, 'type' => 'literal'))
  188. )
  189. )
  190. );
  191. //rdf:type: MusicArtist | MusicGroup
  192. //date: foaf:birthday | mo:beginsatDateTime (when group was founded OR when single artist was born)
  193. if (strpos($artistType[1], "TypeGroup") != null)
  194. {
  195. $fullResult['add'][$uri]['http://www.w3.org/1999/02/22-rdf-syntax-ns#type'] = array(array('value' => 'http://purl.org/ontology/mo/MusicGroup', 'type' => 'uri'));
  196. $fullResult['add'][$uri]['http://purl.org/ontology/mo/beginsAtDateTime'] = array(array('value' => utf8_encode($beginDate[1]), 'type' => 'literal'));
  197. }
  198. elseif (strpos($artistType[1], "TypePerson") != null)
  199. {
  200. $fullResult['add'][$uri]['http://www.w3.org/1999/02/22-rdf-syntax-ns#type'] = array(array('value' => 'http://purl.org/ontology/mo/MusicArtist', 'type' => 'uri'));
  201. $fullResult['add'][$uri]['http://xmlns.com/foaf/0.1/birthday'] = array(array('value' => utf8_encode($beginDate[1]), 'type' => 'literal'));
  202. }
  203. //foaf:made (album releases)
  204. if (count($albumUrls) >= 2)
  205. {
  206. if (count($albumUrls[1]) >= 1)
  207. {
  208. $fullResult['add'][$uri]['http://xmlns.com/foaf/0.1/made'] = array();
  209. foreach ($albumUrls[1] as $album)
  210. {
  211. $fullResult['add'][$uri]['http://xmlns.com/foaf/0.1/made'][] = array('value' => $album, 'type' => 'uri');
  212. }
  213. }
  214. }
  215. //dc:description (comment to the artist | group)
  216. if (count($comment) >= 2)
  217. {
  218. $fullResult['add'][$uri]['http://purl.org/dc/elements/1.1/description'] = array(array('value' => utf8_encode($comment[1]), 'type' => 'literal'));
  219. }
  220. //save cache and return result array
  221. $this->_cache->save($fullResult, $id);
  222. return $fullResult;
  223. } //ARTIST
  224. //=====================================================================================
  225. //... ALBUM
  226. else if (strpos($url1, "album") != null)
  227. {
  228. //use deep url for more information
  229. $url2 = $url1 . "/4";
  230. //setup http client
  231. $client = Erfurt_App::getInstance()->getHttpClient($url2, array('maxredirects' => 5, 'timeout' => 30) );
  232. //send request
  233. $response = $client->request();
  234. //get response
  235. if ($response->getStatus() === 200)
  236. {
  237. $raw_data = $response->getBody();
  238. }
  239. //parse rdf data
  240. preg_match('|<dc:title>([^"]+)</dc:title>|', $raw_data, $album);
  241. preg_match('|<mm:releaseDateList>(.+)</mm:releaseDateList>|s', $raw_data, $releaseRdf);
  242. preg_match_all('|<dc:date>(.+)</dc:date>|', $releaseRdf[1], $releaseDate, PREG_PATTERN_ORDER);
  243. preg_match_all('|<mm:country>(.+)</mm:country>|', $releaseRdf[1], $releaseCountry, PREG_PATTERN_ORDER);
  244. preg_match('|<mm:trackList>(.+)</mm:trackList>|s', $raw_data, $trackRdf);
  245. preg_match_all('|<rdf:li rdf:resource="(.+)"/>|', $trackRdf[1], $trackUrls, PREG_PATTERN_ORDER);
  246. preg_match_all('|<mm:Artist rdf:about="(.+)">[^<]+<dc:title>([^<]+)</dc:title>|', $raw_data, $artist, PREG_PATTERN_ORDER);
  247. preg_match('|<az:Asin>(.+)</az:Asin>|', $raw_data, $amazonAsin);
  248. //create array
  249. $fullResult = array
  250. (
  251. 'status_codes' => array(Erfurt_Wrapper::NO_MODIFICATIONS, Erfurt_Wrapper::RESULT_HAS_ADD),
  252. 'status_description' => 'Musicbrainz album data found',
  253. 'add' => array
  254. (
  255. $uri => array
  256. (
  257. //rdf:type (MusicalManifestation)
  258. 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' => array(array('value' => 'http://purl.org/ontology/mo/MusicalManifestation', 'type' => 'uri')),
  259. //dc:title (Album name)
  260. 'http://purl.org/dc/elements/1.1/title' => array(array('value' => utf8_encode($album[1]), 'type' => 'literal')),
  261. //mo:musicbrainz (url)
  262. 'http://purl.org/ontology/mo/musicbrainz' => array(array('value' => $musicbrainz_webpage, 'type' => 'literal'))
  263. )
  264. )
  265. );
  266. //?? (releases)
  267. //looks like: [date] in [country], e.g. "2008-01-10 in GB"
  268. if (count($releaseDate) >= 2)
  269. {
  270. $fullResult['add'][$uri]['http://www.w3.org/2000/01/rdf-schema#comment'] = array();
  271. for ($i=0; $i < count($releaseDate[1]); $i++)
  272. {
  273. //release date and country
  274. $rel_date = $releaseDate[1][$i];
  275. $rel_country = $releaseCountry[1][$i];
  276. //will be shown:
  277. //released 1997-11-28 in DE
  278. $text = "released ";
  279. //release date may be 0 or "" -> don't write to database
  280. if (($rel_date != "0") && ($rel_date != ""))
  281. {
  282. $text .= $rel_date . " ";
  283. }
  284. //country may be 0 or "" -> don't write to database
  285. if (($rel_country != "0") && ($rel_country != ""))
  286. {
  287. $text .= "in $rel_country";
  288. }
  289. //add to array
  290. $fullResult['add'][$uri]['http://www.w3.org/2000/01/rdf-schema#comment'][] = array('value' => utf8_encode($text), 'type' => 'literal');
  291. }
  292. }
  293. //mo:track (track uris)
  294. if (count($trackUrls) >= 2)
  295. {
  296. if (count($trackUrls[1]) >= 1)
  297. {
  298. $fullResult['add'][$uri]['http://purl.org/ontology/mo/track'] = array();
  299. foreach ($trackUrls[1] as $track)
  300. {
  301. $fullResult['add'][$uri]['http://purl.org/ontology/mo/track'][] = array('value' => $track, 'type' => 'uri');
  302. }
  303. }
  304. }
  305. //foaf:maker (artist url)
  306. //artist[1][] --> urls
  307. //artist[3][] --> artist name
  308. if (count($artist) >= 2)
  309. {
  310. $fullResult['add'][$uri]['http://xmlns.com/foaf/0.1/maker'] = array();
  311. foreach ($artist[1] as $artist_url)
  312. {
  313. $fullResult['add'][$uri]['http://xmlns.com/foaf/0.1/maker'][] = array('value' => $artist_url, 'type' => 'uri');
  314. }
  315. }
  316. //mo:amazon_asin (amazon asin)
  317. if (count($amazonAsin) >= 2)
  318. {
  319. $fullResult['add'][$uri]['http://purl.org/ontology/mo/amazon_asin'] = array(array('value' => utf8_encode($amazonAsin[1]), 'type' => 'literal'));
  320. }
  321. //save cache and return result array
  322. $this->_cache->save($fullResult, $id);
  323. return $fullResult;
  324. } //ALBUM
  325. //=====================================================================================
  326. //... TRACK
  327. else if (strpos($url1, "track") != null)
  328. {
  329. //use deep url for more information
  330. $url2 = $url1 . "/4";
  331. //setup http client
  332. $client = Erfurt_App::getInstance()->getHttpClient($url2, array('maxredirects' => 5,'timeout' => 30) );
  333. //send request
  334. $response = $client->request();
  335. //get response
  336. if ($response->getStatus() === 200)
  337. {
  338. $raw_data = $response->getBody();
  339. }
  340. //parse rdf data
  341. preg_match_all('|<mm:Track rdf:about="(.+)">[^<]+<dc:title>([^<]+)</dc:title>|', $raw_data, $trackinfo, PREG_PATTERN_ORDER);
  342. preg_match_all('|<mm:Artist rdf:about="(.+)">[^<]+<dc:title>([^<]+)</dc:title>|', $raw_data, $artistinfo, PREG_PATTERN_ORDER);
  343. preg_match('|<mm:duration>([^"]+)</mm:duration>|', $raw_data, $duration);
  344. //create array
  345. $fullResult = array
  346. (
  347. 'status_codes' => array(Erfurt_Wrapper::NO_MODIFICATIONS, Erfurt_Wrapper::RESULT_HAS_ADD),
  348. 'status_description' => 'Musicbrainz track data found',
  349. 'add' => array
  350. (
  351. $uri => array
  352. (
  353. //rdf:type (Track)
  354. 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' => array(array('value' => 'http://purl.org/ontology/mo/Track', 'type' => 'uri')),
  355. //dc:title (track name)
  356. 'http://purl.org/dc/elements/1.1/title' => array(array('value' => utf8_encode($trackinfo[2][0]), 'type' => 'literal')),
  357. //mo:durationXSD (track duration)
  358. 'http://purl.org/ontology/mo/durationXSD' => array(array('value' => utf8_encode($duration[1]), 'type' => 'literal')),
  359. //mo:musicbrainz (url)
  360. 'http://purl.org/ontology/mo/musicbrainz' => array(array('value' => $musicbrainz_webpage, 'type' => 'literal')),
  361. //foaf:maker(artist name)
  362. 'http://xmlns.com/foaf/0.1/maker' => array(array('value' => utf8_encode($artistinfo[2][0]), 'type' => 'literal'),
  363. array('value' => $artistinfo[1][0], 'type' => 'uri')),
  364. )
  365. )
  366. );
  367. //save cache and return result array
  368. $this->_cache->save($fullResult, $id);
  369. return $fullResult;
  370. } //TRACK
  371. } //pattern supported
  372. } //isAvailable in cache
  373. } //run()
  374. } //class