PageRenderTime 30ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/services/services/metadata/webmosher.php

https://github.com/jinzora/jinzora3
PHP | 931 lines | 721 code | 89 blank | 121 comment | 192 complexity | d94f265c2270345f725f5764992a96c0 MD5 | raw file
  1. <?php if (!defined(JZ_SECURE_ACCESS)) die ('Security breach detected.');
  2. /**
  3. * Custom Metadata Service
  4. *
  5. * This service retrieves data from two distinct sources: Amazon for album
  6. * data and Yahoo! Music for artist information. The Amazon retrieval method
  7. * is the most complex and feature filled.
  8. *-------------------------------------------------------------------------
  9. * FEATURES
  10. * Amazon Album Retrieval
  11. * o
  12. *-------------------------------------------------------------------------
  13. * TODO:
  14. * o Retrieve customer images from Amazon when no album image exists.
  15. * o Allow [COUNTRY] meta tag to allow lookup on different Amazon servers.
  16. */
  17. /**
  18. * Configuration
  19. */
  20. define('SERVICE_METADATA_webmosher','true');
  21. $jzSERVICE_INFO = array();
  22. $jzSERVICE_INFO['name'] = "Custom combination service retrieval";
  23. $jzSERVICE_INFO['url'] = "http://www.darkhart.net";
  24. global $matchAlbumWeight;
  25. $matchAlbumWeight = array(
  26. 'album' => array(
  27. 'exact' => 8000,
  28. 'general' => 4000,
  29. 'regex' => 2000,
  30. 'partial' => 2000,
  31. ),
  32. 'artist' => array(
  33. 'exact' => 800,
  34. 'general' => 400,
  35. 'regex' => 200,
  36. 'partial' => 200),
  37. 'year' => array (
  38. 'exact' => 80,
  39. 'general' => 0),
  40. 'image' => array (
  41. 'exact' => 40,
  42. 'general' => 20),
  43. 'review' => array (
  44. 'exact' => 8,
  45. 'general' => 4),
  46. 'rating' => array (
  47. 'exact' => 4)
  48. );
  49. /**
  50. *-------------------------------------------------------------------------
  51. * CONFIG COMPLETE -- VENTURE BELOW AT YOUR OWN RISK
  52. *-------------------------------------------------------------------------
  53. */
  54. /*
  55. * Gets the metadata for an album from Amazon
  56. *
  57. * @author Fred Hirsch
  58. * @param $node The current node we are looking at
  59. * @param $displayOutput Should we display output? (defaults to true)
  60. **/
  61. function SERVICE_GETALBUMMETADATA_webmosher($node, $displayOutput = true, $return = false) {
  62. global $include_path, $matchAlbumWeight;
  63. global $test_php4;$test_php4 = false;
  64. $link_url = 'http://www.amazon.com/dp/';
  65. $parent = $node->getParent();
  66. $search_tracking = array();
  67. // Normally, we are probably not overriding our values, so just procede.
  68. if(empty($_POST[descOVERRIDE]) && empty($_POST[imgOVERRIDE])) {
  69. // Next we pre-process the album and artist information from the JZ node
  70. // that is assigned to this meta data request. This is done to try and
  71. // simplify the data before it is sent off to Amazon, but also maintain
  72. // the original so that matching can be correlated correctly.
  73. // Setup the incoming Album/Artist information
  74. $album = trim($node->getName());
  75. $orig_album = $album;
  76. $artist = trim($parent->getName());
  77. $orig_artist = $artist;
  78. //Strip down the album a bit
  79. $album = preg_replace('/[\(\[][^\)\]]+[\)\]]/', '', $album); // Remove text in parenthesis & brackets
  80. $album = preg_replace('/[-_,]/', ' ', $album); // Convert - and _ to space
  81. $album = preg_replace('/([A-Z])/', " $1", $album); // Pad a space before capitol letters
  82. $album = preg_replace('/\s+/', ' ', $album); // Remove extra space
  83. // Stop word filtering removes extra words that may not be found in the
  84. // result and will cause a lower correlation value.
  85. $stopwords = array('the', 'a','and');
  86. foreach ($stopwords as $word) {
  87. $album = preg_replace('/\b' . $word . '\b/i', '', $album); // Remove stopwords
  88. }
  89. $album = preg_replace('/[^\w\s]/u', '', utf8_decode($album)); // Remove non-word characters & UTF8 handling
  90. $album = trim($album);
  91. // We utilize the idea of "Meta-tagging" in the album names to allow
  92. // better search results. Amazon uses a similar system to mark album
  93. // entries, so it fits well with their system. Essentially, if any album
  94. // has a tag enclosed in [] that matches the items below, the artist
  95. // value is modified to improve searching. This is most effective for
  96. // soundtracks & compilations.
  97. $various = array('Soundtrack' => array('orig_artist' => 'Soundtrack', 'artist' => 'Various'),
  98. 'Various' => array('orig_artist' => 'Various Artists', 'artist' => 'Various'),
  99. 'Compilation' => array('orig_artist' => 'Various Artists', 'artist' => 'Various'),
  100. 'Single' => array('orig_artist' => $orig_artist, 'artist' => $artist));
  101. // We want to keep track of the postfixes in case they can be matched to
  102. // the search.
  103. $postfix = '';
  104. foreach ($various as $key => $val) {
  105. if (preg_match('/\[' . $key . '\]/', $orig_album)) {
  106. $artist = $val['artist'];
  107. $orig_artist = $val['orig_artist'];
  108. $orig_album = preg_replace('/\s*[\(\[][^\)\]]+[\)\]]/', '', $orig_album);
  109. $postfix .= $key . ' ';
  110. }
  111. }
  112. // Some artists seem to like to release multiple albums with the same
  113. // name, but in different years. Using the year value in a meta tag will
  114. // allow the search to add more correlation for that release year.
  115. if (preg_match('/\[(\d\d\d\d)\]$/', $orig_album, $match)) {
  116. $exact_year = $match[1];
  117. $orig_album = preg_replace('/\s*[\(\[][^\)\]]+[\)\]]/', '', $orig_album);
  118. } else {
  119. $exact_year = false;
  120. }
  121. // Now, we do the same thing to the artist.
  122. $artist = preg_replace('/\s*[\(\[][^\)\]]+[\)\]]/', '', $artist); // Remove text in parenthesis & brackets
  123. $artist = preg_replace('/[-_,]/', ' ', $artist); // Convert - and _ to space
  124. $artist = preg_replace('/\s+/', ' ', $artist); // Remove extra space
  125. foreach ($stopwords as $word) {
  126. $artist = preg_replace('/\b' . $word . '\b/i', '', $artist); // Remove stopwords
  127. }
  128. $artist = preg_replace('/[^\w\s\']/u', '', utf8_decode($artist)); // Remove non-word characters & UTF8 handling
  129. $artist = trim($artist);
  130. // Lastly, we attempt to normalize any unicode in the artist text and if
  131. // its different, we will flag this as an additional search.
  132. // TODO
  133. // Configure a standard ordered search list
  134. $searches = array();
  135. // A fully exact search is the default. If this one matches something, we
  136. // usually ignore the rest.
  137. $searches[] = array( name => "All Exact", artist => $orig_artist, album => $orig_album, exact_artist => true, exact_album => true, exact_year => $exact_year, postfix => $postfix);
  138. $searches[] = array( name => "General Album", artist => '', album => $album, exact_artist => false, exact_album => false, exact_year => $exact_year, threshhold => 4800,postfix => $postfix);
  139. $searches[] = array( name => "General Artist", artist => $artist, album => '', exact_artist => false, exact_album => false, exact_year => $exact_year, threshhold => 8400,postfix => $postfix);
  140. // We attempt to normalize any unicode in the album/artist text and if
  141. // its different, we will flag this as an additional search.
  142. include_once($include_path . "lib/utfnormal/UtfNormal.php");
  143. $utffix = new UTFNormal();
  144. $artistUTFNormal = preg_replace('/[^\w\s]/', '', $utffix->toNFKD(utf8_encode($artist)));
  145. $albumUTFNormal = preg_replace('/[^\w\s]/', '', $utffix->toNFKD(utf8_encode($album)));
  146. if ($artist != $artistUTFNormal && $album != $albumUTFNormal) {
  147. $searches[] = array( name => "Normalized Artist", artist => $artistUTFNormal, album => $albumUTFNormal, exact_artist => false, exact_album => false, exact_year => $exact_year, postfix => $postfix);
  148. } elseif ($artist != $artistUTFNormal ) {
  149. $searches[] = array( name => "Normalized Album", artist => $artistUTFNormal, album => $album, exact_artist => false, exact_album => false, exact_year => $exact_year, postfix => $postfix);
  150. } elseif ($album != $albumUTFNormal) {
  151. $searches[] = array( name => "Normalized Album", artist => $artist, album => $albumUTFNormal, exact_artist => false, exact_album => false, exact_year => $exact_year, postfix => $postfix);
  152. }
  153. // Album & artist were modified, so we need to add a general search.
  154. if ($orig_album != $album && $orig_artist != $artist) {
  155. $searches[] = array( name => "All General", artist => $artist, album => $album, exact_artist => false, exact_album => false, exact_year => $exact_year,postfix => $postfix);
  156. }
  157. if ($orig_album != $album) {
  158. $searches[] = array( name => "Exact Artist", artist => $orig_artist, album => $album, exact_artist => true, exact_album => false, exact_year => $exact_year,postfix => $postfix);
  159. }
  160. if ($orig_artist != $artist) {
  161. $searches[] = array( name => "Exact Album", artist => $artist, album => $orig_album, exact_artist => false, exact_album => true, exact_year => $exact_year,postfix => $postfix);
  162. }
  163. // Set search defaults
  164. $lastSearchWeight = 0;
  165. // Calculate the best weightings, if we match this, we are done.
  166. $maxSearchWeight = ($matchAlbumWeight['album']['exact'] * 8) + // Exact album weight
  167. ($matchAlbumWeight['artist']['exact'] * 8)+ // Exact artist weight
  168. $matchAlbumWeight['year']['exact'] + // Exact artist weight
  169. $matchAlbumWeight['image']['exact'] + // Exact artist weight
  170. $matchAlbumWeight['review']['exact'] + // Exact artist weight
  171. //$matchAlbumWeight['rating']['exact'] + // Exact artist weight
  172. (($exact_year) ? 80 : 0) ; // We pro-rate a bit more if we need an exact year.
  173. $baseSearchWeight = $matchAlbumWeight['album']['general'] + // General album weight
  174. $matchAlbumWeight['artist']['general'] + // General artist weight
  175. (($exact_year) ? 80 : 0) ; // We pro-rate a bit more if we need an exact year.
  176. $maxPages = 3;
  177. $searchItem = '';
  178. $fix_jz_path = urlencode(implode('/', $node->getPath()));
  179. print "<form action=\"popup.php?action=popup&ptype=getmetadata&jz_path=$fix_jz_path\" method=\"post\">\n";
  180. print "<input type=\"hidden\" name=\"edit_search_all_albums\" value=\"on\"/>\n";
  181. print "<input type=\"hidden\" name=\"edit_search_all_artists\" value=\"off\"/>\n";
  182. print "<input type=\"hidden\" name=\"metaSearchSubmit\" value=\"Search\"/>\n";
  183. print "<input type=\"hidden\" name=\"edit_search_images_miss\" value=\"always\"/>\n";
  184. print "<input type=\"hidden\" name=\"edit_search_desc_miss\" value=\"always\"/>\n";
  185. while (list($key,$search) = each($searches)) {
  186. $currentPage = 1;
  187. $totalPages = 1;
  188. $weight = 1;
  189. // We don't bother with the following searches:
  190. if ($search[artist] == 'Various' && $search[album] == '') {
  191. continue;
  192. } elseif (isset($search[threshhold]) && $search[threshhold] < $lastSearchWeight) {
  193. continue;
  194. }
  195. print '<table width="100%" border="0" cellspacing="0" cellpadding="0">';
  196. print "<tr><td>Searching for $search[name]...</td></tr>\n";
  197. $currentSearch = 'Title=' . urlencode($search[album]) . '&Artist=' . urlencode($search[artist]);
  198. while ($currentPage <= $totalPages) {
  199. // Do the XML data retrieval searching
  200. if ($xml = getXMLData($currentSearch)) {
  201. } else {
  202. print "No content received from Amazon, please retry.";
  203. break;
  204. }
  205. $totalPages = (xml_data($xml->Items->TotalPages) <= $maxPages) ? xml_data($xml->Items->TotalPages) : $maxPages;
  206. // Did we just get one match, or more than one?
  207. if (xml_data($xml->Items->TotalResults) == 1) {
  208. $item = $xml->Items->Item;
  209. $weight = weightMatch($search, $item);
  210. // Check the weighting values
  211. if ($weight >= $maxSearchWeight) {
  212. $searchItem = $item;
  213. break;
  214. } elseif ($weight > $baseSearchWeight && $weight > $lastSearchWeight) {
  215. $searchItem = $item;
  216. $lastSearchWeight = $weight;
  217. }
  218. } elseif (xml_data($xml->Items->TotalResults) > 1) {
  219. // If we found multiple results, we need to look through them all.
  220. foreach ($xml->Items->Item as $item) {
  221. $weight = weightMatch($search, $item);
  222. // Check the weighting values
  223. if ($weight >= $maxSearchWeight) {
  224. $searchItem = $item;
  225. break;
  226. } elseif ($weight > $baseSearchWeight && $weight > $lastSearchWeight) {
  227. $searchItem = $item;
  228. $lastSearchWeight = $weight;
  229. }
  230. }
  231. $currentSearch = "ItemPage=" . $currentPage;
  232. }
  233. $currentPage++;
  234. if ($weight >= $maxSearchWeight) {
  235. break;
  236. }
  237. sleep(1); // Prevent "SPAMMING" Amazon?
  238. }
  239. print "</table>\n";
  240. if ($weight >= $maxSearchWeight) {
  241. break;
  242. }
  243. flushdisplay();
  244. }
  245. if (empty($searchItem)) {
  246. print "Match result not found. You may override the result by selecting override items above.<br\>\n";
  247. unset ($item);
  248. } else {
  249. print "Found as Amazon ID: [<a href=\"$link_url" . xml_data($searchItem->ASIN) . "\" target=\"_blank\">" . xml_data($searchItem->ASIN) . "</a>], setting data.<br/><br/>\n";
  250. print "<script><!--\n";
  251. print "document.getElementById(\"" . xml_data($searchItem->ASIN) ."\").setAttribute('bgcolor', '#000080');\n";
  252. print "//-->\n</script>\n";
  253. $item = $searchItem;
  254. }
  255. print "<div align=\"center\"><input type=\"submit\" value=\"Override Default\" class=\"jz_submit\"/></div>";
  256. print "</form>";
  257. flushdisplay();
  258. print "<div align=\"center\"><input type=\"submit\" value=\"Override Default\" class=\"jz_submit\"/></div>";
  259. // Here, we start an override of the original retrieval.
  260. } else {
  261. $item = albumOverride();
  262. }
  263. $id = xml_data($item->ASIN);
  264. $year = substr(xml_data($item->ItemAttributes->ReleaseDate),0,4);
  265. if (isset($item->LargeImage->URL) && xml_data($item->LargeImage->URL) != '') {
  266. $image = xml_data($item->LargeImage->URL);
  267. } elseif (isset($item->MediumImage->URL) && xml_data($item->MediumImage->URL) != '') {
  268. $image = $item->MediumImage->URL;
  269. }
  270. $review = xml_data($item->EditorialReviews->EditorialReview->Content);
  271. // TODO: Rating does not seem to set unless override is chosen.
  272. $rating = sprintf(xml_data($item->CustomerReviews->AverageRating));
  273. if (is_array( $item->BrowseNodes->BrowseNode)) {
  274. $genre = xml_data($item->BrowseNodes->BrowseNode[0]->Ancestors->BrowseNode->Name);
  275. } else {
  276. $genre = xml_data($item->BrowseNodes->BrowseNode->Ancestors->BrowseNode->Name);
  277. }
  278. $ListPrice = sprintf(xml_data($item->ItemAttributes->ListPrice->FormattedPrice));
  279. $tracks = array();
  280. if (is_array($item->Tracks->Disc->Track)) {
  281. foreach ($item->Tracks->Disc->Track as $track) {
  282. $tracks[] = sprintf(xml_data($track));
  283. }
  284. }
  285. if (!$return){
  286. writeAlbumMetaData($node, $year, $image, $tracks, $review, $rating, $ListPrice, $genre, true);
  287. return true;
  288. } else {
  289. if ($return == "array"){
  290. $retArr['year'] = $year;
  291. $retArr['image'] = $image;
  292. $retArr['review'] = $review;
  293. $retArr['rating'] = $rating;
  294. $retArr['id'] = $id;
  295. return $retArr;
  296. } else {
  297. return $$return;
  298. }
  299. }
  300. return true;
  301. }
  302. function weightMatch ($search, $item) {
  303. global $search_tracking, $matchAlbumWeight;
  304. $search_asin = (string) xml_data($item->ASIN);
  305. if (isset($search_tracking[$search_asin]) && $search_tracking[$search_asin]) {
  306. return 0;
  307. } else {
  308. $search_tracking[$search_asin] = true;
  309. }
  310. $weight = 0;
  311. $artist_weight = 0;
  312. $album_weight = 0;
  313. $album_multiplier = 1;
  314. $artist_multiplier = 1;
  315. $link_url = 'http://www.amazon.com/dp/';
  316. $amazon_key = '19B1FW4R5ABSKBWNV582';
  317. $link_xml = 'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&ResponseGroup=Large&Operation=ItemLookup&AWSAccessKeyId=' . $amazon_key . '&ItemId=';
  318. $colors = array(0 => '#A00000',
  319. 1 => '#008000',
  320. 2 => '#FFFF00',
  321. 3 => '#FFDD44',
  322. 4 => '#FF9900');
  323. $states = array('artist' => $colors[0],
  324. 'album' => $colors[0],
  325. 'image' => $colors[0],
  326. 'descr' => $colors[0],
  327. 'rating' => $colors[0],
  328. 'year' => $colors[0]);
  329. // Let's see if our one match got us good results:
  330. $search_album = xml_data($item->ItemAttributes->Title);
  331. // Check search item meta tags to see if they match our own.
  332. $filter_postfix = array('IMPORT');
  333. $postfix_match = '';
  334. foreach ($filter_postfix as $key ) {
  335. if (preg_match("/$search[postfix]/i", $key) && (preg_match('/\[' . $key . '\]/i', $search_album) ||
  336. preg_match('/\(' . $key . '\)/i', $search_album))) {
  337. $album_weight += 500;
  338. $postfix_match = '++';
  339. } elseif (preg_match('/\[' . $key . '\]/i', $search_album) ||
  340. preg_match('/\(' . $key . '\)/i', $search_album)) {
  341. $album_weight -= 1000;
  342. $postfix_match = '--';
  343. }
  344. }
  345. // Remove text in parenthesis & brackets
  346. $search_album = preg_replace('/\s*\[[^\]]+\]/', '', $search_album);
  347. $search_album = preg_replace('/\s*\([^\)]+\)/', '', $search_album);
  348. // File names cannot contain these characters, so we remove them from the search as well
  349. $search_album = preg_replace('/[:?\/\\\"*<>|]/', '', $search_album);
  350. $search_album = preg_replace('/\.$/', '', $search_album);
  351. $stopwords = array('the', 'a','and');
  352. if ($search[exact_album] == true) {
  353. $search_album = trim($search_album); // Just trim whitespace
  354. } else {
  355. // If this is a filtered match, we should make sure the result is filtered the same way
  356. $search_album = preg_replace('/[-_,]/', ' ', $search_album); // Convert - and _ to space
  357. $search_album = preg_replace('/\s+/', ' ', $search_album); // Remove extra space
  358. $search_album = preg_replace('/[^\w\s]/u', '', utf8_decode($search_album)); // Remove non-word characters & UTF8 handling
  359. foreach ($stopwords as $word) {
  360. $search_album = preg_replace('/\b' . $word . '\b/i', '', $search_album); // Remove stopwords
  361. }
  362. }
  363. // The artist field can be blank, so if it is, we try using the author
  364. if (sizeof ($item->ItemAttributes->Artist) == 1) {
  365. if (xml_data($item->ItemAttributes->Artist) != '') {
  366. $search_artist = xml_data($item->ItemAttributes->Artist);
  367. } elseif ($item->ItemAttributes->Author != '') {
  368. $search_artist = xml_data($item->ItemAttributes->Author);
  369. } else {
  370. // Last ditch, just use the filtered artist.
  371. $search_artist = $artist;
  372. }
  373. // Multiple artist album.
  374. } else {
  375. for ($i = 0; $i < sizeof($item->ItemAttributes->Artist); $i++) {
  376. $prefix = '';
  377. if ($i > 0) {
  378. $prefix = ' ';
  379. }
  380. $search_artist .= $prefix . xml_data($item->ItemAttributes->Artist[$i]);
  381. }
  382. }
  383. // File names cannot contain these characters, so filter them from results.
  384. $search_artist= preg_replace('/[:?\/\\\"*<>|]/', '', $search_artist);
  385. $search_artist = preg_replace('/\s*\[[^\]]+\]/', '', $search_artist); // Remove text in parenthesis & brackets
  386. $search_artist = preg_replace('/\s*\([^\)]+\)/', '', $search_artist); // Remove text in parenthesis & brackets
  387. // File names always trim the last period from a name.
  388. $search_artist = preg_replace('/\.$/', '', $search_artist);
  389. if ($search[exact_artist] == true) {
  390. $search_artist = trim($search_artist);
  391. } else {
  392. $search_artist = preg_replace('/[-_,]/', ' ', $search_artist); // Convert - and _ to space
  393. $search_artist = preg_replace('/\s+/', ' ', $search_artist); // Remove extra space
  394. $search_artist = preg_replace('/[^\w\s\']/u', '', utf8_decode($search_artist)); // Remove non-word characters & UTF8 handling
  395. foreach ($stopwords as $word) {
  396. $search_artist = preg_replace('/\b' . $word . '\b/i', '', $search_artist); // Remove stopwords
  397. }
  398. }
  399. // Highest priority is a completely exact match. Album priority is most important.
  400. if ($search[exact_album] && strtoupper($search[album]) == strtoupper($search_album)) {
  401. $album_weight += $matchAlbumWeight['album']['exact'];
  402. $artist_multiplier = 8;
  403. $states['album'] = $colors[1];
  404. } elseif (strtoupper($search[album]) == strtoupper($search_album)) {
  405. $album_weight += $matchAlbumWeight['album']['general'];
  406. $artist_multiplier = 4;
  407. $states['album'] = $colors[2];
  408. } elseif ($search[album] != '' &&
  409. (preg_match('/'. addcslashes($search_album, "&()[].+^$(){}=!'") . '/i', $search[album]) ||
  410. preg_match('/' . addcslashes($search[album], "&()[].+^$(){}=!'") . '/i', $search_album))) {
  411. $album_weight += $matchAlbumWeight['album']['regex'];
  412. $artist_multiplier = 3;
  413. $states['album'] = $colors[3];
  414. } else {
  415. $words = explode(' ', $search_album);
  416. $word_base = $matchAlbumWeight['album']['partial'] / sizeof($words);
  417. foreach ($words as $word) {
  418. if(preg_match('/' . $word . '/i', $search[album])) {
  419. $album_weight += $word_base;
  420. $states['album'] = $colors[4];
  421. }
  422. }
  423. }
  424. // First we try an exact match
  425. if ($search[exact_artist] && strtoupper($search[artist]) == strtoupper($search_artist)) {
  426. $artist_weight += $matchAlbumWeight['artist']['exact'];
  427. $album_multiplier = 8;
  428. $states['artist'] = $colors[1];
  429. // Then we try a less exact match
  430. } elseif (strtoupper($search[artist]) == strtoupper($search_artist)) {
  431. $artist_weight += $matchAlbumWeight['artist']['general'];
  432. $album_multiplier = 4;
  433. $states['artist'] = $colors[2];
  434. // Then we look for a general pattern match
  435. } elseif ($search[artist] != '' &&
  436. (preg_match('/' . addcslashes($search_artist, "&()[].+^$(){}=!'") . '/i', $search[artist]) ||
  437. preg_match('/' . addcslashes($search[artist], "&()[].+^$(){}=!'") . '/i', $search_artist))) {
  438. $artist_weight += $matchAlbumWeight['artist']['regex'];
  439. $album_multiplier = 2;
  440. $states['artist'] = $colors[3];
  441. // Lastly, we allow a match on individual terms in the search
  442. } else {
  443. $words = explode(' ', $search_artist);
  444. $word_base = $matchAlbumWeight['artist']['partial'] / sizeof($words);
  445. foreach ($words as $word) {
  446. if(preg_match('/' . $word . '/i', $search[artist])) {
  447. $artist_weight += $word_base;
  448. $states['artist'] = $colors[4];
  449. }
  450. }
  451. }
  452. // If the artist name is in the album, its probably a good bet.
  453. if (preg_match('/' . addcslashes($search_artist, "&()[].+^$(){}=!'") . '/i', $search_album)) {
  454. $artist_multiplier += 2;
  455. }
  456. $weight = ($album_weight * $album_multiplier) + ($artist_weight * $artist_multiplier);
  457. if ($search[exact_year] && substr(xml_data($item->ItemAttributes->ReleaseDate),0,4) == $search[exact_year]) {
  458. $weight += $matchAlbumWeight['year']['exact'];
  459. $states['year'] = $colors[1];
  460. } elseif (isset($item->ItemAttributes->ReleaseDate) && xml_data($item->ItemAttributes->ReleaseDate) != '') {
  461. $weight += $matchAlbumWeight['year']['general'];
  462. $states['year'] = $colors[1];
  463. } elseif ($search[exact_year]) {
  464. $states['year'] = $colors[2];
  465. } else {
  466. $states['year'] = '#c0c0c0';
  467. }
  468. // Add some weightings for various data we really are looking for:
  469. $display_img = 'style/slick/clear.gif';
  470. $display_desc = 'Not available';
  471. if (isset($item->LargeImage->URL) && xml_data($item->LargeImage->URL) != '') {
  472. $weight += $matchAlbumWeight['image']['exact'];
  473. $states['image'] = $colors[1];
  474. $display_img = $item->LargeImage->URL;
  475. } elseif (isset($item->MediumImage->URL) && xml_data($item->MediumImage->URL) != '') {
  476. $weight += $matchAlbumWeight['image']['general'];
  477. $states['image'] = $colors[2];
  478. $display_img = $item->MediumImage->URL;
  479. }
  480. if (isset($item->EditorialReviews->EditorialReview->Content) && trim(xml_data($item->EditorialReviews->EditorialReview->Content)) != '') {
  481. $weight += $matchAlbumWeight['review']['exact']; // Has a description
  482. $states['descr'] = $colors[1];
  483. $display_desc = $item->EditorialReviews->EditorialReview->Content;
  484. }
  485. if (isset($item->CustomerReviews->AverageRating) && xml_data($item->CustomerReviews->AverageRating) != '') {
  486. $weight += $matchAlbumWeight['rating']['exact'] ; // Has ratings
  487. $states['rating'] = $colors[1];
  488. }
  489. print '<tr><td><table width="100%" border="1" cellspacing="0" cellpadding="5" id="'. xml_data($item->ASIN) .'">';
  490. print '<tr><td align="right" width="160px" bgcolor="' . $states['artist'] . '"><b>Artist:</b></td><td width="100%"> ' . $search_artist . '</td>';
  491. print "<td rowspan=\"2\" align=\"center\" nowrap=\"nowrap\"><a href=\"$link_url" . xml_data($item->ASIN) . "\" target=\"_blank\">View Amazon</a><br>\n";
  492. print "[<a href=\"$link_xml" . xml_data($item->ASIN) . "\" target=\"_blank\">XML View</a>]</td></tr>\n";
  493. print '<tr><td align="right" width="160px" bgcolor="' . $states['album'] . '"><b>Album:</b></td><td width="100%"> ' . $search_album . ' ' . $postfix_match . '</td></tr>';
  494. print '<tr><td colspan="3"><table width="100%" border="0" cellspacing="0" cellpadding="5"><tr>';
  495. print '<td width="160px" height="160" align="center" valign="middle" bgcolor="' . $states['image'] . '"><img src="' . $display_img . '" height="150" width="150"><br></td>';
  496. print '<td valign="top" bgcolor="' . $states['descr'] . '">' . $display_desc . '</td>';
  497. print '</tr><tr>';
  498. print '<td><input type="radio" name="imgOVERRIDE" value="'. xml_data($item->ASIN) .'"/>Override</td>';
  499. print '<td><input type="radio" name="descOVERRIDE" value="'. xml_data($item->ASIN) .'"/>Override</td>';
  500. print '</tr></table></td></tr></table></td></tr>';
  501. return $weight;
  502. }
  503. function artistOverride() {
  504. $ret = array();
  505. if (isset($_POST[descOVERRIDE]) ) {
  506. $ret['bio'] = $_POST[descOVERRIDE];
  507. }
  508. if (isset($_POST[imgOVERRIDE])) {
  509. $ret['image'] = $_POST[imgOVERRIDE];
  510. }
  511. return $ret;
  512. }
  513. function albumOverride() {
  514. if (isset($_POST[descOVERRIDE])) {
  515. $asin = $_POST[descOVERRIDE];
  516. $desc_xml = getXMLData(urlencode($asin), true);
  517. }
  518. if (isset($_POST[imgOVERRIDE])) {
  519. $asin = $_POST[imgOVERRIDE];
  520. $img_xml = getXMLData(urlencode($asin), true);
  521. }
  522. if (isset($_POST[descOVERRIDE]) && isset($_POST[imgOVERRIDE])) {
  523. if (isset($desc_xml->Items->Item->LargeImage) && isset($desc_xml->Items->Item->LargeImage->URL)) {
  524. $desc_xml->Items->Item->LargeImage->URL = (string) xml_data($img_xml->Items->Item->LargeImage->URL);
  525. } else {
  526. $desc_xml->Items->Item->addChild('LargeImage');
  527. $desc_xml->Items->Item->LargeImage->addChild('URL', $img_xml->Items->Item->LargeImage->URL);
  528. }
  529. if (isset($desc_xml->Items->Item->MediumImage) && isset($desc_xml->Items->Item->MediumImage->URL)) {
  530. $desc_xml->Items->Item->MediumImage->URL = (string) xml_data($img_xml->Items->Item->MediumImage->URL);
  531. } else {
  532. $desc_xml->Items->Item->addChild('MediumImage');
  533. $desc_xml->Items->Item->MediumImage->addChild('URL', $img_xml->Items->Item->MediumImage->URL);
  534. }
  535. if (isset($desc_xml->Items->Item->SmallImage)&& isset($desc_xml->Items->Item->SmallImage->URL)) {
  536. $desc_xml->Items->Item->SmallImage->URL = (string) xml_data($img_xml->Items->Item->SmallImage->URL);
  537. } else {
  538. $desc_xml->Items->Item->addChild('SmallImage');
  539. $desc_xml->Items->Item->SmallImage->addChild('URL', $img_xml->Items->Item->SmallImage->URL);
  540. }
  541. $item = $desc_xml->Items->Item;
  542. } elseif (isset($_POST[descOVERRIDE])) {
  543. $item = $desc_xml->Items->Item;
  544. } elseif (isset($_POST[imgOVERRIDE])) {
  545. $item = $img_xml->Items->Item;
  546. } else {
  547. return false;
  548. }
  549. $display_img = 'style/slick/clear.gif';
  550. $display_desc = 'Not available';
  551. if (isset($item->LargeImage->URL) && xml_data($item->LargeImage->URL) != '') {
  552. $display_img = $item->LargeImage->URL;
  553. } elseif (isset($item->MediumImage->URL) && xml_data($item->MediumImage->URL) != '') {
  554. $display_img = $item->MediumImage->URL;
  555. }
  556. if (isset($item->EditorialReviews->EditorialReview->Content) && trim(xml_data($item->EditorialReviews->EditorialReview->Content)) != '') {
  557. $display_desc = $item->EditorialReviews->EditorialReview->Content;
  558. } else {
  559. $item->EditorialReviews->EditorialReview->Content = '';
  560. }
  561. print '<tr><td><table width="100%" border="1" cellspacing="0" cellpadding="5" id="'. xml_data($item->ASIN) .'">';
  562. print '<tr><td colspan="3" nowrap="nowrap"><strong>Attempting Override</strong></td>';
  563. print '<tr><td align="right" width="160px" bgcolor="' . $states['artist'] . '"><b>Artist:</b></td><td width="100%"> ' . xml_data($item->ItemAttributes->Artist) . '</td>';
  564. print "<td rowspan=\"2\" align=\"right\"><a href=\"$link_url" . xml_data($item->ASIN) . "\" target=\"_blank\">View Amazon</a></td></tr>\n";
  565. print '<tr><td align="right" width="160px" bgcolor="' . $states['album'] . '"><b>Album:</b></td><td width="100%"> ' . xml_data($item->ItemAttributes->Title) . '</td></tr>';
  566. print '<tr><td colspan="3"><table width="100%" border="0" cellspacing="0" cellpadding="5"><tr>';
  567. print '<td width="160px" height="160" align="center" valign="middle" bgcolor="' . $states['image'] . '"><img src="' . $display_img . '" height="150" width="150"><br></td>';
  568. print '<td valign="top" bgcolor="' . $states['descr'] . '">' . $display_desc . '</td>';
  569. print '</tr></table></td></tr></table></td></tr>';
  570. return $item;
  571. }
  /**
   * Fetches an Amazon ECS response through Snoopy and parses it into a
   * SimpleXML-style object.
   *
   * The request is attempted up to three times. Problems are reported by
   * printing HTML messages, so this function produces output as a side effect.
   *
   * @param string $search With $exact, the value appended as ItemId to an
   *                       ItemLookup request; otherwise a query-string fragment
   *                       appended to an ItemSearch request on the Music index.
   * @param bool   $exact  True to perform an ItemLookup instead of an ItemSearch.
   * @return mixed The parsed response object, or false when fetching or parsing failed.
   */
  function getXMLData($search, $exact = false) {
      // Both are file-level settings. Without this declaration they were undefined
      // here: the include path was dropped and the PHP4 test switch was ignored,
      // unlike in xml_data() which honours it.
      global $include_path, $test_php4;

      // Snoopy is used to robot the URL fetching
      include_once($include_path . "lib/snoopy.class.php");

      $snoopy_retry = 3;
      $amazon_key = '19B1FW4R5ABSKBWNV582';
      $amazon_url = 'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&ResponseGroup=Large';
      if ($exact) {
          $baseSearch = $amazon_url . '&Operation=ItemLookup&AWSAccessKeyId=' . $amazon_key . '&ItemId=' . $search;
      } else {
          $baseSearch = $amazon_url . '&Operation=ItemSearch&SearchIndex=Music&AWSAccessKeyId=' . $amazon_key . '&' . $search;
      }

      $snoopy = new Snoopy;
      $xml_content = '';
      for ($snoopy_tries = 1; $snoopy_tries <= $snoopy_retry; $snoopy_tries++) {
          $snoopy->fetch($baseSearch);
          if ($snoopy->status == 200) {
              $xml_content = $snoopy->results;
              break;
          }
          if (!$snoopy->status) {
              // No status at all: retrying is pointless. The message comes from Snoopy
              // because $php_errormsg is no longer populated by current PHP versions.
              print "<b>There was a fatal error: <font color=\"red\">" . $snoopy->error . "</font></b><br>";
              return false;
          }
          print "\n<div width=\"100%\" align=\"center\" style=\"background:#C00000;color:#FFFFFF;padding:3px\"><b>";
          print "\nThere was a problem fetching results from Amazon: <font color=\"red\">" . $snoopy->status . " " . $snoopy->error . "\n";
          print "\nWe will retry this request " . ($snoopy_retry - $snoopy_tries) . " more times.";
          print "\n</font></b></div>";
      }

      // Every attempt failed (or the body was empty): there is nothing to parse.
      // Previously this fell through to returning an undefined variable.
      if (!isset($xml_content) || $xml_content == '') {
          return false;
      }

      // Amazon returns XML, so parse it through simpleXML
      if (!$test_php4 && extension_loaded('simplexml')) {
          // simplexml_load_string() yields false on malformed XML instead of throwing,
          // so bad responses reach the error report below.
          $xml = simplexml_load_string($xml_content);
      } elseif (extension_loaded('xml')) {
          // Only hint at native SimpleXML when the running PHP could actually provide it.
          if (version_compare(phpversion(), '5.0.0', '>=')) {
              print "<div width=\"100%\" align=\"center\" style=\"background:#C08000;color:#FFFFFF;padding:3px\"><b>PHP5 supports SimpleXML in a native library. This function will run much faster if you enable this component on your server.</b></div>";
          }
          include_once($include_path . "lib/simplexml/IsterXmlSimpleXMLImpl.php");
          $parser = new IsterXmlSimpleXMLImpl;
          $xml1 = $parser->load_string($xml_content);
          // The fallback parser returns a document wrapper, so step into the root
          // element, whose name depends on the operation that was requested.
          $xml = $exact ? $xml1->ItemLookupResponse : $xml1->ItemSearchResponse;
      } else {
          print "This metadata system requires PHP with the SimpleXML or plain libexpat XML extension installed.";
          exit;
      }

      if (empty($xml)) {
          print "<div width=\"100%\" align=\"center\" style=\"background:#C00000;color:#FFFFFF;padding:3px\"><b>There was a problem fetching results from Amazon: <font color=\"red\">" . $snoopy->status . " " . $snoopy->error . "</font></b></div>";
          // Show the raw response escaped; inside a double-quoted string the
          // htmlentities() call was never executed.
          print "<code>" . htmlentities($xml_content) . "</code>";
          return false;
      }

      return $xml;
  }
  /**
   * Unwraps a value taken from the parsed Amazon response.
   *
   * With native SimpleXML in use the value is handed back untouched. Otherwise
   * the value's CDATA() method supplies the text, and an unset value yields "".
   *
   * @param mixed $data Element read from the parsed response.
   * @return mixed The element itself, its CDATA() result, or "" when unset.
   */
  function xml_data($data) {
      global $test_php4;

      $native_simplexml = !$test_php4 && extension_loaded('simplexml');
      if ($native_simplexml) {
          return $data;
      }

      return isset($data) ? $data->CDATA() : "";
  }
  /**
   * Gets the metadata (biography and image) for an artist from Yahoo! Music
   * and Rhapsody.
   *
   * Unless an override was posted (descOVERRIDE / imgOVERRIDE), both sites are
   * queried, their results are printed inside a form that offers to override
   * the image and bio, and the first usable bio and image are selected
   * (Yahoo! first, then Rhapsody). When an override was posted, the values
   * come from artistOverride() instead.
   *
   * @author Fred Hirsch
   * @param mixed $node       The current node we are looking at: an object
   *                          providing getName() and getPath(), or an array
   *                          carrying an 'artist' key.
   * @param mixed $return     False to write the data through writeArtistMetaData();
   *                          "array" to get array('bio' => ..., 'image' => ...);
   *                          any other value names the local variable to return
   *                          (for example "bio" or "image").
   * @param mixed $artistName Not used by this implementation.
   * @return mixed The requested data when $return is set, false otherwise.
   */
  function SERVICE_GETARTISTMETADATA_webmosher($node = false, $return = false, $artistName = false){
      global $include_path;
      include_once($include_path . "lib/utfnormal/UtfNormal.php");
      $utffix = new UTFNormal();

      // Define everything both branches rely on. These local names must not be
      // renamed: `$$return` further down resolves them by name.
      $bio = '';
      $image = '';
      $retArr = array();
      $items = array();
      // NOTE(review): $displayOutput used to be a parameter and was left undefined
      // (null) after the signature change; kept as null - confirm the intended value.
      $displayOutput = null;

      // let's set the artist we're looking at
      if (is_object($node)){
          $artist = $node->getName();
      } else {
          $artist = (is_array($node) && isset($node['artist'])) ? $node['artist'] : '';
      }
      $artist = preg_replace("/\&/", 'and', $artist);

      // Normally, we are probably not overriding our values, so just proceed.
      // The keys are quoted: bare constants here are a fatal Error on PHP 8.
      if (empty($_POST['descOVERRIDE']) && empty($_POST['imgOVERRIDE'])) {
          // Only node objects can report a path.
          $fix_jz_path = is_object($node) ? urlencode(implode('/', $node->getPath())) : '';
          print "<form action=\"popup.php?action=popup&ptype=getmetadata&jz_path=$fix_jz_path\" method=\"post\">\n";
          print "<input type=\"hidden\" name=\"edit_search_all_albums\" value=\"off\"/>\n";
          print "<input type=\"hidden\" name=\"edit_search_all_artists\" value=\"on\"/>\n";
          print "<input type=\"hidden\" name=\"metaSearchSubmit\" value=\"Search\"/>\n";
          print "<input type=\"hidden\" name=\"edit_search_images_miss\" value=\"always\"/>\n";
          print "<input type=\"hidden\" name=\"edit_search_desc_miss\" value=\"always\"/>\n";

          $search_artist = urlencode(strtolower(preg_replace('/[^\w\s]/', '', $utffix->toNFKD($artist))));
          $yahoo_search = "http://search.music.yahoo.com/search/?m=artist&x=0&y=0&p=". $search_artist;
          $content_y = getHTMLData($yahoo_search);
          $items['yahoo'] = parseYahooArtist($content_y, $artist);
          $yahoo_bio = isset($items['yahoo']['bio']) ? $items['yahoo']['bio'] : '';
          $yahoo_image = isset($items['yahoo']['image']) ? $items['yahoo']['image'] : '';
          // Scraped text goes into HTML attributes, so it is escaped; an unescaped
          // quote would cut the override value short.
          $yahoo_image_attr = htmlspecialchars($yahoo_image, ENT_QUOTES, 'UTF-8');
          $yahoo_bio_attr = htmlspecialchars($yahoo_bio, ENT_QUOTES, 'UTF-8');
          print "<h2>YAHOO!</h2>\n";
          print "<table width=\"100%\" cellspacing=\"0\" cellpadding=\"2\" border=\"0\">\n";
          print "<tr>\n";
          print "<td align=\"center\" valign=\"top\"><img width=\"150px\" src=\"" . $yahoo_image_attr . "\"/></td>\n";
          print "<td align=\"left\" valign=\"top\">" . substr($yahoo_bio,0,200) . "</td>\n";
          print "</tr><tr>\n";
          print "<td width=\"50%\" align=\"center\"><input type=\"radio\" name=\"imgOVERRIDE\" value=\"" . $yahoo_image_attr . "\">Override Image</td>\n";
          print "<td width=\"50%\" align=\"center\"><input type=\"radio\" name=\"descOVERRIDE\" value=\"" . $yahoo_bio_attr . "\">Override Bio</td>\n";
          print "</table><hr/>\n";

          // Rhapsody has some very specific artist name search requirements.
          $search_artist = urlencode(strtolower(preg_replace('/[^\w]/', '', $utffix->toNFKD($artist))));
          $rhaps_search = "http://www.rhapsody.com/" . strtolower(preg_replace('/[^\w]/', '', $search_artist)) . "/more.html";
          $content_r = getHTMLData($rhaps_search);
          $items['rhaps'] = parseRhapsodyArtist($content_r, $artist);
          $rhaps_bio = isset($items['rhaps']['bio']) ? $items['rhaps']['bio'] : '';
          $rhaps_image = isset($items['rhaps']['image']) ? $items['rhaps']['image'] : '';
          $rhaps_image_attr = htmlspecialchars($rhaps_image, ENT_QUOTES, 'UTF-8');
          $rhaps_bio_attr = htmlspecialchars($rhaps_bio, ENT_QUOTES, 'UTF-8');
          print "<h2>Rhapsody</h2>\n";
          print "<table width=\"100%\" cellspacing=\"0\" cellpadding=\"2\" border=\"0\">\n";
          print "<tr>\n";
          print "<td align=\"center\" valign=\"top\"><img width=\"150px\" src=\"" . $rhaps_image_attr . "\"/></td>\n";
          print "<td align=\"left\" valign=\"top\">" . substr($rhaps_bio,0,200) . "</td>\n";
          print "</tr><tr>\n";
          print "<td width=\"50%\" align=\"center\"><input type=\"radio\" name=\"imgOVERRIDE\" value=\"" . $rhaps_image_attr . "\">Override Image</td>\n";
          print "<td width=\"50%\" align=\"center\"><input type=\"radio\" name=\"descOVERRIDE\" value=\"" . $rhaps_bio_attr . "\">Override Bio</td>\n";
          print "</table><hr/>\n";

          // First usable bio wins. Both spellings of the placeholder are skipped,
          // since the Yahoo! parser has emitted it with and without the period.
          $unavailable = array('Not available.', 'Not available');
          foreach (array('yahoo', 'rhaps') as $source) {
              if (isset($items[$source]['bio']) && !in_array($items[$source]['bio'], $unavailable, true)) {
                  $bio = $items[$source]['bio'];
                  break;
              }
          }
          // First non-empty image wins.
          foreach (array('yahoo', 'rhaps') as $source) {
              if (isset($items[$source]['image']) && $items[$source]['image'] != '') {
                  $image = $items[$source]['image'];
                  break;
              }
          }

          print "<div align=\"center\"><input type=\"submit\" value=\"Override Default\" class=\"jz_submit\"/></div>";
          print "</form>";
          flushdisplay();
      } else {
          $retArr = artistOverride();
          print "<h2>OVERRIDE</h2>\n";
          print "<table width=\"100%\" cellspacing=\"0\" cellpadding=\"2\" border=\"0\">\n";
          print "<tr>\n";
          print "<td align=\"center\" valign=\"top\"><img width=\"150px\" src=\"" . (isset($retArr['image']) ? $retArr['image'] : $image) . "\"/></td>\n";
          print "<td align=\"left\" valign=\"top\">" . (isset($retArr['bio']) ? $retArr['bio'] : $bio) . "</td>\n";
          print "</tr><tr>\n";
          print "</table><hr/>\n";
      }

      // Now let's write the data
      if ($return){
          if ($return == "array"){
              if (!is_array($retArr)) {
                  $retArr = array();
              }
              if (empty($retArr['bio'])) {
                  $retArr['bio'] = $bio;
              }
              if (empty($retArr['image'])) {
                  $retArr['image'] = $image;
              }
              return $retArr;
          }
          // Return the local variable named by $return; unknown names yield null.
          return isset($$return) ? $$return : null;
      }

      // NOTE(review): in the override branch only $retArr carries the chosen values
      // while $image/$bio stay empty here - confirm artistOverride() persists them.
      writeArtistMetaData($node, $image, $bio, $displayOutput);
      return false;
  }
  /**
   * Extracts an artist's biography and image URL starting from a Yahoo! Music
   * search result page.
   *
   * The first artist link on the page is followed to its "-bio--" variant via
   * getHTMLData(); when that yields no bio, the artist page itself is fetched
   * so an image can still be picked up.
   *
   * @param string $contents HTML of the search result page.
   * @param string $artist   Artist name expected to appear in that page.
   * @return array array('bio' => string, 'image' => string). The bio is
   *               "Not available." and the image is '' when nothing was found.
   */
  function parseYahooArtist($contents,$artist) {
      // Both keys always get a value; $image used to be undefined on two paths.
      $bio = "Not available.";
      $image = '';
      $contents = (string) $contents;

      $utffix = new UTFNormal();
      $artist_alternate = preg_replace('/[^\w\s]/', '', $utffix->toNFKD($artist));

      // Ok, now let's see if we got a direct hit or a link
      if (!stristr($contents,$artist) && !stristr($contents,$artist_alternate)) {
          return array('bio'=> $bio, 'image' => $image);
      }

      // Now let's see if we can get the right link. The position is compared
      // against false so that a match at offset 0 is not mistaken for "not found".
      $artist_search = "<a href=\"http://music.yahoo.com/ar-";
      $link_pos = strpos($contents,$artist_search);
      if ($link_pos === false) {
          return array('bio'=> $bio, 'image' => $image);
      }

      // Skip the 9 characters of `<a href="` so the link starts at "http://".
      $contents = substr($contents,$link_pos + 9);
      $link_end = strpos($contents,"\">");
      $link = ($link_end === false) ? '' : trim(substr($contents,0,$link_end));
      if ($link == '') {
          return array('bio'=> $bio, 'image' => $image);
      }
      $link_bio = str_replace("---","-bio--",$link);

      // Now let's get the bio back: the text of the first <td> after the 401px marker.
      // getHTMLData() may return false, hence the cast.
      $contents = (string) getHTMLData($link_bio);
      $bio = '';
      $marker_pos = strpos($contents,'width="401">');
      if ($marker_pos !== false) {
          $cell_pos = strpos($contents,'<td>',$marker_pos);
          $cell_end = ($cell_pos === false) ? false : strpos($contents,'</td>',$cell_pos + 4);
          if ($cell_end !== false) {
              $bio = substr($contents,$cell_pos + 4,$cell_end - $cell_pos - 4);
              $bio = strip_tags($bio);
              $bio = preg_replace("/(\r\n)+/m", "\n", $bio);
          }
      }

      // Maybe there isn't a bio page.
      if (empty($bio)) {
          $bio = "Not available.";
          $contents = (string) getHTMLData($link);
      }

      // Now let's get the artist image
      $image_pos = strpos($contents,'<td width="300"><img src="http://');
      if ($image_pos !== false) {
          // 26 is the length of `<td width="300"><img src="`, so the URL keeps "http://".
          $candidate = substr($contents,$image_pos + 26);
          $candidate_end = strpos($candidate,'"');
          $candidate = ($candidate_end === false) ? '' : substr($candidate,0,$candidate_end);
          if (stristr($candidate,".jpg") && stristr($candidate,"http://")) {
              $image = $candidate;
          }
      }

      return array('bio'=> $bio, 'image' => $image);
  }
  /**
   * Extracts an artist's biography and image URL from a Rhapsody artist page.
   *
   * @param string $contents HTML of the Rhapsody page.
   * @param string $artist   Artist name expected to appear in that page.
   * @return array array('bio' => string, 'image' => string). The bio is
   *               "Not available." when the artist or the "About" section is
   *               missing; the image is '' when no image was found.
   */
  function parseRhapsodyArtist($contents, $artist) {
      // Both keys always get a value; $image used to be undefined when the
      // image marker or the artist was missing.
      $bio = "Not available.";
      $image = '';
      $contents = (string) $contents;

      $utffix = new UTFNormal();
      $artist_alternate = preg_replace('/[^\w\s]/', '', $utffix->toNFKD($artist));

      // The page has to mention the artist, verbatim or with punctuation stripped.
      // TODO: search for the Rhapsody content if the main page did not load, e.g.
      // through http://www.rhapsody.com/-search?query=<artist>&searchtype=RhapArtist
      if (!stristr($contents,$artist) && !stristr($contents,$artist_alternate)) {
          return array('bio'=> $bio, 'image' => $image);
      }

      // stripos() both detects and locates each marker. Detection used to be
      // case-insensitive while the offset came from a case-sensitive search,
      // which produced a wrong offset when only the case differed.
      $img_search = '<img class="artistPageFlyoutMainBgImage" src="';
      $img_pos = stripos($contents,$img_search);
      if ($img_pos !== false) {
          $image = substr($contents,$img_pos + strlen($img_search));
          $img_end = strpos($image,'"');
          $image = ($img_end === false) ? '' : substr($image,0,$img_end);
      }

      $bio_search = '<h3 class="fontSize13">About</h3>';
      $bio_pos = stripos($contents,$bio_search);
      if ($bio_pos !== false) {
          $bio = substr($contents,$bio_pos + strlen($bio_search));
          $bio_end = strpos($bio,'</div>');
          $bio = ($bio_end === false) ? '' : substr($bio,0,$bio_end);
          $bio = strip_tags($bio);
          $bio = preg_replace("/(\r\n)+/m", "\n", $bio);
      }

      return array('bio'=> $bio, 'image' => $image);
  }
  /**
   * Fetches a URL through Snoopy, retrying up to three times.
   *
   * Problems are reported by printing HTML messages, so this function produces
   * output as a side effect.
   *
   * @param string $search The URL to fetch.
   * @return mixed The response body passed through utf8_encode(), or false when
   *               no HTTP 200 response could be obtained.
   */
  function getHTMLData($search) {
      // $include_path is a file-level setting; without this declaration it was
      // undefined here and the include path was dropped.
      global $include_path;

      // Snoopy is used to robot the URL fetching
      include_once($include_path . "lib/snoopy.class.php");

      $snoopy_retry = 3;
      $snoopy = new Snoopy;
      for ($snoopy_tries = 1; $snoopy_tries <= $snoopy_retry; $snoopy_tries++) {
          @$snoopy->fetch($search);
          if ($snoopy->status == 200) {
              // NOTE(review): utf8_encode() treats its input as ISO-8859-1 and is
              // deprecated as of PHP 8.2 - confirm the encoding of the fetched pages.
              return utf8_encode($snoopy->results);
          }
          if (!$snoopy->status) {
              // No status at all: retrying is pointless. The message comes from Snoopy
              // because $php_errormsg is no longer populated by current PHP versions.
              print "<b>There was a fatal error: <font color=\"red\">" . $snoopy->error . "</font></b><br>";
              return false;
          }
          print "<div width=\"100%\" align=\"center\" style=\"background:#C00000;color:#FFFFFF;padding:3px\"><b>There was a problem fetching results: <font color=\"red\">" . $snoopy->status . " " . $snoopy->error . "</font></b></div>";
          // Parenthesised and using the real counter: the old expression referenced an
          // undefined variable and mixed "." with "-", which garbled the message.
          print "<div width=\"100%\" align=\"center\" style=\"background:#C08000;color:#FFFFFF;padding:3px\">We will retry this request " . ($snoopy_retry - $snoopy_tries) . " more times.</div>";
      }

      // Every attempt ended with a non-200 status: report failure instead of
      // handing the body of an error response to the parsers.
      return false;
  }
  839. ?>