PageRenderTime 49ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/parsers/sites/ozerich/premaman_ru.php

https://github.com/Ozerich/ISP-parsers
PHP | 178 lines | 133 code | 44 blank | 1 comment | 10 complexity | 7063e6fa952690b8041f5f580f5b22a1 MD5 | raw file
  1. <?php
  2. require_once PARSERS_BASE_DIR . '/parsers/baseClasses/ozerich.php';
  /**
   * Site parser for the premaman.ru shop.
   *
   * Scrapes the catalogue, the physical shop lists and the news feed of
   * http://www.premaman.ru/ with regular expressions and passes the collected
   * objects to the save*Result() methods, which are not defined in this file
   * (presumably inherited from ItemsSiteParser_Ozerich).
   *
   * The helpers txt(), address() and loadImage() are likewise not defined here;
   * judging by their use they normalise text / addresses and fetch an image,
   * but confirm against the base class before relying on that.
   */
  class ISP_premaman_ru extends ItemsSiteParser_Ozerich
  {
      /** Root URL of the shop; every relative path below is appended to it. */
      protected $shopBaseUrl = "http://www.premaman.ru/";

      /**
       * @param mixed $savePath Forwarded unchanged to the parent constructor.
       */
      public function __construct($savePath)
      {
          parent::__construct($savePath);
          // Request throttling is currently disabled:
          // $this->httpClient->setRequestsPause (0.5);
      }

      /**
       * Walks the catalogue: collections -> categories -> paged listings -> items.
       *
       * @return mixed Whatever saveItemsResult() returns for the collected
       *               ParserCollection objects.
       */
      public function loadItems ()
      {
          $base = array();

          // Listing pages are addressed with a "~<offset>" URL suffix; the offset
          // advances by $pageSize and paging stops once it reaches $maxOffset.
          $pageSize = 16;
          $maxOffset = 500;

          $catalogHtml = $this->httpClient->getUrlText($this->shopBaseUrl."katalog/");
          preg_match_all('#<li><a class=\'sublink\' href="/(katalog/(.+?)/)".+?>(.+?)</a>#sui', $catalogHtml, $collections, PREG_SET_ORDER);

          foreach ($collections as $collection_value) {
              $collection = new ParserCollection();
              $collection->url = $this->shopBaseUrl.$collection_value[1];
              $collection->id = $collection_value[2];
              $collection->name = $this->txt($collection_value[3]);

              $collectionHtml = $this->httpClient->getUrlText($collection->url);
              preg_match_all('#<li><a class=\'sublink\' href="/(katalog/.+?/)".+?>(.+?)</a>#sui', $collectionHtml, $categories, PREG_SET_ORDER);

              // No sub-links found: treat the collection page itself as a single
              // unnamed category.
              if (!$categories) {
                  $categories = array(array(1 => $collection_value[1], 2 => ''));
              }

              foreach ($categories as $category) {
                  $category_name = $category[2];
                  $url = $this->shopBaseUrl.$category[1];

                  for ($offset = 0; $offset < $maxOffset; $offset += $pageSize) {
                      $listHtml = $this->httpClient->getUrlText($url."~".$offset);

                      // Past the first page, a response without the pager markup
                      // is taken to mean there are no further pages.
                      if ($offset > 0 && mb_strpos($listHtml, '<span>Страницы: </span>') === false) {
                          break;
                      }

                      preg_match_all("#<div class='gal_cell_upper'>\s*<a href='/(.+?)/'.+?<p class='gal_cell_header'>(.+?)</p>#sui",
                          $listHtml, $items, PREG_SET_ORDER);
                      if (!$items) {
                          break;
                      }

                      foreach ($items as $item_value) {
                          $itemPath = $item_value[1];

                          $item = new ParserItem();

                          // The id is the last path segment. mb_strrpos() returns
                          // false when the path has no '/', and "false + 1" would
                          // silently cut off the first character of the id.
                          $lastSlash = mb_strrpos($itemPath, '/');
                          $item->id = ($lastSlash === false)
                              ? $itemPath
                              : mb_substr($itemPath, $lastSlash + 1);

                          $item->url = $this->shopBaseUrl.$itemPath.'/';
                          $item->name = $this->txt($item_value[2]);
                          if ($category_name !== '') {
                              $item->categ = $category_name;
                          }

                          $itemHtml = $this->httpClient->getUrlText($item->url);

                          // Only load an image when the page actually contains one;
                          // otherwise $image[1] is undefined.
                          preg_match('#<p>\s*<img.+?src="(.+?)"#sui', $itemHtml, $image);
                          if (isset($image[1])) {
                              $item->images[] = $this->loadImage($image[1]);
                          }

                          // An "(арт. ...)" fragment inside the name is moved into
                          // the articul field and removed from the name.
                          preg_match('#\(арт(?:\.|\:)(.+?)\)#sui', $item->name, $articul);
                          if ($articul) {
                              $item->articul = $this->txt($articul[1]);
                              $item->name = str_replace($articul[0], '', $this->txt($item->name));
                          }

                          $collection->items[] = $item;
                      }
                  }
              }

              $base[] = $collection;
          }

          return $this->saveItemsResult ($base);
      }

      /**
       * Collects physical shops from the Moscow list (one page per shop) and from
       * the regional list (one <li> per shop).
       *
       * @return mixed Whatever savePhysicalResult() returns for the collected
       *               ParserPhysical objects.
       */
      public function loadPhysicalPoints ()
      {
          $base = array ();

          // --- Moscow: every matched link is fetched as a shop page. ---
          $moscowHtml = $this->httpClient->getUrlText($this->shopBaseUrl."magazini/magazini_v_moskve/");
          preg_match_all('#<li><a href="/(.+?)">#sui', $moscowHtml, $shops, PREG_SET_ORDER);

          foreach ($shops as $shop_value) {
              $shop_item = new ParserPhysical();
              $shop_item->city = "Москва";

              $shopHtml = $this->httpClient->getUrlText($this->shopBaseUrl.$shop_value[1]);

              // Fall back to an empty string when the markup is missing instead of
              // reading an undefined match group.
              preg_match('#<strong>Адрес:</strong>(.+?)<br /><br />#sui', $shopHtml, $address);
              $shop_item->address = $this->txt(isset($address[1]) ? $address[1] : '');

              preg_match('#<strong>Режим работы:</strong>(.+?)</p>#sui', $shopHtml, $timetable);
              $shop_item->timetable = $this->txt(isset($timetable[1]) ? $timetable[1] : '');

              // A trailing "тел:..." part of the address is split off as the phone.
              preg_match('#тел:(.+?)$#sui', $shop_item->address, $phone);
              if ($phone) {
                  $shop_item->phone = $this->txt($phone[1]);
                  $shop_item->address = str_replace($phone[0], '', $shop_item->address);
              }

              // Drop the leading city name from the address.
              $shop_item->address = str_replace($shop_item->city.', ', '', $shop_item->address);

              $base[] = $shop_item;
          }

          // --- Regions: each <li> holds city, address and phone as one string. ---
          $regionsHtml = $this->httpClient->getUrlText($this->shopBaseUrl."magazini/magazini_v_regionah/");
          preg_match_all('#<li>(.+?)</li>#sui', $regionsHtml, $shops);

          foreach ($shops[1] as $entry) {
              $shop = new ParserPhysical();

              // "г.<city>" up to the next comma (or up to "ул") is the city.
              preg_match('#г\.(.+?)(?:,|(?=ул))#sui', $entry, $city);
              if ($city) {
                  $shop->city = $this->txt($city[1]);
                  $entry = str_replace($city[0], '', $entry);
              }

              preg_match('#тел(?:\.*\:*\.*)(.+?)(,.+?)*$#sui', $entry, $phone);
              if ($phone) {
                  $shop->phone = $this->txt($phone[1]);
                  $entry = str_replace($phone[0], '', $entry);
              }

              // Whatever is left after removing city and phone is the address.
              $shop->address = $this->address($entry);

              $base[] = $shop;
          }

          return $this->savePhysicalResult ($base);
      }

      /**
       * Collects news entries from the news index and fetches the full text of
       * each entry from its own page.
       *
       * @return mixed Whatever saveNewsResult() returns for the collected
       *               ParserNews objects.
       */
      public function loadNews ()
      {
          $base = array();

          $url = $this->shopBaseUrl."novosti/";
          $indexHtml = $this->httpClient->getUrlText($url);
          preg_match_all("#<a href=\"/(novosti/(\d+)/)\" class='news_link'>.+?<em>(.+?)</em>.+?<p>(.+?)</p></b>#sui",
              $indexHtml, $news, PREG_SET_ORDER);

          foreach ($news as $news_value) {
              $news_item = new ParserNews();
              $news_item->urlShort = $url;
              $news_item->urlFull = $this->shopBaseUrl.$news_value[1];
              $news_item->id = $news_value[2];
              // The site prints dates with '/' separators; store them with '.'.
              $news_item->date = str_replace('/', '.', $news_value[3]);
              $news_item->header = $this->txt($news_value[4]);
              $news_item->contentShort = $news_value[4];

              $articleHtml = $this->httpClient->getUrlText($news_item->urlFull);
              preg_match("#<h1 id='header_second'>.+?</h1>(.+?)</div>#sui", $articleHtml, $content);
              // Keep null (as before) when the article body is not found, but
              // without reading an undefined match group.
              $news_item->contentFull = isset($content[1]) ? $content[1] : null;

              $base[] = $news_item;
          }

          return $this->saveNewsResult($base);
      }
  }