PageRenderTime 67ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/snoopy/index.php

http://student-test.googlecode.com/
PHP | 880 lines | 839 code | 10 blank | 31 comment | 4 complexity | c75c28dc67d7b93271fb9fd63e4b1f7a MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0
  1. <?php
  2. /*
  3. * Created on 2010-12-22
  4. *
  5. * To change the template for this generated file go to
  6. * Window - Preferences - PHPeclipse - PHP - Code Templates
  7. */
  8. include "Snoopy.class.php";
  9. //????
  10. $snoopy = new Snoopy;
  11. $snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.2; Alexa Toolbar)"; //?????
  12. $action = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.9)';
  13. $formvars['client'] = 'ssologin.js(v1.3.9)';
  14. $formvars['encoding'] = 'utf-8';
  15. $formvars['entry'] = 'miniblog';
  16. $formvars['from'] = '';
  17. $formvars['gateway'] = 1;
  18. $formvars['returntype'] = 'META';
  19. $formvars['savestate'] = 0;
  20. $formvars['service'] = 'miniblog';
  21. $formvars['url'] = 'http://t.sina.com.cn/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack';
  22. //$formvars['username'] = 'aoweibo_3@sina.com';
  23. $formvars['password'] = '12345678sina';
  24. //$formvars['username'] = 'weibozixun1@sina.cn';
  25. $i_time = date('i');
  26. $i_time = intval($i_time);
  27. $i_time = intval($i_time/5);
  28. $formvars['username'] = 'weibozixun'.($i_time+1).'@sina.cn';
  29. $formvars['useticket'] = 0;
  30. //?????sae??
  31. $is_trans_sae = true;
  32. $snoopy->submit($action,$formvars);
  33. //???????????
  34. $fp = fopen("maxdate.txt",'a+'); //??????
  35. $last_time = fread($fp , 20);
  36. //$last_time = "2011-01-12 16:30:00";
  37. fclose($fp); //????
  38. //??????????
  39. $max_date = $last_time;
  40. for($page = 1; $page<=50; $page++)
  41. {
  42. echo "<br>page:".$page."......<br>";
  43. //if($page == 5)die();
  44. if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9&page=".$page))//????
  45. //if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E6%2589%258B%25E6%259C%25BA&page=".$page))//????
  46. //if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E9%2597%25A8%25E7%25A5%25A8&page=".$page))//????
  47. //if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E5%258D%25A1&page=".$page))//???
  48. //if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E7%2594%25B5%25E8%2584%2591&page=".$page))//??
  49. //if($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E7%2581%25AB%25E8%25BD%25A6%25E7%25A5%25A8&page=".$page))//???
  50. {
  51. $temp = $snoopy->results;
  52. }
  53. else
  54. {
  55. echo "no fetch content<br>";
  56. }
  57. //echo $temp;echo "<br>";
  58. $content_arr = array();
  59. if (!preg_match_all("/<div class=\"MIB_feed_c\">(.*)<div id=\"_comment_list/Us",$temp,$tt) )
  60. {
  61. echo "no content1<br>";
  62. }
  63. else
  64. {
  65. preg_match_all("/<div class=\"head_pic\">(.*)<\/div>/Us",$temp,$tt_head );
  66. foreach($tt[0] as $k=>$t)
  67. {
  68. //var_dump($t);die();
  69. preg_match("/<strong date=\"(.*)\"/Us",$t,$date_temp);
  70. $date = str_replace("<strong date=\"","",$date_temp);
  71. $date = str_replace("\"","",$date);
  72. //echo "date=".$date[0];
  73. //????
  74. echo "<br>lasttime:".$last_time."---datetime:".$date[0]."<br>";
  75. if($date[0] <= $last_time )
  76. {
  77. echo "---".$max_date;
  78. $fp = fopen("maxdate.txt",'r+'); //??????
  79. fwrite($fp, $max_date, strlen($max_date)); //???????
  80. fclose($fp); //????
  81. //?sae???????
  82. if(!empty($content_arr) && $is_trans_sae)
  83. {
  84. echo "<br>submit data";
  85. $snoopy_tr = new Snoopy;
  86. $snoopy_tr->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.2; Alexa Toolbar)"; //?????
  87. $action = 'http://zhuanrang.sinaapp.com/?action=Zhuaqu&op=addcontent';
  88. $formvars = array();
  89. $formvars['content'] = serialize($content_arr);
  90. $ret = $snoopy_tr->submit($action,$formvars);
  91. echo "---submit end";
  92. }
  93. exit();
  94. }
  95. if($max_date < $date[0])
  96. $max_date = $date[0];
  97. $content_arr[$k]['updae_time'] = $date[0];
  98. //echo "<br>";
  99. preg_match("/<div class=\"lf MIB_txtbl\">(.*)\">/Us",$t,$mid_temp);
  100. $mid = str_replace("<div class=\"lf MIB_txtbl\">","",$mid_temp[0]);
  101. $mid = str_replace("<cite><a href=\"","",$mid);
  102. $mid = str_replace("\">","",$mid);
  103. $mid = str_replace("http://t.sina.com.cn/","",$mid);
  104. $mid = trim($mid);
  105. preg_match("/\/.*/",$mid,$mid);
  106. $mid = str_replace("/","",$mid[0]);
  107. //echo "mid=".$mid;
  108. $content_arr[$k]['mid'] = $mid;
  109. //echo "<br>";
  110. if (preg_match_all("/<p class=\"sms\"(.*)<\/p>/Us",$t,$content))
  111. {
  112. $content_temp = $content[0];
  113. $content_temp = str_replace("<span style='color: red;'>","",$content_temp);
  114. $content_temp = str_replace("<span style=\"color: red;\">","",$content_temp);
  115. $content_temp = str_replace("</span>","",$content_temp);
  116. preg_match("/title=\"(.*)\"/Us",$content_temp[0],$title);
  117. $username = str_replace("title=\"","",$title[0]);
  118. $username = str_replace("\"","",$username);
  119. //echo "username=".$username;
  120. $content_arr[$k]['user_name'] = $username;
  121. //echo "<br>";
  122. preg_match("/mid=\"(.*)\"/Us",$content_temp[0],$mid_temp);
  123. $sid = str_replace("mid=\"","",$mid_temp[0]);
  124. $sid = str_replace("\"","",$sid);
  125. //echo "sid=".$sid;
  126. $content_arr[$k]['sid'] = $sid;
  127. //echo "<br>";
  128. preg_match("/dynamic-src=\"(.*)\"/Us",$tt_head[0][$k],$user_img_temp);
  129. $user_img = str_replace("dynamic-src=\"","",$user_img_temp[0]);
  130. $user_img = str_replace("\"","",$user_img);
  131. //echo "user_img=".$user_img;
  132. $content_arr[$k]['user_img'] = $user_img;
  133. //echo "<br>";
  134. preg_match("/<a href=\"http:\/\/t.sina.com.cn\/(.*)\"/Us",$content_temp[0],$uid_temp);
  135. $uid = str_replace("<a href=\"http://t.sina.com.cn/","",$uid_temp[0]);
  136. $uid = str_replace("\"","",$uid);
  137. //echo "uid=".$uid;
  138. $content_arr[$k]['user_id'] = $uid;
  139. //echo "<br>";
  140. $pattern = "/<p class=\"sms\" mid=(.*)type=\"\d\">/U";
  141. $replacement = "";
  142. $m_content = preg_replace($pattern, $replacement, $content_temp[0]);
  143. $pattern = "/<\/p>/U";
  144. $m_content = preg_replace($pattern, $replacement, $m_content);
  145. //var_dump($m_content);
  146. $pattern = "/<a href=\"http:\/\/t.sina.com.cn\/(.*)<\/a>/U";
  147. $m_content = preg_replace($pattern, $replacement, $m_content, 1);
  148. $m_content = str_replace("?", "", $m_content);
  149. $m_content = str_replace("<img dynamic-src", "<img src", $m_content);
  150. //echo "m_content=".$m_content;
  151. $content_arr[$k]['weibo_content'] = $m_content;
  152. //?????????pass?
  153. if(strpos($m_content,"//@"))
  154. $unset = 1;
  155. else
  156. {
  157. echo "---api excute";
  158. //???????????,????????
  159. $apiURL = "http://api.t.sina.com.cn/queryid.json?source=733331953&isBase62=1&type=1&mid=".$content_arr[$k]['mid'];
  160. if( $snoopy->fetch($apiURL))
  161. {
  162. $weibo_id = $snoopy->results;
  163. $weibo_id = json_decode($weibo_id,true);
  164. $weibo_id = $weibo_id['id'];
  165. $apiURL = "http://api.t.sina.com.cn/statuses/show/:".$weibo_id.".json?source=733331953";
  166. if($snoopy->fetch($apiURL))
  167. {
  168. $weibo_obj = $snoopy->results;
  169. $weibo_obj = json_decode($weibo_obj,true);
  170. if(isset($weibo_obj['user']['province']))
  171. {
  172. $content_arr[$k]['province'] = $weibo_obj['user']['province'];
  173. }
  174. if(isset($weibo_obj['user']['city']))
  175. {
  176. $content_arr[$k]['city'] = $weibo_obj['user']['city'];
  177. //var_dump($weibo_obj);die();
  178. }
  179. //????????
  180. //$content_arr[$k]['weibo_content'] = $weibo_obj['text'];
  181. //??????? ????
  182. $content_arr[$k]['user_verified'] = $weibo_obj['user']['verified'];
  183. }
  184. }
  185. //??????????
  186. $m_content_gbk = iconv("UTF-8", "GBK", $weibo_obj['text']);
  187. //echo $m_content_gbk;
  188. $f_ret = filter($m_content_gbk);
  189. //??????
  190. if(empty($f_ret["type"]))
  191. {
  192. //echo "-------unset-------";
  193. $unset = 1;
  194. }
  195. else
  196. {
  197. $unset = 0;
  198. foreach($f_ret as $k_f=>$f)
  199. {
  200. $content_arr[$k][$k_f] = $f;
  201. }
  202. }
  203. }
  204. if($unset == 1)
  205. {
  206. unset($content_arr[$k]);
  207. }
  208. //echo "<br><br>";
  209. }
  210. //var_dump($content_arr);
  211. }
  212. }
  213. //var_dump($content_arr);
  214. //???????????????
  215. if($page == 1)
  216. {
  217. $fp = fopen("maxdate.txt",'r+'); //??????
  218. fwrite($fp, $max_date, strlen($max_date)); //???????
  219. fclose($fp); //????
  220. }
  221. //?sae???????
  222. if(!empty($content_arr) && $is_trans_sae)
  223. {
  224. $snoopy_tr = new Snoopy;
  225. $snoopy_tr->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.2; Alexa Toolbar)"; //?????
  226. $action = 'http://zhuanrang.sinaapp.com/?action=Zhuaqu&op=addcontent';
  227. $formvars = array();
  228. //var_dump($content_arr);
  229. $formvars['content'] = serialize($content_arr);
  230. $ret = $snoopy_tr->submit($action,$formvars);
  231. //echo "submit data";
  232. }
  233. }
  234. //??????
  235. function filter($content)
  236. {
  237. $stag = 'source';
  238. $slen = strlen($stag);
  239. //$dict = '../pscws23/dict/dict.sqlite'; // ???? sqlite
  240. $dict = '../pscws23/dict/dict.txt'; // ???? txt
  241. //$dict = '../pscws23/dict/dict.xdb'; // ???? sqlite
  242. $version = 3; // ????
  243. $autodis = false; // ??????
  244. $ignore = false; // ??????
  245. $debug = false; // ???????
  246. $stats = true; // ????????
  247. //$is_cli = (php_sapi_name() == 'cli'); // ??? cli ????
  248. $object = 'PSCWS' . $version;
  249. require_once ('../pscws23/'.strtolower($object) . '.class.php');
  250. include "words.php";
  251. $cws = new $object($dict);
  252. $cws->set_ignore_mark($ignore);
  253. $cws->set_autodis($autodis);
  254. $cws->set_debug($debug);
  255. $cws->set_statistics($stats);
  256. $statistics = $cws->segment($content, '');
  257. //??????
  258. //$statistics = &$cws->get_statistics();
  259. $key_arr = array();
  260. $res_arr = array();
  261. $type = "";
  262. //var_dump($content);
  263. //var_dump($statistics);
  264. //echo "<br><br>";
  265. foreach($statistics as $k=>$stat)
  266. {
  267. $str = iconv("GBK", "UTF-8", $stat);
  268. $key_arr[] = array('str'=>$str, 'times'=>1, 'poses'=>$k);
  269. }
  270. //var_dump($key_arr);
  271. $index = 0;
  272. foreach($key_arr as $k=>$a)
  273. {
  274. if($index > 0)
  275. {
  276. $index --;
  277. continue;
  278. }
  279. $str = $a['str'];
  280. $str_type = checkStr($str);
  281. if($str_type == 'han')
  282. {
  283. //??????
  284. if(empty($type))
  285. {
  286. //echo $str."--";
  287. //?????
  288. if(in_array($str,$zr_huoche))
  289. {
  290. $res_huoche_arr = analysis_huoche($key_arr);
  291. if($res_huoche_arr['is_huoche'] == 1)
  292. {
  293. $type = "huoche";
  294. if(isset($res_huoche_arr['date']))
  295. {
  296. $res_arr['start_time'] = $res_huoche_arr['date'];
  297. }
  298. if(isset($res_huoche_arr['city']['start_city']))
  299. {
  300. $res_arr['start_station'] = $res_huoche_arr['city']['start_city'];
  301. }
  302. if(isset($res_huoche_arr['city']['end_city']))
  303. {
  304. $res_arr['end_station'] = $res_huoche_arr['city']['end_city'];
  305. }
  306. if(isset($res_huoche_arr['train_num']))
  307. {
  308. $res_arr['train_num'] = $res_huoche_arr['train_num'];
  309. }
  310. if(isset($res_huoche_arr['seat_type']))
  311. {
  312. $res_arr['train_type'] = $res_huoche_arr['seat_type'];
  313. }
  314. if(isset($res_huoche_arr['count']))
  315. {
  316. $res_arr['count'] = $res_huoche_arr['count'];
  317. }
  318. }
  319. }
  320. //????
  321. else if(in_array($str,$zr_menpiao))
  322. {
  323. $res_menpiao_arr = analysis_menpiao($key_arr);
  324. if($res_menpiao_arr['is_menpiao'] == 1)
  325. {
  326. $type = "menpiao";
  327. if(isset($res_menpiao_arr['price']))
  328. {
  329. $res_arr['price'] = $res_menpiao_arr['price'];
  330. }
  331. if(isset($res_menpiao_arr['count']))
  332. {
  333. $res_arr['count'] = $res_menpiao_arr['count'];
  334. }
  335. if(isset($res_menpiao_arr['detail_type']))
  336. {
  337. $res_arr['detail_type'] = $res_menpiao_arr['detail_type'];
  338. }
  339. if(isset($res_menpiao_arr['date']))
  340. {
  341. $res_arr['start_time'] = $res_menpiao_arr['date'];
  342. }
  343. //var_dump($res_arr);//die();
  344. }
  345. }
  346. //????? ???
  347. else if(in_array($str,$zr_dazhe))
  348. {
  349. $type = "dazhe";
  350. $pd_arr = get_type_and_price($str, $key_arr, $type);
  351. if(isset($pd_arr['detail_type']))
  352. {
  353. $res_arr['detail_type'] = $pd_arr['detail_type'];
  354. }
  355. if(isset($pd_arr['price']))
  356. {
  357. $res_arr['price'] = $pd_arr['price'];
  358. }
  359. }
  360. //??????
  361. else if(in_array($str,$zr_shouji))
  362. {
  363. $type = "shouji";
  364. $pd_arr = get_type_and_price($str, $key_arr, $type);
  365. if(isset($pd_arr['detail_type']))
  366. {
  367. $res_arr['detail_type'] = $pd_arr['detail_type'];
  368. }
  369. if(isset($pd_arr['price']))
  370. {
  371. $res_arr['price'] = $pd_arr['price'];
  372. }
  373. }
  374. //??????
  375. else if(in_array($str,$zr_shuma))
  376. {
  377. $type = "shuma";
  378. $pd_arr = get_type_and_price($str, $key_arr, $type);
  379. if(isset($pd_arr['detail_type']))
  380. {
  381. $res_arr['detail_type'] = $pd_arr['detail_type'];
  382. }
  383. if(isset($pd_arr['price']))
  384. {
  385. $res_arr['price'] = $pd_arr['price'];
  386. }
  387. }
  388. //????
  389. else if(in_array($str,$zr_diannao))
  390. {
  391. $type = "diannao";
  392. $pd_arr = get_type_and_price($str, $key_arr, $type);
  393. if(isset($pd_arr['detail_type']))
  394. {
  395. $res_arr['detail_type'] = $pd_arr['detail_type'];
  396. }
  397. if(isset($pd_arr['price']))
  398. {
  399. $res_arr['price'] = $pd_arr['price'];
  400. }
  401. }
  402. //????
  403. else if(in_array($str,$zr_dianqi))
  404. {
  405. $type = "dianqi";
  406. $pd_arr = get_type_and_price($str, $key_arr, $type);
  407. if(isset($pd_arr['detail_type']))
  408. {
  409. $res_arr['detail_type'] = $pd_arr['detail_type'];
  410. }
  411. if(isset($pd_arr['price']))
  412. {
  413. $res_arr['price'] = $pd_arr['price'];
  414. }
  415. }
  416. //????
  417. else if(in_array($str,$zr_qiche))
  418. {
  419. $type = "qiche";
  420. $pd_arr = get_type_and_price($str, $key_arr, $type);
  421. if(isset($pd_arr['detail_type']))
  422. {
  423. $res_arr['detail_type'] = $pd_arr['detail_type'];
  424. }
  425. if(isset($pd_arr['price']))
  426. {
  427. $res_arr['price'] = $pd_arr['price'];
  428. }
  429. }
  430. //????
  431. else if(in_array($str,$zr_jiaju))
  432. {
  433. $type = "jiaju";
  434. $pd_arr = get_type_and_price($str, $key_arr, $type);
  435. if(isset($pd_arr['detail_type']))
  436. {
  437. $res_arr['detail_type'] = $pd_arr['detail_type'];
  438. }
  439. if(isset($pd_arr['price']))
  440. {
  441. $res_arr['price'] = $pd_arr['price'];
  442. }
  443. }
  444. //????
  445. else if(in_array($str,$zr_wangpu))
  446. {
  447. $type = "wangpu";
  448. $pd_arr = get_type_and_price($str, $key_arr, $type);
  449. if(isset($pd_arr['detail_type']))
  450. {
  451. $res_arr['detail_type'] = $pd_arr['detail_type'];
  452. }
  453. if(isset($pd_arr['price']))
  454. {
  455. $res_arr['price'] = $pd_arr['price'];
  456. }
  457. }
  458. }
  459. }
  460. else if($str_type == 'ying')
  461. {}
  462. else if($str_type == 'shu')
  463. {}
  464. else if($str_type == 'hs')
  465. {}
  466. else if($str_type == 'hy')
  467. {}
  468. else if($str_type == 'sy')
  469. {}
  470. }
  471. $res_arr["type"] = $type;
  472. //var_dump($res_arr);
  473. //echo "<br><br>";
  474. //echo "--".$res_arr["type"];
  475. return $res_arr;
  476. }
  477. // ??????????
  478. function get_microtime()
  479. {
  480. list($usec, $sec) = explode(' ', microtime());
  481. return ((float)$usec + (float)$sec);
  482. }
  483. //?????????
  484. function get_type_and_price($str, $key_arr, $type)
  485. {
  486. include "type_config.php";
  487. $res_arr = array();
  488. foreach($key_arr as $k=>$a)
  489. {
  490. $str = $a['str'];
  491. $str_type = checkStr($str);
  492. //????
  493. $is_money = is_money($str, $k, $key_arr);
  494. if($is_money)
  495. {
  496. $index = $is_money['pos'] - $k;
  497. $res_arr['price'] = $is_money['str'];
  498. continue;
  499. //echo "-------".$is_money['str']."-------";
  500. }
  501. //????
  502. if($str_type == 'han')
  503. {
  504. if(in_array($str, $detail_type[$type]))
  505. {
  506. $res_arr['detail_type'] = $str;
  507. continue;
  508. }
  509. }
  510. }
  511. return $res_arr;
  512. }
  513. //??????
  514. function is_date($str, $k, $key_arr)
  515. {
  516. if($str == '?')
  517. {
  518. $date_str = "";
  519. //?????????
  520. if(checkStr($key_arr[$k - 1]['str']) == 'shu' )
  521. {
  522. $date_str = "";
  523. if(checkStr($key_arr[$k + 1]['str']) == 'shu')//??????
  524. {
  525. if($key_arr[$k + 2]['str'] == "?" || $key_arr[$k + 2]['str'] == "?" )
  526. {
  527. $date_str = date('Y').'-'.$key_arr[$k - 1]['str'].'-'.$key_arr[$k + 1]['str'];
  528. }
  529. }
  530. if(!empty($date_str))
  531. {
  532. return array("date_str"=>$date_str, "pos"=>$k+2);
  533. }
  534. }
  535. }
  536. else if($str == "?" || $str == "?")
  537. {
  538. $date_str = "";
  539. //?????????
  540. if(checkStr($key_arr[$k - 1]['str']) == 'shu' )
  541. {
  542. $date_str = date('Y-m').'-'.$key_arr[$k - 1]['str'];
  543. if(!empty($date_str))
  544. return array("date_str"=>$date_str, "pos"=>$k);
  545. }
  546. }
  547. else if(checkStr($str) == 'shu')
  548. {
  549. if($key_arr[$k + 1]['str'] == '-' && checkStr($key_arr[$k + 2]['str']) == 'shu'
  550. && $key_arr[$k + 3]['str'] == '-' && checkStr($key_arr[$k + 4]['str']) == 'shu')
  551. {
  552. if(intval($str) > 1000 )
  553. {
  554. $date_str = $str.'-'.$key_arr[$k + 2]['str'].'-'.$key_arr[$k + 4]['str'];
  555. }
  556. else
  557. {
  558. $date_str = date('Y').'-'.$key_arr[$k + 2]['str'].'-'.$key_arr[$k + 4]['str'];
  559. }
  560. return array("date_str"=>$date_str, "pos"=>$k + 4);
  561. }
  562. else if($key_arr[$k + 1]['str'] == '-' && checkStr($key_arr[$k + 2]['str']) == 'shu'
  563. && $key_arr[$k + 3]['str'] != '-' && checkStr($key_arr[$k - 1]['str']) != 'shu')
  564. {
  565. $date_str = date('Y').'-'.$str.'-'.$key_arr[$k + 2]['str'];
  566. return array("date_str"=>$date_str, "pos"=>$k + 2);
  567. }
  568. //echo $str.'--'.$key_arr[$k + 1]['str'].'--'.$key_arr[$k + 2]['str'].'--'.$key_arr[$k + 3]['str'];
  569. //echo "----333-----<br>";
  570. }
  571. return false;
  572. }
  573. //???
  574. function is_money($str, $k, $key_arr)
  575. {
  576. if(checkStr($str) == 'shu')
  577. {
  578. if($key_arr[$k - 1]['str'] == '?')
  579. {
  580. return array("str"=>$str, "pos"=>$k);
  581. }
  582. else if($key_arr[$k + 1]['str'] == '?' || $key_arr[$k + 1]['str'] == '?')
  583. {
  584. return array("str"=>$str, "pos"=>$k+1);
  585. }
  586. else if($key_arr[$k - 1]['str'] == '?' && $key_arr[$k - 2]['str'] == '??')
  587. {
  588. return array("str"=>$str, "pos"=>$k);
  589. }
  590. else if($key_arr[$k - 1]['str'] == '(' && $key_arr[$k - 2]['str'] == '?')
  591. {
  592. return array("str"=>$str, "pos"=>$k);
  593. }
  594. else if($key_arr[$k + 1]['str'] == '/' && $key_arr[$k + 2]['str'] == '?')
  595. {
  596. return array("str"=>$str, "pos"=>$k+2);
  597. }
  598. else if($key_arr[$k + 1]['str'] == '??')
  599. {
  600. return array("str"=>$str, "pos"=>$k+1);
  601. }
  602. else if($key_arr[$k - 1]['str'] == ':' && $key_arr[$k - 2]['str'] == '?')
  603. {
  604. return array("str"=>$str, "pos"=>$k);
  605. }
  606. else if($key_arr[$k + 1]['str'] == '?' && $key_arr[$k + 2]['str'] == '??')
  607. {
  608. return array("str"=>$str, "pos"=>$k+2);
  609. }
  610. }
  611. return false;
  612. }
  613. //???????
  614. function analysis_huoche($key_arr)
  615. {
  616. include "words.php";
  617. $res_arr = array();
  618. $index = 0;
  619. foreach($key_arr as $k=>$a)
  620. {
  621. if($index > 0)
  622. {
  623. $index --;
  624. continue;
  625. }
  626. $str = $a['str'];
  627. $str_type = checkStr($str);
  628. //????
  629. $is_date = is_date($str, $k, $key_arr);
  630. if($is_date)
  631. {
  632. $index = $is_date['pos'] - $k;
  633. $res_arr['date'] = $is_date['date_str'];
  634. continue;
  635. //echo "-------".$is_date['date_str']."-------";
  636. }
  637. //?????????
  638. if(in_array($str,$zr_chengshi))
  639. {
  640. //echo $key_arr[$k +1]['str']."------".$key_arr[$k +2]['str'];
  641. if(in_array($key_arr[$k +1]['str'],$zr_connect_word) && in_array($key_arr[$k +2]['str'],$zr_chengshi) )
  642. {
  643. $res_arr['city']['start_city'] = $str;
  644. $res_arr['city']['end_city'] = $key_arr[$k +2]['str'];
  645. $index = 2;
  646. continue;
  647. }
  648. if(in_array($key_arr[$k +1]['str'],$zr_chengshi))
  649. {
  650. $res_arr['city']['start_city'] = $str;
  651. $res_arr['city']['end_city'] = $key_arr[$k +1]['str'];
  652. $index = 1;
  653. continue;
  654. }
  655. if(isset($res_arr['city']) && isset($res_arr['city']['start_city']))
  656. {
  657. $res_arr['city']['end_city'] = $str;
  658. }
  659. else
  660. {
  661. $res_arr['city']['start_city'] = $str;
  662. }
  663. continue;
  664. }
  665. //????
  666. if(($str == 'd') || ($str == 'D') || ($str == 'k') || ($str == 'K') || ($str == 'z')
  667. || ($str == 'Z') || ($str == 't')|| ($str == 'T') || ($str == 'l')|| ($str == 'L') )
  668. {
  669. if(checkStr($key_arr[$k +1]['str']) == 'shu' && $key_arr[$k +1]['str'] <10000)
  670. {
  671. $res_arr['train_num'] = strtoupper($str).$key_arr[$k +1]['str'];
  672. $index = 1;
  673. continue;
  674. }
  675. }
  676. //??????
  677. if(in_array($str,$zr_huoche_zuoxi))
  678. {
  679. $res_arr['seat_type'] = $str;
  680. continue;
  681. }
  682. //??????
  683. if($str_type == 'shu' || in_array($str,$zr_shuzi_hanzi) || $str == '?')
  684. {
  685. if($key_arr[$k +1]['str'] == '?' || $key_arr[$k +1]['str'] == '?')
  686. {
  687. $res_arr['count'] = $str;
  688. if(in_array($str,$zr_shuzi_hanzi)|| $str == '?')
  689. {
  690. //echo $str."---".$zr_shu_han_key[$str]."---";
  691. $res_arr['count'] = isset($zr_shu_han_key[$str]) && $zr_shu_han_key[$str]>0 ? $zr_shu_han_key[$str]:0;
  692. }
  693. $index = 1;
  694. continue;
  695. }
  696. }
  697. }
  698. //?????????????????????????????
  699. if(count($res_arr) >= 2)
  700. {
  701. $res_arr['is_huoche'] = 1;
  702. }
  703. else
  704. {
  705. $res_arr['is_huoche'] = 0;
  706. }
  707. return $res_arr;
  708. }
  709. //??????
  710. function analysis_menpiao($key_arr)
  711. {
  712. include "words.php";
  713. $res_arr = array();
  714. $index = 0;
  715. foreach($key_arr as $k=>$a)
  716. {
  717. if($index > 0)
  718. {
  719. $index --;
  720. continue;
  721. }
  722. $str = $a['str'];
  723. $str_type = checkStr($str);
  724. //????
  725. $is_money = is_money($str, $k, $key_arr);
  726. if($is_money)
  727. {
  728. $index = $is_money['pos'] - $k;
  729. $res_arr['price'] = $is_money['str'];
  730. continue;
  731. //echo "-------".$is_money['str']."-------";
  732. }
  733. //????
  734. if($str_type == 'shu' || in_array($str,$zr_shuzi_hanzi) || $str == '?')
  735. {
  736. if($key_arr[$k +1]['str'] == '?' || $key_arr[$k +1]['str'] == '?')
  737. {
  738. $res_arr['count'] = $str;
  739. if(in_array($str,$zr_shuzi_hanzi)|| $str == '?')
  740. {
  741. //echo $str."---".$zr_shu_han_key[$str]."---";
  742. $res_arr['count'] = isset($zr_shu_han_key[$str]) && $zr_shu_han_key[$str]>0 ? $zr_shu_han_key[$str]:0;
  743. }
  744. $index = 1;
  745. continue;
  746. }
  747. }
  748. //??????
  749. if($str_type == 'han')
  750. {
  751. if($str == '???')
  752. {
  753. $res_arr['detail_type'] = '???';
  754. continue;
  755. }
  756. else if($str == '???')
  757. {
  758. $res_arr['detail_type'] = '???';
  759. continue;
  760. }
  761. else if($str == '??')
  762. {
  763. $res_arr['detail_type'] = '??';
  764. continue;
  765. }
  766. else if($str == '??')
  767. {
  768. $res_arr['detail_type'] = '????';
  769. continue;
  770. }
  771. else if($str == '??' || $str == '??')
  772. {
  773. $res_arr['detail_type'] = '????';
  774. continue;
  775. }
  776. }
  777. //??????
  778. $is_date = is_date($str, $k, $key_arr);
  779. if($is_date)
  780. {
  781. $index = $is_date['pos'] - $k;
  782. $res_arr['date'] = $is_date['date_str'];
  783. continue;
  784. }
  785. }
  786. //????????????????????????????
  787. if(count($res_arr) >= 1)
  788. {
  789. $res_arr['is_menpiao'] = 1;
  790. }
  791. else
  792. {
  793. $res_arr['is_menpiao'] = 0;
  794. }
  795. return $res_arr;
  796. }
  797. // ????????? (param: ?????????)
  798. function words_cb($ar)
  799. {
  800. foreach ($ar as $tmp)
  801. {
  802. $key_arr[] = $tmp ;
  803. $tmp = iconv("GBK", "UTF-8", $tmp);
  804. echo $tmp . ' ';
  805. }
  806. //var_dump($key_arr);
  807. flush();
  808. }
  809. //????
  810. function checkStr($str){
  811. $output='';
  812. $a=ereg('['.chr(0xa1).'-'.chr(0xff).']', $str);
  813. $b=ereg('[0-9]', $str);
  814. $c=ereg('[a-zA-Z]', $str);
  815. if($a && $b && $c){ $output='hsy';}
  816. elseif($a && $b && !$c){ $output='hs';}
  817. elseif($a && !$b && $c){ $output='hy';}
  818. elseif(!$a && $b && $c){ $output='sy';}
  819. elseif($a && !$b && !$c){ $output='han';}
  820. elseif(!$a && $b && !$c){ $output='shu';}
  821. elseif(!$a && !$b && $c){ $output='ying';}
  822. return $output;
  823. }
  824. function delhtml($str){ //??HTML??
  825. $st=-1; //??
  826. $et=-1; //??
  827. $stmp=array();
  828. $stmp[]=" ";
  829. $len=strlen($str);//http://zhidao.oumeiya.net
  830. for($i=0;$i<$len;$i++){
  831. $ss=substr($str,$i,1);
  832. if(ord($ss)==60){ //ord("<")==60
  833. $st=$i;
  834. }
  835. if(ord($ss)==62){ //ord(">")==62
  836. $et=$i;
  837. if($st!=-1){
  838. $stmp[]=substr($str,$st,$et-$st+1);
  839. }
  840. }
  841. }
  842. $str=str_replace($stmp,"",$str);
  843. $str = preg_replace('/\s{2,}| /','',$str);
  844. return $str;
  845. }
  846. ?>