/snoopy/index.php
PHP | 880 lines | 839 code | 10 blank | 31 comment | 4 complexity | c75c28dc67d7b93271fb9fd63e4b1f7a MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0
- <?php
- /*
- * Created on 2010-12-22
- *
- * To change the template for this generated file go to
- * Window - Preferences - PHPeclipse - PHP - Code Templates
- */
- include "Snoopy.class.php";
/**
 * Overwrite the beginning of maxdate.txt with the newest post timestamp seen.
 *
 * The file is opened with 'r+', so it is not truncated: a value shorter than
 * the previous one would leave trailing bytes of the old value behind.
 *
 * @param string $max_date Timestamp string to store.
 * @return void
 */
function zr_save_max_date($max_date)
{
    $fp = fopen("maxdate.txt", 'r+');
    if ($fp === false) {
        // Nothing sensible to do without the state file; keep crawling.
        return;
    }
    fwrite($fp, (string)$max_date);
    fclose($fp);
}

/**
 * POST the collected posts (PHP-serialized) to the remote collector endpoint.
 *
 * @param array $content_arr Collected posts keyed by their index on the page.
 * @param bool  $verbose     When true, echo progress markers around the request.
 * @return void
 */
function zr_submit_to_sae($content_arr, $verbose)
{
    if ($verbose) {
        echo "<br>submit data";
    }
    $snoopy_tr = new Snoopy;
    $snoopy_tr->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.2; Alexa Toolbar)";
    $action = 'http://zhuanrang.sinaapp.com/?action=Zhuaqu&op=addcontent';
    $formvars = array();
    $formvars['content'] = serialize($content_arr);
    $snoopy_tr->submit($action, $formvars);
    if ($verbose) {
        echo "---submit end";
    }
}

// ---------------------------------------------------------------------------
// Log in through the Sina SSO endpoint with the HTTP client that is reused
// for all later page/API fetches.
// ---------------------------------------------------------------------------
$snoopy = new Snoopy;
$snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.2; Alexa Toolbar)";
$action = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.9)';

// The account is rotated by 5-minute slot of the current hour:
// minutes 0-4 -> weibozixun1, 5-9 -> weibozixun2, ... 55-59 -> weibozixun12.
$i_time = intval(intval(date('i')) / 5);

// NOTE(review): credentials are hard-coded in source; consider moving them to
// configuration that is kept out of version control.
$formvars = array();
$formvars['client'] = 'ssologin.js(v1.3.9)';
$formvars['encoding'] = 'utf-8';
$formvars['entry'] = 'miniblog';
$formvars['from'] = '';
$formvars['gateway'] = 1;
$formvars['returntype'] = 'META';
$formvars['savestate'] = 0;
$formvars['service'] = 'miniblog';
$formvars['url'] = 'http://t.sina.com.cn/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack';
$formvars['password'] = '12345678sina';
$formvars['username'] = 'weibozixun'.($i_time+1).'@sina.cn';
$formvars['useticket'] = 0;

// Whether collected posts are forwarded to the remote collector.
$is_trans_sae = true;
$snoopy->submit($action,$formvars);

// ---------------------------------------------------------------------------
// Load the timestamp of the newest post handled by the previous run.
// 'a+' creates the file when it does not exist yet.
// ---------------------------------------------------------------------------
$last_time = '';
$fp = fopen("maxdate.txt",'a+');
if ($fp !== false) {
    // 'a+' may place the pointer at end-of-file; make the read position explicit.
    rewind($fp);
    $last_time = fread($fp , 20);
    fclose($fp);
}

// Newest timestamp seen during this run.
$max_date = $last_time;

for ($page = 1; $page <= 50; $page++) {
    echo "<br>page:".$page."......<br>";

    // Alternative search URLs kept for reference (same endpoint, other keywords):
    //   http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E6%2589%258B%25E6%259C%25BA&page=
    //   http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E9%2597%25A8%25E7%25A5%25A8&page=
    //   http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E5%258D%25A1&page=
    //   http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E7%2594%25B5%25E8%2584%2591&page=
    //   http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9%2520%25E7%2581%25AB%25E8%25BD%25A6%25E7%25A5%25A8&page=

    // Reset per page: a failed fetch must not re-process the previous page's HTML.
    $temp = '';
    if ($snoopy->fetch("http://t.sina.com.cn/k/%25E8%25BD%25AC%25E8%25AE%25A9&page=".$page)) {
        $temp = $snoopy->results;
    } else {
        echo "no fetch content<br>";
    }

    $content_arr = array();
    if (!preg_match_all("/<div class=\"MIB_feed_c\">(.*)<div id=\"_comment_list/Us",$temp,$tt)) {
        echo "no content1<br>";
    } else {
        // Avatar blocks; indexed in parallel with the feed blocks in $tt[0].
        preg_match_all("/<div class=\"head_pic\">(.*)<\/div>/Us",$temp,$tt_head);

        foreach ($tt[0] as $k => $t) {
            // --- post timestamp -------------------------------------------------
            $post_date = '';
            if (preg_match("/<strong date=\"(.*)\"/Us",$t,$date_temp)) {
                $post_date = $date_temp[1];
            }
            echo "<br>lasttime:".$last_time."---datetime:".$post_date."<br>";

            // Reached a post that is not newer than the previous run: persist
            // state, flush what was collected on this page, and stop.
            // An unparsable date yields '' which also satisfies this test, so
            // the run stops there as well (unchanged behaviour).
            if ($post_date <= $last_time) {
                echo "---".$max_date;
                zr_save_max_date($max_date);
                if (!empty($content_arr) && $is_trans_sae) {
                    zr_submit_to_sae($content_arr, true);
                }
                exit();
            }
            if ($max_date < $post_date) {
                $max_date = $post_date;
            }
            // NOTE(review): 'updae_time' looks like a typo for 'update_time', but
            // the key is part of the payload sent to the collector; do not rename
            // without changing the receiving side.
            $content_arr[$k]['updae_time'] = $post_date;

            // --- short post id taken from the permalink ------------------------
            preg_match("/<div class=\"lf MIB_txtbl\">(.*)\">/Us",$t,$mid_temp);
            $mid = str_replace("<div class=\"lf MIB_txtbl\">","",isset($mid_temp[0]) ? $mid_temp[0] : '');
            $mid = str_replace("<cite><a href=\"","",$mid);
            $mid = str_replace("\">","",$mid);
            $mid = str_replace("http://t.sina.com.cn/","",$mid);
            $mid = trim($mid);
            // Keep everything from the first slash to the end of the line, then
            // drop the slashes themselves.
            preg_match("/\/.*/",$mid,$mid_path);
            $mid = str_replace("/","",isset($mid_path[0]) ? $mid_path[0] : '');
            $content_arr[$k]['mid'] = $mid;

            if (preg_match_all("/<p class=\"sms\"(.*)<\/p>/Us",$t,$content)) {
                // Only the first message paragraph is used; strip the search
                // highlighting spans from it.
                $sms = $content[0][0];
                $sms = str_replace("<span style='color: red;'>","",$sms);
                $sms = str_replace("<span style=\"color: red;\">","",$sms);
                $sms = str_replace("</span>","",$sms);

                preg_match("/title=\"(.*)\"/Us",$sms,$title);
                $content_arr[$k]['user_name'] = isset($title[1]) ? $title[1] : '';

                preg_match("/mid=\"(.*)\"/Us",$sms,$sid_temp);
                $content_arr[$k]['sid'] = isset($sid_temp[1]) ? $sid_temp[1] : '';

                $head_html = isset($tt_head[0][$k]) ? $tt_head[0][$k] : '';
                preg_match("/dynamic-src=\"(.*)\"/Us",$head_html,$user_img_temp);
                $content_arr[$k]['user_img'] = isset($user_img_temp[1]) ? $user_img_temp[1] : '';

                preg_match("/<a href=\"http:\/\/t.sina.com.cn\/(.*)\"/Us",$sms,$uid_temp);
                $content_arr[$k]['user_id'] = isset($uid_temp[1]) ? $uid_temp[1] : '';

                // --- message body: drop the opening <p ...> tag, the closing
                // </p> tags and the first profile link ---------------------------
                $m_content = preg_replace("/<p class=\"sms\" mid=(.*)type=\"\d\">/U", "", $sms);
                $m_content = preg_replace("/<\/p>/U", "", $m_content);
                $m_content = preg_replace("/<a href=\"http:\/\/t.sina.com.cn\/(.*)<\/a>/U", "", $m_content, 1);
                // NOTE(review): the "?" below appears to be a mis-encoded
                // non-ASCII character in this copy of the file - confirm.
                $m_content = str_replace("?", "", $m_content);
                $m_content = str_replace("<img dynamic-src", "<img src", $m_content);
                $content_arr[$k]['weibo_content'] = $m_content;

                // Posts containing "//@" are dropped. strpos() returns 0 for a
                // match at the very start, so compare against false explicitly.
                if (strpos($m_content,"//@") !== false) {
                    $unset = 1;
                } else {
                    echo "---api excute";
                    // Reset per post: a failed API call must not classify the
                    // previous post's text.
                    $weibo_obj = null;

                    // Resolve the short id to a numeric id, then load the status.
                    $apiURL = "http://api.t.sina.com.cn/queryid.json?source=733331953&isBase62=1&type=1&mid=".$content_arr[$k]['mid'];
                    if ($snoopy->fetch($apiURL)) {
                        $id_info = json_decode($snoopy->results,true);
                        $weibo_id = isset($id_info['id']) ? $id_info['id'] : '';
                        // NOTE(review): the ':' before the id looks like a
                        // leftover from the API docs' ":id" placeholder - confirm.
                        $apiURL = "http://api.t.sina.com.cn/statuses/show/:".$weibo_id.".json?source=733331953";
                        if ($snoopy->fetch($apiURL)) {
                            $weibo_obj = json_decode($snoopy->results,true);

                            if (isset($weibo_obj['user']['province'])) {
                                $content_arr[$k]['province'] = $weibo_obj['user']['province'];
                            }
                            if (isset($weibo_obj['user']['city'])) {
                                $content_arr[$k]['city'] = $weibo_obj['user']['city'];
                            }
                            $content_arr[$k]['user_verified'] = isset($weibo_obj['user']['verified'])
                                ? $weibo_obj['user']['verified']
                                : null;
                        }
                    }

                    // filter() is fed GBK text.
                    $weibo_text = isset($weibo_obj['text']) ? (string)$weibo_obj['text'] : '';
                    $m_content_gbk = iconv("UTF-8", "GBK", $weibo_text);
                    $f_ret = filter($m_content_gbk);

                    if (empty($f_ret["type"])) {
                        // Could not be classified: drop the post.
                        $unset = 1;
                    } else {
                        $unset = 0;
                        foreach ($f_ret as $k_f => $f) {
                            $content_arr[$k][$k_f] = $f;
                        }
                    }
                }
                if ($unset == 1) {
                    unset($content_arr[$k]);
                }
            }
        }
    }

    // After the first page, persist the newest timestamp seen so far.
    if ($page == 1) {
        zr_save_max_date($max_date);
    }
    // Forward this page's posts to the collector.
    if (!empty($content_arr) && $is_trans_sae) {
        zr_submit_to_sae($content_arr, false);
    }
}
/**
 * Segment a post into words and classify it into one listing category.
 *
 * The text is segmented with the PSCWS class loaded from ../pscws23/, each
 * segment is converted from GBK to UTF-8, and the segments are scanned in
 * order. The first segment that appears in one of the category word lists
 * (loaded from words.php) decides the category:
 *   - "huoche"  : accepted only when analysis_huoche() reports is_huoche == 1
 *   - "menpiao" : accepted only when analysis_menpiao() reports is_menpiao == 1
 *   - "dazhe", "shouji", "shuma", "diannao", "dianqi", "qiche", "jiaju",
 *     "wangpu" : accepted immediately; details come from get_type_and_price()
 * A segment that matches the "huoche" or "menpiao" list but fails its analysis
 * is not tried against the remaining lists.
 *
 * @param string $content Text to classify; the visible caller passes GBK-encoded text.
 * @return array Always contains "type" ('' when nothing matched). Depending on
 *               the category it may also contain "start_time", "start_station",
 *               "end_station", "train_num", "train_type", "count", "price"
 *               and "detail_type".
 */
function filter($content)
{
    // Segmenter configuration.
    $dict = '../pscws23/dict/dict.txt';   // plain-text dictionary
    $version = 3;
    $autodis = false;
    $ignore = false;
    $debug = false;
    $stats = true;

    $object = 'PSCWS' . $version;
    require_once ('../pscws23/'.strtolower($object) . '.class.php');
    // Defines the $zr_* word lists in this function's scope.
    include "words.php";

    $cws = new $object($dict);
    $cws->set_ignore_mark($ignore);
    $cws->set_autodis($autodis);
    $cws->set_debug($debug);
    $cws->set_statistics($stats);
    $statistics = $cws->segment($content, '');

    // Normalise the segments into the token structure shared by the analysers.
    $key_arr = array();
    if (is_array($statistics) || $statistics instanceof Traversable) {
        foreach ($statistics as $k => $stat) {
            $key_arr[] = array('str' => iconv("GBK", "UTF-8", $stat), 'times' => 1, 'poses' => $k);
        }
    }

    // Categories that share the same handling, in matching priority order.
    $simple_categories = array(
        'dazhe'   => isset($zr_dazhe) ? $zr_dazhe : array(),
        'shouji'  => isset($zr_shouji) ? $zr_shouji : array(),
        'shuma'   => isset($zr_shuma) ? $zr_shuma : array(),
        'diannao' => isset($zr_diannao) ? $zr_diannao : array(),
        'dianqi'  => isset($zr_dianqi) ? $zr_dianqi : array(),
        'qiche'   => isset($zr_qiche) ? $zr_qiche : array(),
        'jiaju'   => isset($zr_jiaju) ? $zr_jiaju : array(),
        'wangpu'  => isset($zr_wangpu) ? $zr_wangpu : array(),
    );

    $res_arr = array();
    $type = "";

    foreach ($key_arr as $a) {
        $str = $a['str'];
        // Only segments made purely of high-byte characters can be category words.
        if (checkStr($str) != 'han') {
            continue;
        }

        if (in_array($str, $zr_huoche)) {
            $res_huoche_arr = analysis_huoche($key_arr);
            if ($res_huoche_arr['is_huoche'] == 1) {
                $type = "huoche";
                if (isset($res_huoche_arr['date'])) {
                    $res_arr['start_time'] = $res_huoche_arr['date'];
                }
                if (isset($res_huoche_arr['city']['start_city'])) {
                    $res_arr['start_station'] = $res_huoche_arr['city']['start_city'];
                }
                if (isset($res_huoche_arr['city']['end_city'])) {
                    $res_arr['end_station'] = $res_huoche_arr['city']['end_city'];
                }
                if (isset($res_huoche_arr['train_num'])) {
                    $res_arr['train_num'] = $res_huoche_arr['train_num'];
                }
                if (isset($res_huoche_arr['seat_type'])) {
                    $res_arr['train_type'] = $res_huoche_arr['seat_type'];
                }
                if (isset($res_huoche_arr['count'])) {
                    $res_arr['count'] = $res_huoche_arr['count'];
                }
            }
        } else if (in_array($str, $zr_menpiao)) {
            $res_menpiao_arr = analysis_menpiao($key_arr);
            if ($res_menpiao_arr['is_menpiao'] == 1) {
                $type = "menpiao";
                if (isset($res_menpiao_arr['price'])) {
                    $res_arr['price'] = $res_menpiao_arr['price'];
                }
                if (isset($res_menpiao_arr['count'])) {
                    $res_arr['count'] = $res_menpiao_arr['count'];
                }
                if (isset($res_menpiao_arr['detail_type'])) {
                    $res_arr['detail_type'] = $res_menpiao_arr['detail_type'];
                }
                if (isset($res_menpiao_arr['date'])) {
                    $res_arr['start_time'] = $res_menpiao_arr['date'];
                }
            }
        } else {
            foreach ($simple_categories as $category => $words) {
                if (!in_array($str, $words)) {
                    continue;
                }
                $type = $category;
                $pd_arr = get_type_and_price($str, $key_arr, $type);
                if (isset($pd_arr['detail_type'])) {
                    $res_arr['detail_type'] = $pd_arr['detail_type'];
                }
                if (isset($pd_arr['price'])) {
                    $res_arr['price'] = $pd_arr['price'];
                }
                break;
            }
        }

        // Once a category is decided no later segment can change the result.
        if ($type !== "") {
            break;
        }
    }

    $res_arr["type"] = $type;
    return $res_arr;
}
/**
 * Return the current Unix timestamp with microsecond resolution.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function get_microtime()
{
    // microtime(true) yields the float directly; no need to parse and add the
    // "usec sec" string form by hand.
    return microtime(true);
}
/**
 * Scan the token list for a price and for a known detail type of a category.
 *
 * Every token is checked with is_money(); otherwise, if checkStr() classifies
 * it as 'han' and it is listed in $detail_type[$type] (a table expected to be
 * defined by type_config.php - confirm), it is taken as the detail type.
 * When several tokens qualify, the last one wins for each field.
 *
 * @param string $str     Unused; kept for interface compatibility.
 * @param array  $key_arr Token list; each entry is an array with a 'str' element.
 * @param string $type    Category key used to select the detail-type list.
 * @return array May contain 'price' and/or 'detail_type'; empty when nothing matched.
 */
function get_type_and_price($str, $key_arr, $type)
{
    include "type_config.php";

    // Tolerate a category without a configured detail-type list.
    $known_types = (isset($detail_type[$type]) && is_array($detail_type[$type]))
        ? $detail_type[$type]
        : array();

    $res_arr = array();
    foreach ($key_arr as $k => $a) {
        $token = $a['str'];

        $is_money = is_money($token, $k, $key_arr);
        if ($is_money) {
            $res_arr['price'] = $is_money['str'];
            continue;
        }

        if (checkStr($token) == 'han' && in_array($token, $known_types)) {
            $res_arr['detail_type'] = $token;
        }
    }
    return $res_arr;
}
/**
 * Decide whether the token at position $k is part of a date expression.
 *
 * Three shapes are recognised (a "number" is a token checkStr() reports as 'shu'):
 *   1. number, MARK, number, MARK   with $str being the first MARK
 *        -> current year + previous number + next number
 *   2. number, MARK                 with $str being the MARK
 *        -> current year and month + previous number
 *   3. number '-' number ['-' number] with $str being the first number
 *        -> three parts: the first is used as the year only when > 1000,
 *           otherwise the current year is used and the first number is dropped;
 *           two parts: current year + both numbers (only when the token before
 *           $str is not a number).
 *
 * NOTE(review): the "?" marker literals appear to be mis-encoded non-ASCII
 * characters in this copy of the file (which is why some comparisons look
 * duplicated). They are reproduced unchanged - restore from the original source.
 *
 * @param string $str     Token text at position $k.
 * @param int    $k       Index of the token in $key_arr.
 * @param array  $key_arr Token list; each entry is an array with a 'str' element.
 * @return array|false array('date_str' => string, 'pos' => int index of the
 *                     last token belonging to the date), or false if no date.
 */
function is_date($str, $k, $key_arr)
{
    // Neighbouring tokens; null when the position is outside the list, which
    // makes every comparison below fail instead of raising an offset warning.
    $prev1 = isset($key_arr[$k - 1]['str']) ? $key_arr[$k - 1]['str'] : null;
    $next1 = isset($key_arr[$k + 1]['str']) ? $key_arr[$k + 1]['str'] : null;
    $next2 = isset($key_arr[$k + 2]['str']) ? $key_arr[$k + 2]['str'] : null;
    $next3 = isset($key_arr[$k + 3]['str']) ? $key_arr[$k + 3]['str'] : null;
    $next4 = isset($key_arr[$k + 4]['str']) ? $key_arr[$k + 4]['str'] : null;

    if ($str === '?') {
        // Shape 1: <number> MARK <number> MARK
        if (checkStr($prev1) == 'shu'
            && checkStr($next1) == 'shu'
            && ($next2 === "?" || $next2 === "?")
        ) {
            return array("date_str" => date('Y').'-'.$prev1.'-'.$next1, "pos" => $k + 2);
        }
    } else if ($str === "?" || $str === "?") {
        // Shape 2: <number> MARK
        if (checkStr($prev1) == 'shu') {
            return array("date_str" => date('Y-m').'-'.$prev1, "pos" => $k);
        }
    } else if (checkStr($str) == 'shu') {
        // Shape 3: dash-separated numbers starting at $str.
        if ($next1 === '-' && checkStr($next2) == 'shu') {
            if ($next3 === '-' && checkStr($next4) == 'shu') {
                $year = intval($str) > 1000 ? $str : date('Y');
                return array("date_str" => $year.'-'.$next2.'-'.$next4, "pos" => $k + 4);
            }
            if ($next3 !== '-' && checkStr($prev1) != 'shu') {
                return array("date_str" => date('Y').'-'.$str.'-'.$next2, "pos" => $k + 2);
            }
        }
    }
    return false;
}
/**
 * Decide whether the numeric token at position $k is a price.
 *
 * The token must be classified as 'shu' by checkStr(); it is then accepted
 * when one of several marker patterns surrounds it (a marker directly before
 * or after it, or a two-token marker sequence such as "(" / "/" / ":" combined
 * with a marker). Patterns are tried in a fixed order and the first hit wins.
 *
 * NOTE(review): the "?" / "??" marker literals appear to be mis-encoded
 * non-ASCII characters in this copy of the file (which is why some branches
 * look duplicated or unreachable). They are reproduced unchanged - restore
 * from the original source.
 *
 * @param string $str     Token text at position $k.
 * @param int    $k       Index of the token in $key_arr.
 * @param array  $key_arr Token list; each entry is an array with a 'str' element.
 * @return array|false array('str' => the number, 'pos' => int index of the last
 *                     token belonging to the price expression), or false.
 */
function is_money($str, $k, $key_arr)
{
    if (checkStr($str) != 'shu') {
        return false;
    }

    // Neighbouring tokens; null when the position is outside the list, which
    // makes every comparison below fail instead of raising an offset warning.
    $prev1 = isset($key_arr[$k - 1]['str']) ? $key_arr[$k - 1]['str'] : null;
    $prev2 = isset($key_arr[$k - 2]['str']) ? $key_arr[$k - 2]['str'] : null;
    $next1 = isset($key_arr[$k + 1]['str']) ? $key_arr[$k + 1]['str'] : null;
    $next2 = isset($key_arr[$k + 2]['str']) ? $key_arr[$k + 2]['str'] : null;

    if ($prev1 === '?') {
        return array("str" => $str, "pos" => $k);
    }
    if ($next1 === '?' || $next1 === '?') {
        return array("str" => $str, "pos" => $k + 1);
    }
    if ($prev1 === '?' && $prev2 === '??') {
        return array("str" => $str, "pos" => $k);
    }
    if ($prev1 === '(' && $prev2 === '?') {
        return array("str" => $str, "pos" => $k);
    }
    if ($next1 === '/' && $next2 === '?') {
        return array("str" => $str, "pos" => $k + 2);
    }
    if ($next1 === '??') {
        return array("str" => $str, "pos" => $k + 1);
    }
    if ($prev1 === ':' && $prev2 === '?') {
        return array("str" => $str, "pos" => $k);
    }
    if ($next1 === '?' && $next2 === '??') {
        return array("str" => $str, "pos" => $k + 2);
    }
    return false;
}
/**
 * Extract the fields of a "huoche" listing from a token list.
 *
 * Tokens are scanned left to right. For each token, in this order:
 *   - a date expression (is_date())                          -> 'date'
 *   - a word from $zr_chengshi, optionally followed by a connecting word
 *     and/or a second such word                              -> 'city' (start_city / end_city)
 *   - one of the letters D, K, Z, T, L (either case) followed by a number
 *     below 10000                                            -> 'train_num'
 *   - a word from $zr_huoche_zuoxi                           -> 'seat_type'
 *   - a number (digits or a word from $zr_shuzi_hanzi) followed by a unit
 *     marker                                                 -> 'count'
 * Tokens consumed as part of a multi-token match are skipped.
 * The word lists are loaded from words.php.
 *
 * NOTE(review): the "?" literals appear to be mis-encoded non-ASCII characters
 * in this copy of the file; they are reproduced unchanged - restore from the
 * original source.
 *
 * @param array $key_arr Token list; each entry is an array with a 'str' element.
 * @return array Extracted fields plus 'is_huoche': 1 when at least two
 *               different fields were found, otherwise 0.
 */
function analysis_huoche($key_arr)
{
    include "words.php";

    $train_letters = array('d', 'D', 'k', 'K', 'z', 'Z', 't', 'T', 'l', 'L');
    $res_arr = array();
    $index = 0;   // number of upcoming tokens to skip

    foreach ($key_arr as $k => $a) {
        if ($index > 0) {
            $index--;
            continue;
        }

        $str = $a['str'];
        $str_type = checkStr($str);
        // Look-ahead tokens; null past the end of the list so the checks
        // below simply fail instead of raising an offset warning.
        $next1 = isset($key_arr[$k + 1]['str']) ? $key_arr[$k + 1]['str'] : null;
        $next2 = isset($key_arr[$k + 2]['str']) ? $key_arr[$k + 2]['str'] : null;

        // Date.
        $is_date = is_date($str, $k, $key_arr);
        if ($is_date) {
            $index = $is_date['pos'] - $k;
            $res_arr['date'] = $is_date['date_str'];
            continue;
        }

        // Departure / destination.
        if (in_array($str, $zr_chengshi)) {
            if (in_array($next1, $zr_connect_word) && in_array($next2, $zr_chengshi)) {
                // "<city> <connector> <city>"
                $res_arr['city']['start_city'] = $str;
                $res_arr['city']['end_city'] = $next2;
                $index = 2;
                continue;
            }
            if (in_array($next1, $zr_chengshi)) {
                // "<city> <city>"
                $res_arr['city']['start_city'] = $str;
                $res_arr['city']['end_city'] = $next1;
                $index = 1;
                continue;
            }
            // A lone city: the first one seen is the start, later ones the end.
            if (isset($res_arr['city']) && isset($res_arr['city']['start_city'])) {
                $res_arr['city']['end_city'] = $str;
            } else {
                $res_arr['city']['start_city'] = $str;
            }
            continue;
        }

        // Train number: class letter followed by a number below 10000.
        if (in_array($str, $train_letters, true)) {
            if (checkStr($next1) == 'shu' && $next1 < 10000) {
                $res_arr['train_num'] = strtoupper($str).$next1;
                $index = 1;
                continue;
            }
        }

        // Seat type.
        if (in_array($str, $zr_huoche_zuoxi)) {
            $res_arr['seat_type'] = $str;
            continue;
        }

        // Quantity: a number followed by a unit marker.
        if ($str_type == 'shu' || in_array($str, $zr_shuzi_hanzi) || $str == '?') {
            if ($next1 === '?' || $next1 === '?') {
                $res_arr['count'] = $str;
                if (in_array($str, $zr_shuzi_hanzi) || $str == '?') {
                    // Number written as a word: translate through the lookup table.
                    $res_arr['count'] = isset($zr_shu_han_key[$str]) && $zr_shu_han_key[$str] > 0
                        ? $zr_shu_han_key[$str]
                        : 0;
                }
                $index = 1;
                continue;
            }
        }
    }

    // Require at least two different fields before accepting the listing.
    $res_arr['is_huoche'] = (count($res_arr) >= 2) ? 1 : 0;
    return $res_arr;
}
/**
 * Extract the fields of a "menpiao" listing from a token list.
 *
 * Tokens are scanned left to right. For each token, in this order:
 *   - a price expression (is_money())                        -> 'price'
 *   - a number (digits or a word from $zr_shuzi_hanzi) followed by a unit
 *     marker                                                 -> 'count'
 *   - one of a fixed set of keywords                         -> 'detail_type'
 *   - a date expression (is_date())                          -> 'date'
 * Tokens consumed as part of a multi-token match are skipped.
 * The word lists are loaded from words.php.
 *
 * NOTE(review): the "?" / "??" / "???" literals appear to be mis-encoded
 * non-ASCII characters in this copy of the file (which is why some keyword
 * branches look duplicated or unreachable). They are reproduced unchanged,
 * in their original order - restore from the original source.
 *
 * @param array $key_arr Token list; each entry is an array with a 'str' element.
 * @return array Extracted fields plus 'is_menpiao': 1 when at least one field
 *               was found, otherwise 0.
 */
function analysis_menpiao($key_arr)
{
    include "words.php";

    $res_arr = array();
    $index = 0;   // number of upcoming tokens to skip

    foreach ($key_arr as $k => $a) {
        if ($index > 0) {
            $index--;
            continue;
        }

        $str = $a['str'];
        $str_type = checkStr($str);
        // Look-ahead token; null past the end of the list so the check below
        // simply fails instead of raising an offset warning.
        $next1 = isset($key_arr[$k + 1]['str']) ? $key_arr[$k + 1]['str'] : null;

        // Price.
        $is_money = is_money($str, $k, $key_arr);
        if ($is_money) {
            $index = $is_money['pos'] - $k;
            $res_arr['price'] = $is_money['str'];
            continue;
        }

        // Quantity: a number followed by a unit marker.
        if ($str_type == 'shu' || in_array($str, $zr_shuzi_hanzi) || $str == '?') {
            if ($next1 === '?' || $next1 === '?') {
                $res_arr['count'] = $str;
                if (in_array($str, $zr_shuzi_hanzi) || $str == '?') {
                    // Number written as a word: translate through the lookup table.
                    $res_arr['count'] = isset($zr_shu_han_key[$str]) && $zr_shu_han_key[$str] > 0
                        ? $zr_shu_han_key[$str]
                        : 0;
                }
                $index = 1;
                continue;
            }
        }

        // Detail type keyword; the first matching comparison wins.
        if ($str_type == 'han') {
            if ($str == '???') {
                $res_arr['detail_type'] = '???';
                continue;
            }
            if ($str == '???') {
                $res_arr['detail_type'] = '???';
                continue;
            }
            if ($str == '??') {
                $res_arr['detail_type'] = '??';
                continue;
            }
            if ($str == '??') {
                $res_arr['detail_type'] = '????';
                continue;
            }
            if ($str == '??' || $str == '??') {
                $res_arr['detail_type'] = '????';
                continue;
            }
        }

        // Date.
        $is_date = is_date($str, $k, $key_arr);
        if ($is_date) {
            $index = $is_date['pos'] - $k;
            $res_arr['date'] = $is_date['date_str'];
            continue;
        }
    }

    // A single extracted field is enough to accept the listing.
    $res_arr['is_menpiao'] = (count($res_arr) >= 1) ? 1 : 0;
    return $res_arr;
}
/**
 * Print a list of segmented words, converting each from GBK to UTF-8.
 *
 * Each word is echoed followed by a single space, then the output is flushed.
 *
 * @param array $ar Words in GBK encoding.
 * @return void
 */
function words_cb($ar)
{
    foreach ($ar as $word) {
        echo iconv("GBK", "UTF-8", $word) . ' ';
    }

    flush();
}
/**
 * Classify a string by the kinds of characters it contains.
 *
 * Three byte-wise tests are made: any byte in 0xA1-0xFF ("h"), any ASCII
 * digit ("s"), any ASCII letter ("y"). The result names the combination:
 *   'han'  high bytes only        'hs'  high bytes + digits
 *   'shu'  digits only            'hy'  high bytes + letters
 *   'ying' letters only           'sy'  digits + letters
 *   'hsy'  all three              ''    none of them (e.g. punctuation, empty)
 *
 * @param string $str String to classify; null is treated as ''.
 * @return string One of the codes listed above.
 */
function checkStr($str)
{
    $str = (string)$str;

    // ereg() was removed in PHP 7.0; preg_match() without the /u modifier
    // performs the same byte-wise matching.
    $has_high = preg_match('/[\xa1-\xff]/', $str) === 1;
    $has_digit = preg_match('/[0-9]/', $str) === 1;
    $has_letter = preg_match('/[a-zA-Z]/', $str) === 1;

    $code = ($has_high ? 'h' : '') . ($has_digit ? 's' : '') . ($has_letter ? 'y' : '');

    // Single-kind results use their long names; combinations use the letters.
    $single = array('h' => 'han', 's' => 'shu', 'y' => 'ying');
    return isset($single[$code]) ? $single[$code] : $code;
}
/**
 * Remove HTML tags and spaces from a string.
 *
 * Every span running from the most recently seen "<" up to a ">" is collected
 * and deleted from the string, together with every " ". Afterwards runs of
 * two or more whitespace characters and any remaining single spaces are
 * removed as well.
 *
 * NOTE(review): the " " literals may originally have been a different
 * character or entity that was lost in this copy of the file - confirm.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text without tags and spaces.
 */
function delhtml($str)
{
    $tag_start = -1;            // offset of the latest "<", -1 while none was seen
    $to_remove = array(" ");
    $length = strlen($str);

    for ($pos = 0; $pos < $length; $pos++) {
        $char = substr($str, $pos, 1);
        if ($char === '<') {
            $tag_start = $pos;
            continue;
        }
        if ($char === '>' && $tag_start != -1) {
            // The start offset is not reset, so a later stray ">" collects
            // again from the same "<".
            $to_remove[] = substr($str, $tag_start, $pos - $tag_start + 1);
        }
    }

    $without_tags = str_replace($to_remove, "", $str);
    return preg_replace('/\s{2,}| /', '', $without_tags);
}
- ?>