/GetNews/modules/getnews/admin/Library.php
PHP | 201 lines | 196 code | 5 blank | 0 comment | 13 complexity | 587e4169e6b274648ca6edbf86e82564 MD5 | raw file
- <?php
/**
 * Helpers that mirror an HTML page into XML files and derive XPath
 * expressions from positional attributes stored in those files.
 *
 * Every element written by CreateXML()/createNode() carries three attributes,
 * always in this order:
 *   - "cha":   tag name of the parent element
 *   - "vtcha": the parent's own "vt" value
 *   - "vt":    1-based position among same-named siblings
 */
class xmlLibrary
{
    /**
     * Create a node in the XML document: copies $element (tag name only) and,
     * recursively, its element descendants under $nodecha.
     *
     * Only element nodes are mirrored; text, CDATA, comments and any other
     * node kinds are skipped because they cannot be recreated as elements.
     *
     * @param DOMDocument $xml     Target document that owns the new nodes.
     * @param DOMElement  $nodecha Parent element in the target document.
     * @param DOMNode     $element Source node from the parsed HTML document.
     * @return void
     */
    public function createNode($xml, $nodecha, $element)
    {
        if ($element->nodeType !== XML_ELEMENT_NODE) {
            return;
        }

        $nodename = $element->nodeName;
        $node = $xml->createElement($nodename, "");
        $node->setAttribute("cha", $nodecha->nodeName);
        $node->setAttribute("vtcha", $nodecha->getAttribute("vt"));

        // Position = 1 + number of same-named siblings already appended.
        $count = 1;
        foreach ($nodecha->childNodes as $sibling) {
            if ($sibling->nodeName === $nodename) {
                $count++;
            }
        }
        $node->setAttribute("vt", (string) $count);
        $nodecha->appendChild($node);

        foreach ($element->childNodes as $child) {
            $this->createNode($xml, $node, $child);
        }
    }

    /**
     * Create the XML file: downloads $url, parses it as HTML and writes a
     * skeleton of its element tree (with cha/vtcha/vt attributes) to $fileName.
     *
     * @param string $url      Location readable by file_get_contents().
     * @param string $fileName Destination path of the XML file.
     * @return bool True when the file was written, false when the page could
     *              not be fetched, parsed or saved.
     */
    public function CreateXML($url, $fileName)
    {
        $file = $this->fetchMarkup($url);
        if ($file === false) {
            return false;
        }

        $doc = new DOMDocument();
        $doc->encoding = "utf-8";
        if (!$this->parseHtml($doc, $file) || $doc->documentElement === null) {
            return false;
        }

        $xmldoc = new DOMDocument();
        $xmldoc->encoding = "utf-8";
        $xmldoc->formatOutput = true;

        $RootNode = $doc->documentElement;
        $root = $xmldoc->createElement($RootNode->nodeName, "");
        $root->setAttribute("cha", $RootNode->nodeName);
        $root->setAttribute("vtcha", "1");
        $root->setAttribute("vt", "1");
        $xmldoc->appendChild($root);

        foreach ($RootNode->childNodes as $element) {
            $this->createNode($xmldoc, $root, $element);
        }

        return $xmldoc->save($fileName) !== false;
    }

    /**
     * Create the XHTML file: downloads $url, strips the doctype and the
     * attributes of the <html> tag, parses the result as HTML and saves it
     * as XML.
     *
     * @param string $url      Location readable by file_get_contents().
     * @param string $filename Destination path of the XHTML file.
     * @return bool True when the file was written, false otherwise.
     */
    public function CreateXHTML($url, $filename)
    {
        $file = $this->fetchMarkup($url);
        if ($file === false) {
            return false;
        }

        // Case-insensitive so that "<!doctype html>" and "<HTML ...>" are handled too.
        $file = preg_replace('/<html\s*.*?>/i', '<html>', $file);
        $file = preg_replace('/<!DOCTYPE.*?>/i', '', $file);

        $dom = new DOMDocument();
        $dom->formatOutput = true;
        $dom->encoding = 'utf-8';
        if (!$this->parseHtml($dom, $file)) {
            return false;
        }

        return $dom->save($filename) !== false;
    }

    /**
     * Get one node from its attributes: finds the first element in $filename
     * whose tag name and cha/vtcha/vt attributes match the arguments.
     *
     * Attributes are read by name, so elements with missing attributes simply
     * do not match.
     *
     * @param string     $nodeName Tag name to look for.
     * @param string     $cha      Expected "cha" attribute (parent tag name).
     * @param string|int $vtcha    Expected "vtcha" attribute.
     * @param string|int $vt       Expected "vt" attribute.
     * @param string     $filename XML file produced by CreateXML().
     * @return DOMElement|null The matching element; the document element when
     *                         nothing matches; null when the file has no root.
     */
    public function GetNodefrom($nodeName, $cha, $vtcha, $vt, $filename)
    {
        $dom = new DOMDocument();
        $dom->load($filename);

        foreach ($dom->getElementsByTagName($nodeName) as $node) {
            if ($node->nodeName === (string) $nodeName
                && $node->getAttribute('cha') === (string) $cha
                && $node->getAttribute('vtcha') === (string) $vtcha
                && $node->getAttribute('vt') === (string) $vt
            ) {
                return $node;
            }
        }

        return $dom->documentElement;
    }

    /**
     * Get the XPath of a node: prefixes $xpath with one "parent[vtcha]/" step
     * per ancestor, stopping below the element named "html" (or at the top of
     * the document).
     *
     * @param DOMNode|null $node  Node whose ancestors are walked.
     * @param string       $xpath Path accumulated so far (the node's own step).
     * @return string The path relative to the "html" element.
     */
    public function CreateXpathFrom($node, $xpath)
    {
        $current = $node;
        while ($current instanceof DOMElement) {
            $parent = $current->parentNode;
            // Stop at the document node or once the parent is <html>.
            if (!($parent instanceof DOMElement) || $parent->nodeName === 'html') {
                break;
            }
            $xpath = $parent->nodeName . '[' . $current->getAttribute('vtcha') . ']/' . $xpath;
            $current = $parent;
        }

        return $xpath;
    }

    /**
     * Get the XPath from a node's attributes: locates the node with
     * GetNodefrom() and builds its path with CreateXpathFrom().
     *
     * @param string     $nodeName Tag name of the node.
     * @param string     $cha      Parent tag name.
     * @param string|int $vtcha    Parent position.
     * @param string|int $vt       Node position among same-named siblings.
     * @param string     $filename XML file produced by CreateXML().
     * @return string
     */
    public function CreateXpath($nodeName, $cha, $vtcha, $vt, $filename)
    {
        $node = $this->GetNodefrom($nodeName, $cha, $vtcha, $vt, $filename);
        return $this->CreateXpathFrom($node, $nodeName . '[' . $vt . ']');
    }

    /**
     * Get the value of an XPath: concatenates the string value of every
     * element matched by $xpath in the XML file $filename.
     *
     * @param string $xpath    XPath evaluated relative to the root element.
     * @param string $filename Path of the XML file to load.
     * @return string Concatenated values; empty when nothing matches or the
     *                expression is invalid.
     */
    public function GetValueFromXpath($xpath, $filename)
    {
        $simpleXMLElement = new SimpleXMLElement($filename, 0, true);
        $values = $simpleXMLElement->xpath($xpath);

        $value = "";
        // xpath() does not always return an array (e.g. invalid expression).
        if (is_array($values)) {
            foreach ($values as $item) {
                $value .= (string) $item;
            }
        }

        return $value;
    }

    /**
     * Replace (or add) the child <$tagName> of $parentNode with a new element
     * holding $value, then save the document to $fileName.
     *
     * Only a direct child of $parentNode is removed, and the value is added
     * as a text node so that characters such as "&" are escaped correctly.
     *
     * @param DOMDocument $dom        Document that owns $parentNode.
     * @param string      $fileName   Path the document is saved to.
     * @param string      $tagName    Tag name of the element to replace.
     * @param string      $value      Text content of the new element.
     * @param DOMNode     $parentNode Parent that receives the new element.
     * @return bool True when the document was saved.
     */
    public function UpdateNodeValue($dom, $fileName, $tagName, $value, $parentNode)
    {
        foreach ($parentNode->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE && $child->nodeName === $tagName) {
                $parentNode->removeChild($child);
                break; // the child list is live; stop after the first removal
            }
        }

        $node = $dom->createElement($tagName);
        $text = (string) $value;
        if ($text !== '') {
            $node->appendChild($dom->createTextNode($text));
        }
        $parentNode->appendChild($node);

        return $dom->save($fileName) !== false;
    }

    /**
     * Download $url and prepare it for DOMDocument::loadHTML(): non-ASCII
     * characters become numeric entities (input is treated as UTF-8) and
     * whitespace runs collapse to a single space.
     *
     * @param string $url
     * @return string|false Normalised markup, or false when nothing was read.
     */
    private function fetchMarkup($url)
    {
        $raw = file_get_contents($url);
        if ($raw === false || $raw === '') {
            return false;
        }

        // Stands in for mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8'),
        // which is deprecated since PHP 8.2.
        $encoded = mb_encode_numericentity($raw, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
        $collapsed = preg_replace('/\s+/', " ", $encoded);

        return is_string($collapsed) ? $collapsed : false;
    }

    /**
     * Parse $html into $dom while keeping libxml's markup warnings out of the
     * PHP error stream.
     *
     * @param DOMDocument $dom
     * @param string      $html
     * @return bool Whether loadHTML() succeeded.
     */
    private function parseHtml($dom, $html)
    {
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $loaded !== false;
    }
}
-
/**
 * News-fetching helpers: extract image links from HTML content, download the
 * images and rewrite the content to point at the local copies.
 */
class GetNews
{
    /**
     * Get the image links: returns the src value of every <img> tag in $content.
     *
     * The match is case-insensitive, accepts single- or double-quoted values
     * and never looks past the end of the <img> tag itself.
     *
     * @param string $content HTML fragment.
     * @return array List of src values in document order (may contain duplicates).
     */
    public function GetImage($content)
    {
        $regex = '/<\s*img\b[^>]*?\ssrc\s*=\s*(["\'])(?<linkimage>.*?)\1[^>]*>/i';
        if (!preg_match_all($regex, (string) $content, $result)) {
            return array();
        }
        return $result['linkimage'];
    }

    /**
     * Get the file name: the part of $url after its last "/", or $url itself
     * when it contains no slash.
     *
     * @param string $url
     * @return string
     */
    public function GetFileName($url)
    {
        $pos = strrpos($url, "/");
        if ($pos === false) {
            return $url;
        }
        return (string) substr($url, $pos + 1);
    }

    /**
     * Download a file: copies the stream at $inPath to $outPath followed by
     * the file name taken from $inPath.
     *
     * NOTE(review): the local file name comes straight from the source URL;
     * confirm callers only write into a directory where arbitrary names and
     * extensions are harmless.
     *
     * @param string $inPath  Source URL or path readable by fopen().
     * @param string $outPath Destination directory prefix (including trailing separator).
     * @return bool True when the copy completed, false when either stream
     *              could not be opened or the copy failed.
     */
    public function DownloadFile($inPath, $outPath)
    {
        $filename = $this->GetFileName($inPath);
        if ((string) $inPath === '' || $filename === '') {
            return false;
        }

        $in = fopen($inPath, "rb");
        if ($in === false) {
            return false;
        }

        $out = fopen($outPath . $filename, "wb");
        if ($out === false) {
            fclose($in);
            return false;
        }

        // Copies until EOF; unlike a truthiness test on fread() it does not
        // stop early on a chunk that happens to be "0".
        $copied = stream_copy_to_stream($in, $out);
        fclose($in);
        fclose($out);

        return $copied !== false;
    }

    /**
     * Check whether $needle starts with $str (case-insensitive).
     *
     * @param string $str    Prefix written as a regular-expression fragment;
     *                       any "/" in it must already be escaped.
     * @param string $needle String to test.
     * @return bool
     */
    public function str_start_with($str, $needle)
    {
        return preg_match('/^(?:' . $str . ')/i', $needle) === 1;
    }

    /**
     * Rewrite every image link in $content to $path + file name and download
     * each image from the source site.
     *
     * Links starting with http:// or https:// are fetched as-is,
     * protocol-relative links ("//host/...") borrow the scheme of $domain
     * (default "http"), and everything else is prefixed with $domain. Each
     * distinct link is downloaded once and all replacements are applied in a
     * single pass so already-rewritten text is never rewritten again.
     *
     * @param string $domain  Prefix for relative links.
     * @param string $content HTML content containing <img> tags.
     * @param string $path    Local directory prefix (including trailing separator).
     * @return string The content with image links replaced.
     */
    public function ReplaceLinkImage($domain, $content, $path)
    {
        $replacements = array();

        foreach (array_unique($this->GetImage($content)) as $link) {
            if ($link === '') {
                continue;
            }

            if ($this->str_start_with('https?:\/\/', $link)) {
                $source = $link;
            } elseif (strncmp($link, '//', 2) === 0) {
                $scheme = parse_url($domain, PHP_URL_SCHEME);
                $source = ($scheme ? $scheme : 'http') . ':' . $link;
            } else {
                $source = $domain . $link;
            }

            $this->DownloadFile($source, $path);
            $replacements[$link] = $path . $this->GetFileName($link);
        }

        if (count($replacements) === 0) {
            return $content;
        }
        return strtr($content, $replacements);
    }

    /**
     * Get news from the given XPaths: packs the four values into an
     * associative array.
     *
     * @param mixed $xpathTitle
     * @param mixed $xpathHead
     * @param mixed $xpathContent
     * @param mixed $xpathImage
     * @return array Keys: "title", "head", "content", "image".
     */
    public function GetNewsFrom($xpathTitle, $xpathHead, $xpathContent, $xpathImage)
    {
        return array(
            'title' => $xpathTitle,
            'head' => $xpathHead,
            'content' => $xpathContent,
            'image' => $xpathImage,
        );
    }
}
- ?>