采集

<?php
include("Snoopy.class.php");
include("a.php");
// Scraper entry point.
//
// Fetches the page selected by the "id" request parameter together with the
// page that precedes it in $url, keeps only the lines that are unique to the
// selected page (so markup shared by both pages is discarded), drops every
// line containing one of the blacklisted keywords in $paichu, prints the
// remaining lines and stops.
$url = array('aaa.com/a.html','bbb.com');

// Request parameters are untrusted: accept numeric values only. An absent or
// malformed id becomes -1, which matches no entry in $url and therefore
// yields empty output instead of undefined-index / type errors.
$rawId = isset($_REQUEST['id']) ? $_REQUEST['id'] : null;
$id = is_numeric($rawId) ? (int) $rawId : -1;
$mid = $id - 1;
$rawPage = isset($_REQUEST['page']) ? $_REQUEST['page'] : null;
$page = is_numeric($rawPage) ? (int) $rawPage : 0;

// NOTE(review): $snoopy is instantiated but never used below; the pages are
// read with file() instead. Kept because Snoopy.class.php is not visible here.
$snoopy = new Snoopy;

// file() returns an array of lines, or false when the source cannot be read.
// Unknown indexes are treated the same way as a failed read.
$content = isset($url[$id]) ? file($url[$id]) : false;
$content1 = isset($url[$mid]) ? file($url[$mid]) : false;

// Keywords that mark a line as boilerplate to be skipped.
// NOTE(review): each line is converted to GBK before matching, so these
// literals only match if this source file is itself saved as GBK - confirm.
$paichu = array('相关','转让信息','要发表评论','举报该信息','iframe','Counter58','GetCookieValue','浏览','北京二手汽车转让','北京58同城','h1','网邻通');

// Always a string, even when nothing could be fetched or every line is dropped.
$cai = '';
if (is_array($content)) {
    // Without a readable previous page there is nothing to subtract.
    $baseline = is_array($content1) ? $content1 : array();

    // Lines of the current page that do not occur in the previous page,
    // de-duplicated (equivalent to diffing against the intersection).
    $caiurl = array_unique(array_diff($content, $baseline));

    foreach ($caiurl as $val) {
        $val = iconv("utf-8", "gbk//IGNORE", $val);
        if ($val === false) {
            // Conversion failed; the line cannot be emitted.
            continue;
        }

        // A per-line flag replaces the old "$key != $painum" comparison,
        // which read $painum before it was set and so always dropped key 0.
        $excluded = false;
        foreach ($paichu as $value) {
            if (strpos($val, $value) !== false) {
                $excluded = true;
                break;
            }
        }

        if (!$excluded) {
            $cai .= $val;
        }
    }
}

print_r($cai);
exit();

// NOTE(review): everything below is unreachable because of the exit() above.
// It is kept so the auto-advancing paging can be re-enabled by removing that
// exit(); only its latent defects have been repaired ($p was undefined in the
// else branch, and SCRIPT_NAME was echoed into HTML unescaped).
$id = $id+1;
$p = htmlspecialchars($_SERVER['SCRIPT_NAME'], ENT_QUOTES);
if (is_array($content)) {
    echo 'aaa';
    $page++;
    echo "<META HTTP-EQUIV=REFRESH CONTENT='2;URL=$p?id=$id&page=$page'>";
} else {
    echo 'bbb';
    $page = 1;
    // NOTE(review): id is advanced a second time on a failed fetch, exactly as
    // in the original code - confirm this skip is intentional.
    $id = $id+1;
    echo "<META HTTP-EQUIV=REFRESH CONTENT='2;URL=$p?id=$id&page=$page'>";
}
/**
 * Encodes a GB2312 byte string in the style of JavaScript's escape().
 *
 * Runs of ASCII bytes are percent-encoded with rawurlencode(); every
 * double-byte GB2312 character becomes "%uXXXX", where XXXX is the lowercase
 * hex of its UCS-2 code unit.
 *
 * @param string $str GB2312-encoded input.
 * @return string The escaped, ASCII-only representation.
 */
function escape($str) {
    // Tokenise into double-byte characters (lead byte >= 0x80) and ASCII runs.
    preg_match_all("/[\x80-\xff].|[\x01-\x7f]+/", $str, $r);
    $ar = $r[0];
    foreach ($ar as $k => $v) {
        if (ord($v[0]) < 128) {
            $ar[$k] = rawurlencode($v);
        } else {
            // Big-endian is requested explicitly: plain "UCS-2" uses a
            // platform-dependent byte order and can emit swapped hex digits.
            $ucs = iconv("GB2312", "UCS-2BE", $v);
            // Byte pairs that are not valid GB2312 fall back to %XX encoding
            // instead of producing a bare "%u".
            $ar[$k] = ($ucs === false) ? rawurlencode($v) : "%u" . bin2hex($ucs);
        }
    }
    return join("", $ar);
}

/**
 * Reverses escape(): turns "%uXXXX" sequences back into GB2312 characters and
 * percent-decodes everything else.
 *
 * @param string $str A string produced by escape() (or JavaScript's escape()).
 * @return string The decoded GB2312 byte string.
 */
function unescape($str) {
    // Split on %uXXXX while keeping the delimiters: even indexes hold ordinary
    // text, odd indexes hold the %uXXXX sequences. Doing this before any
    // percent-decoding keeps an encoded literal such as "%25u4e2d" from being
    // decoded twice, and keeps newlines and text after a %uXXXX intact.
    $ar = preg_split("/(%u[0-9a-fA-F]{4})/", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    foreach ($ar as $k => $v) {
        if ($k % 2 === 1) {
            $gb = iconv("UCS-2BE", "GB2312", pack("H4", substr($v, 2)));
            // Code points GB2312 cannot represent are left in escaped form.
            $ar[$k] = ($gb === false) ? $v : $gb;
        } else {
            $ar[$k] = rawurldecode($v);
        }
    }
    return join("", $ar);
}