[list]class=\"page-down\">下一页";
String pageUrlRegex = "
";
WebContent wc = new WebContent();
try {
String html = wc.getHtml(url, encoding);
String listarea = wc.getArea(html, arearegex, WebContent.AREA_LIST);
List urllist = wc.getListUrl(listarea, urlregex, prefixUrl);
System.out.println(urllist.size());
long s = System.currentTimeMillis();
// String url2 = "http://info.taobao.com/detail/lady/a2/ef/a2ef0063-a1fc-46fe-bd43-23c7b8dc8158_1.php";
for(String url2 : urllist){
System.out.println(url2);
String subHtml = wc.getHtml( url2, encoding);
String title = wc.getArea(subHtml, titleRegex, WebContent.AREA_TITLE);
String message = wc.getArea(subHtml, messageRegex, WebContent.AREA_MESSAGE);
if(needReSeacheUrl)
message = wc.messageUrlReplease(message);
if(iSpagination){
// First extract the pagination URLs
String pageAreaHtml = wc.getArea(subHtml, pageAreaRegex, WebContent.AREA_LIST);
List pageUrlList = wc.getListUrl(pageAreaHtml, pageUrlRegex, prefixUrl,listHasOmit,pageNumRegex);
StringBuffer allMessage = new StringBuffer(message);
allMessage.append("\r\n");
Iterator pageurls = pageUrlList.iterator();
int pageSize = pageUrlList.size(), i = 0;
while(pageurls.hasNext()){
i ++;
subHtml = wc.getHtml( pageurls.next(), encoding);
message = wc.getArea(subHtml, messageRegex, WebContent.AREA_MESSAGE);
if(needReSeacheUrl)
message = wc.messageUrlReplease(message);
allMessage.append(message);
if( pageSize > i )
allMessage.append("\r\n");
}
message = allMessage.toString();
}
Wordpress.post(title, message);
}
System.out.println((System.currentTimeMillis()-s));
// System.out.println(message);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}