<version>0.5.2</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
	</dependency>

## process

/**

  • Created by ray on 2017/7/16.

  • 爬虫管道 */ @Service public class NicoNicoProcessor implements PageProcessor {

    private Logger logger = LoggerFactory.getLogger(this.getClass());

    //https://api.bilibili.com/archive_rank/getarchiverankbypartion?type=jsonp&tid=20&pn=1 private String bashUrl = “http://www.nicovideo.jp/tag/%E8%B8%8A%E3%81%A3%E3%81%A6%E3%81%BF%E3%81%9F?page=" ;// + i //http://www.nicovideo.jp/watch/sm23385186 private String detailUrl = “http://www.nicovideo.jp/watch/";

    @Value("${spider.niconico.maxSize}”) int maxSize;

    @Autowired NicoNicoPipeLine pipeLine;

    @Override public Site getSite() {

     //HttpHost httpHost = new HttpHost("127.0.0.1",1087);
     Site site = Site.me()
             //.setHttpProxy(httpHost)
             .setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36")
             .setSleepTime(10 * 1000)
             .setTimeOut(20 * 1000)
             .setRetryTimes(3)
             .setCycleRetryTimes(3);
     return site;
    

    }

    @Override public void process(Page page){

     String pageUrl = page.getUrl().toString();
    
     //新增请求列表
     List<String> requestUrls =new ArrayList<>();
     List<SpiderNico> resList = new ArrayList<>();
     logger.info(pageUrl);
    
     try {
         if (pageUrl.contains(bashUrl)){
    
             //解析列表
             List <String> htmlList = page.getHtml().xpath("//div/ul[@class='list']/li[@class='item']").all();
             for (String tmp: htmlList) {
                 if (tmp.length() < 5) continue;
                 if (tmp.contains("data-id")){
    
                     try {
                         Html html = new Html(tmp);
    
                         //id
                         String dataId = html.xpath("//li[@class='item']/@data-id").toString();
                         //标题
                         String title = html.xpath("//p[@class='itemTitle']/a/text()").toString();
                         //封面
                         String icon = html.xpath("//img[@class='jsLazyImage thumb']/@data-original").toString();
    
                         String view = "0";
                         String comment = "0";
                         String wrapTitle = null;
                         String createTime = "0000-00-00";
                         //光看人数
                         view = html.xpath("//ul[@class='list']/li[@class='count view']/span/text()").toString().replace(",","");
                         //回复人数
                         comment = html.xpath("//ul[@class='list']/li[@class='count comment']/span/text()").toString().replace(",","");
                         //其他
                         wrapTitle = html.xpath("//div[@class='wrap']/p/@title").toString().replace(",","");
                         //创建时间
                         createTime = html.xpath("//p[@class='itemTime']/span/span/text()").toString().replace("/","-");
                         createTime = "20" + createTime;
    
    
                         //增加nico对象
                         SpiderNico nico = new  SpiderNico ();
                         nico.setAid(dataId);
                         nico.setTitle(title);
                         nico.setDescription(wrapTitle);
                         nico.setCreate(createTime);
                         nico.setComment(Integer.valueOf(comment));
                         nico.setPic(icon);
                         nico.setView(Integer.valueOf(view));
                         resList.add(nico);
    
                         //增加请求地址
                         String url =  detailUrl + dataId;
                         requestUrls.add(url);
    
                     }
                     catch (Exception e){
                         logger.error("nico xpath:" + pageUrl );
                     }
                 }
             }
    
             //批量增加请求
             if (resList.size() > 0 ){
                 page.putField("type", 0);
                 page.putField("data", resList);
             }
         }
         else if (pageUrl.contains(detailUrl)){
             logger.info(pageUrl);
         }
    
     }
     catch (Exception e){
         logger.error("url:" + pageUrl  );
     }
    

    }

    public void run(){

     Spider spider = Spider.create(new NicoNicoProcessor())
             //.setDownloader(new HttpClientDownloader())
             //.setDownloader(new HttpDownloader())
             .setDownloader(new SslDownloader())
             //.addPipeline(new ConsolePipeline())//打印到控制台
             .addPipeline(pipeLine);
     for (int i = 1; i < maxSize; i++) {
         String tmp = bashUrl + i;
         spider.addUrl(tmp);
     }
     spider.run();
    

    } }


## pipeline 

/**
 * Created by ray on 2017/7/16.
 *
 * <p>WebMagic pipeline that persists crawl results. It reads the
 * {@code "type"} and {@code "data"} fields from the result items; for type
 * {@code 0} (listing content) every {@code SpiderNico} in {@code "data"} is
 * passed to {@code SpiderNicoService.updateBySpider}.
 */
@Service
public class NicoNicoPipeLine implements Pipeline {

    @Autowired
    SpiderNicoService service;

    /**
     * Persists the items extracted from one page.
     *
     * @param resultItems fields published by the page processor
     * @param task        the running spider task (unused)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        if (resultItems.getAll().isEmpty()) {
            return;
        }

        // Read as Integer: unboxing a missing "type" straight into int would throw.
        Integer type = resultItems.get("type");
        if (type == null) {
            return;
        }

        if (type == 0) {
            // Listing content
            List<SpiderNico> list = resultItems.get("data");
            if (list == null) {
                return;
            }
            for (SpiderNico obj : list) {
                service.updateBySpider(obj);
            }
        }
        // type == 1: nothing to persist yet.
    }
}