如何用Java编写程序来爬取特定网站的长尾关键词数据?
- 内容介绍
- 文章标签
- 相关推荐
本文共计201字,预计阅读时间约1分钟。
```java
package com.zzger.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
```
package com.zzger.model; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; import com.zzger.module.queue.UrlQueue; import com.zzger.util.HttpUtils; import com.zzger.util.RegexUtils; public class WebSite { /** * 站点url */ private String url; /** * 需要爬行的url队列 */ private UrlQueue<String> urls = new UrlQueue<>(); /** * 已爬行过的页面url */ private List<String> exitUrls = Collections.synchronizedList(new ArrayList<>()); private static final int TOTAL_THREADS = 12; private final CountDownLatch mStartSignal = new CountDownLatch(1); private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS); public WebSite(String url){ this.url = url; urls.offer(url);//把网站首页加入需要爬行的队列中 } public void guangDu(){ new Thread(new Runnable() { @Override public void run() { paxing(HttpUtils.duanziwang.com"); web.guangDu(); for(int i = 0; i<10;i++){ new Thread(new Runnable() { @Override public void run() { web.dxcPx(); } }).start(); } } }
本文共计201字,预计阅读时间约1分钟。
```java
package com.zzger.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
```
package com.zzger.model; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; import com.zzger.module.queue.UrlQueue; import com.zzger.util.HttpUtils; import com.zzger.util.RegexUtils; public class WebSite { /** * 站点url */ private String url; /** * 需要爬行的url队列 */ private UrlQueue<String> urls = new UrlQueue<>(); /** * 已爬行过的页面url */ private List<String> exitUrls = Collections.synchronizedList(new ArrayList<>()); private static final int TOTAL_THREADS = 12; private final CountDownLatch mStartSignal = new CountDownLatch(1); private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS); public WebSite(String url){ this.url = url; urls.offer(url);//把网站首页加入需要爬行的队列中 } public void guangDu(){ new Thread(new Runnable() { @Override public void run() { paxing(HttpUtils.duanziwang.com"); web.guangDu(); for(int i = 0; i<10;i++){ new Thread(new Runnable() { @Override public void run() { web.dxcPx(); } }).start(); } } }

