如何用Java实现一个简单的网络爬虫模仿案例?
- 内容介绍
- 文章标签
- 相关推荐
本文共计357个文字,预计阅读时间需要2分钟。
Java 模拟网络爬虫 简单示例,直接查看代码:
package com.example.demo1;
import java.io.*;import java.net.*;import java.util.regex.Matcher;import java.util.regex.Pattern;
/**
 * Smallest possible "crawler": fetches a single page over HTTP and echoes its
 * body to standard output line by line.
 *
 * @author YinLei
 */
public class SimpleCrawler {

    /**
     * Downloads {@code http://example.com} with a GET request and prints every
     * line of the response. Any I/O failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // String literals must be quoted; the unquoted form does not compile.
            URL url = new URL("http://example.com");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");

            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() throws part-way through the response.
            // An explicit charset avoids depending on the platform default.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Java 模拟网络爬虫的简单案例,直接看代码:
package com.example.demo1;import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Java crawler test: downloads one search-results page and writes every match
 * of the regular expression {@code 1\d{10}} found in it to a local file, one
 * match per line.
 *
 * <p>Package: com.example.demo1<br>
 * Created: 2021/9/7 20:23
 *
 * @author YinLei
 * @version 1.0
 */
public class Crawler {

    /**
     * Page to crawl: a search request against search.dangdang.com.
     * The protocol prefix is required; without it {@link URL#URL(String)}
     * throws {@link MalformedURLException} ("no protocol") before any request
     * is made.
     * NOTE(review): the {@code key} parameter looks like a GBK percent-encoded
     * search term, and plain http is assumed here; confirm both against the site.
     */
    private static final String TARGET_URL =
            "http://search.dangdang.com/?key=%BB%FA%D0%B5%B1%ED&act=input";

    /** File on drive D: that receives the extracted matches (overwritten on each run). */
    private static final String OUTPUT_FILE = "D:/SiteURL.txt";

    /**
     * A literal '1' followed by ten digits; presumably meant to catch
     * 11-digit mobile phone numbers. Compiled once rather than per run.
     */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("1\\d{10}");

    /**
     * Fetches {@link #TARGET_URL}, scans the response line by line and appends
     * each regex match to {@link #OUTPUT_FILE}. Progress markers are printed to
     * standard output; failures are reported via their stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            URL url = new URL(TARGET_URL);
            URLConnection urlConnection = url.openConnection();

            // try-with-resources guarantees the output file and the network
            // stream are closed on every path, including read failures.
            try (PrintWriter pw = new PrintWriter(new FileWriter(OUTPUT_FILE), true)) {
                System.out.println("Stay Here1!!");
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
                    System.out.println("Stay Here2!!");
                    String buf;
                    while ((buf = br.readLine()) != null) {
                        Matcher m = NUMBER_PATTERN.matcher(buf);
                        while (m.find()) {
                            pw.println(m.group());
                        }
                    }
                }
                // PrintWriter never throws on write errors; surface them
                // explicitly so "success!" is not printed after a failed write.
                if (pw.checkError()) {
                    throw new IOException("Failed to write results to " + OUTPUT_FILE);
                }
            }
            System.out.println("success!");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
本文共计357个文字,预计阅读时间需要2分钟。
Java 模拟网络爬虫 简单示例,直接查看代码:
package com.example.demo1;
import java.io.*;import java.net.*;import java.util.regex.Matcher;import java.util.regex.Pattern;
/**
 * Smallest possible "crawler": fetches a single page over HTTP and echoes its
 * body to standard output line by line.
 *
 * @author YinLei
 */
public class SimpleCrawler {

    /**
     * Downloads {@code http://example.com} with a GET request and prints every
     * line of the response. Any I/O failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // String literals must be quoted; the unquoted form does not compile.
            URL url = new URL("http://example.com");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");

            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() throws part-way through the response.
            // An explicit charset avoids depending on the platform default.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Java 模拟网络爬虫的简单案例,直接看代码:
package com.example.demo1;import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Java crawler test: downloads one search-results page and writes every match
 * of the regular expression {@code 1\d{10}} found in it to a local file, one
 * match per line.
 *
 * <p>Package: com.example.demo1<br>
 * Created: 2021/9/7 20:23
 *
 * @author YinLei
 * @version 1.0
 */
public class Crawler {

    /**
     * Page to crawl: a search request against search.dangdang.com.
     * The protocol prefix is required; without it {@link URL#URL(String)}
     * throws {@link MalformedURLException} ("no protocol") before any request
     * is made.
     * NOTE(review): the {@code key} parameter looks like a GBK percent-encoded
     * search term, and plain http is assumed here; confirm both against the site.
     */
    private static final String TARGET_URL =
            "http://search.dangdang.com/?key=%BB%FA%D0%B5%B1%ED&act=input";

    /** File on drive D: that receives the extracted matches (overwritten on each run). */
    private static final String OUTPUT_FILE = "D:/SiteURL.txt";

    /**
     * A literal '1' followed by ten digits; presumably meant to catch
     * 11-digit mobile phone numbers. Compiled once rather than per run.
     */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("1\\d{10}");

    /**
     * Fetches {@link #TARGET_URL}, scans the response line by line and appends
     * each regex match to {@link #OUTPUT_FILE}. Progress markers are printed to
     * standard output; failures are reported via their stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            URL url = new URL(TARGET_URL);
            URLConnection urlConnection = url.openConnection();

            // try-with-resources guarantees the output file and the network
            // stream are closed on every path, including read failures.
            try (PrintWriter pw = new PrintWriter(new FileWriter(OUTPUT_FILE), true)) {
                System.out.println("Stay Here1!!");
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
                    System.out.println("Stay Here2!!");
                    String buf;
                    while ((buf = br.readLine()) != null) {
                        Matcher m = NUMBER_PATTERN.matcher(buf);
                        while (m.find()) {
                            pw.println(m.group());
                        }
                    }
                }
                // PrintWriter never throws on write errors; surface them
                // explicitly so "success!" is not printed after a failed write.
                if (pw.checkError()) {
                    throw new IOException("Failed to write results to " + OUTPUT_FILE);
                }
            }
            System.out.println("success!");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

