如何获取百度网页内容,不包括由JavaScript动态生成的数据?
- 内容介绍
- 文章标签
- 相关推荐
本文共计106个文字,预计阅读时间需要1分钟。
javapublic class Crawler { public static void main(String[] args) { HttpClient client=new DefaultHttpClient(); HttpGet httpGet=new HttpGet(http://www.baidu.com/); HttpResponse response=client.execute(httpGet); }}
gistfile1.txtpublic class Crawler { public static void main(String[] args) throws IOException { HttpClient client=new DefaultHttpClient(); HttpGet www.baidu.com/"); HttpResponse response=client.execute(httpGet); HttpEntity entity=response.getEntity(); InputStream in=entity.getContent(); byte[] bytes=new byte[1024]; int count; StringBuffer sb = new StringBuffer(); while ((count=in.read(bytes))!=-1) { sb.append(new String(bytes,0,count,"UTF-8")); } System.out.println(sb.toString()); System.out.println("-----------------------"); in.close(); System.out.println(JSON.toJSONString(entity)); } }
本文共计106个文字,预计阅读时间需要1分钟。
javapublic class Crawler { public static void main(String[] args) { HttpClient client=new DefaultHttpClient(); HttpGet httpGet=new HttpGet(http://www.baidu.com/); HttpResponse response=client.execute(httpGet); }}
gistfile1.txtpublic class Crawler { public static void main(String[] args) throws IOException { HttpClient client=new DefaultHttpClient(); HttpGet www.baidu.com/"); HttpResponse response=client.execute(httpGet); HttpEntity entity=response.getEntity(); InputStream in=entity.getContent(); byte[] bytes=new byte[1024]; int count; StringBuffer sb = new StringBuffer(); while ((count=in.read(bytes))!=-1) { sb.append(new String(bytes,0,count,"UTF-8")); } System.out.println(sb.toString()); System.out.println("-----------------------"); in.close(); System.out.println(JSON.toJSONString(entity)); } }

