package com.powerX;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
public class Main {
public static void main(String[] args) {
for (int i = 1; i < 101; i++) {
String html = doGet(\"https://news.cnblogs.com/n/page/\" + i);
Elements elements = Jsoup.parse(html).getElementsByClass(\"news_block\");
for (Element element : elements){
Element content = element.getElementsByClass(\"content\").get(0);
Element entity = content.getElementsByClass(\"news_entry\").get(0).getElementsByTag(\"a\").get(0);
Element entity2 = content.getElementsByClass(\"entry_footer\").get(0);
System.out.println(Jsoup.parse(entity2.html().split(\"发布于\")[1]).body().getElementsByTag(\"span\").get(0).html() + \'\\t\' + entity.html());
}
}
}
public static String doGet(String httpurl) {
HttpURLConnection connection = null;
InputStream is = null;
BufferedReader br = null;
String result = null;// 返回结果字符串
try {
// 创建远程url连接对象
URL url = new URL(httpurl);
// 通过远程url连接对象打开一个连接,强转成httpURLConnection类
connection = (HttpURLConnection) url.openConnection();
// 设置连接方式:get
connection.setRequestMethod(\"GET\");
// 设置连接主机服务器的超时时间:15000毫秒
connection.setConnectTimeout(15000);
// 设置读取远程返回的数据时间:60000毫秒
connection.setReadTimeout(60000);
// 发送请求
connection.connect();
// 通过connection连接,获取输入流
if (connection.getResponseCode() == 200) {
is = connection.getInputStream();
// 封装输入流is,并指定字符集
br = new BufferedReader(new InputStreamReader(is, \"UTF-8\"));
// 存放数据
StringBuffer sbf = new StringBuffer();
String temp = null;
while ((temp = br.readLine()) != null) {
sbf.append(temp);
sbf.append(\"\\r\\n\");
}
result = sbf.toString();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// 关闭资源
if (null != br) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != is) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
connection.disconnect();// 关闭远程连接
}
return result;
}
public static String doPost(String httpUrl, String param) {
HttpURLConnection connection = null;
InputStream is = null;
OutputStream os = null;
BufferedReader br = null;
String result = null;
try {
URL url = new URL(httpUrl);
// 通过远程url连接对象打开连接
connection = (HttpURLConnection) url.openConnection();
// 设置连接请求方式
connection.setRequestMethod(\"POST\");
// 设置连接主机服务器超时时间:15000毫秒
connection.setConnectTimeout(15000);
// 设置读取主机服务器返回数据超时时间:60000毫秒
connection.setReadTimeout(60000);
// 默认值为:false,当向远程服务器传送数据/写数据时,需要设置为true
connection.setDoOutput(true);
// 默认值为:true,当前向远程服务读取数据时,设置为true,该参数可有可无
connection.setDoInput(true);
// 设置传入参数的格式:请求参数应该是 name1=value1&name2=value2 的形式。
connection.setRequestProperty(\"Content-Type\", \"application/x-www-form-urlencoded\");
// 设置鉴权信息:Authorization: Bearer da3efcbf-0845-4fe3-8aba-ee040be542c0
connection.setRequestProperty(\"Authorization\", \"Bearer da3efcbf-0845-4fe3-8aba-ee040be542c0\");
// 通过连接对象获取一个输出流
os = connection.getOutputStream();
// 通过输出流对象将参数写出去/传输出去,它是通过字节数组写出的
os.write(param.getBytes());
// 通过连接对象获取一个输入流,向远程读取
if (connection.getResponseCode() == 200) {
is = connection.getInputStream();
// 对输入流对象进行包装:charset根据工作项目组的要求来设置
br = new BufferedReader(new InputStreamReader(is, \"UTF-8\"));
StringBuffer sbf = new StringBuffer();
String temp = null;
// 循环遍历一行一行读取数据
while ((temp = br.readLine()) != null) {
sbf.append(temp);
sbf.append(\"\\r\\n\");
}
result = sbf.toString();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// 关闭资源
if (null != br) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != os) {
try {
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != is) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
// 断开与远程地址url的连接
connection.disconnect();
}
return result;
}
}
需要引入包 jsoup
来源:https://www.cnblogs.com/sunbingqiang/p/15966177.html
本站部分图文来源于网络,如有侵权请联系删除。