Java爬虫系列一HttpClient请求工具,IP代理模式
IP代理模式顾名思义,使用非本机IP来请求目标数据,两大好处:
- 1.作为爬虫项目,有效防止IP风控
- 2.不多说,你懂的~
特此声明:本人所有文章仅供大家学习,任何个人或组织不得直接或间接使用文中的技术内容从事违反国家法律规定的活动。由此造成的一切后果,本人概不承担。
另附《中华人民共和国网络安全法》大家以此为底线,一定要保持职业操守,做合法社会主义好公民
废话不多,直接上源码。
1.Maven依赖
-
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.3</version>
</dependency>
2.为了支持Https协议,我们还需要写个绕过SSL验证的工具
-
//添加主机名验证程序类,设置不验证主机
-
private final static HostnameVerifier DO_NOT_VERIFY = new HostnameVerifier() {
-
public boolean verify(String hostname, SSLSession session) {
-
return true;
-
}
-
};
-
-
/**
-
* 创建SSL安全连接
-
*
-
* @return
-
*/
-
private static SSLConnectionSocketFactory createSSLConnSocketFactory() {
-
SSLConnectionSocketFactory sslsf = null;
-
try {
-
SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null,
-
new TrustStrategy() {
-
public boolean isTrusted(X509Certificate[] chain, String authType) {
-
return true;
-
}
-
}).build();
-
sslsf = new SSLConnectionSocketFactory(sslContext, new HostnameVerifier() {
-
-
-
public boolean verify(String arg0, SSLSession arg1) {
-
return true;
-
}
-
});
-
} catch (GeneralSecurityException e) {
-
e.printStackTrace();
-
}
-
return sslsf;
-
}
3.为了解决很多莫名其妙的异常,我们有必要更细致地捕获各种可能的异常,并选择抛出或者返回,方便后续处理。
-
ConnectTimeoutException:连接超时;SocketTimeoutException:读取响应超时
- 其它的都不重要,可以统一Exception捕获
4.Get方式请求
全局设置超时时间,大家根据自己实际情况设置
/**
 * Timeout applied to connect, connection-request and socket reads alike.
 * The value is handed to the HttpClient RequestConfig timeout setters,
 * which take milliseconds.
 */
private static final int CONNECTION_TIME_OUT = 6000;
-
/**
-
* Get方式请求
-
* @param pageUrl 请求地址
-
* @param charset 编码方式
-
* @param params 参数
-
* @param proxyIp 代理IP
-
* @return
-
*/
-
public static Map<String, Object> doGet(String pageUrl, String charset, Map<String, String> params, String proxyIp) {
-
Map<String, Object> map = new HashMap<String, Object>();
-
String result = null;
-
if (null == charset) {
-
charset = "utf-8";
-
}
-
//设置绕过SSL请求验证
-
CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(createSSLConnSocketFactory()).build();
-
try {
-
URL url = new URL(pageUrl);
-
//设置代理协议
-
HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
-
HttpHost proxy = new HttpHost(proxyIp.split(":")[0], Integer.parseInt(proxyIp.split(":")[1]));
-
RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(CONNECTION_TIME_OUT)
-
.setConnectionRequestTimeout(CONNECTION_TIME_OUT).setSocketTimeout(CONNECTION_TIME_OUT).build();
-
HttpGet httpget = new HttpGet(url.toString());
-
httpget.setConfig(config);
-
try {
-
for (Map.Entry<String, String> entry : params.entrySet()) {
-
httpget.addHeader(entry.getKey(), entry.getValue());
-
}
-
} catch (Exception e) {
-
}
-
CloseableHttpResponse response = null;
-
try {
-
response = httpclient.execute(target, httpget);
-
if (response != null) {
-
HttpEntity resEntity = response.getEntity();
-
if (resEntity != null) {
-
result = EntityUtils.toString(resEntity, charset);
-
map.put("res", result);
-
}
-
Header[] headerinfo = response.getAllHeaders();
-
map.put("headerinfo", headerinfo);
-
}
-
} catch (Exception e) {
-
map.put("res", "error");
-
log.info("Connection refused: connect:{}", e.getMessage());
-
} finally {
-
try {
-
response.close();
-
} catch (NullPointerException e) {
-
map.put("res", "error");
-
log.info("无响应结果");
-
}
-
}
-
}catch (ConnectTimeoutException | SocketTimeoutException e) {
-
log.info("请求超时");
-
map.put("res", "error");
-
return map;
-
}catch (ClientProtocolException e) {
-
e.printStackTrace();
-
} catch (IOException e) {
-
e.printStackTrace();
-
} finally {
-
try {
-
httpclient.close();
-
} catch (IOException e) {
-
e.printStackTrace();
-
}
-
}
-
return map;
-
}
5.Post方式请求一,模拟Form表单方式提交参数,即以已编码的表单字符串(如 a=1&b=2)传递参数,请求头信息通过Map自定义
-
/**
-
* Post方式请求
-
* @param pageUrl 请求地址
-
* @param params 请求参数
-
* @param charset 编码方式
-
* @param header 请求头
-
* @param proxyIp 代理IP
-
* @return
-
*/
-
public static Map<String, Object> doPost(String pageUrl, String params, String charset, Map<String, String> header, String proxyIp) {
-
// log.info("===========================================【POST请求信息】==================================================");
-
// log.info("|| 【POST地址】-{}",pageUrl);
-
// log.info("|| 【请求参数】{}",params);
-
// log.info("===========================================================================================================");
-
Map<String, Object> resMap = new HashMap<String, Object>();
-
String result = null;
-
CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(createSSLConnSocketFactory()).build();
-
try {
-
URL url = new URL(pageUrl);
-
HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
-
HttpHost proxy = new HttpHost(proxyIp.split(":")[0], Integer.parseInt(proxyIp.split(":")[1]));
-
RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(CONNECTION_TIME_OUT)
-
.setConnectionRequestTimeout(CONNECTION_TIME_OUT).setSocketTimeout(CONNECTION_TIME_OUT).build();
-
HttpPost httpPost = new HttpPost(url.toString());
-
httpPost.setConfig(config);
-
try {
-
if (null != header) {
-
Set<Map.Entry<String, String>> entries = header.entrySet();
-
for (Map.Entry<String, String> entry : entries) {
-
httpPost.addHeader(entry.getKey(), entry.getValue());
-
}
-
}
-
} catch (Exception e) {
-
}
-
// httpPost.setEntity(new StringEntity(params));
-
// httpPost.setEntity(new StringEntity(params, ContentType.APPLICATION_FORM_URLENCODED));
-
StringEntity stringEntity = new StringEntity(params);
-
stringEntity.setContentType("application/x-www-form-urlencoded");
-
httpPost.setEntity(stringEntity);
-
CloseableHttpResponse response = null;
-
try {
-
response = httpclient.execute(target, httpPost);
-
if (response != null) {
-
HttpEntity resEntity = response.getEntity();
-
if (resEntity != null) {
-
result = EntityUtils.toString(resEntity, "UTF-8");
-
// log.info("===============================================【返回结果】==================================================");
-
// log.info("|| {}",result);
-
// log.info("===========================================================================================================");
-
resMap.put("res", result);
-
}
-
Header[] headerinfo = response.getAllHeaders();
-
resMap.put("headerinfo", headerinfo);
-
// log.info("===============================================【返回头部】==================================================");
-
// log.info("===========================================================================================================");
-
}
-
} catch (Exception e) {
-
resMap.put("res", "error");
-
log.info("Connection refused: connect:{}", e.getMessage());
-
} finally {
-
try {
-
response.close();
-
} catch (NullPointerException e) {
-
resMap.put("res", "error");
-
log.info("无响应结果");
-
}
-
}
-
}catch (ConnectTimeoutException | SocketTimeoutException e) {
-
// log.info("====请求超时=====");
-
log.info("【POST请求异常1】---->",e.getMessage());
-
resMap.put("res", "error");
-
return resMap;
-
}catch (ClientProtocolException e) {
-
// e.printStackTrace();
-
log.info("【POST请求异常2】---->",e.getMessage());
-
resMap.put("res", "error");
-
return resMap;
-
} catch (IOException e) {
-
log.info("【POST请求异常3】---->",e.getMessage());
-
// e.printStackTrace();
-
resMap.put("res", "error");
-
return resMap;
-
}finally {
-
try {
-
httpclient.close();
-
} catch (IOException e) {
-
e.printStackTrace();
-
}
-
}
-
return resMap;
-
}
6.Post方式请求二,以字符串方式传递JSON参数(代码中将该字符串放入名为data的表单字段提交,可自定义请求头信息)
-
/**
-
* 只针对提交JSON字符串方式
-
* @param pageUrl
-
* @param params
-
* @param charset
-
* @param header
-
* @param proxyIp
-
* @return
-
*/
-
public static Map<String, Object> doPostByJson(String pageUrl, String params, String charset, Map<String, String> header, String proxyIp) {
-
log.info("===========================================【doPostByJson-POST请求信息】==================================================");
-
log.info("|| 【POST地址】-{}",pageUrl);
-
log.info("|| 【请求参数】{}",params);
-
log.info("===========================================================================================================");
-
Map<String, Object> resMap = new HashMap<String, Object>();
-
String result = null;
-
CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(createSSLConnSocketFactory()).build();
-
try {
-
URL url = new URL(pageUrl);
-
HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
-
HttpHost proxy = new HttpHost(proxyIp.split(":")[0], Integer.parseInt(proxyIp.split(":")[1]));
-
RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(CONNECTION_TIME_OUT)
-
.setConnectionRequestTimeout(CONNECTION_TIME_OUT).setSocketTimeout(CONNECTION_TIME_OUT).build();
-
HttpPost httpPost = new HttpPost(url.toString());
-
httpPost.setConfig(config);
-
try {
-
if (null != header) {
-
Set<Map.Entry<String, String>> entries = header.entrySet();
-
for (Map.Entry<String, String> entry : entries) {
-
httpPost.addHeader(entry.getKey(), entry.getValue());
-
}
-
}
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
//参数
-
List<BasicNameValuePair> pair =new ArrayList<BasicNameValuePair>();
-
pair.add(new BasicNameValuePair("data", params));
-
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(pair,"UTF-8");
-
httpPost.setEntity(entity);
-
-
Header[] allheader = httpPost.getAllHeaders();
-
for (int i = 0; i < allheader.length; i ) {
-
log.info("||--请求头信息-->{}",allheader[i]);
-
}
-
-
CloseableHttpResponse response = httpclient.execute(target, httpPost);
-
log.info("||--请求参数-->{}",EntityUtils.toString(httpPost.getEntity(),"UTF-8"));
-
try {
-
if (response != null) {
-
HttpEntity resEntity = response.getEntity();
-
if (resEntity != null) {
-
result = EntityUtils.toString(resEntity, "UTF-8");
-
log.info("===============================================【返回结果】==================================================");
-
log.info("|| {}",result);
-
log.info("===========================================================================================================");
-
resMap.put("res", result);
-
}
-
Header[] headerinfo = response.getAllHeaders();
-
resMap.put("headerinfo", headerinfo);
-
log.info("===============================================【返回头部】==================================================");
-
log.info("===========================================================================================================");
-
}
-
} finally {
-
response.close();
-
}
-
}catch (ConnectTimeoutException | SocketTimeoutException e) {
-
// log.info("====请求超时=====");
-
log.info("【POST请求异常1】---->",e.getMessage());
-
resMap.put("res", "error");
-
return resMap;
-
}catch (ClientProtocolException e) {
-
// e.printStackTrace();
-
log.info("【POST请求异常2】---->",e.getMessage());
-
resMap.put("res", "error");
-
return resMap;
-
} catch (IOException e) {
-
log.info("【POST请求异常3】---->",e.getMessage());
-
// e.printStackTrace();
-
resMap.put("res", "error");
-
return resMap;
-
}finally {
-
try {
-
httpclient.close();
-
} catch (IOException e) {
-
e.printStackTrace();
-
}
-
}
-
return resMap;
-
}
7.响应的实体类工具
-
/**
-
* 获得响应HTTP实体内容
-
*
-
* @param response
-
* @return
-
* @throws IOException
-
* @throws UnsupportedEncodingException
-
*/
-
private static String getHttpEntityContent(HttpResponse response) throws IOException, UnsupportedEncodingException {
-
HttpEntity entity = response.getEntity();
-
if (entity != null) {
-
InputStream is = entity.getContent();
-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
-
String line = br.readLine();
-
StringBuilder sb = new StringBuilder();
-
while (line != null) {
-
sb.append(line "\n");
-
line = br.readLine();
-
}
-
return sb.toString();
-
}
-
return "";
-
}
到此,基本整个工具类就完事了。具体用法我们下一篇再写吧。
拜拜了您嘞~
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhhacbci
系列文章
更多
同类精品
更多
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
TikTok加速器哪个好免费的TK加速器推荐
TK小达人 10-01 -
怎样阻止微信小程序自动打开
PHP中文网 06-13