// 国际dhl与gatl运单号通过httpClient抓取数据
// (Fetching tracking data for international DHL and GATI waybill numbers via HttpClient.)
package com.choice.ehr.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.codehaus.jackson.map.ObjectMapper;
import com.alibaba.fastjson.JSON;
import com.choice.ehr.jobs.exchange.Checkpoints;
import com.choice.ehr.jobs.exchange.Courier;
import com.choice.ehr.jobs.exchange.Orders;
import com.choice.weixin.bean.ShptInfoWeiXinBean;
import com.sun.org.apache.xalan.internal.xsltc.compiler.sym;
import com.sunrise.vivo.utils.DateUtil;
/**
 * Scrapes shipment-tracking data for DHL and GATI waybill numbers from the carriers' web pages.
 *
 * <p>{@link #QueryDHL(String)} returns the raw response text of the DHL tracking endpoint.
 * {@link #QueryGatlen(String)} and {@link #QueryGatlzg(String)} download an HTML page and turn
 * its table cells into {@link Checkpoints} entries. The HTML parsing is tied to the exact markup
 * matched by the patterns below; if a site changes its markup the methods return an empty list.
 */
public class DHLExchange {
    private static final Log log = LogFactory.getLog(DHLExchange.class);

    /** One attribute-free {@code <td>...</td>} cell; attributes are stripped before matching. */
    private static final Pattern CELL_PATTERN = Pattern.compile("<td>.*?</td>", Pattern.DOTALL);

    /** Section of the gati.com page that runs from the highlighted header row to the table end. */
    private static final Pattern GATI_EN_SECTION_PATTERN = Pattern.compile(
            "<tr bgcolor='#05a6c3' class=\"textbold\">.*?</table>", Pattern.DOTALL);

    /** Tracking table of the domestic GATI page. */
    private static final Pattern GATI_CN_TABLE_PATTERN = Pattern.compile(
            "<table width='98%' border='1' align='center' cellpadding='1' cellspacing=0 class='trackContentTable' id='oTHtable'>.*?</table>",
            Pattern.DOTALL);

    /**
     * Attribute runs removed from the domestic page so that every cell becomes a bare
     * {@code <td>}. Order matters: the longer variants must be removed before their prefixes.
     */
    private static final String[] GATI_CN_CELL_ATTRIBUTES = {
        " align='left' bgcolor='#F5F9FA' class='trackListOdd'",
        " align='center' bgcolor='#FFFFFF' class='trackListEven'",
        " align='center' bgcolor='#F5F9FA' class='trackListOdd'",
        " align='left' bgcolor='#FFFFFF' class='trackListEven'",
        " align='center' bgcolor='#FFFFFF'",
        " align='center' bgcolor='#F5F9FA'",
    };

    /**
     * Requests the DHL shipment-tracking endpoint and returns the response text.
     *
     * <p>The response lines are concatenated without line separators. The waybill number is
     * inserted into the query string as-is, so the caller must pass a URL-safe value.
     * NOTE(review): the trailing {@code _=1417403812558} parameter is a fixed literal; it looks
     * like a cache-busting timestamp - confirm whether it should be generated per request.
     *
     * @param dhlID DHL waybill number placed in the {@code AWB} query parameter
     * @return the response body decoded as UTF-8
     * @throws Exception if the URL is malformed or the request/read fails
     */
    public static String QueryDHL(String dhlID) throws Exception {
        String strURL = "http://www.cn.dhl.com/shipmentTracking?AWB=" + dhlID
                + "&countryCode=cn&languageCode=en&_=1417403812558";
        HttpURLConnection httpConn = (HttpURLConnection) new URL(strURL).openConnection();
        BufferedReader bufReader = null;
        try {
            bufReader = new BufferedReader(
                    new InputStreamReader(httpConn.getInputStream(), "utf-8"));
            StringBuilder contentBuf = new StringBuilder();
            String line;
            while ((line = bufReader.readLine()) != null) {
                contentBuf.append(line);
            }
            String gjson = contentBuf.toString();
            if (log.isDebugEnabled()) {
                log.debug("captureJavascript()的结果:\n" + gjson);
            }
            return gjson;
        } finally {
            // The original never closed the stream or the connection.
            if (bufReader != null) {
                try {
                    bufReader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the result is already built.
                }
            }
            httpConn.disconnect();
        }
    }

    /**
     * Downloads the gati.com tracking page for a docket number and extracts its checkpoints.
     *
     * <p>This is best-effort: I/O and parsing failures are logged and whatever checkpoints were
     * collected before the failure are returned.
     *
     * @param fRefNo docket number placed in the {@code dktNo} query parameter (not URL-encoded here)
     * @return the checkpoints in page order; empty if nothing could be fetched or parsed
     */
    public static List<Checkpoints> QueryGatlen(String fRefNo) {
        List<Checkpoints> poinsList = new ArrayList<Checkpoints>();
        // Built outside the try block on purpose: an invalid URI still fails fast for the caller.
        GetMethod getMethod = newGetMethod("http://www.gati.com/gatitrck.jsp?dktNo=" + fRefNo);
        try {
            parseGatiEnglishPage(executeAndRead(getMethod, "UTF-8"), poinsList);
        } catch (HttpException e) {
            // Fatal protocol problem, e.g. wrong protocol or a malformed response.
            log.error("Please check your provided http address!", e);
        } catch (IOException e) {
            // Network-level failure.
            log.error("微信gati网站网络出错:" + e, e);
        } catch (RuntimeException e) {
            // Previously swallowed silently by a return inside finally; now at least logged.
            log.error("Failed to parse gati tracking page for " + fRefNo, e);
        } finally {
            getMethod.releaseConnection();
        }
        return poinsList;
    }

    /**
     * Downloads the domestic GATI tracking page for a waybill number and extracts its checkpoints.
     *
     * <p>This is best-effort: I/O and parsing failures are logged and whatever checkpoints were
     * collected before the failure are returned. The result is reversed relative to page order.
     *
     * @param fRefNo waybill number placed in the {@code cno} query parameter (not URL-encoded here)
     * @return the checkpoints in reverse page order; empty if nothing could be fetched or parsed
     */
    public static List<Checkpoints> QueryGatlzg(String fRefNo) {
        List<Checkpoints> poinsList = new ArrayList<Checkpoints>();
        // Built outside the try block on purpose: an invalid URI still fails fast for the caller.
        GetMethod getMethod = newGetMethod(
                "http://218.244.150.40/cgi-bin/GInfo.dll?EmmisTrack&cno=" + fRefNo);
        try {
            parseGatiDomesticPage(executeAndRead(getMethod, "gb2312"), poinsList);
        } catch (HttpException e) {
            // Fatal protocol problem, e.g. wrong protocol or a malformed response.
            log.error("Please check your provided http address!", e);
        } catch (IOException e) {
            // Network-level failure.
            log.error("微信gati网站网络出错:" + e, e);
        } catch (RuntimeException e) {
            // Previously swallowed silently by a return inside finally; now at least logged.
            log.error("Failed to parse gati tracking page for " + fRefNo, e);
        } finally {
            getMethod.releaseConnection();
        }
        // Reverse the page order, also for a partially parsed result.
        Collections.reverse(poinsList);
        return poinsList;
    }

    /** Manual smoke test: prints the checkpoints of one hard-coded domestic waybill number. */
    public static void main(String[] args) throws Exception {
        List<Checkpoints> list = DHLExchange.QueryGatlzg("641449633");
        for (Checkpoints opint : list) {
            System.out.println(opint.getDate() + "=" + opint.getDescription() + "==" + opint.getLocation());
        }
    }

    /** Creates a GET request for {@code url} that uses HttpClient's default retry handler. */
    private static GetMethod newGetMethod(String url) {
        GetMethod getMethod = new GetMethod(url);
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler());
        return getMethod;
    }

    /**
     * Executes {@code getMethod} and decodes the response body with {@code charset}.
     * A non-200 status is logged but the body is still returned; the caller releases the connection.
     */
    private static String executeAndRead(GetMethod getMethod, String charset) throws IOException {
        HttpClient httpClient = new HttpClient();
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != HttpStatus.SC_OK) {
            log.error("微信gati请求网站出错:" + getMethod.getStatusLine());
        }
        byte[] responseBody = getMethod.getResponseBody();
        if (responseBody == null) {
            // No body available: treat it as an empty page instead of failing on null.
            return "";
        }
        return new String(responseBody, charset);
    }

    /**
     * Appends the checkpoints found in a gati.com page to {@code out}.
     *
     * <p>Within each matched section the first three cells are skipped; the remaining cells are
     * consumed in groups of four: date, a value appended to the date (presumably the time of
     * day - TODO confirm), location, and description.
     */
    private static void parseGatiEnglishPage(String html, List<Checkpoints> out) {
        Matcher sections = GATI_EN_SECTION_PATTERN.matcher(html);
        while (sections.find()) {
            String section = sections.group().replace(" colspan='2'", "");
            Matcher cells = CELL_PATTERN.matcher(section);
            int cellNo = 0;
            String date = "";
            String location = "";
            while (cells.find()) {
                cellNo++;
                if (cellNo <= 3) {
                    continue;
                }
                String text = cellText(cells.group());
                switch (cellNo % 4) {
                case 0:
                    date = DateUtil.getEnsYYMM(text);
                    break;
                case 1:
                    date += " " + text;
                    break;
                case 2:
                    location = text;
                    break;
                default:
                    Checkpoints point = new Checkpoints();
                    point.setDate(date + ":00");
                    point.setDescription(text);
                    point.setLocation(location);
                    out.add(point);
                    break;
                }
            }
        }
    }

    /**
     * Appends the checkpoints found in a domestic GATI page to {@code out}.
     *
     * <p>Cells of each matched table are consumed in groups of three: date, location, and
     * description. The last character of the trimmed location and description is dropped
     * (presumably a trailing separator emitted by the site - TODO confirm).
     */
    private static void parseGatiDomesticPage(String html, List<Checkpoints> out) {
        Matcher tables = GATI_CN_TABLE_PATTERN.matcher(html);
        while (tables.find()) {
            String table = tables.group();
            for (int k = 0; k < GATI_CN_CELL_ATTRIBUTES.length; k++) {
                table = table.replace(GATI_CN_CELL_ATTRIBUTES[k], "");
            }
            Matcher cells = CELL_PATTERN.matcher(table);
            int cellIndex = 0;
            String date = "";
            String location = "";
            while (cells.find()) {
                String text = cellText(cells.group());
                switch (cellIndex % 3) {
                case 0:
                    date = DateUtil.getcgYYMM1(text);
                    break;
                case 1:
                    location = text.trim();
                    break;
                default:
                    Checkpoints point = new Checkpoints();
                    point.setDate(date);
                    // Trim first, then cut: the original cut the untrimmed text using the
                    // trimmed length, which is wrong with leading whitespace and throws on "".
                    point.setDescription(dropLastChar(text.trim()));
                    point.setLocation(dropLastChar(location));
                    out.add(point);
                    break;
                }
                cellIndex++;
            }
        }
    }

    /** Returns the text between the opening {@code <td>} and the closing {@code </td>}. */
    private static String cellText(String cell) {
        return cell.substring(4, cell.lastIndexOf("<"));
    }

    /** Returns {@code s} without its last character, or {@code s} unchanged when it is empty. */
    private static String dropLastChar(String s) {
        return s.length() == 0 ? s : s.substring(0, s.length() - 1);
    }
}