可以采集的彩票类型包括:1快乐8,2双色球,3福彩3D,4七乐彩,5大乐透,6排列3,7排列5,8七星彩
本项目介绍了如何使用代理IP和多线程采集公开彩票数据,项目尚不具备使用条件,仅供学习参考
项目需要用Maven引入,打开后如果有报错,可以检查是否为JDK版本问题
运行Starter类启动爬虫
如需调整配置,请修改test.config包下面的Memory类,可以修改的内容包括:
1. 是否使用代理IP
2. 图片保存路径
3. 代理IP的API接口
4. 线程池数量
5. 默认超时时间
如果提示
获取代理IP出错: 请到 http://www.data5u.com 获取最新的代理IP-API接口,或者修改Memory.useProxyIp=false
那么按照提示关闭代理IP服务即可。
项目代码已上传到GITHUB https://github.com/mcj8089/crawl-caipiao.git
代码分为BEAN和核心采集:
CaiPiaoWinner
package test.bean;import java.io.Serializable;/*** 中奖情况*/
/**
 * Prize-tier result of a single lottery draw: how many bets won a given prize
 * and how much each of them pays.
 *
 * <p>Plain mutable bean; every property is nullable until set.
 */
public class CaiPiaoWinner implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Record id: lottery type + issue + prize name. */
    private String idx;

    /** Id of the draw this prize tier belongs to. */
    private String cpId;

    /** Prize name. */
    private String remark;

    /** Base number of winning bets. */
    private Integer baseAwardNum;

    /** Base prize amount, in yuan. */
    private Float baseAwardMoney;

    /** @return the record id (lottery type + issue + prize name) */
    public String getIdx() {
        return idx;
    }

    /** @param idx the record id (lottery type + issue + prize name) */
    public void setIdx(String idx) {
        this.idx = idx;
    }

    /** @return the id of the draw this prize tier belongs to */
    public String getCpId() {
        return cpId;
    }

    /** @param cpId the id of the draw this prize tier belongs to */
    public void setCpId(String cpId) {
        this.cpId = cpId;
    }

    /** @return the prize name */
    public String getRemark() {
        return remark;
    }

    /** @param remark the prize name */
    public void setRemark(String remark) {
        this.remark = remark;
    }

    /** @return the base number of winning bets */
    public Integer getBaseAwardNum() {
        return baseAwardNum;
    }

    /** @param baseAwardNum the base number of winning bets */
    public void setBaseAwardNum(Integer baseAwardNum) {
        this.baseAwardNum = baseAwardNum;
    }

    /** @return the base prize amount, in yuan */
    public Float getBaseAwardMoney() {
        return baseAwardMoney;
    }

    /** @param baseAwardMoney the base prize amount, in yuan */
    public void setBaseAwardMoney(Float baseAwardMoney) {
        this.baseAwardMoney = baseAwardMoney;
    }
}
CaiPiaoIssue
package test.bean;import java.io.Serializable;
import java.util.List;/*** 中奖情况*/
/**
 * One lottery draw (issue): when it was opened, the drawn numbers, the sales
 * and prize-pool amounts, and the per-prize winner breakdown.
 *
 * <p>Plain mutable bean; every property is nullable until set.
 */
public class CaiPiaoIssue implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Draw id: lottery type + issue. */
    private String cpId;

    /**
     * Lottery type code: 1 Kuaile 8, 2 Double Color Ball, 3 Welfare 3D, 4 Qilecai,
     * 5 Super Lotto, 6 Pailie 3, 7 Pailie 5, 8 Qixingcai.
     */
    private Integer type;

    /** Issue number. */
    private String issue;

    /** Draw (opening) time. */
    private String openTime;

    /** Sales amount. */
    private Float saleMoney;

    /** Prize-pool amount. */
    private Float prizePoolMoney;

    /** Deadline for claiming prizes. */
    private String deadlineAwardDate;

    /** Winning numbers, front zone. */
    private String frontWinningNum;

    /** Winning numbers, back zone. */
    private String backWinningNum;

    /**
     * Winner breakdown. Kept as a raw {@code List} so existing callers compile
     * unchanged; presumably holds {@code CaiPiaoWinner} elements - TODO confirm
     * and parameterize.
     */
    private List winnerList;

    /** @return the draw id (lottery type + issue) */
    public String getCpId() {
        return cpId;
    }

    /** @param cpId the draw id (lottery type + issue) */
    public void setCpId(String cpId) {
        this.cpId = cpId;
    }

    /** @return the lottery type code (1-8) */
    public Integer getType() {
        return type;
    }

    /** @param type the lottery type code (1-8) */
    public void setType(Integer type) {
        this.type = type;
    }

    /** @return the issue number */
    public String getIssue() {
        return issue;
    }

    /** @param issue the issue number */
    public void setIssue(String issue) {
        this.issue = issue;
    }

    /** @return the draw time */
    public String getOpenTime() {
        return openTime;
    }

    /** @param openTime the draw time */
    public void setOpenTime(String openTime) {
        this.openTime = openTime;
    }

    /** @return the sales amount */
    public Float getSaleMoney() {
        return saleMoney;
    }

    /** @param saleMoney the sales amount */
    public void setSaleMoney(Float saleMoney) {
        this.saleMoney = saleMoney;
    }

    /** @return the prize-pool amount */
    public Float getPrizePoolMoney() {
        return prizePoolMoney;
    }

    /** @param prizePoolMoney the prize-pool amount */
    public void setPrizePoolMoney(Float prizePoolMoney) {
        this.prizePoolMoney = prizePoolMoney;
    }

    /** @return the deadline for claiming prizes */
    public String getDeadlineAwardDate() {
        return deadlineAwardDate;
    }

    /** @param deadlineAwardDate the deadline for claiming prizes */
    public void setDeadlineAwardDate(String deadlineAwardDate) {
        this.deadlineAwardDate = deadlineAwardDate;
    }

    /** @return the front-zone winning numbers */
    public String getFrontWinningNum() {
        return frontWinningNum;
    }

    /** @param frontWinningNum the front-zone winning numbers */
    public void setFrontWinningNum(String frontWinningNum) {
        this.frontWinningNum = frontWinningNum;
    }

    /** @return the back-zone winning numbers */
    public String getBackWinningNum() {
        return backWinningNum;
    }

    /** @param backWinningNum the back-zone winning numbers */
    public void setBackWinningNum(String backWinningNum) {
        this.backWinningNum = backWinningNum;
    }

    /** @return the winner breakdown list (raw type, see field note) */
    public List getWinnerList() {
        return winnerList;
    }

    /** @param winnerList the winner breakdown list (raw type, see field note) */
    public void setWinnerList(List winnerList) {
        this.winnerList = winnerList;
    }
}
CaipiaoHistoryCrawler
package test.crawler;import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;import test.bean.CaiPiaoIssue;
import test.bean.CaiPiaoWinner;
import test.config.Memory;
import test.util.CrawlerUtil;
import test.util.LogUtil;
import test.util.StrUtil;/*** 彩票历史*/
public class CaipiaoHistoryCrawler extends Crawler {String TAG = "CaipiaoHistoryCrawler";Map headerMap = new HashMap();int retryTime = 3;AtomicInteger atoInt = new AtomicInteger(1);Set uniqSet = new HashSet();public void crawl() {headerMap.put("Accept", "*/*");headerMap.put("Accept-Encoding", "gzip, deflate, br");headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");headerMap.put("Connection", "keep-alive");headerMap.put("Cookie", "BAIDU_SSP_lcr=https://www.baidu.com/link?url=riNXkDsMHCOiaKifIQRKh0P3RuASJjDVfIvNZy0PFwS&wd=&eqid=8a03215500000b570000000360dbeecd; _ga=GA1.2.1911959757.1625027094; _gid=GA1.2.724130032.1625027094; PHPSESSID=45a4gkalmomcnbjabcvkmij3p3; Hm_lvt_12e4883fd1649d006e3ae22a39f97330=1625027094; Hm_lvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027095; _gat_UA-66069030-3=1; Hm_lpvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027400; Hm_lpvt_12e4883fd1649d006e3ae22a39f97330=1625027400; KLBRSID=13ce4968858adba085afff577d78760d|1625027411|1625027093");headerMap.put("Host", "jc.zhcw.com");headerMap.put("Referer", "https://www.zhcw.com/kjxx/pl3/kjxq/");headerMap.put("Sec-Fetch-Dest", "script");headerMap.put("Sec-Fetch-Mode", "no-cors");headerMap.put("Sec-Fetch-Site", "same-site");crawlZhongCai(1);crawlZhongCai(2);crawlZhongCai(3);crawlZhongCai(4);crawlZhongCai(5);crawlZhongCai(6);crawlZhongCai(7);crawlZhongCai(8);LogUtil.logInfo(TAG, "采集任务已完成");}// 彩票类型:1快乐8,2双色球,3福彩3D,4七乐彩,5大乐透,6排列3,7排列5,8七星彩private void crawlZhongCai(Integer type) {Set issueSet = new HashSet();String prefix = "";String surfix = ".html";if( type == 1 ) {issueSet = getIssueSet("https://www.ydniu.com/open/kl8.aspx");prefix = "https://www.ydniu.com/open/kl8/";} else if( type == 2 ) {issueSet = getIssueSet("https://www.ydniu.com/open/ssq.aspx");prefix = "https://www.ydniu.com/open/ssq/";} else if( type == 3 ) {issueSet = getIssueSet("https://www.ydniu.com/open/sd.aspx");prefix = "https://www.ydniu.com/open/sd/";} else if( type == 4 ) {issueSet = getIssueSet("https://www.ydniu.com/open/qlc.aspx");prefix 
= "https://www.ydniu.com/open/qlc/";} else if( type == 5 ) {issueSet = getIssueSet("https://www.ydniu.com/open/dlt.aspx");prefix = "https://www.ydniu.com/open/dlt/";} else if( type == 6 ) {issueSet = getIssueSet("https://www.ydniu.com/open/pl3.aspx");prefix = "https://www.ydniu.com/open/pl3/";} else if( type == 7 ) {issueSet = getIssueSet("https://www.ydniu.com/open/pl5.aspx");prefix = "https://www.ydniu.com/open/pl5/";} else if( type == 8 ) {issueSet = getIssueSet("https://www.ydniu.com/open/qxc.aspx");prefix = "https://www.ydniu.com/open/qxc/";}for( String issue : issueSet ) {final String fPrefix = prefix;Memory.threadPool.execute(new Runnable() {@Overridepublic void run() {if( !uniqSet.add(issue) ) {return;}try {// STARTString url = fPrefix + issue + surfix;String html = null;for( int i = 1; i <= retryTime; i ++ ) {try {if( i == retryTime && Memory.useProxyIp ) {html = CrawlerUtil.getHtml(url, false, false, Memory.DEFAULT_TIMEOUT, headerMap);} else {html = CrawlerUtil.getHtml(url, Memory.useProxyIp, false, Memory.DEFAULT_TIMEOUT, headerMap);}if( StrUtil.isNotEmpty(html) && html.contains("Bad Gateway: www.ydniu.com:443") || html.contains("白名单校验失败") ) {i = i - 1;continue;}if( StrUtil.isNotEmpty(html) ) {break;}} catch ( Exception e ) {LogUtil.logInfo(TAG, "采集分期报错", e);}}if( StrUtil.isNotEmpty(html) ) {try {Document startDoc = Jsoup.parse(html);CaiPiaoIssue caiPiaoIssue = new CaiPiaoIssue();caiPiaoIssue.setIssue(issue);Elements openNumberRedEl = startDoc.select("#openNumber i");Elements openNumberBlueEl = startDoc.select("#openNumber em");StringBuilder redBallSB = new StringBuilder();for( Element el : openNumberRedEl ) {redBallSB.append(el.text()).append(",");}StringBuilder blueBallSB = new StringBuilder();for( Element el : openNumberBlueEl ) {blueBallSB.append(el.text()).append(",");}String temp = startDoc.select("#openDate").text();String openTime = temp.split(",")[0].replace("开奖日期:", "");String deadlineAwardDate = temp.split(",")[1].replace("兑奖截止日期:", "");String 
frontWinningNum = redBallSB.toString();String backWinningNum = blueBallSB.toString();Float saleMoney = Float.valueOf(startDoc.select("#sumSales").text().replace(",", ""));Float prizePoolMoney = Float.valueOf(startDoc.select("#prizePool").text().replace(",", ""));frontWinningNum = frontWinningNum.substring(0, frontWinningNum.length() - 1);backWinningNum = backWinningNum.substring(0, backWinningNum.length() - 1);caiPiaoIssue.setBackWinningNum(backWinningNum);caiPiaoIssue.setDeadlineAwardDate(deadlineAwardDate);caiPiaoIssue.setFrontWinningNum(frontWinningNum);caiPiaoIssue.setOpenTime(openTime);caiPiaoIssue.setPrizePoolMoney(prizePoolMoney);caiPiaoIssue.setSaleMoney(saleMoney);caiPiaoIssue.setType(type);caiPiaoIssue.setCpId(type + issue);List winnerList = new ArrayList();Elements trs = startDoc.select("#t_WinType tr");for( Element tr : trs ) {String name = tr.select("td").get(0).text();String num = tr.select("td").get(1).text();String money = tr.select("td").get(2).text();CaiPiaoWinner winner = new CaiPiaoWinner();winner.setBaseAwardMoney( Float.valueOf(money) );winner.setBaseAwardNum(Integer.valueOf(num));winner.setRemark(name);winner.setCpId(caiPiaoIssue.getCpId());winner.setIdx(type + issue + name);winnerList.add(winner);}crawlToDB(winnerList);} catch (Exception e) {e.printStackTrace();}} // END} catch (Exception e) {e.printStackTrace();}}});}}private Set getIssueSet(String url) {headerMap.put("Host", "www.ydniu.com");headerMap.put("Referer", "https://www.ydniu.com/open/ssq.aspx");String html = null;for( int i = 1; i <= retryTime; i ++ ) {try {if( i == retryTime && Memory.useProxyIp ) {html = CrawlerUtil.getHtml(url, false, false, Memory.DEFAULT_TIMEOUT, headerMap);} else {html = CrawlerUtil.getHtml(url, Memory.useProxyIp, false, Memory.DEFAULT_TIMEOUT, headerMap);}if( StrUtil.isNotEmpty(html) ) {break;}} catch ( Exception e ) {LogUtil.logInfo(TAG, "采集分期报错", e);}}Set reSet = new HashSet<>();if( StrUtil.isNotEmpty(html) ) {if( html.contains("Bad Gateway: 
www.ydniu.com:443") || html.contains("白名单校验失败") ) {return getIssueSet(url);}Document document = Jsoup.parse(html);Elements as = document.select(".iSelectBox .iSelectList.listOverFlow a");for( Element el : as ) {reSet.add(el.text());}}return reSet;}}