/skkj_server-english-webmagic-0580f61df038/src/main/java/net/onemost/english/service/RepoPageProcessorService.java
Java | 301 lines | 177 code | 43 blank | 81 comment | 15 complexity | 86b4046644d3a8901d7afd897b8f42bc MD5 | raw file
- package net.onemost.english.service;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import com.alibaba.fastjson.JSON;
- import com.alibaba.fastjson.JSONObject;
- import net.onemost.english.dao.BookMapper;
- import net.onemost.english.dao.WordMapper;
- import net.onemost.english.dto.LoginInfo;
- import net.onemost.english.model.Book;
- import net.onemost.english.model.Word;
- import net.onemost.english.util.DBTools;
- import net.onemost.english.util.FileOpertionUtil;
- import net.onemost.english.util.GetCode;
- import us.codecraft.webmagic.Page;
- import us.codecraft.webmagic.Request;
- import us.codecraft.webmagic.Site;
- import us.codecraft.webmagic.Spider;
- import us.codecraft.webmagic.processor.PageProcessor;
- import us.codecraft.webmagic.utils.HttpConstant;
- public class RepoPageProcessorService implements PageProcessor {
- private static BookMapper bookMapper = DBTools.getSession().getMapper(BookMapper.class);
-
- private static WordMapper wordMapper = DBTools.getSession().getMapper(WordMapper.class);
-
- private static RepoPageProcessorService repoPageProcessorService;
- private Page page;
- private static final String POST_LOGIN_URL = "https://www.initwords.com/login/authless/ajaxLogin.do";
- private Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(3000)
- .addHeader("Accept",
- "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")
- .addHeader("Accept-Encoding", "gzip, deflate, br").addHeader("Accept-Language", "zh-CN,zh;q=0.9")
- .addHeader("Cache-Control", "no-cache")
- .addHeader("User-Agent",
- "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36")
- .addCookie("Hm_lvt_49a5957871e8051bc1a873596375812d", "1520235720,1520240018,1520298497,1520298867")
- .addCookie("Hm_lpvt_49a5957871e8051bc1a873596375812d", "1520298867")
- .addHeader("Referer", "https://www.initwords.com/studycenter/studyhome.jsp")
- .addHeader("Origin", "https://www.initwords.com");
- public RepoPageProcessorService(String jsessionId, String userCode, String tokenCode) {
- site.addHeader("EM-TokenCode", tokenCode).addHeader("EM-UserCode", userCode).addCookie("JSESSIONID",
- jsessionId);
- }
- public RepoPageProcessorService(String jsessionId) {
- site.addCookie("JSESSIONID", jsessionId);
- }
- @Override
- public void process(Page page) {
- this.page = page;
- }
- @Override
- public Site getSite() {
- return site;
- }
- public Page getPage() {
- return page;
- }
-
- public static void start(LoginInfo loginInfo) throws IOException {
- // 获取该用户订阅的所有课程
- repoPageProcessorService = new RepoPageProcessorService(loginInfo.getJsessionId(), loginInfo.getUserCode(),
- loginInfo.getTokenCode());
- Spider spider = Spider.create(repoPageProcessorService);
- visiteHomePage(spider, loginInfo.getUserCode());
- Page page = repoPageProcessorService.getPage();
- JSONObject jsonObject = JSON.parseObject(page.getJson().toString());
- List<JSONObject> list = JSON.parseArray(jsonObject.getJSONObject("data").getString("userbooks"),
- JSONObject.class);
- if (list.size() == 0) {
- throw new RuntimeException("获取课程失败");
- }
- List<String> bookModelCodes = new ArrayList<>();
- for (JSONObject node : list) {
- bookModelCodes.add(node.getString("moduleCode"));
- }
- // 开始抓取单词
- getLearnInfo(spider, bookModelCodes, loginInfo.getUserCode());
- }
- /*public static void start(String jsessionId, String username, String password) throws IOException {
- //获取登录信息
- LoginInfo loginInfo = login(jsessionId, username, password);
- // 获取该用户订阅的所有课程
- repoPageProcessorService = new RepoPageProcessorService(loginInfo.getJsessionId(), loginInfo.getUserCode(),
- loginInfo.getTokenCode());
- Spider spider = Spider.create(repoPageProcessorService);
- visiteHomePage(spider, loginInfo.getUserCode());
- Page page = repoPageProcessorService.getPage();
- JSONObject jsonObject = JSON.parseObject(page.getJson().toString());
- List<JSONObject> list = JSON.parseArray(jsonObject.getJSONObject("data").getString("userbooks"),
- JSONObject.class);
- if (list.size() == 0) {
- throw new RuntimeException("获取课程失败");
- }
- List<String> bookModelCodes = new ArrayList<>();
- for (JSONObject node : list) {
- bookModelCodes.add(node.getString("moduleCode"));
- }
- // 开始抓取单词
- getLearnInfo(spider, bookModelCodes, loginInfo.getUserCode());
- }*/
- protected static LoginInfo login(String jsessionId, String username, String password) throws IOException {
- //查询是否存在配置文件,存在则读取配置文件里面的信息
- if(FileOpertionUtil.getLoginFile()) {
- Map<String, String> map = FileOpertionUtil.getConfig();
- LoginInfo loginInfo = new LoginInfo();
- loginInfo.setJsessionId(map.get("session"));
- loginInfo.setTokenCode(map.get("tokenCode"));
- loginInfo.setUserCode(map.get("userCode"));
- return loginInfo;
- }else {
- repoPageProcessorService = new RepoPageProcessorService(jsessionId);
- Spider spider = Spider.create(repoPageProcessorService);
- String url = POST_LOGIN_URL + "?userId=" + username + "&userPwd=" + password
- + "&siteName=xfinit&loginType=studentLogin";
-
- Request request = new Request(url);
- Map<String, Object> variales = new HashMap<>();
- request.setMethod(HttpConstant.Method.POST);
- request.setExtras(variales);
- spider.addRequest(request);
- spider.run();
- Page page = repoPageProcessorService.getPage();
- LoginInfo loginInfo = GetCode.getInfo(page);
- FileOpertionUtil.createLoginFile(jsessionId, loginInfo.getUserCode(), loginInfo.getTokenCode());
- return loginInfo;
- }
- }
- /**
- * 获取课程信息
- *
- * @param spider
- */
- protected static void visiteHomePage(Spider spider, String userCode) {
- Request request = new Request(
- "https://www.initwords.com/book/getStudentBooks.do?studentCode=" + userCode + "&isNeedStudyPos=true");
- Map<String, Object> variales = new HashMap<>();
- request.setMethod(HttpConstant.Method.POST);
- request.setExtras(variales);
- spider.addRequest(request);
- spider.run();
- }
- /**
- * 获取模块信息
- *
- * @param spider
- */
- protected static void getLearnInfo(Spider spider, List<String> codes, String userCode) {
- for (String string : codes) {
- Book book = getBookInfo(string, userCode, spider);
- if(book == null) {
- continue;
- }
- Integer totalNum = book.getTotalunitnbr();
- for (int i = book.getStartNum(); i <= totalNum + book.getStartNum(); i++) {
- List<Word> list = getWordList(book.getBookCode(), userCode, spider, i, book.getId());
- }
- }
- }
- /**
- * 获取书本信息
- *
- * @param codes
- * @param userCode
- * @param spider
- * @return
- */
- protected static Book getBookInfo(String codes, String userCode, Spider spider) {
- String url = "https://www.initwords.com/unitstudy/ajaxUnitStudy.do?unitNbr=1&moduleCode=" + codes + "&userCode="
- + userCode;
- Request request = new Request(url);
- request.setMethod(HttpConstant.Method.POST);
- spider.addRequest(request);
- spider.run();
- Page page = repoPageProcessorService.getPage();
- JSONObject jsonObject = JSON.parseObject(page.getJson().toString()).getJSONObject("data")
- .getJSONObject("bookInfo");
- if (jsonObject == null) {
- return null;
- }
-
- Book book = new Book();
- book.setBookCode(jsonObject.getString("moduleCode"));
- book.setBookName(jsonObject.getString("bookName"));
- book.setIntroduce(jsonObject.getString("introduce"));
- book.setTotalunitnbr(jsonObject.getInteger("totalUnitNbr"));
- book.setModelName(jsonObject.getString("moduleName"));
- book.setPublisher(jsonObject.getString("publisher"));
- book.setBookGroupName(jsonObject.getString("bookGroupName"));
- book.setGroupName(jsonObject.getString("groupName"));
- book.setStartNum(jsonObject.getInteger("startFrom"));
- List<Book> bookList = bookMapper.selectAll();
- /*System.out.println(book.getBookName());
- if(!bookList.isEmpty()) {
- for(Book bk:bookList) {
- System.out.println(bk.getBookName());
- if(!bk.getBookName().equals(book.getBookName())) {
- bookMapper.insert(book);
- DBTools.getSession().commit();
- return book;
- }
- }
- }*/
- // return null;
- // bookMapper.insert(book);
- List<String> courseNameList = new ArrayList<>();
- for(Book bk:bookList) {
- courseNameList.add(bk.getBookName());
- }
- if(!courseNameList.contains(book.getBookName())) {
- bookMapper.insert(book);
- DBTools.getSession().commit();
- return book;
- }
- return null;
- // bookMapper.insert(book);
- // DBTools.getSession().commit();
- // return book;
- }
- /**
- * 获取单词列表
- *
- * @param bookCode
- * @param userCode
- * @param spider
- * @param current
- * @return
- */
- protected static List<Word> getWordList(String bookCode, String userCode, Spider spider, Integer current, Integer bookId) {
- String url = "https://www.initwords.com/unitstudy/ajaxUnitStudy.do?unitNbr=" + current + "&moduleCode="
- + bookCode + "&userCode=" + userCode;
- Request request = new Request(url);
- request.setMethod(HttpConstant.Method.POST);
- spider.addRequest(request);
- spider.run();
- Page page = repoPageProcessorService.getPage();
- JSONObject jsonObject = JSON.parseObject(page.getJson().toString()).getJSONObject("data");
- List<Word> words = JSON.parseArray(jsonObject.getString("vocList"), Word.class);
- if(words == null) {
- return null;
- }
-
- for (Word word : words) {
- // word.setBookId(bookId);
- // List<Word> wordList = wordMapper.selectAll();
- // for(Word word2:wordList) {
- // if(word2.getBookId()!=bookId && !word2.getSpelling().equals(word.getSpelling())) {
- // wordMapper.insert(word);
- // }
- // }
- word.setBookId(bookId);
- wordMapper.insert(word);
-
- }
- DBTools.getSession().commit();
- return words;
- }
- }