利用java代码自动下载嗨学网的学习视频

来源:互联网 发布:网络销售渠道有哪2种 编辑:程序博客网 时间:2024/06/17 18:34

需求:

将嗨学网上所有需要学习的视频下载下来,并保存为合适的名字。


由于对前端知识不太了解,完成过程比较困难。

关键点分析:

1.拿到下载视频的链接

第一想法是分析网页源码得到组装链接的逻辑,然后用java代码组装,但是网页源代码太复杂,而且对前端实在是不熟,最终采用了一个投机取巧的方法:在点击下载的同时用截屏捕捉下载地址。


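捕捉到的下载地址形如 http://haixue.com/goods/downloadUrl.do?itemId=161789577&type=Video&isCatalog=No&goodsId=41889 ,其中有两个变量:itemId 和 goodsId。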

再结合源代码进行查找分析得知itemId就是goodsCatalogVideoId,goodsId一直是41889不变。

这样一来就得到了下载视频的地址。

2.通过登录验证

之前一直认为登录之后需要自己保存一些cookie相关的东西,分析了一下网页返回的cookie,比较复杂,不知如何下手。一番搜索之后得知,只需要让登录和下载使用同一个会话(即同一个HttpClient实例)就可以了,cookie之类的东西都不用自己操心。(详情见代码)

3.正则匹配,得到课程名字以及goodsCatalogVideoId。

这个过程不是很复杂,详情见代码。


源代码贴上,方便自己将来查看,也许对他人也会有些许帮助。

package Spider;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HeaderIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Logs in to haixue.com, scrapes the course page for video names and ids, and
 * downloads every video found into {@link #path}.
 *
 * <p>All requests go through the single shared {@link #client}, so the session
 * established by {@link #login()} is reused by the later page and download
 * requests without any manual cookie handling.
 */
public class DownloadVideo {

    static String loginUrl = "http://haixue.com/doLogin.do";

    // Example of a captured download URL:
    // http://haixue.com/goods/downloadUrl.do?itemId=161789577&type=Video&isCatalog=No&goodsId=41889

    /** Directory prefix that downloaded files are written under. */
    static String path = "E:/videos/";

    /** Shared client; reusing it keeps login and downloads in one session. */
    static CloseableHttpClient client = HttpClients.createDefault();

    // key: goodsCatalogVideoId; value: [0] course name, [1] goodsId (only after getAnotherId()).
    // goodsId later turned out never to change, so the data structure was left as it is.
    static Map<String, List<String>> coureseInfoMap = new LinkedHashMap<String, List<String>>();

    /** Charset used to decode pages when the response does not declare one. */
    private static final String PAGE_CHARSET = "UTF-8";

    /**
     * Matches one course entry on the course page: group 1 is the course title,
     * group 2 is the goodsCatalogVideoId. Every {@code .*?} is non-greedy.
     */
    private static final Pattern COURSE_PATTERN = Pattern.compile(
            "<div class=\"tit\">(.*?)</div>.*?"
                    + "<div class=\"con-bottom hideinfo\">.*?"
                    + "<input type=\"hidden\" value=\".*?\"/>.*?"
                    + "<input type=\"hidden\" value=\"(.*?)\"/>.*?"
                    + "<span>时长 </span>.*?"
                    + "<span>已观看 </span>.*?"
                    + "</div>",
            Pattern.DOTALL);

    /** Matches the hidden goodsId input on a watch page; group 1 is the goodsId. */
    private static final Pattern GOODS_ID_PATTERN =
            Pattern.compile("<input type=\"hidden\" id=\"goodsId\" value=\"(.*?)\"/>");

    /** Characters that must not appear in a file name (Windows rules plus control characters). */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /**
     * Entry point: log in, fetch the course page, extract ids and names, download.
     * For offline debugging the page can instead be loaded from a saved file with
     * {@link #read(String)}; {@link #getAnotherId()} is optional and not called here.
     */
    public static void main(String[] args) {
        try {
            login();
            String page = getCoursePage();
            getIdAndName(page);
            download();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches the course page that lists all videos.
     *
     * @return the page body as text
     * @throws IOException if the request fails or the response has no body
     *         (previously the failure was swallowed and surfaced as a NullPointerException)
     * @throws ParseException if the response headers cannot be parsed
     */
    public static String getCoursePage() throws ParseException, IOException {
        return fetchPage(
                "http://haixue.com/course/video/watchVideo.do?goodsCatalogVideoId=161789633&goodsId=0");
    }

    /** Downloads every video recorded in {@link #coureseInfoMap} as {@code <course name>.flv}. */
    public static void download() {
        for (Entry<String, List<String>> course : coureseInfoMap.entrySet()) {
            String url = "http://haixue.com/goods/downloadUrl.do?itemId=" + course.getKey()
                    + "&type=Video&isCatalog=No&goodsId=41889";
            // The name is scraped from HTML, so strip characters a file name cannot contain.
            String fileName = sanitizeFileName(course.getValue().get(0)) + ".flv";
            downloadVideo(url, fileName);
        }
    }

    /**
     * Visits the watch page of every recorded video and appends the goodsId found
     * there to that video's info list. Entries without a goodsId are removed.
     * The first failing request aborts the remaining lookups (the error is printed).
     */
    public static void getAnotherId() {
        try {
            // Explicit iterator: entries are removed while iterating, which would throw
            // ConcurrentModificationException if done through the map itself.
            Iterator<Entry<String, List<String>>> it = coureseInfoMap.entrySet().iterator();
            while (it.hasNext()) {
                Entry<String, List<String>> course = it.next();
                String page = fetchPage(
                        "http://haixue.com/course/video/watchVideo.do?goodsCatalogVideoId="
                                + course.getKey() + "&goodsId=0");
                List<String> info = course.getValue();
                Matcher m = GOODS_ID_PATTERN.matcher(page);
                if (m.find()) {
                    info.add(m.group(1));
                } else {
                    System.out.println("没有找到goodsId:" + info.get(0));
                    it.remove();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts every (course name, goodsCatalogVideoId) pair from the course page
     * and stores it in {@link #coureseInfoMap}, printing each pair as it is found.
     *
     * @param in HTML of the course page
     */
    public static void getIdAndName(String in) {
        Matcher m = COURSE_PATTERN.matcher(in);
        while (m.find()) {
            String name = m.group(1).trim();
            String goodsCatalogVideoId = m.group(2);
            List<String> info = new ArrayList<String>();
            info.add(name);
            System.out.println("课程名称:" + name + "\ngoodsCatalogVideoId:" + goodsCatalogVideoId);
            coureseInfoMap.put(goodsCatalogVideoId, info);
        }
    }

    /**
     * Posts the login form through the shared client and prints the response.
     *
     * @throws Exception if the login request fails; continuing without a session
     *         would only make every later request fail
     */
    public static void login() throws Exception {
        HttpPost httpPost = new HttpPost(loginUrl);
        Map<String, String> parameterMap = new HashMap<String, String>();
        parameterMap.put("j_username", "******");
        parameterMap.put("j_password", "****");
        parameterMap.put("_spring_security_remember_me", "no");
        httpPost.setEntity(new UrlEncodedFormEntity(getParam(parameterMap), "UTF-8"));
        System.out.println("request line:" + httpPost.getRequestLine());
        HttpResponse httpResponse = client.execute(httpPost);
        printResponse(httpResponse);
    }

    /**
     * Downloads one URL into {@code path + fileName}. Failures are printed, not thrown,
     * so one bad video does not stop the remaining downloads.
     *
     * @param downloadUrl URL to fetch
     * @param fileName file name (relative to {@link #path}) to write to
     */
    public static void downloadVideo(String downloadUrl, String fileName) {
        InputStream in = null;
        BufferedOutputStream out = null;
        try {
            System.out.println("strart download video:" + fileName);
            HttpResponse httpResponse = client.execute(new HttpGet(downloadUrl));
            HttpEntity entity = httpResponse.getEntity();
            int status = httpResponse.getStatusLine().getStatusCode();
            if (entity == null || status < 200 || status >= 300) {
                // Do not save an error page as if it were a video.
                System.out.println("download failed:" + fileName + " status:" + httpResponse.getStatusLine());
                EntityUtils.consume(entity); // releases the connection; no-op for null
                return;
            }
            in = entity.getContent();

            File target = new File(path + fileName);
            File parent = target.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            out = new BufferedOutputStream(new FileOutputStream(target));

            byte[] buffer = new byte[1024 * 1024];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
            // Close inside the try so a failed final flush is reported instead of "finished!".
            out.close();
            out = null;
            System.out.println("finished!");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(out);
            closeQuietly(in); // closing the content stream also releases the connection
        }
    }

    /**
     * Prints the status line, all headers and, if present, the body of a response.
     * Reading the body consumes the response entity.
     *
     * @param httpResponse response to print
     * @throws IOException if the body cannot be read
     * @throws ParseException if the response headers cannot be parsed
     */
    public static void printResponse(HttpResponse httpResponse) throws ParseException, IOException {
        HttpEntity entity = httpResponse.getEntity();
        System.out.println("status:" + httpResponse.getStatusLine());
        System.out.println("headers:");
        HeaderIterator iterator = httpResponse.headerIterator();
        while (iterator.hasNext()) {
            System.out.println("\t" + iterator.next());
        }
        if (entity != null) {
            String responseString = EntityUtils.toString(entity);
            System.out.println("response length:" + responseString.length());
            System.out.println("response content:" + responseString.replace("\r\n", ""));
        }
    }

    /**
     * Converts a map of form fields into the name/value pair list HttpClient expects.
     *
     * @param parameterMap form fields; keys and values must be {@code String}s
     * @return one pair per map entry, in the map's iteration order
     * @throws ClassCastException if a key or value is not a {@code String}
     */
    public static List<NameValuePair> getParam(Map<?, ?> parameterMap) {
        List<NameValuePair> param = new ArrayList<NameValuePair>();
        for (Entry<?, ?> entry : parameterMap.entrySet()) {
            param.add(new BasicNameValuePair((String) entry.getKey(), (String) entry.getValue()));
        }
        return param;
    }

    /**
     * Reads a UTF-8 text file line by line.
     *
     * @param filename path of the file to read
     * @return the file content, with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String read(String filename) throws IOException {
        BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), "UTF-8"));
        try {
            StringBuilder sb = new StringBuilder();
            String s;
            while ((s = in.readLine()) != null) {
                sb.append(s).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Executes a GET through the shared client and returns the response body as text. */
    private static String fetchPage(String url) throws ParseException, IOException {
        HttpResponse httpResponse = client.execute(new HttpGet(url));
        HttpEntity entity = httpResponse.getEntity();
        if (entity == null) {
            throw new IOException("no response body from " + url + ": " + httpResponse.getStatusLine());
        }
        // The second argument is only a fallback for responses that declare no charset.
        return EntityUtils.toString(entity, PAGE_CHARSET);
    }

    /** Replaces every character that is illegal in a file name with an underscore. */
    private static String sanitizeFileName(String name) {
        return ILLEGAL_FILE_NAME_CHARS.matcher(name).replaceAll("_");
    }

    /** Closes a resource during cleanup, ignoring null and any failure from close(). */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Cleanup path only: the primary error, if any, has already been reported.
        }
    }
}


结果展示: