Android 使用Jsoup解析html+下载图片

来源:互联网 发布:薛凯琪房祖名 知乎 编辑:程序博客网 时间:2024/06/05 18:30

       最近想鼓捣一下CSDN客户端,这篇博客主要介绍如何使用Jsoup解析html页面通过标签获取所需内容,并下载指定图片资源。

一、导入Jsoup JAR包

       JAR包下载地址:jsoup 1.6.1

注意:导入包到项目时,应将解压后的jar文件全部直接复制到项目的libs目录下;如果不这样做,运行时会报错。




二、下载html页面并解析

代码:

package com.example.testcsdn;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.util.Log;

/**
 * Downloads the resource behind a URL and parses the returned HTML into a
 * list of {@link Blog} objects.
 */
public class BlogsFetchr {
    private static final String TAG = "BlogsFetchr";

    /**
     * Downloads the resource identified by {@code urlSpec}.
     *
     * @param urlSpec absolute URL of the resource
     * @return the raw response body, or {@code null} when the server does not
     *         answer with HTTP 200
     * @throws IOException if the URL is malformed or the transfer fails
     */
    public byte[] getUrlBytes(String urlSpec) throws IOException {
        URL url = new URL(urlSpec);
        // The cast is required because getResponseCode() is declared on HttpURLConnection.
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            // Check the status BEFORE opening the body stream: getInputStream()
            // throws for error responses, which would bypass this branch.
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                Log.i(TAG, "连接不成功");
                return null;
            }

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            InputStream in = conn.getInputStream();
            try {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            } finally {
                in.close();
            }
            return out.toByteArray();
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Downloads the resource identified by {@code urlSpec} and decodes it as
     * UTF-8 text.
     *
     * @return the response body as a String, or {@code null} if the download
     *         failed or the server did not answer with HTTP 200
     */
    private String getUrl(String urlSpec) {
        try {
            byte[] bytes = getUrlBytes(urlSpec);
            if (bytes == null) {
                return null;
            }
            // UnsupportedEncodingException is an IOException and is handled below.
            return new String(bytes, "UTF-8");
        } catch (IOException e) {
            Log.e(TAG, "Failed to download " + urlSpec, e);
            return null;
        }
    }

    /**
     * Downloads the page at {@code urlSpec} and extracts its blog entries.
     *
     * @param urlSpec absolute URL of the page listing the blogs
     * @return the parsed entries; never {@code null}, empty when the page
     *         could not be downloaded or contains no recognizable entries
     */
    public ArrayList<Blog> downloadBlogItems(String urlSpec) {
        ArrayList<Blog> blogs = new ArrayList<>();
        String htmlString = getUrl(urlSpec);
        // Jsoup.parse rejects null input, so only parse a successful download.
        if (htmlString != null) {
            parserItems(blogs, htmlString);
        }
        return blogs;
    }

    /**
     * Parses {@code htmlString} and appends one {@link Blog} to {@code blogs}
     * for every element of class {@code blog_list} that has the expected
     * structure. Entries with missing elements are logged and skipped.
     */
    private void parserItems(ArrayList<Blog> blogs, String htmlString) {
        Document doc = Jsoup.parse(htmlString);
        Elements units = doc.getElementsByClass("blog_list");
        for (int i = 0; i < units.size(); i++) {
            Element unit = units.get(i);

            // Expected shape: <dl><dt><a><img src="..."/></a></dt>...</dl>
            Element dl = firstOrNull(unit.getElementsByTag("dl"));
            Element dt = (dl == null) ? null : firstOrNull(dl.getElementsByTag("dt"));
            Element iconImage = firstChildOrNull(firstChildOrNull(dt));

            // The first child of the first "fl" element carries the blogger id.
            Element bloggerLink = firstChildOrNull(firstOrNull(unit.getElementsByClass("fl")));

            if (iconImage == null || bloggerLink == null) {
                Log.w(TAG, "Skipping entry " + i + ": unexpected HTML structure");
                continue;
            }

            String iconUrl = iconImage.attr("src"); // blogger avatar URL
            Log.i(TAG, "文章" + i + "的博主头像链接:" + iconUrl);

            String bloggerId = bloggerLink.text(); // blogger id
            Log.i(TAG, "文章" + i + "的作者:" + bloggerId);

            Blog blog = new Blog();
            blog.setBloggerIconUrl(iconUrl);
            blog.setBloggerId(bloggerId);
            blogs.add(blog);
        }
    }

    /** Returns the first element of {@code elements}, or null when it is empty. */
    private static Element firstOrNull(Elements elements) {
        return elements.isEmpty() ? null : elements.get(0);
    }

    /** Returns the first child element of {@code parent}, or null when there is none. */
    private static Element firstChildOrNull(Element parent) {
        if (parent == null || parent.children().isEmpty()) {
            return null;
        }
        return parent.child(0);
    }
}

如代码所示,使用Jsoup解析html十分简单。

可以使用浏览器,右键审查元素,得到下图所示的工具框,可以很快地找到页面中元素所对应的标签,再使用Jsoup API获取标签的值。




三、下载指定图片

       如果想要下载博客列表子项中博主的头像,可以先通过解析html获取图片的url,然后再使用HttpURLConnection直接下载。

       下面创建一个ThumbnailDownloader<Token>类,继承HandlerThread,用于等待并处理图片下载请求,同时更新UI:

 package com.example.testcsdn;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Message;
import android.support.v4.util.LruCache;
import android.util.Log;
import android.widget.ImageView;

/**
 * Background thread that receives thumbnail download requests, downloads and
 * caches the bitmaps, and reports each result through the response handler
 * passed to the constructor.
 *
 * @param <Token> type of the object identifying a request (for example the
 *                ImageView that will display the thumbnail)
 */
public class ThumbnailDownloader<Token> extends HandlerThread {
    private static final String TAG = "ThumbnailDownloader";
    private static final int MESSAGE_DOWNLOAD = 0;

    /** Handler bound to this thread; created in {@link #onLooperPrepared()}. */
    private Handler mHandler;
    /** Handler supplied by the creator; results are posted to it. */
    private Handler mResponseHandler;
    private Listener<Token> mListener;
    /** Latest pending request per token; wrapped so both threads can access it. */
    private Map<Token, Request> requestMap =
            Collections.synchronizedMap(new HashMap<Token, Request>());
    /** Memory cache of downloaded bitmaps, measured in bytes. */
    private LruCache<String, Bitmap> mMemoryCache;

    /** A queued download: the URL to fetch and the key to cache the result under. */
    private static final class Request {
        final String url;
        final String cacheKey;

        Request(String url, String cacheKey) {
            this.url = url;
            this.cacheKey = cacheKey;
        }
    }

    /** Callback invoked through the response handler when a thumbnail is ready. */
    public interface Listener<Token> {
        void onThumbnailDownloaded(Token token, Bitmap thumbnail);
    }

    /**
     * Creates the downloader thread (named after {@code TAG}).
     *
     * @param handler handler on which download results are delivered
     */
    public ThumbnailDownloader(Handler handler) {
        super(TAG);
        mResponseHandler = handler;

        // Use one eighth of the maximum heap for the cache. Compute in long and
        // clamp: a plain (int) cast of maxMemory() can overflow to a
        // non-positive size, which LruCache rejects.
        long maxMemory = Runtime.getRuntime().maxMemory();
        int cacheSize = (int) Math.min(Integer.MAX_VALUE, maxMemory / 8);
        mMemoryCache = new LruCache<String, Bitmap>(cacheSize) {
            // Measure entries in bytes so they match the unit of cacheSize.
            @Override
            protected int sizeOf(String key, Bitmap value) {
                return value.getRowBytes() * value.getHeight();
            }
        };
    }

    public void setListener(Listener<Token> listener) {
        mListener = listener;
    }

    /** Creates the request handler once this thread's Looper exists. */
    @Override
    public void onLooperPrepared() {
        // Created on this thread, so its messages are handled on this thread.
        mHandler = new Handler() {
            @Override
            public void handleMessage(Message message) {
                if (message.what == MESSAGE_DOWNLOAD) {
                    // message.obj is always the Token passed to queueThumbnail.
                    @SuppressWarnings("unchecked")
                    Token token = (Token) message.obj;
                    try {
                        handleRequest(token);
                    } catch (IOException e) {
                        Log.e(TAG, "Thumbnail download failed", e);
                    }
                }
            }
        };
    }

    /**
     * Downloads the bitmap for the latest request of {@code token}, stores it
     * in the cache and posts the result to the response handler. Does nothing
     * if the token has no pending request or the download yields no bitmap.
     */
    private void handleRequest(final Token token) throws IOException {
        final Request request = requestMap.get(token);
        if (request == null) {
            return;
        }

        byte[] bitmapBytes = new BlogsFetchr().getUrlBytes(request.url);
        if (bitmapBytes == null) {
            // getUrlBytes returns null for non-200 responses.
            Log.w(TAG, "No data received for " + request.url);
            return;
        }

        final Bitmap bitmap = BitmapFactory.decodeByteArray(bitmapBytes, 0, bitmapBytes.length);
        if (bitmap == null) {
            // decodeByteArray returns null for undecodable data; LruCache rejects null values.
            Log.w(TAG, "Could not decode image from " + request.url);
            return;
        }

        mMemoryCache.put(request.cacheKey, bitmap);

        mResponseHandler.post(new Runnable() {
            @Override
            public void run() {
                // Drop the result if the token was re-queued or the queue was
                // cleared in the meantime (prevents images landing on the wrong view).
                if (requestMap.get(token) != request) {
                    return;
                }
                requestMap.remove(token);
                Listener<Token> listener = mListener;
                if (listener != null) {
                    listener.onThumbnailDownloaded(token, bitmap);
                }
            }
        });
    }

    /** Removes all queued download messages and forgets all pending requests. */
    public void clearQueue() {
        // mHandler is still null if the looper has not been prepared yet.
        if (mHandler != null) {
            mHandler.removeMessages(MESSAGE_DOWNLOAD);
        }
        requestMap.clear();
    }

    /**
     * Queues a download of {@code url} for {@code token}, replacing any earlier
     * pending request for the same token. A {@code null} url only cancels the
     * pending request. Must be called after the thread's looper has been
     * prepared (i.e. after {@link #onLooperPrepared()} has run).
     *
     * @param token object identifying the request; if it is an ImageView whose
     *              tag is a String, that tag is used as the cache key
     * @param url   address of the image to download
     */
    public void queueThumbnail(Token token, String url) {
        if (url == null) {
            requestMap.remove(token);
            return;
        }
        // Capture the cache key now, on the caller's thread, so a later tag
        // change on a recycled view cannot file the bitmap under another key.
        requestMap.put(token, new Request(url, cacheKeyFor(token, url)));
        // obtainMessage binds the message to mHandler; obj carries the token.
        mHandler.obtainMessage(MESSAGE_DOWNLOAD, token).sendToTarget();
    }

    /**
     * Looks up a bitmap in the memory cache.
     *
     * @return the cached bitmap, or {@code null} if {@code key} is null or not cached
     */
    public Bitmap getCacheImage(String key) {
        return (key == null) ? null : mMemoryCache.get(key);
    }

    /** Returns the String tag of an ImageView token, or {@code url} when there is none. */
    private String cacheKeyFor(Token token, String url) {
        if (token instanceof ImageView) {
            Object tag = ((ImageView) token).getTag();
            if (tag instanceof String) {
                return (String) tag;
            }
        }
        return url;
    }
}




MainActivity:

package com.example.testcsdn;

import java.util.ArrayList;

import android.app.Activity;
import android.graphics.Bitmap;
import android.os.AsyncTask;
import android.os.Bundle;
import android.os.Handler;
import android.util.Log;
import android.view.View;
import android.view.ViewGroup;
import android.widget.ArrayAdapter;
import android.widget.ImageView;
import android.widget.ListView;
import android.widget.TextView;

/**
 * Shows the blogs parsed from {@code testUrl} in a ListView, loading each
 * blogger's avatar through a {@link ThumbnailDownloader}.
 */
public class MainActivity extends Activity {
    private static final String TAG = "MainActivity";

    private ListView mListView;
    private ArrayList<Blog> mBlogs; // blog list shown in the ListView
    private String testUrl = "http://blog.csdn.net/column.html"; // page to fetch and parse
    private BlogsFetchr fetchr; // downloads and parses the HTML page
    private MyAdapter adapter;
    private ThumbnailDownloader<ImageView> mThumbnailDownloader; // avatar downloader thread

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        fetchr = new BlogsFetchr();
        mBlogs = new ArrayList<Blog>();
        Log.i(TAG, "mBlogs.size:" + mBlogs.size());
        Blog blog = new Blog();
        blog.setBloggerId("hello");
        mBlogs.add(blog);

        // Start the thread that serves thumbnail requests. The Handler created
        // here belongs to the thread running onCreate, so the listener runs there.
        mThumbnailDownloader = new ThumbnailDownloader<ImageView>(new Handler());
        mThumbnailDownloader.setListener(new ThumbnailDownloader.Listener<ImageView>() {
            @Override
            public void onThumbnailDownloaded(ImageView imageView, Bitmap thumbnail) {
                imageView.setImageBitmap(thumbnail);
            }
        });
        mThumbnailDownloader.start();
        mThumbnailDownloader.getLooper(); // must be called after start()

        update(testUrl);
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        // Stop the downloader's looper so its thread does not outlive the activity.
        mThumbnailDownloader.quit();
    }

    /** Downloads the blog list in the background, then shows it in the ListView. */
    private void update(final String testUrl) {
        new AsyncTask<Void, Void, ArrayList<Blog>>() {
            @Override
            protected ArrayList<Blog> doInBackground(Void... params) {
                // Hand the result to onPostExecute instead of writing the
                // activity field from the background thread.
                return fetchr.downloadBlogItems(testUrl);
            }

            @Override
            protected void onPostExecute(ArrayList<Blog> result) {
                if (result != null) {
                    mBlogs = result;
                }
                mListView = (ListView) findViewById(R.id.listview_blogcolumn);
                adapter = new MyAdapter(mBlogs);
                mListView.setAdapter(adapter);
            }
        }.execute();
    }

    /** Adapter binding one {@link Blog} per row: blogger id plus avatar. */
    private class MyAdapter extends ArrayAdapter<Blog> {

        public MyAdapter(ArrayList<Blog> blogs) {
            super(MainActivity.this, 0, blogs);
        }

        @Override
        public View getView(int position, View convertView, ViewGroup parent) {
            if (convertView == null) {
                // Pass the parent so the row's layout parameters are resolved against it.
                convertView = getLayoutInflater().inflate(R.layout.listview_item, parent, false);
            }
            ImageView imageView = (ImageView) convertView.findViewById(R.id.imageView);
            TextView textView = (TextView) convertView.findViewById(R.id.textView);

            Blog blog = getItem(position);
            textView.setText(blog.getBloggerId());

            String imageUrl = blog.getBloggerIconUrl();
            if (imageUrl == null || imageUrl.length() == 0) {
                // Nothing to load: clear whatever a recycled row was showing and
                // queue a null URL so a still-pending download for this view is
                // discarded (the downloader ignores requests whose URL is null).
                imageView.setTag(null);
                imageView.setImageDrawable(null);
                mThumbnailDownloader.queueThumbnail(imageView, null);
                return convertView;
            }

            // Strip everything except letters, digits and underscores; the
            // result is set as the view's tag and used as the cache key.
            String imageTag = imageUrl.replaceAll("[^\\w]", "");
            imageView.setTag(imageTag);

            Bitmap bitmap = mThumbnailDownloader.getCacheImage(imageTag);
            if (bitmap != null) {
                // Cache hit: show it, and discard any download still pending for
                // this recycled view so it cannot overwrite the image later.
                imageView.setImageBitmap(bitmap);
                mThumbnailDownloader.queueThumbnail(imageView, null);
            } else {
                // Cache miss: clear the recycled row's old image and request a download.
                imageView.setImageDrawable(null);
                mThumbnailDownloader.queueThumbnail(imageView, imageUrl);
            }
            return convertView;
        }
    }
}

运行效果:



源码下载


2 0
原创粉丝点击