Base64学习备忘录

来源:互联网 发布:自制数据库 编辑:程序博客网 时间:2024/06/18 14:21

前言

最近偶尔了解了一下图片识别,发现有些接口接收的图片请求使用的是该图片经过处理后的Base64编码字符串。于是对Base64进行了一下了解。这里贴出来一些新遇到的知识,作为备忘。

Base64的用途

经过Base64编码处理后的文本数据、图片等,可以在邮件正文、网页等处直接显示。验证一下,使用的图片:
来自百度的图片
经过Base64编码工具,对图片进行编码:
在Html中使用的方式为

<img src="data:image/png;base64,图片的Base64编码" />

Base64编码解码工具:
http://www.atool.org/img2base64.php
编写html:
这里写图片描述
显示效果:
这里写图片描述

为什么要使用Base64?

大家自己去看,我感觉没什么意思:http://www.cnblogs.com/wellsoho/archive/2009/12/09/1619924.html

实现原理

Base64编码表
对二进制数据进行处理,每3个字节一组,一共是3x8=24bit,划为4组,每组正好6个bit:


这样我们得到4个数字作为索引,然后查表,获得相应的4个字符,就是编码后的字符串。

所以,Base64编码会把3字节的二进制数据编码为4字节的文本数据,长度增加33%,好处是编码后的文本数据可以在邮件正文、网页等直接显示。

如果要编码的二进制数据的字节数不是3的倍数,最后会剩下1个或2个字节,怎么办?Base64用\x00字节在末尾补足后,再在编码的末尾加上1个或2个=号,表示补了多少字节,解码的时候会自动去掉。

附:Base64编码解码的一种实现方式Java版

/* * Copyright (C) 2007 Esmertec AG. * Copyright (C) 2007 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *      http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package com.google.android.mms.pdu;public class Base64 {    /**     * Used to get the number of Quadruples.     */    static final int FOURBYTE = 4;    /**     * Byte used to pad output.     */    static final byte PAD = (byte) '=';    /**     * The base length.     */    static final int BASELENGTH = 255;    // Create arrays to hold the base64 characters    private static byte[] base64Alphabet = new byte[BASELENGTH];    // Populating the character arrays    static {        for (int i = 0; i < BASELENGTH; i++) {            base64Alphabet[i] = (byte) -1;        }        for (int i = 'Z'; i >= 'A'; i--) {            base64Alphabet[i] = (byte) (i - 'A');        }        for (int i = 'z'; i >= 'a'; i--) {            base64Alphabet[i] = (byte) (i - 'a' + 26);        }        for (int i = '9'; i >= '0'; i--) {            base64Alphabet[i] = (byte) (i - '0' + 52);        }        base64Alphabet['+'] = 62;        base64Alphabet['/'] = 63;    }    /**     * Decodes Base64 data into octects     *     * @param base64Data Byte array containing Base64 data     * @return Array containing decoded data.     
*/    public static byte[] decodeBase64(byte[] base64Data) {        // RFC 2045 requires that we discard ALL non-Base64 characters        base64Data = discardNonBase64(base64Data);        // handle the edge case, so we don't have to worry about it later        if (base64Data.length == 0) {            return new byte[0];        }        int numberQuadruple = base64Data.length / FOURBYTE;        byte decodedData[] = null;        byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0;        // Throw away anything not in base64Data        int encodedIndex = 0;        int dataIndex = 0;        {            // this sizes the output array properly - rlw            int lastData = base64Data.length;            // ignore the '=' padding            while (base64Data[lastData - 1] == PAD) {                if (--lastData == 0) {                    return new byte[0];                }            }            decodedData = new byte[lastData - numberQuadruple];        }        for (int i = 0; i < numberQuadruple; i++) {            dataIndex = i * 4;            marker0 = base64Data[dataIndex + 2];            marker1 = base64Data[dataIndex + 3];            b1 = base64Alphabet[base64Data[dataIndex]];            b2 = base64Alphabet[base64Data[dataIndex + 1]];            if (marker0 != PAD && marker1 != PAD) {                //No PAD e.g 3cQl                b3 = base64Alphabet[marker0];                b4 = base64Alphabet[marker1];                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);                decodedData[encodedIndex + 1] =                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));                decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);            } else if (marker0 == PAD) {                //Two PAD e.g. 3c[Pad][Pad]                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);            } else if (marker1 == PAD) {                //One PAD e.g. 
3cQ[Pad]                b3 = base64Alphabet[marker0];                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);                decodedData[encodedIndex + 1] =                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));            }            encodedIndex += 3;        }        return decodedData;    }    /**     * Check octect wheter it is a base64 encoding.     *     * @param octect to be checked byte     * @return ture if it is base64 encoding, false otherwise.     */    private static boolean isBase64(byte octect) {        if (octect == PAD) {            return true;        } else if (base64Alphabet[octect] == -1) {            return false;        } else {            return true;        }    }    /**     * Discards any characters outside of the base64 alphabet, per     * the requirements on page 25 of RFC 2045 - "Any characters     * outside of the base64 alphabet are to be ignored in base64     * encoded data."     *     * @param data The base-64 encoded data to groom     * @return The data, less non-base64 characters (see RFC 2045).     */    static byte[] discardNonBase64(byte[] data) {        byte groomedData[] = new byte[data.length];        int bytesCopied = 0;        for (int i = 0; i < data.length; i++) {            if (isBase64(data[i])) {                groomedData[bytesCopied++] = data[i];            }        }        byte packedData[] = new byte[bytesCopied];        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);        return packedData;    }}
1 0