编码系列--Base32编码

来源:互联网 发布:外国网友评论淘宝 编辑:程序博客网 时间:2024/05/09 03:30
0、写在前面
这里讨论的编码主要的目的是将不可显示的二进制数组转变为可显示的字符串,包括其逆运算。通过特定的协议传输数据,或者加密解密的时候都会用到类似的方法。
在这类运算中用的比较多的是Base64,比如MIME中,DotNET中更是直接提供了Base64 Encode和Decode的方法,相当方便。但是Base64通常由“a-z”、“A-Z”、0-9以及“+”和“/”这些符号组成(另外用“=”作为填充符),当中包含了很多容易混淆的字符,例如“1”、“I”和“l”,“0”和“O”或者“2”和“Z”,看起来总是不爽。特别是当作为序列号编码时,不应该包含容易混淆的字符,所以有另一种编码形式叫做Base24,用过MS产品的兄弟们一定非常熟悉。但是Base24在实现上还要多绕一个弯,先放一放,我们先说Base32,一种能够基本满足要求、又非常直观的编码方式。
 
1、编码原理
Base32的原理和Base64一模一样,所以先看一下Base64编码是怎么一回事。
Base64顾名思义就是用64个可显示字符表示所有的ASC字符,64也就是6Bits,而ASC字符一共有256个,也就是8Bits,很简单了,取一下6和8的最小公倍数,24位,言下之意就是用4个Base64的字符来表示3个ASC字符。即在编码时,3个一组ASC字符,产生4个Base64字符,解码时4个一组,还原3个ASC字符。根据这个原理Base64编码之后的字符串应该比原先增加1/3的长度。
这里所谓的编码就是一次取6Bits,换算出来的值作为索引号,利用这个索引数,到预先定义的长度为64的字符数组中取相应的字符替换即可;解码就是逆运算,根据字符取在预定义数组中的索引值,然后按8Bits一组还原ASC字符。
Base32和Base64相比只有一个区别,就是用32个字符(也就是5Bits)来表示256个ASC字符;5和8的最小公倍数是40位,也就是说5个ASC字符一组可以生成8个Base32字符,反之亦然。
 
2、源代码
对于实现这样的功能的代码,毫无难度,如果不考虑效率的话,只要细心点,肯定不会错。有人写一次就够了,所以拿出来和大家一起分享。
下面这个类还额外提供了一个功能就是可以自定义编码的字符串,注意Base32Map这个属性。
 
/// *******************************************************************************
/// Name: CBase32.cs
/// Module: CBase32
/// Author: Mittermeyer
/// Comment:
///  
/// History:
///  2004-08-31 Mittermeyer Create
///  
///  Copyright 1995-2004 by Mittermeyer.All rights reserved.
/// *******************************************************************************
 
using System;
using System.Text;
 
namespace Mittermeyer
{
 /// <summary>
 /// Summary description for CBase32.
 /// </summary>
 public class CBase32
 {
  private const String DefaultBase32Map = "ABCDEFGHIJKLMNPQRSTUVWXYZ3456789";
  private const Int32  Base32MapLength = 32;
  
  private static Char[] m_acBase32Map = null;
 
  static CBase32()
  {
   m_acBase32Map = DefaultBase32Map.ToCharArray();
  }
  public CBase32()
  {
  }
 
  public static String Base32Map
  {
   get
   {
    return m_acBase32Map.ToString();
   }
   set
   {
    if (value != null && value.Length >= Base32MapLength)
    {
     m_acBase32Map = value.ToCharArray();
    }
    else
    {
     m_acBase32Map = DefaultBase32Map.ToCharArray();
    }
   }
  }
 
  public static Char GetCharacter(Int32 dwIndex)
  {
   Char cMappingData = '/0';
 
   if (m_acBase32Map != null && dwIndex >= 0 && dwIndex < m_acBase32Map.Length)
   {
    cMappingData = m_acBase32Map[dwIndex];
   }
   return cMappingData;
  }
 

  public static Int32 GetCharIndex(Char cData)
  {
   Int32 dwIndex = -1,dwLoop = 0;
 
   if (m_acBase32Map != null)
   {
    for (dwLoop = 0;dwLoop < m_acBase32Map.Length;dwLoop++)
    {
     if (m_acBase32Map[dwLoop] == cData)
     {
      dwIndex = dwLoop;
      break;
     }
    }
   }
   return dwIndex;
  }
 
  public static String Encode(Byte[] abData)
  {
   Int32 dwLoop = 0,dwCharIndex = 0,dwCharCount;
   Char[] acPart = null;
   StringBuilder sbOutput = null;
 
   if (abData == null || m_acBase32Map == null || m_acBase32Map.Length < Base32MapLength)
    return null;
 
   try
   {
    dwCharCount = (Int32) (abData.Length / 5f * 8f) + 1;
    sbOutput = new StringBuilder(dwCharCount);
    acPart = new Char[8];
   }
   catch (Exception)
   {
   }
   if (acPart == null || sbOutput == null)
    return null;
 
   dwCharCount = 0;
   for(dwLoop = 0;dwLoop < abData.Length;dwLoop += 5)
   {
    // every 5 bytes is a unit,can convert to 8 chars
    // data format:
    //   AAAAABBB BBCCCCCD DDDDEEEE EFFFFFGG GGGHHHHH
    switch (abData.Length - dwLoop)
    {
     case 1:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];  
      dwCharIndex = (abData[dwLoop] & 0x7) << 2;  // BBB00
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharCount = 2;
      break;
 
     case 2:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = abData[dwLoop + 1] & 0x1;   // D0000
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharCount = 4;
      break;
 
     case 3:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 2] & 0xF) << 1; // EEEE0
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharCount = 5;
      break;
 
     case 4:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 2] & 0xF) << 1) + (abData[dwLoop + 3] >> 7);// EEEEE
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x7F) >> 2; // FFFFF
      acPart[5] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x3) << 3; // GG000
      acPart[6] = m_acBase32Map[dwCharIndex];
      dwCharCount = 7;
      break;
 
     default:  // >= 5
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 2] & 0xF) << 1) + (abData[dwLoop + 3] >> 7);// EEEEE
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x7F) >> 2; // FFFFF
      acPart[5] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 3] & 0x3) << 3) + (abData[dwLoop + 4] >> 5);// GGGGG
      acPart[6] = m_acBase32Map[dwCharIndex];
      dwCharIndex = abData[dwLoop + 4] & 0x1F;  // HHHHH
      acPart[7] = m_acBase32Map[dwCharIndex];
      dwCharCount = 8;
      break;
    }
    
    sbOutput.Append(acPart,0,dwCharCount);
   }
 
   return sbOutput.ToString();
  }
 
  public static Byte[] Decode(String sData)
  {
   Int32 dwLoop = 0,dwLength = 0;
   Int32[] dwCharIndex = null;
   Byte[] abOutput = null;
   Char[] acInput = null;
 
   if (sData == null || sData == String.Empty)
    return null;
 
   acInput = sData.ToCharArray();
   if (acInput == null)
    return null;
 
   try
   {
    dwLength = (acInput.Length / 8 * 5) + 1;
    abOutput = new Byte[dwLength];
    dwCharIndex = new Int32[8];
   }
   catch (Exception)
   {
   }
   if (acInput == null)
    return null;
 
   dwLength = 0;
   for (dwLoop = 0;dwLoop < acInput.Length;dwLoop += 8)
   {
    switch (acInput.Length - dwLoop)
    {
     case 1:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3);
      break;
 
     case 2:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6);
      break;
 
     case 3:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1);
      break;
 
     case 4:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4);
      break;
 
     case 5:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7);
      break;
 
     case 6:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2);
      break;
      
     case 7:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
      dwCharIndex[6] = GetCharIndex(acInput[dwLoop + 6]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2 + dwCharIndex[6] >> 3);
      abOutput[dwLength + 4] = (Byte) ((dwCharIndex[6] & 0x7) << 5);
      break;
 
     default:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
      dwCharIndex[6] = GetCharIndex(acInput[dwLoop + 6]);
      dwCharIndex[7] = GetCharIndex(acInput[dwLoop + 7]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2 + dwCharIndex[6] >> 3);
      abOutput[dwLength + 4] = (Byte) ((dwCharIndex[6] & 0x7) << 5 + dwCharIndex[8]);
      break;
    }
    dwLength += 5;
   }
 
   return abOutput;
  }
 }
}
 
3、后记
其实笔者真正想写的是Base24,至于Base24的代码是什么,卖个关子,过几天再说。当然可能很多老兄们已经知道是怎么回事了......

 
原创粉丝点击