Android逆向之旅—解析编译之后的AndroidManifest文件格式

来源：互联网发布：简述云计算的发展历程编辑：程序博客网时间：2024/06/04 23:09

转载自：
http://www.wjdiankong.cn/android%E9%80%86%E5%90%91%E4%B9%8B%E6%97%85-%E8%A7%A3%E6%9E%90%E7%BC%96%E8%AF%91%E4%B9%8B%E5%90%8E%E7%9A%84androidmanifest%E6%96%87%E4%BB%B6%E6%A0%BC%E5%BC%8F/

格式解析

第一、头部信息

1、文件魔数：四个字节
2、文件大小：四个字节

第二、String Chunk内容

1、ChunkType：StringChunk的类型，固定四个字节：0x001C0001
2、ChunkSize：StringChunk的大小，四个字节
3、StringCount：StringChunk中字符串的个数，四个字节
4、StyleCount：StringChunk中样式的个数，四个字节，但是在实际解析过程中，这个值一直是0x00000000
5、Unknown：未知区域，四个字节，在解析的过程中，这里需要略过四个字节
6、StringPoolOffset：字符串池的偏移值，四个字节，这个偏移值是相对于StringChunk的头部位置
7、StylePoolOffset：样式池的偏移值，四个字节，这里没有Style,所以这个字段可忽略
8、StringOffsets：每个字符串的偏移值，所以他的大小应该是：StringCount*4个字节
9、StyleOffsets：每个样式的偏移值，所以他的大小应该是StyleCount*4个字节

1、首先我们需要把AndroidManifest.xml文件读入到一个byte数组中：

// Read the whole compiled AndroidManifest.xml into memory.
// byteSrc stays null when the file cannot be opened or read.
byte[] byteSrc = null;
FileInputStream fis = null;
// ByteArrayOutputStream holds no system resource, so it is not closed below.
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try {
    fis = new FileInputStream("xmltest/AndroidManifest1.xml");
    byte[] buffer = new byte[1024];
    int len;
    while ((len = fis.read(buffer)) != -1) {
        bos.write(buffer, 0, len);
    }
    byteSrc = bos.toByteArray();
} catch (java.io.IOException e) {
    System.out.println("parse xml error:" + e.toString());
} finally {
    // fis is still null if opening the file failed, so guard before closing.
    if (fis != null) {
        try {
            fis.close();
        } catch (java.io.IOException e) {
            System.out.println("close xml error:" + e.toString());
        }
    }
}

2、下面我们就来看看解析头部信息：

/**
 * Prints the two header fields of the binary XML and writes the XML declaration
 * line into the output buffer {@code xmlSb}.
 *
 * @param byteSrc bytes of the compiled AndroidManifest.xml
 */
public static void parseXmlHeader(byte[] byteSrc) {
    // Bytes 0-3: magic number.
    byte[] magicBytes = Utils.copyByte(byteSrc, 0, 4);
    String magicHex = Utils.bytesToHexString(magicBytes);
    System.out.println("magic number:" + magicHex);

    // Bytes 4-7: size field.
    byte[] sizeBytes = Utils.copyByte(byteSrc, 4, 4);
    String sizeHex = Utils.bytesToHexString(sizeBytes);
    System.out.println("xml size:" + sizeHex);

    // Start the reconstructed document with the XML declaration.
    xmlSb.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
    xmlSb.append("\n");
}

3、解析StringChunk信息

/** * 解析StringChunk * @param byteSrc */public static void parseStringChunk(byte[] byteSrc){    //String Chunk的标示    byte[] chunkTagByte = Utils.copyByte(byteSrc, stringChunkOffset, 4);    System.out.println("string chunktag:"+Utils.bytesToHexString(chunkTagByte));    //String Size    byte[] chunkSizeByte = Utils.copyByte(byteSrc, 12, 4);    //System.out.println(Utils.bytesToHexString(chunkSizeByte));    int chunkSize = Utils.byte2int(chunkSizeByte);    System.out.println("chunk size:"+chunkSize);    //String Count    byte[] chunkStringCountByte = Utils.copyByte(byteSrc, 16, 4);    int chunkStringCount = Utils.byte2int(chunkStringCountByte);    System.out.println("count:"+chunkStringCount);    stringContentList = new ArrayList<String>(chunkStringCount);    //这里需要注意的是，后面的四个字节是Style的内容，然后紧接着的四个字节始终是0，所以我们需要直接过滤这8个字节    //String Offset 相对于String Chunk的起始位置0x00000008    byte[] chunkStringOffsetByte = Utils.copyByte(byteSrc, 28, 4);    int stringContentStart = 8 + Utils.byte2int(chunkStringOffsetByte);    System.out.println("start:"+stringContentStart);    //String Content    byte[] chunkStringContentByte = Utils.copyByte(byteSrc, stringContentStart, chunkSize);    /**     * 在解析字符串的时候有个问题，就是编码：UTF-8和UTF-16,如果是UTF-8的话是以00结尾的，如果是UTF-16的话以00 00结尾的     */    /**     * 此处代码是用来解析AndroidManifest.xml文件的     */    //这里的格式是：偏移值开始的两个字节是字符串的长度，接着是字符串的内容，后面跟着两个字符串的结束符00    byte[] firstStringSizeByte = Utils.copyByte(chunkStringContentByte, 0, 2);    //一个字符对应两个字节    int firstStringSize = Utils.byte2Short(firstStringSizeByte)*2;    System.out.println("size:"+firstStringSize);    byte[] firstStringContentByte = Utils.copyByte(chunkStringContentByte, 2, firstStringSize+2);    String firstStringContent = new String(firstStringContentByte);    stringContentList.add(Utils.filterStringNull(firstStringContent));    System.out.println("first string:"+Utils.filterStringNull(firstStringContent));    //将字符串都放到ArrayList中    int endStringIndex = 2+firstStringSize+2;    
while(stringContentList.size() < chunkStringCount){        //一个字符对应两个字节，所以要乘以2        int stringSize = Utils.byte2Short(Utils.copyByte(chunkStringContentByte, endStringIndex, 2))*2;        String str = new String(Utils.copyByte(chunkStringContentByte, endStringIndex+2, stringSize+2));        System.out.println("str:"+Utils.filterStringNull(str));        stringContentList.add(Utils.filterStringNull(str));        endStringIndex += (2+stringSize+2);    }    /**     * 此处的代码是用来解析资源文件xml的     */    /*int stringStart = 0;        int index = 0;        while(index < chunkStringCount){            byte[] stringSizeByte = Utils.copyByte(chunkStringContentByte, stringStart, 2);            int stringSize = (stringSizeByte[1] & 0x7F);            System.out.println("string size:"+Utils.bytesToHexString(Utils.int2Byte(stringSize)));            if(stringSize != 0){                //这里注意是UTF-8编码的                String val = "";                try{                    val = new String(Utils.copyByte(chunkStringContentByte, stringStart+2, stringSize), "utf-8");                }catch(Exception e){                    System.out.println("string encode error:"+e.toString());                }                stringContentList.add(val);            }else{                stringContentList.add("");            }            stringStart += (stringSize+3);            index++;        }        for(String str : stringContentList){            System.out.println("str:"+str);        }*/    resourceChunkOffset = stringChunkOffset + Utils.byte2int(chunkSizeByte);}

这里我们需要解释几个点：

1、忽略过Unknown字段
2、字符串内容的结束符是：0x0000
3、每个字符串开始的前两个字节是字符串的长度
所以我们有了每个字符串的偏移值和大小，那么解析字符串内容就简单了：
这里写图片描述

这里我们看到0x000B(高位和低位相反)就是字符串的大小，结尾是0x0000

这里写图片描述

一个字符对应的是两个字节，而且这里有一个方法：Utils.filterStringNull(firstStringContent)：

/**
 * Removes every NUL character (U+0000) from a string.
 *
 * <p>The filtering works on characters, so the result does not depend on the
 * platform default charset and all other characters are kept unchanged.
 *
 * @param str the text to clean; may be {@code null} or empty
 * @return {@code str} itself when it is {@code null} or empty, otherwise a string
 *         with all NUL characters removed
 */
public static String filterStringNull(String str) {
    if (str == null || str.length() == 0) {
        return str;
    }
    StringBuilder cleaned = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        if (c != '\u0000') {
            cleaned.append(c);
        }
    }
    return cleaned.toString();
}

其实逻辑很简单，就是过滤空字符：在C语言中是NULL，在Java中就是00，如果不过滤的话，会出现下面的这种情况：
这里写图片描述
每个字符是宽字符，很难看，其实原因就是每个字符后面多了一个00，所以过滤之后就可以了

上面我们就解析了AndroidManifest.xml中所有的字符串内容。这里我们需要用一个全局的字符列表，用来存储这些字符串的值，后面会用索引来获取这些字符串的值。

第三、解析ResourceIdChunk
这个Chunk主要存放的是AndroidManifest中用到的系统属性值对应的资源Id，比如android:versionCode中的versionCode属性，android是前缀，后面会说到。

1、ChunkType：ResourceIdChunk的类型，固定四个字节：0x00080180
2、ChunkSize：ResourceChunk的大小，四个字节
3、ResourceIds：ResourceId的内容，每个Id占四个字节，所以个数是：(ResourceChunk的大小 - 头部的8个字节(ChunkType和ChunkSize)) / 4

/** * 解析Resource Chunk * @param byteSrc */public static void parseResourceChunk(byte[] byteSrc){    byte[] chunkTagByte = Utils.copyByte(byteSrc, resourceChunkOffset, 4);    System.out.println(Utils.bytesToHexString(chunkTagByte));    byte[] chunkSizeByte = Utils.copyByte(byteSrc, resourceChunkOffset+4, 4);    int chunkSize = Utils.byte2int(chunkSizeByte);    System.out.println("chunk size:"+chunkSize);    //这里需要注意的是chunkSize是包含了chunkTag和chunkSize这两个字节的，所以需要剔除    byte[] resourceIdByte = Utils.copyByte(byteSrc, resourceChunkOffset+8, chunkSize-8);    ArrayList<Integer> resourceIdList = new ArrayList<Integer>(resourceIdByte.length/4);    for(int i=0;i<resourceIdByte.length;i+=4){        int resId = Utils.byte2int(Utils.copyByte(resourceIdByte, i, 4));        System.out.println("id:"+resId+",hex:"+Utils.bytesToHexString(Utils.copyByte(resourceIdByte, i, 4)));        resourceIdList.add(resId);    }    nextChunkOffset = (resourceChunkOffset+chunkSize);}

解析结果：
这里写图片描述

第四、解析StartNamespaceChunk

1、ChunkType：Chunk的类型，固定四个字节：0x00100100

2、ChunkSize：Chunk的大小，四个字节

3、LineNumber：在AndroidManifest文件中的行号，四个字节

4、Unknown：未知区域，四个字节

5、Prefix：命名空间的前缀(在字符串中的索引值)，比如：android

6、Uri：命名空间的uri(在字符串中的索引值)：比如：http://schemas.android.com/apk/res/android

解析结果如下：
这里写图片描述

第五、StartTagChunk

这个Chunk主要是存放了AndroidManifest.xml中的标签信息了，也是最核心的内容，当然也是最复杂的内容。
1、ChunkType：Chunk的类型，固定四个字节：0x00100102

2、ChunkSize：Chunk的大小，固定四个字节

3、LineNumber：对应于AndroidManifest中的行号，四个字节

4、Unknown：未知领域，四个字节

5、NamespaceUri：这个标签用到的命名空间的Uri,比如用到了android这个前缀，那么就需要用http://schemas.android.com/apk/res/android这个Uri去获取，四个字节

6、Name：标签名称(在字符串中的索引值)，四个字节

7、Flags：标签的类型，四个字节，比如是开始标签还是结束标签等

8、AttributeCount：标签包含的属性个数，四个字节

9、ClassAttribute：标签包含的类属性，四个字节

10、Attributes：属性内容，每个属性算是一个Entry，每个Entry固定由5个四字节的整数组成：

[NamespaceUri，Name，ValueString，Type，Data]，我们在解析的时候需要注意第四个值(Type)，要做一次处理：需要右移24位。所以这个字段的大小是：属性个数*5*4个字节。

/** * 解析StartTag Chunk * @param byteSrc */public static void parseStartTagChunk(byte[] byteSrc){    //解析ChunkTag    byte[] chunkTagByte = Utils.copyByte(byteSrc, 0, 4);    System.out.println(Utils.bytesToHexString(chunkTagByte));    //解析ChunkSize    byte[] chunkSizeByte = Utils.copyByte(byteSrc, 4, 4);    int chunkSize = Utils.byte2int(chunkSizeByte);    System.out.println("chunk size:"+chunkSize);    //解析行号    byte[] lineNumberByte = Utils.copyByte(byteSrc, 8, 4);    int lineNumber = Utils.byte2int(lineNumberByte);    System.out.println("line number:"+lineNumber);    //解析prefix    byte[] prefixByte = Utils.copyByte(byteSrc, 8, 4);    int prefixIndex = Utils.byte2int(prefixByte);    //这里可能会返回-1，如果返回-1的话，那就是说没有prefix    if(prefixIndex != -1 && prefixIndex<stringContentList.size()){        System.out.println("prefix:"+prefixIndex);        System.out.println("prefix str:"+stringContentList.get(prefixIndex));    }else{        System.out.println("prefix null");    }    //解析Uri    byte[] uriByte = Utils.copyByte(byteSrc, 16, 4);    int uriIndex = Utils.byte2int(uriByte);    if(uriIndex != -1 && prefixIndex<stringContentList.size()){        System.out.println("uri:"+uriIndex);        System.out.println("uri str:"+stringContentList.get(uriIndex));    }else{        System.out.println("uri null");    }    //解析TagName    byte[] tagNameByte = Utils.copyByte(byteSrc, 20, 4);    System.out.println(Utils.bytesToHexString(tagNameByte));    int tagNameIndex = Utils.byte2int(tagNameByte);    String tagName = stringContentList.get(tagNameIndex);    if(tagNameIndex != -1){        System.out.println("tag name index:"+tagNameIndex);        System.out.println("tag name str:"+tagName);    }else{        System.out.println("tag name null");    }    //解析属性个数(这里需要过滤四个字节:14001400)    byte[] attrCountByte = Utils.copyByte(byteSrc, 28, 4);    int attrCount = Utils.byte2int(attrCountByte);    System.out.println("attr count:"+attrCount);    //解析属性    
//这里需要注意的是每个属性单元都是由五个元素组成，每个元素占用四个字节：namespaceuri, name, valuestring, type, data    //在获取到type值的时候需要右移24位    ArrayList<AttributeData> attrList = new ArrayList<AttributeData>(attrCount);    for(int i=0;i<attrCount;i++){        Integer[] values = new Integer[5];        AttributeData attrData = new AttributeData();        for(int j=0;j<5;j++){            int value = Utils.byte2int(Utils.copyByte(byteSrc, 36+i*20+j*4, 4));            switch(j){            case 0:                attrData.nameSpaceUri = value;                break;            case 1:                attrData.name = value;                break;            case 2:                attrData.valueString = value;                break;            case 3:                value = (value >> 24);                attrData.type = value;                break;            case 4:                attrData.data = value;                break;            }            values[j] = value;        }        attrList.add(attrData);    }    for(int i=0;i<attrCount;i++){        if(attrList.get(i).nameSpaceUri != -1){            System.out.println("nameSpaceUri:"+stringContentList.get(attrList.get(i).nameSpaceUri));        }else{            System.out.println("nameSpaceUri == null");        }        if(attrList.get(i).name != -1){            System.out.println("name:"+stringContentList.get(attrList.get(i).name));        }else{            System.out.println("name == null");        }        if(attrList.get(i).valueString != -1){            System.out.println("valueString:"+stringContentList.get(attrList.get(i).valueString));        }else{            System.out.println("valueString == null");        }        System.out.println("type:"+AttributeType.getAttrType(attrList.get(i).type));        System.out.println("data:"+AttributeType.getAttributeData(attrList.get(i)));    }    //这里开始构造xml结构    xmlSb.append(createStartTagXml(tagName, attrList));}

阅读全文

0 0