213 lab 4 cachelab

来源:互联网 发布:图标修复软件 编辑:程序博客网 时间:2024/06/07 17:50

这个lab让我深刻地体会到了自己还是一个菜鸟。。。。


lab分为AB两个部分。

先说 Part A:要写一个 C 语言程序来模拟 cache 的工作情况。


/*
 * csim.c - Cache simulator for Cache Lab, part A.
 *
 * Replays a memory trace against a simulated cache with 2^s sets, E lines
 * per set and 2^b-byte blocks, replacing the least recently used line when
 * a set is full, and reports hits / misses / evictions via printSummary().
 *
 * Usage: csim [-v] -s <s> -E <E> -b <b> -t <tracefile>
 */
#include "cachelab.h"
#include <getopt.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define MAX 100 /* maximum length of one trace line, including the terminator */

int misses, evictions, hits; /* running totals reported at exit */
int s, E, b;                 /* cache parameters: set bits, lines per set, block bits */

/* Every outcome a single trace line can produce. */
enum TYPE {
    HIT,
    MISS,
    MISS_HIT,
    MISS_EVICTION,
    MISS_EVICTION_HIT,
    SKIPPED /* the line could not be parsed; nothing was simulated */
};
enum TYPE type;

/* One cache line. The cache is a 2-D array indexed as cache[set][line]. */
typedef struct {
    int is_valid;    /* 1 once the line holds a block */
    long long tag;   /* tag bits of the cached block */
    int access_time; /* recency rank within the set; larger means newer */
} Cache;

Cache **cache;

/*
 * Extract the set index from an address laid out as | tag | set | offset |.
 *
 * Relies on main() having validated 0 <= b, 0 <= s <= 30 and s + b <= 63,
 * so the shifts are defined and the result fits in an int.
 */
int GetS(long long address)
{
    unsigned long long above_offset = (unsigned long long)address >> b;
    unsigned long long set_mask = (1ULL << s) - 1;

    return (int)(above_offset & set_mask);
}

/*
 * Extract the tag from an address: every bit above the set and offset fields.
 *
 * The whole upper part of the address is kept. Masking it down to s + b bits
 * (or narrowing it to int) makes distinct blocks share a tag and shows up as
 * bogus hits.
 */
long long GetT(long long address)
{
    return (long long)((unsigned long long)address >> (s + b));
}

/*
 * Mark line `tarline` of set `sel` as the most recently used one.
 *
 * Every valid line that was newer than the target moves one step towards
 * "old", then the target takes the newest rank E - 1.
 */
void update_time(int sel, int tarline)
{
    int i;

    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 1 &&
            cache[sel][i].access_time > cache[sel][tarline].access_time)
            --cache[sel][i].access_time;
    }
    cache[sel][tarline].access_time = E - 1;
}

/*
 * Simulate one trace line of the form " <op> <hex address>,<size>".
 *
 * Updates the global hit / miss / eviction counters and returns what
 * happened. A modify ('M') is a load followed by a store to the same block,
 * so it always earns one extra hit. Returns SKIPPED, without touching any
 * counter, when the operation and address cannot be parsed.
 */
enum TYPE calcu(char instr[])
{
    char ins;
    unsigned long long raw_address;
    int is_modify;
    int sel;
    long long tag;
    int i;
    int victim;

    /* The size field is irrelevant to the simulation, so it is not parsed. */
    if (sscanf(instr, " %c %llx", &ins, &raw_address) != 2)
        return SKIPPED;

    is_modify = (ins == 'M');
    sel = GetS((long long)raw_address);
    tag = GetT((long long)raw_address);

    /* Hit: a valid line of the set already carries this tag. */
    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 1 && cache[sel][i].tag == tag) {
            if (is_modify)
                ++hits;
            ++hits;
            update_time(sel, i);
            return HIT;
        }
    }

    ++misses;

    /* Cold miss: fill the first line of the set that is still empty. */
    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 0) {
            cache[sel][i].is_valid = 1;
            cache[sel][i].tag = tag;
            update_time(sel, i);
            if (is_modify) {
                ++hits;
                return MISS_HIT;
            }
            return MISS;
        }
    }

    /* The set is full: replace the line with the oldest recency rank. */
    ++evictions;
    victim = 0;
    for (i = 1; i < E; ++i) {
        if (cache[sel][i].access_time < cache[sel][victim].access_time)
            victim = i;
    }
    cache[sel][victim].tag = tag;
    update_time(sel, victim);
    if (is_modify) {
        ++hits;
        return MISS_EVICTION_HIT;
    }
    return MISS_EVICTION;
}

/* Parse a non-negative decimal int; returns -1 on malformed or out-of-range text. */
static int parse_count(const char *text)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value < 0 || value > INT_MAX)
        return -1;
    return (int)value;
}

/* Free the first `set_count` rows of the cache and the row table itself. */
static void release_cache(size_t set_count)
{
    size_t i;

    if (cache == NULL)
        return;
    for (i = 0; i < set_count; ++i)
        free(cache[i]);
    free(cache);
    cache = NULL;
}

/*
 * Entry point: parse the options, build the cache, replay the trace and
 * print the summary. Returns 0 on success and 1 on a usage, allocation or
 * file error.
 */
int main(int argc, char *argv[])
{
    FILE *f;
    char instr[MAX];
    const char *trace = NULL; /* points into argv, so no copy and no length limit */
    int opt, verbose = 0;
    size_t totsets;
    size_t i;

    s = E = b = -1; /* "not given" until the matching option is seen */
    opterr = 0;
    /* getopt() returns one short option per call and -1 when none are left. */
    while ((opt = getopt(argc, argv, "vs:E:b:t:")) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1; /* print the outcome of every access */
            break;
        case 's':
            s = parse_count(optarg);
            break;
        case 'E':
            E = parse_count(optarg);
            break;
        case 'b':
            b = parse_count(optarg);
            break;
        case 't':
            trace = optarg;
            break;
        default:
            break; /* unknown options are ignored */
        }
    }

    /* The bounds keep every shift in GetS()/GetT() and 1 << s well defined. */
    if (s < 0 || s > 30 || b < 0 || b > 63 - s || E < 1 || trace == NULL) {
        fprintf(stderr, "Usage: %s [-v] -s <s> -E <E> -b <b> -t <tracefile>\n",
                argv[0]);
        return 1;
    }

    totsets = (size_t)1 << s;
    cache = calloc(totsets, sizeof *cache);
    if (cache == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return 1;
    }
    for (i = 0; i < totsets; ++i) {
        cache[i] = calloc((size_t)E, sizeof *cache[i]);
        if (cache[i] == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            release_cache(totsets);
            return 1;
        }
        for (int j = 0; j < E; ++j) {
            cache[i][j].is_valid = 0;
            cache[i][j].tag = -1;
            cache[i][j].access_time = -1;
        }
    }

    f = fopen(trace, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: cannot open trace file '%s'\n", argv[0], trace);
        release_cache(totsets);
        return 1;
    }

    while (fgets(instr, MAX, f)) {
        /* Only lines starting with a space are simulated; the others are
         * instruction loads. */
        if (instr[0] != ' ')
            continue;

        /* fgets() keeps the line break; drop it so the verbose result stays
         * on the same output line as the access. */
        instr[strcspn(instr, "\r\n")] = '\0';

        type = calcu(instr);
        if (!verbose)
            continue;

        switch (type) {
        case HIT:
            printf("%s hit\n", instr + 1);
            break;
        case MISS:
            printf("%s miss\n", instr + 1);
            break;
        case MISS_HIT:
            printf("%s miss hit\n", instr + 1);
            break;
        case MISS_EVICTION:
            printf("%s miss eviction\n", instr + 1);
            break;
        case MISS_EVICTION_HIT:
            printf("%s miss eviction hit\n", instr + 1);
            break;
        default:
            break; /* SKIPPED: nothing to report */
        }
    }

    fclose(f);
    printSummary(hits, misses, evictions);
    release_cache(totsets);
    return 0;
}


Part B 

要求优化矩阵的转置算法,让三个不同大小的矩阵在转置过程中 cache 的 miss 率低于题目的要求。


/* * trans.c - Matrix transpose B = A^T * * Each transpose function must have a prototype of the form: * void trans(int M, int N, int A[N][M], int B[M][N]); * * A transpose function is evaluated by counting the number of misses * on a 1KB direct mapped cache with a block size of 32 bytes. */#include <stdio.h>#include "cachelab.h"int is_transpose(int M, int N, int A[N][M], int B[M][N]);/* * transpose_submit - This is the solution transpose function that you *     will be graded on for Part B of the assignment. Do not change *     the description string "Transpose submission", as the driver *     searches for that string to identify the transpose function to *     be graded. */char transpose_submit_desc[] = "Transpose submission";void transpose_submit(int M, int N, int A[N][M], int B[M][N]){//cache(s = 5, E = 1, b = 5).  一共有32个set  每个set1line 每个block有32个byte     int i, j, k, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;     if ( M == 32 )//M=N=32的情况     {        for (j = 0; j < 32; j = j+8)        {            for (i = 0; i < 32; i++)// 一个int元素4byte 也就是cache里面存放这些元素的时候 一个set只能放8个            {                 temp0 = A[i][j];//                 temp1 = A[i][j+1];                 temp2 = A[i][j+2];                 temp3 = A[i][j+3];                 temp4 = A[i][j+4];                 temp5 = A[i][j+5];                 temp6 = A[i][j+6];                 temp7 = A[i][j+7];                 B[j][i] = temp0;                 B[j+1][i] = temp1;                 B[j+2][i] = temp2;                 B[j+3][i] = temp3;                 B[j+4][i] = temp4;                 B[j+5][i] = temp5;                 B[j+6][i] = temp6;                 B[j+7][i] = temp7;             }        }     }     if ( M == 64 )//当M=64时候 矩阵大了 所以不能完全放在cache里面     {//cache(s = 5, E = 1, b = 5).  
一共有32个set  每个set1line 每个block有32个byte        for (j = 0; j < 64; j = j + 8)//A[k][j],B[j][k]        {            for (k = 0; k < 64; k = k + 8)            {//相当于把64*64的矩阵分割成了8*8                for (i = k; i < k + 4; i++)                {                     temp0 = A[i][j];                     temp1 = A[i][j+1];                     temp2 = A[i][j+2];                     temp3 = A[i][j+3];                     temp4 = A[i][j+4];                     temp5 = A[i][j+5];                     temp6 = A[i][j+6];                     temp7 = A[i][j+7];//同样是先取八个出来                     B[j][i] = temp0;                     B[j+1][i] = temp1;                     B[j+2][i] = temp2;                     B[j+3][i] = temp3;//前面四个正常放置                     B[j][i+4] = temp4;//后面四个先并列放好                     B[j+1][i+4] = temp5;                     B[j+2][i+4] = temp6;                     B[j+3][i+4] = temp7;                }                for (i = j; i < j + 4; i++)                {                     temp0 = B[i][k+4];                     temp1 = B[i][k+5];                     temp2 = B[i][k+6];                     temp3 = B[i][k+7];                     temp4 = A[k+4][i];                     temp5 = A[k+5][i];                     temp6 = A[k+6][i];                     temp7 = A[k+7][i];                     B[i][k+4] = temp4;                     B[i][k+5] = temp5;                     B[i][k+6] = temp6;                     B[i][k+7] = temp7;                     B[i+4][k] = temp0;                     B[i+4][k+1] = temp1;                     B[i+4][k+2] = temp2;                     B[i+4][k+3] = temp3;                }                for (i = j + 4; i < j + 8; i ++)                {                     temp0 = A[k+4][i];                     temp1 = A[k+5][i];                     temp2 = A[k+6][i];                     temp3 = A[k+7][i];                     B[i][k+4] = temp0;                     B[i][k+5] = temp1;                     B[i][k+6] = temp2;                     B[i][k+7] = temp3;    
            }            }        }     }    if ( M == 61 )//M=61 N=67 int A[N][M], int B[M][N] 所以A是有67行的    {       for (k = 0; k < 64; k=k+8)//A[k][j],B[j][k]       {           for (j = 0; j < 60; j = j + 6)//所以先划成小一点的形状           {                for (i = k; i < k + 4; i++)                {                     temp0 = A[i][j];                     temp1 = A[i][j+1];                     temp2 = A[i][j+2];                     temp3 = A[i][j+3];                     temp4 = A[i][j+4];                     temp5 = A[i][j+5];                     B[j][i] = temp0;                     B[j+1][i] = temp1;                     B[j+2][i] = temp2;                     B[j][i+4] = temp3;                     B[j+1][i+4] = temp4;                     B[j+2][i+4] = temp5;                     if (j == 54)                     {                        temp0 = A[i][60];                        B[60][i] = temp0;                     }                 }                 for (i = j; i < j + 3; i++)                 {                     temp0 = B[i][k+4];                     temp1 = B[i][k+5];                     temp2 = B[i][k+6];                     temp7 = B[i][k+7];                     temp3 = A[k+4][i];                     temp4 = A[k+5][i];                     temp5 = A[k+6][i];                     temp6 = A[k+7][i];                     B[i][k+4] = temp3;                     B[i][k+5] = temp4;                     B[i][k+6] = temp5;                     B[i][k+7] = temp6;                     B[i+3][k] = temp0;                     B[i+3][k+1] = temp1;                     B[i+3][k+2] = temp2;                     B[i+3][k+3] = temp7;                  }                  for (i = j + 3; i < j + 6; i++)                  {                      temp0 = A[k+4][i];                      temp1 = A[k+5][i];                      temp2 = A[k+6][i];                      temp3 = A[k+7][i];                      B[i][k+4] = temp0;                      B[i][k+5] = temp1;                      B[i][k+6] = temp2;  
                    B[i][k+7] = temp3;                      if (i == 59)                      {                        temp0 = A[k+4][i+1];                        temp1 = A[k+5][i+1];                        temp2 = A[k+6][i+1];                        temp3 = A[k+7][i+1];                        B[i+1][k+4] = temp0;                        B[i+1][k+5] = temp1;                        B[i+1][k+6] = temp2;                        B[i+1][k+7] = temp3;                      }                  }                  if ( k == 56)//到了最后一大块的时候                  {                     for ( i = j;i < j + 6; i++)                     {                         temp0 = A[64][i];                         temp1 = A[65][i];                         temp2 = A[66][i];                         B[i][64] = temp0;                         B[i][65] = temp1;                         B[i][66] = temp2;                         if ( i == 59 )                         {                            temp0 = A[64][60];                            temp1 = A[65][60];                            temp2 = A[66][60];                            B[60][64] = temp0;                            B[60][65] = temp1;                            B[60][66] = temp2;                         }                     }                  }           }       }    }}/* * You can define additional transpose functions below. We've defined * a simple one below to help you get started. *//* * trans - A simple baseline transpose function, not optimized for the cache. */char trans_desc[] = "Simple row-wise scan transpose";//原有的自带最简单直接转职函数void trans(int M, int N, int A[N][M], int B[M][N]){    int i, j, tmp;    for (i = 0; i < N; i++) {        for (j = 0; j < M; j++) {            tmp = A[i][j];            B[j][i] = tmp;        }    }}/* * registerFunctions - This function registers your transpose *     functions with the driver.  At runtime, the driver will *     evaluate each of the registered functions and summarize their *     performance. 
This is a handy way to experiment with different *     transpose strategies. */void registerFunctions(){    /* Register your solution function */    registerTransFunction(transpose_submit, transpose_submit_desc);    /* Register any additional transpose functions */    registerTransFunction(trans, trans_desc);}/* * is_transpose - This helper function checks if B is the transpose of *     A. You can check the correctness of your transpose by calling *     it before returning from the transpose function. */int is_transpose(int M, int N, int A[N][M], int B[M][N])//用来测试是否是转置矩阵{    int i, j;    for (i = 0; i < N; i++) {        for (j = 0; j < M; ++j) {            if (A[i][j] != B[j][i]) {                return 0;            }        }    }    return 1;}


我目前还没有完全啃明白= = 以后再更

0 0