213 lab 4 cachelab
来源:互联网 发布:图标修复软件 编辑:程序博客网 时间:2024/06/07 17:50
这个lab让我深刻地体会到了自己还是一个菜鸟。。。。
lab分为AB两个部分。
先说 Part A。要写一个 C 语言程序来模拟 cache 的工作情况。
/*
 * csim.c - Cache simulator for Part A of the cache lab.
 *
 * Replays a memory trace against a simulated cache that has 2^s sets,
 * E lines per set and 2^b-byte blocks, evicting the least recently used
 * line of a set when it is full, and reports the number of hits, misses
 * and evictions through printSummary().
 *
 * Usage: csim [-v] -s <s> -E <E> -b <b> -t <tracefile>
 */
#include "cachelab.h"
#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <limits.h>

#define MAX 100 /* size of the buffer used to read one trace line */

/* Running totals handed to printSummary() at the end of the run. */
int misses, evictions, hits;

/* Cache parameters: set-index bits, lines per set, block-offset bits. */
int s, E, b;

/*
 * Outcome of one trace line.  SKIPPED is returned for a line that could not
 * be parsed; it leaves every counter untouched.
 */
enum TYPE { HIT, MISS, MISS_HIT, MISS_EVICTION, MISS_EVICTION_HIT, SKIPPED };
enum TYPE type;

/* One cache line.  The cache itself is a 2-D array: cache[set][line]. */
typedef struct {
    int is_valid;            /* 1 once the line holds a block */
    unsigned long long tag;  /* full tag: every address bit above s + b */
    int access_time;         /* LRU rank: E-1 is newest, 0 is the victim */
} Cache;

Cache **cache;

/*
 * GetS - extract the set index from an address.
 *
 * An address is laid out as [ tag | set index | block offset ], so the set
 * index is the s bits that sit directly above the b offset bits.
 */
int GetS(unsigned long long address)
{
    unsigned long long mask = (1ULL << s) - 1; /* low s bits set */

    return (int)((address >> b) & mask);
}

/*
 * GetT - extract the tag from an address.
 *
 * The tag is everything above the set-index and block-offset bits.  No mask
 * is applied: masking to s + b bits (as an earlier version did) throws away
 * high tag bits and makes distinct blocks look identical.
 */
unsigned long long GetT(unsigned long long address)
{
    return address >> (s + b);
}

/*
 * update_time - mark line `tarline` of set `sel` as the most recently used.
 *
 * Every valid line that was newer than the target moves one step towards
 * "oldest", then the target takes the newest rank E-1.  Invalid lines start
 * at -1, so filling an empty line ages every valid line in the set.
 */
void update_time(int sel, int tarline)
{
    int i;

    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 1 &&
            cache[sel][i].access_time > cache[sel][tarline].access_time) {
            --cache[sel][i].access_time;
        }
    }
    cache[sel][tarline].access_time = E - 1;
}

/*
 * calcu - simulate one trace line and update the global counters.
 *
 * instr is a line of the form " <op> <hex address>,<size>".  The size is not
 * needed by the simulation and is not parsed.  An 'M' (modify) operation is a
 * load followed by a store to the same address, so it always scores one
 * extra hit on top of whatever the first access produced.
 *
 * Returns the outcome of the access, or SKIPPED when the line does not
 * contain an operation and an address.
 */
enum TYPE calcu(char instr[])
{
    char ins;
    unsigned long long address;
    unsigned long long tag;
    int sel;
    int victim;
    int i;

    if (sscanf(instr, " %c %llx", &ins, &address) != 2) {
        return SKIPPED;
    }

    sel = GetS(address);
    tag = GetT(address);

    /* Hit: a valid line in the set already carries this tag. */
    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 1 && cache[sel][i].tag == tag) {
            if (ins == 'M') {
                ++hits;
            }
            ++hits;
            update_time(sel, i);
            return HIT;
        }
    }

    ++misses;

    /* Cold miss: take the first line of the set that is still empty. */
    for (i = 0; i < E; ++i) {
        if (cache[sel][i].is_valid == 0) {
            cache[sel][i].is_valid = 1;
            cache[sel][i].tag = tag;
            update_time(sel, i);
            if (ins == 'M') {
                ++hits;
                return MISS_HIT;
            }
            return MISS;
        }
    }

    /* The set is full: replace the line holding the oldest rank, 0. */
    ++evictions;
    victim = 0;
    for (i = 0; i < E; ++i) {
        if (cache[sel][i].access_time == 0) {
            victim = i;
            break;
        }
    }
    cache[sel][victim].tag = tag;
    update_time(sel, victim);
    if (ins == 'M') {
        ++hits;
        return MISS_EVICTION_HIT;
    }
    return MISS_EVICTION;
}

/*
 * parse_int_arg - convert a decimal command-line argument to a
 * non-negative int.  Returns 0 on success, -1 if text is not a number or
 * does not fit.
 */
static int parse_int_arg(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 0 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/* free_cache - release the first `nsets` rows and the row table itself. */
static void free_cache(int nsets)
{
    int i;

    for (i = 0; i < nsets; ++i) {
        free(cache[i]);
    }
    free(cache);
    cache = NULL;
}

/*
 * alloc_cache - allocate `nsets` sets of E empty lines each.
 * Returns 0 on success, -1 if memory runs out (nothing is left allocated).
 */
static int alloc_cache(int nsets)
{
    int i, j;

    cache = malloc((size_t)nsets * sizeof *cache);
    if (cache == NULL) {
        return -1;
    }
    for (i = 0; i < nsets; ++i) {
        cache[i] = malloc((size_t)E * sizeof *cache[i]);
        if (cache[i] == NULL) {
            free_cache(i);
            return -1;
        }
        for (j = 0; j < E; ++j) {
            cache[i][j].is_valid = 0;
            cache[i][j].tag = 0;
            cache[i][j].access_time = -1; /* below every valid rank */
        }
    }
    return 0;
}

/*
 * main - parse the options, replay the trace and print the summary.
 *
 * Options: -v prints the outcome of every access, -s/-E/-b give the cache
 * parameters and -t names the trace file.  Returns 0 on success and 1 on a
 * usage, allocation or file error.
 */
int main(int argc, char *argv[])
{
    FILE *f;
    char instr[MAX];
    const char *trace = NULL; /* points into argv; no copy, no overflow */
    int opt, verbose = 0;
    int bad_args = 0;
    int totsets;

    opterr = 0; /* getopt stays quiet; the usage line below reports errors */
    while ((opt = getopt(argc, argv, "vs:E:b:t:")) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        case 's':
            if (parse_int_arg(optarg, &s) != 0) {
                bad_args = 1;
            }
            break;
        case 'E':
            if (parse_int_arg(optarg, &E) != 0) {
                bad_args = 1;
            }
            break;
        case 'b':
            if (parse_int_arg(optarg, &b) != 0) {
                bad_args = 1;
            }
            break;
        case 't':
            trace = optarg;
            break;
        default:
            bad_args = 1;
            break;
        }
    }

    /*
     * E must be positive, 1 << s must fit in an int, and s + b must stay
     * below 64 so that the shifts in GetS/GetT are well defined.
     */
    if (bad_args || trace == NULL || E < 1 || s > 30 || b > 63 || s + b > 63) {
        fprintf(stderr, "Usage: %s [-v] -s <s> -E <E> -b <b> -t <tracefile>\n",
                argv[0]);
        return 1;
    }

    totsets = 1 << s;
    if (alloc_cache(totsets) != 0) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return 1;
    }

    f = fopen(trace, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: cannot open %s: %s\n",
                argv[0], trace, strerror(errno));
        free_cache(totsets);
        return 1;
    }

    while (fgets(instr, MAX, f)) {
        /* Only lines that start with a space are simulated; the original
         * note says the others are instruction loads. */
        if (instr[0] != ' ') {
            continue;
        }
        /* Drop the newline kept by fgets so verbose output stays on one line. */
        instr[strcspn(instr, "\r\n")] = '\0';

        type = calcu(instr);
        if (!verbose) {
            continue;
        }
        switch (type) {
        case HIT:
            printf("%s hit\n", instr + 1);
            break;
        case MISS:
            printf("%s miss\n", instr + 1);
            break;
        case MISS_HIT:
            printf("%s miss hit\n", instr + 1);
            break;
        case MISS_EVICTION:
            printf("%s miss eviction\n", instr + 1);
            break;
        case MISS_EVICTION_HIT:
            printf("%s miss eviction hit\n", instr + 1);
            break;
        default: /* SKIPPED: nothing was simulated */
            break;
        }
    }

    fclose(f);
    printSummary(hits, misses, evictions);
    free_cache(totsets);
    return 0;
}
Part B
要求优化矩阵的转置算法 让三个不同大小的矩阵在运算的过程中cache的miss率低于题目要求。。。
/* * trans.c - Matrix transpose B = A^T * * Each transpose function must have a prototype of the form: * void trans(int M, int N, int A[N][M], int B[M][N]); * * A transpose function is evaluated by counting the number of misses * on a 1KB direct mapped cache with a block size of 32 bytes. */#include <stdio.h>#include "cachelab.h"int is_transpose(int M, int N, int A[N][M], int B[M][N]);/* * transpose_submit - This is the solution transpose function that you * will be graded on for Part B of the assignment. Do not change * the description string "Transpose submission", as the driver * searches for that string to identify the transpose function to * be graded. */char transpose_submit_desc[] = "Transpose submission";void transpose_submit(int M, int N, int A[N][M], int B[M][N]){//cache(s = 5, E = 1, b = 5). 一共有32个set 每个set1line 每个block有32个byte int i, j, k, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; if ( M == 32 )//M=N=32的情况 { for (j = 0; j < 32; j = j+8) { for (i = 0; i < 32; i++)// 一个int元素4byte 也就是cache里面存放这些元素的时候 一个set只能放8个 { temp0 = A[i][j];// temp1 = A[i][j+1]; temp2 = A[i][j+2]; temp3 = A[i][j+3]; temp4 = A[i][j+4]; temp5 = A[i][j+5]; temp6 = A[i][j+6]; temp7 = A[i][j+7]; B[j][i] = temp0; B[j+1][i] = temp1; B[j+2][i] = temp2; B[j+3][i] = temp3; B[j+4][i] = temp4; B[j+5][i] = temp5; B[j+6][i] = temp6; B[j+7][i] = temp7; } } } if ( M == 64 )//当M=64时候 矩阵大了 所以不能完全放在cache里面 {//cache(s = 5, E = 1, b = 5). 
一共有32个set 每个set1line 每个block有32个byte for (j = 0; j < 64; j = j + 8)//A[k][j],B[j][k] { for (k = 0; k < 64; k = k + 8) {//相当于把64*64的矩阵分割成了8*8 for (i = k; i < k + 4; i++) { temp0 = A[i][j]; temp1 = A[i][j+1]; temp2 = A[i][j+2]; temp3 = A[i][j+3]; temp4 = A[i][j+4]; temp5 = A[i][j+5]; temp6 = A[i][j+6]; temp7 = A[i][j+7];//同样是先取八个出来 B[j][i] = temp0; B[j+1][i] = temp1; B[j+2][i] = temp2; B[j+3][i] = temp3;//前面四个正常放置 B[j][i+4] = temp4;//后面四个先并列放好 B[j+1][i+4] = temp5; B[j+2][i+4] = temp6; B[j+3][i+4] = temp7; } for (i = j; i < j + 4; i++) { temp0 = B[i][k+4]; temp1 = B[i][k+5]; temp2 = B[i][k+6]; temp3 = B[i][k+7]; temp4 = A[k+4][i]; temp5 = A[k+5][i]; temp6 = A[k+6][i]; temp7 = A[k+7][i]; B[i][k+4] = temp4; B[i][k+5] = temp5; B[i][k+6] = temp6; B[i][k+7] = temp7; B[i+4][k] = temp0; B[i+4][k+1] = temp1; B[i+4][k+2] = temp2; B[i+4][k+3] = temp3; } for (i = j + 4; i < j + 8; i ++) { temp0 = A[k+4][i]; temp1 = A[k+5][i]; temp2 = A[k+6][i]; temp3 = A[k+7][i]; B[i][k+4] = temp0; B[i][k+5] = temp1; B[i][k+6] = temp2; B[i][k+7] = temp3; } } } } if ( M == 61 )//M=61 N=67 int A[N][M], int B[M][N] 所以A是有67行的 { for (k = 0; k < 64; k=k+8)//A[k][j],B[j][k] { for (j = 0; j < 60; j = j + 6)//所以先划成小一点的形状 { for (i = k; i < k + 4; i++) { temp0 = A[i][j]; temp1 = A[i][j+1]; temp2 = A[i][j+2]; temp3 = A[i][j+3]; temp4 = A[i][j+4]; temp5 = A[i][j+5]; B[j][i] = temp0; B[j+1][i] = temp1; B[j+2][i] = temp2; B[j][i+4] = temp3; B[j+1][i+4] = temp4; B[j+2][i+4] = temp5; if (j == 54) { temp0 = A[i][60]; B[60][i] = temp0; } } for (i = j; i < j + 3; i++) { temp0 = B[i][k+4]; temp1 = B[i][k+5]; temp2 = B[i][k+6]; temp7 = B[i][k+7]; temp3 = A[k+4][i]; temp4 = A[k+5][i]; temp5 = A[k+6][i]; temp6 = A[k+7][i]; B[i][k+4] = temp3; B[i][k+5] = temp4; B[i][k+6] = temp5; B[i][k+7] = temp6; B[i+3][k] = temp0; B[i+3][k+1] = temp1; B[i+3][k+2] = temp2; B[i+3][k+3] = temp7; } for (i = j + 3; i < j + 6; i++) { temp0 = A[k+4][i]; temp1 = A[k+5][i]; temp2 = A[k+6][i]; temp3 = A[k+7][i]; B[i][k+4] = temp0; B[i][k+5] = 
temp1; B[i][k+6] = temp2; B[i][k+7] = temp3; if (i == 59) { temp0 = A[k+4][i+1]; temp1 = A[k+5][i+1]; temp2 = A[k+6][i+1]; temp3 = A[k+7][i+1]; B[i+1][k+4] = temp0; B[i+1][k+5] = temp1; B[i+1][k+6] = temp2; B[i+1][k+7] = temp3; } } if ( k == 56)//到了最后一大块的时候 { for ( i = j;i < j + 6; i++) { temp0 = A[64][i]; temp1 = A[65][i]; temp2 = A[66][i]; B[i][64] = temp0; B[i][65] = temp1; B[i][66] = temp2; if ( i == 59 ) { temp0 = A[64][60]; temp1 = A[65][60]; temp2 = A[66][60]; B[60][64] = temp0; B[60][65] = temp1; B[60][66] = temp2; } } } } } }}/* * You can define additional transpose functions below. We've defined * a simple one below to help you get started. *//* * trans - A simple baseline transpose function, not optimized for the cache. */char trans_desc[] = "Simple row-wise scan transpose";//原有的自带最简单直接转职函数void trans(int M, int N, int A[N][M], int B[M][N]){ int i, j, tmp; for (i = 0; i < N; i++) { for (j = 0; j < M; j++) { tmp = A[i][j]; B[j][i] = tmp; } }}/* * registerFunctions - This function registers your transpose * functions with the driver. At runtime, the driver will * evaluate each of the registered functions and summarize their * performance. This is a handy way to experiment with different * transpose strategies. */void registerFunctions(){ /* Register your solution function */ registerTransFunction(transpose_submit, transpose_submit_desc); /* Register any additional transpose functions */ registerTransFunction(trans, trans_desc);}/* * is_transpose - This helper function checks if B is the transpose of * A. You can check the correctness of your transpose by calling * it before returning from the transpose function. */int is_transpose(int M, int N, int A[N][M], int B[M][N])//用来测试是否是转置矩阵{ int i, j; for (i = 0; i < N; i++) { for (j = 0; j < M; ++j) { if (A[i][j] != B[j][i]) { return 0; } } } return 1;}
我目前还没有完全啃明白= = 以后再更
0 0
- 213 lab 4 cachelab
- LAB 4
- csapp 2e cachelab
- csapp-lab4 cachelab
- 213 lab 3 attacklab
- [Lab Week] 4->5
- Lab 4: Authentication
- Lab 4:尝试bootloader
- 嵌入式LAB 4:Bootloder
- Lab 4:优先级捐赠
- # Software-eng lab 4
- Lab
- LAB
- [Lab Week] 2->3->4
- JOS——lab 4
- CSAPP: Bomb Lab(4)
- 计算机导论——CSAPP, cachelab
- 恶意代码分析实战 Lab 4 习题笔记
- Android OpenCV简单图片处理,不需要安装Manager,不需要ndk,简单实用
- hadoop优化参数说明
- IntelliJ Idea新手从svn挡下普通项目步骤配置
- NIPS 2016 -- 增量Boosting CNN 用于面部动作单元识别
- 机器学习—局部加权线性回归
- 213 lab 4 cachelab
- 安装虚拟机
- 你应该知道的RPC原理
- KMP算法中next数组的求取
- Ubuntu16.04 LTS 上查找Django源文件的位置
- IOS使用本地通知(UILocalNotification)删除应用角标小红点不删除通知中心内容
- 文章标题
- 巨杉应用案例:大数据司法查询平台
- MarkDown图片大小处理