将败者树运用到文档匹配打分程序中
来源:互联网 发布:哈尔滨雨人软件 编辑:程序博客网 时间:2024/05/16 14:10
length={4}, pfRtv={ 1.000000} length={9}, pfRtv={ 2.000000} length={16}, pfRtv={ 3.000000} length={25}, pfRtv={ 4.000000}
doc0 start
10 2.000000 11 3.000000 12 6.000000 13 11.000000
doc1 start
11 3.000000 12 4.000000 13 7.000000 14 12.000000 15 19.000000 16 8.000000 17 19.000000 18 12.000000 19 7.000000
doc2 start
12 4.000000 13 5.000000 14 8.000000 15 13.000000 16 0.000000 17 9.000000 18 0.000000 19 13.000000 20 8.000000 21 5.000000 22 4.000000 23 5.000000 24 8.000000 25 13.000000 26 0.000000 27 9.000000
doc3 start
13 5.000000 14 6.000000 15 9.000000 16 14.000000 17 1.000000 18 10.000000 19 1.000000 20 14.000000 21 9.000000 22 6.000000 23 5.000000 24 6.000000 25 9.000000 26 14.000000 27 1.000000 28 10.000000 29 1.000000 30 14.000000 31 9.000000 32 6.000000 33 5.000000 34 6.000000 35 9.000000 36 14.000000 37 1.000000
result
13 0.000916
¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of posting lists merged by GetSameDocWrap; also sizes its per-list arrays. */
#define PATH 4
/*
 * One posting-list entry: a document ID together with the term weight that
 * the scoring loop reads for that document.
 *
 * iDocID is spelled `unsigned short` rather than `ushort`: `ushort` is a
 * non-standard typedef that is only visible when the C library exposes its
 * extensions, while `unsigned short` is the same type everywhere.
 */
typedef struct _SSDOC_ITEM_TMP
{
    unsigned short iDocID; /* document ID; MAX_USHORT marks the end of a list */
    float tf;              /* per-document term value used when scoring */
} SSDOC_ITEM_TMP;
/* Scalar type used for term weights. */
#define RTV_TYPE float
/* Largest document ID value; doubles as the end-of-list sentinel. */
#define MAX_USHORT 65535
/* Placeholder type for the search-argument parameter of GetSameDocWrap. */
#define PAGE_SEARCH_ARG int
/*
 * Document IDs per bucket. The whole expansion is parenthesised so the macro
 * behaves as a single value inside larger expressions such as
 * `x / BUCKET_VECTOR_BLOCK` or `x % BUCKET_VECTOR_BLOCK`.
 */
#define BUCKET_VECTOR_BLOCK (64 * 1024)
/* Per-list metadata handed to GetSameDocWrap alongside each posting list. */
typedef struct _TERM_DIC_HASH_VALUE_INTER
{
/* Points at the weight of this list; the scoring loop multiplies by *pfRtv. */
float * pfRtv;
/* Number of real entries in the matching posting list (main allocates length + 1 slots). */
int length;
}TERM_DIC_HASH_VALUE_INTER;
/* Cursor over one posting list; these are the items held in the loser tree. */
typedef struct wrap_data
{
/* Index of the current entry inside data. */
int offset;
/* Index of the source list; the list's leaf slot in vec is path + vecsize/2. */
int path;
/* First entry of the posting list this cursor walks. */
SSDOC_ITEM_TMP *data;
}wrap_data;
/*
 * Returns the leaf capacity to use for a tree fed by `path` lists: the
 * smallest of 4, 8 or 16 that is >= path, and 32 for anything larger.
 *
 * Note that the result is capped at 32, so it is smaller than `path`
 * whenever path > 32.
 */
int choosevec(int path)
{
    static const int capacities[] = { 4, 8, 16 };
    const int count = (int)(sizeof capacities / sizeof capacities[0]);
    int idx;

    for (idx = 0; idx < count; idx++)
    {
        if (path <= capacities[idx])
        {
            return capacities[idx];
        }
    }
    return 32;
}
/*
 * Loser-tree storage shared by up() and GetSameDocWrap: slot 0 and the
 * internal slots hold cursors placed there by up(), and the leaves start at
 * vecsize/2. Allocated inside GetSameDocWrap.
 */
wrap_data **vec;
/* Number of slots in vec (twice the value returned by choosevec). */
int vecsize;
/*
 * Replays the tree from slot `num` towards slot 0.
 *
 * The cursor found in vec[num] is carried upwards. At every parent slot:
 *   - an empty slot takes the carried cursor and the walk stops; the cursor
 *     is returned only when that slot is slot 0, otherwise NULL is returned
 *     (this is what happens while the tree is still being filled);
 *   - a slot holding a cursor of a different list whose current iDocID is
 *     smaller than the carried one swaps with it: the carried cursor stays
 *     in the slot and the stored one continues upwards;
 *   - in every other case (same list, or carried ID not greater) the slot is
 *     left untouched and the carried cursor continues.
 *
 * Returns the cursor still being carried once slot 0 has been visited.
 */
wrap_data * up ( int num )
{
    wrap_data *carried = vec[num];
    int node = num;

    while (node != 0)
    {
        int parent = node / 2;
        wrap_data *stored = vec[parent];

        if (stored == NULL)
        {
            vec[parent] = carried;
            return (parent == 0) ? carried : NULL;
        }

        if (stored->path != carried->path
            && carried->data[carried->offset].iDocID > stored->data[stored->offset].iDocID)
        {
            /* The carried cursor loses here: park it and promote the stored one. */
            vec[parent] = carried;
            carried = stored;
        }

        node = parent;
    }
    return carried;
}
/*
 * Intersects the first PATH posting lists in pDocArray with a loser tree and
 * appends one scored entry to aSameDoc for every document ID that is seen
 * PATH times in a row in the merged stream.
 *
 * Parameters:
 *   sBucketPos  - unused.
 *   nSameCount  - in/out: index of the next free slot in aSameDoc; incremented
 *                 once per match.
 *   aSameDoc    - output array. NOTE(review): its capacity is not passed in,
 *                 so the caller must size it for the worst case.
 *   nDocArray   - number of lists in pDocArray; must be at least PATH,
 *                 otherwise the function returns without doing anything.
 *                 Lists beyond the first PATH are ignored.
 *   pDocArray   - the posting lists. Each list is expected to end with an
 *                 entry whose iDocID is MAX_USHORT; nothing else bounds the
 *                 reads. Assumes IDs are ascending and unique inside a list -
 *                 TODO confirm against the producers of these lists.
 *   pArg        - unused.
 *   pDocList    - per-list metadata; *pDocList[i].pfRtv is the weight of list i.
 *   loffset     - unused.
 *
 * Score of a match: sum over the PATH lists of tf / MAX_USHORT * weight.
 *
 * Side effects: (re)assigns the globals vecsize and vec while running; vec is
 * released and reset to NULL before a normal return. A single "\n" is printed
 * to stdout after the merge. If nDocArray < PATH or the tree cannot be
 * allocated, the function returns early without printing or producing matches.
 */
void GetSameDocWrap(ushort sBucketPos, int &nSameCount,SSDOC_ITEM_TMP *aSameDoc, int nDocArray, SSDOC_ITEM_TMP *pDocArray[],
PAGE_SEARCH_ARG *pArg, const TERM_DIC_HASH_VALUE_INTER * const pDocList, off_t *loffset)
{
    /* Accepted for interface compatibility; nothing below reads them. */
    (void)sBucketPos;
    (void)pArg;
    (void)loffset;

    /* The merge always reads pDocArray[0..PATH-1] and pDocList[0..PATH-1]. */
    if (nDocArray < PATH)
    {
        return;
    }

    wrap_data a[PATH]={0};     /* one cursor per input list */
    wrap_data sed[PATH]={0};   /* snapshots of the cursors that hit the current ID */
    wrap_data *result=NULL;
    int last_docid=MAX_USHORT;
    int count_docid=0;
    int i=0;
    int k=0;

    vecsize=2* choosevec(PATH);
    vec=(wrap_data **)calloc( vecsize ,sizeof (wrap_data*));
    if (!vec)
    {
        return;
    }

    for(i=0;i<PATH;i++)
    {
        a[i].data=pDocArray[i];
        a[i].offset=0;
        a[i].path=i;
    }

    /* Leaves occupy the upper half of vec. */
    k=vecsize/2;
    for(i=0;i<PATH;i++)
    {
        vec[k+i]=&a[i];
    }

    /* Build the tree; the call for the last leaf yields the first winner. */
    for(i=0;i<PATH;i++)
    {
        result=up(i+k);
    }

    while(result)
    {
        if( (*(result->data+result->offset)).iDocID == MAX_USHORT )
        {
            /* The smallest current ID is the sentinel: every list is exhausted. */
            break;
        }
        if ( (*(result->data+result->offset)).iDocID == last_docid )
        {
            count_docid++;
            /* Copy by value: result->offset is advanced below, the snapshot must not move. */
            sed[count_docid-1]=*result;
            if (count_docid == PATH)
            {
                aSameDoc[nSameCount].iDocID=last_docid;
                aSameDoc[nSameCount].tf=0;
                for(i=0;i<PATH;i++)
                {
                    aSameDoc[nSameCount].tf+=
                        (*(sed[i].data + sed[i].offset)).tf/ MAX_USHORT * (*(pDocList[sed[i].path ].pfRtv));
                }
                /* Reset so the next ID starts a fresh run. */
                last_docid=MAX_USHORT;
                nSameCount++;
            }
        }
        else
        {
            last_docid=(*(result->data+result->offset)).iDocID ;
            count_docid=1;
            sed[count_docid-1]=*result;
        }
        /* Advance the winning list and replay the tree from its leaf. */
        result->offset++;
        result=up(result->path+k);
    }

    /* The tree only references the local cursors in a[], so it must not outlive this call. */
    free(vec);
    vec=NULL;

    printf("\n");
}
int main()
{
int nSameCount=0;
SSDOC_ITEM_TMP aSameDoc[MAX_USHORT ] ={0};
int nDocArray =PATH;
SSDOC_ITEM_TMP * pDocArray[PATH];
PAGE_SEARCH_ARG pArg[PATH];
TERM_DIC_HASH_VALUE_INTER pDocList[PATH]={0};
for(int i=0;i<nDocArray ;i++)
{
pDocList[i].pfRtv=(float *)calloc (1,sizeof (float ));
*(pDocList[i].pfRtv)=i+1;
pDocList[i].length=(i+2)*(i+2);
printf("length={%d}, pfRtv={ %f} ",pDocList[i].length, *(pDocList[i].pfRtv) );
}
printf("\n");
for(int i=0;i<nDocArray;i++)
{
printf("doc%d start \n",i);
pDocArray[i]=(SSDOC_ITEM_TMP *)calloc ( pDocList[i].length +1,sizeof ( SSDOC_ITEM_TMP ));
for(int j=0;j< pDocList[i].length;j++)
{
pDocArray[i][j].iDocID=i+j+10;
pDocArray[i][j].tf=(j*j+2+i)%20;
printf("%d %f ", pDocArray[i][j].iDocID,pDocArray[i][j].tf);
}
pDocArray[i][ pDocList[i].length ].iDocID=MAX_USHORT;
printf("\n");
}
GetSameDocWrap ( 0 , nSameCount ,aSameDoc ,nDocArray , pDocArray , pArg , pDocList ,0);
printf("result \n");
for(int i=0;i<nSameCount;i++)
{
printf("%d %f \n", aSameDoc[i].iDocID,aSameDoc[i].tf);
}
}
- 将败者树运用到文档匹配打分程序中
- 文档合并打分程序
- 如何将 Linux 内核实现的红黑树 rbtree 运用到你的 C 程序中?
- 将程序输出在控制台的内容输出到文档中
- 将条形码插入到word文档中
- 将九九乘法表打印到txt文档中
- 败者树
- 败者树
- 败者树
- 败者树
- 败者树
- 败者树
- 败者树
- 败者树
- 败者树
- 给你个基类如何添加到自己运用程序中
- C#将dll打包到程序中
- java中,将一串字符串写入到本地文档中
- Android: 获取android market的登陆ID<get market place google account>
- vim的使用
- EditText属性
- 回首三年
- .NET4.0新特性集合贴
- 将败者树运用到文档匹配打分程序中
- devenv.exe启动版本
- php 安装libevent 附webserver代码
- vs2008编译出来的程序不能运行或需要安装vcredist_x86.exe才能运行解决办法
- effective c++之虚析构函数
- 几个代码搜索网站
- 调用打印界面
- 关于用jquery中checkbox与toggle ...
- VC6.0插件一览表