分词DLL导出小结

来源:互联网 发布:淘宝店释放是什么意思 编辑:程序博客网 时间:2024/05/16 18:38
今天整理一下以前写的代码,用的是中科院计算所的那个分词 API(ICTCLAS)。这个 DLL 的任务有三个:
  1. 输入句子,导出结构体,结构体包含了句子中的专有名词,动词,动宾结构
  2. 对句子进行分词
  3. 添加词典
这个 DLL 内部要用动态加载(LoadLibrary/GetProcAddress)的方式来调用 ICTCLAS50.dll,用完即释放,这样内存才不会一直增长。也就是说,本 DLL 被调用的时候又去调用了另一个 DLL,也算是 DLL 嵌套调用。
#include <stdio.h>#include <stdlib.h>#include <malloc.h>#include <Windows.h>#include <string.h>#include "SplitDll.h"#include "ICTCLAS50.h"//#pragma comment(lib, "ICTCLAS50.lib")//入口地址typedef bool (*mICTCLAS_Init)(const char* pszInitDir);typedef int (*mICTCLAS_ParagraphProcess)(const char* sSentence,int nPaLen,char*pszResult, eCodeType Type,boolnPos);typedef unsigned int (*mICTCLAS_ImportUserDict)(const char* pszDictBuffer, const int nLength, eCodeType codeType);typedef LPICTCLAS_RESULT (*mICTCLAS_Para)(const char *sSentence,int nPaLen,int &rstCount, eCodeType mCode , bool bSet);typedef bool (*mICTCLAS_SetPOSmap)(int nPOSmap);typedef bool (*mICTCLAS_ResultFree)(LPICTCLAS_RESULT pRetVec);typedef bool (*mICTCLAS_Exit)();DICT_API int SplitPara( const char* strSrc, char* StrDes, bool bPos ){  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");  if (hInstLibrary == NULL)  {    FreeLibrary(hInstLibrary);  }  mICTCLAS_Init mmICTCLAS_Init;  mICTCLAS_ParagraphProcess mmICTCLAS_ParagraphProcess;   mICTCLAS_Exit mmICTCLAS_Exit;  mmICTCLAS_Init = (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");  mmICTCLAS_ParagraphProcess = (mICTCLAS_ParagraphProcess)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcess");  mmICTCLAS_Exit = (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");  if (!mmICTCLAS_Init(NULL))  {    printf("初始化错误");    return -1;  }  unsigned int nLen = strlen(strSrc);  char* Strtmp = 0;  Strtmp = (char*)malloc(nLen * 6);  int WordCount = mmICTCLAS_ParagraphProcess(strSrc, nLen, StrDes, CODE_TYPE_UTF8, bPos);  mmICTCLAS_Exit();  FreeLibrary(hInstLibrary);//释放资源!  
return WordCount;}DICT_API int UpdataUserDict( const char* AddDict ){  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");  if (hInstLibrary == NULL)  {    FreeLibrary(hInstLibrary);  }  mICTCLAS_Init mmICTCLAS_Init;  mICTCLAS_ImportUserDict mmICTCLAS_ImportUserDict;   mICTCLAS_Exit mmICTCLAS_Exit;  mmICTCLAS_Init = (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");  mmICTCLAS_ImportUserDict = (mICTCLAS_ImportUserDict)GetProcAddress(hInstLibrary, "ICTCLAS_ImportUserDict");  mmICTCLAS_Exit = (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");  if (!mmICTCLAS_Init(NULL))  {    return -1;  }  return mmICTCLAS_ImportUserDict(AddDict, strlen(AddDict), CODE_TYPE_UNKNOWN);  mmICTCLAS_Exit();  FreeLibrary(hInstLibrary);}//DLL改为动态调用DICT_API ResultInfo* ProcessSenstence( const char* strSrc ){  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");  if (hInstLibrary == NULL)  {    FreeLibrary(hInstLibrary);    return NULL;  }  mICTCLAS_Init mmICTCLAS_Init;  mICTCLAS_Para mmICTCLAS_Para;  mICTCLAS_SetPOSmap mmICTCLAS_SetPOSmap;  mICTCLAS_ResultFree mmICTCLAS_ResultFree;  mICTCLAS_Exit mmICTCLAS_Exit;  mmICTCLAS_Init = (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");  mmICTCLAS_Para = (mICTCLAS_Para)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcessA");  mmICTCLAS_SetPOSmap = (mICTCLAS_SetPOSmap)GetProcAddress(hInstLibrary, "ICTCLAS_SetPOSmap");  mmICTCLAS_ResultFree = (mICTCLAS_ResultFree)GetProcAddress(hInstLibrary, "ICTCLAS_ResultFree");  mmICTCLAS_Exit = (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");  if (!mmICTCLAS_Init(NULL))  {    return NULL;  }//初始化  int nPaLen=strlen(strSrc);      int rstCount=0;  mmICTCLAS_SetPOSmap(ICT_POS_MAP_SECOND);  LPICTCLAS_RESULT stVec=mmICTCLAS_Para(strSrc, nPaLen,rstCount,CODE_TYPE_UNKNOWN,1);  /************************************************************************/  /* 0814modified,返回输出数组                                              */  
/************************************************************************/  ResultInfo* mResult = (ResultInfo*)malloc(sizeof(ResultInfo));  if (mResult == NULL)    return NULL;    int NListSize_Pre = 0, NListSize_Non = 0,  VListSize = 0, VNListSize = 0;  //中间结构体,其中名词分两种,NWordList_Pre专有名词,地名,人名 NWordList_Non一般名词  pWord NWordList_Pre = (pWord)malloc(sizeof(Word));  pWord NWordList_Non = (pWord)malloc(sizeof(Word));  pWord VWordList = (pWord)malloc(sizeof(Word));  pWord VNWordList = (pWord)malloc(sizeof(Word));  if (NWordList_Pre == NULL && NWordList_Non == NULL && VWordList == NULL && VNWordList ==NULL)    return NULL;  //中间结构体,字符串预分配  NWordList_Pre->sWord = (char*)calloc(nPaLen, sizeof(char));  NWordList_Non->sWord = (char*)calloc(nPaLen, sizeof(char));  VWordList->sWord = (char*)calloc(nPaLen, sizeof(char));  VNWordList->sWord = (char*)calloc(nPaLen, sizeof(char));  //判断最终struct中是否为空  bool bN = false, bV = false, bVN = false;  //分词结构体的大小,分词结果LPICTCLAS_RESULT结构详见头文件    for (int i = 0; i < rstCount; i++)  {        static LPICTCLAS_RESULT lastWordInfo = &stVec[0];    if (stVec[i].iPOS == 24 || stVec[i].iPOS == 29 ||  stVec[i].iPOS == 27|| stVec[i].iPOS == 32|| stVec[i].iPOS == 21 )    {      if (stVec[i].iPOS != 21)//非n      {        //NListSize_Pre += (stVec[i].iLength + 1);        //处理非nlist        char* pAddWord = (char*)malloc(stVec[i].iLength+2);        memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);              pAddWord[stVec[i].iLength] = ' ';        pAddWord[stVec[i].iLength + 1] = '\0';            //printf("%s\n", pAddWord);        //NWordList_Pre->WordSize = NListSize_Pre;        if(strstr(NWordList_Pre->sWord, pAddWord) == NULL)          strcat(NWordList_Pre->sWord, pAddWord);         NWordList_Pre->WordSize = strlen(NWordList_Pre->sWord);        free(pAddWord);        bN = true;        //printf("%s\n", NWordList_Pre->sWord);      }      else//n      {                     //NListSize_Non += (stVec[i].iLength + 1);            //处理n        
char* pAddWord = (char*)malloc(stVec[i].iLength+2);        memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);        pAddWord[stVec[i].iLength] = ' ';        pAddWord[stVec[i].iLength + 1] = '\0';             //printf("%s\n", pAddWord);        //NWordList_Non->WordSize = NListSize_Non;        if(strstr(NWordList_Non->sWord, pAddWord) == NULL)          strcat(NWordList_Non->sWord, pAddWord);         NWordList_Non->WordSize = strlen(NWordList_Non->sWord);        free(pAddWord);        bN = true;        // printf("%s\n", NWordList_Non->sWord);        //处理V+N的情况        if (lastWordInfo->iPOS == 68 || lastWordInfo->iPOS == 74 ||lastWordInfo->iPOS == 2)        {          //VNListSize +=(lastWordInfo->iLength + stVec[i].iLength + 1);          int tmpLen = lastWordInfo->iLength + stVec[i].iLength;          char* pAddWord = (char*)malloc(tmpLen + 2);          memcpy(pAddWord, &strSrc[lastWordInfo->iStartPos], tmpLen);          pAddWord[tmpLen] = ' ';          pAddWord[tmpLen+1] = '\0';          // printf("%s\n", pAddWord);          //VNWordList->WordSize = VNListSize;          if(strstr(VNWordList->sWord, pAddWord) == NULL)            strcat(VNWordList->sWord, pAddWord);           VNWordList->WordSize = strlen(VNWordList->sWord);          free(pAddWord);          bVN = true;          //printf("%s\n", VNWordList->sWord);        }      }    }    if (stVec[i].iPOS == 74 ||stVec[i].iPOS == 72 ||stVec[i].iPOS == 68)    {      //VListSize += (stVec[i].iLength + 1);      char* pAddWord = (char*)malloc(stVec[i].iLength+2);      memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);      pAddWord[stVec[i].iLength] = ' ';      pAddWord[stVec[i].iLength + 1] = '\0';       //printf("%s\n", pAddWord);      //VWordList->WordSize = VListSize;      if(strstr(VWordList->sWord, pAddWord) == NULL)        strcat(VWordList->sWord, pAddWord);       VWordList->WordSize = strlen(VWordList->sWord);      free(pAddWord);      bV = true;      //printf("%s\n", VWordList->sWord); 
   }    lastWordInfo = &stVec[i];        if (i == rstCount - 1)//最终结果    {      pWord NList = (pWord)malloc(sizeof(Word));      NList->sWord = (char*)calloc(2 * nPaLen,sizeof(char));            strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord);        strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord);       free(NWordList_Pre->sWord);      free(NWordList_Non->sWord);      free(NWordList_Pre);      free(NWordList_Non);      //NList->WordSize = NWordList_Pre->WordSize +NWordList_Non->WordSize;      NList->WordSize = strlen(NList->sWord);      mResult->NrWordList = NList;      mResult->VWordList = VWordList;      mResult->VNWordList = VNWordList;      if(!bN)      {        free(NList->sWord);        free(NList);        mResult->NrWordList = NULL;      }      if(!bV)      {        free(VWordList->sWord);        free(VWordList);        mResult->VWordList = NULL;      }      if(!bVN)      {        free(VNWordList->sWord);        free(VNWordList);        mResult->VNWordList = NULL;      }           }  }  mmICTCLAS_ResultFree(stVec);  mmICTCLAS_Exit();  FreeLibrary(hInstLibrary);  return mResult;  }DICT_API void FreeProcessResult( ResultInfo* pStruResult ){  if (pStruResult != NULL)  {    if (pStruResult->NrWordList != NULL)      if (pStruResult->NrWordList->sWord != NULL)        free(pStruResult->NrWordList->sWord);    free(pStruResult->NrWordList);    if (pStruResult->VWordList != NULL)      if(pStruResult->VWordList->sWord != NULL)        free(pStruResult->VWordList->sWord );    free(pStruResult->VWordList);    if (pStruResult->VNWordList != NULL)      if (pStruResult->VNWordList->sWord != NULL)        free(pStruResult->VNWordList->sWord);    free(pStruResult->VNWordList);    free(pStruResult);  }}

//最终导出来的功能函数就这三个(另外还有一个用来释放结果的 FreeProcessResult),注意调用的时候使用动态调用就行了=_=

原创粉丝点击