整理一下,这些天我都崩溃了,怎样读取word

来源:互联网 发布:曲线拟合算法 编辑:程序博客网 时间:2024/05/26 02:53

这些天崩溃了,各种煎熬。

 

用VC去读Word,本来就不是C/C++的专长。

最开始用的方法是通过OLE/COM自动化,把Word的内容另存到txt中:

 

void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile){    // ******************* Declare Some Variables ********************    // Variables that will be used and re-used in our calls    DISPPARAMS        m_dpNoArgs = {NULL, NULL, 0, 0};    VARIANT            m_vResult;    OLECHAR FAR*    m_szFunction;    // IDispatch pointers for Word's objects    IDispatch*        m_pDispDocs;          //Documents collection    IDispatch*        m_pDispActiveDoc;      //ActiveDocument object    // DISPID's    DISPID            m_dispid_Docs;        //Documents property of Application object    DISPID            m_dispid_ActiveDoc;   //ActiveDocument property of Application    DISPID            m_dispid_SaveAs;      //SaveAs method of the Document object    DISPID            m_dispid_Quit;        //Quit method of the Application object    DISPID            m_dispid_Open;          //Open method of the Application object    BSTR            m_bstrEmptyString ;    // ******************** Start Automation ***********************    //Initialize the COM libraries    ::CoInitialize(NULL);    // Create an instance of the Word application and obtain the pointer    // to the application's IDispatch interface.    
CLSID    m_clsid;    CLSIDFromProgID(L"Word.Application.12", &m_clsid);    IUnknown*    m_pUnk;    HRESULT m_hr = ::CoCreateInstance( m_clsid, NULL, CLSCTX_SERVER,                                       IID_IUnknown, (void**) &m_pUnk);    IDispatch*    m_pDispApp;    m_hr = m_pUnk->QueryInterface( IID_IDispatch, (void**)&m_pDispApp);    // Get IDispatch* for the Documents collection object    m_szFunction = OLESTR("Documents");    m_hr = m_pDispApp->GetIDsOfNames (IID_NULL, &m_szFunction, 1,                                      LOCALE_USER_DEFAULT, &m_dispid_Docs);    m_hr = m_pDispApp->Invoke (m_dispid_Docs, IID_NULL, LOCALE_USER_DEFAULT,                               DISPATCH_PROPERTYGET, &m_dpNoArgs, &m_vResult,                               NULL, NULL);    m_pDispDocs = m_vResult.pdispVal;    // Query id of method open    m_szFunction = OLESTR("Open");    m_hr = m_pDispDocs->GetIDsOfNames(IID_NULL, &m_szFunction,1,                                      LOCALE_USER_DEFAULT, &m_dispid_Open);    // Prepare parameters for open method    VARIANT        vArgsOpen[6];    DISPPARAMS    dpOpen;    dpOpen.cArgs        = 6;    dpOpen.cNamedArgs   = 0;    dpOpen.rgvarg        = vArgsOpen;    VARIANT        vFalse, vTrue;    vFalse.vt            = VT_BOOL;    vFalse.boolVal        = FALSE;    vTrue.vt            = VT_BOOL;    vTrue.boolVal        = TRUE;    m_bstrEmptyString = ::SysAllocString(OLESTR("")) ;    vArgsOpen[5].vt            = VT_BSTR;    vArgsOpen[5].bstrVal    = bstrOpenFile;    vArgsOpen[4]            = vFalse;    vArgsOpen[3]            = vTrue;    vArgsOpen[2]            = vFalse;    vArgsOpen[1].vt            = VT_BSTR;    vArgsOpen[1].bstrVal    = m_bstrEmptyString;    vArgsOpen[0].vt            = VT_BSTR;    vArgsOpen[0].bstrVal    = m_bstrEmptyString;    //Execute open method    m_hr=m_pDispDocs->Invoke(m_dispid_Open,IID_NULL,LOCALE_USER_DEFAULT,                             DISPATCH_METHOD,&dpOpen,NULL,NULL,NULL);    //Query activedocument interface    
m_szFunction = OLESTR("ActiveDocument");    m_hr = m_pDispApp->GetIDsOfNames(IID_NULL, &m_szFunction, 1,                                     LOCALE_USER_DEFAULT,&m_dispid_ActiveDoc);    m_hr = m_pDispApp->Invoke(m_dispid_ActiveDoc,IID_NULL,                              LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET,                              &m_dpNoArgs,&m_vResult,NULL,NULL);    m_pDispActiveDoc = m_vResult.pdispVal;    //Prepare arguments for save as .txt    VARIANT            vArgsSaveAs[11];    DISPPARAMS        dpSaveAs;    dpSaveAs.cArgs            =   11;    dpSaveAs.cNamedArgs        =   0;    dpSaveAs.rgvarg            =   vArgsSaveAs;    vArgsSaveAs[10].vt        =   VT_BSTR;    vArgsSaveAs[10].bstrVal =    bstrSaveFile;        //Filename    vArgsSaveAs[9].vt        =   VT_I4;//VT_I4;    vArgsSaveAs[9].lVal        = 7;//wdFormatUnicodeText;// 7;//2;                  //FileFormat(wdFormatText = 2)    vArgsSaveAs[8]            =   vFalse;                //LockComments    vArgsSaveAs[7].vt        =   VT_BSTR;    vArgsSaveAs[7].bstrVal  =   m_bstrEmptyString;  //Password    vArgsSaveAs[6].vt        =   VT_BOOL;    vArgsSaveAs[6].boolVal  =   TRUE;               //AddToRecentFiles    vArgsSaveAs[5].vt        =   VT_BSTR;    vArgsSaveAs[5].bstrVal  =   m_bstrEmptyString;  //WritePassword    vArgsSaveAs[4]            =   vFalse;             //ReadOnlyRecommended    vArgsSaveAs[3]            =   vFalse;             //EmbedTrueTypeFonts    vArgsSaveAs[2]            =   vFalse;             //SaveNativePictureFormat    vArgsSaveAs[1]            =   vFalse;             //SaveFormsData    vArgsSaveAs[0]            =   vFalse;                //SaveAsOCELetter    // Query and execute SaveAs method    m_szFunction = OLESTR("SaveAs");    m_hr = m_pDispActiveDoc->GetIDsOfNames(IID_NULL,&m_szFunction,1,                                           LOCALE_USER_DEFAULT,&m_dispid_SaveAs);    m_hr = m_pDispActiveDoc->Invoke(m_dispid_SaveAs, IID_NULL,                                   
 LOCALE_SYSTEM_DEFAULT,DISPATCH_METHOD,//LOCALE_USER_DEFAULT                                    &dpSaveAs,NULL,NULL,NULL);    // Invoke the Quit method    m_szFunction = OLESTR("Quit");    m_hr = m_pDispApp->GetIDsOfNames(IID_NULL, &m_szFunction, 1,                                     LOCALE_USER_DEFAULT, &m_dispid_Quit);    m_hr = m_pDispApp->Invoke(m_dispid_Quit, IID_NULL, LOCALE_USER_DEFAULT,                              DISPATCH_METHOD, &m_dpNoArgs, NULL, NULL, NULL);//cout << m_bstrEmptyString << endl;cout << (char*)m_bstrEmptyString << endl;    //Clean-up    ::SysFreeString(bstrOpenFile) ;    ::SysFreeString(bstrSaveFile) ;    ::SysFreeString(m_bstrEmptyString) ;    m_pDispActiveDoc->Release();    m_pDispDocs->Release();    m_pDispApp->Release();    m_pUnk->Release();    ::CoUninitialize();}


// Converts the sample document D:\code\data\c2.docx into D:\to.txt.
// Both paths are allocated here as BSTRs; DocToTxt frees them with
// SysFreeString before it returns, so they are not released again here.
int main(int argc, char* argv[])
{
    DocToTxt(::SysAllocString(OLESTR("D:\\code\\data\\c2.docx")),
             ::SysAllocString(OLESTR("D:\\to.txt")));
    return 0;
}

 

但是这个方法另存为的是ANSI编码的txt,会导致word中的韩文等字符丢失。

看不懂这些代码,不知道怎么修改参数。

 

又尝试了下msword.cpp里的方法

// Selects the whole document through m_wdSel and returns the selected text.
// NOTE(review): the surrounding notes report that Korean characters arrive as '?'.
// Presumably the project is built without _UNICODE, which makes CString a narrow
// (ANSI code page) string - confirm the project's character-set setting.
CString CWordOffice::getText()
{
    m_wdSel.WholeStory();   // select everything
    return m_wdSel.GetText();
}

// Reads the property with DISPID 0x0 from the wrapped Selection object and
// returns it as a CString (InvokeHelper is asked for a VT_BSTR result).
CString Selection::GetText()
{
    CString result;
    InvokeHelper(0x0, DISPATCH_PROPERTYGET, VT_BSTR, (void*)&result, NULL);
    return result;
}


调试发现,在保存到CString中的时候,韩文又变成?了

试了下其他方法

//4-13
// Experimental overload, presumably intended to fetch the selection text as
// wide characters instead of going through CString.
// NOTE(review): as written, nothing in this function writes to `result`; the
// pointer is returned exactly as it was passed in.
char* Selection::GetText(char* result)
{
 //CString result;

// LPSTR
 // Zero-initialised scratch buffer of 10240 wide characters on the stack.
 wchar_t tmp[10240] = {0};
 // Alias of tmp; it is not referenced again in this function.
 LPWSTR result1 = tmp;
 // Requests the property with DISPID 0x0, asking for a VT_LPWSTR result and
 // passing tmp as the return buffer.
 // NOTE(review): VT_LPWSTR is presumably not a return type InvokeHelper
 // supports (automation strings come back as VT_BSTR) - confirm against the
 // MFC documentation before relying on the contents of tmp.
 InvokeHelper(0x0, DISPATCH_PROPERTYGET, VT_LPWSTR, (void*)tmp, NULL);
 return result;
}

 

不行,不懂其中的原理,没有方向。

 

ole com 这些东西不懂,是个很大的问题

http://msdn.microsoft.com/en-us/library/Aa155776

里面有写word的例子,但是没有读的,看不懂

 

后来想改变系统设置,让txt默认就是UTF8编码的,找了个方法,

生成一个注册表文件cc.reg

Windows Registry Editor Version 5.00

[HKEY_CLASSES_ROOT\.txt\ShellNew]
"FileName"="d:\\tmp.txt"

其中tmp.txt是一个模板空的txt,编码手工设置为UTF8

 

这样确实可以修改默认设置了,手工生成记事本的时候,就是UTF8格式的

但是再调用前面那个“另存为”的程序时,生成的还是ANSI格式的txt。

这个方法又失败了

 

各种上网找,求助,都没有找到解决方案。

 

据说c#里面读写操作word很方便,字符集转换也很方便。

实在不行,就用c#去实现word到UTF8格式txt文件的方法,

然后在C++程序中去调用,

尺有所短,寸有所长。

 

 

原创粉丝点击