字符串处理---找到第一个不重复的…

来源：互联网发布：局域网上网监控软件编辑：程序博客网时间：2024/06/03 14:25

实际上字符串的处理一直是各种测试的难点。为什么叫难点呢？因为很多字符串处理，会因为处理方法的不同而导致内存和时间的花费差异巨大。所以说这也是别人经常说的一个考点。

今天博主也随意写了一个字符串的处理问题。

问题描述：

编写一个高效的函数，找到字符串中首个非重复的字符。例如：“tatol”中首个非重复的字符是a，“teeter”中首个非重复的字符是r。

首先我来说一下一般情况下都能想到的处理方法：遍历，然后每个字符和其他的字符进行比较，这样我们就能找到首个非重复的字符。

但是这样做的时间复杂度是多少呢？n个字符，每个字符比较（n-1）次，那么时间复杂度就是n*n。这种复杂度还是挺坑爹的，因为在字符串处理中一般能轻易想到的方法都是各种次方的时间复杂度，真是蛋疼啊。

实际上，根据博主的各种阅读发现，单个字符的查找之类的问题都可以借助几个辅助数组来解决。虽然说空间复杂度增加了，但是时间复杂度减少了很多。

好了，废话不多说，我来带大家好好看看代码吧。

// blog_find_first_unique_char.cpp : 定义控制台应用程序的入口点。

#include "stdafx.h"

#include <climits>   // for UCHAR_MAX
#include <cstddef>   // for size_t
#include <cstring>   // for std::strlen
#include <iostream>  // for std::ostream, std::cout

#include <boost/cstdint.hpp> // for uintmax_t
#include <boost/noncopyable.hpp>
#include <boost/timer.hpp>

using namespace boost;

// Scope timer adapted from boost's progress_timer: it reports the value of
// the inherited elapsed() on the given stream when the object is destroyed.
//
// Publicly inherits from timer (which supplies elapsed()) and privately from
// noncopyable, so objects of this class can be neither copied nor assigned.
class new_progress_timer : public timer, private noncopyable
{
public:
    // os is the stream the elapsed time is written to (std::cout by default).
    // It is held by reference, so it must outlive this object.
    // `explicit` prevents a std::ostream from being implicitly converted
    // into a new_progress_timer.
    explicit new_progress_timer(std::ostream& os = std::cout)
        : timer(), noncopyable(), m_os(os) {}

    ~new_progress_timer()
    {
        // A) Throwing an exception from a destructor is a Bad Thing.
        // B) The progress_timer destructor does output which may throw.
        // C) A progress_timer is usually not critical to the application.
        // Therefore, wrap the I/O in a try block, catch and ignore all exceptions.
        try
        {
            // Use istream instead of ios_base to work around a GNU problem
            // (Greg Chicares).
            // Save the stream's current format flags and precision so they
            // can be restored after the elapsed time has been printed.
            std::istream::fmtflags old_flags = m_os.setf(std::istream::fixed,
                                                         std::istream::floatfield);
            // With fixed notation this is the number of digits printed after
            // the decimal point.
            std::streamsize old_prec = m_os.precision(5);

            m_os << elapsed() << " s\n" // "s" is the SI unit symbol for seconds
                 << std::endl;

            m_os.flags(old_flags);
            m_os.precision(old_prec);
        }
        catch (...) {} // eat any exceptions
    } // ~new_progress_timer

private:
    std::ostream& m_os; // destination stream for the timing report
};

// Method 1: brute force. Every character is compared against every other
// character with two nested loops, so the running time is O(n * n).
//
// source : characters to search; the length is passed explicitly instead of
//          being recomputed here (the same convention works for int/double
//          arrays, whose length cannot be recovered from the pointer).
// length : number of characters of source to examine.
//
// Prints the first character that occurs exactly once and returns true, or
// returns false when there is no such character (including when source is
// null or length is 0). The elapsed time is printed by the timer object's
// destructor when the function returns.
bool method_1(char *source, size_t length)
{
    new_progress_timer tm; // reports the elapsed time when it goes out of scope

    if (!source) return false;

    for (size_t tempi = 0; tempi < length; ++tempi)
    {
        // Look for any other position holding the same character.
        size_t tempj = 0;
        for (; tempj < length; ++tempj)
        {
            if (tempi == tempj) continue; // never compare a position with itself
            if (source[tempj] == source[tempi]) break; // duplicate found
        }

        // The inner loop ran to completion, so no duplicate exists: this is
        // the first unique character.
        if (tempj == length)
        {
            std::cout<<"first unique char:"<<source[tempi]<<std::endl;
            std::cout<<"time is :"<<std::endl;
            return true;
        }
    }

    return false;
}

// Method 2: trade memory for time with two auxiliary tables indexed by
// character value: one counts how often each character occurs, the other
// remembers the last position at which it occurred. A single pass over the
// string fills both tables; a pass over the tables then picks, among the
// characters that occurred exactly once, the one with the smallest position.
//
// source : characters to search.
// length : number of characters of source to examine.
//
// Prints the first character that occurs exactly once and returns true, or
// returns false when there is no such character (including when source is
// null or length is 0). The elapsed time is printed by the timer object's
// destructor when the function returns.
bool method_2(char *source, size_t length)
{
    new_progress_timer tm; // reports the elapsed time when it goes out of scope

    if (!source) return false;

    // One slot per possible unsigned char value, so any input byte is a valid
    // index (a 26-entry table is only safe for strings made purely of 'a'..'z').
    const size_t table_size = UCHAR_MAX + 1;
    size_t exist_times[table_size] = {0}; // occurrences of each character
    size_t char_pos[table_size] = {0};    // last position of each character

    for (size_t tempi = 0; tempi < length; ++tempi)
    {
        const unsigned char index = static_cast<unsigned char>(source[tempi]);
        ++exist_times[index];
        char_pos[index] = tempi;
    }

    // `length` can never be a valid position, so it serves as "not found".
    size_t pos = length;
    for (size_t tempi = 0; tempi < table_size; ++tempi)
    {
        // For a character seen exactly once, its last position is its only
        // position; keep the smallest such position.
        if (exist_times[tempi] == 1 && char_pos[tempi] < pos)
        {
            pos = char_pos[tempi];
        }
    }

    if (pos != length)
    {
        std::cout<<" first unique char:"<<source[pos]<<std::endl;
        std::cout<<"time is: "<<std::endl;
        return true;
    }

    return false;
}

// Runs both search methods on the same sample string and reports when a
// method finds no unique character.
int main()
{
    using namespace std;

    // A mutable array rather than a pointer to the literal: method_1 and
    // method_2 take `char *`, and a string literal does not convert to
    // `char *` in standard C++.
    char source[] =
        "xiaozuonanjingzhendehaoshuaiaasdawfregadfasfadgadgaefadgadtayadfvsagaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaadagadtfareafdfdaetaxdfaddsfastasfasdasdasdaswfasdfdgrtrysdfadflasdasfdsgfysgjbishugbsduvbgudsbgysdbygbsygbysbgysbgyp\0";

    // Derive the length from the data instead of hard-coding it, so it can
    // never drift out of sync with the literal. strlen stops at the first
    // '\0', i.e. the explicit terminator written in the literal.
    size_t const length = strlen(source);

    if (!method_1(source, length)) cout << "unique char does not exist" << endl;

    if (!method_2(source, length)) cout << "unique char does not exist" << endl;

    return 0;
}

运行结果：

阅读全文

0 0