python MRJob

来源:互联网 发布:单片机控制继电器电路 编辑:程序博客网 时间:2024/06/05 10:18
# -*- coding: utf-8 -*-
"""Count occurrences of Chinese words in the input using an mrjob job.

Each input line is scanned for maximal runs of Chinese characters; every
run is emitted as a word with a count of 1, and the reducer sums the
counts per word.
"""
import re

# NOTE: jieba is not used by the regex-based mapper below. Iterating over
# ``jieba.cut(line)`` instead of ``word_re.findall(line)`` is the alternative
# tokenizer this script was originally experimenting with.
import jieba
from mrjob.job import MRJob

# Matches maximal runs of characters in the range U+4E00..U+9FA5.
word_re = re.compile(u"[\u4E00-\u9FA5]+")


class TestMrJob(MRJob):
    """Word-count job restricted to runs of Chinese characters."""

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every Chinese-character run in ``line``.

        Args:
            _: Input key; ignored.
            line: One line of input, either text or a UTF-8 encoded byte
                string.

        Yields:
            ``(word, 1)`` pairs, one per match of ``word_re``.

        Raises:
            UnicodeDecodeError: If ``line`` is a byte string that is not
                valid UTF-8.
        """
        # Decode explicitly instead of relying on ``u"%s" % line``: on
        # Python 2 that formats a byte string through the ASCII codec and
        # fails on Chinese text; on Python 3 it would format a bytes object
        # as its ``b'...'`` repr, so nothing would match.
        if isinstance(line, bytes):
            line = line.decode("utf-8")

        # No debugging ``print`` here: stdout carries the job's records, so
        # stray output would be mixed into the results.
        for word in word_re.findall(line):
            yield word.lower(), 1

    def reducer(self, word, counts):
        """Yield ``(word, total)`` where ``total`` is the sum of ``counts``.

        Args:
            word: The word emitted by the mapper.
            counts: Iterable of the counts emitted for ``word``.
        """
        yield word, sum(counts)


if __name__ == '__main__':
    TestMrJob.run()

0 0
原创粉丝点击