python MRJob

来源:互联网 发布:你见过最恐怖的事 知乎 编辑:程序博客网 时间:2024/06/05 04:02
# -*- coding: utf-8 -*-
"""Count Chinese word frequencies with an mrjob map/reduce job.

Each input line is scanned for runs of Chinese characters, every run is
segmented into words with jieba, and the reducer sums the occurrences of
each word.
"""
import re
import sys

import jieba
from mrjob.job import MRJob

# Matches maximal runs of characters in the range U+4E00..U+9FA5.
word_re = re.compile(u"[\u4E00-\u9FA5]+")


class TestMrJob(MRJob):
    """Word-count job over the Chinese text found in each input line."""

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every jieba token in the line.

        Args:
            _: Input key; unused.
            line: One input line, either UTF-8 encoded bytes or text.

        Yields:
            Tuples of ``(lower-cased token, 1)``.
        """
        # Decode only when handed bytes: calling .decode() on text that is
        # already unicode fails on Python 3.
        if isinstance(line, bytes):
            line = line.decode("utf8")
        for word in word_re.findall(line):
            for new_word in jieba.cut(word):
                yield new_word.lower(), 1

    def reducer(self, word, counts):
        """Yield ``(word, total)`` where ``total`` is the sum of ``counts``.

        Args:
            word: The token emitted by the mapper.
            counts: Iterable of the per-occurrence counts for ``word``.
        """
        yield word, sum(counts)


if __name__ == '__main__':
    # Note: the input path must be on sys.argv *before* run() is called, so
    # it is appended first and the job is run exactly once. (The collapsed
    # original called run() a second time ahead of the append, i.e. without
    # this input path.)
    sys.argv.append("data/2")
    # NOTE(review): run() is not expected to return the job's results (mrjob
    # writes the output itself), so the former `print data` was dropped --
    # confirm against the mrjob version in use.
    TestMrJob.run()
0 0