python根据key对流做join

来源:互联网 发布:php学生成绩管理系统 编辑:程序博客网 时间:2024/05/20 11:23
def keyjoin(instreams, keyindexs=None, default=None):
    """Merge-join several streams that are already sorted by their join key.

    Each round takes the current (not yet emitted) record of every stream,
    finds the smallest join key among them and yields one row holding the
    records that carry that key; the other positions of the row are filled
    from ``default``.  Only the streams whose record was emitted are
    advanced.  Records sharing a key inside one stream are therefore paired
    with the other streams in arrival order, not cross-multiplied.

    Args:
        instreams: sequence of iterables (iterators or plain sequences), one
            per stream.  Records are expected to be lists or tuples that
            contain every column named in the matching ``keyindexs`` entry.
        keyindexs: one sequence of column indexes per stream; the values at
            those indexes form that stream's join key.  When omitted or
            empty, every stream gets an empty key, so all keys compare equal
            and the streams are simply advanced in lockstep.
        default: one filler value per stream, used in a row when that stream
            has no record for the emitted key.  Defaults to ``()`` for every
            stream.

    Yields:
        A list with one entry per stream: the stream's record if its key
        equals the smallest pending key, otherwise ``default[i]``.
    """
    stream_count = len(instreams)
    if not keyindexs:
        keyindexs = [() for _ in range(stream_count)]
    if not default:
        default = [() for _ in range(stream_count)]

    # iter() leaves iterators untouched and lets callers pass plain lists.
    iterators = [iter(stream) for stream in instreams]

    # stream index -> (join key, record) for records read but not yet
    # emitted.  Exhausted streams simply have no entry, so no placeholder
    # value ever takes part in the key comparison.
    pending = {}
    to_fetch = list(range(stream_count))

    while True:
        for i in to_fetch:
            try:
                record = next(iterators[i])
            except StopIteration:
                # Stream is finished; it never re-enters ``pending``.
                continue
            pending[i] = ([record[index] for index in keyindexs[i]], record)

        if not pending:
            # Every stream is exhausted and everything read was emitted.
            # A plain return ends the generator (raising StopIteration here
            # would become RuntimeError under PEP 479).
            return

        smallest = min(key for key, _ in pending.values())
        matched = [i for i, (key, _) in pending.items() if key == smallest]
        hits = set(matched)

        yield [pending[i][1] if i in hits else default[i]
               for i in range(stream_count)]

        # Consume what was just emitted and refill only those streams.
        for i in matched:
            del pending[i]
        to_fetch = matched


>>> stream1
[['il', 10], ['ls', 12]]
>>> stream2

[['ls', 22.199999999999999], ['zs', 21.100000000000001]]

>>> for data in keyjoin(instreams=[iter(stream1),iter(stream2)],keyindexs=[[0],[0]]):
...   print data
... 
[['il', 10], ()]
[['ls', 12], ['ls', 22.199999999999999]]
[(), ['zs', 21.100000000000001]]

0 0
原创粉丝点击