zhuge

来源:互联网 发布:mac版股票软件 编辑:程序博客网 时间:2024/06/05 15:44
# -*- coding: utf-8 -*-
"""
Created on Fri Dec  1 18:08:06 2017

@author: Administrator

Summarise a table of records at successively narrower filters: the whole
table, the rows sharing the most common ``area``, and then the rows sharing
the most common ``floor`` among those.

NOTE(review): ``lower_dimention``, ``k_means`` and the module-level ``dmi2``
are not defined in this file; they must already exist in the namespace this
script is executed in.
"""
from collections import Counter

import numpy

# Relative spread, (max - min) / mean, below which a group counts as "tight".
_SPREAD_LIMIT = 0.1
# Written into the third slot of a summary row for a tight group, in place of
# the group size.  NOTE(review): why the value is 10 is not shown in this file.
_TIGHT_MARK = 10


def _summarize(frame, column):
    """Build the three-element summary row for *frame*.

    Args:
        frame: table passed to ``lower_dimention`` and indexed by *column*
            (presumably a pandas DataFrame -- confirm against the caller).
        column: name of the column handed to the second ``k_means`` call.

    Returns:
        ``(row, tight)`` where ``row`` is
        ``[round(k_means(reduced), 2), round(k_means(frame[column]), 2), n]``
        and ``tight`` tells whether the relative spread of ``reduced`` is
        under ``_SPREAD_LIMIT`` with more than one element.  ``n`` is
        ``_TIGHT_MARK`` for a tight group, otherwise ``len(reduced)``.
    """
    reduced = lower_dimention(frame)
    spread = (reduced.max() - reduced.min()) / reduced.mean()
    first = round(k_means(reduced), 2)
    second = round(k_means(frame[column]), 2)
    tight = spread < _SPREAD_LIMIT and len(reduced) > 1
    return [first, second, _TIGHT_MARK if tight else len(reduced)], tight


def _dominant_subset(frame, column):
    """Return the rows of *frame* holding the most frequent value of *column*.

    Returns ``None`` when the column is empty or when its most frequent value
    occurs fewer than two times.
    """
    top = Counter(frame[column]).most_common(1)
    # most_common(1) is an empty list for an empty column; treat as "no group".
    if not top or top[0][1] < 2:
        return None
    return frame[frame[column] == top[0][0]]


def popo(dmi, columns):
    """Summarise the rows of *dmi* sharing the most common value of *columns*.

    Args:
        dmi: table indexable by column name and by boolean mask
            (presumably a pandas DataFrame -- confirm against the caller).
        columns: name of the single column to group on.

    Returns:
        The three-element row produced by ``_summarize`` for the selected
        rows, or ``None`` when no value of *columns* occurs at least twice.
    """
    subset = _dominant_subset(dmi, columns)
    if subset is None:
        return None
    print('dmi=', dmi)
    row, _ = _summarize(subset, columns)
    print('dmi2=', subset)
    return row


# Module-level driver: executed as soon as the script is run or imported.
dd = numpy.array([[None, None, None], [None, None, None], [None, None, None]])
dd[0] = popo(dmi2, 'room')


def filter_data(dmi):
    """Fill a 3x3 object array with summaries at three filter levels.

    Row 0 summarises all of *dmi*; row 1 the rows with the most common
    ``area``; row 2 the rows, among those, with the most common ``floor``.
    Refinement stops at the first level that is tight (see ``_summarize``)
    or that has no value occurring at least twice; remaining rows stay
    ``None``.

    Args:
        dmi: table with at least ``area`` and ``floor`` columns
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        The 3x3 ``numpy`` object array described above.
    """
    dd = numpy.array([[None, None, None], [None, None, None], [None, None, None]])

    # Level 0: the whole table.
    row, tight = _summarize(dmi, 'area')
    print('x' * 100)
    print(dmi)
    dd[0] = row
    if tight:
        print('room_filter', dd)
        return dd

    # Level 1: rows sharing the most common area.
    same_area = _dominant_subset(dmi, 'area')
    if same_area is None:
        return dd
    print('dmi=', dmi)
    row, tight = _summarize(same_area, 'area')
    print('dmi2=', same_area)
    dd[1] = row
    if tight:
        print('area_filter', dd)
        return dd

    # Level 2: within that area, rows sharing the most common floor.
    print('****************************************' * 10)
    same_floor = _dominant_subset(same_area, 'floor')
    if same_floor is None:
        return dd
    print('dmi3=', same_floor)
    row, tight = _summarize(same_floor, 'area')
    dd[2] = row
    if tight:
        print(dd)
    return dd


ddr = filter_data(dmi2)