CorpAct抽样模板

来源:互联网 发布:禅道linux安装 编辑:程序博客网 时间:2024/05/16 10:52

最近有个项目的抽样也是够恶心,原始数据表包含一张全的公司list(无重复公司)。交给Ops Team去根据它抽数。

接着对方返回一个excel 包含母list 和 多个data group sheet,每个data group里面就是抽到的,在指定时间段内的数据。

在被抽到的data group 中,总共抽取30个公司的数据。要求每个data group 都要random到两个记录。剩下的部分随机挑公司。

另外还要在过万的记录里面随机抽样30个公司,是没有任何数据的。去audit missing case。

各data group之间复制粘贴,还要考虑有些公司可能在各个data group都有数据。想来想去,觉得还是来个tool吧。

下图是拿到的raw data list样式


母list一万多条。在家测试共用去了26秒。每个页面

下图那些flag就是被抽中的了

下面上代码。

Option ExplicitSub Summarize()       Dim WrkSht As Worksheet    Dim ShtNew As Worksheet    Dim ShtCom As Worksheet    Dim Rng As Range       Dim k As Integer    Dim k1 As Integer    Dim k2 As Integer    Dim kk As Integer    Dim yy As Integer    Dim Row1 As Integer    Dim Zebra As Integer    Dim Flag As Integer    Flag = 1        Dim CID As String                Dim arrData    Dim dataCount As Integer    dataCount = 0    Dim i As Integer        ReDim arrData(1 To 1)            Dim d As Object    Set d = CreateObject("scripting.dictionary")                  Application.ScreenUpdating = False               Set ShtNew = Worksheets.Add(After:=Worksheets(Worksheets.Count))    ShtNew.Name = "AccuracyList"         'Accuracy List     For Each WrkSht In Worksheets                       If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" Then                               kk = WrkSht.Range("A1").CurrentRegion.Columns.Count                'MsgBox kk                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count                'MsgBox yy                               For k = 1 To kk                                       If WrkSht.Cells(1, k).Value = "Company Id" Then                                                              WrkSht.Range(WrkSht.Cells(2, k), WrkSht.Cells(yy, k)).Copy ShtNew.Range("A" & ShtNew.Range("A56565").End(3).Row + 1)                        'note = note + yy                        Exit For                                            End If                                   Next k                           End If     Next        ShtNew.Cells(1, 1).Value = "Company Id"        ShtNew.Cells(1, 1).CurrentRegion.RemoveDuplicates Columns:=1, Header:=xlYes        ShtNew.Cells(1, 2).Value = "RAND"        yy = ShtNew.Range("A1").CurrentRegion.Rows.Count        For k = 2 To yy                ShtNew.Cells(k, 2).Value = Rnd            Next k        ShtNew.Range("A:B").Sort Columns(2), xlAscending, Header:=xlYes          'End of Accuracy List'Completeness List    Set ShtCom = Worksheets.Add(After:=Worksheets(Worksheets.Count))    ShtCom.Name = "CompleteList"        For Each WrkSht In Worksheets                If WrkSht.Name = "CompanyList" Then                                WrkSht.Range("A1").CurrentRegion.Copy ShtCom.Range("A1")                Exit For        End If    Next                kk = ShtCom.Range("A1").CurrentRegion.Columns.Count ' The total number of columns        For k = 1 To kk            If ShtCom.Cells(1, k).Value = "CompanyId" Then                k1 = k ' the column named Company Id                Exit For            End If                Next k                     For k = 1 To ShtNew.Range("A1").CurrentRegion.Columns.Count                If ShtNew.Cells(1, k).Value = "Company Id" Then            k2 = k ' the column named Company Id            Exit For        End If    Next        kk = ShtCom.Range("A1").CurrentRegion.Rows.Count ' The total number of rows    yy = ShtCom.Range("A1").CurrentRegion.Columns.Count 'The total number of columns        Cells(1, yy + 1).Value = "Sequence"                For Row1 = 2 To ShtNew.Range("A1").CurrentRegion.Rows.Count                        CID = ShtNew.Cells(Row1, k2).Value                                Set Rng = ShtCom.Range(ShtCom.Cells(1, k1), ShtCom.Cells(kk, k1)).Find(CID, lookat:=xlWhole)        If Not Rng Is Nothing Then           Cells(Rng.Row, yy + 1).Value = 1        End If    Next Row1       ShtCom.Range(Cells(1, 1), Cells(kk, yy + 1)).Sort Columns(yy + 1), xlDescending, Header:=xlYes        Range(Cells(2, yy + 1), Cells(Columns(yy + 1).End(xlDown).Row, yy + 1)).EntireRow.Delete        ShtCom.Columns(yy + 1).Delete        ShtCom.Cells(1, yy + 1).Value = "RAND"         kk = ShtCom.Range("A1").CurrentRegion.Rows.Count ' The total number of rows        For k = 2 To kk                Cells(k, yy + 1).Value = Rnd        Next k        ShtCom.Range(Cells(1, 1), Cells(kk, yy + 1)).Sort Columns(yy + 1), xlAscending, Header:=xlYes'CompleteList End'Accuracy Sampling For Each WrkSht In Worksheets                   If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" And Not WrkSht Is ShtCom Then                                                               kk = WrkSht.Range("A1").CurrentRegion.Columns.Count                'MsgBox kk                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count                'MsgBox yy                                For k = 1 To kk                                If WrkSht.Cells(1, k).Value = "Company Id" Then                                    k1 = k                                    Exit For                                End If                                                                            Next k                                If k = kk Then                                        MsgBox "No Company Id Column!"                    Exit Sub                                End If                                               If yy > 3 Then                                        WrkSht.Cells(1, kk + 1).Value = "RAND"                                                For k = 2 To yy                                                                            WrkSht.Cells(k, kk + 1) = Rnd                                                Next k                                                WrkSht.Range(WrkSht.Cells(1, 1), WrkSht.Cells(yy, kk + 1)).Sort WrkSht.Columns(kk + 1), xlAscending, Header:=xlYes                                                With WrkSht                                                                                            dataCount = dataCount + 1                                ReDim Preserve arrData(1 To dataCount)                                arrData(dataCount) = .Cells(2, k1).Value                                'MsgBox arrData(dataCount)                                dataCount = dataCount + 1                                ReDim Preserve arrData(1 To dataCount)                                arrData(dataCount) = .Cells(3, k1).Value                                'MsgBox arrData(dataCount)                        End With                                                 ElseIf yy = 3 Then                                                With WrkSht                                                  dataCount = dataCount + 1                                ReDim Preserve arrData(1 To dataCount)                                arrData(dataCount) = .Cells(2, k1).Value                                dataCount = dataCount + 1                                ReDim Preserve arrData(1 To dataCount)                                arrData(dataCount) = .Cells(3, k1).Value                        End With                                         ElseIf yy = 2 Then                                         With WrkSht                                dataCount = dataCount + 1                                ReDim Preserve arrData(1 To dataCount)                                arrData(dataCount) = .Cells(2, k1).Value                        End With                 Else                                MsgBox "Error"                                Exit Sub                                                End If                                                                                End If          NextFor i = 1 To UBound(arrData)        d(arrData(i)) = d(arrData(i)) + 1Next iZebra = 30 - d.CountReDim Preserve arrData(1 To 31)If Zebra <> 0 Then                        k = 1                                                While Zebra >= 0                                                                                                      arrData(d.Count + 1) = ShtNew.Cells(k + 1, 1)                                                                                   For i = 1 To UBound(arrData)                                                        d(arrData(i)) = d(arrData(i)) + 1                                                            Next i                                                        k = k + 1                                                        Zebra = 30 - d.Count                                                                             WendEnd IfShtNew.Range("D2").Resize(UBound(arrData), 1).Value = WorksheetFunction.Transpose(arrData)'Accuracy Sampling Complete'Pick them up For Each WrkSht In Worksheets                   If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" And Not WrkSht Is ShtCom Then                                        kk = WrkSht.Range("A1").CurrentRegion.Columns.Count ' The total number of columns                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count ' The total number of rows                                WrkSht.Cells(1, kk + 1).Value = "Flag"                    For k = 1 To kk                        If ShtCom.Cells(1, k).Value = "Company Id" Then                            k1 = k ' the column named Company Id                            Exit For                        End If                                        Next k                                For i = 1 To UBound(arrData)                                        For k = 2 To yy                                                    If WrkSht.Cells(k, k1).Value = arrData(i) Then                                                                                        WrkSht.Cells(k, kk + 1).Value = Flag                            End If                                                    Next k                                                                        Flag = Flag + 1                Next i                        End If                  WrkSht.Range(WrkSht.Cells(1, 1), WrkSht.Cells(yy, kk + 1)).Sort WrkSht.Columns(kk + 1), xlDescending, Header:=xlYes                Next'End of pick-up                                                                Set d = Nothing                                   Application.ScreenUpdating = True   End Sub

结束语:再一次的感谢组小牛同学,当我告诉他我用union range法去实现删除区域时,他提醒我应该去尝试排序再删除。

实验表明,同样家里的土冒机器的环境下,union_range法39秒,排序删除法19秒。

另外还有就是使用union 的时候,里面不可以有设置为nothing的区块或者没有定义的区块。

0 0