海量数据批量插入案例

来源:互联网 发布:东北大学网络教育好吗 编辑:程序博客网 时间:2024/05/18 01:47

建立测试数据库(BulkTestDB)、主表(BulkTestMain)、从表(BulkTestDetail)

[sql] view plaincopyprint?
  1. --Create DataBase
  2. create database BulkTestDB;
  3. go
  4. use BulkTestDB;
  5. go
  6. --Create Table
  7. Create table BulkTestMain(
  8. Id int primarykey,
  9. GuidId long,--辅助的唯一标识
  10. Batch long,--导入的批次标识
  11. Name nvarchar(32)
  12. go
  13. Create table BulkTestDetail(
  14. Id int primarykey,
  15. PId int,
  16. Lesson nvarchar(32)
  17. go


代码及其分析

[csharp] view plaincopyprint?
  1. public void TestMain()
  2. {
  3. using (SqlConnection connection =new SqlConnection("你的链接字符串"))
  4. {
  5. connection.Open();
  6. SqlTransaction transaction = connection.BeginTransaction("Transaction1");
  7. DataTable dtTestMain= GetTableSchema("BulkTestMain");//构建BulkTestMain表结构
  8. DataTable dtTestDetail = GetTableSchema("BulkTestDetail");//构建BulkTestDetail表结构
  9. Guid Batch = Guid.NewGuid();//插入的批次,为后面查询dtTestMainTmp 做条件
  10. for (int i = 0; i < 1000000; i++)//测试100w条数据
  11. {
  12. DataRow dr= dtTestMain.NewRow();
  13. Guid newGuid = Guid.NewGuid();
  14. dr["_GuidId"] = newGuid;
  15. dr["_Batch"] = Batch;
  16. dr["_UserName"] = "测试" + i.ToString();
  17. dtTestMain.Rows.Add(dr);
  18. for(int j = 0;j<10;j++)//给从表每次插入10条数据
  19. {
  20. DataRow dr1 = dtTestDetail.NewRow();
  21. dr1["_GuidId"]= newGuid;
  22. dr1["_Lesson"]="课程"+j.ToString();
  23. dtTestDetail.Rows.Add(dr1);
  24. }
  25. //这样做的目的,让主表与从表可以临时通过GuidId关联起来
  26. }
  27. BulkToDB(dtTestMain, "BulkTestMain", connection, transaction);//先让BulkTestMain插入了大量的数据,注意这些数据是临时的,在SqlTransaction提交之前,查询时要用with(nolock)
  28. DataSet dtTestMainTmp = GetNewImportData(Batch.ToString());//好吧,我们来查询下,刚才大量插入的10w条数据,这里只需要查询标识的2列字段
  29. Dictionary<string, long> dicGuidToID = new Dictionary<string,long>();
  30. foreach (DataRow dr in dtTestMainTmp.Tables[0].Rows)
  31. {
  32. dicGuidToID.Add(dr[1].ToString(), Convert.ToInt64(dr[0]));
  33. }//dicGuidToID:guid字段与插入的主表ID字段关联起来成字典,用字典是为了访问起来效率(为什么获取字典key的值效率很高,有兴趣的可以去研究“散列表”的概念)
  34. foreach (DataRow dr in dtTestDetail.Rows)//现在给dtTestDetail的PId字段赋值(PId字段与主表Id外键关联)
  35. {
  36. dr["_PId"] = dicGuidToID[dr["_GuidId"].ToString()].ToString();
  37. }
  38. dtTestDetail.Columns.Remove("_GuidId");//移除dtTestDetail的GuidId字段,使它与数据库列匹配
  39. BulkToDB(dtTestDetail,"BulkTestDetail",connection, transaction);//给从表插入数据
  40. transaction.Commit();
  41. connection.Close();
  42. }
  43. }
  44. /// <summary>
  45. /// 根据批次Batch获取导进来的临时数据
  46. /// </summary>
  47. /// <returns></returns>
  48. public static DataSet GetNewImportData(string batch)
  49. {
  50. StringBuilder strSql = new StringBuilder();
  51. strSql.Append("SELECT [Id],[GuidId]")
  52. .Append(" FROM ContactInfo WITH (NOLOCK) WHERE Batch=@batch");
  53. SqlParameter[] parameters = {
  54. new SqlParameter("@Batch", SqlDbType.BigInt){Value = batch}
  55. };
  56. DataSet ds = SqlHelper.ExecuteDataset(strSql.ToString(), parameters);
  57. return ds;
  58. }
  59. public staticvoid BulkToDB(DataTable dtSource, string TableName,SqlConnection connection, SqlTransaction transaction)
  60. {
  61. using (SqlBulkCopy sqlBulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.KeepIdentity, transaction))
  62. {
  63. sqlBulkCopy.DestinationTableName = TableName;//要插入数据的表的名称
  64. sqlBulkCopy.BatchSize = dtSource.Rows.Count;//数据的行数
  65. List<SqlBulkCopyColumnMapping> mpList = getMapping(TableName);//获取表映射关系
  66. foreach (SqlBulkCopyColumnMapping mpin mpList)
  67. {
  68. sqlBulkCopy.ColumnMappings.Add(mp);
  69. }
  70. if (dtSource != null && dtSource.Rows.Count != 0)
  71. {
  72. sqlBulkCopy.WriteToServer(dtSource);//插入数据
  73. }
  74. }
  75. }
  76. public static List<SqlBulkCopyColumnMapping> getMapping(string TableName)
  77. {
  78. List<SqlBulkCopyColumnMapping> mpList = new List<SqlBulkCopyColumnMapping>();
  79. switch(TableName)
  80. {
  81. case "BulkTestMain":{
  82. mpList.Add(new SqlBulkCopyColumnMapping("_Id","Id"));
  83. mpList.Add(new SqlBulkCopyColumnMapping("_GuidId","GuidId"));
  84. mpList.Add(new SqlBulkCopyColumnMapping("_Batch","Batch"));
  85. mpList.Add(new SqlBulkCopyColumnMapping("_UserName","UserName"));
  86. }break;
  87. case "BulkTestDetail":{
  88. mpList.Add(new SqlBulkCopyColumnMapping("_Id","Id"));
  89. mpList.Add(new SqlBulkCopyColumnMapping("_PId","PId"));
  90. mpList.Add(new SqlBulkCopyColumnMapping("_Lesson","Lesson"));
  91. }break;
  92. }
  93. return mpList;
  94. }
  95. private static DataTable GetTableSchema(string TableName)
  96. {
  97. DataTable dataTable = new DataTable();
  98. switch(TableName)
  99. {
  100. case "BulkTestMain" :{
  101. dataTable.Columns.AddRange(new DataColumn[] {
  102. new DataColumn("_Id",typeof(Int32)),
  103. new DataColumn("_GuidId",typeof(Int64)),
  104. new DataColumn("_Batch",typeof(Int64)),
  105. new DataColumn("_UserName",typeof(String))
  106. });}break;
  107. case "BulkTestDetail":{
  108. dataTable.Columns.AddRange(new DataColumn[] {
  109. new DataColumn("_Id",typeof(Int32)),
  110. new DataColumn("_PId",typeof(Int32)),
  111. new DataColumn("_GuidId",typeof(Int64)),
  112. new DataColumn("_Lesson",typeof(String))});
  113. }break;
  114. }
  115. return dataTable;
  116. }