C#多线程处理文件的简单例子
来源:互联网 发布:乱码修复软件 编辑:程序博客网 时间:2024/06/07 03:54
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;

namespace vs2012csharp
{
    /// <summary>
    /// Batch-extracts selected fields from every "*.txt" file in a folder, splitting the
    /// file list across a fixed number of worker threads. For each input file two output
    /// files are written next to it: "&lt;name&gt;.main" (title, abstract, main text) and
    /// "&lt;name&gt;.info" (application codes and Chinese keywords).
    /// </summary>
    class Program
    {
        /// <summary>Number of worker threads; worker k handles files k, k+ThreadNum, k+2*ThreadNum, ...</summary>
        static int ThreadNum = 3;

        // The patterns below are shared by all worker threads. Regex instances are
        // immutable and safe for concurrent matching, so building them once avoids
        // re-parsing every pattern for every file.

        /// <summary>Matches the "申请代码" (application code) block; group 1 holds the "CODE:description" lines.</summary>
        static readonly Regex ApplicationCodeBlockRegex =
            new Regex(@"申请代码\s+((?:[A-Z\d]+:\w+\s+)+)", RegexOptions.Singleline);

        /// <summary>Recognises a single "CODE:description" entry.</summary>
        static readonly Regex CodeEntryRegex = new Regex(@"\w+:\w+");

        /// <summary>Matches the trailing ":description" part of a code entry so it can be stripped.</summary>
        static readonly Regex CodeDescriptionSuffixRegex = new Regex(@":\w+$");

        /// <summary>Captures the Chinese ("keywords_ch") and English ("keywords_en") keyword lines.</summary>
        static readonly Regex KeywordsRegex = new Regex(
            @".*中\s+文\s+关\s+键\s+词\s+(?<keywords_ch>[^\r\n]+)\s+英\s+文\s+关\s+键\s+词\s+(?<keywords_en>[^\r\n]+).*",
            RegexOptions.Singleline);

        /// <summary>Captures the rest of the line following "项目名称:" (project title).</summary>
        static readonly Regex TitleRegex =
            new Regex(@".*项目名称:([^\r\n]+).*", RegexOptions.Singleline);

        /// <summary>Captures the rest of the line following the Chinese abstract heading.</summary>
        static readonly Regex AbstractRegex =
            new Regex(@".*中\s+文\s+摘\s+要\s+\(限400字\):([^\r\n]+)\s+.*", RegexOptions.Singleline);

        /// <summary>
        /// Group 1 is the text between "报告正文" (report body) and "签字和盖章页"
        /// (signature page); group 2 is whatever follows the signature page marker.
        /// </summary>
        static readonly Regex MainTextRegex =
            new Regex(@".*报告正文\s+(.*)签字和盖章页(.*)", RegexOptions.Singleline);

        /// <summary>Matches from the "经费申请说明" (funding request) heading to the end of the text.</summary>
        static readonly Regex BudgetSectionRegex =
            new Regex(@"[^\s]+经费申请说明.*", RegexOptions.Singleline);

        /// <summary>Full paths of the files to process; filled before the workers start and only read afterwards.</summary>
        List<string> fileList;

        /// <summary>
        /// Reads one input file and writes the two extraction results.
        /// </summary>
        /// <param name="inputfile">Path of the text file to read.</param>
        /// <param name="outputfile1">
        /// Receives title, abstract and main text (with the funding section removed),
        /// separated by blank lines.
        /// </param>
        /// <param name="outputfile2">
        /// Receives the number of application codes, one code per line, the number of
        /// Chinese keywords, and one keyword per line.
        /// </param>
        /// <returns>Always an empty string.</returns>
        static string ProposalExtractLessInfo(string inputfile, string outputfile1, string outputfile2)
        {
            // NOTE(review): Encoding.Default is platform dependent; kept as in the original
            // because the encoding of the input files cannot be determined from here.
            string paperText = File.ReadAllText(inputfile, Encoding.Default);

            // Application codes: keep only lines that look like "CODE:description" and
            // drop the description part.
            string codeBlock = ApplicationCodeBlockRegex.Match(paperText).Groups[1].Value;
            int codeNum = 0;
            StringBuilder codes = new StringBuilder();
            foreach (string codeLine in codeBlock.Split('\r', '\n'))
            {
                if (CodeEntryRegex.IsMatch(codeLine))
                {
                    codeNum++;
                    codes.Append(CodeDescriptionSuffixRegex.Replace(codeLine, "")).Append("\n");
                }
            }

            string title = TitleRegex.Match(paperText).Groups[1].Value;
            string abstract_ch = AbstractRegex.Match(paperText).Groups[1].Value;

            string maintext = MainTextRegex.Match(paperText).Groups[1].Value;
            maintext = BudgetSectionRegex.Replace(maintext, "");

            File.WriteAllText(outputfile1, title + "\n\n" + abstract_ch + "\n\n" + maintext, Encoding.Default);

            // Keywords: trim each entry and skip empty ones, so that a missing keyword
            // section or a trailing ';' is not reported as an (empty) keyword.
            string keywordLine = KeywordsRegex.Match(paperText).Groups["keywords_ch"].Value;
            List<string> keywords = new List<string>();
            foreach (string rawKeyword in keywordLine.Split(';'))
            {
                string keyword = rawKeyword.Trim();
                if (keyword.Length > 0)
                {
                    keywords.Add(keyword);
                }
            }

            StringBuilder output = new StringBuilder();
            output.Append(codeNum).Append("\n").Append(codes);
            output.Append(keywords.Count).Append("\n");
            foreach (string keyword in keywords)
            {
                output.Append(keyword).Append("\n");
            }

            File.WriteAllText(outputfile2, output.ToString(), Encoding.Default);
            return "";
        }

        /// <summary>
        /// Worker thread body. The current thread's name must be its numeric index; the
        /// worker processes every ThreadNum-th entry of the file list starting at that index.
        /// </summary>
        void ProposalExtractLessInfoMT()
        {
            int i = Convert.ToInt32(Thread.CurrentThread.Name) % ThreadNum;
            for (; i < fileList.Count; i += ThreadNum)
            {
                string fileName = fileList[i];
                Console.WriteLine("in thread [" + Thread.CurrentThread.Name + "], processing " + fileName);

                // Path.ChangeExtension always yields a path different from the input,
                // whatever the case or length of the matched extension. A case-sensitive
                // "\.txt$" replacement would leave e.g. "A.TXT" unchanged, and the output
                // would then overwrite the input file.
                ProposalExtractLessInfo(
                    fileName,
                    Path.ChangeExtension(fileName, ".main"),
                    Path.ChangeExtension(fileName, ".info"));
            }
        }

        /// <summary>
        /// Processes every "*.txt" file directly inside the given folder using ThreadNum
        /// worker threads, waits for all of them, and prints CPU time and wall-clock time.
        /// </summary>
        /// <param name="fileFolderName">Folder whose "*.txt" files are processed.</param>
        void DoProposalExtractMultiThread(string fileFolderName)
        {
            TimeSpan ts1 = Process.GetCurrentProcess().TotalProcessorTime;
            Stopwatch stw = Stopwatch.StartNew();

            DirectoryInfo aFolder = new DirectoryInfo(fileFolderName);
            fileList = new List<string>();
            foreach (FileInfo aFile in aFolder.GetFiles(@"*.txt"))
            {
                fileList.Add(aFile.FullName);
            }

            Thread[] threadArr = new Thread[ThreadNum];
            for (int i = 0; i < threadArr.Length; i++)
            {
                threadArr[i] = new Thread(new ThreadStart(ProposalExtractLessInfoMT));
                // The worker derives its starting offset from this name.
                threadArr[i].Name = Convert.ToString(i);
                threadArr[i].Start();
            }

            for (int i = 0; i < threadArr.Length; i++)
            {
                threadArr[i].Join();
            }

            double Msecs = Process.GetCurrentProcess().TotalProcessorTime.Subtract(ts1).TotalMilliseconds;
            stw.Stop();
            Console.WriteLine(string.Format("CPU时间(毫秒)={0} 实际时间(毫秒)={1}", Msecs, stw.Elapsed.TotalMilliseconds));
        }

        /// <summary>
        /// Entry point. Processes the folder given as the first command-line argument,
        /// or "D:\work\" when no argument is supplied.
        /// </summary>
        /// <param name="args">Optional: args[0] is the folder to process.</param>
        static void Main(string[] args)
        {
            string folder = (args != null && args.Length > 0) ? args[0] : @"D:\work\";

            Program prog = new Program();
            prog.DoProposalExtractMultiThread(folder);
        }
    }
}