C#多线程处理文件的简单例子

来源:互联网 发布:乱码修复软件 编辑:程序博客网 时间:2024/06/07 03:54
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;

namespace vs2012csharp
{
    /// <summary>
    /// Batch-processes proposal text files in a folder using a fixed number of
    /// worker threads. For every input "*.txt" file two sibling files are written:
    /// a ".main" file (title, Chinese abstract, report body) and an ".info" file
    /// (application codes and Chinese keywords, each preceded by its count).
    /// </summary>
    class Program
    {
        /// <summary>Number of worker threads started by <see cref="DoProposalExtractMultiThread"/>.</summary>
        static int ThreadNum = 3;

        // The patterns below are compiled once and shared by all workers.
        // Regex instances are immutable and safe to use from several threads.

        /// <summary>Captures the run of "CODE:description" entries that follows the application-code label.</summary>
        static readonly Regex ApplicationCodeRegex =
            new Regex(@"申请代码\s+((?:[A-Z\d]+:\w+\s+)+)", RegexOptions.Singleline);

        /// <summary>Recognizes a single "code:description" entry.</summary>
        static readonly Regex CodeEntryRegex = new Regex(@"\w+:\w+");

        /// <summary>Matches the trailing ":description" part of a code entry so it can be stripped.</summary>
        static readonly Regex CodeDescriptionRegex = new Regex(@":\w+$");

        /// <summary>Captures the Chinese and English keyword lines (named groups keywords_ch / keywords_en).</summary>
        static readonly Regex KeywordsRegex =
            new Regex(@".*中\s+文\s+关\s+键\s+词\s+(?<keywords_ch>[^\r\n]+)\s+英\s+文\s+关\s+键\s+词\s+(?<keywords_en>[^\r\n]+).*", RegexOptions.Singleline);

        /// <summary>Captures the project title line.</summary>
        static readonly Regex TitleRegex =
            new Regex(@".*项目名称:([^\r\n]+).*", RegexOptions.Singleline);

        /// <summary>Captures the Chinese abstract line.</summary>
        static readonly Regex AbstractRegex =
            new Regex(@".*中\s+文\s+摘\s+要\s+\(限400字\):([^\r\n]+)\s+.*", RegexOptions.Singleline);

        /// <summary>
        /// Group 1 is the report body; group 2 is the text after the signature-page
        /// marker (captured by the pattern but not used by this program).
        /// </summary>
        static readonly Regex BodyRegex =
            new Regex(@".*报告正文\s+(.*)签字和盖章页(.*)", RegexOptions.Singleline);

        /// <summary>Matches everything from the budget-explanation heading to the end of the body.</summary>
        static readonly Regex BudgetSectionRegex =
            new Regex(@"[^\s]+经费申请说明.*", RegexOptions.Singleline);

        /// <summary>Full paths of the files to process; filled before the workers start and only read afterwards.</summary>
        List<string> fileList;

        /// <summary>
        /// Extracts the title, abstract, body, application codes and Chinese keywords
        /// from one proposal text file.
        /// </summary>
        /// <param name="inputfile">Path of the proposal text file; read with <see cref="Encoding.Default"/>.</param>
        /// <param name="outputfile1">Receives title, abstract and body, separated by blank lines.</param>
        /// <param name="outputfile2">
        /// Receives the number of application codes, one code per line, then the number
        /// of Chinese keywords followed by one keyword per line.
        /// </param>
        /// <returns>Always an empty string.</returns>
        static string ProposalExtractLessInfo(string inputfile, string outputfile1, string outputfile2)
        {
            string paperText = File.ReadAllText(inputfile, Encoding.Default);

            // Application codes: keep only the code, dropping the ":description" suffix.
            int codeNum = 0;
            StringBuilder codes = new StringBuilder();
            string codeSection = ApplicationCodeRegex.Match(paperText).Groups[1].Value;
            foreach (string codeEntry in codeSection.Split('\r', '\n'))
            {
                if (CodeEntryRegex.IsMatch(codeEntry))
                {
                    codeNum++;
                    codes.Append(CodeDescriptionRegex.Replace(codeEntry, "")).Append("\n");
                }
            }

            // A section whose pattern does not match yields an empty string here.
            string title = TitleRegex.Match(paperText).Groups[1].Value;
            string abstract_ch = AbstractRegex.Match(paperText).Groups[1].Value;
            string maintext = BodyRegex.Match(paperText).Groups[1].Value;

            // Cut the body at the budget-explanation heading.
            maintext = BudgetSectionRegex.Replace(maintext, "");

            File.WriteAllText(outputfile1, title + "\n\n" + abstract_ch + "\n\n" + maintext, Encoding.Default);

            // Chinese keywords are ';'-separated. Empty entries (missing keyword
            // section, trailing or doubled separators) are skipped so the count
            // written below matches the number of keyword lines that carry text.
            string keywordLine = KeywordsRegex.Match(paperText).Groups["keywords_ch"].Value;
            List<string> keywords = new List<string>();
            foreach (string rawKeyword in keywordLine.Split(';'))
            {
                string keyword = rawKeyword.Trim();
                if (keyword.Length > 0)
                {
                    keywords.Add(keyword);
                }
            }

            StringBuilder output = new StringBuilder();
            output.Append(codeNum).Append("\n").Append(codes.ToString());
            output.Append(keywords.Count).Append("\n");
            foreach (string keyword in keywords)
            {
                output.Append(keyword).Append("\n");
            }

            File.WriteAllText(outputfile2, output.ToString(), Encoding.Default);
            return "";
        }

        /// <summary>
        /// Worker body: processes every <paramref name="workerCount"/>-th entry of
        /// <c>fileList</c>, starting at <paramref name="workerIndex"/>, so that the
        /// workers handle disjoint sets of files.
        /// </summary>
        /// <param name="workerIndex">Zero-based index of this worker.</param>
        /// <param name="workerCount">Total number of workers sharing the list.</param>
        void ProposalExtractLessInfoMT(int workerIndex, int workerCount)
        {
            for (int i = workerIndex; i < fileList.Count; i += workerCount)
            {
                string fileName = fileList[i];
                Console.WriteLine("in thread [" + Thread.CurrentThread.Name + "], processing " + fileName);
                try
                {
                    // Path.ChangeExtension always produces a name different from the
                    // input, whatever the letter case of its extension, so the source
                    // file can never be overwritten by its own output.
                    ProposalExtractLessInfo(fileName,
                        Path.ChangeExtension(fileName, ".main"),
                        Path.ChangeExtension(fileName, ".info"));
                }
                catch (IOException ex)
                {
                    // Report and continue: one unreadable file must not abort the batch.
                    Console.Error.WriteLine("failed to process " + fileName + ": " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.Error.WriteLine("failed to process " + fileName + ": " + ex.Message);
                }
            }
        }

        /// <summary>
        /// Processes every ".txt" file directly inside <paramref name="fileFolderName"/>
        /// on <c>ThreadNum</c> threads, waits for all of them, and prints the CPU time
        /// and wall-clock time spent, both in milliseconds.
        /// </summary>
        /// <param name="fileFolderName">Folder containing the proposal text files.</param>
        void DoProposalExtractMultiThread(string fileFolderName)
        {
            TimeSpan cpuAtStart = Process.GetCurrentProcess().TotalProcessorTime;
            Stopwatch stw = Stopwatch.StartNew();

            DirectoryInfo aFolder = new DirectoryInfo(fileFolderName);
            fileList = new List<string>();
            foreach (FileInfo aFile in aFolder.GetFiles(@"*.txt"))
            {
                // The search pattern may also return names whose extension merely
                // starts with "txt"; accept the exact extension only, in any case.
                if (string.Equals(aFile.Extension, ".txt", StringComparison.OrdinalIgnoreCase))
                {
                    fileList.Add(aFile.FullName);
                }
            }

            // Read the setting once so every worker uses the same stride.
            int workerCount = ThreadNum;
            Thread[] threadArr = new Thread[workerCount];
            for (int i = 0; i < threadArr.Length; i++)
            {
                // Copy the loop variable: the lambda must capture a per-iteration value.
                int workerIndex = i;
                threadArr[i] = new Thread(() => ProposalExtractLessInfoMT(workerIndex, workerCount));
                threadArr[i].Name = Convert.ToString(i);
                threadArr[i].Start();
            }

            foreach (Thread worker in threadArr)
            {
                worker.Join();
            }

            double Msecs = Process.GetCurrentProcess().TotalProcessorTime.Subtract(cpuAtStart).TotalMilliseconds;
            stw.Stop();
            Console.WriteLine(string.Format("CPU时间(毫秒)={0} 实际时间(毫秒)={1}", Msecs,
                stw.Elapsed.TotalMilliseconds));
        }

        /// <summary>
        /// Entry point. Uses the first command-line argument as the folder to process,
        /// or <c>D:\work\</c> when no argument is given.
        /// </summary>
        static void Main(string[] args)
        {
            string folder = (args != null && args.Length > 0) ? args[0] : @"D:\work\";
            Program prog = new Program();
            prog.DoProposalExtractMultiThread(folder);
        }
    }
}

原创粉丝点击