Translater-语言翻译类

来源:互联网 发布:fedora13 yum源 编辑:程序博客网 时间:2024/05/17 22:25
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using Newtonsoft.Json;

namespace Framework
{
    /// <summary>
    /// Language codes accepted by the translation methods of <see cref="Translater"/>.
    /// </summary>
    public class LanguageType
    {
        /// <summary>
        /// Chinese.
        /// </summary>
        public static string Chinese = "zh-cn";

        /// <summary>
        /// English.
        /// </summary>
        public static string English = "en";
    }

    /// <summary>
    /// Identifiers of the translation back ends that <see cref="Translater.Translate"/> can dispatch to.
    /// </summary>
    public class TranslationType
    {
        /// <summary>
        /// Google (routed to the form-POST implementation).
        /// </summary>
        public static string Google = "GoogleTanslater";

        /// <summary>
        /// Bing / Microsoft Translator.
        /// </summary>
        public static string Bing = "MircsoftTanslater";
    }

    /// <summary>
    /// Translates text between Chinese ("zh-cn") and English ("en") by calling remote HTTP services.
    /// </summary>
    public class Translater
    {
        /// <summary>Language code that the Google helpers map to "zh"; every other code is treated as "en".</summary>
        private const string ChineseCode = "zh-cn";

        /// <summary>
        /// Locates the element whose id is "result_box" (including nested elements of the same tag)
        /// in the HTML returned by the Google form POST. Built once instead of on every call.
        /// </summary>
        private static readonly Regex ResultBoxRegex = new Regex(
            @"<(?<HtmlTag>[\w]+)[^>]*\s[iI][dD]=(?<Quote>[""']?)result_box(?(Quote)\k<Quote>)[""']?[^>]*>((?<Nested><\k<HtmlTag>[^>]*>)|</\k<HtmlTag>>(?<-Nested>)|.*?)*</\k<HtmlTag>>");

        /// <summary>
        /// Translates <paramref name="sourceText"/> with the selected back end.
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromLanguage">Source language code, e.g. "zh-cn" or "en".</param>
        /// <param name="toLanguage">Target language code, e.g. "zh-cn" or "en".</param>
        /// <param name="type">Back end identifier: "MircsoftTanslater" (default) or "GoogleTanslater".</param>
        /// <returns>
        /// The text produced by the back end (which may be an error description), or an empty string
        /// when <paramref name="sourceText"/> is null/empty or <paramref name="type"/> is not recognised.
        /// </returns>
        public static string Translate(string sourceText, string fromLanguage, string toLanguage, string type = "MircsoftTanslater")
        {
            // Nothing to translate: skip the network round trip entirely.
            if (string.IsNullOrEmpty(sourceText))
            {
                return string.Empty;
            }

            switch (type)
            {
                case "MircsoftTanslater":
                    return MircsoftTanslater(sourceText, fromLanguage, toLanguage);
                case "GoogleTanslater":
                    return GoogleTranslater_PostMethod(sourceText, fromLanguage, toLanguage);
                default:
                    return string.Empty;
            }
        }

        /// <summary>
        /// Returns true when <paramref name="languageCode"/> is "zh-cn" (ASCII case-insensitive).
        /// A null code is treated as "not Chinese".
        /// </summary>
        private static bool IsChinese(string languageCode)
        {
            return string.Equals(languageCode, ChineseCode, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Reads the whole body of <paramref name="response"/> as text (UTF-8 unless a byte-order mark says otherwise)
        /// and disposes the underlying stream.
        /// </summary>
        private static string ReadResponseText(WebResponse response)
        {
            using (Stream body = response.GetResponseStream())
            {
                if (body == null)
                {
                    return string.Empty;
                }

                using (StreamReader reader = new StreamReader(body))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        #region Google translation: GET

        /// <summary>
        /// Google translation through the AJAX language API, using an HTTP GET.
        /// Not called by <see cref="Translate"/>; kept as an alternative implementation.
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromType">Source language code; "zh-cn" selects zh-&gt;en, anything else selects en-&gt;zh.</param>
        /// <param name="toType">Unused: the direction is derived from <paramref name="fromType"/> only.</param>
        /// <returns>
        /// The translated text when the service reports status "200", the reported status otherwise,
        /// or "err:" followed by the exception message when the call or the parsing fails.
        /// </returns>
        private static string GoogleTranslater_GetMethod(string sourceText, string fromType, string toType)
        {
            string langPair = IsChinese(fromType) ? "zh|en" : "en|zh";
            string url = "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair="
                + HttpUtility.UrlEncode(langPair)
                + "&q=" + HttpUtility.UrlEncode(sourceText);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Referer = "http://www.my-ajax-site.com";

            try
            {
                string responseStr;
                using (WebResponse response = request.GetResponse())
                {
                    responseStr = ReadResponseText(response);
                }

                ResponseResult parsed = (ResponseResult)JavaScriptConvert.DeserializeObject(responseStr, typeof(ResponseResult));
                if (parsed.responseStatus == "200")
                {
                    return parsed.responseData.translatedText;
                }

                return parsed.responseStatus;
            }
            catch (Exception ex)
            {
                // Deliberate best effort: every failure is reported to the caller as text.
                return "err:" + ex.Message;
            }
        }

        #endregion

        #region Google translation: POST

        /// <summary>
        /// Google translation by posting the translate form and scraping the "result_box" element
        /// out of the returned HTML.
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromType">Source language code; "zh-cn" is sent as "zh", anything else as "en".</param>
        /// <param name="toType">Target language code; "zh-cn" is sent as "zh", anything else as "en".</param>
        /// <returns>
        /// The plain text of the result element, or an empty string when the page contains no such element.
        /// </returns>
        /// <exception cref="WebException">The request failed without any HTTP response (for example a connection failure).</exception>
        private static string GoogleTranslater_PostMethod(string sourceText, string fromType, string toType)
        {
            string fromLan = IsChinese(fromType) ? "zh" : "en";
            string toLan = IsChinese(toType) ? "zh" : "en";

            StringBuilder form = new StringBuilder();
            AppendFormField(form, "hl", "en");
            AppendFormField(form, "ie", "UTF-8");
            AppendFormField(form, "sl", fromLan);
            AppendFormField(form, "text", sourceText);
            AppendFormField(form, "tl", toLan);

            // The body is percent-encoded, hence pure ASCII.
            byte[] data = Encoding.ASCII.GetBytes(form.ToString());

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://translate.google.com/translate_t#");
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
            request.Method = "POST";
            // The body is a URL-encoded form, so declare it as one.
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = data.Length;
            request.KeepAlive = true;
            request.Timeout = 6 * 60 * 1000;
            request.ProtocolVersion = HttpVersion.Version10;
            // Scoped to this request's service point and set before the body is sent,
            // instead of flipping the process-wide ServicePointManager switch afterwards.
            request.ServicePoint.Expect100Continue = false;

            string content;
            try
            {
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(data, 0, data.Length);
                }

                using (WebResponse response = request.GetResponse())
                {
                    content = ReadResponseText(response);
                }
            }
            catch (WebException ex)
            {
                // Without a response there is no error page to scrape.
                if (ex.Response == null)
                {
                    throw;
                }

                // The remote site answered with an error status: scrape its body like a normal page.
                using (WebResponse errorResponse = ex.Response)
                {
                    content = ReadResponseText(errorResponse);
                }
            }
            finally
            {
                request.Abort();
            }

            Match resultBox = ResultBoxRegex.Match(content);
            return resultBox.Success ? ConvertHtmlToText(resultBox.Value) : string.Empty;
        }

        /// <summary>
        /// Appends "name=value" (both URL-encoded as UTF-8) to <paramref name="form"/>,
        /// preceded by "&amp;" when the form already has content.
        /// </summary>
        private static void AppendFormField(StringBuilder form, string name, string value)
        {
            if (form.Length > 0)
            {
                form.Append("&");
            }

            form.Append(HttpUtility.UrlEncode(name, Encoding.UTF8));
            form.Append("=");
            form.Append(HttpUtility.UrlEncode(value, Encoding.UTF8));
        }

        /// <summary>
        /// Converts an HTML fragment to plain text.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// a) replace CR, LF and TAB with spaces (they are insignificant in HTML);
        /// b) drop each head, script and style element together with its content;
        /// c) turn td into a space, br/li into a line break and tr/p into a paragraph break;
        /// d) strip every remaining tag;
        /// e) decode &amp;nbsp;, &amp;lt;, &amp;gt; and &amp;amp;, and remove any other character entity;
        /// f) collapse runs of spaces and of line breaks.
        /// </remarks>
        /// <param name="source">HTML to convert; null is treated as empty.</param>
        /// <returns>The plain-text rendering, or an empty string for null/empty input.</returns>
        public static string ConvertHtmlToText(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return string.Empty;
            }

            const RegexOptions ignoreCase = RegexOptions.IgnoreCase;

            // a) line breaks and tabs carry no meaning in HTML
            string result = source.Replace("\r", " ").Replace("\n", " ").Replace("\t", " ");

            // b) head / script / style. Lazy quantifiers so that text BETWEEN two elements survives.
            result = Regex.Replace(result, "(<head>).*?(</head>)", string.Empty, ignoreCase);
            result = Regex.Replace(result, @"<( )*script([^>])*>", "<script>", ignoreCase); // drop attributes first
            result = Regex.Replace(result, @"(<script>).*?(</script>)", string.Empty, ignoreCase);
            result = Regex.Replace(result, @"<( )*style([^>])*>", "<style>", ignoreCase); // drop attributes first
            result = Regex.Replace(result, "(<style>).*?(</style>)", string.Empty, ignoreCase);

            // c) layout tags
            result = Regex.Replace(result, @"<( )*td([^>])*>", " ", ignoreCase);
            result = Regex.Replace(result, @"<( )*br( )*/?( )*>", "\r", ignoreCase); // also <br/> and <br />
            result = Regex.Replace(result, @"<( )*li( )*>", "\r", ignoreCase);
            result = Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", ignoreCase);
            result = Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", ignoreCase);

            // d) whatever markup is left
            result = Regex.Replace(result, @"<[^>]*>", string.Empty, ignoreCase);

            // e) character entities. "&amp;" is decoded LAST so that "&amp;lt;" becomes the literal
            //    text "&lt;" rather than "<", and it is excluded from the removal of unknown entities.
            result = Regex.Replace(result, @"&nbsp;", " ", ignoreCase);
            result = Regex.Replace(result, @"&lt;", "<", ignoreCase);
            result = Regex.Replace(result, @"&gt;", ">", ignoreCase);
            result = Regex.Replace(result, @"&(?!amp;)#?[0-9a-z]{2,6};", string.Empty, ignoreCase);
            result = Regex.Replace(result, @"&amp;", "&", ignoreCase);

            // f) redundant spaces and line breaks
            result = Regex.Replace(result, @" ( )+", " ");
            result = Regex.Replace(result, "(\r)( )+(\r)", "\r\r");
            result = Regex.Replace(result, @"(\r\r)+", "\r\n");
            return result;
        }

        #endregion

        #region Microsoft translation

        /// <summary>
        /// Translates text with the Microsoft Translator HTTP API (language codes such as "zh-cn", "en").
        /// </summary>
        /// <param name="orgStr">Text to translate.</param>
        /// <param name="fromType">Source language code.</param>
        /// <param name="toType">Target language code.</param>
        /// <returns>
        /// The translated text, or a description of the failure when the request raises a <see cref="WebException"/>.
        /// </returns>
        public static string MircsoftTanslater(string orgStr, string fromType, string toType)
        {
            // NOTE(review): this credential is embedded in source; consider loading it from configuration.
            const string appId = "56E164FED4017D272E06AD7E16778536251CA5CB";

            string uri = "http://api.microsofttranslator.com/v2/Http.svc/Translate?appId=" + appId
                + "&text=" + HttpUtility.UrlEncode(orgStr)
                + "&from=" + HttpUtility.UrlEncode(fromType)
                + "&to=" + HttpUtility.UrlEncode(toType);

            HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
            try
            {
                string content;
                using (WebResponse response = httpWebRequest.GetResponse())
                {
                    content = ReadResponseText(response);
                }

                // Payload looks like:
                // <string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">Hello, China</string>
                content = content.Replace("<string xmlns=\"http://schemas.microsoft.com/2003/10/Serialization/\">", "");
                content = content.Replace("</string>", "");

                // The text was XML-escaped inside the element; undo that so "&amp;" comes back as "&".
                return HttpUtility.HtmlDecode(content);
            }
            catch (WebException e)
            {
                return ProcessWebException(e, "Failed to translate");
            }
        }

        /// <summary>
        /// Builds a textual description of a failed web request.
        /// </summary>
        /// <param name="e">The exception raised by the request.</param>
        /// <param name="message">Context used as the error text when the server sent no response body.</param>
        /// <returns>"Http status code={status}, error message={details}".</returns>
        private static string ProcessWebException(WebException e, string message)
        {
            string details = string.Empty;

            // e.Response is null when the failure happened before any HTTP response was received.
            if (e.Response != null)
            {
                using (WebResponse response = e.Response)
                {
                    details = ReadResponseText(response);
                }
            }

            if (string.IsNullOrEmpty(details))
            {
                details = string.Format("{0}: {1}", message, e.Message);
            }

            return string.Format("Http status code={0}, error message={1}", e.Status, details);
        }

        #endregion
    }

    /// <summary>
    /// Shape of the JSON document returned by the Google AJAX translation call.
    /// Property names mirror the JSON keys.
    /// </summary>
    public class ResponseResult
    {
        /// <summary>Payload carrying the translated text.</summary>
        public ResponseData responseData { get; set; }

        /// <summary>Additional details reported by the service.</summary>
        public string responseDetails { get; set; }

        /// <summary>Status reported by the service; "200" is treated as success.</summary>
        public string responseStatus { get; set; }
    }

    /// <summary>
    /// The "responseData" member of <see cref="ResponseResult"/>.
    /// </summary>
    public class ResponseData
    {
        /// <summary>The translated text.</summary>
        public string translatedText { get; set; }
    }
}


原创粉丝点击