Azure Storm入门(二)—— 事务处理

来源:互联网 发布:屏幕截图软件下载 编辑:程序博客网 时间:2024/06/04 23:24

这是Azure Storm系列文章。此前的文章包括:

Azure Storm入门(一)——从一个例子开始










3、此程序与Azure Storm入门(一)的区别主要有:


// Register the "generator" spout and attach a user-customized config file
// (Generator.config) to it.
// NOTE: in the collapsed one-line form this comment was fused with the statement
// below ("...GeneratortopologyBuilder.SetSpout("), which commented out the call.
topologyBuilder.SetSpout(
    "generator",
    Generator.Get,
    new Dictionary<string, List<string>>()
    {
        // The default stream carries a single field named "sentence".
        {Constants.DEFAULT_STREAM_ID, new List<string>(){"sentence"}}
    },
    1,                      // presumably the parallelism hint (executor count) - TODO confirm against SetSpout
    "Generator.config");    // user-customized config file



using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.SCP;
using Microsoft.SCP.Topology;

// Sample program: builds an SCP.NET topology out of C# spouts and bolts.
namespace StormSample1
{
    /// <summary>
    /// Describes the sample topology in C# by implementing TopologyDescriptor and
    /// returning an ITopologyBuilder instance. The descriptor is marked as active.
    /// </summary>
    [Active(true)]
    class HelloWorld : TopologyDescriptor
    {
        /// <summary>
        /// Builds the topology with the Topology Specification API:
        /// "generator" spout -> "splitter" bolt -> "counter" bolt.
        /// </summary>
        /// <returns>The builder that holds the complete topology description.</returns>
        public ITopologyBuilder GetTopologyBuilder()
        {
            // Non-Tx topology; the name is the class name followed by a timestamp.
            string topologyName = typeof(HelloWorld).Name + DateTime.Now.ToString("yyyyMMddHHmmss");
            TopologyBuilder builder = new TopologyBuilder(topologyName);

            // Output schema of the generator: one field on the default stream.
            Dictionary<string, List<string>> generatorSchema = new Dictionary<string, List<string>>();
            generatorSchema.Add(Constants.DEFAULT_STREAM_ID, new List<string>() { "sentence" });

            // Spout with a user-customized config (Generator.config).
            // Arguments after the schema, per the original annotations: executor count,
            // custom config file, and a flag that enables ack.
            builder.SetSpout("generator", Generator.Get, generatorSchema, 2, "Generator.config", true);

            // Output schema of the splitter: the word and its first letter.
            Dictionary<string, List<string>> splitterSchema = new Dictionary<string, List<string>>();
            splitterSchema.Add(Constants.DEFAULT_STREAM_ID, new List<string>() { "word", "firstLetterOfWord" });

            // The splitter takes its input from the generator using shuffle grouping.
            builder.SetBolt("splitter", Splitter.Get, splitterSchema, 2, true)
                   .shuffleGrouping("generator");

            // Output schema of the counter: the word and its running count.
            Dictionary<string, List<string>> counterSchema = new Dictionary<string, List<string>>();
            counterSchema.Add(Constants.DEFAULT_STREAM_ID, new List<string>() { "word", "count" });

            // Field grouping from splitter to counter. The index array [1] (zero-based)
            // selects the second field of the splitter's output schema as the grouping target.
            List<int> groupingFieldIndexes = new List<int>() { 1 };
            builder.SetBolt("counter", Counter.Get, counterSchema, 2, true)
                   .fieldsGrouping("splitter", groupingFieldIndexes);

            // Topology-level configuration.
            Dictionary<string, string> topologyConfig = new Dictionary<string, string>();
            topologyConfig.Add("topology.kryo.register", "[\"[B\"]");
            builder.SetTopologyConfig(topologyConfig);

            return builder;
        }
    }
}

// Maximum number of tuples that may be in flight (emitted but not yet acked) at once.
private const int MAX_PENDING_TUPLE_NUM = 10;

// Whether ack is enabled; assigned in the Generator constructor from the configuration.
private bool enableAck = false;

// Sequence id attached to the most recently emitted tuple.
private long lastSeqId = 0;

// Every sentence that has been emitted, keyed by its sequence id.
private Dictionary<long, string> cachedTuples = new Dictionary<long, string>();
/// <summary>
/// Creates the generator spout: stores the context, picks up the optional user
/// configuration, declares the output schema, and reads the ack setting.
/// </summary>
/// <param name="ctx">Context used to declare the schema (and stored for later use).</param>
/// <param name="parms">
/// Optional parameters; when it contains the key "UserConfig" the value is used as
/// the user-customized <c>Configuration</c>.
/// </param>
public Generator(Context ctx, Dictionary<string, Object> parms = null)
{
    Context.Logger.Info("StormSample1, Generator constructor called");
    this.ctx = ctx;

    // Demo how to get User customized config from parms
    if (parms != null && parms.ContainsKey("UserConfig"))
    {
        this.cfg = (Configuration)parms["UserConfig"];
    }

    if (cfg != null)
    {
        // Demo how to read a user-defined setting. The settings indexer yields null
        // for a missing key, so guard it instead of dereferencing .Value blindly.
        var batchSizeSetting = cfg.AppSettings.Settings["BatchSize"];
        if (batchSizeSetting != null)
        {
            Context.Logger.Info("StormSample1, Generator " + string.Format("New \"Generator\" instance created with config setting: {0}={1}.", "BatchSize", batchSizeSetting.Value));
        }
        else
        {
            Context.Logger.Warn("StormSample1, Generator config has no \"BatchSize\" setting.");
        }
    }

    // Declare Output schema: a single string field on the "default" stream.
    Dictionary<string, List<Type>> outputSchema = new Dictionary<string, List<Type>>();
    outputSchema.Add("default", new List<Type>() { typeof(string) });
    this.ctx.DeclareComponentSchema(new ComponentStreamSchema(null, outputSchema));

    // Demo how to get pluginConf info and enable ACK in Non-Tx topology
    if (Context.Config.pluginConf.ContainsKey(Constants.NONTRANSACTIONAL_ENABLE_ACK))
    {
        // Ack stays disabled unless the plugin configuration carries this key.
        enableAck = (bool)(Context.Config.pluginConf[Constants.NONTRANSACTIONAL_ENABLE_ACK]);
    }
    Context.Logger.Info("StormSample1, Generator enableAck: {0}", enableAck);
}


(4)Generator.cs的NextTuple函数如下。Spout 必须保存已发出数据的元数据,这样在处理失败时才能检索并重新发出这些数据。此示例发出的数据量很小,因此为了便于重放,直接把每个 Tuple 的原始数据存储在字典中。

/// <summary>
/// Emits one randomly chosen sentence. With ack enabled, the sentence is emitted
/// with a fresh sequence id and cached so it can be replayed on failure; emission
/// pauses while the number of cached (un-acked) tuples has reached
/// MAX_PENDING_TUPLE_NUM.
/// </summary>
/// <param name="parms">Not used by this method.</param>
public void NextTuple(Dictionary<string, Object> parms)
{
    // Throttle: wait 60 seconds before every emit attempt.
    Thread.Sleep(1000 * 60);
    Context.Logger.Info("StormSample1, Generator NextTuple enter");
    string sentence;

    if (enableAck)
    {
        // Stop emitting once the number of pending tuples reaches the limit.
        // (Was "<=", which let one tuple more than the declared maximum through.)
        if (cachedTuples.Count < MAX_PENDING_TUPLE_NUM)
        {
            lastSeqId++;
            // Random.Next's upper bound is exclusive; passing Length (not Length - 1)
            // makes the last sentence selectable too.
            sentence = sentences[rand.Next(0, sentences.Length)];
            Context.Logger.Info("StormSample1, Generator Emit: {0}, seqId: {1}", sentence, lastSeqId);
            this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new Values(sentence), lastSeqId);
            // Remember the sentence under its sequence id so Fail() can re-emit it.
            cachedTuples[lastSeqId] = sentence;
        }
        else
        {
            // if have nothing to emit, then sleep for a little while to release CPU
            Thread.Sleep(50);
        }
        Context.Logger.Info("StormSample1, Generator cached tuple num: {0}", cachedTuples.Count);
    }
    else
    {
        // Without ack there is nothing to replay: emit without a sequence id or caching.
        sentence = sentences[rand.Next(0, sentences.Length)];
        Context.Logger.Info("StormSample1, Generator Emit: {0}", sentence);
        this.ctx.Emit(new Values(sentence));
    }

    // Exit message now names the method correctly (was "NextTx").
    Context.Logger.Info("StormSample1, Generator NextTuple exit");
}

/// <summary>
/// Handles an ack for a tuple: drops the cached sentence stored under
/// <paramref name="seqId"/> and warns when no such entry exists.
/// </summary>
/// <param name="seqId">Sequence id of the acked tuple.</param>
/// <param name="parms">Not used by this method.</param>
public void Ack(long seqId, Dictionary<string, Object> parms)
{
    Context.Logger.Info("StormSample1, Generator Ack, seqId: {0}", seqId);

    // Remove returns true when the entry existed; nothing else to do in that case.
    if (cachedTuples.Remove(seqId))
    {
        return;
    }

    Context.Logger.Warn("StormSample1, Ack(), Generator remove cached tuple for seqId {0} fail!", seqId);
}

/// <summary>
/// Handles a failed tuple: re-emits the cached sentence under the same sequence id,
/// or warns when nothing is cached for <paramref name="seqId"/>.
/// </summary>
/// <param name="seqId">Sequence id of the failed tuple.</param>
/// <param name="parms">Not used by this method.</param>
public void Fail(long seqId, Dictionary<string, Object> parms)
{
    Context.Logger.Info("StormSample1, Generator Fail, seqId: {0}", seqId);

    string cachedSentence;
    if (!cachedTuples.TryGetValue(seqId, out cachedSentence))
    {
        Context.Logger.Warn("StormSample1, Fail(), Generator can't find cached tuple for seqId {0}!", seqId);
        return;
    }

    // Replay the original sentence, keeping its sequence id.
    Context.Logger.Info("StormSample1, Generator Re-Emit: {0}, seqId: {1}", cachedSentence, seqId);
    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new Values(cachedSentence), seqId);
}

/// <summary>
/// Splits the incoming sentence on spaces and emits one (word, first character)
/// tuple per word. With ack enabled, the input tuple is then either failed at
/// random, or acked, sometimes after a delay longer than the message timeout.
/// </summary>
/// <param name="tuple">Input tuple; field 0 is read as the sentence string.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("StormSample1, Splitter Execute enter");

    string sentence = tuple.GetString(0);
    foreach (string word in sentence.Split(' '))
    {
        // Split(' ') yields empty strings for an empty sentence or for consecutive,
        // leading or trailing spaces; word[0] would throw on those, so skip them.
        if (word.Length == 0)
        {
            continue;
        }

        Context.Logger.Info("StormSample1, Splitter Emit: {0}", word);
        // The input tuple is passed along in the list argument
        // (presumably as the anchor - confirm against Emit's signature).
        this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple> { tuple }, new Values(word, word[0]));
    }

    if (enableAck)
    {
        if (Sample(50)) // this is to demo how to fail tuple. We do it randomly
        {
            Context.Logger.Info("StormSample1, Splitter fail tuple: tupleId: {0}", tuple.GetTupleId());
            this.ctx.Fail(tuple);
        }
        else
        {
            if (Sample(50)) // this is to simulate timeout
            {
                // Sleep one second longer than msgTimeoutSecs before acking.
                Context.Logger.Info("StormSample1, Splitter sleep {0} seconds", msgTimeoutSecs + 1);
                Thread.Sleep((msgTimeoutSecs + 1) * 1000);
            }
            Context.Logger.Info("StormSample1, Splitter Ack tuple: tupleId: {0}", tuple.GetTupleId());
            this.ctx.Ack(tuple);
        }
    }

    Context.Logger.Info("StormSample1, Splitter Execute exit");
}

/// <summary>
/// Increments the running count for the incoming word, emits (word, count), acks
/// the input tuple when ack is enabled, and - on the task with index 0 only -
/// appends the result to an output text file.
/// </summary>
/// <param name="tuple">Input tuple; field 0 is read as the word.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("StormSample1, Counter Execute enter");

    string word = tuple.GetString(0);

    // Look up the previous count (0 when the word is new) and bump it by one.
    int count;
    if (!counts.TryGetValue(word, out count))
    {
        count = 0;
    }
    count += 1;
    counts[word] = count;

    Context.Logger.Info("StormSample1, Counter Emit: {0}, count: {1}", word, count);
    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple> { tuple }, new Values(word, count));

    if (enableAck)
    {
        Context.Logger.Info("StormSample1, Counter Ack tuple: tupleId: {0}", tuple.GetTupleId());
        this.ctx.Ack(tuple);
    }

    // Log some info to an output file for BVT test validation.
    // For a component with multiple parallelism, only one task needs to log.
    if (taskIndex == 0)
    {
        // One file per process: the process id is part of the file name.
        string outputPath = @"..\..\..\..\..\HelloWorldOutput" + Process.GetCurrentProcess().Id + ".txt";
        using (StreamWriter output = new StreamWriter(new FileStream(outputPath, FileMode.Append)))
        {
            output.WriteLine("word: {0}, count: {1}", word, count);
        }
    }

    Context.Logger.Info("StormSample1, Counter Execute exit");
}






转载请注明:康瑞部落 » Azure Storm入门(二)—— 事务处理
0 0