任务调度程序

来源:互联网 发布:光纤分路器 什么网络 编辑:程序博客网 时间:2024/06/06 21:42
题目:一个XXX系统,有多个数据来源,需要每隔一段时间去数据提供方拉取数据。每次拉取有一个超时上限,如果超时就kill掉并且记录到日志。因为拉取数据的需求经常变,并且每个数据源变的需求都不一样,所以拉取操作希望能用脚本语言(比如php)实现,而调度进程无要求。

解:调度进程用c++实现;php脚本接收两个参数(超时时间和任务号),并用sleep模拟拉取数据和处理数据的操作。

c++调度程序:

#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
#include <string>
#include <vector>

// Periodic task scheduler.
//
// Every task in `tasks` is started once per `timeRound`-second cycle by
// forking a PHP worker, and is killed (and logged) if the worker is still
// running when the task's timeout slot is reached. Error handling is kept
// deliberately simple: failures are logged to "mainlog" and the scheduler
// carries on.

// One scheduled job: `begin` is the second within the cycle at which the job
// starts, `duration` is the number of seconds it may run before being killed.
struct Task {
    int id;
    unsigned begin;
    unsigned duration;
};

// Wall-clock second at which the scheduler started; log timestamps are
// written relative to it.
static time_t gStartSec;

// Length of one scheduling cycle in seconds (20 s is enough for testing).
const unsigned timeRound = 20;

Task tasks[] = {
    {1, 1, 11},
    {2, 5, 12},
    {3, 6, 11},
    {4, 8, 14},
    {5, 10, 14},
    {6, 12, 12},
    {7, 13, 11},
    {8, 15, 13},
    {9, 16, 12},
    {10, 16, 12},
    {11, 17, 11},
    {12, 18, 11},
    {13, 19, 13},
    {14, 19, 13}
};

// Events stored in the time map are encoded as taskIndex * kEventStride + kind.
static const unsigned kEventStride = 10;
static const unsigned kEventStart = 0;
static const unsigned kEventTimeout = 1;

// Returns how many seconds the main loop should wait, counted from slot
// `now`, until the next slot of `timeMap` that holds at least one event.
// The search wraps around the end of the cycle. It gives up after one full
// cycle, so a map without any event yields timeMap.size() instead of looping
// forever; an empty map yields 1.
inline unsigned getInterval(const std::vector<std::vector<unsigned> > &timeMap, unsigned now)
{
    const unsigned slots = static_cast<unsigned>(timeMap.size());
    if (slots == 0)
        return 1;

    unsigned interval = 0;
    do {
        now = (now + 1 >= slots) ? 0 : now + 1;
        interval++;
    } while (timeMap[now].empty() && interval < slots);
    return interval;
}

// Appends "<seconds since start>:<str>" to the file "mainlog" in the current
// directory. The file is opened lazily on first use and kept open. `str` is
// written verbatim, so the caller supplies the trailing newline. Logging is
// best effort: if the file cannot be opened or written the message is dropped.
void toLog(const std::string &str)
{
    static int fd = -1;
    if (fd == -1)
        fd = open("mainlog", O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC,
                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);  // a log needs no execute bits
    if (fd == -1)
        return;

    struct timeval tv;
    gettimeofday(&tv, NULL);
    const std::string log = std::to_string(tv.tv_sec - gStartSec) + ":" + str;

    // write() may be short or interrupted; keep going until everything is out.
    const char *cursor = log.data();
    size_t remaining = log.size();
    while (remaining > 0) {
        const ssize_t written = write(fd, cursor, remaining);
        if (written < 0) {
            if (errno == EINTR)
                continue;
            return;
        }
        cursor += written;
        remaining -= static_cast<size_t>(written);
    }
}

// pid of the worker currently running for each task (same index as `tasks`);
// 0 means no worker of that task is running.
std::vector<pid_t> pids(sizeof(tasks) / sizeof(tasks[0]), 0);

// SIGCHLD handler. It intentionally does nothing: string building, logging
// and bookkeeping are not async-signal-safe, so all of that happens in the
// main loop. The handler only has to exist so that a terminating child
// interrupts pselect() in main().
void child_handler(int, siginfo_t *, void *)
{
}

// Logs the termination of child `pid` and clears its slot in `pids`.
// Children that were terminated by a signal are reported as such instead of
// being shown with a misleading exit status of 0.
static void logChildExit(pid_t pid, int status)
{
    std::string log = std::to_string(pid);
    if (WIFSIGNALED(status))
        log += " done,killed by signal: " + std::to_string(WTERMSIG(status));
    else
        log += " done,exit status: " + std::to_string(WEXITSTATUS(status));

    const std::vector<pid_t>::iterator iter = std::find(pids.begin(), pids.end(), pid);
    if (iter != pids.end()) {
        const size_t index = static_cast<size_t>(iter - pids.begin());
        *iter = 0;
        log += ",taskNum:" + std::to_string(tasks[index].id);
    }
    log += "\n";
    toLog(log);
}

// Reaps every child that has already terminated, without blocking.
// Several children may have exited for a single SIGCHLD, hence the loop.
static void reapChildren()
{
    int status = 0;
    pid_t pid;
    while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
        logChildExit(pid, status);
}

// Makes sure no worker of task `taskNum` (index into `tasks`) is left
// running: a worker that is still alive is killed and the kill is logged.
// The slot in `pids` is empty afterwards in every case.
static void killIfStillRunning(unsigned taskNum)
{
    const pid_t pid = pids[taskNum];
    if (pid <= 0)  // never pass 0 or -1 to waitpid()/kill(): they address whole groups
        return;

    int status = 0;
    const pid_t result = waitpid(pid, &status, WNOHANG);
    if (result == 0) {
        // Still alive and really our child: kill it. The corpse is collected
        // by reapChildren() on the next pass of the main loop.
        kill(pid, SIGKILL);
        toLog(std::to_string(tasks[taskNum].id) + " lasted too long and killed\n");
    } else if (result == pid) {
        // It finished in the meantime; record the exit instead of losing it.
        logChildExit(pid, status);
    }
    pids[taskNum] = 0;
}

// Starts a new worker for task `taskNum` (index into `tasks`). A worker left
// over from the previous cycle is killed first. `childMask` is the signal
// mask installed in the child before exec, so that the worker does not
// inherit the scheduler's blocked SIGCHLD.
static void startTask(unsigned taskNum, const sigset_t &childMask)
{
    killIfStillRunning(taskNum);

    // Build the argument strings before fork() so the child only has to exec.
    const std::string duration = std::to_string(tasks[taskNum].duration);
    const std::string id = std::to_string(tasks[taskNum].id);

    const pid_t pid = fork();
    if (pid == -1) {
        // Storing -1 in `pids` would later turn into kill(-1, SIGKILL).
        toLog(id + " fork failed: " + strerror(errno) + "\n");
        return;
    }
    if (pid == 0) {
        sigprocmask(SIG_SETMASK, &childMask, NULL);
        execl("/usr/bin/php", "php", "/home/ubuntu/liu3/work/sleep.php",
              duration.c_str(), id.c_str(), static_cast<char *>(0));
        // Only reached when exec failed.
        perror(NULL);
        _exit(127);
    }

    pids[taskNum] = pid;
    toLog(id + " started, pid = " + std::to_string(pid) + "\n");
}

int main()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    gStartSec = tv.tv_sec;

    // Install the SIGCHLD handler.
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = child_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGCHLD, &sa, NULL);

    // Keep SIGCHLD blocked while the loop works on `pids`; it is only let
    // through atomically inside pselect(), so a child that exits just before
    // the wait cannot be missed.
    sigset_t blocked;
    sigset_t originalMask;
    sigemptyset(&blocked);
    sigaddset(&blocked, SIGCHLD);
    sigprocmask(SIG_BLOCK, &blocked, &originalMask);
    sigset_t waitMask = originalMask;
    sigdelset(&waitMask, SIGCHLD);

    // Build the "event time map": for every second of the cycle, the list of
    // start and timeout events that fall on it.
    std::vector<std::vector<unsigned> > timeMap(timeRound);
    const unsigned taskCount = static_cast<unsigned>(sizeof(tasks) / sizeof(tasks[0]));
    for (unsigned t = 0; t < taskCount; ++t) {
        timeMap[tasks[t].begin % timeRound].push_back(t * kEventStride + kEventStart);
        timeMap[(tasks[t].begin + tasks[t].duration) % timeRound]
            .push_back(t * kEventStride + kEventTimeout);
    }

    // Absolute second whose events were handled last. It prevents running the
    // same second twice when a child exit wakes the loop early.
    time_t lastHandledSec = 0;

    while (true) {
        reapChildren();

        gettimeofday(&tv, NULL);
        const unsigned slot = static_cast<unsigned>(tv.tv_sec % timeRound);

        if (tv.tv_sec != lastHandledSec && !timeMap[slot].empty()) {
            const std::vector<unsigned> &events = timeMap[slot];
            for (size_t e = 0; e < events.size(); ++e) {
                const unsigned taskNum = events[e] / kEventStride;
                const unsigned kind = events[e] % kEventStride;
                if (kind == kEventStart)
                    startTask(taskNum, originalMask);
                else if (kind == kEventTimeout)
                    killIfStillRunning(taskNum);
            }
            lastHandledSec = tv.tv_sec;
        }

        const unsigned interval = getInterval(timeMap, slot);
        toLog(std::string("now i = ") + std::to_string(slot) + ",will sleep " +
              std::to_string(interval) + "\n");

        // Wait until the next event slot, or until a child terminates,
        // whichever comes first.
        struct timespec timeout;
        timeout.tv_sec = static_cast<time_t>(interval);
        timeout.tv_nsec = 0;
        pselect(0, NULL, NULL, NULL, &timeout, &waitMask);
    }
}

php:

<?php
/**
 * Worker script that simulates fetching and processing data by sleeping.
 *
 * Usage: php sleep.php <timeout-seconds> <task-id>
 *
 * One line is appended to the file "log" in the current directory in the form
 *   php:<task-id>:<seconds since a fixed epoch>:<pid>:<timeout>:<sleep time>
 * and the script then sleeps for <sleep time> seconds.
 * Exit code: 1 with a small random probability (so the parent's handling of
 * failed children can be exercised), 2 on missing arguments, otherwise 0.
 */
global $argv;

if (!isset($argv[1], $argv[2])) {
    fwrite(STDERR, "usage: php sleep.php <timeout-seconds> <task-id>\n");
    exit(2);
}

$time = (int)$argv[1];
$taskId = $argv[2];

// The offset rand(-10, 1) exceeds the timeout only when it comes out as +1,
// so a worker runs too long with a small probability. The result is clamped
// at zero because sleep() rejects negative values when $time is below 10.
$sleepTime = max(0, $time + rand(-10, 1));

$fd = fopen("log", "a+");
if ($fd !== false) {
    // 1499855185 is a fixed epoch offset; presumably it only keeps the logged
    // timestamps short.
    fwrite($fd, "php:$taskId:".(time() - 1499855185).':'.posix_getpid().":$time:$sleepTime\n");
    // Close right away so the handle is not held open during the sleep.
    fclose($fd);
}

sleep($sleepTime);

// With a small probability report failure so the parent's SIGCHLD handling
// of a non-zero exit status is exercised.
if (rand(1, 10) < 2)
    exit(1);
exit(0);


测试结果,大致的测试和解说:

php脚本日志(log文件):

php:11:73312:28261:11:9
php:12:73313:28262:11:4
php:14:73314:28266:13:8
php:13:73314:28265:13:14        //编号13的任务执行超时,后面应该被kill
php:1:73316:28268:11:12         //编号1的任务本该执行11秒,现在随机到执行12秒
php:2:73320:28272:12:10
php:3:73321:28273:11:12         //任务3也应该被kill,在第20秒
php:4:73323:28274:14:8          //四号任务8秒后应该寿终正寝
php:5:73325:28276:14:12
php:6:73327:28278:12:12
php:7:73328:28279:11:6
php:8:73330:28280:13:10
php:10:73331:28282:12:2
php:9:73331:28281:12:7
php:11:73332:28283:11:10
php:12:73333:28284:11:8
php:14:73334:28289:13:13
php:13:73334:28288:13:12
php:1:73336:28291:11:4  //被kill的任务1 20秒后重启
php:2:73340:28292:12:5
php:3:73341:28293:11:5
php:4:73343:28296:14:7
php:5:73345:28299:14:10
php:6:73347:28302:12:7
php:7:73348:28304:11:12
php:8:73350:28305:13:11
php:10:73351:28307:12:7
php:9:73351:28306:12:11
php:11:73352:28308:11:5
php:12:73353:28309:11:12
php:14:73354:28313:13:5
php:13:73354:28312:13:11
php:1:73356:28330:11:4

调度程序日志(mainlog文件):

0:11 started, pid = 28261
0:now i = 17,will sleep 1
1:12 started, pid = 28262
1:now i = 18,will sleep 1
2:13 started, pid = 28265
2:14 started, pid = 28266
2:now i = 19,will sleep 2
4:1 started, pid = 28268        //第四秒编号1的任务执行
4:now i = 1,will sleep 1
5:now i = 2,will sleep 2
5:28262 done,exit status: 0,taskNum:12
5:now i = 2,will sleep 2
7:now i = 4,will sleep 1
8:2 started, pid = 28272
8:now i = 5,will sleep 1
9:28261 done,exit status: 0,taskNum:11
9:3 started, pid = 28273        //应该执行到第20秒正常退出
9:now i = 6,will sleep 2
10:28266 done,exit status: 1,taskNum:14
10:now i = 7,will sleep 1
11:4 started, pid = 28274       //四号应该在19秒正常退出
11:now i = 8,will sleep 1
12:now i = 9,will sleep 1
13:5 started, pid = 28276
13:now i = 10,will sleep 2
15:1 lasted too long and killed         //11秒以后编号为1的任务还没执行完被kill掉
15:6 started, pid = 28278
15:13 lasted too long and killed
15:now i = 12,will sleep 1
15:28268 done,exit status: 0    //编号1任务如愿被kill,至于status为0,to do
15:28265 done,exit status: 0    //同理编号为13的,提前两秒执行的任务也在15秒被kill
15:now i = 12,will sleep 1
16:7 started, pid = 28279
16:now i = 13,will sleep 2
18:8 started, pid = 28280
18:now i = 15,will sleep 1
18:28272 done,exit status: 0,taskNum:2
18:now i = 15,will sleep 1
19:9 started, pid = 28281
19:10 started, pid = 28282
19:now i = 16,will sleep 1
19:28274 done,exit status: 0,taskNum:4  //四号正常退出
19:now i = 16,will sleep 1
20:3 lasted too long and killed         //任务3超时被kill
20:11 started, pid = 28283
20:now i = 17,will sleep 1
20:28273 done,exit status: 0
20:now i = 17,will sleep 1
21:12 started, pid = 28284
21:now i = 18,will sleep 1
21:28282 done,exit status: 0,taskNum:10
21:now i = 18,will sleep 1
22:28279 done,exit status: 0,taskNum:7
22:13 started, pid = 28288
22:14 started, pid = 28289
22:now i = 19,will sleep 2
24:1 started, pid = 28291       //被kill的1在第二个轮回如约重启
24:now i = 1,will sleep 1
25:28276 done,exit status: 0,taskNum:5
25:now i = 2,will sleep 2
26:28281 done,exit status: 0,taskNum:9
26:now i = 3,will sleep 1
27:28278 done,exit status: 0,taskNum:6
27:now i = 4,will sleep 1

算接触过的程序里比较麻烦点的。

原创粉丝点击