Subtitle Resynchronizer (I)

来源:互联网 发布:帆船鞋 知乎 编辑:程序博客网 时间:2024/05/22 00:08
For some of the movies I downloaded, no perfectly matching subtitles can easily be found online, so I have to use the closest ones available. The resync function of VobSub and similar tools is clearly not a workable solution in that case. With this tool, and without much tuning, I expect I can use even subtitles whose timing diverges widely from the movie. The complete code of the latest version, with some (admittedly limited) commentary, is provided here:

#include <cstdio>
#include <cstring>
#include <cstdlib>

/* Time value used throughout this file; the code stores milliseconds in it. */
typedef unsigned long    movtime_t;

/*
 * Read one line from f into buf and strip its line terminator.
 *
 *   buf   destination buffer
 *   size  capacity of buf in bytes
 *   f     stream to read from
 *
 * On return buf always holds a valid NUL-terminated string. If nothing could
 * be read (end of file, read error, or f is NULL) buf is set to the empty
 * string instead of keeping whatever it held before. Trailing '\n' and '\r'
 * characters are removed, so both LF and CRLF files work; a final line that
 * has no terminator is returned intact rather than losing its last character.
 */
static void fgetline (char *buf, int size, FILE *f)
{
    if (buf == NULL || size <= 0)
    {
        return;
    }

    if (f == NULL || fgets(buf, size, f) == NULL)
    {
        buf[0] = 0;
        return;
    }

    size_t len = strlen(buf);
    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r'))
    {
        buf[--len] = 0;
    }
}

/*
 * Tell whether p is a non-empty string made up only of the ASCII digits
 * '0'..'9'. Signs, spaces and any other character make it return false, as
 * does an empty string or a NULL pointer.
 */
static bool isnumber (const char *p)
{
    if (p == NULL || *p == '\0')
    {
        return false;
    }

    // The terminator must be the real NUL character; otherwise the scan only
    // stops by rejecting the terminator as a non-digit and nothing is accepted.
    for ( ; *p != '\0'; ++p)
    {
        if (*p < '0' || *p > '9')
        {
            return false;
        }
    }
    return true;
}

/*
 * Decode exactly `count` ASCII digits starting at p into value.
 * Returns false at the first non-digit (which includes the terminating NUL,
 * so a short string is never read past its end); value is then left untouched.
 */
static bool parse_timestamp_digits (const char *p, int count, int &value)
{
    int acc = 0;
    for (int i = 0; i < count; ++i)
    {
        if (p[i] < '0' || p[i] > '9')
        {
            return false;
        }
        acc = acc * 10 + (p[i] - '0');
    }
    value = acc;
    return true;
}

/*
 * Parse a timestamp of the form "HH:MM:SS,mmm" found at the start of p.
 *
 *   p  text to parse; characters after the 12th are ignored
 *   t  receives the time in milliseconds on success, untouched on failure
 *
 * Returns true when the 12 leading characters match the pattern. The fields
 * are only checked for being digits, not for range (e.g. minutes above 59 are
 * accepted). Characters are examined strictly left to right and parsing stops
 * at the first mismatch, so strings shorter than 12 characters are safe.
 */
static bool parse_timestamp (const char *p, movtime_t &t)
{
    int h, m, s, ms;

    if (p == NULL)
    {
        return false;
    }
    if (!parse_timestamp_digits(p, 2, h) || p[2] != ':')
    {
        return false;
    }
    if (!parse_timestamp_digits(p + 3, 2, m) || p[5] != ':')
    {
        return false;
    }
    if (!parse_timestamp_digits(p + 6, 2, s) || p[8] != ',')
    {
        return false;
    }
    if (!parse_timestamp_digits(p + 9, 3, ms))
    {
        return false;
    }

    t = (h*3600+m*60+s)*1000+ms;
    return true;
}

static bool istimestamp (char *p, movtime_t &begin, movtime_t &end)
{
    // form 00:00:46,430
    if (strlen(p) < 29) { return false; }

    if (!parse_timestamp(p, begin))
    {
        return false;
    }

    // no assertion on the arrow, for it's already very certain

    if (!parse_timestamp(p + 17, end))
    {
        return false;
    }

    return true;
}

/*
 * Split a millisecond time value into its clock components.
 *
 *   t   time in milliseconds
 *   h   receives the whole hours (not wrapped at 24)
 *   m   receives the minutes, 0..59
 *   s   receives the seconds, 0..59
 *   ms  receives the milliseconds, 0..999
 */
static void analyse_time (movtime_t t, int &h, int &m, int &s, int &ms)
{
    const movtime_t total_seconds = t / 1000;
    const movtime_t total_minutes = total_seconds / 60;

    ms = t % 1000;
    s = total_seconds % 60;
    m = total_minutes % 60;
    h = total_minutes / 60;
}

/*
 * Capacity in bytes of the line buffers used in this file. It should be large
 * enough that no input line has to be split across two reads.
 */
#define sizebuf    2048


static void rectify (
    FILE **ssfpins,        // ssfnum
    FILE **ssfpouts,       // movnum
    int ssfnum,
    int movnum,
    movtime_t *ssfdurs,    // in msec, 120min --> 7200000
    movtime_t *movdurs     // in msec
    )
{

    // the initial offset of both items are provided at the tail
    // the following are the absolute time of them
    movtime_t ssfabs = ssfdurs[ssfnum];   
    movtime_t movabs = movdurs[movnum];

    char buf[sizebuf];
    char outbuf[sizebuf*2] = {0, };
    char oldbuf[sizebuf*2] = {0, };

    char tempbuf[sizebuf];

    int ssfi = 0;
    int movi = 0;
    FILE *ssfpin = ssfpins[ssfi];
    FILE *ssfpout = ssfpouts[movi];
    int outcount = 0;
    int oldcount;
    bool someinold = false;

    while (1)
    {
        if (feof(ssfpin))
        {
__fileend:
            ssfabs += ssfdurs[ssfi];
            ssfi++;
            if (ssfi >= ssfnum )
            {
                break;
            }
            ssfpin = ssfpins[ssfi];
        }

        fgetline(buf, sizebuf, ssfpin);

        movtime_t begin, end;
        if (isnumber(buf))
        {
            // print last
            if (someinold)
            {
                fprintf(ssfpout-1, "%d/n", oldcount);
                fprintf(ssfpout-1, "%s", oldbuf);
                someinold = false;
            }
            if (outcount > 0)
            {
                fprintf(ssfpout, "%d/n", outcount);
                fprintf(ssfpout, "%s", outbuf);
            }

            outbuf[0] = 0;
            outcount++;
        }
        else if (istimestamp(buf, begin, end))
        {
            if (begin == 0 && end == 0)
            {
                // sign of end
                goto __fileend;
            }

            movtime_t absbegin, absend;
            movtime_t corrbegin, corrend;

            absbegin = ssfabs + begin;
            absend = ssfabs + end;

            // check if it's the next output file's turn
            if (absend >= movabs + movdurs[movi])
            {
                if (absbegin < movabs + movdurs[movi])
                {
                    // absbegin to movabs + movdurs[movi] -> cur
                    corrbegin = absbegin - movabs;
                    corrend = movdurs[movi];
                   
                    int h, m, s, ms;
                    analyse_time(corrbegin, h, m, s, ms);
                    sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
                    strcat(outbuf, tempbuf);
                    analyse_time(corrend, h, m, s, ms);
                    sprintf(tempbuf, " --> %02d:%02d:%02d,%03d/n", h, m, s, ms);
                    strcat(outbuf, tempbuf);
                    strcpy(oldbuf, outbuf); outbuf[0] = 0;
                    oldcount = outcount;
                    someinold = true;

                    // movabs + movdurs[movi] to absend -> next
                    absbegin = movabs + movdurs[movi];

                }
                else
                {    // totally in a new one

                }

                movabs += movdurs[movi];
                movi++;
                if (movi >= movnum)
                {
                    break;
                }
                ssfpout = ssfpouts[movi];

                outcount = 1;
            }

            corrbegin = absbegin - movabs;
            corrend = absend - movabs;

            int h, m, s, ms;
            analyse_time(corrbegin, h, m, s, ms);
            sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
            strcat(outbuf, tempbuf);
            analyse_time(corrend, h, m, s, ms);
            sprintf(tempbuf, " --> %02d:%02d:%02d,%03d/n", h, m, s, ms);
            strcat(outbuf, tempbuf);
        }
        else
        {
            sprintf(tempbuf, "%s/n", buf);
            strcat(outbuf, tempbuf);
            if (someinold)
            {
                strcat(oldbuf, tempbuf);
            }
        }
    }
}

// Everything ParseConfig reads from the configuration file: the opened
// subtitle streams plus the timing tables that rectify works from.
struct CorrStruct
{
// Capacity of each array below. The duration arrays need one slot more than
// the number of files, so ParseConfig accepts at most MAXFNUM - 1 files.
#define MAXFNUM    16
    FILE *ssfpins[MAXFNUM];        // input subtitle streams, ssfnum in count
    FILE *ssfpouts[MAXFNUM];    // output subtitle streams, movnum in count

    int ssfnum;                 // number of input subtitle files
    int movnum;                 // number of output files (movie parts)

    // Each table has one more entry than its file count: the durations come
    // first and the initial offset is stored at index ssfnum / movnum.
    movtime_t ssfdurs[MAXFNUM];    // in msec, 120min --> 7200000
    movtime_t movdurs[MAXFNUM];    // in msec
};

static int ParseConfig (CorrStruct *corr, FILE *fConfig)
{
    int i;
    char buf[sizebuf];
    fgetline(buf, sizebuf, fConfig);
    corr->ssfnum = atoi(buf);
    if (corr->ssfnum + 1 > MAXFNUM)
    {
        return -1;    /* too many files */
    }

    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->ssfdurs[corr->ssfnum]))
    {
        return -2;    /* timestamp error */
    }
    for (i = 0; i < corr->ssfnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpins[i] = fopen(buf, "r");
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->ssfdurs[i]))
        {
            return -2;   
        }
    }

    fgetline(buf, sizebuf, fConfig);
    corr->movnum = atoi(buf);
    if (corr->movnum + 1 > MAXFNUM)
    {
        return -1;    /* too many files */
    }

    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->movdurs[corr->movnum]))
    {
        return -2;   
    }

    for (i = 0; i < corr->movnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpouts[i] = fopen(buf, "w");
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->movdurs[i]))
        {
            return -2;
        }
    }
    return 0;
}

/*
 * Close every stream recorded in corr: ssfnum input streams and movnum output
 * streams. Slots holding NULL (for instance because a file could not be
 * opened) are skipped, and each closed slot is reset to NULL so that calling
 * Finalize twice is harmless. A failure to close an output file is reported
 * on stderr, since it can mean buffered subtitles were not written.
 */
static void Finalize (CorrStruct *corr)
{
    if (corr == NULL)
    {
        return;
    }

    for (int i = 0; i < corr->ssfnum; i++)
    {
        if (corr->ssfpins[i] != NULL)
        {
            fclose(corr->ssfpins[i]);
            corr->ssfpins[i] = NULL;
        }
    }
    for (int i = 0; i < corr->movnum; i++)
    {
        if (corr->ssfpouts[i] != NULL)
        {
            if (fclose(corr->ssfpouts[i]) != 0)
            {
                perror("fclose");
            }
            corr->ssfpouts[i] = NULL;
        }
    }
}

int main (void)
{
    FILE *fConfig = fopen("ssresync_config.txt", "r");
    CorrStruct corr;
    ParseConfig(&corr, fConfig);
    rectify(corr.ssfpins, corr.ssfpouts, corr.ssfnum, corr.movnum, corr.ssfdurs, corr.movdurs);
    Finalize(&corr);

    fclose(fConfig);

    return 0;
}

// An example config file.
// It was used for the subtitles downloaded from shooter.cn for the movie Amadeus, where both the
// movie and the subtitle script are divided into 3 parts, with some time deviation between them.
/*
== BEGIN ==
3
00:00:00,000
inNewMov-Amadeus-CD1.eng.srt
00:58:03,160
inNewMov-Amadeus-CD2.eng.srt
00:58:12,500
inNewMov-Amadeus-CD3.eng.srt
00:58:23,240
3
00:00:00,000
NewMov-Amadeus-CD1.eng.srt
00:57:09,000
NewMov-Amadeus-CD2.eng.srt
00:57:40,000
NewMov-Amadeus-CD3.eng.srt
01:05:32,000
== END ==
*/