Erlang和其他语言的交互

来源:互联网 发布:淘宝wap流量是什么 编辑:程序博客网 时间:2024/05/16 02:23

Erlang和其他语言的交互

Erlang和其他语言(如C和Java)的交互手段一直是我很感兴趣的主题,周末看了下OTP文档,终于大致理清楚了思路。这里先简单总结四种交互手段,也是更进一步学习Erlang的开始。

端口

最简单的方式是调用Erlang模块的 open_port/2 ,创建一个端口。Erlang端口可以被认为是一个外部Erlang进程,交互手段是通过标准IO输入输出,对应C语言里的read和write函数。

%% Type specification of open_port/2.
%% PortName selects what the port is attached to; PortSettings is a list of
%% options (Opt) controlling how data is exchanged with it.
-spec open_port(PortName, PortSettings) -> port() when
      %% What to open: a spawned command, a driver, an executable file,
      %% or a pair of already-open file descriptors.
      PortName :: {spawn, Command :: string() | binary()} |
                  {spawn_driver, Command :: string() | binary()} |
                  {spawn_executable, FileName :: file:name() } |
                  {fd, In :: non_neg_integer(), Out :: non_neg_integer()},
      PortSettings :: [Opt],
      %% {packet, N} is the option used by the examples in this article:
      %% the C side reads an N-byte length header before each message.
      Opt :: {packet, N :: 1 | 2 | 4}
           | stream
           | {line, L :: non_neg_integer()}
           | {cd, Dir :: string() | binary()}
           | {env, Env :: [{Name :: string(), Val :: string() | false}]}
           | {args, [string() | binary()]}
           | {arg0, string() | binary()}
           | exit_status
           | use_stdio
           | nouse_stdio
           | stderr_to_stdout
           | in
           | out
           | binary
           | eof
       | {parallelism, Boolean :: boolean()}
       | hide.

还没有读源码,但是原理很好理解,只需要重定向标准输入输出,如linux的dup2函数,就可以实现数据交互。具体的数据格式也是简单的二进制串,由{packet, N}指定每条消息开头长度字段所占的字节数(1、2或4)。

例子如下:

%% erlang
%% simple test for string operation with c
%%
%% A registered server process owns a port to the external program Prog.
%% strlen/1 and strcmp/2 send a request to that server and block until the
%% reply arrives.
-module(complex1).
-export([start/1, stop/0, init/1]).
-export([strlen/1, strcmp/2]).

%% Spawn the server process that opens the port to Prog.
start(Prog) ->
    spawn(?MODULE, init, [Prog]).

%% Ask the server to close the port and terminate.
stop() ->
    ?MODULE ! stop.

strlen(S) ->
    call_port({strlen, S}).
strcmp(S, T) ->
    call_port({strcmp, S, T}).

%% Send one request to the server and wait for its answer.
%% Returns the decoded integer, or {error, port_exit_error} when the port
%% terminated while the request was in flight.
call_port(Msg) ->
    ?MODULE ! {call, self(), Msg},
    receive
        {?MODULE, Result} ->
            Result
    end.

%% Server entry point: register under the module name, trap exits so a dying
%% port is delivered as an 'EXIT' message, then open the port with a 2-byte
%% length header in front of every message.
init(Prog) ->
    register(?MODULE, self()),
    process_flag(trap_exit, true),
    Port = open_port({spawn, Prog}, [{packet, 2}]),
    loop(Port).

loop(Port) ->
    receive
        {call, From, Msg} ->
            Port ! {self(), {command, encode(Msg)}},
            receive
                {Port, {data, Data}} ->
                    From ! {?MODULE, decode(Data)};
                %% The port may die before it answers. Without this clause
                %% the server and the caller would both wait forever.
                {'EXIT', Port, _Reason} ->
                    From ! {?MODULE, {error, port_exit_error}},
                    exit(port_exit_error)
            end,
            loop(Port);
        stop ->
            Port ! {self(), close},
            receive
                {Port, closed} ->
                    exit(normal)
            end;
        {'EXIT', Port, _Reason} ->
            exit(port_exit_error)
    end.

%% Wire format: first byte is the function code (1 = strlen, 2 = strcmp),
%% followed by the argument bytes; strcmp arguments are each followed by a 0.
encode({strlen, X}) -> [1, list_to_binary(X)];
encode({strcmp, X, Y}) -> [2, list_to_binary(X), 0, list_to_binary(Y), 0].

%% The reply is a single byte, delivered as a one-element list.
decode([Int]) -> Int.

后面的Erlang端程序大同小异,只写关键点。C语言那边也很简单:

#include <unistd.h>#include "comm.h"int read_exact(byte*, int);int write_exact(byte*, int);int read_cmd(byte* buf){    int len;    if (read_exact(buf, 2) != 2)        return -1;    len = (buf[0] << 8) | buf[1];    return read_exact(buf, len);}int write_cmd(byte* buf, int len){    byte li;    li = (len >> 8) & 0xff;    write_exact(&li, 1);    li = len & 0xff;    write_exact(&li, 1);    return write_exact(buf, len);}int read_exact(byte* buf, int len){    int i, got = 0;    do {        if ((i = read(0, buf+got, len-got)) <= 0)            return i;        got += i;    } while (got < len);    return len;}int write_exact(byte* buf, int len){    int i, wrote = 0;    do {        if ((i = write(1, buf+wrote, len-wrote)) <= 0)            return i;        wrote += i;    } while (wrote < len);    return len;}
// C#include <stdio.h>#include <unistd.h>#include <stdlib.h>#include <string.h>#include "comm.h"#define LOG_FILE "einterface.log"void split_string(char* s, char** s1, char** s2){    *s1 = s;    *s2 = s + strlen(s) + 1;    }int main(){    int fn, res, len, i;    char *s1, *s2;        byte buf[100] = {0};        FILE* f = fopen(LOG_FILE, "w");    if (f == NULL)        return -1;    while ( (len=read_cmd(buf)) > 0) {        fn = buf[0];                fprintf(f, "Data: ");        for (i = 0; i < len; ++i)            fprintf(f, "0x%02x ", buf[i]);        fprintf(f, "\n");                if (fn == 1) {            res = strlen((char*)buf + 1);        } else if (fn == 2) {            split_string((char*)buf + 1, &s1, &s2);            fprintf(f, "s1: %s, s2: %s\n", s1, s2);            res = strcmp(s1,s2);        }        buf[0] = res;        write_cmd(buf, 1);        memset(buf, 0, sizeof(buf));    }    fclose(f);        return 0;}

端口方式最简单,但是缺点也很明显,数据量大的时候效率很低。

Erl_Interface

准确的说,这和上一种方式一样,都是利用输入输出。Erl_Interface是Erlang官方提供的数据编码手段,用来替换我们的decode和encode。能把所有Erlang项式编码成二进制。

    %% Request clause of the port-owner loop, erl_interface variant:
    %% the request term is serialised with term_to_binary/1 before it is
    %% sent to the port, and the port's answer is turned back into a term
    %% with binary_to_term/1 before it is forwarded to the caller.
    {call, From, Msg} ->
        Port ! {self(), {command, term_to_binary(Msg)}},
        receive
            {Port, {data, Data}} ->
                From ! {?MODULE, binary_to_term(Data)}
        end,

C结构体的定义在erl_interface.h,缺少文档说明的接口也能在这里找到。用到的关键结构ETERM是所有Erlang基本数据结构的union。

/*
 * ETERM (excerpt from erl_interface.h): a single union, uval, with one
 * member per Erl_* representation. The member names suggest the term kind
 * they hold (integer, float, atom, pid, ...) -- presumably only the member
 * matching the term's actual type is valid; confirm against the header.
 */
typedef struct _eterm {
  union {
    Erl_Integer    ival;
    Erl_Uinteger   uival;
    Erl_LLInteger  llval;
    Erl_ULLInteger ullval;
    Erl_Float      fval;
    Erl_Atom       aval;
    Erl_Pid        pidval;
    Erl_Port       portval;
    Erl_Ref        refval;
    Erl_List       lval;
    Erl_EmptyList  nval;
    Erl_Tuple      tval;
    Erl_Binary     bval;
    Erl_Variable   vval;
    Erl_Function   funcval;
    Erl_Big        bigval;
  } uval;
} ETERM;

常用的转换函数也都封装成了宏:

/* Accessor macros for ETERM (excerpt); x is a pointer to an ETERM. */

/* Numeric payloads, read straight out of the matching union member. */
#define ERL_INT_VALUE(x)  ((x)->uval.ival.i)
#define ERL_INT_UVALUE(x) ((x)->uval.uival.u)
#define ERL_LL_VALUE(x)   ((x)->uval.llval.i)
#define ERL_LL_UVALUE(x)  ((x)->uval.ullval.u)
#define ERL_FLOAT_VALUE(x) ((x)->uval.fval.f)

/* Atom name and size, obtained through the erl_atom_* helpers in either
   Latin-1 or UTF-8 form. */
#define ERL_ATOM_PTR(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
#define ERL_ATOM_PTR_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.aval.d)
#define ERL_ATOM_SIZE(x) erl_atom_size_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
#define ERL_ATOM_SIZE_UTF8(x) erl_atom_size_utf8((Erl_Atom_data*) &(x)->uval.aval.d)

/* Pid components: node name (via the atom helpers) and the raw
   number/serial/creation fields. */
#define ERL_PID_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.pidval.node)
#define ERL_PID_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.pidval.node)
#define ERL_PID_NUMBER(x) ((x)->uval.pidval.number)
#define ERL_PID_SERIAL(x) ((x)->uval.pidval.serial)
#define ERL_PID_CREATION(x) ((x)->uval.pidval.creation)

使用erl_interface的C程序如下:

#include <unistd.h>#include <string.h>#include "comm.h"#include "erl_interface.h"#include "ei.h"int main(){    ETERM *tuplep, *intp;    ETERM *fnp, *argp, *sp1, *sp2;    byte buf[100] = {0};    int res, allocated, freed;    erl_init(NULL, 0);    while (read_cmd(buf) > 0) {        tuplep = erl_decode(buf);        fnp = erl_element(1, tuplep);        sp1 = erl_element(2, tuplep);        if (strncmp((const char*)ERL_ATOM_PTR(fnp), "strlen", 6) == 0) {            res = strlen(erl_iolist_to_string(sp1));        } else if (strncmp((const char*)ERL_ATOM_PTR(fnp), "strcmp", 6) == 0) {            sp2 = erl_element(3, tuplep);            res = strcmp(erl_iolist_to_string(sp1), erl_iolist_to_string(sp2));        }        intp = erl_mk_int(res);        erl_encode(intp, buf);        write_cmd(buf, erl_term_len(intp));        erl_free_compound(tuplep);        erl_free_term(fnp);        erl_free_term(argp);        erl_free_term(intp);    }    return 0;}

erl_init 用于初始化内存管理等。

嵌入式端口(端口驱动)

和前面两种方式不同,嵌入式端口作为动态模块直接加载到Erlang虚拟机。Erlang端需要先加载模块:

%% Load the driver library ProgLib from the current directory, then spawn
%% the server process. A driver that is already loaded counts as success;
%% any other outcome is printed and the calling process exits.
start(ProgLib) ->
    LoadResult = erl_ddll:load_driver("./", ProgLib),
    check_driver_load(LoadResult),
    spawn(?MODULE, init, [ProgLib]).

%% Map the result of erl_ddll:load_driver/2 to ok, or exit on failure.
check_driver_load(ok) ->
    ok;
check_driver_load({error, already_loaded}) ->
    ok;
check_driver_load(Failure) ->
    io:format("error: ~p~n", [Failure]),
    exit({error, could_not_load_driver}).

C程序做的变动比较大,主要是完善ErlDrvEntry接口:

/*
 * This structure defines a driver.
 *
 * It is a table of callbacks plus a few bookkeeping fields; a driver fills
 * in the callbacks it implements and leaves the others NULL.
 */
typedef struct erl_drv_entry {
    /* called at system start up for statically linked drivers, and after
       loading for dynamically loaded drivers */
    int (*init)(void);
#ifndef ERL_SYS_DRV
    /* called when open_port/2 is invoked. return value -1 means failure. */
    ErlDrvData (*start)(ErlDrvPort port, char *command);
#else
    /* special options, only for system driver */
    ErlDrvData (*start)(ErlDrvPort port, char *command, SysDriverOpts* opts);
#endif
    /* called when port is closed, and when the emulator is halted. */
    void (*stop)(ErlDrvData drv_data);
    /* called when we have output from erlang to the port */
    void (*output)(ErlDrvData drv_data, char *buf, ErlDrvSizeT len);
    /* called when we have input from one of the driver's handles */
    void (*ready_input)(ErlDrvData drv_data, ErlDrvEvent event);
    /* called when output is possible to one of the driver's handles */
    void (*ready_output)(ErlDrvData drv_data, ErlDrvEvent event);
    /* name supplied as command in open_port XXX ? */
    char *driver_name;
    /* called before unloading the driver - DYNAMIC DRIVERS ONLY */
    void (*finish)(void);
    /* Reserved -- Used by emulator internally */
    void *handle;
    /* "ioctl" for drivers - invoked by port_control/3 */
    ErlDrvSSizeT (*control)(ErlDrvData drv_data, unsigned int command,
                char *buf, ErlDrvSizeT len, char **rbuf,
                ErlDrvSizeT rlen);
    /* Handling of timeout in driver */
    void (*timeout)(ErlDrvData drv_data);
    /* called when we have output from erlang to the port */
    void (*outputv)(ErlDrvData drv_data, ErlIOVec *ev);
    void (*ready_async)(ErlDrvData drv_data, ErlDrvThreadData thread_data);
    /* called when the port is about to be closed, and there is data in the
       driver queue that needs to be flushed before 'stop' can be called */
    void (*flush)(ErlDrvData drv_data);
    /* Works mostly like 'control', a synchronous call into the driver. */
    ErlDrvSSizeT (*call)(ErlDrvData drv_data,
             unsigned int command, char *buf, ErlDrvSizeT len,
             char **rbuf, ErlDrvSizeT rlen,
             unsigned int *flags);
    /* Called when an event selected by driver_event() has occurred */
    void (*event)(ErlDrvData drv_data, ErlDrvEvent event,
          ErlDrvEventData event_data);
    int extended_marker;    /* ERL_DRV_EXTENDED_MARKER */
    int major_version;      /* ERL_DRV_EXTENDED_MAJOR_VERSION */
    int minor_version;      /* ERL_DRV_EXTENDED_MINOR_VERSION */
    int driver_flags;       /* ERL_DRV_FLAGs */
    /* Reserved -- Used by emulator internally */
    void *handle2;
    /* Called when a process monitor fires */
    void (*process_exit)(ErlDrvData drv_data, ErlDrvMonitor *monitor);
    /* Called on behalf of driver_select when it is safe to release 'event'.
       A typical unix driver would call close(event) */
    void (*stop_select)(ErlDrvEvent event, void* reserved);
    /* called when the port is closed abruptly.
       specifically when erl_crash_dump is called. */
    void (*emergency_close)(ErlDrvData drv_data);
    /* When adding entries here, dont forget to pad in obsolete/driver.h */
} ErlDrvEntry;

作为样例,简单的初始化stop、start和output就行:

/*
 * Driver table for the example port driver. Only start, stop and output are
 * implemented; every other callback is left NULL. Initialisers are
 * positional, so the order follows the ErlDrvEntry field order. The final
 * field (emergency_close) is not listed and is therefore zero-initialised.
 */
ErlDrvEntry driver_entry = {
    NULL,                           /* init */
    drv_start,                      /* start */
    drv_stop,                       /* stop */
    drv_output,                     /* output */
    NULL,                           /* ready_input */
    NULL,                           /* ready_output */
    "port_driver",                  /* driver_name */
    NULL,                           /* finish */
    NULL,                           /* handle */
    NULL,                           /* control */
    NULL,                           /* timeout */
    NULL,                           /* outputv */
    NULL,                           /* ready_async */
    NULL,                           /* flush */
    NULL,                           /* call */
    NULL,                           /* event */
    ERL_DRV_EXTENDED_MARKER,        /* extended_marker */
    ERL_DRV_EXTENDED_MAJOR_VERSION, /* major_version */
    ERL_DRV_EXTENDED_MINOR_VERSION, /* minor_version */
    0,                              /* driver_flags */
    NULL,                           /* handle2 */
    NULL,                           /* process_exit */
    NULL                            /* stop_select */
};

利用宏 DRIVER_INIT 完成初始化:

#ifdef STATIC_ERLANG_DRIVER#  define ERLANG_DRIVER_NAME(NAME) NAME ## _driver_init#  define ERL_DRIVER_EXPORT#else#  define ERLANG_DRIVER_NAME(NAME) driver_init#  if defined(__GNUC__) && __GNUC__ >= 4#    define ERL_DRIVER_EXPORT __attribute__ ((visibility("default")))#  elif defined (__SUNPRO_C) && (__SUNPRO_C >= 0x550)#    define ERL_DRIVER_EXPORT __global#  else#    define ERL_DRIVER_EXPORT#  endif#endif#ifndef ERL_DRIVER_TYPES_ONLY#define DRIVER_INIT(DRIVER_NAME) \    ERL_DRIVER_EXPORT ErlDrvEntry* ERLANG_DRIVER_NAME(DRIVER_NAME)(void); \    ERL_DRIVER_EXPORT ErlDrvEntry* ERLANG_DRIVER_NAME(DRIVER_NAME)(void)

三个回调如下:

/* Per-port driver state: just the port handle needed to send replies. */
typedef struct {
    ErlDrvPort port;
} data;

/*
 * Called when the port is opened. Allocates the per-port state and returns
 * it as the driver data handle, or ERL_DRV_ERROR_GENERAL if the allocation
 * fails.
 */
static ErlDrvData drv_start(ErlDrvPort port, char* buf)
{
    data* d = (data*)driver_alloc(sizeof(data));

    if (d == NULL)
        return ERL_DRV_ERROR_GENERAL;
    d->port = port;
    return (ErlDrvData)d;
}

/* Called when the port is closed: releases the per-port state. */
static void drv_stop(ErlDrvData handle)
{
    driver_free(handle);
}

/*
 * Split a buffer holding two consecutive NUL-terminated strings.
 * *s1 points at the first string, *s2 at the byte after its terminator.
 * Both strings must be NUL-terminated inside the buffer.
 */
void split_string(char* s, char** s1, char** s2)
{
    *s1 = s;
    *s2 = s + strlen(s) + 1;
}

/* Length of the string at s, examining at most max bytes. */
static size_t bounded_strlen(const char* s, size_t max)
{
    const char* nul = (const char*)memchr(s, 0, max);

    return nul != NULL ? (size_t)(nul - s) : max;
}

/*
 * Called with each message sent to the port.
 *
 * buf[0] is the function code (1 = strlen, 2 = strcmp) and the rest is the
 * argument data. The data is NOT guaranteed to be NUL-terminated, so all
 * string handling is bounded by len instead of relying on strlen/strcmp.
 *
 * The reply is a single byte: the string length truncated to 8 bits for
 * strlen; -1, 0 or 1 (as a byte) for strcmp; 0 for unknown function codes.
 * An empty message carries no function code and is ignored.
 */
static void drv_output(ErlDrvData handle, char* buf,
        ErlDrvSizeT len)
{
    data* d = (data*)handle;
    const char *s1, *s2;
    size_t avail, n1, n2, common;
    char reply;
    int res = 0, fn;

    if (len == 0)
        return;

    fn = buf[0];
    s1 = buf + 1;
    avail = (size_t)len - 1;
    n1 = bounded_strlen(s1, avail);

    if (fn == 1) {
        res = (int)n1;
    } else if (fn == 2) {
        if (n1 < avail) {
            /* Second string starts right after the first terminator. */
            s2 = s1 + n1 + 1;
            n2 = bounded_strlen(s2, avail - n1 - 1);
        } else {
            /* No terminator found: treat the second string as empty. */
            s2 = s1 + n1;
            n2 = 0;
        }
        common = n1 < n2 ? n1 : n2;
        res = memcmp(s1, s2, common);
        if (res == 0)
            res = (n1 > n2) - (n1 < n2);
        /* Reduce to the sign so the single reply byte cannot turn a
           non-zero difference into 0. */
        res = (res > 0) - (res < 0);
    }

    /* Send an explicit byte rather than the first byte of an int, which
       would depend on the host's endianness. */
    reply = (char)res;
    driver_output(d->port, &reply, 1);
}

依然是利用端口交互。

分布式Erlang节点

分布式方式用C创建一个Erlang节点,以分布式的方式和Erlang交互。很独特的一种方法,详细的工作原理和数据格式以后会分析。

%% Erlang
%% Client side of the C-node example: requests are sent as ordinary
%% messages to the node named by ?CNODE, and the reply is awaited.
-module(complex4).
-export([strlen/1, strcmp/2]).

-define(CNODE, 'cnode@192.168.0.4').

%% Ask the C node for the length of Str.
strlen(Str) ->
    call_port({strlen, Str}).

%% Ask the C node to compare Left and Right.
strcmp(Left, Right) ->
    call_port({strcmp, Left, Right}).

%% Send Request to the C node and block until it answers.
call_port(Request) ->
    Caller = self(),
    {any, ?CNODE} ! {call, Caller, Request},
    wait_for_reply().

%% Wait for the {cnode, Reply} message and return Reply.
wait_for_reply() ->
    receive
        {cnode, Reply} ->
            Reply
    end.

另一端是什么语言编写的完全无所谓。

C程序用ErlMessage封装了消息:

/*
 * Message wrapper used by the C node code; the receive loop in this
 * article passes a pointer to one of these to erl_receive_msg and then
 * inspects type and msg.
 */
typedef struct {
  int type;   /* one of the message type constants in eiext.h */
  ETERM *msg; /* the actual message */
  ETERM *from;  /* presumably the sender -- confirm against the header */
  ETERM *to;    /* presumably the recipient -- confirm against the header */
  char to_name[MAXREGLEN+1]; /* presumably the registered name the message
                                was addressed to -- confirm */
} ErlMessage;

erl_receive_msg 接收消息,erl_send 发送消息,消息格式用erl_interface封装。可以实现为服务端和客户端,服务端等待Erlang节点连接,客户端节点要在Erlang启动后发起连接:

    /*
     * Server variant: initialise erl_interface, set up the local node as
     * "cnode@192.168.0.4", then listen, publish the port and wait for an
     * Erlang node to connect. Every step aborts through erl_err_quit on
     * failure. The order of the calls matters.
     */
    erl_init(NULL, 0);
    addr.s_addr = inet_addr("192.168.0.4");
    /* "123456" is presumably the connection cookie -- confirm against the
       erl_connect_xinit documentation. */
    if (erl_connect_xinit("192.168.0.4", "cnode", "cnode@192.168.0.4",
                &addr, "123456", 0) == -1)
        erl_err_quit("erl_connect_init error");
    /* listen_port is a helper defined elsewhere in the example. */
    if ((listen = listen_port(port)) <= 0)
        erl_err_quit("listen_port error");
    if (erl_publish(port) == -1)
        erl_err_quit("erl_publish error");
    /* fd is the connection used by the message loop afterwards. */
    if ((fd = erl_accept(listen, &conn)) == ERL_ERROR)
        erl_err_quit("erl_accept error");
    /*
     * Client variant: initialise erl_interface, set up the local node as
     * "cnode@192.168.0.4", then actively connect to the Erlang node
     * "e1@192.168.0.4". That node must already be running. Every step
     * aborts through erl_err_quit on failure.
     */
    erl_init(NULL, 0);
    addr.s_addr = inet_addr("192.168.0.4");
    /* "123456" is presumably the connection cookie -- confirm against the
       erl_connect_xinit documentation. */
    if (erl_connect_xinit("192.168.0.4", "cnode", "cnode@192.168.0.4",
                &addr, "123456", 0) == -1)
        erl_err_quit("erl_connect_init error");
    /* fd is the connection used by the message loop afterwards. */
    if ((fd = erl_connect("e1@192.168.0.4")) < 0)
        erl_err_quit("erl_connect error");
    fprintf(stderr, "Connected to e1@192.168.0.4\n");

连接成功后就是数据交互:

    /*
     * Message loop of the C node.
     *
     * Each registered-send message is expected to be
     * {call, FromPid, {strlen, S}} or {call, FromPid, {strcmp, S, T}}.
     * The answer {cnode, Result} is sent back to FromPid. Malformed
     * requests, unknown function atoms and arguments that are not valid
     * strings are answered with 0. If the sender cannot be extracted, no
     * reply is sent. The loop ends when erl_receive_msg reports an error.
     */
    while (loop) {
        got = erl_receive_msg(fd, buf, BUFSIZE, &emsg);
        if (got == ERL_ERROR) {
            loop = 0;
        } else if (got == ERL_TICK) {
            // pass
        } else {
            if (emsg.type == ERL_REG_SEND) {
                char *str1 = NULL, *str2 = NULL;

                /* Start every iteration from a known state so the cleanup
                   below never touches a stale or uninitialised pointer. */
                fnp = NULL;
                s1 = NULL;
                s2 = NULL;
                resp = NULL;
                res = 0;

                fromp = erl_element(2, emsg.msg);
                tuplep = erl_element(3, emsg.msg);
                if (tuplep != NULL) {
                    fnp = erl_element(1, tuplep);
                    s1 = erl_element(2, tuplep);
                }

                if (fnp != NULL && s1 != NULL) {
                    if (strncmp((const char*)ERL_ATOM_PTR(fnp), "strlen", 6) == 0) {
                        str1 = erl_iolist_to_string(s1);
                        if (str1 != NULL)
                            res = strlen(str1);
                    } else if (strncmp((const char*)ERL_ATOM_PTR(fnp), "strcmp", 6) == 0) {
                        s2 = erl_element(3, tuplep);
                        str1 = erl_iolist_to_string(s1);
                        if (s2 != NULL)
                            str2 = erl_iolist_to_string(s2);
                        if (str1 != NULL && str2 != NULL)
                            res = strcmp(str1, str2);
                    }
                }

                if (fromp != NULL) {
                    resp = erl_format("{cnode, ~i}", res);
                    if (resp != NULL)
                        erl_send(fd, fromp, resp);
                }

                /* Strings returned by erl_iolist_to_string are owned by us. */
                if (str1 != NULL)
                    erl_free(str1);
                if (str2 != NULL)
                    erl_free(str2);

                erl_free_term(emsg.from);
                erl_free_term(emsg.msg);
                if (fromp != NULL)
                    erl_free_term(fromp);
                if (tuplep != NULL)
                    erl_free_term(tuplep);
                if (fnp != NULL)
                    erl_free_term(fnp);
                if (s1 != NULL)
                    erl_free_term(s1);
                if (s2 != NULL)
                    erl_free_term(s2);
                if (resp != NULL)
                    erl_free_term(resp);
            }
        }
    }

NIF

最后一种方式,也是最新的,实现NIF内部函数。具体来说,就是编写一个动态链接库,加载到Erlang虚拟机,导出接口让Erlang调用。

%% Erlang side of the NIF example.
%% init/0 runs when the module is loaded (-on_load) and loads the native
%% library "./cnif"; the `ok =` match makes any other result a badmatch.
-module(complex5).
-export([cstrlen/1, cstrcmp/2]).
-on_load(init/0).

init() ->
    ok = erlang:load_nif("./cnif", 0).

%% Erlang-level stubs: they only exit with nif_library_not_loaded. The
%% native implementations are meant to take their place once the library
%% is loaded.
cstrlen(_S) ->
    exit(nif_library_not_loaded).
cstrcmp(_S, _T) ->
    exit(nif_library_not_loaded).

load_nif 负责加载动态库;Erlang里定义的同名函数只是桩(stub),只有在NIF库没有成功加载、对应的导出函数不存在时才会被调用。

C程序填充ERL_NIF_INIT宏:

#define ERL_NIF_INIT(NAME, FUNCS, LOAD, RELOAD, UPGRADE, UNLOAD) \ERL_NIF_INIT_PROLOGUE                   \ERL_NIF_INIT_GLOB                       \ERL_NIF_INIT_DECL(NAME);        \ERL_NIF_INIT_DECL(NAME)         \{                   \    static ErlNifEntry entry =      \    {                   \    ERL_NIF_MAJOR_VERSION,      \    ERL_NIF_MINOR_VERSION,      \    #NAME,              \    sizeof(FUNCS) / sizeof(*FUNCS), \    FUNCS,              \    LOAD, RELOAD, UPGRADE, UNLOAD,  \    ERL_NIF_VM_VARIANT,     \    ERL_NIF_ENTRY_OPTIONS       \    };                                  \    ERL_NIF_INIT_BODY;                  \    return &entry;          \}     

实际上是初始化了ErlNifEntry结构体。

/*
 * Descriptor of a NIF library (excerpt). ERL_NIF_INIT fills the fields in
 * this order; the per-field notes name the macro argument or constant that
 * ERL_NIF_INIT stores there.
 */
typedef struct enif_entry_t
{
    int major;              /* ERL_NIF_MAJOR_VERSION */
    int minor;              /* ERL_NIF_MINOR_VERSION */
    const char* name;       /* stringified NAME argument */
    int num_of_funcs;       /* element count of the FUNCS array */
    ErlNifFunc* funcs;      /* the FUNCS array */
    /* lifecycle callbacks: the LOAD, RELOAD, UPGRADE, UNLOAD arguments */
    int  (*load)   (ErlNifEnv*, void** priv_data, ERL_NIF_TERM load_info);
    int  (*reload) (ErlNifEnv*, void** priv_data, ERL_NIF_TERM load_info);
    int  (*upgrade)(ErlNifEnv*, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info);
    void (*unload) (ErlNifEnv*, void* priv_data);
    const char* vm_variant; /* ERL_NIF_VM_VARIANT */
    unsigned options;       /* ERL_NIF_ENTRY_OPTIONS */
}ErlNifEntry;

作为简单例子,这里只关注导出函数:

/*
 * Table of exported NIFs. Each entry is {Erlang function name, arity,
 * C implementation}.
 */
static ErlNifFunc nif_func[] = {
    {"cstrlen", 1, strlen_nif},   /* cstrlen/1 */
    {"cstrcmp", 2, strcmp_nif}    /* cstrcmp/2 */
};

分别是函数名、元数和实现:

/*
 * cstrlen/1: returns the length of the Latin-1 string argv[0].
 *
 * Raises badarg when the argument is not a string or does not fit into the
 * 100-byte buffer (99 characters plus terminator). enif_get_string reports
 * truncation with a negative value, so the check must be "<= 0"; a plain
 * "!" test would accept a truncated string and return a wrong length.
 */
static ERL_NIF_TERM strlen_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    int ret;
    char s[100];

    if (enif_get_string(env, argv[0], s, sizeof(s), ERL_NIF_LATIN1) <= 0) {
        return enif_make_badarg(env);
    }
    ret = strlen(s);
    return enif_make_int(env, ret);
}

/*
 * cstrcmp/2: returns strcmp() of the Latin-1 strings argv[0] and argv[1].
 *
 * Raises badarg when either argument is not a string or does not fit into
 * its 100-byte buffer, instead of comparing silently truncated copies.
 */
static ERL_NIF_TERM strcmp_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    int ret;
    char s1[100], s2[100];

    if (enif_get_string(env, argv[0], s1, sizeof(s1), ERL_NIF_LATIN1) <= 0 ||
            enif_get_string(env, argv[1], s2, sizeof(s2), ERL_NIF_LATIN1) <= 0) {
        return enif_make_badarg(env);
    }
    ret = strcmp(s1, s2);
    return enif_make_int(env, ret);
}

最后

完整的代码在这里找到。

对我而言,平时C语言写的最多,而且往往是内核模块。结合C和Erlang是很有必要的,一定要搞清楚交互方式。这几种方式都透露着Erlang的设计哲学和内部实现,接下来需要深入代码来学习了。