pytorch learning notes: writing a C extension


pytorch uses CFFI to build C extensions. There are two basic steps (see the docs):

  1. Write the C code;
  2. Call the C code from python, implementing the corresponding Function or Module.

In a previous post we already saw how to define a custom Module. The structure of the [py]torch C code base is left for a later discussion; here the focus is on how to write C code on top of the high-level interfaces of the pytorch C library, and how to call that code from python.
The official example shows how to define an addition operation (see the repo). Here we define the ReLU function instead (see the repo).

1. C code

pytorch's C library represents tensors with the THTensor family of types (THFloatTensor, THByteTensor, and so on). On top of these we implement ReLU in C,
$$y = \mathrm{ReLU}(x) = \max(x, 0)$$
and later wrap the C implementation in a Function on the python side.
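The backward pass simply gates the incoming gradient by where the input is non-negative (the convention used by the implementation below):
$$\frac{\partial y}{\partial x} = \begin{cases} 1, & x \ge 0 \\ 0, & x < 0 \end{cases}$$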

1.1 Header declaration

/* ext_lib.h */
int relu_forward(THFloatTensor *input, THFloatTensor *output);
int relu_backward(THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *grad_input);

1.2 Function implementation

The header TH/TH.h lives under pytorch's C include directory and pulls in the TH tensor API.

/* ext_lib.c */
#include <TH/TH.h>
#include <math.h>   /* for INFINITY */

int relu_forward(THFloatTensor *input, THFloatTensor *output)
{
  THFloatTensor_resizeAs(output, input);
  /* output = clamp(input, 0, +inf) */
  THFloatTensor_clamp(output, input, 0, INFINITY);
  return 1;
}

int relu_backward(THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *grad_input)
{
  THFloatTensor_resizeAs(grad_input, grad_output);
  THFloatTensor_zero(grad_input);

  /* build a byte mask that is 1 where input >= 0 */
  THLongStorage *size = THFloatTensor_newSizeOf(grad_output);
  THLongStorage *stride = THFloatTensor_newStrideOf(grad_output);
  THByteTensor *mask = THByteTensor_newWithSize(size, stride);
  THFloatTensor_geValue(mask, input, 0);

  /* copy the incoming gradient through the mask */
  THFloatTensor_maskedCopy(grad_input, mask, grad_output);
  return 1;
}
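For reference, the masked copy above computes the same thing as this short pure-pytorch sketch (not part of the extension, just for comparison; the function name is made up here):

import torch

def relu_backward_reference(grad_output, input):
    # gradient flows through wherever the input is non-negative
    mask = input.ge(0).type_as(grad_output)
    return grad_output * mask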

2. Compiling the code

2.1 Dependencies

Building the pytorch C extension requires:
* pytorch - already installed
* cffi - pip install cffi

The build script is shown below; the compiled extension module will be importable as _ext.ext_lib.

# build.py
import os
import torch
from torch.utils.ffi import create_extension

sources = ['src/ext_lib.c']
headers = ['src/ext_lib.h']
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/ext_lib_cuda.c']
    headers += ['src/ext_lib_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

ffi = create_extension(
    '_ext.ext_lib',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda
)

if __name__ == '__main__':
    ffi.build()
Run python build.py to compile; this generates the _ext/ext_lib package next to the build script.
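After the build finishes, a quick way to check that the module was produced (a minimal sketch, run from the same directory as build.py) is:

from _ext import ext_lib

# the two C functions declared in ext_lib.h should be exposed on the module
print(ext_lib.relu_forward, ext_lib.relu_backward)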

3. Calling from python

3.1 Writing the wrapper Function

On the python side, pytorch tensors passed to the extension are converted automatically to the corresponding C THTensor pointers, so the Function can hand its tensors straight to the C functions.

import torch
from torch.autograd import Function
from _ext import ext_lib


class ReLUF(Function):
    def forward(self, input):
        self.save_for_backward(input)

        output = input.new()
        if not input.is_cuda:
            ext_lib.relu_forward(input, output)
        else:
            raise Exception("No CUDA Implementation")
        return output

    def backward(self, grad_output):
        input, = self.saved_tensors

        grad_input = grad_output.new()
        if not grad_output.is_cuda:
            ext_lib.relu_backward(grad_output, input, grad_input)
        else:
            raise Exception("No CUDA Implementation")
        return grad_input
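Before wrapping this in a Module, the Function can be exercised directly; a quick sanity check (a sketch, assuming the class above and the built _ext package are importable) looks like:

import torch
from torch.autograd import Variable

x = Variable(torch.randn(4, 4), requires_grad=True)
y = ReLUF()(x)                    # forward through the C extension
y.backward(torch.ones(y.size()))  # gradient is 1 where x >= 0
print(x.grad)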

3.2 Testing

Finally, wrap the Function in a Module (ReLUM) and test the C ReLU implementation against pytorch's built-in clamp.
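The test script imports ReLUM from modules.relu; that file is not reproduced in this post, but a minimal version (a sketch, assuming ReLUF from the previous section lives in a functions.relu module) would be:

# modules/relu.py (sketch; the import path of ReLUF is an assumption)
import torch.nn as nn
from functions.relu import ReLUF   # assumed location of the Function defined above


class ReLUM(nn.Module):
    def forward(self, input):
        # delegate to the custom autograd Function backed by the C code
        return ReLUF()(input)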

import torch
import torch.nn as nn
from torch.autograd import Variable
from modules.relu import ReLUM

torch.manual_seed(1111)


class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.relu = ReLUM()

    def forward(self, input):
        return self.relu(input)


model = MyNetwork()
x = torch.randn(1, 25).view(5, 5)
input = Variable(x, requires_grad=True)
output = model(input)
print(output)
print(input.clamp(min=0))

output.backward(torch.ones(input.size()))
print(input.grad.data)

Variable containing:
 0.8749  0.5990  0.6844  0.0000  0.0000
 0.6516  0.0000  1.5117  0.5734  0.0072
 0.1286  1.4171  0.0796  1.0355  0.0000
 0.0000  0.0000  0.0312  0.0999  0.0000
 1.0401  1.0599  0.0000  0.0000  0.0000
[torch.FloatTensor of size 5x5]

Variable containing:
 0.8749  0.5990  0.6844  0.0000  0.0000
 0.6516  0.0000  1.5117  0.5734  0.0072
 0.1286  1.4171  0.0796  1.0355  0.0000
 0.0000  0.0000  0.0312  0.0999  0.0000
 1.0401  1.0599  0.0000  0.0000  0.0000
[torch.FloatTensor of size 5x5]

 1  1  1  0  0
 1  0  1  1  1
 1  1  1  1  0
 0  0  1  1  0
 1  1  0  0  0
[torch.FloatTensor of size 5x5]