七、caffe之train函数片段

来源:互联网 发布:每日流量控制软件 编辑:程序博客网 时间:2024/06/03 11:16

当运行下列命令的时候

ubuntu@ubuntu:~/caffe$ ./examples/mnist/train_lenet.sh 

下面是脚本 train_lenet.sh 的内容(如果只有 CPU,需要修改脚本所引用的 lenet_solver.prototxt,把其中的 solver_mode 设为 CPU):

#!/usr/bin/env shset -e./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt $@

下面是caffe.cpp里面的函数段

// Train / Finetune a model.
//
// Handler for the `train` action of the caffe command-line tool. Reads the
// solver definition named by --solver, selects CPU or GPU execution, builds
// the solver, optionally resumes from a snapshot or loads weights, and then
// runs the optimization. Returns 0 once optimization has finished.
int train() {
  // A solver definition is required, and --snapshot / --weights are mutually
  // exclusive (resume training vs. finetune).
  CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train.";
  CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())
      << "Give a snapshot to resume training or weights to finetune "
      "but not both.";
  vector<string> stages = get_stages_from_flags();

  // Load the solver prototxt into a SolverParameter message.
  caffe::SolverParameter solver_param;
  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);

  // Copy the level and stages requested on the command line into the
  // solver's train state.
  solver_param.mutable_train_state()->set_level(FLAGS_level);
  for (int i = 0; i < stages.size(); i++) {
    solver_param.mutable_train_state()->add_stage(stages[i]);
  }

  // If the gpus flag is not provided, allow the mode and device to be set
  // in the solver prototxt.
  if (FLAGS_gpu.size() == 0
      && solver_param.has_solver_mode()
      && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
      if (solver_param.has_device_id()) {
          FLAGS_gpu = "" +
              boost::lexical_cast<string>(solver_param.device_id());
      } else {  // Set default GPU if unspecified
          FLAGS_gpu = "" + boost::lexical_cast<string>(0);
      }
  }

  // Resolve the list of GPU ids to use; an empty list selects CPU mode.
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() == 0) {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  } else {
    // Build a comma-separated list of the selected GPU ids for the log.
    ostringstream s;
    for (int i = 0; i < gpus.size(); ++i) {
      s << (i ? ", " : "") << gpus[i];
    }
    LOG(INFO) << "Using GPUs " << s.str();
#ifndef CPU_ONLY
    // Log the device name of every selected GPU (compiled out for CPU_ONLY
    // builds).
    cudaDeviceProp device_prop;
    for (int i = 0; i < gpus.size(); ++i) {
      cudaGetDeviceProperties(&device_prop, gpus[i]);
      LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
    }
#endif
    // The first listed GPU becomes the solver's device; the solver count is
    // set to the number of GPUs.
    solver_param.set_device_id(gpus[0]);
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    Caffe::set_solver_count(gpus.size());
  }

  // Translate the --sigint_effect / --sighup_effect flags into solver
  // actions and hand the resulting action function to the solver.
  caffe::SignalHandler signal_handler(
        GetRequestedAction(FLAGS_sigint_effect),
        GetRequestedAction(FLAGS_sighup_effect));

  // Create the solver described by solver_param through the solver registry.
  shared_ptr<caffe::Solver<float> >
      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));

  solver->SetActionFunction(signal_handler.GetActionFunction());

  // Either resume from a snapshot or initialize from the given weights;
  // with neither flag the solver starts as constructed.
  if (FLAGS_snapshot.size()) {
    LOG(INFO) << "Resuming from " << FLAGS_snapshot;
    solver->Restore(FLAGS_snapshot.c_str());
  } else if (FLAGS_weights.size()) {
    CopyLayers(solver.get(), FLAGS_weights);
  }

  LOG(INFO) << "Starting Optimization";
  if (gpus.size() > 1) {
    // More than one GPU: run through NCCL when built with USE_NCCL,
    // otherwise log a fatal message.
#ifdef USE_NCCL
    caffe::NCCL<float> nccl(solver);
    nccl.Run(gpus, FLAGS_snapshot.size() > 0 ? FLAGS_snapshot.c_str() : NULL);
#else
    LOG(FATAL) << "Multi-GPU execution not available - rebuild with USE_NCCL";
#endif
  } else {
    // CPU or single GPU: run the solver directly.
    solver->Solve();
  }
  LOG(INFO) << "Optimization Done.";
  return 0;
}
// Register train() under the name "train" so the command-line tool can
// dispatch to it.
// NOTE(review): RegisterBrewFunction is defined outside this excerpt — confirm.
RegisterBrewFunction(train);
--solver=examples/mnist/lenet_solver.prototxt 文件里面的参数

代码中的 FLAGS_solver 等变量,都是 gflags 解析命令行参数后得到的参数变量。

vector< string> stages = get_stages_from_flags();

这个函数主要用于把命令行中给出的多个 stage 参数解析成一个字符串列表(详细参见本博客 C++ 基本知识讲解)

 caffe::SolverParameter solver_param;

这行代码用到了 protobuf:SolverParameter 是由 caffe.proto 文件生成的类,其实现位于生成的 caffe.pb.cc 文件中

caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);

函数的原定义在 caffe/src/caffe/util/upgrade_proto.cpp 中,这里主要是把命令行中 --solver=examples/mnist/lenet_solver.prototxt 所指定的 lenet_solver.prototxt 里面的参数读到由 caffe.proto 定义的变量 solver_param 中(由于使用了 gflags,命令行选项 --solver 会被解析为 FLAGS_solver;代码中所有以 FLAGS_ 开头的变量都来自命令行解析,例如 FLAGS_weights.size()、FLAGS_snapshot.size(),它们对应的选项在 mnist 这个脚本中都没有使用到)。函数开头的两条 CHECK 即检查 --solver、--snapshot 和 --weights,并在不满足条件时输出消息;

shared_ptr<caffe::Solver<float> >solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));

这里进入了 caffe/src/caffe/solver.cpp 文件中的构造函数:

Solver<Dtype>::Solver(const SolverParameter& param)

大家可以进入 Ubuntu 系统的 /tmp 目录,每运行一次都会产生类似 caffe.ubuntu.ubuntu.log.INFO.20170627-141243.4440 的日志文件,可以对照其中打印的日志信息来跟踪阅读代码。
下面是截取的一小段 glog 日志信息:

Log file created at: 2017/06/27 14:12:43
Running on machine: ubuntu
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
I0627 14:12:43.187585  4440 caffe.cpp:211] Use CPU.
I0627 14:12:43.188031  4440 solver.cpp:44] Initializing solver from parameters: 
test_iter: 100
test_interval: 500
base_lr: 0.01
display: 100
max_iter: 10000
lr_policy: "inv"
gamma: 0.0001
power: 0.75
momentum: 0.9
weight_decay: 0.0005
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
solver_mode: CPU
net: "examples/mnist/lenet_train_test.prototxt"
train_state {
  level: 0
  stage: ""
}
I0627 14:12:43.188195  4440 solver.cpp:87] Creating training net from net file: examples/mnist/lenet_train_test.prototxt
I0627 14:12:43.188627  4440 net.cpp:294] The NetState phase (0) differed from the phase (1) specified by a rule in layer mnist
I0627 14:12:43.188666  4440 net.cpp:294] The NetState phase (0) differed from the phase (1) specified by a rule in layer accuracy
I0627 14:12:43.188832  4440 net.cpp:51] Initializing net from parameters: 
name: "LeNet"
state {
  phase: TRAIN
  level: 0
  stage: ""
}
layer {

未完待续。。。。。

原创粉丝点击