1 #ifndef CAFFE_PARALLEL_HPP_
2 #define CAFFE_PARALLEL_HPP_
6 #include <boost/thread.hpp>
11 #include "caffe/blob.hpp"
12 #include "caffe/common.hpp"
13 #include "caffe/internal_thread.hpp"
14 #include "caffe/layer.hpp"
15 #include "caffe/proto/caffe.pb.h"
16 #include "caffe/solver.hpp"
17 #include "caffe/syncedmem.hpp"
18 #include "caffe/util/blocking_queue.hpp"
19 #include "caffe/util/nccl.hpp"
// Class template parameterised on the element type Dtype.
// NOTE(review): the class header line, the inline function bodies and the
// closing braces are not visible in this excerpt; the class name is taken
// from the constructor and the DISABLE_COPY_AND_ASSIGN argument below.
26 template<
typename Dtype>
// Constructs from a shared pointer to the root solver. `explicit` prevents
// implicit conversion from shared_ptr<Solver<Dtype> >.
29 explicit Params(shared_ptr<Solver<Dtype> > root_solver);
// Const accessor returning a size_t.
// Presumably reports the size_ member -- body not shown, TODO confirm.
33 inline size_t size()
const {
// Const accessor returning a mutable Dtype*.
// Presumably exposes the data_ member -- body not shown, TODO confirm.
36 inline Dtype* data()
const {
// Const accessor returning a mutable Dtype*.
// Presumably exposes the diff_ member -- body not shown, TODO confirm.
39 inline Dtype* diff()
const {
// Macro defined outside this excerpt; by its name it removes the copy
// constructor and copy assignment for Params -- confirm against its definition.
48 DISABLE_COPY_AND_ASSIGN(Params);
// GPUParams<Dtype> derives publicly from Params<Dtype>.
// NOTE(review): access specifiers, any destructor and the closing brace are
// not visible in this excerpt.
52 template<
typename Dtype>
53 class GPUParams :
public Params<Dtype> {
// Constructs from the root solver plus an integer `device`.
// NOTE(review): `device` looks like a GPU device ordinal -- confirm in the
// implementation file.
55 GPUParams(shared_ptr<Solver<Dtype> > root_solver,
int device);
// Const member taking a raw (non-owning in this signature) solver pointer.
// What is configured on the solver is not shown here.
58 void Configure(Solver<Dtype>* solver)
const;
// Using-declarations make the dependent base-class members size_, data_ and
// diff_ nameable without a `this->` / `Params<Dtype>::` qualifier.
61 using Params<Dtype>::size_;
62 using Params<Dtype>::data_;
63 using Params<Dtype>::diff_;
// NCCL<Dtype> derives publicly from GPUParams<Dtype> and from the nested
// callback interfaces Solver<Dtype>::Callback and Net<Dtype>::Callback.
// NOTE(review): access specifiers, the destructor (if any), several member
// declarations and the closing brace are not visible in this excerpt.
66 template<
typename Dtype>
67 class NCCL :
public GPUParams<Dtype>,
68 public Solver<Dtype>::Callback,
69 public Net<Dtype>::Callback {
// Single-argument constructor taking the solver; `explicit` prevents implicit
// conversion from shared_ptr<Solver<Dtype> >.
74 explicit NCCL(shared_ptr<Solver<Dtype> > solver);
// Overload that additionally takes a string `uid`.
// NOTE(review): presumably an identifier produced by new_uid() -- confirm.
79 NCCL(shared_ptr<Solver<Dtype> > solver,
const string& uid);
// Getter/setter pair for a raw boost::barrier pointer. Ownership of the
// barrier is not expressed by these signatures -- verify who deletes it.
82 boost::barrier* barrier();
83 void set_barrier(boost::barrier* value);
// Static helper operating on a caller-supplied vector of NCCL instance
// pointers (passed by pointer, so it may be modified).
89 static void InitSingleProcess(vector<NCCL<Dtype>*>* nccls);
// Static helper returning a string by value.
91 static string new_uid();
// Takes a list of integer GPU ids and a C string `restore`.
// NOTE(review): `restore` is presumably a snapshot/state path and may be
// null -- confirm against the implementation.
101 void Run(
const vector<int>& gpus,
const char* restore);
// NOTE(review): likely the implementation of a hook declared in one of the
// Callback bases; no `virtual`/`override` is visible here -- confirm.
107 void on_gradients_ready();
// CUDA stream handle held by this object; creation/destruction not shown.
110 cudaStream_t stream_;
// Shared ownership of the solver passed to the constructors (presumably).
112 shared_ptr<Solver<Dtype> > solver_;
// Raw barrier pointer, matching the barrier()/set_barrier() signatures above.
114 boost::barrier* barrier_;
// Re-export the dependent Params<Dtype> members so they can be used
// unqualified inside this template.
115 using Params<Dtype>::size_;
116 using Params<Dtype>::data_;
117 using Params<Dtype>::diff_;