Compiling and Installing the Darknet Framework with GPU Support

Darknet: Open Source Neural Networks in C

1. Downloading Darknet

git clone https://github.com/pjreddie/darknet.git
cd darknet

Configure the Makefile:

GPU=1
CUDNN=1
OPENCV=1
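If you prefer to script this step, a minimal sketch using sed (assuming the stock Makefile, where these flags default to 0):

sed -i 's/^GPU=0/GPU=1/; s/^CUDNN=0/CUDNN=1/; s/^OPENCV=0/OPENCV=1/' Makefile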

[1] GPU=1: requires the NVIDIA graphics driver and the CUDA toolkit.

  • Use nvidia-smi to check the GPU model and the highest CUDA version supported by the installed driver (a query sketch follows this list).


  • Download CUDA and cuDNN from the NVIDIA website.

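A minimal query sketch, assuming the driver is already installed so that nvidia-smi is available:

nvidia-smi --query-gpu=name,driver_version --format=csv   # GPU model and driver version
nvidia-smi                                                # header shows the highest supported CUDA version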

If the CUDA installer reports:

Existing package manager installation of the driver found. It is strongly recommended that you remove this before continuing

The cause is a driver that was already installed through the package manager; remove the old driver first:

dpkg -l | grep -i nvidia      # list the installed driver packages
sudo apt-get purge "nvidia*"  # remove the old driver packages
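A toolkit-only re-run of the installer might then look like this (the file name is only an example; use the .run file you actually downloaded):

sudo sh cuda_11.6.2_510.47.03_linux.run --silent --toolkit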

The installation then completes normally. On success the installer prints:

===========
= Summary =
===========

Driver:   Not Selected
Toolkit:  Installed in /usr/local/cuda-11.6/

Please make sure that
 -   PATH includes /usr/local/cuda-11.6/bin
 -   LD_LIBRARY_PATH includes /usr/local/cuda-11.6/lib64, or, add /usr/local/cuda-11.6/lib64 to /etc/ld.so.conf and run ldconfig as root

To uninstall the CUDA Toolkit, run cuda-uninstaller in /usr/local/cuda-11.6/bin

***WARNING: Incomplete installation! This installation did not install the CUDA Driver. A driver of version at least 510.00 is required for CUDA 11.6 functionality to work.
To install the driver using this installer, run the following command, replacing <CudaInstaller> with the name of this run file:
    sudo <CudaInstaller>.run --silent --driver

Logfile is /var/log/cuda-installer.log

Add CUDA to the system path by editing ~/.zshrc (e.g. vim ~/.zshrc):

export PATH=/usr/local/cuda-11.6/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-11.6/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

Run source ~/.zshrc so the new paths take effect, then verify CUDA with nvcc -V:

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Thu_Feb_10_18:23:41_PST_2022
Cuda compilation tools, release 11.6, V11.6.112
Build cuda_11.6.r11.6/compiler.30978841_0
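Optionally, a quick sanity check that the runtime library directory is on the search path as well:

echo $LD_LIBRARY_PATH | tr ':' '\n' | grep cuda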

[2] CUDNN=1

  • Download the cuDNN build that matches your CUDA version from the NVIDIA website (the extractable tar archive is recommended, since it is easier to handle manually).
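A sketch of unpacking the archive (the exact file name depends on the cuDNN and CUDA versions you downloaded; the one below is only an example):

tar -xvf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
cd cudnn-linux-x86_64-8.7.0.84_cuda11-archive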

Copy the extracted cuDNN files into the CUDA directory (/usr/local contains two CUDA paths, one with a version number and one without; make sure to copy into the one without the version number):

sudo cp include/cudnn*.h /usr/local/cuda/include
sudo cp lib/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn*.h /usr/local/cuda/lib64/libcudnn*

After copying, verify the cuDNN version with cat /usr/local/cuda/include/cudnn_version.h | grep CUDNN_MAJOR -A 2:

#define CUDNN_MAJOR 8
#define CUDNN_MINOR 7
#define CUDNN_PATCHLEVEL 0
--
#define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)

2. Compiling Darknet

Because of version incompatibilities, a few files have to be modified first:

  • Replace darknet/src/convolutional_layer.c with https://github.com/arnoldfychen/darknet/blob/master/src/convolutional_layer.c. The stock file does not build against cuDNN 8 or newer, because it calls the cudnnGetConvolution*Algorithm() functions that cuDNN 8 removed; the replacement selects algorithms through cudnnFindConvolution*Algorithm() instead. The relevant part of the replacement file:
#include "convolutional_layer.h"
#include "utils.h"
#include "batchnorm_layer.h"
#include "im2col.h"
#include "col2im.h"
#include "blas.h"
#include "gemm.h"
#include 
#include #define PRINT_CUDNN_ALGO 0
#define MEMORY_LIMIT 2000000000#ifdef AI2
#include "xnor_layer.h"
#endifvoid swap_binary(convolutional_layer *l)
{float *swap = l->weights;l->weights = l->binary_weights;l->binary_weights = swap;#ifdef GPUswap = l->weights_gpu;l->weights_gpu = l->binary_weights_gpu;l->binary_weights_gpu = swap;
#endif
}void binarize_weights(float *weights, int n, int size, float *binary)
{int i, f;for(f = 0; f < n; ++f){float mean = 0;for(i = 0; i < size; ++i){mean += fabs(weights[f*size + i]);}mean = mean / size;for(i = 0; i < size; ++i){binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;}}
}void binarize_cpu(float *input, int n, float *binary)
{int i;for(i = 0; i < n; ++i){binary[i] = (input[i] > 0) ? 1 : -1;}
}void binarize_input(float *input, int n, int size, float *binary)
{int i, s;for(s = 0; s < size; ++s){float mean = 0;for(i = 0; i < n; ++i){mean += fabs(input[i*size + s]);}mean = mean / n;for(i = 0; i < n; ++i){binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;}}
}int convolutional_out_height(convolutional_layer l)
{return (l.h + 2*l.pad - l.size) / l.stride + 1;
}int convolutional_out_width(convolutional_layer l)
{return (l.w + 2*l.pad - l.size) / l.stride + 1;
}image get_convolutional_image(convolutional_layer l)
{return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
}image get_convolutional_delta(convolutional_layer l)
{return float_to_image(l.out_w,l.out_h,l.out_c,l.delta);
}static size_t get_workspace_size(layer l){
#ifdef CUDNNif(gpu_index >= 0){size_t most = 0;size_t s = 0;cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),l.srcTensorDesc,l.weightDesc,l.convDesc,l.dstTensorDesc,l.fw_algo,&s);if (s > most) most = s;cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(),l.srcTensorDesc,l.ddstTensorDesc,l.convDesc,l.dweightDesc,l.bf_algo,&s);if (s > most) most = s;cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(),l.weightDesc,l.ddstTensorDesc,l.convDesc,l.dsrcTensorDesc,l.bd_algo,&s);if (s > most) most = s;return most;}
#endifreturn (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float);
}#ifdef GPU
#ifdef CUDNN
void cudnn_convolutional_setup(layer *l)
{cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); #if CUDNN_MAJOR >= 6cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);#elsecudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);#endif#if CUDNN_MAJOR >= 7cudnnSetConvolutionGroupCount(l->convDesc, l->groups);#elseif(l->groups > 1){error("CUDNN < 7 doesn't support groups, please upgrade!");}#endif#if CUDNN_MAJOR >= 8int returnedAlgoCount;cudnnConvolutionFwdAlgoPerf_t       fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];cudnnConvolutionBwdDataAlgoPerf_t   bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];cudnnFindConvolutionForwardAlgorithm(cudnn_handle(),l->srcTensorDesc,l->weightDesc,l->convDesc,l->dstTensorDesc,CUDNN_CONVOLUTION_FWD_ALGO_COUNT,&returnedAlgoCount,fw_results);for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){#if PRINT_CUDNN_ALGO > 0printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",cudnnGetErrorString(fw_results[algoIndex].status),fw_results[algoIndex].algo, fw_results[algoIndex].time,(unsigned long long)fw_results[algoIndex].memory);#endifif( fw_results[algoIndex].memory < MEMORY_LIMIT ){l->fw_algo = fw_results[algoIndex].algo;break;}}cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(),l->weightDesc,l->ddstTensorDesc,l->convDesc,l->dsrcTensorDesc,CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,&returnedAlgoCount,bd_results);for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){#if PRINT_CUDNN_ALGO > 0printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",cudnnGetErrorString(bd_results[algoIndex].status),bd_results[algoIndex].algo, bd_results[algoIndex].time,(unsigned long long)bd_results[algoIndex].memory);#endifif( bd_results[algoIndex].memory < MEMORY_LIMIT ){l->bd_algo = bd_results[algoIndex].algo;break;}}cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(),l->srcTensorDesc,l->ddstTensorDesc,l->convDesc,l->dweightDesc,CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,&returnedAlgoCount,bf_results);for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){#if PRINT_CUDNN_ALGO > 0printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",cudnnGetErrorString(bf_results[algoIndex].status),bf_results[algoIndex].algo, bf_results[algoIndex].time,(unsigned long long)bf_results[algoIndex].memory);#endifif( bf_results[algoIndex].memory < MEMORY_LIMIT ){l->bf_algo = 
bf_results[algoIndex].algo;break;}}#elsecudnnGetConvolutionForwardAlgorithm(cudnn_handle(),l->srcTensorDesc,l->weightDesc,l->convDesc,l->dstTensorDesc,CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,2000000000,&l->fw_algo);cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),l->weightDesc,l->ddstTensorDesc,l->convDesc,l->dsrcTensorDesc,CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,2000000000,&l->bd_algo);cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),l->srcTensorDesc,l->ddstTensorDesc,l->convDesc,l->dweightDesc,CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,2000000000,&l->bf_algo);#endif
}
#endif
#endifconvolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{int i;convolutional_layer l = {0};l.type = CONVOLUTIONAL;l.groups = groups;l.h = h;l.w = w;l.c = c;l.n = n;l.binary = binary;l.xnor = xnor;l.batch = batch;l.stride = stride;l.size = size;l.pad = padding;l.batch_normalize = batch_normalize;l.weights = calloc(c/groups*n*size*size, sizeof(float));l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));l.biases = calloc(n, sizeof(float));l.bias_updates = calloc(n, sizeof(float));l.nweights = c/groups*n*size*size;l.nbiases = n;// float scale = 1./sqrt(size*size*c);float scale = sqrt(2./(size*size*c/l.groups));//printf("convscale %f\n", scale);//scale = .02;//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();int out_w = convolutional_out_width(l);int out_h = convolutional_out_height(l);l.out_h = out_h;l.out_w = out_w;l.out_c = n;l.outputs = l.out_h * l.out_w * l.out_c;l.inputs = l.w * l.h * l.c;l.output = calloc(l.batch*l.outputs, sizeof(float));l.delta  = calloc(l.batch*l.outputs, sizeof(float));l.forward = forward_convolutional_layer;l.backward = backward_convolutional_layer;l.update = update_convolutional_layer;if(binary){l.binary_weights = calloc(l.nweights, sizeof(float));l.cweights = calloc(l.nweights, sizeof(char));l.scales = calloc(n, sizeof(float));}if(xnor){l.binary_weights = calloc(l.nweights, sizeof(float));l.binary_input = calloc(l.inputs*l.batch, sizeof(float));}if(batch_normalize){l.scales = calloc(n, sizeof(float));l.scale_updates = calloc(n, sizeof(float));for(i = 0; i < n; ++i){l.scales[i] = 1;}l.mean = calloc(n, sizeof(float));l.variance = calloc(n, sizeof(float));l.mean_delta = calloc(n, sizeof(float));l.variance_delta = calloc(n, sizeof(float));l.rolling_mean = calloc(n, sizeof(float));l.rolling_variance = calloc(n, sizeof(float));l.x = calloc(l.batch*l.outputs, sizeof(float));l.x_norm = calloc(l.batch*l.outputs, sizeof(float));}if(adam){l.m = calloc(l.nweights, sizeof(float));l.v = calloc(l.nweights, sizeof(float));l.bias_m = calloc(n, sizeof(float));l.scale_m = calloc(n, sizeof(float));l.bias_v = calloc(n, sizeof(float));l.scale_v = calloc(n, sizeof(float));}#ifdef GPUl.forward_gpu = forward_convolutional_layer_gpu;l.backward_gpu = backward_convolutional_layer_gpu;l.update_gpu = update_convolutional_layer_gpu;if(gpu_index >= 0){if (adam) {l.m_gpu = cuda_make_array(l.m, l.nweights);l.v_gpu = cuda_make_array(l.v, l.nweights);l.bias_m_gpu = cuda_make_array(l.bias_m, n);l.bias_v_gpu = cuda_make_array(l.bias_v, n);l.scale_m_gpu = cuda_make_array(l.scale_m, n);l.scale_v_gpu = cuda_make_array(l.scale_v, n);}l.weights_gpu = cuda_make_array(l.weights, l.nweights);l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights);l.biases_gpu = cuda_make_array(l.biases, n);l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);if(binary){l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);}if(xnor){l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);}if(batch_normalize){l.mean_gpu = cuda_make_array(l.mean, n);l.variance_gpu = cuda_make_array(l.variance, n);l.rolling_mean_gpu = cuda_make_array(l.mean, n);l.rolling_variance_gpu = cuda_make_array(l.variance, n);l.mean_delta_gpu = cuda_make_array(l.mean, n);l.variance_delta_gpu = cuda_make_array(l.variance, n);l.scales_gpu = cuda_make_array(l.scales, n);l.scale_updates_gpu = 
cuda_make_array(l.scale_updates, n);l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);}
#ifdef CUDNNcudnnCreateTensorDescriptor(&l.normTensorDesc);cudnnCreateTensorDescriptor(&l.srcTensorDesc);cudnnCreateTensorDescriptor(&l.dstTensorDesc);cudnnCreateFilterDescriptor(&l.weightDesc);cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);cudnnCreateTensorDescriptor(&l.ddstTensorDesc);cudnnCreateFilterDescriptor(&l.dweightDesc);cudnnCreateConvolutionDescriptor(&l.convDesc);cudnn_convolutional_setup(&l);
#endif}
#endifl.workspace_size = get_workspace_size(l);l.activation = activation;fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d  %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.);return l;
}void denormalize_convolutional_layer(convolutional_layer l)
{int i, j;for(i = 0; i < l.n; ++i){float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale;}l.biases[i] -= l.rolling_mean[i] * scale;l.scales[i] = 1;l.rolling_mean[i] = 0;l.rolling_variance[i] = 1;}
}/*
void test_convolutional_layer()
{convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);l.batch_normalize = 1;float data[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3};//net.input = data;//forward_convolutional_layer(l);
}
*/void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{l->w = w;l->h = h;int out_w = convolutional_out_width(*l);int out_h = convolutional_out_height(*l);l->out_w = out_w;l->out_h = out_h;l->outputs = l->out_h * l->out_w * l->out_c;l->inputs = l->w * l->h * l->c;l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));l->delta  = realloc(l->delta,  l->batch*l->outputs*sizeof(float));if(l->batch_normalize){l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));l->x_norm  = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));}#ifdef GPUcuda_free(l->delta_gpu);cuda_free(l->output_gpu);l->delta_gpu =  cuda_make_array(l->delta,  l->batch*l->outputs);l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);if(l->batch_normalize){cuda_free(l->x_gpu);cuda_free(l->x_norm_gpu);l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);}
#ifdef CUDNNcudnn_convolutional_setup(l);
#endif
#endifl->workspace_size = get_workspace_size(*l);
}void add_bias(float *output, float *biases, int batch, int n, int size)
{int i,j,b;for(b = 0; b < batch; ++b){for(i = 0; i < n; ++i){for(j = 0; j < size; ++j){output[(b*n + i)*size + j] += biases[i];}}}
}void scale_bias(float *output, float *scales, int batch, int n, int size)
{int i,j,b;for(b = 0; b < batch; ++b){for(i = 0; i < n; ++i){for(j = 0; j < size; ++j){output[(b*n + i)*size + j] *= scales[i];}}}
}void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
{int i,b;for(b = 0; b < batch; ++b){for(i = 0; i < n; ++i){bias_updates[i] += sum_array(delta+size*(i+b*n), size);}}
}void forward_convolutional_layer(convolutional_layer l, network net)
{int i, j;fill_cpu(l.outputs*l.batch, 0, l.output, 1);if(l.xnor){binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);swap_binary(&l);binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);net.input = l.binary_input;}int m = l.n/l.groups;int k = l.size*l.size*l.c/l.groups;int n = l.out_w*l.out_h;for(i = 0; i < l.batch; ++i){for(j = 0; j < l.groups; ++j){float *a = l.weights + j*l.nweights/l.groups;float *b = net.workspace;float *c = l.output + (i*l.groups + j)*n*m;float *im =  net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;if (l.size == 1) {b = im;} else {im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);}gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);}}if(l.batch_normalize){forward_batchnorm_layer(l, net);} else {add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);}activate_array(l.output, l.outputs*l.batch, l.activation);if(l.binary || l.xnor) swap_binary(&l);
}void backward_convolutional_layer(convolutional_layer l, network net)
{int i, j;int m = l.n/l.groups;int n = l.size*l.size*l.c/l.groups;int k = l.out_w*l.out_h;gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);if(l.batch_normalize){backward_batchnorm_layer(l, net);} else {backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);}for(i = 0; i < l.batch; ++i){for(j = 0; j < l.groups; ++j){float *a = l.delta + (i*l.groups + j)*m*k;float *b = net.workspace;float *c = l.weight_updates + j*l.nweights/l.groups;float *im  = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w;if(l.size == 1){b = im;} else {im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);}gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);if (net.delta) {a = l.weights + j*l.nweights/l.groups;b = l.delta + (i*l.groups + j)*m*k;c = net.workspace;if (l.size == 1) {c = imd;}gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);if (l.size != 1) {col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);}}}}
}void update_convolutional_layer(convolutional_layer l, update_args a)
{float learning_rate = a.learning_rate*l.learning_rate_scale;float momentum = a.momentum;float decay = a.decay;int batch = a.batch;axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);scal_cpu(l.n, momentum, l.bias_updates, 1);if(l.scales){axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);scal_cpu(l.n, momentum, l.scale_updates, 1);}axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);scal_cpu(l.nweights, momentum, l.weight_updates, 1);
}image get_convolutional_weight(convolutional_layer l, int i)
{int h = l.size;int w = l.size;int c = l.c/l.groups;return float_to_image(w,h,c,l.weights+i*h*w*c);
}void rgbgr_weights(convolutional_layer l)
{int i;for(i = 0; i < l.n; ++i){image im = get_convolutional_weight(l, i);if (im.c == 3) {rgbgr_image(im);}}
}void rescale_weights(convolutional_layer l, float scale, float trans)
{int i;for(i = 0; i < l.n; ++i){image im = get_convolutional_weight(l, i);if (im.c == 3) {scale_image(im, scale);float sum = sum_array(im.data, im.w*im.h*im.c);l.biases[i] += sum*trans;}}
}image *get_weights(convolutional_layer l)
{image *weights = calloc(l.n, sizeof(image));int i;for(i = 0; i < l.n; ++i){weights[i] = copy_image(get_convolutional_weight(l, i));normalize_image(weights[i]);/*char buff[256];sprintf(buff, "filter%d", i);save_image(weights[i], buff);*/}//error("hey");return weights;
}image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
{image *single_weights = get_weights(l);show_images(single_weights, l.n, window);image delta = get_convolutional_image(l);image dc = collapse_image_layers(delta, 1);char buff[256];sprintf(buff, "%s: Output", window);//show_image(dc, buff);//save_image(dc, buff);free_image(dc);return single_weights;
}
  • In src/gemm.c, change the deprecated cudaThreadSynchronize() calls to cudaDeviceSynchronize() (a one-line sed for this is sketched after this list).
  • In the Makefile, add the following -gencode entries (pick the ones matching your GPU's compute capability) and remove the entries for older architectures:
-gencode arch=compute_70,code=[sm_70,compute_70] \
-gencode arch=compute_75,code=[sm_75,compute_75] \
-gencode arch=compute_86,code=[sm_86,compute_86]
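A one-line way to make the gemm.c change mentioned above (GNU sed, run from the darknet root):

sed -i 's/cudaThreadSynchronize/cudaDeviceSynchronize/g' src/gemm.c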

Finally, compile with the make command.
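For example, a parallel build over all available cores:

make -j$(nproc)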

3. Build Errors

A common failure, especially when running make with sudo (root does not inherit the PATH exported above):

/bin/sh: 1: nvcc: not found

The fix is to set the full path to nvcc in the Makefile:

NVCC=/usr/local/cuda/bin/nvcc
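Alternatively, if the error only occurs under sudo, you can keep your user PATH in the root environment instead of editing the Makefile:

sudo env "PATH=$PATH" make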
