# Setting up env variables for cleaner command line commands.
import os
print("Please replace the variable with your key.")
%env KEY=your_key
%env GPU_INDEX=0
%env USER_EXPERIMENT_DIR=/your_path/CV
%env DATA_DOWNLOAD_DIR=/your_path/CV/data
%env LOCAL_PROJECT_DIR=/your_path/CV
os.environ["LOCAL_DATA_DIR"] = os.path.join(os.getenv("LOCAL_PROJECT_DIR", os.getcwd()), "data")
os.environ["LOCAL_EXPERIMENT_DIR"] = os.path.join(os.getenv("LOCAL_PROJECT_DIR", os.getcwd()))os.environ["LOCAL_SPECS_DIR"] = os.path.join(os.getenv("NOTEBOOK_ROOT", os.getcwd()),"specs"
)
%env SPECS_DIR=/your_path/7th/specs
!echo $LOCAL_SPECS_DIR
!ls -rlt $LOCAL_SPECS_DIR
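If the listing above comes back empty or errors out, one of the paths was probably not set correctly. Here is a minimal sketch to sanity-check the environment, assuming the variables were exported in the same notebook session as above:
# Illustrative check only: confirm that the key directories resolve and exist on the local machine.
for name in ("LOCAL_PROJECT_DIR", "LOCAL_DATA_DIR", "LOCAL_EXPERIMENT_DIR", "LOCAL_SPECS_DIR"):
    path = os.environ.get(name, "")
    print(f"{name} = {path!r} -> exists: {os.path.isdir(path)}")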
2. Map the local directories into the TAO Docker container
The code is as follows (example):
# Mapping up the local directories to the TAO docker.
import json
mounts_file = os.path.expanduser("~/.tao_mounts.json")

# Define the dictionary with the mapped drives
drive_map = {
    "Mounts": [
        # Mapping the data directory
        {
            "source": os.environ["LOCAL_PROJECT_DIR"],
            "destination": "/home/meng/7thSkyHackathon/CV/"
        },
        # Mapping the specs directory.
        {
            "source": os.environ["LOCAL_SPECS_DIR"],
            "destination": os.environ["SPECS_DIR"]
        },
    ]
}

# Writing the mounts file.
with open(mounts_file, "w") as mfile:
    json.dump(drive_map, mfile, indent=4)
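Before running any tao command, it can help to read the mounts file back and confirm it contains the mapping you expect. A minimal sketch, assuming the file was just written by the cell above:
# Illustrative check: read ~/.tao_mounts.json back and pretty-print it.
with open(mounts_file) as mfile:
    print(json.dumps(json.load(mfile), indent=4))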
# Download the pretrained model of your choice
!ngc registry model download-version nvidia/tao/pretrained_object_detection:resnet18 --dest $LOCAL_EXPERIMENT_DIR/pretrained_resnet18
# Check whether the pretrained model was downloaded successfully
print("Check that model is downloaded into dir.")
!ls -l $LOCAL_EXPERIMENT_DIR/pretrained_resnet18/pretrained_object_detection_vresnet18
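Besides the ls above, you can also confirm from Python that the weights file used later by the training command is in place. A minimal sketch; the resnet_18.hdf5 filename is taken from that training command and may differ for other NGC model versions:
# Illustrative check: the directory mirrors the ls above, the filename mirrors the -m argument below.
weights_path = os.path.join(os.environ["LOCAL_EXPERIMENT_DIR"],
                            "pretrained_resnet18",
                            "pretrained_object_detection_vresnet18",
                            "resnet_18.hdf5")
print(weights_path, "->", "found" if os.path.isfile(weights_path) else "missing")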
5. Define the training parameters
Change target_class_mapping to the classes you want to train on.
The paths here normally do not need to be changed, unless you have modified them yourself.
batch_size_per_gpu sets the batch size. If your GPU does not have much memory, lower this value, otherwise you may hit an out-of-memory error.
num_epochs defines how many epochs to train for. For a first training run, a value of at least 80 is recommended.
validation_period_during_training defines how many epochs pass between validation runs, so you can track how training is progressing; 5 or 10 is a reasonable choice.
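Before launching training, it can be convenient to confirm the current values of these parameters directly from the spec file. A minimal sketch, assuming the spec file name matches the one passed to the training command below:
# Print the spec lines that mention the parameters discussed above (illustrative only).
spec_path = os.path.join(os.environ["LOCAL_SPECS_DIR"], "ssd_train_resnet18_kitti.txt")
wanted = ("target_class_mapping", "batch_size_per_gpu", "num_epochs", "validation_period_during_training")
with open(spec_path) as spec:
    for line in spec:
        if any(key in line for key in wanted):
            print(line.rstrip())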
print("To run with multigpu, please change --gpus based on the number of available GPUs in your machine.")
!tao ssd train --gpus 1 --gpu_index=$GPU_INDEX \
    -e $SPECS_DIR/ssd_train_resnet18_kitti.txt \
    -r $USER_EXPERIMENT_DIR/experiment_dir_unpruned_final \
    -k $KEY \
    -m $USER_EXPERIMENT_DIR/7th/pretrained_resnet18/pretrained_object_detection_vresnet18/resnet_18.hdf5
# If the training above was interrupted before finishing, you can use this command to resume from where it left off
print("To resume from checkpoint, please uncomment and run this instead. Change last two arguments accordingly.")
!tao ssd train --gpus 1 --gpu_index=$GPU_INDEX \
    -e $SPECS_DIR/ssd_train_resnet18_kitti.txt \
    -r $USER_EXPERIMENT_DIR/experiment_dir_unpruned \
    -k $KEY \
    -m $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights/ssd_resnet18_epoch_020.tlt \
    --initial_epoch 20
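When resuming, the epoch number in the checkpoint filename and the --initial_epoch value have to agree. A minimal sketch that locates the newest .tlt checkpoint and the epoch it corresponds to, assuming the local experiment directory mirrors $USER_EXPERIMENT_DIR through the mounts file written earlier:
import glob
import re

# Illustrative helper: find the newest checkpoint in the unpruned weights directory.
weights_dir = os.path.join(os.environ["LOCAL_EXPERIMENT_DIR"], "experiment_dir_unpruned", "weights")
checkpoints = glob.glob(os.path.join(weights_dir, "ssd_resnet18_epoch_*.tlt"))
if checkpoints:
    latest = max(checkpoints, key=lambda p: int(re.search(r"epoch_(\d+)", os.path.basename(p)).group(1)))
    epoch = int(re.search(r"epoch_(\d+)", os.path.basename(latest)).group(1))
    print(f"Resume from: {latest} (pass --initial_epoch {epoch})")
else:
    print("No checkpoints found in", weights_dir)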
# List the model checkpoints from your training run
print('Model for each epoch:')
print('---------------------')
!ls -ltrh $USER_EXPERIMENT_DIR/experiment_dir_unpruned_final/weights
# Simple grid visualizer
#!pip3 install matplotlib==3.3.3
import matplotlib.pyplot as plt
import os
from math import ceil
valid_image_ext = ['.jpg', '.png', '.jpeg', '.ppm']

def visualize_images(image_dir, num_cols=4, num_images=10):
    output_path = os.path.join(os.environ['LOCAL_EXPERIMENT_DIR'], image_dir)
    num_rows = int(ceil(float(num_images) / float(num_cols)))
    f, axarr = plt.subplots(num_rows, num_cols, figsize=[80, 30])
    f.tight_layout()
    a = [os.path.join(output_path, image) for image in os.listdir(output_path)
         if os.path.splitext(image)[1].lower() in valid_image_ext]
    for idx, img_path in enumerate(a[:num_images]):
        col_id = idx % num_cols
        row_id = idx // num_cols
        img = plt.imread(img_path)
        axarr[row_id, col_id].imshow(img)
!ls $USER_EXPERIMENT_DIR/ssd_infer_images
# Visualizing the sample images.
OUTPUT_PATH = '/home/meng/7thSkyHackathon/CV/ssd_infer_images' # absolute path; os.path.join in visualize_images keeps an absolute path as-is.
COLS = 3 # number of columns in the visualizer grid.
IMAGES = 21 # number of images to visualize.
visualize_images(OUTPUT_PATH, num_cols=COLS, num_images=IMAGES)
OUTPUT_PATH = 'ssd_infer_images' # relative path from $USER_EXPERIMENT_DIR.
COLS = 3 # number of columns in the visualizer grid.
IMAGES = 9 # number of images to visualize.
visualize_images(OUTPUT_PATH, num_cols=COLS, num_images=IMAGES)