点群の並進移動&回転をCUDAで実装してみたので紹介します。
【ROS+CUDA】点群の並進移動&回転をCUDAで実装してみた
点群の並進移動&回転は、PCL(Point Cloud Library)を使えば簡単に実装することができます。
この記事では、あえてCUDAを使って実装してみたいと思います。CUDAプログラミングの入門は、以下の記事を参考にしていただければと思います。
目次
仕様
Subscribeした点群に対して、並進/回転移動を適用してPublishする仕様とします。
以下に、実装における、入出力とパラメータを記載します。
入力(Subscribe)
- sensor_msgs::PointCloud2:入力点群
出力(Publish)
- sensor_msgs::PointCloud2:変換された点群
パラメータ
- double:x軸方向の移動距離 [m]
- double:y軸方向の移動距離 [m]
- double:z軸方向の移動距離 [m]
- double:ロール回転角度 [deg]
- double:ピッチ回転角度 [deg]
- double:ヨー回転角度 [deg]
環境構築
今回のプログラムを動かすためには、以下をインストールする必要があります。
- ROS
- CUDA
(Dockerを使う場合)
Dockerを使う場合は、以下のDockerfileを例として参考にしてください。
########## Pull ########## FROM ros:noetic ########## Non-interactive ########## ENV DEBIAN_FRONTEND=noninteractive ########## ROS setup ########## RUN mkdir -p ~/catkin_ws/src && \ cd ~/catkin_ws && \ /bin/bash -c "source /opt/ros/noetic/setup.bash; catkin_make" && \ echo "source /opt/ros/noetic/setup.bash" >> ~/.bashrc && \ echo "source ~/catkin_ws/devel/setup.bash" >> ~/.bashrc && \ echo "export ROS_WORKSPACE=~/catkin_ws" >> ~/.bashrc ########## pc_transform_cuda ########## RUN apt-get update && \ apt-get install -y \ ros-noetic-rviz \ nvidia-cuda-toolkit && \ apt-get remove -y gcc && \ ln -s /usr/bin/gcc-8 /usr/bin/gcc && \ ln -s /usr/bin/g++-8 /usr/bin/g++ && \ ln -s /usr/bin/gcc-8 /usr/bin/cc && \ ln -s /usr/bin/g++-8 /usr/bin/c++
実装
ディレクトリ構造
~/catkin_ws/src/pc_transform_cuda/
┣ src/
┃ ┣ cuda_process.cu
┃ ┗ pc_transform_cuda.cpp
┣ launch/
┃ ┗ transform.launch
┗ CMakeLists.txt
CMakeLists.txt
cmake_minimum_required(VERSION 2.8.3) project(pc_transform_cuda) add_compile_options(-std=c++14 -O2 -g -Wall) find_package(catkin REQUIRED COMPONENTS roscpp ) find_package(PCL) find_package(CUDA) catkin_package( ) include_directories( include ${catkin_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ) cuda_add_executable(pc_transform_cuda src/pc_transform_cuda.cpp src/cuda_process.cu ) target_link_libraries(pc_transform_cuda ${catkin_LIBRARIES} ${CUDA_LIBRARIES} )
cuda_process.cu
#include <iostream> #include <assert.h> #include <sensor_msgs/PointCloud2.h> #include <sensor_msgs/point_field_conversion.h> typedef struct{ float x; float y; float z; }Point; typedef struct{ float m00, m10, m20, m01, m11, m21, m02, m12, m22; }Mat; inline cudaError_t checkCudaError(cudaError_t result) { if (result != cudaSuccess) { fprintf(stderr, "CUDA Runtime Error: %s\n", cudaGetErrorString(result)); assert(result == cudaSuccess); } return result; } inline void checkCudaKernelError(void) { cudaError_t kernel_error = cudaGetLastError(); if (kernel_error != cudaSuccess) { fprintf(stderr, "CUDA Runtime Error: %s\n", cudaGetErrorString(kernel_error)); assert(kernel_error == cudaSuccess); } } void pcRosToPtr(const sensor_msgs::PointCloud2& pc_ros, Point* pc_ptr){ int x_idx, y_idx, z_idx; for(size_t i = 0; i < pc_ros.fields.size(); ++i){ if(pc_ros.fields[i].name == "x") x_idx = i; else if(pc_ros.fields[i].name == "y") y_idx = i; else if(pc_ros.fields[i].name == "z") z_idx = i; } int x_offset = pc_ros.fields[x_idx].offset; int y_offset = pc_ros.fields[y_idx].offset; int z_offset = pc_ros.fields[z_idx].offset; uint8_t x_datatype = pc_ros.fields[x_idx].datatype; uint8_t y_datatype = pc_ros.fields[y_idx].datatype; uint8_t z_datatype = pc_ros.fields[z_idx].datatype; for (size_t i = 0; i < pc_ros.height * pc_ros.width; ++i){ pc_ptr[i].x = sensor_msgs::readPointCloud2BufferValue<float>(&pc_ros.data[i * pc_ros.point_step + x_offset], x_datatype); pc_ptr[i].y = sensor_msgs::readPointCloud2BufferValue<float>(&pc_ros.data[i * pc_ros.point_step + y_offset], y_datatype); pc_ptr[i].z = sensor_msgs::readPointCloud2BufferValue<float>(&pc_ros.data[i * pc_ros.point_step + z_offset], z_datatype); } } float degToRad(float deg) { float rad = deg / 180.0 * M_PI; rad = atan2(sin(rad), cos(rad)); return rad; } Mat getRotMatrix(float r_deg, float p_deg, float y_deg) { float r_rad = degToRad(r_deg); float p_rad = degToRad(p_deg); float y_rad = degToRad(y_deg); Mat rot; rot.m00 = cos(p_rad)*cos(y_rad); rot.m10 = sin(r_rad)*sin(p_rad)*cos(y_rad) - cos(r_rad)*sin(y_rad); rot.m20 = cos(r_rad)*sin(p_rad)*cos(y_rad) + sin(r_rad)*sin(y_rad); rot.m01 = cos(p_rad)*sin(y_rad); rot.m11 = sin(r_rad)*sin(p_rad)*sin(y_rad) + cos(r_rad)*cos(y_rad); rot.m21 = cos(r_rad)*sin(p_rad)*sin(y_rad) - sin(r_rad)*cos(y_rad); rot.m02 = -sin(p_rad); rot.m12 = sin(r_rad)*cos(p_rad); rot.m22 = cos(r_rad)*cos(p_rad); return rot; } __global__ void transformPcCuda(Point* pc_ptr, size_t num_points, float x_m, float y_m, float z_m, Mat rot) { size_t index = threadIdx.x + blockIdx.x * blockDim.x; size_t grid_stride = gridDim.x * blockDim.x; for(size_t i = index; i < num_points; i += grid_stride){ Point tmp; tmp.x = rot.m00 * pc_ptr[i].x + rot.m10 * pc_ptr[i].y + rot.m20 * pc_ptr[i].z + x_m; tmp.y = rot.m01 * pc_ptr[i].x + rot.m11 * pc_ptr[i].y + rot.m21 * pc_ptr[i].z + y_m; tmp.z = rot.m02 * pc_ptr[i].x + rot.m12 * pc_ptr[i].y + rot.m22 * pc_ptr[i].z + z_m; pc_ptr[i] = tmp; } } void pcPtrToRos(Point* pc_ptr, sensor_msgs::PointCloud2& pc_ros){ for(size_t i = 0; i < pc_ros.height * pc_ros.width; ++i){ memcpy(&pc_ros.data[i * pc_ros.point_step + pc_ros.fields[0].offset], &pc_ptr[i].x, sizeof(float)); memcpy(&pc_ros.data[i * pc_ros.point_step + pc_ros.fields[1].offset], &pc_ptr[i].y, sizeof(float)); memcpy(&pc_ros.data[i * pc_ros.point_step + pc_ros.fields[2].offset], &pc_ptr[i].z, sizeof(float)); } } void transformPc(sensor_msgs::PointCloud2& pc_ros, float x_m, float y_m, float z_m, float r_deg, float p_deg, float y_deg) { /*device query*/ int device_id; checkCudaError( cudaGetDevice(&device_id) ); int num_sm; checkCudaError( cudaDeviceGetAttribute(&num_sm, cudaDevAttrMultiProcessorCount, device_id) ); /*memory setting*/ int num_points = pc_ros.height * pc_ros.width; int bytes = num_points * sizeof(Point); Point* pc_ptr; checkCudaError( cudaMallocManaged(&pc_ptr, bytes) ); checkCudaError( cudaMemPrefetchAsync(pc_ptr, bytes, device_id) ); /*cpu*/ pcRosToPtr(pc_ros, pc_ptr); /*gpu*/ int num_blocks = num_sm * 32; int threads_per_block = 1024; transformPcCuda<<<num_blocks, threads_per_block>>>(pc_ptr, num_points, x_m, y_m, z_m, getRotMatrix(r_deg, p_deg, y_deg)); checkCudaKernelError(); checkCudaError( cudaDeviceSynchronize() ); /*cpu*/ checkCudaError( cudaMemPrefetchAsync(pc_ptr, bytes, cudaCpuDeviceId) ); pcPtrToRos(pc_ptr, pc_ros); checkCudaError( cudaFree(pc_ptr) ); }
pc_transform_cuda.cpp
#include <ros/ros.h> #include <sensor_msgs/PointCloud2.h> #include <sensor_msgs/point_cloud_conversion.h> #include "cuda_process.h" class PcTransform{ private: /*node handle*/ ros::NodeHandle nh_; ros::NodeHandle nh_private_; /*subscriber*/ ros::Subscriber sub_; /*publisher*/ ros::Publisher pub_; /*parameter*/ float x_m_, y_m_, z_m_; float r_deg_, p_deg_, y_deg_; public: PcTransform(); void callback(const sensor_msgs::PointCloud2ConstPtr& msg); void publication(void); }; PcTransform::PcTransform() : nh_private_("~") { /*parameter*/ nh_private_.param("x_m", x_m_, float(0.0)); std::cout << "x_m_ = " << x_m_ << std::endl; nh_private_.param("y_m", y_m_, float(0.0)); std::cout << "y_m_ = " << y_m_ << std::endl; nh_private_.param("z_m", z_m_, float(0.0)); std::cout << "z_m_ = " << z_m_ << std::endl; nh_private_.param("r_deg", r_deg_, float(0.0)); std::cout << "r_deg_ = " << r_deg_ << std::endl; nh_private_.param("p_deg", p_deg_, float(0.0)); std::cout << "p_deg_ = " << p_deg_ << std::endl; nh_private_.param("y_deg", y_deg_, float(0.0)); std::cout << "y_deg_ = " << y_deg_ << std::endl; /*subscriber*/ sub_ = nh_.subscribe("/point_cloud", 1, &PcTransform::callback, this); /*publisher*/ pub_ = nh_.advertise<sensor_msgs::PointCloud2>("/point_cloud/transformed", 1); } void PcTransform::callback(const sensor_msgs::PointCloud2ConstPtr& msg) { ros::Time t_start = ros::Time::now(); sensor_msgs::PointCloud2 pc = *msg; transformPc(pc, x_m_, y_m_, z_m_, r_deg_, p_deg_, y_deg_); pub_.publish(pc); std::cout << "cuda time: " << (ros::Time::now() - t_start).toSec() << " [s]" << std::endl; } int main(int argc, char** argv) { ros::init(argc, argv, "pc_transform"); PcTransform pc_transform; ros::spin(); }
transform.launch
<launch> <!-- global rosparam --> <param name="use_sim_time" value="true"/> <!-- rosbag --> <node pkg="rosbag" type="play" name="player" args="--clock $(env HOME)/rosbag/demo.bag"/> <!-- rviz --> <node pkg="rviz" type="rviz" name="rviz" args="-d $(find pc_transform_cuda)/rviz_config/demo.rviz"/> <!-- main --> <node pkg="pc_transform_cuda" name="pc_transform_cuda" type="pc_transform_cuda" output="screen"> <remap from="/point_cloud" to="/demo_points"/> <remap from="/point_cloud/transformed" to="/demo_points/transformed"/> <param name="x_m" type="double" value="1.0"/> <param name="y_m" type="double" value="2.0"/> <param name="z_m" type="double" value="3.0"/> <param name="r_deg" type="double" value="10.0"/> <param name="p_deg" type="double" value="20.0"/> <param name="y_deg" type="double" value="30.0"/> </node> </launch>
使い方
ビルド
cd ~/catkin_ws catkin_make
実行
roslaunch pc_transform_cuda transform.launch
さいごに
3次元点群の並進/回転移動を、あえてCUDAで実装してみました。
参考になれば幸いです。
関連記事
Ad.
コメント