I'm trying to run cudaSift
from the CudaSift project on a remote machine with an Nvidia Tesla M2090 and CUDA 7.5. The machine has 4 of these GPUs, but from debugging I'm fairly sure that the initialization is done correctly. This is the initialization code:
void InitCuda(int devNum)
{
int nDevices;
cudaGetDeviceCount(&nDevices);
if (!nDevices) {
std::cerr << "No CUDA devices available" << std::endl;
return;
}
devNum = std::min(nDevices-1, devNum);
deviceInit(devNum);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, devNum);
printf("Device Number: %d\n", devNum);
printf(" Device name: %s\n", prop.name);
printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate/1000);
printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
printf(" Peak Memory Bandwidth (GB/s): %.1f\n\n",
2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
}
I use CMake to generate the makefile, and make
builds cudaSift
without any errors.
Anyway, when I run it, the following error is returned:
safeCall() Runtime API error in file </ghome/rzhengac/Downloads/CudaSift-Maxwell/cudaSiftH.cu>, line 42 : invalid device symbol.
Line 42 of cudaSiftH.cu
is:
safeCall(cudaMemcpyToSymbol(d_PointCounter, &totPts, sizeof(int)));
Where:
__device__ unsigned int d_PointCounter[1];
int totPts = 0;
This is the output printed by InitCuda
(which makes me quite sure that everything is fine during initialization):
Device Number: 0
Device name: Tesla M2090
Memory Clock Rate (MHz): 1848
Memory Bus Width (bits): 384
Peak Memory Bandwidth (GB/s): 177.4
SOLUTION:
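As suggested in the comments, I was compiling for the wrong architecture. The Tesla M2090 is a Fermi GPU with compute capability 2.0, so a binary built only for sm_35 contains no device code that this GPU can load, which is why the device symbol lookup fails. I had to change each sm_35
in CMakeLists.txt
into sm_20
, and this is the result: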
# Project identity. The version components are kept in separate variables so
# they can be forwarded to CPack below.
cmake_minimum_required(VERSION 2.6)
project(cudaSift)
set(cudaSift_VERSION_MAJOR 2)
set(cudaSift_VERSION_MINOR 0)
set(cudaSift_VERSION_PATCH 0)

# Packaging: produce a ZIP archive carrying the project version.
set(CPACK_PACKAGE_VERSION_MAJOR "${cudaSift_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${cudaSift_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${cudaSift_VERSION_PATCH}")
set(CPACK_GENERATOR "ZIP")
include(CPack)

# External dependencies. OpenCV is mandatory; CUDA is probed without REQUIRED
# so that a missing toolkit can be reported with a friendlier message.
find_package(OpenCV REQUIRED)
find_package(CUDA)
if(NOT CUDA_FOUND)
  message(STATUS "CUDA not found. Project will not be built.")
  # Stop processing this file here. Everything that follows needs the CUDA
  # toolkit, so continuing would contradict the message above and fail later.
  return()
endif()
# Per-platform compiler settings.
#   EXTRA_CXX_FLAGS  - host-compiler flags, set on Windows and macOS only.
#   CUDA_NVCC_FLAGS  - flags appended for nvcc on every supported platform.
#   CMAKE_CXX_FLAGS  - extended on non-Apple Unix only.
# Only the Windows and macOS branches put -arch into CUDA_NVCC_FLAGS.

# Windows
if(WIN32)
  set(EXTRA_CXX_FLAGS "/DVERBOSE /D_CRT_SECURE_NO_WARNINGS ")
  list(APPEND CUDA_NVCC_FLAGS -arch=sm_20 --compiler-options -O2 -DVERBOSE)
endif()

# macOS
if(UNIX AND APPLE)
  set(EXTRA_CXX_FLAGS "-DVERBOSE -msse2")
  list(APPEND CUDA_NVCC_FLAGS -arch=sm_20 --compiler-options -O2 -DVERBOSE)
endif()

# Every other Unix flavour
if(UNIX AND NOT APPLE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -msse2 ")
  list(APPEND CUDA_NVCC_FLAGS -lineinfo --compiler-options -O2 -DVERBOSE)
endif()
# Files handed to the CUDA build: .cu translation units plus the headers they
# use. dynamic.cu is listed only as a comment and is therefore not compiled.
set(cuda_sources
  # dynamic.cu
  cudaImage.cu cudaImage.h
  cudaSiftH.cu cudaSiftH.h
  matching.cu
  cudaSiftD.h cudaSift.h cudautils.h
)

# Plain C++ translation units.
set(sources geomFuncs.cpp mainSift.cpp)

# Put the source directory on the include path for targets defined in this
# directory.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# Build the cudasift executable from the CUDA and C++ sources with separable
# compilation enabled. The OPTIONS argument pins the GPU architecture for this
# target.
set(CUDA_SEPARABLE_COMPILATION ON)
cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_20)

# Host-compiler flags chosen per platform; empty when EXTRA_CXX_FLAGS is unset.
set_target_properties(cudasift PROPERTIES
  COMPILE_FLAGS "${EXTRA_CXX_FLAGS}"
)

# Use the device runtime library located by FindCUDA when it is available, so
# the build does not depend on one fixed toolkit install location. The
# previous hard-coded path is kept as a fallback for FindCUDA versions that do
# not provide CUDA_cudadevrt_LIBRARY.
if(CUDA_cudadevrt_LIBRARY)
  set(cudasift_cudadevrt_library "${CUDA_cudadevrt_LIBRARY}")
else()
  set(cudasift_cudadevrt_library /usr/local/cuda/lib64/libcudadevrt.a)
endif()

# NOTE(review): the keyword-less signature is kept on purpose; FindCUDA
# appears to link the target with the plain signature as well, and CMake
# rejects mixing plain and keyword signatures on one target -- confirm before
# adding PRIVATE/PUBLIC.
target_link_libraries(cudasift
  ${cudasift_cudadevrt_library} ${OpenCV_LIBS}
)
# Install the project sources, the build script and the copyright notice
# into the root of the install prefix.
install(
  FILES ${cuda_sources} ${sources} cudaSiftD.cu CMakeLists.txt Copyright.txt
  DESTINATION .
)

# Install the sample images into the data/ subdirectory.
# NOTE(review): "righ.pgm" is reproduced exactly as written; confirm that it
# matches the file name on disk.
install(
  FILES data/left.pgm data/righ.pgm
  DESTINATION data
)