The cuSPARSE documentation states that the function cusparseXcoo2csr
can also be used to convert an array of uncompressed column indices (COO format) into an array of column pointers (CSC format).
However, I could not reproduce this behavior. Please see the minimal code below:
CMakeLists.txt
# Build script for the single-file cuSPARSE sample (main.cpp).
cmake_minimum_required(VERSION 3.11)
project(sample)

# Locate the CUDA toolkit; this provides CUDA_INCLUDE_DIRS, CUDA_LIBRARIES
# and CUDA_cusparse_LIBRARY used below.
# NOTE(review): the FindCUDA module is deprecated in newer CMake releases;
# consider find_package(CUDAToolkit) once the minimum CMake version can be
# raised -- confirm against the CMake documentation.
find_package(CUDA REQUIRED)

# The executable is plain host C++ that links against the CUDA runtime and
# cuSPARSE; the target name matches the project name.
add_executable(sample main.cpp)
target_include_directories(sample SYSTEM PUBLIC ${CUDA_INCLUDE_DIRS})
target_link_libraries(sample ${CUDA_LIBRARIES} ${CUDA_cusparse_LIBRARY})
target_compile_features(sample PUBLIC cxx_std_14)
main.cpp
#include <iostream>
#include <vector>
#include <cuda_runtime_api.h>
#include <cusparse_v2.h>
int main(){
// using the matrix as shown in https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
// 1 4 0 0 0
// 0 2 3 0 0
// 5 0 0 7 8
// 0 0 9 0 6
std::vector<int> row;
std::vector<int> col;
std::vector<double> val;
row.emplace_back(0);
row.emplace_back(0);
row.emplace_back(1);
row.emplace_back(1);
row.emplace_back(2);
row.emplace_back(2);
row.emplace_back(2);
row.emplace_back(3);
row.emplace_back(3);
col.emplace_back(0);
col.emplace_back(1);
col.emplace_back(1);
col.emplace_back(2);
col.emplace_back(0);
col.emplace_back(3);
col.emplace_back(4);
col.emplace_back(2);
col.emplace_back(4);
val.emplace_back(1);
val.emplace_back(4);
val.emplace_back(2);
val.emplace_back(3);
val.emplace_back(5);
val.emplace_back(7);
val.emplace_back(8);
val.emplace_back(9);
val.emplace_back(6);
int *d_row;
int *d_col;
double *d_val;
cudaMalloc(reinterpret_cast<void **>(&d_row), row.size() * sizeof(int));
cudaMalloc(reinterpret_cast<void **>(&d_col), col.size() * sizeof(int));
cudaMalloc(reinterpret_cast<void **>(&d_val), val.size() * sizeof(double));
cudaMemcpy(d_row, row.data(), sizeof(int) * row.size(), cudaMemcpyHostToDevice);
cudaMemcpy(d_col, col.data(), sizeof(int) * col.size(), cudaMemcpyHostToDevice);
cudaMemcpy(d_val, val.data(), sizeof(double) * val.size(), cudaMemcpyHostToDevice);
cusparseHandle_t handle;
cusparseCreate(&handle);
cusparseMatDescr_t descr;
cusparseCreateMatDescr(&descr);
cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
cusparseMatDescr_t descr_out;
cusparseCreateMatDescr(&descr_out);
cusparseSetMatType(descr_out, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr_out, CUSPARSE_INDEX_BASE_ZERO);
int *d_row_csr;
cudaMalloc(reinterpret_cast<void **>(&d_row_csr), (4 + 1) * sizeof(int));
cusparseXcoo2csr(handle, d_row, 9, 4, d_row_csr, CUSPARSE_INDEX_BASE_ZERO);
std::vector<int> row_csr(4 + 1);
cudaMemcpy(row_csr.data(), d_row_csr, sizeof(int) * (4 + 1), cudaMemcpyDeviceToHost);
std::cout << "row" << std::endl;
for (int i : row_csr){
std::cout << i << std::endl; // prints 0 2 4 7 9 as expected
}
// however when I try to compress the column the same way...
int *d_col_csc;
cudaMalloc(reinterpret_cast<void **>(&d_col_csc), (5 + 1) * sizeof(int));
cusparseXcoo2csr(handle, d_col, 9, 5, d_col_csc, CUSPARSE_INDEX_BASE_ZERO);
std::vector<int> col_csc(5 + 1);
cudaMemcpy(col_csc.data(), d_col_csc, sizeof(int) * (5 + 1), cudaMemcpyDeviceToHost);
std::cout << "col" << std::endl;
for (int i : col_csc){
std::cout << i << std::endl; // prints 0 5 3 8 6 9, shouldn't it be 0 2 4 6 7 9?
}
return 0;
}
As you can see, the conversion from COO to CSC does not produce the expected result. I temporarily worked around this problem by calling cusparseXcoo2csr
to do the COO-to-CSR conversion and then calling cusparseDcsr2csc
to convert the intermediate CSR result to CSC. That is extra computation, so I would like to know how to use cusparseXcoo2csr
to convert COO to CSC directly, as indicated in the documentation.