CUDA Thrust: редукция по ключу (reduce_by_key) с ключом-кортежем

У меня есть два вектора. После объединения их в кортеж (с помощью zip_iterator) я хочу отсортировать их с помощью sort_by_key, а затем применить reduce_by_key.

Но редукция по ключу не работает: она выдаёт неверный вектор счётчиков (counter). Может ли кто-нибудь мне помочь? Вот соответствующий фрагмент моего кода.

... 
// Iterator over a device vector of ints.
typedef thrust::device_vector<int>::iterator IntIterator; 
// Pair of such iterators, used as the underlying tuple of the zip iterator.
typedef thrust::tuple<IntIterator, IntIterator> IteratorTuple; 
// Zip iterator that walks two int device vectors in lock-step.
typedef thrust::zip_iterator<IteratorTuple> ZipIterator; 
// Value type of a zipped element: one int from each vector.
typedef thrust::tuple<int, int> tupla; 
... 
// The two device vectors that together form the composite (tuple) key.
thrust::device_vector <int> documenti(n); 
thrust::device_vector <int> strip(n); 
... 
// Zipped [begin, end) range over (documenti[i], strip[i]) pairs.
ZipIterator bufferBegin (thrust::make_tuple(documenti.begin(),strip.begin())); 
ZipIterator bufferEnd (thrust::make_tuple(documenti.end(),strip.end())); 

... 
// Sort by the zipped tuple key; `counter` (declared outside this fragment) is
// passed as the values range.
thrust::sort_by_key(bufferBegin,bufferEnd, counter.begin()); 

// Destination for the reduced tuple keys.
thrust::device_vector <tupla> example(n); 

// Reduce the values in `counter` by tuple key.
// NOTE(review): `counter.begin()` is passed both as the input values and as the
// output values. The Thrust documentation lists "input ranges shall not overlap
// either output range" as a precondition of reduce_by_key -- a separate output
// vector is presumably needed here; confirm against the Thrust docs.
// NOTE(review): the returned pair of end iterators is discarded, so the number
// of reduced entries actually written is not known after this call.
thrust::reduce_by_key(bufferBegin,bufferEnd, counter.begin(), example.begin(), counter.begin()); 

// Sort the first n counters in descending order (thrust::greater<int>),
// with `example` passed as the values range.
// NOTE(review): this covers all n entries, not only those written by the
// reduction above -- verify that this is intended.
thrust::sort_by_key(counter.begin(), counter.begin()+n, example.begin(),thrust::greater <int>());

источник

2012-06-11 user1413067

Если вы выполняете сортировку по ключу, вам нужны *три* вектора: два — чтобы составить кортеж, и ещё один — для ключа, по которому выполняется упорядочивание. – talonmies

Проблема не в сортировке, а в редукции: когда я вызываю reduce_by_key, он выдаёт неверный результирующий вектор. Исходя из того, что вы сказали, я не понимаю, как решить эту проблему. Извините за мой английский :) ! – user1413067

Я публикую ответ на этот вопрос, чтобы убрать его из списка вопросов без ответа.

Ваш вопрос мне не вполне понятен. Насколько я понимаю, вас интересует редукция по ключу, где ключами служат кортежи (tuple).

Ниже вы можете найти полный рабочий пример. Я надеюсь, что это будет полезно для будущих пользователей.

#include <thrust/device_vector.h> 
#include <thrust/reduce.h> 

// --- Defining key tuple type: each key is a pair of ints that must match
// --- component-wise for two keys to be considered equal
typedef thrust::tuple<int,int> Tuple; 

// --- Iterator types of the output ranges, used for the pair returned by
// --- thrust::reduce_by_key. Note: despite the 'd' prefix, these are
// --- host_vector iterators.
typedef thrust::host_vector<Tuple>::iterator dIter1; 
typedef thrust::host_vector<float>::iterator dIter2; 

/************************************/ 
/* EQUALITY OPERATOR BETWEEN TUPLES */ 
/************************************/ 
struct BinaryPredicate
{
    /// Returns true when both tuple keys match component-wise.
    ///
    /// @param lhs first key
    /// @param rhs second key
    /// @return true iff element 0 and element 1 of lhs equal those of rhs
    ///
    /// The call operator is const-qualified: the predicate holds no state, and
    /// a const operator lets it be invoked through const objects/references.
    __host__ __device__ bool operator()
        (const Tuple& lhs, const Tuple& rhs) const
    {
        return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
    }
};

/********/ 
/* MAIN */ 
/********/ 
int main() 
{ 
    const int N = 7; 

    thrust::host_vector<Tuple> keys_input(N); 
    thrust::host_vector<float> values_input(N); 

    int keys1_input[N]   = {1, 3, 3, 3, 2, 2, 1};   // input keys 1 
    int keys2_input[N]   = {1, 5, 3, 8, 2, 2, 1};   // input keys 2 
    float input_values[N]  = {9., 8., 7., 6., 5., 4., 3.};  // input values 

    for (int i=0; i<N; i++) { 
     keys_input[i] = thrust::make_tuple(keys1_input[i], keys2_input[i]); 
     values_input[i] = input_values[i]; 
    } 

    for (int i=0; i<N; i++) printf("%i %i\n", thrust::get<0>(keys_input[i]), thrust::get<1>(keys_input[i])); 

    thrust::host_vector<Tuple> keys_output(N); 
    thrust::host_vector<float> values_output(N); 

    thrust::pair<dIter1, dIter2> new_end; 

    new_end = thrust::reduce_by_key(keys_input.begin(), 
            keys_input.end(), 
            values_input.begin(), 
            keys_output.begin(), 
            values_output.begin(), 
            BinaryPredicate(), 
            thrust::plus<float>()); 

    int Nkeys = new_end.first - keys_output.begin(); 

    printf("\n\n"); 
    for (int i = 0; i < Nkeys; i++) printf("%i; %f\n", i, values_output[i]); 

    printf("\n\n"); 
    for (int i = 0; i < Nkeys; i++) printf("%i %i\n", thrust::get<0>(keys_output[i]), thrust::get<1>(keys_output[i])); 

    return 0; 
}

РЕДАКТИРОВАТЬ

Приведённый выше рабочий пример использовал host_vector. Ниже приведён полный рабочий пример для случая, когда векторы ключей и значений — обычные массивы, выделенные через cudaMalloc.

#include <thrust/device_vector.h> 
#include <thrust/reduce.h> 

#include "Utilities.cuh" 

// --- Key type: two integer key components packed into one tuple
using Tuple = thrust::tuple<int, int>;

// --- Iterator types of the device-side output ranges (keys and values),
// --- used for the pair returned by thrust::reduce_by_key
using dIter1 = thrust::device_vector<Tuple>::iterator;
using dIter2 = thrust::device_vector<float>::iterator;

/************************************/ 
/* EQUALITY OPERATOR BETWEEN TUPLES */ 
/************************************/ 
struct BinaryPredicate
{
    /// Returns true when both tuple keys match component-wise.
    ///
    /// @param lhs first key
    /// @param rhs second key
    /// @return true iff element 0 and element 1 of lhs equal those of rhs
    ///
    /// The call operator is const-qualified: the predicate holds no state, and
    /// a const operator lets it be invoked through const objects/references.
    __host__ __device__ bool operator()
        (const Tuple& lhs, const Tuple& rhs) const
    {
        return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
    }
};

/********/ 
/* MAIN */ 
/********/ 
int main() 
{ 
    const int N = 7; 

    // --- Keys and input values on the host: allocation and definition 
    int h_keys1_input[N] = { 1, 3, 3, 3, 2, 2, 1 };           // --- Input keys 1 - host side 
    int h_keys2_input[N] = { 1, 5, 3, 8, 2, 2, 1 };           // --- Input keys 2 - host side 
    float h_input_values[N] = { 9., 8., 7., 6., 5., 4., 3. };        // --- Input values - host side 

    // --- Keys and input values on the device: allocation 
    int *d_keys1_input;  gpuErrchk(cudaMalloc(&d_keys1_input, N * sizeof(int)));   // --- Input keys 1 - device side 
    int *d_keys2_input;  gpuErrchk(cudaMalloc(&d_keys2_input, N * sizeof(int)));   // --- Input keys 2 - device side 
    float *d_input_values; gpuErrchk(cudaMalloc(&d_input_values, N * sizeof(float)));  // --- Input values - device side 

    // --- Keys and input values: host -> device 
    gpuErrchk(cudaMemcpy(d_keys1_input, h_keys1_input, N * sizeof(int), cudaMemcpyHostToDevice)); 
    gpuErrchk(cudaMemcpy(d_keys2_input, h_keys2_input, N * sizeof(int), cudaMemcpyHostToDevice)); 
    gpuErrchk(cudaMemcpy(d_input_values, h_input_values, N * sizeof(float), cudaMemcpyHostToDevice)); 

    // --- From raw pointers to device_ptr 
    thrust::device_ptr<int> dev_ptr_keys1 = thrust::device_pointer_cast(d_keys1_input); 
    thrust::device_ptr<int> dev_ptr_keys2 = thrust::device_pointer_cast(d_keys2_input); 
    thrust::device_ptr<float> dev_ptr_values = thrust::device_pointer_cast(d_input_values); 

    // --- Declare outputs 
    thrust::device_vector<Tuple> d_keys_output(N); 
    thrust::device_vector<float> d_values_output(N); 

    thrust::pair<dIter1, dIter2> new_end; 

    auto begin = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1, dev_ptr_keys2)); 
    auto end = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1 + N, dev_ptr_keys2 + N)); 

    new_end = thrust::reduce_by_key(begin, 
            end, 
            dev_ptr_values, 
            d_keys_output.begin(), 
            d_values_output.begin(), 
            BinaryPredicate(), 
            thrust::plus<float>()); 

    int Nkeys = new_end.first - d_keys_output.begin(); 

    printf("\n\n"); 
    for (int i = 0; i < Nkeys; i++) { 
     float output = d_values_output[i]; 
     printf("%i; %f\n", i, output); 
    } 

    thrust::host_vector<Tuple> h_keys_output(d_keys_output); 
    printf("\n\n"); 
    for (int i = 0; i < Nkeys; i++) { 
     int key1 = thrust::get<0>(h_keys_output[i]); 
     int key2 = thrust::get<1>(h_keys_output[i]); 
     printf("%i %i\n", key1, key2); 
    } 

    return 0; 
}

источник

2015-04-30 17:51:59 JackOLantern

CUDA Thrust: редукция по ключу (reduce_by_key) с ключом-кортежем

ответ

Смежные вопросы