I am trying to implement the Karatsuba algorithm in CUDA (on a GeForce 285).

But I am stuck on this error:

Quote
calling a host function from a __device__/__global__ function is not allowed


Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include "applink.c"
#include <string.h>
#include <cuda_runtime.h>
#include <sstream>
#include "e_os.h"
#include <openssl/bn.h>
#include <openssl/bio.h>
#include <openssl/rand.h>
#include <openssl/x509.h>
#include <openssl/err.h>
#include "Karatsuba.h"

// Kernel placeholder for the z0 = a * b step of the Karatsuba split.
//
// Parameters:
//   z0  - destination BIGNUM pointer (main passes the buffer obtained from cudaMalloc)
//   a,b - operands, passed by value
//   ctx - BN_CTX pointer created on the host with BN_CTX_new()
//
// The body is currently empty: the only statement is commented out, so the
// kernel performs no work and writes nothing to z0. main launches it as <<<1,1>>>.
//
// NOTE(review): a by-value BIGNUM copy presumably still refers to host-side
// storage internally, and ctx is a host pointer - neither looks usable from
// device code as-is; confirm against the OpenSSL definitions.
__global__ void mul(BIGNUM *z0, BIGNUM a, BIGNUM b, BN_CTX *ctx)        {

        // Disabled: BN_mul is a host function from the OpenSSL BN API, and nvcc
        // rejects the call with "calling a host function from a
        // __device__/__global__ function is not allowed".
        //BN_mul(z0, &a, &b, ctx);
}

//{}
/*
 * Generates two random RAND_NUM_BIT_LENGHT-bit numbers x and y, splits each
 * into a high half (x1, y1) and a low half (x0, y0) around bit
 * RAND_NUM_BIT_LENGHT/2, prints every intermediate value, and launches the
 * mul kernel on the two low halves.
 *
 * Returns 0 on success and 1 if any OpenSSL or CUDA call fails. Every
 * resource acquired here is released on all paths through the single
 * cleanup label at the end.
 */
int main( void )        {
        /* Everything starts out NULL so the cleanup path can release
         * unconditionally, no matter where a failure occurred. */
        BIGNUM *x = NULL, *y = NULL, *x1 = NULL, *y1 = NULL;
        BIGNUM *x0 = NULL, *y0 = NULL, *x_temp = NULL, *y_temp = NULL;
        BIGNUM *z0 = NULL;
        /* Device allocation only. It must NOT also be assigned from BN_new():
         * cudaMalloc overwrites the pointer and the host object would leak. */
        BIGNUM *dev_z0 = NULL;
        BIO *out = NULL;
        BN_CTX *ctx = NULL;
        cudaError_t err = cudaSuccess;
        int status = 1;

        out = BIO_new_fp(stdout, BIO_NOCLOSE);
        x = BN_new();
        y = BN_new();
        x1 = BN_new();
        y1 = BN_new();
        x0 = BN_new();
        y0 = BN_new();
        x_temp = BN_new();
        y_temp = BN_new();
        z0 = BN_new();
        ctx = BN_CTX_new();

        if (out == NULL || x == NULL || y == NULL || x1 == NULL || y1 == NULL ||
            x0 == NULL || y0 == NULL || x_temp == NULL || y_temp == NULL ||
            z0 == NULL || ctx == NULL) {
                fprintf(stderr, "OpenSSL allocation error!\n");
                goto cleanup;
        }

        err = cudaMalloc( (void**) &dev_z0, sizeof(BIGNUM) );
        if (err != cudaSuccess) {
                printf("cudaMalloc error!\n");
                fprintf(stderr, "%s\n", cudaGetErrorString(err));
                dev_z0 = NULL;  /* do not hand an indeterminate pointer to cudaFree */
                goto cleanup;
        }

        /* Random operands. */
        if (!BN_rand(x, RAND_NUM_BIT_LENGHT, 0, 0) ||
            !BN_rand(y, RAND_NUM_BIT_LENGHT, 0, 0)) {
                fprintf(stderr, "BN_rand error!\n");
                goto cleanup;
        }

        printf("x = ");
        BN_print(out, x);
        printf("\ny = ");
        BN_print(out, y);
        printf("\n");

        /* High halves: x1 = x >> (bits/2), y1 = y >> (bits/2). */
        if (!BN_rshift(x1, x, RAND_NUM_BIT_LENGHT/2) ||
            !BN_rshift(y1, y, RAND_NUM_BIT_LENGHT/2)) {
                fprintf(stderr, "BN_rshift error!\n");
                goto cleanup;
        }

        printf("x1 = ");
        BN_print(out, x1);
        printf("\ny1 = ");
        BN_print(out, y1);
        printf("\n");

        /* Shift the high halves back into position so they can be subtracted off. */
        if (!BN_lshift(x_temp, x1, RAND_NUM_BIT_LENGHT/2) ||
            !BN_lshift(y_temp, y1, RAND_NUM_BIT_LENGHT/2)) {
                fprintf(stderr, "BN_lshift error!\n");
                goto cleanup;
        }

        printf("x_temp = ");
        BN_print(out, x_temp);
        printf("\ny_temp = ");
        BN_print(out, y_temp);
        printf("\n");

        /* Low halves: x0 = x - (x1 << bits/2), y0 = y - (y1 << bits/2). */
        if (!BN_sub(x0, x, x_temp) ||
            !BN_sub(y0, y, y_temp)) {
                fprintf(stderr, "BN_sub error!\n");
                goto cleanup;
        }

        printf("x0 = ");
        BN_print(out, x0);
        printf("\ny0 = ");
        BN_print(out, y0);
        printf("\n");

        /* ctx is a host pointer; the kernel must not dereference it. */
        mul<<<1,1>>>(dev_z0, *x0, *y0, ctx);

        /* A launch does not return an error itself: configuration errors are
         * reported by cudaGetLastError(), execution errors by the next
         * synchronizing call. */
        err = cudaGetLastError();
        if (err != cudaSuccess) {
                fprintf(stderr, "kernel launch error: %s\n", cudaGetErrorString(err));
                goto cleanup;
        }

        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
                fprintf(stderr, "kernel execution error: %s\n", cudaGetErrorString(err));
                goto cleanup;
        }

        status = 0;

cleanup:
        if (dev_z0 != NULL)
                cudaFree(dev_z0);

        /* BN_free / BN_CTX_free accept NULL, so no per-pointer guards are needed. */
        BN_free(x);
        BN_free(y);
        BN_free(x1);
        BN_free(y1);
        BN_free(x0);
        BN_free(y0);
        BN_free(x_temp);
        BN_free(y_temp);
        BN_free(z0);
        BN_CTX_free(ctx);

        if (out != NULL)
                BIO_free(out);

        return status;
}



Should I also copy a, b and ctx to the device before calling BN_mul?

Reply via email to