lps_683 阅读(57) 评论(0)

要把OpenCV的源码改写成CUDA,那么在改写成并行计算之前,我们需要保证CUDA C中(特别是CUDA中的核函数)能够支持OpenCV定义的类型,否则我们只有重写。

所以在将OpenCV源码改写成CUDA并行计算之前,我首先将OpenCV源码改写成了普通的C语言版本------这里的改写指的是:一些结构体的重写、Mat数组和普通一维二维数组的转换等等,具体情况下面的代码将见分晓。

这次改写的是ComputeGradient函数

 

注意:这里保留了Mat img,因为这里到时候移植到CUDA C中要改写成PtrstepSZ变量,这里就不费功夫把他改成数组了。

 

#include <cv.h>
#include <highgui.h>
#include <cxcore.h>
#include <ml.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include"stdio.h"
using namespace std;
using namespace cv;

static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
enum { BLOCK_SIZE = 1024 };
int nbins = 9;

#define IMG_W 64
#define IMG_H 128
//**********************************************************************************************************
static void FastAtan2_32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees=true )
{
    int i = 0;
    float scale = angleInDegrees ? 1 : (float)(CV_PI/180);

    for( ; i < len; i++ )
    {
        float x = X[i], y = Y[i];
		//cout << "x: " << x << " "<< " y: " << y << endl;
        float ax = std::abs(x), ay = std::abs(y);
        float a, c, c2;
        if( ax >= ay )
        {
            c = ay/(ax + (float)DBL_EPSILON);
            c2 = c*c;
            a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
        }
        else
        {
            c = ax/(ay + (float)DBL_EPSILON);
            c2 = c*c;
            a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
        }
        if( x < 0 )
            a = 180.f - a;
        if( y < 0 )
            a = 360.f - a;
        angle[i] = (float)(a*scale);
		//cout << " angle: " << angle[i] << endl;
    }
}

static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
{
    int i = 0;

    for( ; i < len; i++ )
    {
        float x0 = x[i], y0 = y[i];
        mag[i] = std::sqrt(x0*x0 + y0*y0);
		//cout << "mag: " << mag[i] << endl;
    }
}

void mycartToPolar( InputArray src1, InputArray src2,
                  OutputArray dst1, OutputArray dst2, bool angleInDegrees )
{
    Mat X = src1.getMat(), Y = src2.getMat();
    int type = X.type(), depth = X.depth(), cn = X.channels();
    CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
    dst1.create( X.dims, X.size, type );
    dst2.create( X.dims, X.size, type );
    Mat Mag = dst1.getMat(), Angle = dst2.getMat();

    const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
    uchar* ptrs[4];
    NAryMatIterator it(arrays, ptrs);
    cv::AutoBuffer<float> _buf;
    float* buf[2] = {0, 0};
    int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
	//cout << "it size: " << it.size << endl;
	//cout << "blocksize: " << blockSize << endl;
	//cout << "BLOCK_SIZE: " << BLOCK_SIZE << endl;
	//cout << "total :  " << total << endl;
    size_t esz1 = X.elemSize1();
	//cout << "esz: " << esz1 << endl;
	//cout << "nplanes: " << it.nplanes << endl;  
    if( depth == CV_64F )
    {
        _buf.allocate(blockSize*2);
        buf[0] = _buf;
        buf[1] = buf[0] + blockSize;
    }
		//cout << total << endl;
	//cout << blockSize << endl;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( j = 0; j < total; j += blockSize )
        {
            int len = std::min(total - j, blockSize);
			
            if( depth == CV_32F )
            {
                const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
                float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
                Magnitude_32f( x, y, mag, len );
                FastAtan2_32f( y, x, angle, len, angleInDegrees );
            }
            ptrs[0] += len*esz1;
            ptrs[1] += len*esz1;
            ptrs[2] += len*esz1;
            ptrs[3] += len*esz1;
        }
    }
}


void newcartToPolar( float* src1, float* src2, float* dst1, float* dst2, int length, bool angleInDegrees )
{
	//dst1 = (float*)malloc(sizeof(float) * length);
	//dst2 = (float*)malloc(sizeof(float) * length);
	int j, k, total = length, blockSize = MIN(total, (BLOCK_SIZE));
	size_t esz1 = 4;
	size_t nplanes = 1;
	float* Dx = src1;
	float* Dy = src2;
	float* Mag = dst1;
	float* Angle = dst2;
	//for(int k = 0; k < IMG_W; k ++)
	//{
		//cout << Dx[k] << endl;
	//}
	//cout << total << endl;
	//cout << blockSize << endl;
	  for( size_t i = 0; i < nplanes; i++ )
    {
        for( j = 0; j < total; j += blockSize )
        {
			int len = std::min(total - j, blockSize);
			Magnitude_32f( Dx, Dy, Mag, len );
			FastAtan2_32f( Dy, Dx, Angle, len, angleInDegrees );
			Dx += len;
			Dy += len;
			Mag += len;
			Angle += len;
		}
    }
}
//****************************************************************************************************

void newcomputeGradient(const Mat& img, Mat& grad, Mat& qangle,
                                    Size paddingTL, Size paddingBR) 
{
    CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );

    Size gradsize(img.cols + paddingTL.width + paddingBR.width,
                  img.rows + paddingTL.height + paddingBR.height);
    //grad.create(gradsize, CV_32FC2);  // <magnitude*(1-alpha), magnitude*alpha>
    //qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
    Size wholeSize;
    Point roiofs;
    img.locateROI(wholeSize, roiofs);

    int i, x, y;
    int cn = img.channels();

    Mat_<float> _lut(1, 256);
    const float* lut = &_lut(0,0);


    //if( gammaCorrection )
        for( i = 0; i < 256; i++ )
            _lut(0,i) = std::sqrt((float)i);
    //else
        //for( i = 0; i < 256; i++ )
         //   _lut(0,i) = (float)i;

    AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4);
    int* xmap = (int*)mapbuf + 1;
    int* ymap = xmap + gradsize.width + 2;

    const int borderType = (int)BORDER_REFLECT_101;

    for( x = -1; x < gradsize.width + 1; x++ )
	{
        xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x,
                        wholeSize.width, borderType) - roiofs.x;
	
	}
		for( y = -1; y < gradsize.height + 1; y++ )
		{
			ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y,
                        wholeSize.height, borderType) - roiofs.y;
			//cout << ymap[y] << endl;
		}
    // x- & y- derivatives for the whole row
    int width = gradsize.width;
    AutoBuffer<float> _dbuf(width*4);
    float* dbuf = _dbuf;
    Mat Dx(1, width, CV_32F, dbuf);
    Mat Dy(1, width, CV_32F, dbuf + width);
    Mat Mag(1, width, CV_32F, dbuf + width*2);
    Mat Angle(1, width, CV_32F, dbuf + width*3);
	//cout << ymap[0] << "   " << xmap[0] << endl;    
    int _nbins = nbins;
    float angleScale = (float)(_nbins/CV_PI);

	    //cout << ymap[128] << endl;

    for( y = 0; y < gradsize.height; y++ )
    {
        const uchar* imgPtr  = img.data + img.step*ymap[y];
        const uchar* prevPtr = img.data + img.step*ymap[y-1];
        const uchar* nextPtr = img.data + img.step*ymap[y+1];

        float* gradPtr = (float*)grad.ptr(y);
        uchar* qanglePtr = (uchar*)qangle.ptr(y);

           //cout << ymap[y] << endl;
	    //cout << y << "!" <<endl;
		//cout << "_______________"<< endl;
            for( x = 0; x < width; x++ )
            {
                int x1 = xmap[x];
			
                dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
                dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
				
				//if(y == 127)
				//{
				//	cout << dbuf[x] << "      " << dbuf[width + x] << endl;
				//}
            }
        
       //cout << y << "@" <<endl;
		mycartToPolar( Dx, Dy, Mag, Angle, false );

        //cartToPolar( Dx, Dy, Mag, Angle, false );
			//for(int k = 0; k < Mag.cols; k++)
			//{
				//cout << Mag.at<float>(0, k) << endl;
			//}


        for( x = 0; x < width; x++ )
        {

            float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f;
			//cout << mag << "," << angle << endl;
            int hidx = cvFloor(angle);
            angle -= hidx;
            gradPtr[x*2] = mag*(1.f - angle);
            gradPtr[x*2+1] = mag*angle;
			//cout << gradPtr[x*2] << "," << gradPtr[x*2+1] << endl;
			//cout << gradPtr[x*2] << "          " << gradPtr[x*2+1] << endl;
            if( hidx < 0 )
                hidx += _nbins;
            else if( hidx >= _nbins )
                hidx -= _nbins;
            assert( (unsigned)hidx < (unsigned)_nbins );

            qanglePtr[x*2] = (uchar)hidx;
            hidx++;
            hidx &= hidx < _nbins ? -1 : 0;
            qanglePtr[x*2+1] = (uchar)hidx;
        }
		//cout << y << "#" <<endl;
    }
	//cout << y;
}





//****************************************************************************************************
int myborderInterpolate( int p, int len)
{
    if( (unsigned)p < (unsigned)len )
        //cout << "fuck" << endl
			;
    else
    {
        int delta = 1;
        if( len == 1 )
            return 0;
        do
        {
            if( p < 0 )
                p = -p - 1 + delta;
            else
                p = len - 1 - (p - len) - delta;
        }
        while( (unsigned)p >= (unsigned)len );
    }
    return p;
}
//****************************************************************************************************
//****************************************************************************************************
void mycomputeGradient(const Mat& img, float** grad1, float** grad2, uchar** qangle1, uchar** qangle2)
{

    int grad_w = img.cols;
	int grad_h = img.rows;


    int i, x, y;
    //int cn = img.channels();

    //Mat_<float> _lut(1, 256);
	float lut[256];


        for( i = 0; i < 256; i++ )
            lut[i] = std::sqrt((float)i);

		//int xmap[IMG_W + 1];
		//int ymap[IMG_H + 1];
		int* xmap = (int*)malloc(sizeof(int)*(grad_w+1));
		int* ymap = (int*)malloc(sizeof(int)*(grad_h+1));
		int xmapT = 1, ymapT = 1;

    for( x = -1; x < grad_w + 1; x++ )
	{
		if(x == -1)
		{
			xmapT = myborderInterpolate(x, grad_w);
			continue;
		}
		xmap[x] = myborderInterpolate(x, grad_w);
		//cout << xmap[-1];
	}

	for( y = -1; y < grad_h + 1; y++ )
	{
		if(y == -1)
		{
			ymapT = myborderInterpolate(y, grad_h);
			continue;
		}
		ymap[y] = myborderInterpolate(y, grad_h);
		//cout << ymap[y] << endl;
	}

		//cout << ymap[-1] << endl;
    // x- & y- derivatives for the whole row

	//float Dx[IMG_W];
	//float Dy[IMG_W];
	//float Mag[IMG_W];
	//float Angle[IMG_W];

	float* Dx = (float*)malloc(sizeof(float*)*grad_w);
	float* Dy = (float*)malloc(sizeof(float*)*grad_w);
	float* Mag = (float*)malloc(sizeof(float*)*grad_w);
	float* Angle = (float*)malloc(sizeof(float*)*grad_w);


    float angleScale = (float)(nbins/CV_PI);
	

	cout << xmapT << "  " << ymapT << endl;
	cout << ymap[128] << "  " << xmap[64] << endl;
    for( y = 0; y < grad_h; y++ )
    {
            for( x = 0; x < grad_w; x++ )
            {   
		
                int x1 = xmap[x];
				int a1 = (int)img.at<uchar>(ymap[y], xmap[x+1]);
				//int a2 = (int)img.at<uchar>(ymap[y], xmap[x-1]);
				int a3 = (int)img.at<uchar>(ymap[y+1], x1);
				//int a4 = (int)img.at<uchar>(ymap[y-1], x1);
				int a2, a4;
				if(x == 0)
				{
					 a2 = (int)img.at<uchar>(ymap[y], xmapT);
				}
				else 
				{
					 a2 = (int)img.at<uchar>(ymap[y], xmap[x-1]);
				}
				if(y == 0)
				{
					 a4 = (int)img.at<uchar>(ymapT, x1);
				}
				else
				{
					 a4 = (int)img.at<uchar>(ymap[y-1], x1);
				}
				Dx[x] = (float)(lut[a1] - lut[a2]);
				Dy[x] = (float)(lut[a3] - lut[a4]);
            }
       
			
        newcartToPolar( Dx, Dy, Mag, Angle, grad_w, false);

        for( x = 0; x < grad_w; x++ )
        {
			float mag = Mag[x], angle = Angle[x]*angleScale - 0.5f;
			int hidx = floor(angle);
            angle -= hidx;
			grad1[y][x] = mag*(1.f - angle);
			grad2[y][x] = mag*angle;
			//cout << grad1[y][x] << "," << grad2[y][x] << endl;
            if( hidx < 0 )
                hidx += nbins;
            else if( hidx >= nbins )
                hidx -= nbins;
            assert( (unsigned)hidx < (unsigned)nbins );

			qangle1[y][x] = (uchar)hidx;
			hidx++;
            hidx &= hidx < nbins ? -1 : 0;
			qangle2[y][x] = (uchar)hidx;
        }
    }
	free(xmap);
	free(ymap);
	free(Dx);
	free(Dy);
	free(Mag);
	free(Angle);
}

//****************************************************************************************************

int main()
{
	Mat src = imread("test.png");
	if (src.empty())
        {
            cout << "Can not load the image" << endl;
            return 0;
        }
	//缩放
        Mat testImage;
        //resize(src, testImage, Size(64, 128));
		cvtColor(src, testImage, CV_BGR2GRAY);
		//测试图片提取HOG特征
        HOGDescriptor hog(cvSize(64, 128), cvSize(16, 16), cvSize(8, 8), cvSize(8, 8), 9);
        vector<float> descriptors;
		Mat grad, qangle;
		Mat grad3, qangle3;
		Size gradsize(testImage.cols, testImage.rows);
		//grad.create(gradsize, CV_32FC2);  // <magnitude*(1-alpha), magnitude*alpha>
		//qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
		grad3.create(gradsize, CV_32FC2);  // <magnitude*(1-alpha), magnitude*alpha>
		qangle3.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
		//double t = (double)getTickCount();
		//hog.computeGradient(testImage, grad, qangle);
		newcomputeGradient(testImage, grad3, qangle3, cvSize(0, 0), cvSize(0, 0));
		//t = ((double)getTickCount() - t)/getTickFrequency();
		//cout << " t : " << t * 1000 << " ms" <<endl;
		//cout << "grad_w: " << grad.cols << endl;
		//cout << "grad_h: " << grad.rows << endl;


		int w = testImage.cols;
		int h = testImage.rows;
		float** grad1 = (float**)malloc(sizeof(float*) * h);
		for(int i = 0; i < h; i ++)
		{
			grad1[i] = (float*)malloc(sizeof(float) * w);
		}
		float** grad2 = (float**)malloc(sizeof(float*) * h);
		for(int i = 0; i < h; i ++)
		{
			grad2[i] = (float*)malloc(sizeof(float) * w);
		}
		uchar** qangle1 = (uchar**)malloc(sizeof(uchar*) * h);
		for(int i = 0; i < h; i ++)
		{
			qangle1[i] = (uchar*)malloc(sizeof(uchar) * w);
		}
		uchar** qangle2 = (uchar**)malloc(sizeof(uchar*) * h);
		for(int i = 0; i < h; i ++)
		{
			qangle2[i] = (uchar*)malloc(sizeof(uchar) * w);
		}
		
		cout << "0000" << endl;
		mycomputeGradient(testImage, grad1, grad2, qangle1, qangle2);
		cout << "1111" << endl;

		//cout << grad.channels() << endl;
		for(int i = 0; i < h; i ++)
		{
			float* gradPtr = (float*)grad3.ptr(i);
			uchar* qanglePtr = (uchar*)qangle3.ptr(i);
			for(int j = 0; j < w; j ++)
			{
				//cout << gradPtr[2*j] << " _ " << gradPtr[2*j +1] <<  "              ";
				//cout << grad1[i][j] << " _ " << grad2[i][j] << endl;
				if(gradPtr[2*j] == grad1[i][j] && gradPtr[2*j +1] == grad2[i][j])
				{
					cout << "ok" << endl;
				}
				else 
				{
				cout << i << "," << j << endl;;
				}
				//cout << grad.at<float>(i, j) << endl;
				//cout << grad1[i][j] << endl;
				/*float a = grad.at<float>(i, j);
				float b = grad1.at<float>(i, j);
				if(a == b)
				{
					cout << "nice+(" << j << "," << i << ")" << endl;
				}
				else
				{
					cout << "*";
				}*/
			}
		}



        //hog.compute(testImage, descriptors);
        //cout << "HOG dimensions:" << descriptors.size() << endl;
		getchar();
	
	return 0;
}

 

 

上述是源码和改写函数共存的一个测试函数,旨在使二者输出结果相同,即改写成功。

newcomputeGradient函数为源码函数(这里写出来是为了测试输出更加方便)

mycomputeGradient函数为改写后的函数

 

当然这附加的改写函数是newcartToPolar函数,源码是mycartToPolar函数

 

可以看出,大部分的修改是Mat转化成数组的修改。

 

大家一定很好奇,为什么我要把函数名字给改了,这是因为这些函数名早已被OpenCV定义好了,具体定义在cxcore头文件中,如果包含了该头文件,那么就不能重名,意味着我需要改函数名;否则,最直接了当的办法就是把这个头文件给删除!

 

这里还需要注意:

源码中会出现xmap[-1]、ymap[-1]这么一些神奇的东西。虽然改写成普通数组编译时不会出错的,但是,这个-1实际上表示的是数组上一个地址的内存单元,这是未定义的,所以盲目使用,可能会出错,就比如说:

我定义一个int变量,紧接着我给xmap[-1] = 1;很可能这个-1指向的内存单元就是前面所定义的变量,这样的错误是很难发现的!

所以这里我定义了xmapT,ymapT变量,意思就是避免上述错误!

 

关于borderInterpolate函数,源码实在opencv_core的filter.cpp中

关于FastAtan2_32f,Magnitude_32f,cartToPolar函数,源码均在opencv_core的mathfuncs.cpp中