文章/答案/技术大牛

发布

问使用较大数值时出现段错误(Xeon Phi)
EN

Stack Overflow用户

提问于 2014-12-20 21:24:20

回答 2查看 417关注 0票数 0

我正在研究一个 Collatz 猜想问题，通过 Stampede 使用 Xeon Phi 运行。我已经测试过我的代码：测试上限为 10 万时运行正常，但测试上限为 100 万时，我几乎立即收到一个段错误("SIGSEGV")。我已经为此头疼了好几天，但就是找不出 bug。非常感谢任何帮助。

// Unsigned integer type used for the tested numbers and the intermediate values.
typedef unsigned long long bigInt;

// Number to test to (starting from 1); also the element count of both arrays in main.
   #define bigSize     100000

// Result for one tested number.
typedef struct {
    int numSteps;       // iterations performed until the value no longer exceeded the start
    bigInt stopPoint;   // value at which the iteration stopped (not greater than the start)
} batcher;

// One entry per tested number: the number itself paired with its result.
typedef struct {
    bigInt num;
    batcher to_batch;
} to_ret;
// For each index i in 1..bigSize-1, iterates the shortcut Collatz map from
// i + 1 until the value is no longer greater than i + 1, records the step
// count and final value, then regroups the results by tested number.
int main () {
    //Stores values as [num][#steps to smaller val][smaller val]
    // NOTE(review): automatic (local) array of bigSize elements — its size
    // grows linearly with bigSize and it lives in main's stack frame.
    to_ret retlist[bigSize];
    //Stores values as [#steps to smaller val][smaller val], sorted by num
    // NOTE(review): second automatic array of bigSize elements.
    batcher results[bigSize];
    ...

    // Runs the enclosed block on coprocessor 0, copying retlist in and out.
    #pragma offload target(mic:0) inout(retlist) shared(retlist)
    {
        #pragma omp parallel for
        for(i = 1; i < bigSize; i++){
            // Index i holds the number i + 1; index 0 is not written by this loop.
            retlist[i].num = i + 1;
            bigInt next = retlist[i].num;
            int count = 0;

            do {
                count++;

                // Odd: 3n+1 followed immediately by the halving; even: halve.
                if (next%2 == 1)
                    next=(3*next+1)/2;
                else
                    next/=2;

            } while(next > retlist[i].num);   // stop once next <= starting number

            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }
    }

    ///Organizes data into a sorted array
    // NOTE(review): this loop starts at i = 0 and reads retlist[0].num, which the
    // loop above never assigns — presumably set in the elided code; verify,
    // because num == 0 would make the unsigned index num - 1 wrap around.
    #pragma omp parallel for
    for (i = 0; i < bigSize; i++){
        results[retlist[i].num - 1] = retlist[i].to_batch;
    }
    ...
}

我相当确定问题就出在上面的代码段中。

openmp

collatz

xeon-phi

回答 2

Stack Overflow用户

回答已采纳

发布于 2014-12-22 05:19:56

完整的代码可以在 GitHub 上找到(见此处链接)。尽管它仍然存在许多效率问题(例如可以利用矢量化支持来改进)，但我目前采用的写法(借鉴了巴拉克-马诺斯的建议)如下：

/// Unsigned integer type used for the tested numbers and the intermediate values.
typedef unsigned long long bigInt;

/// Number to test up to (starting from 1)
/// NOTE(review): also the element count of both file-scope arrays below; with
/// this value each array holds 10^9 structs — confirm that host and
/// coprocessor memory can actually hold them.
#define bigSize     1000000000 //340282366920938463463374607431768211455

/// Result for one tested number.
typedef struct {
    int numSteps;       ///< iterations performed until the value no longer exceeded the start
    bigInt stopPoint;   ///< value at which the iteration stopped (not greater than the start)
} batcher;

/// One entry per tested number: the number itself paired with its result.
typedef struct {
    bigInt num;
    batcher to_batch;
} to_ret;

/// Both arrays are defined at file scope (static storage, not locals of main)
/// and carry the target(mic) attribute used by the offload region in main.
__attribute__((target(mic))) to_ret retlist[bigSize]; ///Stores values as [num][#steps to smaller val][smaller val]
__attribute__((target(mic))) batcher results[bigSize]; ///Stores values as [#steps to smaller val][smaller val] & is sorted by num


/*
 * For every n in 2..bigSize, iterates the shortcut Collatz map
 * (odd: (3n+1)/2, even: n/2) until the value is no longer greater than n,
 * storing the step count and the stopping value. The per-number results are
 * copied into results[] (index n-1), and a final serial pass adds to each
 * entry the step count already accumulated for its stopping value.
 *
 * Returns 0.
 */
int main () {
    bigInt j;
    double start, end;

    /* Entry 0 (n = 1) is seeded by hand because the loop below starts at index 1. */
    retlist[0].num = 1; retlist[0].to_batch.numSteps = 0; retlist[0].to_batch.stopPoint = 1;
    start = omp_get_wtime();

    #pragma offload target(mic:0) out(results)
    {
        bigInt i;

    #pragma omp parallel for
        for(i = 1; i < bigSize; i++){
            /*
             * next and count are declared inside the loop body so that every
             * thread works on its own copies. Variables declared outside the
             * parallel loop are shared by default, which made concurrent
             * iterations overwrite each other's state.
             */
            bigInt next = retlist[i].num = i + 1;
            int count = 0;

            do {
                count++;

                if (next%2 == 1)
                    next=(3*next+1)/2;
                else
                    next/=2;

            } while(next > retlist[i].num);   /* stop once next <= starting number */

            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }

    ///Organizes data into a sorted array
    #pragma omp parallel for
    for (i = 0; i < bigSize; i++){
        results[i] = retlist[i].to_batch;
    }
  }
...

    /*
     * Serial on purpose: entry j reads the entry of its stopping value, which
     * has a smaller (or, for j = 0, the same) index and is therefore final.
     */
    for(j = 0; j < bigSize; j++){
        results[j].numSteps += results[results[j].stopPoint-1].numSteps;
    }

    return(0);
}

如果有人感兴趣，欢迎随时 fork 我的项目。

票数 0

Stack Overflow用户

发布于 2014-12-21 04:47:14

以下代码正确编译：

不会使堆栈溢出。
不会用一堆针对结构体的 typedef 来混淆代码。
不隐藏 bigInt 实际上是 unsigned long long 这一事实。
包含索引变量 'i' 的声明。

目前我无法使用这些用于优化的 pragma(offload 和 OpenMP)，因此把它们注释掉了。

// The bigInt typedef is intentionally left out; unsigned long long is spelled out instead.
//typedef unsigned long long bigInt;

// Number to test to (starting from 1)
// Also the element count of both file-scope arrays below.
#define bigSize     (100000)

// Result for one tested number.
struct batcher
{
    int numSteps;                   // iterations performed until the value no longer exceeded the start
    //bigInt stopPoint;
    unsigned long long stopPoint;   // value at which the iteration stopped (not greater than the start)
};

// One entry per tested number: the number itself paired with its result.
struct to_ret
{
    //bigInt num;
    unsigned long long num;
    struct batcher to_batch;
};

// Both arrays have static storage duration (file scope) instead of being
// locals of main, and are zero-initialized before main runs.
//Stores values as [num][#steps to smaller val][smaller val]
static struct to_ret retlist[bigSize];
//Stores values as [#steps to smaller val][smaller val], sorted by num
static struct batcher results[bigSize];

/*
 * For every n in 2..bigSize, iterates the shortcut Collatz map
 * (odd: (3n+1)/2, even: n/2) until the value is no longer greater than n,
 * storing the step count and the stopping value in retlist[n-1]. The results
 * are then regrouped into results[], indexed by n-1.
 *
 * Returns 0.
 */
int main ()
{
    int i;
    // more code here

    // Seed entry 0 (n = 1) explicitly. The loop below starts at index 1, so
    // without this retlist[0].num would keep its static initial value 0 and
    // the regrouping loop would compute 0 - 1 on an unsigned type, which wraps
    // around and writes far outside results[].
    retlist[0].num = 1;
    retlist[0].to_batch.numSteps = 0;
    retlist[0].to_batch.stopPoint = 1;

    ////#pragma offload target(mic:0) inout(retlist) shared(retlist)
    {
        ////#pragma omp parallel for
        for(i = 1; i < bigSize; i++)
        {
            // Index i holds the number i + 1.
            retlist[i].num = i + 1;
            //bigInt next = retlist[i].num;
            unsigned long long next = retlist[i].num;
            int count = 0;

            do
            {
                count++;

                // Odd: 3n+1 followed immediately by the halving; even: halve.
                if (next%2 == 1)
                    next=(3*next+1)/2;
                else
                    next/=2;

            } while(next > retlist[i].num);   // stop once next <= starting number

            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }
    }

    ///Organizes data into a sorted array
    ////#pragma omp parallel for
    for (i = 0; i < bigSize; i++){
        results[retlist[i].num - 1] = retlist[i].to_batch;
    }
    // more code here

    return(0);
} // end function: main

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/27584467

复制

相似问题

问用高值分割故障(Xeon )
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用高值分割故障(Xeon )EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用高值分割故障(Xeon )
EN