我正在 Stampede 超级计算机上使用 Xeon Phi 研究一个 Collatz 猜想问题。我的代码在测试上限为 10 万时运行正常,但把上限提高到 100 万时,几乎立刻就会收到段错误("SIGSEGV")。我为此苦思了好几天,却始终找不出 bug。非常感谢任何帮助。
// Integer type used for the tested numbers and their intermediate Collatz values.
typedef unsigned long long bigInt;
// Number to test to (starting from 1); also the element count of the
// retlist and results arrays declared in main.
#define bigSize 100000
// Result record for one starting number: how long it was iterated and where
// the iteration stopped.
typedef struct {
// Number of loop iterations performed before the value stopped exceeding the starting number.
int numSteps;
// Value at which the iteration stopped (not greater than the starting number).
bigInt stopPoint;
} batcher;
// Pairs a starting number with the result record computed for it.
typedef struct {
// The starting number.
bigInt num;
// Step count and stop point computed for num.
batcher to_batch;
} to_ret;
// For every number n in 2..bigSize, iterates the Collatz map until the value is
// no longer greater than n, records the iteration count and the stopping value,
// then copies the records into results, indexed by n - 1.
int main () {
//Stores values as [num][#steps to smaller val][smaller val]
// NOTE(review): this is an automatic (block-scope) array sized by bigSize;
// such arrays normally live on the stack, so a larger bigSize may exceed the
// stack limit - likely related to the reported crash, confirm.
to_ret retlist[bigSize];
//Stores values as [#steps to smaller val][smaller val], sorted by num
batcher results[bigSize];
...
// Run the block below on coprocessor 0, copying retlist in and out.
#pragma offload target(mic:0) inout(retlist) shared(retlist)
{
// Slot i holds the number i + 1; slot 0 is not written by this loop.
#pragma omp parallel for
for(i = 1; i < bigSize; i++){
retlist[i].num = i + 1;
bigInt next = retlist[i].num;
int count = 0;
// Iterate at least once and stop as soon as the value is <= the starting number.
do {
count++;
// Odd value: 3n+1 is always even, so the following halving is folded into the same step.
if (next%2 == 1)
next=(3*next+1)/2;
else
next/=2;
} while(next > retlist[i].num);
retlist[i].to_batch.numSteps = count;
retlist[i].to_batch.stopPoint = next;
}
}
///Organizes data into a sorted array
// NOTE(review): this loop starts at 0 and reads retlist[0].num, which the
// visible code never assigns - confirm it is initialised in the elided part,
// otherwise the index below is indeterminate.
#pragma omp parallel for
for (i = 0; i < bigSize; i++){
results[retlist[i].num - 1] = retlist[i].to_batch;
}
...
}我很有信心,这个问题会出现在上面的代码段中。
发布于 2014-12-22 05:19:56
完整的代码可以在 GitHub 上找到。尽管它仍然存在许多效率问题(例如还可以加入向量化支持),但我目前采用的版本如下(采纳了 barak manos(巴拉克·马诺斯)的建议):
/// Integer type used for the tested numbers and their intermediate Collatz values.
typedef unsigned long long bigInt;
/// Number to test up to (starting from 1); also the element count of the
/// global retlist and results arrays.
/// NOTE(review): with 10^9 elements those two arrays together need tens of
/// gigabytes (assuming an 8-byte unsigned long long) - confirm that both the
/// host and the coprocessor can hold them.
#define bigSize 1000000000 //340282366920938463463374607431768211455 (= 2^128 - 1, kept for reference; not used)
/// Per-number result: iteration count plus the value at which iteration stopped.
typedef struct {
/// Iterations performed in main's do/while loop; later turned into a running
/// total by the final summation loop in main.
int numSteps;
/// Value reached when the iteration stopped (not greater than the starting number).
bigInt stopPoint;
} batcher;
/// Work-list entry: a starting number together with its result record.
typedef struct {
/// The starting number (array index + 1 in main).
bigInt num;
/// Result computed for num.
batcher to_batch;
} to_ret;
/// Work list, stored as [num][#steps to smaller val][smaller val].
/// File-scope storage, so the array is not placed on main's stack.
/// NOTE(review): the target(mic) attribute is presumably what makes the array
/// usable inside the offload region - confirm against the Intel compiler docs.
__attribute__((target(mic))) to_ret retlist[bigSize];
/// Results, stored as [#steps to smaller val][smaller val]; entry k belongs to
/// the number k + 1, so the array is ordered by num.
__attribute__((target(mic))) batcher results[bigSize];
/*
 * For every number n in 2..bigSize, iterates the Collatz map on the
 * coprocessor until the value is no longer greater than n, stores the
 * iteration count and stopping value, copies the records into results
 * (entry k belongs to the number k + 1), and finally accumulates the step
 * counts on the host.
 *
 * Returns 0.
 */
int main () {
    bigInt j;
    double start, end;

    /* Entry for n = 1: zero steps, stops at 1. The offloaded loop starts at index 1. */
    retlist[0].num = 1; retlist[0].to_batch.numSteps = 0; retlist[0].to_batch.stopPoint = 1;
    start = omp_get_wtime();
    #pragma offload target(mic:0) out(results)
    {
        bigInt i;
        #pragma omp parallel for
        for (i = 1; i < bigSize; i++) {
            /*
             * The working value and the step counter are declared inside the
             * loop body so that every iteration (and therefore every thread)
             * has its own copy. Declared outside the parallel loop they would
             * be shared between threads, which is a data race.
             */
            bigInt next;
            int count = 0;

            retlist[i].num = i + 1;
            next = retlist[i].num;
            /* Iterate at least once; stop as soon as the value is <= the starting number. */
            do {
                count++;
                if (next % 2 == 1) {
                    /* Odd value: 3n+1 is always even, so the halving is folded into the same step. */
                    next = (3 * next + 1) / 2;
                } else {
                    next /= 2;
                }
            } while (next > retlist[i].num);
            retlist[i].to_batch.numSteps = count;
            retlist[i].to_batch.stopPoint = next;
        }
        ///Organizes data into a sorted array
        #pragma omp parallel for
        for (i = 0; i < bigSize; i++) {
            results[i] = retlist[i].to_batch;
        }
    }
...
    /*
     * Add to each entry the step count stored for its stop point. stopPoint is
     * never greater than j + 1, so the referenced entry is entry j or an
     * earlier one; this loop must therefore run sequentially in ascending order.
     */
    for (j = 0; j < bigSize; j++) {
        results[j].numSteps += results[results[j].stopPoint - 1].numSteps;
    }
    return(0);
}如果有人感兴趣,请随时创建一个叉子我的项目。
发布于 2014-12-21 04:47:14
以下代码可以正确编译:
我目前无法使用这些优化工具(offload / OpenMP),因此把相应的 pragma 注释掉了。
// The bigInt alias is left disabled in this version; unsigned long long is
// spelled out at each use instead.
//typedef unsigned long long bigInt;
// Number to test to (starting from 1); also the element count of the static
// retlist and results arrays.
#define bigSize (100000)
// Result record for one starting number.
struct batcher
{
// Number of loop iterations performed before the value stopped exceeding the starting number.
int numSteps;
// Value at which the iteration stopped (not greater than the starting number).
// Original declaration, kept for reference: bigInt stopPoint;
unsigned long long stopPoint;
};
// Pairs a starting number with the result record computed for it.
struct to_ret
{
// The starting number.
// Original declaration, kept for reference: bigInt num;
unsigned long long num;
// Step count and stop point computed for num.
struct batcher to_batch;
};
//Stores values as [num][#steps to smaller val][smaller val]
// Static storage duration: the array is zero-initialised and is not an
// automatic object inside main.
static struct to_ret retlist[bigSize];
//Stores values as [#steps to smaller val][smaller val], sorted by num
// Static storage duration, same as retlist.
static struct batcher results[bigSize];
// For every number n in 2..bigSize, iterates the Collatz map until the value is
// no longer greater than n, records the iteration count and the stopping value
// in retlist, then copies the records into results, indexed by n - 1.
// Returns 0.
int main ()
{
int i;
// more code here
// Offload directive, disabled in this version.
////#pragma offload target(mic:0) inout(retlist) shared(retlist)
{
// OpenMP directive, disabled in this version.
////#pragma omp parallel for
// Slot i holds the number i + 1; slot 0 is not written by this loop.
for(i = 1; i < bigSize; i++)
{
retlist[i].num = i + 1;
// Original declaration, kept for reference: bigInt next = retlist[i].num;
unsigned long long next = retlist[i].num;
int count = 0;
// Iterate at least once and stop as soon as the value is <= the starting number.
do
{
count++;
// Odd value: 3n+1 is always even, so the following halving is folded into the same step.
if (next%2 == 1)
next=(3*next+1)/2;
else
next/=2;
} while(next > retlist[i].num);
retlist[i].to_batch.numSteps = count;
retlist[i].to_batch.stopPoint = next;
}
}
///Organizes data into a sorted array
// OpenMP directive, disabled in this version.
////#pragma omp parallel for
// NOTE(review): this loop starts at 0. retlist is static, so retlist[0].num is 0
// unless the elided code above assigns it; 0 - 1 in unsigned long long wraps to
// the maximum value and would index far outside results - confirm retlist[0]
// is initialised before this point.
for (i = 0; i < bigSize; i++){
results[retlist[i].num - 1] = retlist[i].to_batch;
}
// more code here
return(0);
} // end function: main (source: https://stackoverflow.com/questions/27584467)
复制相似问题