in / out / inout 详细用法

下面的代码主要以in为例进行测试, out和inout的用法与之类似。各节以代码为主, 并附带执行结果。

静态一维数组

#include <stdio.h>
#include <stdlib.h>

// Demonstrates three forms of the in() clause on one-dimensional local arrays.
//
// Three arrays of n ints are filled on the host (arr = 0..n-1,
// arr2 = n..2n-1, arr3 = 2n..3n-1), offloaded with different in() forms,
// and then printed from inside the offload region.
//
// n must be at least 7, because the section arr3[2:5] covers indices 2..6.
void offload_one_dim_array(int n) {
    int arr[n];
    int arr2[n];
    int arr3[n];
    int i;

    for (i = 0; i < n; i++) {
        arr[i] = i;
        arr2[i] = n + i;
        arr3[i] = 2 * n + i;
    }

    // Upload every element of arr, the first 5 elements of arr2 (indices 0-4),
    // and 5 elements of arr3 starting at index 2 (indices 2-6).
    // An array section is written [start:length], so the start index is 2.
    #pragma offload target(mic) in(arr) in(arr2:length(5)) in(arr3[2:5])
    {
        for (i = 0; i < n; i++) {
            printf(" arr[%d] is %d\n", i, arr[i]);
        }

        printf("==========================\n");

        for (i = 0; i < n; i++) {
            printf("arr2[%d] is %d\n", i, arr2[i]);
        }

        printf("==========================\n");

        for (i = 0; i < n; i++) {
            printf("arr3[%d] is %d\n", i, arr3[i]);
        }
    }
}

// Entry point: runs the one-dimensional array demo with 10 elements.
int main(void) {
    offload_one_dim_array(10);
    return 0;
}

输出结果为:

 arr[0] is 0
 arr[1] is 1
 arr[2] is 2
 arr[3] is 3
 arr[4] is 4
 arr[5] is 5
 arr[6] is 6
 arr[7] is 7
 arr[8] is 8
 arr[9] is 9
==========================
arr2[0] is 10
arr2[1] is 11
arr2[2] is 12
arr2[3] is 13
arr2[4] is 14
arr2[5] is 0
arr2[6] is 0
arr2[7] is 0
arr2[8] is 0
arr2[9] is 0
==========================
arr3[0] is 0
arr3[1] is 0
arr3[2] is 22
arr3[3] is 23
arr3[4] is 24
arr3[5] is 25
arr3[6] is 26
arr3[7] is 0
arr3[8] is 0
arr3[9] is 0

静态二维数组

#include <stdio.h>
#include <stdlib.h>

// Demonstrates in() clauses on two-dimensional local arrays.
//
// Four n x n arrays are filled in row-major order with consecutive values
// (arr starts at 0, arr2 at n*n, arr3 at 2*n*n, arr4 at 3*n*n), offloaded
// with different in() forms, and printed from inside the offload region.
//
// n must be at least 3 so that length(5) and the [0:2] sections stay in range.
void offload_two_dim_array(int n) {
    int arr[n][n];
    int arr2[n][n];
    int arr3[n][n];
    int arr4[n][n];
    int row, col;
    const int cells = n * n;

    for (row = 0; row < n; ++row) {
        for (col = 0; col < n; ++col) {
            // Row-major position of this cell, counted from 0.
            const int value = row * n + col;

            arr[row][col] = value;
            arr2[row][col] = cells + value;
            arr3[row][col] = 2 * cells + value;
            arr4[row][col] = 3 * cells + value;
        }
    }

    // arr:  upload every value.
    // arr2: upload the first 5 values, treating the array as a flat array of n*n ints.
    // arr3: upload rows 0-1 with all columns; when the second dimension is
    //       omitted it defaults to the full range 0..n-1.
    // arr4: upload only the 2x2 corner arr4[0-1][0-1].
    #pragma offload target(mic) in(arr) in(arr2:length(5)) in(arr3[0:2]) in(arr4[0:2][0:2])
    {
        for (row = 0; row < n; ++row) {
            for (col = 0; col < n; ++col) {
                printf(" arr[%d][%d] is %d\n", row, col, arr[row][col]);
            }
        }

        printf("==========================\n");

        for (row = 0; row < n; ++row) {
            for (col = 0; col < n; ++col) {
                printf("arr2[%d][%d] is %d\n", row, col, arr2[row][col]);
            }
        }

        printf("==========================\n");

        for (row = 0; row < n; ++row) {
            for (col = 0; col < n; ++col) {
                printf("arr3[%d][%d] is %d\n", row, col, arr3[row][col]);
            }
        }

        printf("==========================\n");

        for (row = 0; row < n; ++row) {
            for (col = 0; col < n; ++col) {
                printf("arr4[%d][%d] is %d\n", row, col, arr4[row][col]);
            }
        }
    }
}

// Entry point: runs the two-dimensional array demo on 3 x 3 arrays.
int main(void) {
    offload_two_dim_array(3);
    return 0;
}

下面是输出结果

 arr[0][0] is 0
 arr[0][1] is 1
 arr[0][2] is 2
 arr[1][0] is 3
 arr[1][1] is 4
 arr[1][2] is 5
 arr[2][0] is 6
 arr[2][1] is 7
 arr[2][2] is 8
==========================
arr2[0][0] is 9
arr2[0][1] is 10
arr2[0][2] is 11
arr2[1][0] is 12
arr2[1][1] is 13
arr2[1][2] is 0
arr2[2][0] is 0
arr2[2][1] is 0
arr2[2][2] is 0
==========================
arr3[0][0] is 18
arr3[0][1] is 19
arr3[0][2] is 20
arr3[1][0] is 21
arr3[1][1] is 22
arr3[1][2] is 23
arr3[2][0] is 0
arr3[2][1] is 0
arr3[2][2] is 0
==========================
arr4[0][0] is 27
arr4[0][1] is 28
arr4[0][2] is 0
arr4[1][0] is 30
arr4[1][1] is 31
arr4[1][2] is 0
arr4[2][0] is 0
arr4[2][1] is 0
arr4[2][2] is 0

一个小问题

当数组(非指针)被offload一次之后, 会保留在mic上, 并不会立即释放。在同一个作用域内再次offload时, 本次上传的位置会被更新为新值; 本次没有上传的位置则沿用mic上保存的旧值。
下面是局部变量测试

#include <stdio.h>
#include <stdlib.h>

// Shows that a local (non-pointer) array stays on the MIC between two offloads
// in the same scope.
//
// The array is uploaded in full, modified on the MIC, modified on the host,
// then only partly uploaded again, and finally printed on the host.
//
// n must be at least 10, because indices 1, 8 and 9 are written directly.
void offload_array_test(int n) {
    int arr[n];
    int k;

    for (k = 0; k < n; ++k) {
        arr[k] = k;
    }

    #pragma offload target(mic) in(arr)
    {
        for (k = 0; k < n; ++k) {
            printf("arr[%d] in first offload is %d\n", k, arr[k]);
        }
        // This change is not copied back to the CPU, but it is kept on the MIC.
        arr[9] = 1111;
        printf("==========================\n");
    }

    // Host-side changes: index 1 falls inside the next upload, index 8 does not.
    arr[1] = 1000;
    arr[8] = 2000;

    // Only the first 3 values (indices 0-2) are uploaded this time. On the MIC,
    // arr[1] becomes 1000, while arr[3..n-1] keep the values already stored
    // there; note the value of arr[9] in particular.
    #pragma offload target(mic) in(arr:length(3))
    {
        for (k = 0; k < n; ++k) {
            printf("arr[%d] in second offload is %d\n", k, arr[k]);
        }
    }

    // Host copy of the array, printed outside any offload region.
    for (k = 0; k < n; ++k) {
        printf("arr[%d] without offload is %d\n", k, arr[k]);
    }
    printf("==========================\n");
}

// Entry point: runs the local-array persistence demo with 10 elements.
int main(void) {
    offload_array_test(10);
    return 0;
}

输出结果为(注意: CPU端 "without offload" 的输出排在最前面, 这应该是因为MIC端printf的输出经过缓冲, 稍后才回传到主机终端):

arr[0] without offload is 0
arr[1] without offload is 1000
arr[2] without offload is 2
arr[3] without offload is 3
arr[4] without offload is 4
arr[5] without offload is 5
arr[6] without offload is 6
arr[7] without offload is 7
arr[8] without offload is 2000
arr[9] without offload is 9
==========================
arr[0] in first offload is 0
arr[1] in first offload is 1
arr[2] in first offload is 2
arr[3] in first offload is 3
arr[4] in first offload is 4
arr[5] in first offload is 5
arr[6] in first offload is 6
arr[7] in first offload is 7
arr[8] in first offload is 8
arr[9] in first offload is 9
==========================
arr[0] in second offload is 0
arr[1] in second offload is 1000
arr[2] in second offload is 2
arr[3] in second offload is 3
arr[4] in second offload is 4
arr[5] in second offload is 5
arr[6] in second offload is 6
arr[7] in second offload is 7
arr[8] in second offload is 8
arr[9] in second offload is 1111

下面是全局变量测试:

#include <stdio.h>
#include <stdlib.h>

// Shorthand for the target(mic) attribute, used to tag declarations for the MIC target.
#define __ONMIC__ __attribute__((target(mic)))

// Global array of 10 ints tagged with target(mic); test1() and test2()
// read it from inside their offload regions.
__ONMIC__ int gArr[10];

// Fills the global gArr with 0..9 on the host, then prints it from an offload
// region that has no explicit in() clause.
void test1() {
    int k;

    for (k = 0; k < 10; ++k) {
        gArr[k] = k;
    }

    // No in() clause is given; the recorded output shows the region still
    // sees the values written above.
    #pragma offload target(mic)
    {
        for (k = 0; k < 10; ++k) {
            printf("gArr[%d] in test1 is %d\n", k, gArr[k]);
        }
        printf("==========================\n");
    }
}
// Changes gArr[0] and gArr[5] on the host, uploads only gArr[0:2], and prints
// the whole array from the offload region.
void test2() {
    int k;

    gArr[0] = 10;
    gArr[5] = 10;

    // Only indices 0 and 1 are uploaded, so the new gArr[5] is not sent; the
    // recorded output shows the MIC still holding the earlier value there.
    #pragma offload target(mic) in(gArr[0:2])
    {
        for (k = 0; k < 10; ++k) {
            printf("gArr[%d] in test2 is %d\n", k, gArr[k]);
        }
    }
}

// Entry point: runs the two global-array tests in order, so that test2()
// sees whatever test1() left behind.
int main(void) {
    test1();
    test2();
    return 0;
}

下面是测试结果:

gArr[0] in test1 is 0
gArr[1] in test1 is 1
gArr[2] in test1 is 2
gArr[3] in test1 is 3
gArr[4] in test1 is 4
gArr[5] in test1 is 5
gArr[6] in test1 is 6
gArr[7] in test1 is 7
gArr[8] in test1 is 8
gArr[9] in test1 is 9
==========================
gArr[0] in test2 is 10
gArr[1] in test2 is 1
gArr[2] in test2 is 2
gArr[3] in test2 is 3
gArr[4] in test2 is 4
gArr[5] in test2 is 5
gArr[6] in test2 is 6
gArr[7] in test2 is 7
gArr[8] in test2 is 8
gArr[9] in test2 is 9

一维动态数组

#include <stdio.h>
#include <stdlib.h>

// Demonstrates in() clauses on heap-allocated (pointer) one-dimensional arrays.
//
// Three zero-initialised arrays of 10 ints are allocated and filled on the
// host (arr = 0..9, arr2 = 10..19, arr3 = 20..29), offloaded with different
// in() forms, printed from the offload region, and then freed.
// If any allocation fails, a message is written to stderr and nothing is offloaded.
void offload_point() {
    int n = 10;
    int *arr = calloc(n, sizeof *arr);
    int *arr2 = calloc(n, sizeof *arr2);
    int *arr3 = calloc(n, sizeof *arr3);
    int i;

    // Never write through a failed allocation; free(NULL) is a no-op, so all
    // three pointers can be released unconditionally.
    if (arr == NULL || arr2 == NULL || arr3 == NULL) {
        fprintf(stderr, "offload_point: out of memory\n");
        free(arr);
        free(arr2);
        free(arr3);
        return;
    }

    for (i = 0; i < n; i++) {
        arr[i] = i;
        arr2[i] = n + i;
        arr3[i] = n * 2 + i;
    }

    // When uploading an array that is accessed through a pointer:
    //   1. length(...) or a [start:length] section must be specified;
    //   2. the in clause must be written explicitly.
    // Here: all n elements of arr, indices 2-4 of arr2, indices 0-2 of arr3.
    #pragma offload target(mic) in(arr:length(n)) in (arr2[2:3]) in (arr3:length(3))
    {
        for (i = 0; i < n; i++) {
            printf(" arr[%d] is  %d\n", i, arr[i]);
        }

        printf("==========================\n");

        for (i = 0; i < n; i++) {
            printf("arr2[%d] is %d \n", i, arr2[i]);
        }

        printf("==========================\n");

        for (i = 0; i < n; i++) {
            printf("arr3[%d] is %d \n", i, arr3[i]);
        }
    }

    free(arr);
    free(arr2);
    free(arr3);
}

// Entry point: runs the heap-allocated one-dimensional array demo.
int main(void) {
    offload_point();
    return 0;
}

程序输出如下:

 arr[0] is  0
 arr[1] is  1
 arr[2] is  2
 arr[3] is  3
 arr[4] is  4
 arr[5] is  5
 arr[6] is  6
 arr[7] is  7
 arr[8] is  8
 arr[9] is  9
==========================
arr2[0] is 0
arr2[1] is 0
arr2[2] is 12
arr2[3] is 13
arr2[4] is 14
arr2[5] is 0
arr2[6] is 0
arr2[7] is 0
arr2[8] is 0
arr2[9] is 0
==========================
arr3[0] is 20
arr3[1] is 21
arr3[2] is 22
arr3[3] is 0
arr3[4] is 0
arr3[5] is 0
arr3[6] is 0
arr3[7] is 0
arr3[8] is 0
arr3[9] is 0

使用指针实现的二维数组

首先用typedef定义一个一维静态数组类型, 然后用该类型动态分配一个数组, 这样得到的就相当于一个二维数组

#include <stdio.h>
#include <stdlib.h>

// Row type: a fixed-size array of 5 ints. A dynamically allocated array of
// ARRAY elements can then be indexed like a two-dimensional array.
typedef int ARRAY[5];

//下面相当于上传了一个二维数组
void offload_point2() {
    int n = 3;
    ARRAY *arr = (ARRAY*)calloc(n, sizeof(ARRAY));
    ARRAY *arr2 = (ARRAY*)calloc(n, sizeof(ARRAY));
    int i, j, index = 0;
    for(i = 0; i < n; i++) {
        for(j = 0; j < 5; j++) {
            arr[i][j] = index;
            arr2[i][j] = n *n + index;
            index++;
        }
    }

    #pragma offload target(mic) in(arr:length(n))  in (arr2[0:2][0:2])
    {
        for(i = 0; i < n; i++) {
            for(j = 0; j < 5; j++) {
                printf(" arr[%d][%d] is %d \n", i, j , arr[i][j]);
            }
        }

        printf("==========================\n");

        for(i = 0; i < n; i++) {
            for(j = 0; j < 5; j++) {
                printf("arr2[%d][%d] is %d \n", i, j , arr2[i][j]);
            }
        }
    }

    free(arr);
    free(arr2);
}

// Entry point: runs the pointer-based two-dimensional array demo.
int main(void) {
    offload_point2();
    return 0;
}

输出结果为:

 arr[0][0] is 0
 arr[0][1] is 1
 arr[0][2] is 2
 arr[0][3] is 3
 arr[0][4] is 4
 arr[1][0] is 5
 arr[1][1] is 6
 arr[1][2] is 7
 arr[1][3] is 8
 arr[1][4] is 9
 arr[2][0] is 10
 arr[2][1] is 11
 arr[2][2] is 12
 arr[2][3] is 13
 arr[2][4] is 14
==========================
arr2[0][0] is 9
arr2[0][1] is 10
arr2[0][2] is 0
arr2[0][3] is 0
arr2[0][4] is 0
arr2[1][0] is 14
arr2[1][1] is 15
arr2[1][2] is 0
arr2[1][3] is 0
arr2[1][4] is 0
arr2[2][0] is 0
arr2[2][1] is 0
arr2[2][2] is 0
arr2[2][3] is 0
arr2[2][4] is 0

包含指针的struct

#include <stdio.h>
#include <stdlib.h>

// Example struct that mixes a plain value with a pointer member.
struct my_struct {
    int y;   // plain scalar field
    int *a;  // pointer field; offload_struct() points it at a heap array of 10 ints
};

// Demonstrates offloading a struct that contains a pointer member.
//
// m.y is set to 10 and m.a is pointed at a heap array holding 0..9. The struct
// and the pointed-to data are uploaded with separate in() clauses, and two
// values are printed from the offload region. If the allocation fails, a
// message is written to stderr and nothing is offloaded.
void offload_struct() {
    struct my_struct m;
    int i;

    m.y = 10;
    m.a = calloc(10, sizeof *m.a);

    // Never write through a failed allocation.
    if (m.a == NULL) {
        fprintf(stderr, "offload_struct: out of memory\n");
        return;
    }

    for (i = 0; i < 10; i++) {
        m.a[i] = i;
    }

    // When a struct has a pointer member, the pointer's data must be passed
    // in its own in() clause, in addition to the struct itself.
    #pragma offload target(mic) in(m) in(m.a:length(10))
    {
        printf("offload_struct: the struct.y is %d\n", m.y);
        printf("offload_struct: the struct.a is %d\n", m.a[1]);
    }
    free(m.a);
}

// Entry point: runs the struct-with-pointer offload demo.
int main(void) {
    offload_struct();
    return 0;
}

注意事项

使用offload不能上传指针数组, 即数组中的每个元素本身是一个指针, 或者每个元素内部包含指针, 比如下面两种形式:

// Form 1: an array whose elements are themselves pointers (pointer to pointer).
int **p;


// Form 2: an array of structs in which every element contains a pointer.
struct mystruct {
    int *i;
};

struct mystruct *m;

results matching ""

    No results matching ""

    results matching ""

      No results matching ""