head	1.1;
branch	1.1.1;
access;
symbols
	netbsd-11-0-RC4:1.1.1.2
	netbsd-11-0-RC3:1.1.1.2
	netbsd-11-0-RC2:1.1.1.2
	netbsd-11-0-RC1:1.1.1.2
	perseant-exfatfs-base-20250801:1.1.1.2
	netbsd-11:1.1.1.2.0.10
	netbsd-11-base:1.1.1.2
	netbsd-10-1-RELEASE:1.1.1.2
	perseant-exfatfs-base-20240630:1.1.1.2
	perseant-exfatfs:1.1.1.2.0.8
	perseant-exfatfs-base:1.1.1.2
	netbsd-9-4-RELEASE:1.1.1.1
	netbsd-10-0-RELEASE:1.1.1.2
	netbsd-10-0-RC6:1.1.1.2
	netbsd-10-0-RC5:1.1.1.2
	netbsd-10-0-RC4:1.1.1.2
	netbsd-10-0-RC3:1.1.1.2
	netbsd-10-0-RC2:1.1.1.2
	netbsd-10-0-RC1:1.1.1.2
	netbsd-10:1.1.1.2.0.6
	netbsd-10-base:1.1.1.2
	netbsd-9-3-RELEASE:1.1.1.1
	cjep_sun2x:1.1.1.2.0.4
	cjep_sun2x-base:1.1.1.2
	cjep_staticlib_x-base1:1.1.1.2
	netbsd-9-2-RELEASE:1.1.1.1
	cjep_staticlib_x:1.1.1.2.0.2
	cjep_staticlib_x-base:1.1.1.2
	netbsd-9-1-RELEASE:1.1.1.1
	phil-wifi-20200421:1.1.1.2
	phil-wifi-20200411:1.1.1.2
	phil-wifi-20200406:1.1.1.2
	netbsd-9-0-RELEASE:1.1.1.1
	netbsd-9-0-RC2:1.1.1.1
	netbsd-9-0-RC1:1.1.1.1
	netbsd-9:1.1.1.1.0.6
	netbsd-9-base:1.1.1.1
	phil-wifi:1.1.1.1.0.4
	phil-wifi-20190609:1.1.1.1
	pgoyette-compat-merge-20190127:1.1.1.1.2.2
	pgoyette-compat-20190127:1.1.1.1
	pgoyette-compat-20190118:1.1.1.1
	pgoyette-compat-1226:1.1.1.1
	pgoyette-compat-1126:1.1.1.1
	pgoyette-compat-1020:1.1.1.1
	pgoyette-compat-0930:1.1.1.1
	pgoyette-compat-0906:1.1.1.1
	pgoyette-compat:1.1.1.1.0.2
	pgoyette-compat-0728:1.1.1.1
	clang-337282:1.1.1.1
	LLVM:1.1.1;
locks; strict;
comment	@// @;


1.1
date	2018.07.17.18.31.36;	author joerg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	wDzL46ALjrCZgwKA;

1.1.1.1
date	2018.07.17.18.31.36;	author joerg;	state Exp;
branches
	1.1.1.1.2.1
	1.1.1.1.4.1;
next	1.1.1.2;
commitid	wDzL46ALjrCZgwKA;

1.1.1.2
date	2019.11.13.22.22.45;	author joerg;	state dead;
branches;
next	;
commitid	QD8YATxuNG34YJKB;

1.1.1.1.2.1
date	2018.07.17.18.31.36;	author pgoyette;	state dead;
branches;
next	1.1.1.1.2.2;
commitid	1UP1xAIUxv1ZgRLA;

1.1.1.1.2.2
date	2018.07.28.04.34.08;	author pgoyette;	state Exp;
branches;
next	;
commitid	1UP1xAIUxv1ZgRLA;

1.1.1.1.4.1
date	2018.07.17.18.31.36;	author christos;	state dead;
branches;
next	1.1.1.1.4.2;
commitid	jtc8rnCzWiEEHGqB;

1.1.1.1.4.2
date	2019.06.10.21.46.30;	author christos;	state Exp;
branches;
next	1.1.1.1.4.3;
commitid	jtc8rnCzWiEEHGqB;

1.1.1.1.4.3
date	2020.04.13.07.50.14;	author martin;	state dead;
branches;
next	;
commitid	X01YhRUPVUDaec4C;


desc
@@


1.1
log
@Initial revision
@
text
@// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Test host codegen.
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
#ifdef CK1

int a[100];

// CK1: define {{.*}}i32 @@{{.+}}teams_argument_globali(
int teams_argument_global(int n) {
  int i;
  int te = n / 128;
  int th = 128;
  // discard n_addr and i
  // CK1: alloca i32,
  // CK1: alloca i32,
  // CK1: [[TE:%.+]] = alloca i32,
  // CK1: [[TH:%.+]] = alloca i32,
  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})

  // CK1: call void @@[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
  #pragma omp target
  #pragma omp teams distribute simd num_teams(te), thread_limit(th) aligned(a) simdlen(16) linear(i)
  for(i = 0; i < n; i++) {
    a[i] = 0;
  }

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK1: call void @@[[OFFL2:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  {{{
  #pragma omp teams distribute simd safelen(32)
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  }}}

  // outlined target regions
  // CK1: define internal void @@[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], [100 x i{{32|64}}]* {{.+}}, i{{32|64}} {{.+}}, {{.+}})
  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
  // CK1: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL1]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  // CK1: define internal void @@[[OFFL2]]({{.+}}, {{.+}})
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @@[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL2]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  return a[0];
}

// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 16}
// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 32}

#endif // CK1

// Test host codegen.
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
#ifdef CK2

// CK2: define {{.*}}i32 @@{{.+}}teams_local_argv(
int teams_local_arg(void) {
  int n = 100;
  int a[n];

  // CK2: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK2: call void @@[[OFFL1:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  #pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }

  // outlined target region
  // CK2: define internal void @@[[OFFL1]]({{.+}}, {{.+}})
  // CK2: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK2: ret void

  // CK2: define internal void @@[[OUTL1]]({{.+}})
  // CK2: call void @@__kmpc_for_static_init_4(
  // CK2: call void @@__kmpc_for_static_fini(
  // CK2: ret void  

  return a[0];
}
// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK2

// Test host codegen.
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
#ifdef CK3

// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }

template <typename T, int X, long long Y>
struct SS{
  T a[X];
  float b;
  // CK3: define {{.*}}i32 @@{{.+}}foo{{.+}}(
  int foo(void) {

  // CK3: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK3: call void @@[[OFFL1:.+]]([[SSI]]* %{{.+}})
    #pragma omp target
    #pragma omp teams distribute simd
    for(int i = 0; i < X; i++) {
      a[i] = (T)0;
    }

      // outlined target region
  // CK3: define internal void @@[[OFFL1]]([[SSI]]* {{.+}})
  // CK3: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK3: ret void

  // CK3: define internal void @@[[OUTL1]]({{.+}})
  // CK3: call void @@__kmpc_for_static_init_4(
  // CK3: call void @@__kmpc_for_static_fini(
  // CK3: ret void  

    return a[0];
  }
};

int teams_template_struct(void) {
  SS<int, 123, 456> V;
  return V.foo();

}
// CK3: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK3

// Test host codegen.
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}

#ifdef CK4

template <typename T, int n>
int tmain(T argc) {
  T a[n];
  int te = n/128;
  int th = 128;
#pragma omp target
#pragma omp teams distribute simd num_teams(te) thread_limit(th)
  for(int i = 0; i < n; i++) {
    a[i] = (T)0;
  }
  return 0;
}

int main (int argc, char **argv) {
  int n = 100;
  int a[n];
#pragma omp target
#pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  return tmain<int, 10>(argc);
}

// CK4:  define {{.*}}i32 @@{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
// CK4: call void @@[[OFFL1:.+]]({{.+}})
// CK4: {{%.+}} = call{{.*}} i32 @@[[TMAIN:.+]]({{.+}})
// CK4:  ret

// CK4:  define {{.*}}void @@[[OFFL1]]({{.+}})
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTL1]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4:  define {{.*}}i32 @@[[TMAIN]]({{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK4: call void @@[[OFFLT:.+]]({{.+}})
// CK4:  ret
// CK4-NEXT: }

// CK4: define {{.*}}void @@[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
// CK4: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTLT]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK4
#endif

@


1.1.1.1
log
@Import clang r337282 from trunk
@
text
@@


1.1.1.2
log
@Mark old LLVM instance as dead.
@
text
@@


1.1.1.1.4.1
log
@file teams_distribute_simd_codegen.cpp was added on branch phil-wifi on 2019-06-10 21:46:30 +0000
@
text
@d1 283
@


1.1.1.1.4.2
log
@Sync with HEAD
@
text
@a0 283
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Test host codegen.
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
#ifdef CK1

int a[100];

// CK1: define {{.*}}i32 @@{{.+}}teams_argument_globali(
int teams_argument_global(int n) {
  int i;
  int te = n / 128;
  int th = 128;
  // discard n_addr and i
  // CK1: alloca i32,
  // CK1: alloca i32,
  // CK1: [[TE:%.+]] = alloca i32,
  // CK1: [[TH:%.+]] = alloca i32,
  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})

  // CK1: call void @@[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
  #pragma omp target
  #pragma omp teams distribute simd num_teams(te), thread_limit(th) aligned(a) simdlen(16) linear(i)
  for(i = 0; i < n; i++) {
    a[i] = 0;
  }

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK1: call void @@[[OFFL2:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  {{{
  #pragma omp teams distribute simd safelen(32)
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  }}}

  // outlined target regions
  // CK1: define internal void @@[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], [100 x i{{32|64}}]* {{.+}}, i{{32|64}} {{.+}}, {{.+}})
  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
  // CK1: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL1]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  // CK1: define internal void @@[[OFFL2]]({{.+}}, {{.+}})
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @@[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL2]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  return a[0];
}

// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 16}
// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 32}

#endif // CK1

// Test host codegen.
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
#ifdef CK2

// CK2: define {{.*}}i32 @@{{.+}}teams_local_argv(
int teams_local_arg(void) {
  int n = 100;
  int a[n];

  // CK2: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK2: call void @@[[OFFL1:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  #pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }

  // outlined target region
  // CK2: define internal void @@[[OFFL1]]({{.+}}, {{.+}})
  // CK2: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK2: ret void

  // CK2: define internal void @@[[OUTL1]]({{.+}})
  // CK2: call void @@__kmpc_for_static_init_4(
  // CK2: call void @@__kmpc_for_static_fini(
  // CK2: ret void  

  return a[0];
}
// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK2

// Test host codegen.
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
#ifdef CK3

// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }

template <typename T, int X, long long Y>
struct SS{
  T a[X];
  float b;
  // CK3: define {{.*}}i32 @@{{.+}}foo{{.+}}(
  int foo(void) {

  // CK3: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK3: call void @@[[OFFL1:.+]]([[SSI]]* %{{.+}})
    #pragma omp target
    #pragma omp teams distribute simd
    for(int i = 0; i < X; i++) {
      a[i] = (T)0;
    }

      // outlined target region
  // CK3: define internal void @@[[OFFL1]]([[SSI]]* {{.+}})
  // CK3: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK3: ret void

  // CK3: define internal void @@[[OUTL1]]({{.+}})
  // CK3: call void @@__kmpc_for_static_init_4(
  // CK3: call void @@__kmpc_for_static_fini(
  // CK3: ret void  

    return a[0];
  }
};

int teams_template_struct(void) {
  SS<int, 123, 456> V;
  return V.foo();

}
// CK3: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK3

// Test host codegen.
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}

#ifdef CK4

template <typename T, int n>
int tmain(T argc) {
  T a[n];
  int te = n/128;
  int th = 128;
#pragma omp target
#pragma omp teams distribute simd num_teams(te) thread_limit(th)
  for(int i = 0; i < n; i++) {
    a[i] = (T)0;
  }
  return 0;
}

int main (int argc, char **argv) {
  int n = 100;
  int a[n];
#pragma omp target
#pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  return tmain<int, 10>(argc);
}

// CK4:  define {{.*}}i32 @@{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
// CK4: call void @@[[OFFL1:.+]]({{.+}})
// CK4: {{%.+}} = call{{.*}} i32 @@[[TMAIN:.+]]({{.+}})
// CK4:  ret

// CK4:  define {{.*}}void @@[[OFFL1]]({{.+}})
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTL1]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4:  define {{.*}}i32 @@[[TMAIN]]({{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK4: call void @@[[OFFLT:.+]]({{.+}})
// CK4:  ret
// CK4-NEXT: }

// CK4: define {{.*}}void @@[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
// CK4: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTLT]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK4
#endif

@


1.1.1.1.4.3
log
@Mostly merge changes from HEAD upto 20200411
@
text
@@


1.1.1.1.2.1
log
@file teams_distribute_simd_codegen.cpp was added on branch pgoyette-compat on 2018-07-28 04:34:08 +0000
@
text
@d1 283
@


1.1.1.1.2.2
log
@Sync with HEAD
@
text
@a0 283
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Test host codegen.
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
#ifdef CK1

int a[100];

// CK1: define {{.*}}i32 @@{{.+}}teams_argument_globali(
int teams_argument_global(int n) {
  int i;
  int te = n / 128;
  int th = 128;
  // discard n_addr and i
  // CK1: alloca i32,
  // CK1: alloca i32,
  // CK1: [[TE:%.+]] = alloca i32,
  // CK1: [[TH:%.+]] = alloca i32,
  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})

  // CK1: call void @@[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
  #pragma omp target
  #pragma omp teams distribute simd num_teams(te), thread_limit(th) aligned(a) simdlen(16) linear(i)
  for(i = 0; i < n; i++) {
    a[i] = 0;
  }

  // CK1: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK1: call void @@[[OFFL2:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  {{{
  #pragma omp teams distribute simd safelen(32)
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  }}}

  // outlined target regions
  // CK1: define internal void @@[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], [100 x i{{32|64}}]* {{.+}}, i{{32|64}} {{.+}}, {{.+}})
  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
  // CK1: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL1]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  // CK1: define internal void @@[[OFFL2]]({{.+}}, {{.+}})
  // CK1: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @@[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK1: ret void

  // CK1: define internal void @@[[OUTL2]]({{.+}})
  // CK1: call void @@__kmpc_for_static_init_4(
  // CK1: call void @@__kmpc_for_static_fini(
  // CK1: ret void

  return a[0];
}

// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 16}
// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 32}

#endif // CK1

// Test host codegen.
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
#ifdef CK2

// CK2: define {{.*}}i32 @@{{.+}}teams_local_argv(
int teams_local_arg(void) {
  int n = 100;
  int a[n];

  // CK2: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK2: call void @@[[OFFL1:.+]](i{{64|32}} %{{.+}})
  #pragma omp target
  #pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }

  // outlined target region
  // CK2: define internal void @@[[OFFL1]]({{.+}}, {{.+}})
  // CK2: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK2: ret void

  // CK2: define internal void @@[[OUTL1]]({{.+}})
  // CK2: call void @@__kmpc_for_static_init_4(
  // CK2: call void @@__kmpc_for_static_fini(
  // CK2: ret void  

  return a[0];
}
// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK2

// Test host codegen.
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
#ifdef CK3

// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }

template <typename T, int X, long long Y>
struct SS{
  T a[X];
  float b;
  // CK3: define {{.*}}i32 @@{{.+}}foo{{.+}}(
  int foo(void) {

  // CK3: call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
  // CK3: call void @@[[OFFL1:.+]]([[SSI]]* %{{.+}})
    #pragma omp target
    #pragma omp teams distribute simd
    for(int i = 0; i < X; i++) {
      a[i] = (T)0;
    }

      // outlined target region
  // CK3: define internal void @@[[OFFL1]]([[SSI]]* {{.+}})
  // CK3: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
  // CK3: ret void

  // CK3: define internal void @@[[OUTL1]]({{.+}})
  // CK3: call void @@__kmpc_for_static_init_4(
  // CK3: call void @@__kmpc_for_static_fini(
  // CK3: ret void  

    return a[0];
  }
};

int teams_template_struct(void) {
  SS<int, 123, 456> V;
  return V.foo();

}
// CK3: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK3

// Test host codegen.
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}

#ifdef CK4

template <typename T, int n>
int tmain(T argc) {
  T a[n];
  int te = n/128;
  int th = 128;
#pragma omp target
#pragma omp teams distribute simd num_teams(te) thread_limit(th)
  for(int i = 0; i < n; i++) {
    a[i] = (T)0;
  }
  return 0;
}

int main (int argc, char **argv) {
  int n = 100;
  int a[n];
#pragma omp target
#pragma omp teams distribute simd
  for(int i = 0; i < n; i++) {
    a[i] = 0;
  }
  return tmain<int, 10>(argc);
}

// CK4:  define {{.*}}i32 @@{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
// CK4: call void @@[[OFFL1:.+]]({{.+}})
// CK4: {{%.+}} = call{{.*}} i32 @@[[TMAIN:.+]]({{.+}})
// CK4:  ret

// CK4:  define {{.*}}void @@[[OFFL1]]({{.+}})
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @@[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTL1]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4:  define {{.*}}i32 @@[[TMAIN]]({{.+}})
// CK4:   call i32 @@__tgt_target_teams(i64 -1, i8* @@{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK4: call void @@[[OFFLT:.+]]({{.+}})
// CK4:  ret
// CK4-NEXT: }

// CK4: define {{.*}}void @@[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
// CK4: {{%.+}} = call i32 @@__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
// CK4: call void {{.+}} @@__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @@[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
// CK4: ret void

// CK4: define internal void @@[[OUTLT]]({{.+}})
// CK4: call void @@__kmpc_for_static_init_4(
// CK4: call void @@__kmpc_for_static_fini(
// CK4: ret void

// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
#endif // CK4
#endif

@


