head	1.1;
branch	1.1.1;
access;
symbols
	netbsd-11-0-RC4:1.1.1.12
	netbsd-11-0-RC3:1.1.1.12
	netbsd-11-0-RC2:1.1.1.12
	netbsd-11-0-RC1:1.1.1.12
	perseant-exfatfs-base-20250801:1.1.1.12
	netbsd-11:1.1.1.12.0.10
	netbsd-11-base:1.1.1.12
	netbsd-10-1-RELEASE:1.1.1.12
	perseant-exfatfs-base-20240630:1.1.1.12
	perseant-exfatfs:1.1.1.12.0.8
	perseant-exfatfs-base:1.1.1.12
	netbsd-8-3-RELEASE:1.1.1.10
	netbsd-9-4-RELEASE:1.1.1.11
	netbsd-10-0-RELEASE:1.1.1.12
	netbsd-10-0-RC6:1.1.1.12
	netbsd-10-0-RC5:1.1.1.12
	netbsd-10-0-RC4:1.1.1.12
	netbsd-10-0-RC3:1.1.1.12
	netbsd-10-0-RC2:1.1.1.12
	netbsd-10-0-RC1:1.1.1.12
	netbsd-10:1.1.1.12.0.6
	netbsd-10-base:1.1.1.12
	netbsd-9-3-RELEASE:1.1.1.11
	cjep_sun2x:1.1.1.12.0.4
	cjep_sun2x-base:1.1.1.12
	cjep_staticlib_x-base1:1.1.1.12
	netbsd-9-2-RELEASE:1.1.1.11
	cjep_staticlib_x:1.1.1.12.0.2
	cjep_staticlib_x-base:1.1.1.12
	netbsd-9-1-RELEASE:1.1.1.11
	phil-wifi-20200421:1.1.1.12
	phil-wifi-20200411:1.1.1.12
	phil-wifi-20200406:1.1.1.12
	netbsd-8-2-RELEASE:1.1.1.10
	netbsd-9-0-RELEASE:1.1.1.11
	netbsd-9-0-RC2:1.1.1.11
	netbsd-9-0-RC1:1.1.1.11
	netbsd-9:1.1.1.11.0.2
	netbsd-9-base:1.1.1.11
	phil-wifi-20190609:1.1.1.11
	netbsd-8-1-RELEASE:1.1.1.10
	netbsd-8-1-RC1:1.1.1.10
	pgoyette-compat-merge-20190127:1.1.1.10.12.1
	pgoyette-compat-20190127:1.1.1.11
	pgoyette-compat-20190118:1.1.1.11
	pgoyette-compat-1226:1.1.1.11
	pgoyette-compat-1126:1.1.1.11
	pgoyette-compat-1020:1.1.1.11
	pgoyette-compat-0930:1.1.1.11
	pgoyette-compat-0906:1.1.1.11
	netbsd-7-2-RELEASE:1.1.1.7.2.1
	pgoyette-compat-0728:1.1.1.11
	clang-337282:1.1.1.11
	netbsd-8-0-RELEASE:1.1.1.10
	phil-wifi:1.1.1.10.0.14
	phil-wifi-base:1.1.1.10
	pgoyette-compat-0625:1.1.1.10
	netbsd-8-0-RC2:1.1.1.10
	pgoyette-compat-0521:1.1.1.10
	pgoyette-compat-0502:1.1.1.10
	pgoyette-compat-0422:1.1.1.10
	netbsd-8-0-RC1:1.1.1.10
	pgoyette-compat-0415:1.1.1.10
	pgoyette-compat-0407:1.1.1.10
	pgoyette-compat-0330:1.1.1.10
	pgoyette-compat-0322:1.1.1.10
	pgoyette-compat-0315:1.1.1.10
	netbsd-7-1-2-RELEASE:1.1.1.7.2.1
	pgoyette-compat:1.1.1.10.0.12
	pgoyette-compat-base:1.1.1.10
	netbsd-7-1-1-RELEASE:1.1.1.7.2.1
	clang-319952:1.1.1.10
	matt-nb8-mediatek:1.1.1.10.0.10
	matt-nb8-mediatek-base:1.1.1.10
	clang-309604:1.1.1.10
	perseant-stdc-iso10646:1.1.1.10.0.8
	perseant-stdc-iso10646-base:1.1.1.10
	netbsd-8:1.1.1.10.0.6
	netbsd-8-base:1.1.1.10
	prg-localcount2-base3:1.1.1.10
	prg-localcount2-base2:1.1.1.10
	prg-localcount2-base1:1.1.1.10
	prg-localcount2:1.1.1.10.0.4
	prg-localcount2-base:1.1.1.10
	pgoyette-localcount-20170426:1.1.1.10
	bouyer-socketcan-base1:1.1.1.10
	pgoyette-localcount-20170320:1.1.1.10
	netbsd-7-1:1.1.1.7.2.1.0.6
	netbsd-7-1-RELEASE:1.1.1.7.2.1
	netbsd-7-1-RC2:1.1.1.7.2.1
	clang-294123:1.1.1.10
	netbsd-7-nhusb-base-20170116:1.1.1.7.2.1
	bouyer-socketcan:1.1.1.10.0.2
	bouyer-socketcan-base:1.1.1.10
	clang-291444:1.1.1.10
	pgoyette-localcount-20170107:1.1.1.9
	netbsd-7-1-RC1:1.1.1.7.2.1
	pgoyette-localcount-20161104:1.1.1.9
	netbsd-7-0-2-RELEASE:1.1.1.7.2.1
	localcount-20160914:1.1.1.9
	netbsd-7-nhusb:1.1.1.7.2.1.0.4
	netbsd-7-nhusb-base:1.1.1.7.2.1
	clang-280599:1.1.1.9
	pgoyette-localcount-20160806:1.1.1.9
	pgoyette-localcount-20160726:1.1.1.9
	pgoyette-localcount:1.1.1.9.0.2
	pgoyette-localcount-base:1.1.1.9
	netbsd-7-0-1-RELEASE:1.1.1.7.2.1
	clang-261930:1.1.1.9
	netbsd-7-0:1.1.1.7.2.1.0.2
	netbsd-7-0-RELEASE:1.1.1.7.2.1
	netbsd-7-0-RC3:1.1.1.7.2.1
	netbsd-7-0-RC2:1.1.1.7.2.1
	netbsd-7-0-RC1:1.1.1.7.2.1
	clang-237755:1.1.1.8
	clang-232565:1.1.1.8
	clang-227398:1.1.1.8
	tls-maxphys-base:1.1.1.7
	tls-maxphys:1.1.1.7.0.4
	netbsd-7:1.1.1.7.0.2
	netbsd-7-base:1.1.1.7
	clang-215315:1.1.1.7
	clang-209886:1.1.1.6
	yamt-pagecache:1.1.1.5.0.4
	yamt-pagecache-base9:1.1.1.5
	tls-earlyentropy:1.1.1.5.0.2
	tls-earlyentropy-base:1.1.1.6
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.1.1.5
	riastradh-drm2-base3:1.1.1.5
	clang-202566:1.1.1.5
	clang-201163:1.1.1.4
	clang-199312:1.1.1.3
	clang-198450:1.1.1.3
	clang-196603:1.1.1.2
	clang-195771:1.1.1.1
	LLVM:1.1.1;
locks; strict;
comment	@// @;


1.1
date	2013.11.28.14.14.48;	author joerg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	ow8OybrawrB1f3fx;

1.1.1.1
date	2013.11.28.14.14.48;	author joerg;	state Exp;
branches;
next	1.1.1.2;
commitid	ow8OybrawrB1f3fx;

1.1.1.2
date	2013.12.06.23.16.56;	author joerg;	state Exp;
branches;
next	1.1.1.3;
commitid	oIJdta8D25nvY7gx;

1.1.1.3
date	2014.01.05.15.37.50;	author joerg;	state Exp;
branches;
next	1.1.1.4;
commitid	wh3aCSIWykURqWjx;

1.1.1.4
date	2014.02.14.20.07.01;	author joerg;	state Exp;
branches;
next	1.1.1.5;
commitid	annVkZ1sc17rF6px;

1.1.1.5
date	2014.03.04.19.53.17;	author joerg;	state Exp;
branches
	1.1.1.5.2.1
	1.1.1.5.4.1;
next	1.1.1.6;
commitid	29z1hJonZISIXprx;

1.1.1.6
date	2014.05.30.18.14.50;	author joerg;	state Exp;
branches;
next	1.1.1.7;
commitid	8q0kdlBlCn09GACx;

1.1.1.7
date	2014.08.10.17.08.25;	author joerg;	state Exp;
branches
	1.1.1.7.2.1
	1.1.1.7.4.1;
next	1.1.1.8;
commitid	N85tXAN6Ex9VZPLx;

1.1.1.8
date	2015.01.29.19.57.27;	author joerg;	state Exp;
branches;
next	1.1.1.9;
commitid	mlISSizlPKvepX7y;

1.1.1.9
date	2016.02.27.22.11.00;	author joerg;	state Exp;
branches
	1.1.1.9.2.1;
next	1.1.1.10;
commitid	tIimz3oDlh1NpBWy;

1.1.1.10
date	2017.01.11.10.33.12;	author joerg;	state Exp;
branches
	1.1.1.10.12.1
	1.1.1.10.14.1;
next	1.1.1.11;
commitid	CNnUNfII1jgNmxBz;

1.1.1.11
date	2018.07.17.18.31.59;	author joerg;	state Exp;
branches;
next	1.1.1.12;
commitid	wDzL46ALjrCZgwKA;

1.1.1.12
date	2019.11.13.22.23.13;	author joerg;	state dead;
branches;
next	;
commitid	QD8YATxuNG34YJKB;

1.1.1.5.2.1
date	2014.08.10.07.08.26;	author tls;	state Exp;
branches;
next	;
commitid	t01A1TLTYxkpGMLx;

1.1.1.5.4.1
date	2014.03.04.19.53.17;	author yamt;	state dead;
branches;
next	1.1.1.5.4.2;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.5.4.2
date	2014.05.22.16.19.50;	author yamt;	state Exp;
branches;
next	;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.7.2.1
date	2015.06.04.20.04.47;	author snj;	state Exp;
branches;
next	;
commitid	yRnjq9fueSo6n9oy;

1.1.1.7.4.1
date	2014.08.10.17.08.25;	author tls;	state dead;
branches;
next	1.1.1.7.4.2;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.7.4.2
date	2014.08.19.23.49.29;	author tls;	state Exp;
branches;
next	;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.9.2.1
date	2017.03.20.06.53.40;	author pgoyette;	state Exp;
branches;
next	;
commitid	jjw7cAwgyKq7RfKz;

1.1.1.10.12.1
date	2018.07.28.04.34.20;	author pgoyette;	state Exp;
branches;
next	;
commitid	1UP1xAIUxv1ZgRLA;

1.1.1.10.14.1
date	2019.06.10.21.46.48;	author christos;	state Exp;
branches;
next	1.1.1.10.14.2;
commitid	jtc8rnCzWiEEHGqB;

1.1.1.10.14.2
date	2020.04.13.07.50.41;	author martin;	state dead;
branches;
next	;
commitid	X01YhRUPVUDaec4C;


desc
@@


1.1
log
@Initial revision
@
text
@//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.  A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - Enumerates the operation kinds known to this backend.  The
/// NeonEmitter constructor populates OpMap so that each "OP_*" name string
/// looks up one of these enumerators.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ
};

/// ClassKind - Classification attached to an instruction record.  The
/// NeonEmitter constructor fills ClassMap with the kind assigned to each of
/// the SInst, IInst, WInst, SOpInst, IOpInst, WOpInst, LOpInst and
/// NoTestOpInst classes, and InstructionTypeCode uses the kind to choose the
/// form of the type suffix (only ClassS, ClassI and ClassW produce one).
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};

/// NeonTypeFlags - Packed flag word identifying the type of an overloaded
/// Neon builtin: the element type is stored in the low bits, with one extra
/// bit each for "unsigned" and "quad".  These must be kept in sync with the
/// flags in include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type together with its unsigned and quad bits.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(unsigned(ET) |
              (IsUnsigned ? unsigned(UnsignedFlag) : 0u) |
              (IsQuad ? unsigned(QuadFlag) : 0u)) {}

  /// Return the raw encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - Backend state: the RecordKeeper being processed plus two
/// lookup tables built by the constructor, one from "OP_*" operation names
/// to OpKind and one from instruction class records to ClassKind.
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Operation name -> OpKind.  Entries are registered in table order.
    static const struct {
      const char *Name;
      OpKind Kind;
    } OpTable[] = {
      { "OP_NONE", OpNone },
      { "OP_UNAVAILABLE", OpUnavailable },
      { "OP_ADD", OpAdd },
      { "OP_ADDL", OpAddl },
      { "OP_ADDLHi", OpAddlHi },
      { "OP_ADDW", OpAddw },
      { "OP_ADDWHi", OpAddwHi },
      { "OP_SUB", OpSub },
      { "OP_SUBL", OpSubl },
      { "OP_SUBLHi", OpSublHi },
      { "OP_SUBW", OpSubw },
      { "OP_SUBWHi", OpSubwHi },
      { "OP_MUL", OpMul },
      { "OP_MLA", OpMla },
      { "OP_MLAL", OpMlal },
      { "OP_MULLHi", OpMullHi },
      { "OP_MLALHi", OpMlalHi },
      { "OP_MLS", OpMls },
      { "OP_MLSL", OpMlsl },
      { "OP_MLSLHi", OpMlslHi },
      { "OP_MUL_N", OpMulN },
      { "OP_MLA_N", OpMlaN },
      { "OP_MLS_N", OpMlsN },
      { "OP_MLAL_N", OpMlalN },
      { "OP_MLSL_N", OpMlslN },
      { "OP_MUL_LN", OpMulLane },
      { "OP_MULX_LN", OpMulXLane },
      { "OP_MULL_LN", OpMullLane },
      { "OP_MULLHi_LN", OpMullHiLane },
      { "OP_MLA_LN", OpMlaLane },
      { "OP_MLS_LN", OpMlsLane },
      { "OP_MLAL_LN", OpMlalLane },
      { "OP_MLALHi_LN", OpMlalHiLane },
      { "OP_MLSL_LN", OpMlslLane },
      { "OP_MLSLHi_LN", OpMlslHiLane },
      { "OP_QDMULL_LN", OpQDMullLane },
      { "OP_QDMULLHi_LN", OpQDMullHiLane },
      { "OP_QDMLAL_LN", OpQDMlalLane },
      { "OP_QDMLALHi_LN", OpQDMlalHiLane },
      { "OP_QDMLSL_LN", OpQDMlslLane },
      { "OP_QDMLSLHi_LN", OpQDMlslHiLane },
      { "OP_QDMULH_LN", OpQDMulhLane },
      { "OP_QRDMULH_LN", OpQRDMulhLane },
      { "OP_FMS_LN", OpFMSLane },
      { "OP_FMS_LNQ", OpFMSLaneQ },
      { "OP_TRN1", OpTrn1 },
      { "OP_ZIP1", OpZip1 },
      { "OP_UZP1", OpUzp1 },
      { "OP_TRN2", OpTrn2 },
      { "OP_ZIP2", OpZip2 },
      { "OP_UZP2", OpUzp2 },
      { "OP_EQ", OpEq },
      { "OP_GE", OpGe },
      { "OP_LE", OpLe },
      { "OP_GT", OpGt },
      { "OP_LT", OpLt },
      { "OP_NEG", OpNeg },
      { "OP_NOT", OpNot },
      { "OP_AND", OpAnd },
      { "OP_OR", OpOr },
      { "OP_XOR", OpXor },
      { "OP_ANDN", OpAndNot },
      { "OP_ORN", OpOrNot },
      { "OP_CAST", OpCast },
      { "OP_CONC", OpConcat },
      { "OP_HI", OpHi },
      { "OP_LO", OpLo },
      { "OP_DUP", OpDup },
      { "OP_DUP_LN", OpDupLane },
      { "OP_SEL", OpSelect },
      { "OP_REV16", OpRev16 },
      { "OP_REV32", OpRev32 },
      { "OP_REV64", OpRev64 },
      { "OP_XTN", OpXtnHi },
      { "OP_SQXTUN", OpSqxtunHi },
      { "OP_QXTN", OpQxtnHi },
      { "OP_VCVT_NA_HI", OpFcvtnHi },
      { "OP_VCVT_EX_HI", OpFcvtlHi },
      { "OP_VCVTX_HI", OpFcvtxnHi },
      { "OP_REINT", OpReinterpret },
      { "OP_ADDHNHi", OpAddhnHi },
      { "OP_RADDHNHi", OpRAddhnHi },
      { "OP_SUBHNHi", OpSubhnHi },
      { "OP_RSUBHNHi", OpRSubhnHi },
      { "OP_ABDL", OpAbdl },
      { "OP_ABDLHi", OpAbdlHi },
      { "OP_ABA", OpAba },
      { "OP_ABAL", OpAbal },
      { "OP_ABALHi", OpAbalHi },
      { "OP_QDMULLHi", OpQDMullHi },
      { "OP_QDMLALHi", OpQDMlalHi },
      { "OP_QDMLSLHi", OpQDMlslHi },
      { "OP_DIV", OpDiv },
      { "OP_LONG_HI", OpLongHi },
      { "OP_NARROW_HI", OpNarrowHi },
      { "OP_MOVL_HI", OpMovlHi },
      { "OP_COPY_LN", OpCopyLane },
      { "OP_COPYQ_LN", OpCopyQLane },
      { "OP_COPY_LNQ", OpCopyLaneQ },
      { "OP_SCALAR_MUL_LN", OpScalarMulLane },
      { "OP_SCALAR_MUL_LNQ", OpScalarMulLaneQ },
      { "OP_SCALAR_MULX_LN", OpScalarMulXLane },
      { "OP_SCALAR_MULX_LNQ", OpScalarMulXLaneQ },
      { "OP_SCALAR_VMULX_LN", OpScalarVMulXLane },
      { "OP_SCALAR_VMULX_LNQ", OpScalarVMulXLaneQ },
      { "OP_SCALAR_QDMULL_LN", OpScalarQDMullLane },
      { "OP_SCALAR_QDMULL_LNQ", OpScalarQDMullLaneQ },
      { "OP_SCALAR_QDMULH_LN", OpScalarQDMulHiLane },
      { "OP_SCALAR_QDMULH_LNQ", OpScalarQDMulHiLaneQ },
      { "OP_SCALAR_QRDMULH_LN", OpScalarQRDMulHiLane },
      { "OP_SCALAR_QRDMULH_LNQ", OpScalarQRDMulHiLaneQ }
    };
    for (unsigned i = 0; i != sizeof(OpTable) / sizeof(OpTable[0]); ++i)
      OpMap[OpTable[i].Name] = OpTable[i].Kind;

    // Instruction class record -> ClassKind.  The plain and the "Op" variant
    // of the S/I/W classes share a kind.
    static const struct {
      const char *Name;
      ClassKind Kind;
    } ClassTable[] = {
      { "SInst", ClassS },
      { "IInst", ClassI },
      { "WInst", ClassW },
      { "SOpInst", ClassS },
      { "IOpInst", ClassI },
      { "WOpInst", ClassW },
      { "LOpInst", ClassL },
      { "NoTestOpInst", ClassNoTest }
    };
    for (unsigned i = 0; i != sizeof(ClassTable) / sizeof(ClassTable[0]); ++i)
      ClassMap[R.getClass(ClassTable[i].Name)] = ClassTable[i].Kind;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// with each StringRef representing a single type declared in the string.
/// For "fQf" we would end up with 2 StringRefs, "f" and "Qf", representing
/// 2xfloat and 4xfloat respectively.
///
/// Each entry consists of any run of modifier letters (P, Q, U, H, S) followed
/// by one base type letter.  The StringRefs pushed onto \p TV point into the
/// storage of \p s.  Any other letter is reported as a fatal error at the
/// location of \p r.  Modifier letters at the very end of the string, with no
/// base type after them, produce no entry.
static void ParseTypes(Record *r, std::string &s,
                       SmallVectorImpl<StringRef> &TV) {
  const char *Chars = s.data();
  size_t Start = 0; // index of the first character of the current entry

  for (size_t Pos = 0, End = s.size(); Pos != End; ++Pos) {
    const char C = Chars[Pos];
    switch (C) {
      // Modifier prefix: keep scanning for the base type letter.
      case 'P':
      case 'Q':
      case 'U':
      case 'H':
      case 'S':
        continue;
      // Base type letter: terminates the current entry.
      case 'c':
      case 's':
      case 'i':
      case 'l':
      case 'h':
      case 'f':
      case 'd':
        break;
      default:
        PrintFatalError(r->getLoc(),
                      "Unexpected letter: " + std::string(1, C));
    }
    TV.push_back(StringRef(Chars + Start, Pos - Start + 1));
    Start = Pos + 1;
  }
}

/// Widen - Map a type code to the code of the next wider type: char -> short,
/// short -> int, int -> long, half -> float, float -> double.  Any other code
/// is a fatal error.
static char Widen(const char t) {
  static const char From[] = { 'c', 's', 'i', 'h', 'f' };
  static const char To[]   = { 's', 'i', 'l', 'f', 'd' };

  for (unsigned i = 0; i != sizeof(From); ++i)
    if (t == From[i])
      return To[i];

  PrintFatalError("unhandled type in widen!");
}

/// Narrow - Map a type code to the code of the next smaller type:
/// short -> char, int -> short, long -> int, float -> half, double -> float.
/// Any other code is a fatal error.
static char Narrow(const char t) {
  static const char From[] = { 's', 'i', 'l', 'f', 'd' };
  static const char To[]   = { 'c', 's', 'i', 'h', 'f' };

  for (unsigned i = 0; i != sizeof(From); ++i)
    if (t == From[i])
      return To[i];

  PrintFatalError("unhandled type in narrow!");
}

/// GetNarrowTypestr - Return a copy of the type string \p ty in which every
/// 's', 'i' and 'l' is replaced by the next narrower integer code ('c', 's'
/// and 'i' respectively).  All other characters are copied unchanged.
static std::string GetNarrowTypestr(StringRef ty)
{
  std::string Narrowed(ty.begin(), ty.end());

  // Each character is rewritten at most once, so an 'l' that becomes 'i' is
  // not narrowed a second time.
  for (std::string::iterator I = Narrowed.begin(), E = Narrowed.end();
       I != E; ++I) {
    if (*I == 's')
      *I = 'c';
    else if (*I == 'i')
      *I = 's';
    else if (*I == 'l')
      *I = 'i';
  }

  return Narrowed;
}

/// ClassifyType - Skip the modifier prefix of \p ty and return the base type
/// code that follows it.  The prefix is consumed in a fixed order: an optional
/// 'S' (scalar; skipped without being recorded), then 'Q' or 'H' (sets
/// \p quad), then 'P' (sets \p poly), then 'U' (sets \p usgn).  The output
/// flags are only ever set to true here, never cleared.
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
  unsigned Pos = 0;

  // Scalar marker carries no flag of its own.
  if (ty[Pos] == 'S')
    ++Pos;

  // Quad-vector marker.
  const char QuadCode = ty[Pos];
  if (QuadCode == 'Q' || QuadCode == 'H') {
    quad = true;
    ++Pos;
  }

  // Polynomial marker.
  if (ty[Pos] == 'P') {
    poly = true;
    ++Pos;
  }

  // Unsigned marker.
  if (ty[Pos] == 'U') {
    usgn = true;
    ++Pos;
  }

  // Whatever remains at this position is the base type code.
  return ty[Pos];
}

/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
///
/// \p type is the base type code to transform; the (possibly changed) code is
/// returned.  \p quad and \p usgn are updated in place and may be set or
/// cleared, \p poly may be cleared, and \p scal, \p cnst and \p pntr are only
/// ever set to true (TypeString and BuiltinTypeString initialise them to
/// false before calling).  A mod code without a case below leaves everything
/// untouched.  Several cases deliberately fall through to the next one.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial becomes plain unsigned; other types are unchanged.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // Fall through: 'b' is 'u' plus the scalar flag.
    case 'u':
      usgn = true;
      poly = false;
      // Float codes map to the integer code of the same width.
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // Fall through: '$' is 'x' plus the scalar flag.
    case 'x':
      usgn = false;
      poly = false;
      // Float codes map to the integer code of the same width.
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // Fall through: 'y' is 'f' plus the scalar flag.
    case 'f':
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      quad = true;
      break;
    case 'w':
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      type = Widen(type);
      break;
    case 'i':
      type = 'i';
      scal = true;
      break;
    case 'l':
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      scal = true;
      break;
    case 'k':
      quad = true;
      break;
    case 'c':
      cnst = true;
      // Fall through: 'c' is 'p' plus the const flag.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      type = Narrow(type);
      // The test is on the narrowed code, i.e. the input was 'f'.
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}

/// IsMultiVecProto - Return true when the prototype code \p p is one of
/// '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type.  With a modifier that leaves the type untouched,
/// QUc -> uint8x16_t.
///
/// The modifiers 'v' and 'i' short-circuit to "void" and "int".  Otherwise
/// the name is assembled as: optional 'u' prefix, element name, lane count
/// (omitted for scalars), "x2"/"x3"/"x4" for the multi-vector modifiers,
/// "_t", then " const" and " *" when the modifier asks for them.
static std::string TypeString(const char mod, StringRef typestr) {
  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  bool quad = false, poly = false, usgn = false;
  bool scal = false, cnst = false, pntr = false;

  // Decode the base type, then let the modifier adjust type and flags.
  char type = ClassifyType(typestr, quad, poly, usgn);
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // Element name plus the lane-count suffix for the 128-bit (quad) and
  // 64-bit forms of the vector.
  const char *EltName = 0;
  const char *QuadLanes = 0;
  const char *DoubleLanes = 0;
  switch (type) {
    case 'c':
      EltName = poly ? "poly8" : "int8";
      QuadLanes = "x16";
      DoubleLanes = "x8";
      break;
    case 's':
      EltName = poly ? "poly16" : "int16";
      QuadLanes = "x8";
      DoubleLanes = "x4";
      break;
    case 'i':
      EltName = "int32";
      QuadLanes = "x4";
      DoubleLanes = "x2";
      break;
    case 'l':
      EltName = (poly && !usgn) ? "poly64" : "int64";
      QuadLanes = "x2";
      DoubleLanes = "x1";
      break;
    case 'h':
      EltName = "float16";
      QuadLanes = "x8";
      DoubleLanes = "x4";
      break;
    case 'f':
      EltName = "float32";
      QuadLanes = "x4";
      DoubleLanes = "x2";
      break;
    case 'd':
      EltName = "float64";
      QuadLanes = "x2";
      DoubleLanes = "x1";
      break;
    default:
      PrintFatalError("unhandled type!");
  }

  SmallString<128> s;
  if (usgn)
    s.push_back('u');
  s += EltName;
  if (!scal)
    s += quad ? QuadLanes : DoubleLanes;

  // Structs of 2, 3 or 4 vectors carry an extra count suffix.
  switch (mod) {
    case '2':
    case 'B':
      s += "x2";
      break;
    case '3':
    case 'C':
      s += "x3";
      break;
    case '4':
    case 'D':
      s += "x4";
      break;
    default:
      break;
  }

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";
  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
///
/// \p ret selects between the encoding of a return value and that of an
/// argument; the two only differ for the multi-vector modifiers.  \p ck
/// influences signedness of scalars (ClassI/ClassW) and forces the generic
/// char-vector encoding for ClassB.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  bool quad = false, poly = false, usgn = false;
  bool scal = false, cnst = false, pntr = false;

  // Decode the base type, then let the modifier adjust type and flags.
  char type = ClassifyType(typestr, quad, poly, usgn);
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Half-float ('h') is handled as unsigned short ('s').
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Polynomials, and non-float scalars of the I and W classes, are encoded
  // as unsigned.
  if (poly || ((ck == ClassI || ck == ClassW) &&
               scal && type != 'f' && type != 'd'))
    usgn = true;

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Structs of 2, 3, or 4 vectors: as a result they are returned in a
  // sret-like fashion by storing them to a pointer arg; as an argument they
  // are passed as individual vectors.
  if (IsMultiVecProto(mod)) {
    if (ret)
      return "vv*"; // void result with void* first argument

    switch (mod) {
      case '2':
      case 'B':
        return quad ? "V16ScV16Sc" : "V8ScV8Sc";
      case '3':
      case 'C':
        return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
      default: // '4' or 'D'
        return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
    }
  }

  // Single vector, identical for results and arguments.  Since the value must
  // be one type, pick a vector type of the appropriate width which will be
  // bitcast.
  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";

  if (ck != ClassB) {
    switch (type) {
      case 'd':
        return quad ? "V2d" : "V1d";
      case 's':
        return quad ? "V8s" : "V4s";
      case 'i':
        return quad ? "V4i" : "V2i";
      case 'l':
        return quad ? "V2LLi" : "V1LLi";
      default:
        break;
    }
  }

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
///
/// \p quad is set when the type string carries a quad modifier (it is never
/// cleared here).  \p typeCode receives the suffix for ClassS ("s8", "u16",
/// "p8", "f32", ...), ClassI ("i8", "f32", ...) and ClassW ("8", "16", ...);
/// for every other class it is left as passed in.  An unknown base type, or
/// a double with ClassW, is a fatal error.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  const char type = ClassifyType(typeStr, quad, poly, usgn);

  // Element width in bits and whether the element is floating point.  The
  // base type is validated here regardless of the class.
  const char *Bits = 0;
  bool IsFloat = false;
  switch (type) {
  case 'c': Bits = "8";  break;
  case 's': Bits = "16"; break;
  case 'i': Bits = "32"; break;
  case 'l': Bits = "64"; break;
  case 'h': Bits = "16"; IsFloat = true; break;
  case 'f': Bits = "32"; IsFloat = true; break;
  case 'd': Bits = "64"; IsFloat = true; break;
  default:
    PrintFatalError("unhandled type!");
  }

  // Only the S, I and W classes get a type code.
  if (ck != ClassS && ck != ClassI && ck != ClassW)
    return;

  // Width-specific: just the bit count; there is no width form for double.
  if (ck == ClassW) {
    if (type == 'd')
      PrintFatalError("unhandled type!");
    typeCode = Bits;
    return;
  }

  // Floating point reads the same for ClassS and ClassI.
  if (IsFloat) {
    if (type == 'd') {
      // NOTE(review): unlike every other case this appends to typeCode
      // instead of overwriting it; kept as-is -- confirm whether any caller
      // relies on it.
      typeCode += "f64";
    } else {
      typeCode = std::string("f") + Bits;
    }
    return;
  }

  // Generic integer.
  if (ck == ClassI) {
    typeCode = std::string("i") + Bits;
    return;
  }

  // ClassS integer: polynomial wins over unsigned, except that 32-bit
  // elements have no polynomial form.
  const char *Sign = (poly && type != 'i') ? "p" : (usgn ? "u" : "s");
  typeCode = std::string(Sign) + Bits;
}

/// Insert_BHSD_Suffix - For a scalar type string (one starting with 'S'),
/// return the size letter matching its base type: 'b' for 'c', 'h' for 's',
/// 's' for 'i'/'f' and 'd' for 'l'/'d'.  Returns 0 when the string is not
/// scalar or the base type has no such letter.
static char Insert_BHSD_Suffix(StringRef typestr) {
  if (typestr[0] != 'S')
    return 0;

  // Step over any further modifier letters to reach the base type.
  unsigned Pos = 1;
  while (typestr[Pos] == 'Q' || typestr[Pos] == 'H' ||
         typestr[Pos] == 'P' || typestr[Pos] == 'U')
    ++Pos;

  switch (typestr[Pos]) {
  case 'c':
    return 'b';
  case 's':
    return 'h';
  case 'i':
  case 'f':
    return 's';
  case 'l':
  case 'd':
    return 'd';
  default:
    return 0;
  }
}

/// endsWith_xN - Return true when \p name is longer than three characters
/// and ends in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const size_t Len = name.length();
  if (Len <= 3)
    return false;

  const char Count = name[Len - 1];
  return name[Len - 3] == '_' && name[Len - 2] == 'x' &&
         Count >= '2' && Count <= '4';
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if the type string contains
/// 'Q'.  E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// A 'b', 'h', 's' or 'd' is inserted as well when the 'S' prefix is used.
///
/// The names "vcvt_f32_f16" and "vcvt_f32_f64" are returned unchanged.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
    return name;

  bool quad = false;
  std::string typeCode;
  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string Mangled(name);

  if (!typeCode.empty()) {
    const std::string Suffix = "_" + typeCode;
    // A trailing _xN (N = 2,3,4) stays last: the type code goes before it.
    if (endsWith_xN(Mangled))
      Mangled.insert(Mangled.length() - 3, Suffix);
    else
      Mangled += Suffix;
  }

  if (ck == ClassB)
    Mangled += "_v";

  // The 'q' goes before the first '_' so that it ends up before _lane or _n
  // on vector-scalar operations.
  if (typestr.find('Q') != StringRef::npos)
    Mangled.insert(Mangled.find('_'), "q");

  // The scalar size letter is likewise placed before the first '_'.
  const char SizeLetter = Insert_BHSD_Suffix(typestr);
  if (SizeLetter)
    Mangled.insert(Mangled.find('_'), 1, SizeLetter);

  return Mangled;
}

static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value.
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                               && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - Reduce a prototype string to
/// the four-letter alphabet that GenerateRegisterCheckPattern turns into
/// operand patterns: 'q' (q register), 'd' (d register), 'i' (immediate) and
/// 'a' (d register with a lane index).
///
/// The translated characters are appended to NormedProto; prototype
/// characters not listed in the switch below contribute nothing. A set of
/// name-based special cases then trims the result.
///
/// HasDupPostfix is accepted but not used by this function.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    // These follow the width of the intrinsic: 'q' for quad, 'd' otherwise.
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    // These are always mapped to a q register.
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    // These are always mapped to a d register.
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    // With a lane postfix this becomes a lane operand, otherwise an immediate.
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    // Lane operand for lane intrinsics; a vector register for the _n forms
    // recognised by HasNPostfixAndScalarArgs; otherwise an immediate.
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  // The branches below are mutually exclusive and tried in this order, so an
  // earlier match hides any later one.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // Special lane multiply: overwrite the character at index 2 with the one
    // at index 3, then drop everything from index 3 onwards, leaving three
    // characters.
    // NOTE(review): this indexes positions 2 and 3 without checking the
    // length of NormedProto -- confirm callers always supply enough operands.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    // NOTE(review): index 4 is read without a length check.
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, keep only the first two characters.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from  v{ld,st}N using its ascii representation.
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
///
/// \param Name        intrinsic name (instruction name plus postfixes).
/// \param Proto       prototype string, forwarded to the register pattern
///                    generator.
/// \param OutTypeStr  type string from which the output type code is derived.
/// \param InTypeStr   not referenced by this function.
/// \param Ck          class kind; ClassNoTest suppresses all output.
/// \param InstName    optional explicit instruction name that replaces the
///                    prefix computed from Name.
/// \param IsHiddenLOp when set, the bare prefix is emitted without a type
///                    code.
/// \param Result      receives the generated patterns; zero, one or two
///                    entries are appended.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is matched by exact name and gets a fixed pattern with no
  // operand check.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // The -1 initializer converts to the largest size_t value;
  // PreprocessInstruction only overwrites it when InstName is vtbl/vtbx.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  // InstructionTypeCode is defined outside this excerpt; presumably it fills
  // in IsQuad and OutTypeCode from OutTypeStr -- verify against its
  // definition.
  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp, emit the bare Prefix (no type code) followed by the
  // register pattern.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // The first three characters of the name ("vld"/"vst") are reused and
      // "1.64" is appended; no operand pattern is checked here.
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  // NOTE(review): the condition below tests only the f32 type code, not
  // IsQuad, so it also applies to non-quad forms -- confirm that is intended.
  // Two patterns are emitted: the vmul with operands, then a bare vsub/vadd.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    // Everything after the last '_' in the name is taken as the input type.
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Decide from the prototype string whether the intrinsic has to
/// be emitted as a preprocessor macro rather than an inline function.
///
/// A macro is required when the prototype contains:
///   - 'i': an immediate argument, so that SemaChecking can range check the
///     value passed by the user;
///   - 'p' or 'c': a pointer argument, so that aligned attributes on the
///     pointer type are not hidden.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}

/// MacroArgUsedDirectly - For an intrinsic defined as a macro, tell whether
/// argument i is referenced directly instead of first being copied into a
/// local temporary. That is the case for constant ints ('i'), pointers ('p')
/// and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}

// Generate the string "(argtype a, argtype b, ...)"
static std::string GenArgs(const std::string &proto, StringRef typestr,
                           const std::string &name) {
  bool define = UseMacro(proto);
  char arg = 'a';

  std::string s;
  s += "(";

  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (define) {
      // Some macro arguments are used directly instead of being assigned
      // to local temporaries; prepend an underscore prefix to make their
      // names consistent with the local temporaries.
      if (MacroArgUsedDirectly(proto, i))
        s += "__";
    } else {
      s += TypeString(proto[i], typestr) + " __";
    }
    s.push_back(arg);
    //To avoid argument being multiple defined, add extra number for renaming.
    if (name == "vcopy_lane" || name == "vcopy_laneq")
      s.push_back('1');
    if ((i + 1) < e)
      s += ", ";
  }

  s += ")";
  return s;
}

// Unlike inline function arguments, macro arguments are not type-checked.
// Emit "type __x = (x); " for each argument that is not used directly, so the
// assignment to a typed local performs the check. If at least one local was
// emitted, a line continuation is appended.
static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
                                  const std::string &name) {
  // vcopy_lane / vcopy_laneq argument names carry an extra '1'.
  const bool addDigit = (name == "vcopy_lane" || name == "vcopy_laneq");

  std::string locals;
  char letter = 'a';

  for (unsigned idx = 1, end = proto.size(); idx != end; ++idx, ++letter) {
    // Immediates and pointers are used as-is; a temporary for an immediate
    // would defeat the whole point of using a macro.
    if (MacroArgUsedDirectly(proto, idx))
      continue;

    std::string argName(1, letter);
    if (addDigit)
      argName.push_back('1');

    locals += TypeString(proto[idx], typestr) + " __" + argName +
              " = (" + argName + "); ";
  }

  if (!locals.empty())
    locals += "\\\n  ";
  return locals;
}

// Wrap expression a in a call to the mangled vmovl intrinsic (vmovl_high when
// h is set), which sign-extends or zero-extends a vector.
static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
  const std::string intrinsic = h ? "vmovl_high" : "vmovl";
  return MangleName(intrinsic, typestr, ClassS) + "(" + a + ")";
}

// Wrap expression a in a call to the mangled vget_high intrinsic, which
// yields the high 64-bit part of a vector.
static std::string GetHigh(const std::string &a, StringRef typestr) {
  return MangleName("vget_high", typestr, ClassS) + "(" + a + ")";
}

// Generate the statement "op(high(a), high(b));": a call to the mangled
// two-operand intrinsic op, with both operands replaced by their high
// 64-bit halves.
static std::string Gen2OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b) {
  const std::string highA = GetHigh(a, typestr);
  const std::string highB = GetHigh(b, typestr);
  return MangleName(op, typestr, ClassS) + "(" + highA + ", " + highB + ");";
}

// Generate the statement "op(a, high(b), high(c));": a call to the mangled
// three-operand intrinsic op, where the first operand is passed through and
// the latter two are replaced by their high 64-bit halves.
static std::string Gen3OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b,
                                   const std::string &c) {
  const std::string highB = GetHigh(b, typestr);
  const std::string highC = GetHigh(c, typestr);
  return MangleName(op, typestr, ClassS) +
         "(" + a + ", " + highB + ", " + highC + ");";
}

// Generate a call to the mangled vcombine intrinsic that places a in the low
// 64 bits and b in the high 64 bits of the result.
static std::string GenCombine(std::string typestr,
                              const std::string &a,
                              const std::string &b) {
  return MangleName("vcombine", typestr, ClassS) + "(" + a + ", " + b + ")";
}

// Generate a compound literal "(type){ a, a, ... }" that repeats expression a
// nElts times, where type is TypeString('d', typestr).
static std::string Duplicate(unsigned nElts, StringRef typestr,
                             const std::string &a) {
  std::string literal = "(" + TypeString('d', typestr) + "){ ";

  for (unsigned idx = 0; idx != nElts; ++idx) {
    if (idx != 0)
      literal += ", ";
    literal += a;
  }

  literal += " }";
  return literal;
}

// Generate a __builtin_shufflevector call that shuffles vec with itself and
// uses the same lane expression for each of the nElts result elements.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string shuffle("__builtin_shufflevector(");
  shuffle += vec;
  shuffle += ", ";
  shuffle += vec;

  const std::string laneArg = ", " + lane;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    shuffle += laneArg;

  shuffle += ")";
  return shuffle;
}

// Return name with the first occurrence of "_high" removed. The name must
// contain "_high_"; only the first five characters of that marker are
// dropped, so the trailing underscore stays in place. A fatal error is
// reported when the marker is absent.
static std::string RemoveHigh(const std::string &name) {
  const std::string::size_type pos = name.find("_high_");
  if (pos == std::string::npos)
    PrintFatalError("name should contain \"_high_\" for high intrinsics");

  std::string stripped(name);
  stripped.erase(pos, 5);
  return stripped;
}

// Return the number of vector elements for typestr and report through quad
// whether it is a quad type. The base count is selected by the type code that
// ClassifyType returns and is doubled for quad types. An unrecognised type
// code is a fatal error.
static unsigned GetNumElements(StringRef typestr, bool &quad) {
  quad = false;
  // The remaining two outputs of ClassifyType are not needed here.
  bool ignored = false;
  const char kind = ClassifyType(typestr, quad, ignored, ignored);

  unsigned count = 0;
  switch (kind) {
  case 'c':
    count = 8;
    break;
  case 's':
  case 'h':
    count = 4;
    break;
  case 'i':
  case 'f':
    count = 2;
    break;
  case 'l':
  case 'd':
    count = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }

  return quad ? count * 2 : count;
}

// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpFMSLane:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpXtnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpSqxtunHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpQxtnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtnHi: {
    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtlHi: {
    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
    break;
  }
  case OpFcvtxnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpUzp1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i);
    s += ");";
    break;
  case OpUzp2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i+1);
    s += ");";
    break;
  case OpZip1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpZip2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = nElts/2; i < nElts; i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpTrn1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
    s += ");";
    break;
  case OpTrn2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
    s += ");";
    break;
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  case OpCopyLane: {
    s += TypeString('s', typestr) + " __c2 = " +
         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyQLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpScalarMulLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b, __c);\\\n  __a * __d1;";
    break;
  }
  case OpScalarMulLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
        s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
          "(__b, __c);\\\n  __a * __d1;";
    break;
  }
  case OpScalarMulXLane: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b, __c);\\\n  vmulx" + type + "_" +
      typeCode +  "(__a, __d1);";
    break;
  }
  case OpScalarMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
      typeCode + "(__b, __c);\\\n  vmulx" + type +
      "_" + typeCode +  "(__a, __d1);";
    break;
  }

  case OpScalarVMulXLane: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a, 0);\\\n" +
      "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
      typeCode + "(__b, __c);\\\n" +
      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      "  " + TypeString('d', typestr) + " __g1;\\\n" +
      "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
    break;
  }

  case OpScalarVMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a, 0);\\\n" +
      "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
      typeCode + "(__b, __c);\\\n" +
      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      "  " + TypeString('d', typestr) + " __g1;\\\n" +
      "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
    break;
  }
  case OpScalarQDMullLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
    "vget_lane_" + typeCode + "(b, __c));";
    break;
  }
  case OpScalarQDMullLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
    "vgetq_lane_" + typeCode + "(b, __c));";
    break;
  }
  case OpScalarQDMulHiLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
    "vget_lane_" + typeCode + "(__b, __c));";
    break;
  }
  case OpScalarQDMulHiLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
    "vgetq_lane_" + typeCode + "(__b, __c));";
    break;
  }
  case OpScalarQRDMulHiLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
    "vget_lane_" + typeCode + "(__b, __c));";
    break;
  }
  case OpScalarQRDMulHiLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
    "vgetq_lane_" + typeCode + "(__b, __c));";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
      break;
    case 's':
      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
      break;
    case 'i':
      ET = NeonTypeFlags::Int32;
      break;
    case 'l':
      ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
      break;
    case 'h':
      ET = NeonTypeFlags::Float16;
      break;
    case 'f':
      ET = NeonTypeFlags::Float32;
      break;
    case 'd':
      ET = NeonTypeFlags::Float64;
      break;
    default:
      PrintFatalError("unhandled type!");
  }
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}

/// Return true if \p proto contains the modifier character 's' or 'r' at any
/// position (the return-type slot included).  Callers use this to decide
/// whether a builtin has a scalar operand and therefore cannot be treated as
/// a type-overloaded (ClassB) builtin.
///
/// The prototype is taken by const reference so that the string is not copied
/// on every query.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("sr") != std::string::npos;
}

// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = IsMultiVecProto(proto[0]);

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    unsigned NumOfVec = 0;
    if (proto[i] >= '2' && proto[i] <= '4') {
      NumOfVec = proto[i] - '0';
    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
      NumOfVec = proto[i] - 'A' + 1;
    }
    
    if (NumOfVec > 0) {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

/// Produce the BUILTIN(...) definition line for an intrinsic's builtin: the
/// mangled __builtin_neon_ name, the encoded type string for the return value
/// and every argument, and the attribute string "n".
static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  // Without a scalar operand, bitcasting the arguments takes care of argument
  // checking, so the builtin is emitted in its overloaded (ClassB) form; the
  // actual signedness etc. is handled with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  std::string def("BUILTIN(__builtin_neon_");
  def += MangleName(name, typestr, ck);
  def += ", \"";

  // Position 0 of the prototype is the return type; the rest are arguments.
  for (std::string::size_type pos = 0; pos < proto.size(); ++pos) {
    const bool isReturn = (pos == 0);
    def += BuiltinTypeString(proto[pos], typestr, ck, isReturn);
  }

  // ClassB builtins carry one extra constant integer for the type class enum.
  if (ck == ClassB)
    def += "i";

  def += "\", \"n\")";
  return def;
}

static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr, name);

  // Definition.
  if (define) {
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr, name);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}

/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
///
/// The text is written to \p OS in this order: license banner, include guard
/// and NEON-enabled check, scalar typedefs, vector typedefs, 2/3/4-vector
/// struct typedefs, the __ai helper macro, the ARM intrinsics, and finally the
/// AArch64-only intrinsics (with the crypto ones in their own nested guard).
void NeonEmitter::run(raw_ostream &OS) {
  // License banner, emitted verbatim at the top of the generated header.
  OS << 
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  // Include guard; it is closed by the very last line written below.
  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // The generated header refuses to compile unless NEON is enabled.
  OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t is only declared for AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  // (unsigned on AArch64, signed otherwise; poly64_t is AArch64-only).
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  // TypedefTypes is the list of type codes to declare; ParseTypes splits it
  // into one StringRef per type (the refs point into TypedefTypes).
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // isA64 records whether an "#ifdef __aarch64__" region is currently open.
  // preinsert/postinsert request opening/closing that region before the
  // current typedef, so consecutive AArch64-only types share one guard.
  bool isA64 = false;
  bool preinsert;
  bool postinsert;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    preinsert = false;
    postinsert = false;

    // Types classified as 'd', or as polynomial 'l', are guarded as
    // AArch64-only.
    if (type == 'd' || (type == 'l' && poly)) {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts with one extra space.
    if (nElts < 10)
      OS << " ";

    // "<scalar type> <vector type>;" completes the typedef.
    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  // Close the AArch64 guard if the last typedef left it open.
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs.
  // For vi = 2, 3, 4 emit a struct wrapping "val[vi]" for every vector type,
  // using the same AArch64 guard bookkeeping as above.  The guard state is
  // carried across vi iterations and closed once after both loops.
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      preinsert = false;
      postinsert = false;

      if (type == 'd' || (type == 'l' && poly)) {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      // ts is the element vector type, vs the name of the vi-vector struct.
      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  // __ai prefixes every non-macro intrinsic; it is #undef'd at the end.
  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Text of every intrinsic emitted so far; emitIntrinsic consults it to skip
  // exact duplicates.
  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull, vabd and vabdl intrinsics first so they can be used by
  // other intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // Skip records already emitted up front.  VABDL is not in this list, so it
    // is passed to emitIntrinsic a second time; its definitions are filtered
    // by the EmittedMap check there and only the trailing newline is written
    // again.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // As above, emit the "_HIGH" helpers ahead of the main AArch64 loop.  The
  // loop below visits these records again and relies on emitIntrinsic's
  // duplicate check.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    // Skip crypto temporarily, and will emit them all together at the end.
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  // Crypto intrinsics get their own guard, nested inside the AArch64 one.
  OS << "#ifdef __ARM_FEATURE_CRYPTO\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only crypto intrinsics are emitted here; everything else was handled by
    // the loops above.  (isA64 is not consulted in this loop.)
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (!isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }
  
  // Closes "#ifdef __ARM_FEATURE_CRYPTO".
  OS << "#endif\n\n";

  // Closes "#ifdef __aarch64__".
  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write to \p OS the arm_neon.h definitions generated from
/// record \p R, one per type in its type list.  \p EmittedMap is keyed by the
/// generated text, so a definition that was already written is not repeated.
/// A single newline is written after the record regardless.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string intrName = R->getValueAsString("Name");
  std::string protoStr = R->getValueAsString("Prototype");
  // typeList must stay alive: the StringRefs in typeVec are parsed out of it.
  std::string typeList = R->getValueAsString("Types");

  SmallVector<StringRef, 16> typeVec;
  ParseTypes(R, typeList, typeVec);

  OpKind opKind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind is looked up from the record's second superclass, if any.
  ClassKind recClass = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    recClass = ClassMap[R->getSuperClasses()[1]];
  if (recClass == ClassNone && opKind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  const unsigned numTypes = typeVec.size();
  for (unsigned outIdx = 0; outIdx != numTypes; ++outIdx) {
    if (kind_is_plain:
        opKind != OpReinterpret) {
      // Ordinary intrinsic: the input and output types are the same.
      std::string text = GenIntrinsic(intrName, protoStr, typeVec[outIdx],
                                      typeVec[outIdx], opKind, recClass);
      if (!EmittedMap.count(text)) {
        EmittedMap[text] = recClass;
        OS << text;
      }
      continue;
    }

    // Reinterpret: emit a cast from every other type in the list that has the
    // same quad-ness as the destination type.
    bool outQuad = false;
    bool dummy = false;
    (void)ClassifyType(typeVec[outIdx], outQuad, dummy, dummy);
    for (unsigned inIdx = 0; inIdx != numTypes; ++inIdx) {
      bool inQuad = false;
      (void)ClassifyType(typeVec[inIdx], inQuad, dummy, dummy);
      if (inIdx == outIdx || inQuad != outQuad)
        continue;

      std::string text = GenIntrinsic(intrName, protoStr, typeVec[outIdx],
                                      typeVec[inIdx], OpCast, ClassS);
      if (!EmittedMap.count(text)) {
        EmittedMap[text] = ClassS;
        OS << text;
      }
    }
  }
  OS << "\n";
}

/// Return the largest value allowed for an immediate that indexes into a
/// vector of the type obtained by applying modifier \p mod to \p typestr.
/// The result is a per-element-code count (8, 4, 2 or 1), doubled for quad
/// types, minus one.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // Classify the base type, then let the modifier adjust type and quad-ness.
  bool isQuad = false, unused = false;
  char eltCode = ClassifyType(typestr, isQuad, unused, unused);
  eltCode = ModType(mod, eltCode, isQuad, unused, unused, unused, unused,
                    unused);

  // Element count for the non-quad form of each element code.
  int baseCount = 0;
  switch (eltCode) {
  case 'c':
    baseCount = 8;
    break;
  case 'h':
  case 's':
    baseCount = 4;
    break;
  case 'f':
  case 'i':
    baseCount = 2;
    break;
  case 'd':
  case 'l':
    baseCount = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }

  // Quad types hold twice as many elements.
  const int quadShift = isQuad ? 1 : 0;
  return (baseCount << quadShift) - 1;
}

/// Return the upper bound used for a scalar shift immediate on the type
/// obtained by applying modifier \p mod to \p typestr: 7, 15, 31 or 63
/// depending on the element code.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // Only the element code matters here; every flag output is discarded.
  bool unused = false;
  char eltCode = ClassifyType(typestr, unused, unused, unused);
  eltCode = ModType(mod, eltCode, unused, unused, unused, unused, unused,
                    unused);

  // Width associated with each element code; the bound is one less.
  unsigned width = 0;
  switch (eltCode) {
  case 'c':
    width = 8;
    break;
  case 'h':
  case 's':
    width = 16;
    break;
  case 'f':
  case 'i':
    width = 32;
    break;
  case 'd':
  case 'l':
    width = 64;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  return width - 1;
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// For every builtin-backed record (Operand kind OpNone) whose prototype
/// contains an 'i' immediate, one line of the form
///   case <ARM|AArch64>::BI__builtin_neon_<name>: i = <idx>; <range>; break;
/// is written to \p OS, wrapped in a GET_NEON[_AARCH64]_IMMEDIATE_CHECK guard.
///
/// \param OS               stream receiving the generated case labels.
/// \param A64IntrinsicMap  map keyed by "<name>@@<prototype>"; consulted to
///                         drop ARM records that AArch64 redefines with the
///                         same class kind.
/// \param isA64RangeCheck  true to generate the AArch64 table, false for ARM.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Mangled builtin names already written; only the keys are meaningful.
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only records implemented directly by a builtin (OpNone) are checked.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // Key used to look the record up in A64IntrinsicMap.
    std::string Rename = name + "@@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    // TypeVec holds references into Types, which therefore must outlive it.
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // NOTE: ck is declared outside this loop, so once a branch below sets it
    // to ClassB it stays ClassB for the remaining types of this record.
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // rangestr becomes the "l = ...; u = ..." part of the emitted case;
      // shiftstr is an optional extra argument to RFT().
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");

      } else if (R->getValueAsBit("isScalarShift")) {
        // Right shifts have an 'r' in the name, left shifts do not.  Convert
        // instructions have the same bounds as right shifts.
        if (name.find('r') != std::string::npos ||
            name.find("cvt") != std::string::npos)
          rangestr = "l = 1; ";

        // The bound comes from the type of the operand just before the
        // immediate.  NOTE(review): unlike the final branch, there is no
        // assert here that immPos > 0 - confirm no prototype starts with 'i'.
        rangestr += "u = " +
          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
      } else if (!ProtoHasScalar(Proto)) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (IsMultiVecProto(Proto[0]))
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
        case 'B':
          immidx += 2;
          break;
        case '3':
        case 'C':
          immidx += 3;
          break;
        case '4':
        case 'D':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate: shrink the loop bound so the loop ends after
          // this iteration, leaving immidx at the immediate's own position.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For every "Inst" definition that maps to a real builtin (operand kind
/// OpNone), has no splat ('a') or scalar argument, and belongs to the
/// requested target, this writes up to two switch cases: one carrying the
/// bit mask of permitted 64-bit vector types and one for the 128-bit (quad)
/// types.  Each case may additionally record which argument is a pointer and
/// whether that pointer is const.
///
/// \param OS               stream receiving the generated cases.
/// \param A64IntrinsicMap  class kinds of the AArch64 definitions, keyed by
///                         name and prototype; used to drop ARM definitions
///                         that AArch64 redefines with the same class kind.
/// \param isA64TypeCheck   true to emit the AArch64 section, false for ARM.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only definitions without an operation are implemented as builtins.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type checks in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Accumulate one mask for the 64-bit types and one for the quad types.
    // si / qi remember the last type seen of each width; that type is used
    // below to mangle the builtin name for the corresponding case.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // Only the first such argument is recorded.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit the 64-bit case first and the quad case second; a width whose
    // mask is empty produces no case at all.
    const uint64_t Masks[2] = { mask, qmask };
    const int TypeIdx[2] = { si, qi };
    for (unsigned v = 0; v != 2; ++v) {
      if (!Masks[v])
        continue;

      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[TypeIdx[v]], ClassB) << ": mask = "
         << "0x" << utohexstr(Masks[v]) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// genBuiltinsDef: Emit the NEON BUILTIN() declarations for BuiltinsARM.def
/// or BuiltinsAArch64.def, writing each distinct declaration exactly once.
///
/// \param OS                  stream receiving the declarations.
/// \param A64IntrinsicMap     class kinds of the AArch64 definitions, keyed
///                            by name and prototype.
/// \param isA64GenBuiltinDef  true to emit the AArch64 section, false for ARM.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> Defs = Records.getAllDerivedDefinitions("Inst");

  // Declarations written so far; consulted to suppress duplicates.
  StringMap<OpKind> Seen;

  // Open the preprocessor section for the requested target.
  OS << (isA64GenBuiltinDef ? "#ifdef GET_NEON_AARCH64_BUILTINS\n"
                            : "#ifdef GET_NEON_BUILTINS\n");

  for (std::vector<Record *>::const_iterator DI = Defs.begin(),
                                             DE = Defs.end();
       DI != DE; ++DI) {
    Record *Def = *DI;

    // Definitions that carry an operation are not builtins.
    if (OpMap[Def->getValueAsDef("Operand")->getName()] != OpNone)
      continue;

    std::string Proto = Def->getValueAsString("Prototype");
    std::string name = Def->getValueAsString("Name");

    // A splat ('a') prototype reuses the builtin of the non-splat variant,
    // so it never gets a declaration of its own.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = Def->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(Def, Types, TypeVec);

    if (Def->getSuperClasses().size() < 2)
      PrintFatalError(Def->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[Def->getSuperClasses()[1]];

    // AArch64-only definitions are left out of the ARM section.
    bool isA64 = Def->getValueAsBit("isA64");
    if (isA64 && !isA64GenBuiltinDef)
      continue;

    // An ARM definition is left out of the AArch64 section when AArch64
    // redefines it with the same class kind to cover more types, e.g. "vabd"
    // is a SIntr redefined in AArch64 to handle an additional 2 x f64 type.
    // The map is only indexed after count() succeeds, so no entry is created.
    std::string Rename = name + "@@" + Proto;
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename) &&
        A64IntrinsicMap[Rename] == ck && ck != ClassNone)
      continue;

    // One BUILTIN() line per type, skipping lines already written.
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string Decl = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (!Seen.count(Decl)) {
        Seen[Decl] = OpNone;
        OS << Decl << "\n";
      }
    }
  }

  OS << "#endif\n\n";
}

/// runHeader - Emit a file made of the following sections, each written
/// first for ARM and then for AArch64:
/// 1. the NEON part of BuiltinsARM.def and BuiltinsAArch64.def;
/// 2. the SemaChecking code for the type overload checking;
/// 3. the SemaChecking code validating intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> Defs = Records.getAllDerivedDefinitions("Inst");

  // Record the class kind of every AArch64 intrinsic, keyed by its name and
  // prototype.  The generators below consult this map for their uniqueness
  // checks.  When a key occurs more than once the first definition wins.
  StringMap<ClassKind> A64IntrinsicMap;
  for (std::vector<Record *>::const_iterator DI = Defs.begin(),
                                             DE = Defs.end();
       DI != DE; ++DI) {
    Record *Def = *DI;
    if (!Def->getValueAsBit("isA64"))
      continue;

    // A definition without a second superclass has no class kind.
    ClassKind Kind = ClassNone;
    if (Def->getSuperClasses().size() >= 2)
      Kind = ClassMap[Def->getSuperClasses()[1]];

    std::string Name = Def->getValueAsString("Name");
    std::string Proto = Def->getValueAsString("Prototype");
    std::string Key = Name + "@@" + Proto;
    if (!A64IntrinsicMap.count(Key))
      A64IntrinsicMap[Key] = Kind;
  }

  // Each section is produced for ARM (false) first, then AArch64 (true).
  const bool Targets[2] = { false, true };

  // BuiltinsARM.def / BuiltinsAArch64.def declarations.
  for (unsigned t = 0; t != 2; ++t)
    genBuiltinsDef(OS, A64IntrinsicMap, Targets[t]);

  // Overloaded type checking code for SemaChecking.cpp.
  for (unsigned t = 0; t != 2; ++t)
    genOverloadTypeCheckCode(OS, A64IntrinsicMap, Targets[t]);

  // Range checking code for shift/lane immediates.
  for (unsigned t = 0; t != 2; ++t)
    genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, Targets[t]);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
                           const std::string &proto,
                           StringRef outTypeStr, StringRef inTypeStr,
                           bool isShift, bool isHiddenLOp,
                           ClassKind ck, const std::string &InstName,
                           bool isA64,
                           std::string & testFuncProto) {
  assert(!proto.empty() && "");
  std::string s;

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }

  // todo: GenerateChecksForIntrinsic does not generate CHECK
  // for aarch64 instructions yet
  std::vector<std::string> FileCheckPatterns;
  if (!isA64) {
	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
							   isHiddenLOp, FileCheckPatterns);
	s+= "// CHECK_ARM: test_" + mangledName + "\n";
  }
  s += "// CHECK_AARCH64: test_" + mangledName + "\n";

  // Emit the FileCheck patterns.
  // If for any reason we do not want to emit a check, mangledInst
  // will be the empty string.
  if (FileCheckPatterns.size()) {
    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
                                                  e = FileCheckPatterns.end();
         i != e;
         ++i) {
      s += "// CHECK_ARM: " + *i + "\n";
    }
  }

  // Emit the start of the test function.

  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
  char arg = 'a';
  std::string comma;
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    // Do not create arguments for values that must be immediate constants.
    if (proto[i] == 'i')
      continue;
    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
    testFuncProto.push_back(arg);
    comma = ", ";
  }
  testFuncProto += ")";

  s+= testFuncProto;
  s+= " {\n  ";

  if (proto[0] != 'v')
    s += "return ";
  s += mangledName + "(";
  arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (proto[i] == 'i') {
      // For immediate operands, test the maximum value.
      if (isShift)
        s += "1"; // FIXME
      else
        // The immediate generally refers to a lane in the preceding argument.
        s += utostr(RangeFromType(proto[i-1], inTypeStr));
    } else {
      s.push_back(arg);
    }
    if ((i + 1) < e)
      s += ", ";
  }
  s += ");\n}\n\n";
  return s;
}

/// Write out all intrinsic tests for the specified target, checking
/// for intrinsic test uniqueness.
///
/// \param OS            stream receiving the generated tests.
/// \param EmittedMap    signatures of the test functions written so far; it is
///                      shared between the ARM and AArch64 passes so that a
///                      test common to both targets is written only once.
/// \param isA64GenTest  true to emit the AArch64 tests (wrapped in an
///                      __aarch64__ guard), false to emit the ARM tests.
void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                                bool isA64GenTest) {
  if (isA64GenTest)
    OS << "#ifdef __aarch64__\n";

  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    bool isShift = R->getValueAsBit("isShift");
    std::string InstName = R->getValueAsString("InstName");
    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
    bool isA64 = R->getValueAsBit("isA64");

    // do not include AArch64 intrinsic test if not generating
    // code for AArch64
    if (!isA64GenTest && isA64)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    // The class kind comes from the second superclass.  Diagnose a definition
    // that lacks one instead of indexing past the end of the list, as the
    // builtin and type-check generators do.
    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
    if (kind == OpUnavailable)
      continue;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      if (kind == OpReinterpret) {
        // Reinterpret casts are tested from every other type of the same
        // vector width (both quad or both non-quad).
        bool outQuad = false;
        bool dummy = false;
        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
        for (unsigned srcti = 0, srcte = TypeVec.size();
             srcti != srcte; ++srcti) {
          bool inQuad = false;
          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
          if (srcti == ti || inQuad != outQuad)
            continue;
          std::string testFuncProto;
          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
                                  isShift, isHiddenLOp, ck, InstName, isA64,
                                  testFuncProto);
          // Tests are uniqued by the signature of the test function.
          if (EmittedMap.count(testFuncProto))
            continue;
          EmittedMap[testFuncProto] = kind;
          OS << s << "\n";
        }
      } else {
        std::string testFuncProto;
        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
        // Tests are uniqued by the signature of the test function.
        if (EmittedMap.count(testFuncProto))
          continue;
        EmittedMap[testFuncProto] = kind;
        OS << s << "\n";
      }
    }
  }

  if (isA64GenTest)
    OS << "#endif\n";
}
/// runTests - Write out a complete set of tests for all of the Neon
/// intrinsics.
///
/// The output starts with the RUN lines for an ARM and an AArch64 compile,
/// each piped into FileCheck with its own check prefix, followed by the test
/// functions for both targets.
void NeonEmitter::runTests(raw_ostream &OS) {
  // Every continuation of a RUN command must itself begin with "RUN:";
  // a line without the colon is not treated as part of the command.
  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
        "apcs-gnu\\\n"
        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
        "\n"
        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
        "// RUN:  -target-feature +neon  -ffreestanding -S -o - %s \\\n"
        "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
        "\n"
        "// REQUIRES: long_tests\n"
        "\n"
        "#include <arm_neon.h>\n"
        "\n";

  // ARM tests must be emitted before AArch64 tests to ensure
  // tests for intrinsics that are common to ARM and AArch64
  // appear only once in the output stream.
  // The check for uniqueness is done in genTargetTest.
  StringMap<OpKind> EmittedMap;

  genTargetTest(OS, EmittedMap, false);

  genTargetTest(OS, EmittedMap, true);
}

namespace clang {
/// EmitNeon - Construct a NeonEmitter over Records and write the output of
/// its run() method to OS.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.run(OS);
}
/// EmitNeonSema - Write the builtin declarations and the SemaChecking tables
/// produced by NeonEmitter::runHeader to OS.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runHeader(OS);
}
/// EmitNeonTest - Write the intrinsic tests produced by NeonEmitter::runTests
/// to OS.
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runTests(OS);
}
} // End namespace clang
@


1.1.1.1
log
@Import Clang 3.4rc1 r195771.
@
text
@@


1.1.1.2
log
@Import clang 3.4rc2 r196603.
Many bug fixes and improvements for the driver NetBSD/ARM EABI.
@
text
@a535 4
    case 'F':
      type = 'd';
      usgn = false;
      break;
d768 1
a768 1
    if (mod == 'F' || (ck != ClassB && type == 'd'))
d790 1
a790 1
  if (mod == 'F' || (ck != ClassB && type == 'd'))
a1090 1
    case 'F':
d2167 1
a2167 1
  if (mod == 'v' || mod == 'f' || mod == 'F')
a2212 2
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
d2216 1
a2216 6
          || proto.find('z') != std::string::npos
          || proto.find('r') != std::string::npos
          || proto.find('b') != std::string::npos
          || proto.find('$') != std::string::npos
          || proto.find('y') != std::string::npos
          || proto.find('o') != std::string::npos);
a2785 2
    if (!ProtoHasScalar(Proto))
      ck = ClassB;
d2822 3
a2824 7
        unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
        // Narrow shift has half the upper bound
        if (R->getValueAsBit("isScalarNarrowShift"))
          upBound /= 2;

        rangestr += "u = " + utostr(upBound);
      } else if (R->getValueAsBit("isShift")) {
d2827 3
a2829 5
        shiftstr = ", true";

        // Right shifts have an 'r' in the name, left shifts do not.
        if (name.find('r') != std::string::npos)
          rangestr = "l = 1; ";
d2831 4
@


1.1.1.3
log
@Import clang 3.5svn r198450.
@
text
@a54 2
  OpMullHiP64,
  OpMullHiN,
a55 1
  OpMlalHiN,
a58 1
  OpMlslHiN,
a61 2
  OpFMlaN,
  OpFMlsN,
a128 1
  OpQDMullHiN,
a129 1
  OpQDMlalHiN,
a130 1
  OpQDMlslHiN,
d149 1
a149 3
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
a185 1
    Poly128,
a226 2
    OpMap["OP_MULLHi_P64"]  = OpMullHiP64;
    OpMap["OP_MULLHi_N"]  = OpMullHiN;
a227 1
    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
a230 1
    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
a233 2
    OpMap["OP_FMLA_N"] = OpFMlaN;
    OpMap["OP_FMLS_N"] = OpFMlsN;
a300 1
    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
a301 1
    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
a302 1
    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
d322 1
a322 2
    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
a386 1
      case 'k':
a410 2
    case 'l':
      return 'k';
a429 2
    case 'k':
      return 'l';
a452 3
      case 'k':
        s += 'l';
        break;
a663 3
    case 'k':
      s += "poly128";
      break;
a728 3
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                         scal && type != 'f' && type != 'd');

d740 2
a752 2
    else if (type == 'k') // 128-bit long
      s = "LLLi";
a848 4
  case 'k':
    assert(poly && "Unrecognized 128 bit integer.");
    typeCode = "p128";
    break;
d917 1
a917 2
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
      name == "vcvt_f64_f32")
d1180 1
a1588 1
  case 'k': nElts = 1; break;
a1657 8
  case OpFMlaN:
    s += MangleName("vfma", typestr, ClassS);
    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
    break;
  case OpFMlsN:
    s += MangleName("vfms", typestr, ClassS);
    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
    break;
a1692 11
  case OpMullHiP64: {
    std::string Op1 = GetHigh("__a", typestr);
    std::string Op2 = GetHigh("__b", typestr);
    s += MangleName("vmull", typestr, ClassS);
    s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
    break;
  }
  case OpMullHiN:
    s += MangleName("vmull_n", typestr, ClassS);
    s += "(" + GetHigh("__a", typestr) + ", __b);";
    return s;
a1695 4
  case OpMlalHiN:
    s += MangleName("vmlal_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
a1734 4
  case OpMlslHiN:
    s += MangleName("vmlsl_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    break;
a2000 4
  case OpQDMullHiN:
    s += MangleName("vqdmull_n", typestr, ClassS);
    s += "(" + GetHigh("__a", typestr) + ", __b);";
    return s;
a2003 4
  case OpQDMlalHiN:
    s += MangleName("vqdmlal_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
a2006 4
  case OpQDMlslHiN:
    s += MangleName("vqdmlsl_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
a2162 28
  case OpScalarGetLane:{
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    if (quad) {
     s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
     s += "  vgetq_lane_s16(__a1, __b);";
    } else {
     s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
     s += "  vget_lane_s16(__a1, __b);";
    }
    break;
  }
  case OpScalarSetLane:{
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += "int16_t __a1 = (int16_t)__a;\\\n";
    if (quad) {
     s += "  int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
     s += "  int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
     s += "  vreinterpretq_f16_s16(__b2);";
    } else {
     s += "  int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
     s += "  int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
     s += "  vreinterpret_f16_s16(__b2);";
    }
    break;
  }

a2201 3
    case 'k':
      ET = NeonTypeFlags::Poly128;
      break;
d2474 1
a2474 1
  OS << "#if !defined(__ARM_NEON)\n";
a2492 1
  OS << "typedef __uint128_t poly128_t;\n";
a2616 1
  emitIntrinsic(OS, Records.getDef("VMULL_P64"), EmittedMap);
a2724 2
    case 'k':
      return 0;
a2747 2
    case 'k':
      return 127;
d2914 1
@


1.1.1.4
log
@Import Clang 3.5svn r201163.
@
text
@d377 2
a378 1
  void genBuiltinsDef(raw_ostream &OS);
d1486 3
d1511 4
d1517 2
d1521 2
a1642 12
//
// Note that some intrinsic definitions around 'lane' are being implemented
// with macros, because they all contain constant integer argument, and we
// statically check the range of the lane index to meet the semantic
// requirement of different intrinsics.
//
// For the intrinsics implemented with macro, if they contain another intrinsic
// implemented with maco, we have to avoid using the same argument names for
// the nested instrinsics. For example, macro vfms_lane is being implemented
// with another macor vfma_lane, so we rename all arguments for vfms_lane by
// adding a suffix '1'.

a2111 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
d2113 2
a2114 2
         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n  " +
         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);";
a2119 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
d2121 1
a2121 1
         "(__c1, __d); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b);";
a2126 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
d2128 1
a2128 1
         "(__c1, __d); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b);";
d2141 2
a2142 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
      "(__b1, __c);\\\n  __a1 * __d1;";
a2150 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
d2152 2
a2153 2
      "(__b1, __c);\\\n  vmulx" + type + "_" +
      typeCode +  "(__a1, __d1);";
a2161 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
d2163 2
a2164 2
      typeCode + "(__b1, __c);\\\n  vmulx" + type +
      "_" + typeCode +  "(__a1, __d1);";
a2173 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
d2175 1
a2175 1
      typeCode + "(__a1, 0);\\\n" +
d2177 1
a2177 1
      typeCode + "(__b1, __c);\\\n" +
a2190 2
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
d2192 1
a2192 1
      typeCode + "(__a1, 0);\\\n" +
d2194 1
a2194 1
      typeCode + "(__b1, __c);\\\n" +
d2204 2
a2205 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
d2211 2
a2212 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
d2218 2
a2219 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
d2225 2
a2226 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
d2232 2
a2233 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
d2239 2
a2240 4
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
d2246 7
a2252 13
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";

    std::string intType = quad ? "int16x8_t" : "int16x4_t";
    std::string intName = quad ? "vgetq" : "vget";

    // reinterpret float16 vector as int16 vector
    s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";

    s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";

    // reinterpret int16 vector as float16 vector
    s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
    s += "  __a4;";
d2258 10
a2267 16
    s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";

    std::string origType = quad ? "float16x8_t" : "float16x4_t";
    std::string intType = quad ? "int16x8_t" : "int16x4_t";
    std::string intName = quad ? "vsetq" : "vset";

    // reinterpret float16_t as int16_t
    s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
    // reinterpret float16 vector as int16 vector
    s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";

    s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";

    // reinterpret int16 vector as float16 vector
    s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
    s += "__b4;";
a2749 3
  OS << "#endif\n\n";

  // Now emit all the crypto intrinsics together
d2755 1
d2762 2
a2763 1

d3013 4
a3016 1
      OS << "case NEON::BI__builtin_neon_";
d3127 4
a3130 1
      OS << "case NEON::BI__builtin_neon_";
d3140 4
a3143 1
      OS << "case NEON::BI__builtin_neon_";
d3158 3
a3160 1
void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
d3164 5
a3168 2
  // Generate BuiltinsNEON.
  OS << "#ifdef GET_NEON_BUILTINS\n";
d3194 15
d3252 5
a3256 2
  // Generate shared BuiltinsXXX.def
  genBuiltinsDef(OS);
@


1.1.1.5
log
@Import Clang 3.5svn r202566.
@
text
@a374 3
  void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
                            std::string &CurrentGuard, bool &InGuard,
                            StringMap<ClassKind> &EmittedMap);
d378 8
a385 3
  void genOverloadTypeCheckCode(raw_ostream &OS);
  void genIntrinsicRangeCheckCode(raw_ostream &OS);
  void genTargetTest(raw_ostream &OS);
d785 1
a785 1
      s += "Wi";
d814 1
a814 1
      return quad ? "V2Wi" : "V1Wi";
d836 1
a836 1
    return quad ? "V2Wi" : "V1Wi";
d1441 1
a1441 1
static bool UseMacro(const std::string &proto, StringRef typestr) {
a1453 6
  // It is not permitted to pass or return an __fp16 by value, so intrinsics
  // taking a scalar float16_t must be implemented as macros.
  if (typestr.find('h') != std::string::npos &&
      proto.find('s') != std::string::npos)
    return true;

d1468 1
a1468 1
  bool define = UseMacro(proto, typestr);
d1647 1
a1647 1
  bool define = UseMacro(proto, typestr);
d2391 1
a2391 1
  bool define = UseMacro(proto, typestr);
d2455 1
a2455 1

d2536 1
a2536 1
  bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
d2585 1
a2585 1
  OS <<
a2737 2
  std::string CurrentGuard = "";
  bool InGuard = false;
d2739 25
a2763 14
  // Some intrinsics are used to express others. These need to be emitted near
  // the beginning so that the declarations are present when needed. This is
  // rather an ugly, arbitrary list, but probably simpler than actually tracking
  // dependency info.
  static const char *EarlyDefsArr[] =
      { "VFMA",      "VQMOVN",    "VQMOVUN",  "VABD",    "VMOVL",
        "VABDL",     "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
        "VMULL",     "VMLAL_N",   "VMLSL_N",  "VMULL_N", "VMULL_P64",
        "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
  ArrayRef<const char *> EarlyDefs(EarlyDefsArr);

  for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
    Record *R = Records.getDef(EarlyDefs[i]);
    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
d2766 8
d2776 9
a2784 2
    if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
        EarlyDefs.end())
d2787 1
a2787 1
    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
d2790 1
a2790 2
  if (InGuard)
    OS << "#endif\n\n";
d2792 2
a2793 3
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
d2795 2
a2796 10
void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
                                       std::string &CurrentGuard, bool &InGuard,
                                       StringMap<ClassKind> &EmittedMap) {

  std::string NewGuard = R->getValueAsString("ArchGuard");
  if (NewGuard != CurrentGuard) {
    if (InGuard)
      OS << "#endif\n\n";
    if (NewGuard.size())
      OS << "#if " << NewGuard << '\n';
d2798 5
a2802 2
    CurrentGuard = NewGuard;
    InGuard = NewGuard.size() != 0;
d2805 5
a2809 1
  emitIntrinsic(OS, R, EmittedMap);
d2852 1
a2852 3
      if (EmittedMap.count(s)) {
        errs() << "warning: duplicate definition: " << name
               << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
a2853 1
      }
d2914 3
a2916 1
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) {
d2921 4
a2924 1
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
d2959 13
a3009 8
      } else if (ck == ClassB) {
        // ClassB intrinsics have a type (and hence lane number) that is only
        // known at runtime.
        assert(immPos > 0 && "unexpected immediate operand");
        if (R->getValueAsBit("isLaneQ"))
          rangestr = "u = RFT(TV, false, true)";
        else
          rangestr = "u = RFT(TV, false, false)";
a3061 5
struct OverloadInfo {
  uint64_t Mask;
  int PtrArgNum;
  bool HasConstPtr;
};
d3065 3
a3067 1
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
d3071 4
a3074 7
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  std::map<std::string, OverloadInfo> OverloadMap;
  typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator;
d3086 1
a3086 1

d3103 15
d3165 8
a3172 9
      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
          MangleName(name, TypeVec[si], ClassB), OverloadInfo()));
      OverloadInfo &Record = I.first->second;
      if (!I.second)
        assert(Record.PtrArgNum == PtrArgNum &&
               Record.HasConstPtr == HasConstPtr);
      Record.Mask |= mask;
      Record.PtrArgNum = PtrArgNum;
      Record.HasConstPtr = HasConstPtr;
d3175 8
a3182 9
      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
          MangleName(name, TypeVec[qi], ClassB), OverloadInfo()));
      OverloadInfo &Record = I.first->second;
      if (!I.second)
        assert(Record.PtrArgNum == PtrArgNum &&
               Record.HasConstPtr == HasConstPtr);
      Record.Mask |= qmask;
      Record.PtrArgNum = PtrArgNum;
      Record.HasConstPtr = HasConstPtr;
a3184 13

  for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E;
       ++I) {
    OverloadInfo &BuiltinOverloads = I->second;
    OS << "case NEON::BI__builtin_neon_" << I->first << ": ";
    OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL";
    if (BuiltinOverloads.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum;
    if (BuiltinOverloads.HasConstPtr)
      OS << "; HasConstPtr = true";
    OS << "; break;\n";
  }

d3192 1
d3194 2
a3195 3
  // We want to emit the intrinsics in alphabetical order, so use the more
  // expensive std::map to gather them together first.
  std::map<std::string, OpKind> EmittedMap;
d3230 1
a3232 9

  // Generate BuiltinsNEON.
  OS << "#ifdef GET_NEON_BUILTINS\n";

  for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
                                               E = EmittedMap.end();
       I != E; ++I)
    OS << I->first << "\n";

d3243 21
d3268 4
a3271 1
  genOverloadTypeCheckCode(OS);
d3274 4
a3277 1
  genIntrinsicRangeCheckCode(OS);
d3307 3
a3309 3
    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
                               isHiddenLOp, FileCheckPatterns);
    s+= "// CHECK_ARM: test_" + mangledName + "\n";
d3367 4
a3370 4
void NeonEmitter::genTargetTest(raw_ostream &OS) {
  StringMap<OpKind> EmittedMap;
  std::string CurrentGuard = "";
  bool InGuard = false;
d3381 1
d3383 4
a3386 10
    std::string NewGuard = R->getValueAsString("ArchGuard");
    if (NewGuard != CurrentGuard) {
      if (InGuard)
        OS << "#endif\n\n";
      if (NewGuard.size())
        OS << "#if " << NewGuard << '\n';

      CurrentGuard = NewGuard;
      InGuard = NewGuard.size() != 0;
    }
d3406 1
a3406 1
          std::string testFuncProto;
d3408 2
a3409 2
                                  isShift, isHiddenLOp, ck, InstName,
                                  CurrentGuard.size(), testFuncProto);
d3416 6
a3421 4
        std::string testFuncProto;
        std::string s =
            GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
                    ck, InstName, CurrentGuard.size(), testFuncProto);
d3427 2
a3428 2
  if (InGuard)
    OS << "#endif\n";
d3437 4
a3440 4
        "\n"
        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
        "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
        "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
d3447 9
a3455 1
  genTargetTest(OS);
@


1.1.1.5.2.1
log
@Rebase.
@
text
@d2658 1
a2658 1
  ParseTypes(nullptr, TypedefTypes, TDTypeVec);
@


1.1.1.6
log
@Import Clang 3.5svn r209886.
@
text
@d2658 1
a2658 1
  ParseTypes(nullptr, TypedefTypes, TDTypeVec);
@


1.1.1.7
log
@Import clang 3.6svn r215315.
@
text
@d21 2
a22 3
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
a33 1
#include "llvm/TableGen/SetTheory.h"
a35 4
#include <sstream>
#include <vector>
#include <map>
#include <algorithm>
d38 124
a161 14
namespace {

// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
static Record *CurrentRecord = nullptr;
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (!Assertion) {
    if (CurrentRecord)
      PrintFatalError(CurrentRecord->getLoc(), Str);
    else
      PrintFatalError(Str);
  }
}
d165 9
a173 9
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
d179 8
a186 2
namespace NeonTypeFlags {
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
a187 29
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64
};
}

class Intrinsic;
class NeonEmitter;
class Type;
class Variable;

//===----------------------------------------------------------------------===//
// TypeSpec
//===----------------------------------------------------------------------===//

/// A TypeSpec is just a simple wrapper around a string, but gets its own type
/// for strong typing purposes.
///
/// A TypeSpec can be used to create a type.
class TypeSpec : public std::string {
d189 13
a201 24
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      if (islower(I)) {
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
};

//===----------------------------------------------------------------------===//
// Type
//===----------------------------------------------------------------------===//

/// A Type. Not much more to say here.
class Type {
private:
  TypeSpec TS;
d203 6
a208 18
  bool Float, Signed, Void, Poly, Constant, Pointer;
  // ScalarForMangling and NoManglingQ are really not suited to live here as
  // they are not related to the type. But they live in the TypeSpec (not the
  // prototype), so this is really the only place to store them.
  bool ScalarForMangling, NoManglingQ;
  unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
  Type()
      : Float(false), Signed(false), Void(true), Poly(false), Constant(false),
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

  Type(TypeSpec TS, char CharMod)
      : TS(TS), Float(false), Signed(false), Void(false), Poly(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    applyModifier(CharMod);
d211 1
a211 83
  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions
  //
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  bool isFloating() const { return Float; }
  bool isInteger() const { return !Float && !Poly; }
  bool isSigned() const { return Signed; }
  bool isScalar() const { return NumVectors == 0; }
  bool isVector() const { return NumVectors > 0; }
  bool isFloat() const { return Float && ElementBitwidth == 32; }
  bool isDouble() const { return Float && ElementBitwidth == 64; }
  bool isHalf() const { return Float && ElementBitwidth == 16; }
  bool isPoly() const { return Poly; }
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return !Float && ElementBitwidth == 16; }
  bool isInt() const { return !Float && ElementBitwidth == 32; }
  bool isLong() const { return !Float && ElementBitwidth == 64; }
  bool isVoid() const { return Void; }
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions
  //
  void makeUnsigned() { Signed = false; }
  void makeSigned() { Signed = true; }
  void makeInteger(unsigned ElemWidth, bool Sign) {
    Float = false;
    Poly = false;
    Signed = Sign;
    ElementBitwidth = ElemWidth;
  }
  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }
  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }
  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }
  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
  unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
  /// Applies a prototype modifier to the type.
  void applyModifier(char Mod);
d213 1
d215 1
a215 246
//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//

/// A variable is a simple class that just has a type and a name.
class Variable {
  Type T;
  std::string N;

public:
  Variable() : T(Type::getVoid()), N("") {}
  Variable(Type T, std::string N) : T(T), N(N) {}

  Type getType() const { return T; }
  std::string getName() const { return "__" + N; }
};

//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//

/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  friend class DagEmitter;

  /// The Record this intrinsic was created from.
  Record *R;
  /// The unmangled name and prototype.
  std::string Name, Proto;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  ListInit *Body;
  /// The architectural #ifdef guard.
  std::string Guard;
  /// Set if the Unvailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian? or does it need its arguments
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  ///            function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type('d', OutTS). InBaseType is only
  /// different if CartesianProductOfTypes = 1 (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
  std::string VariablePostfix;

  NeonEmitter &Emitter;
  std::stringstream OS;

public:
  Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
            StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
      : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
        CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
        BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
        BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
    // If this builtin takes an immediate argument, we need to #define it rather
    // than use a standard declaration, so that SemaChecking can range check
    // the immediate passed by the user.
    if (Proto.find('i') != std::string::npos)
      UseMacro = true;

    // Pointer arguments need to use macros to avoid hiding aligned attributes
    // from the pointer type.
    if (Proto.find('p') != std::string::npos ||
        Proto.find('c') != std::string::npos)
      UseMacro = true;

    // It is not permitted to pass or return an __fp16 by value, so intrinsics
    // taking a scalar float16_t must be implemented as macros.
    if (OutTS.find('h') != std::string::npos &&
        Proto.find('s') != std::string::npos)
      UseMacro = true;

    // Modify the TypeSpec per-argument to get a concrete Type, and create
    // known variables for each.
    // Types[0] is the return value.
    Types.push_back(Type(OutTS, Proto[0]));
    for (unsigned I = 1; I < Proto.size(); ++I)
      Types.push_back(Type(InTS, Proto[I]));
  }

  /// Get the Record that this intrinsic is based off.
  Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// this is the set of immediate dependencies, NOT the
  /// transitive closure.
  const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  /// Get the architectural guard string (#ifdef).
  std::string getGuard() const { return Guard; }
  /// Get the non-mangled name.
  std::string getName() const { return Name; }

  /// Return true if the intrinsic takes an immediate operand.
  bool hasImmediate() const {
    return Proto.find('i') != std::string::npos;
  }
  /// Return the parameter index of the immediate operand.
  unsigned getImmediateIdx() const {
    assert(hasImmediate());
    unsigned Idx = Proto.find('i');
    assert(Idx > 0 && "Can't return an immediate!");
    return Idx - 1;
  }

  /// Return true if the intrinsic takes an splat operand.
  bool hasSplat() const { return Proto.find('a') != std::string::npos; }
  /// Return the parameter index of the splat operand.
  unsigned getSplatIdx() const {
    assert(hasSplat());
    unsigned Idx = Proto.find('a');
    assert(Idx > 0 && "Can't return a splat!");
    return Idx - 1;
  }

  unsigned getNumParams() const { return Proto.size() - 1; }
  Type getReturnType() const { return Types[0]; }
  Type getParamType(unsigned I) const { return Types[I + 1]; }
  Type getBaseType() const { return BaseType; }
  /// Return the raw prototype string.
  std::string getProto() const { return Proto; }

  /// Return true if the prototype has a scalar argument.
  /// This does not return true for the "splat" code ('a').
  bool protoHasScalar();

  /// Return the index that parameter PIndex will sit at
  /// in a generated function call. This is often just PIndex,
  /// but may not be as things such as multiple-vector operands
  /// and sret parameters need to be taken into accont.
  unsigned getGeneratedParamIdx(unsigned PIndex) {
    unsigned Idx = 0;
    if (getReturnType().getNumVectors() > 1)
      // Multiple vectors are passed as sret.
      ++Idx;

    for (unsigned I = 0; I < PIndex; ++I)
      Idx += std::max(1U, getParamType(I).getNumVectors());

    return Idx;
  }

  bool hasBody() const { return Body && Body->getValues().size() > 0; }

  void setNeededEarly() { NeededEarly = true; }

  bool operator<(const Intrinsic &Other) const {
    // Sort lexicographically on a two-tuple (Guard, Name)
    if (Guard != Other.Guard)
      return Guard < Other.Guard;
    return Name < Other.Name;
  }

  ClassKind getClassKind(bool UseClassBIfScalar = false) {
    if (UseClassBIfScalar && !protoHasScalar())
      return ClassB;
    return CK;
  }

  /// Return the name, mangled with type information.
  /// If ForceClassS is true, use ClassS (u32/s32) instead
  /// of the intrinsic's own type class.
  std::string getMangledName(bool ForceClassS = false);
  /// Return the type code for a builtin function call.
  std::string getInstTypeCode(Type T, ClassKind CK);
  /// Return the type string for a BUILTIN() macro in Builtins.def.
  std::string getBuiltinTypeStr();

  /// Generate the intrinsic, returning code.
  std::string generate();
  /// Perform type checking and populate the dependency graph, but
  /// don't generate code yet.
  void indexBody();

private:
  std::string mangleName(std::string Name, ClassKind CK);

  void initVariables();
  std::string replaceParamsIn(std::string S);

  void emitBodyAsBuiltinCall();

  void generateImpl(bool ReverseArguments,
                    StringRef NamePrefix, StringRef CallPrefix);
  void emitReturn();
  void emitBody(StringRef CallPrefix);
  void emitShadowedArgs();
  void emitArgumentReversal();
  void emitReturnReversal();
  void emitReverseVariable(Variable &Dest, Variable &Src);
  void emitNewLine();
  void emitClosingBrace();
  void emitOpeningBrace();
  void emitPrototype(StringRef NamePrefix);

  class DagEmitter {
    Intrinsic &Intr;
    StringRef CallPrefix;

  public:
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
      Intr(Intr), CallPrefix(CallPrefix) {
    }
    std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    std::pair<Type, std::string> emitDagDup(DagInit *DI);
    std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    std::pair<Type, std::string> emitDagCall(DagInit *DI);
    std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    std::pair<Type, std::string> emitDagOp(DagInit *DI);
    std::pair<Type, std::string> emitDag(DagInit *DI);
  };

};

//===----------------------------------------------------------------------===//
// NeonEmitter
//===----------------------------------------------------------------------===//

d218 2
a219 10
  DenseMap<Record *, ClassKind> ClassMap;
  std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
  unsigned UniqueNumber;

  void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                SmallVectorImpl<Intrinsic *> &Defs);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  SmallVectorImpl<Intrinsic *> &Defs);
d222 123
a344 6
  /// Called by Intrinsic - this attempts to get an intrinsic that takes
  /// the given types as arguments.
  Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);

  /// Called by Intrinsic - returns a globally-unique number.
  unsigned getUniqueNumber() { return UniqueNumber++; }
a345 1
  NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
d373 11
a384 1

d387 13
a399 3
//===----------------------------------------------------------------------===//
// Type implementation
//===----------------------------------------------------------------------===//
d401 13
a413 53
std::string Type::str() const {
  if (Void)
    return "void";
  std::string S;

  if (!Signed && isInteger())
    S += "u";

  if (Poly)
    S += "poly";
  else if (Float)
    S += "float";
  else
    S += "int";

  S += utostr(ElementBitwidth);
  if (isVector())
    S += "x" + utostr(getNumElements());
  if (NumVectors > 1)
    S += "x" + utostr(NumVectors);
  S += "_t";

  if (Constant)
    S += " const";
  if (Pointer)
    S += " *";

  return S;
}

std::string Type::builtin_str() const {
  std::string S;
  if (isVoid())
    return "v";

  if (Pointer)
    // All pointers are void pointers.
    S += "v";
  else if (isInteger())
    switch (ElementBitwidth) {
    case 8: S += "c"; break;
    case 16: S += "s"; break;
    case 32: S += "i"; break;
    case 64: S += "Wi"; break;
    case 128: S += "LLLi"; break;
    default: llvm_unreachable("Unhandled case!");
    }
  else
    switch (ElementBitwidth) {
    case 16: S += "h"; break;
    case 32: S += "f"; break;
    case 64: S += "d"; break;
    default: llvm_unreachable("Unhandled case!");
d415 5
d421 18
a438 10
  if (isChar() && !Pointer)
    // Make chars explicitly signed.
    S = "S" + S;
  else if (isInteger() && !Pointer && !Signed)
    S = "U" + S;

  if (isScalar()) {
    if (Constant) S += "C";
    if (Pointer) S += "*";
    return S;
a439 6

  std::string Ret;
  for (unsigned I = 0; I < NumVectors; ++I)
    Ret += "V" + utostr(getNumElements()) + S;

  return Ret;
d442 18
a459 9
unsigned Type::getNeonEnum() const {
  unsigned Addend;
  switch (ElementBitwidth) {
  case 8: Addend = 0; break;
  case 16: Addend = 1; break;
  case 32: Addend = 2; break;
  case 64: Addend = 3; break;
  case 128: Addend = 4; break;
  default: llvm_unreachable("Unhandled element bitwidth!");
d461 1
d463 21
a483 6
  unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
  if (Poly) {
    // Adjustment needed because Poly32 doesn't exist.
    if (Addend >= 2)
      --Addend;
    Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
a484 9
  if (Float) {
    assert(Addend != 0 && "Float8 doesn't exist!");
    Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
  }

  if (Bitwidth == 128)
    Base |= (unsigned)NeonTypeFlags::QuadFlag;
  if (isInteger() && !Signed)
    Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
d486 1
a486 1
  return Base;
d489 7
a495 11
Type Type::fromTypedefName(StringRef Name) {
  Type T;
  T.Void = false;
  T.Float = false;
  T.Poly = false;

  if (Name.front() == 'u') {
    T.Signed = false;
    Name = Name.drop_front();
  } else {
    T.Signed = true;
d497 4
a500 10

  if (Name.startswith("float")) {
    T.Float = true;
    Name = Name.drop_front(5);
  } else if (Name.startswith("poly")) {
    T.Poly = true;
    Name = Name.drop_front(4);
  } else {
    assert(Name.startswith("int"));
    Name = Name.drop_front(3);
d503 4
a506 4
  unsigned I = 0;
  for (I = 0; I < Name.size(); ++I) {
    if (!isdigit(Name[I]))
      break;
a507 2
  Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
  Name = Name.drop_front(I);
d509 4
a512 27
  T.Bitwidth = T.ElementBitwidth;
  T.NumVectors = 1;

  if (Name.front() == 'x') {
    Name = Name.drop_front();
    unsigned I = 0;
    for (I = 0; I < Name.size(); ++I) {
      if (!isdigit(Name[I]))
        break;
    }
    unsigned NumLanes;
    Name.substr(0, I).getAsInteger(10, NumLanes);
    Name = Name.drop_front(I);
    T.Bitwidth = T.ElementBitwidth * NumLanes;
  } else {
    // Was scalar.
    T.NumVectors = 0;
  }
  if (Name.front() == 'x') {
    Name = Name.drop_front();
    unsigned I = 0;
    for (I = 0; I < Name.size(); ++I) {
      if (!isdigit(Name[I]))
        break;
    }
    Name.substr(0, I).getAsInteger(10, T.NumVectors);
    Name = Name.drop_front(I);
d515 2
a516 2
  assert(Name.startswith("_t") && "Malformed typedef!");
  return T;
d519 67
a585 13
void Type::applyTypespec(bool &Quad) {
  std::string S = TS;
  ScalarForMangling = false;
  Void = false;
  Poly = Float = false;
  ElementBitwidth = ~0U;
  Signed = true;
  NumVectors = 1;

  for (char I : S) {
    switch (I) {
    case 'S':
      ScalarForMangling = true;
d587 4
a590 3
    case 'H':
      NoManglingQ = true;
      Quad = true;
d592 3
a594 2
    case 'Q':
      Quad = true;
d596 3
a598 2
    case 'P':
      Poly = true;
d600 6
a605 2
    case 'U':
      Signed = false;
d608 4
a611 1
      ElementBitwidth = 8;
d614 59
a672 2
      Float = true;
    // Fall through
d674 4
a677 1
      ElementBitwidth = 16;
a678 3
    case 'f':
      Float = true;
    // Fall through
d680 4
a683 1
      ElementBitwidth = 32;
a684 3
    case 'd':
      Float = true;
    // Fall through
d686 4
a689 1
      ElementBitwidth = 64;
d692 7
a698 4
      ElementBitwidth = 128;
      // Poly doesn't have a 128x1 type.
      if (Poly)
        NumVectors = 0;
d700 13
d714 1
a714 2
      llvm_unreachable("Unhandled type code!");
    }
a715 1
  assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
d717 71
a787 2
  Bitwidth = Quad ? 128 : 64;
}
d789 6
a794 3
void Type::applyModifier(char Mod) {
  bool AppliedQuad = false;
  applyTypespec(AppliedQuad);
d796 61
a856 3
  switch (Mod) {
  case 'v':
    Void = true;
d858 6
a863 4
  case 't':
    if (Poly) {
      Poly = false;
      Signed = false;
a865 61
  case 'b':
    Signed = false;
    Float = false;
    Poly = false;
    NumVectors = 0;
    Bitwidth = ElementBitwidth;
    break;
  case '$':
    Signed = true;
    Float = false;
    Poly = false;
    NumVectors = 0;
    Bitwidth = ElementBitwidth;
    break;
  case 'u':
    Signed = false;
    Poly = false;
    Float = false;
    break;
  case 'x':
    Signed = true;
    assert(!Poly && "'u' can't be used with poly types!");
    Float = false;
    break;
  case 'o':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = true;
    break;
  case 'y':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = true;
    break;
  case 'f':
    // Special case - if we're half-precision, a floating
    // point argument needs to be 128-bits (double size).
    if (isHalf())
      Bitwidth = 128;
    Float = true;
    ElementBitwidth = 32;
    break;
  case 'F':
    Float = true;
    ElementBitwidth = 64;
    break;
  case 'g':
    if (AppliedQuad)
      Bitwidth /= 2;
    break;
  case 'j':
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'w':
    ElementBitwidth *= 2;
    Bitwidth *= 2;
    break;
  case 'n':
    ElementBitwidth *= 2;
    break;
d867 6
a872 5
    Float = false;
    Poly = false;
    ElementBitwidth = Bitwidth = 32;
    NumVectors = 0;
    Signed = true;
d875 6
a880 20
    Float = false;
    Poly = false;
    ElementBitwidth = Bitwidth = 64;
    NumVectors = 0;
    Signed = false;
    break;
  case 'z':
    ElementBitwidth /= 2;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 'r':
    ElementBitwidth *= 2;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 's':
  case 'a':
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
d883 2
a884 9
    Bitwidth *= 2;
    break;
  case 'c':
    Constant = true;
  // Fall through
  case 'p':
    Pointer = true;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
d887 6
a892 1
    ElementBitwidth /= 2;
d894 7
a900 11
  case 'q':
    ElementBitwidth /= 2;
    Bitwidth *= 2;
    break;
  case 'e':
    ElementBitwidth /= 2;
    Signed = false;
    break;
  case 'm':
    ElementBitwidth /= 2;
    Bitwidth /= 2;
d903 10
a912 24
    break;
  case '2':
    NumVectors = 2;
    break;
  case '3':
    NumVectors = 3;
    break;
  case '4':
    NumVectors = 4;
    break;
  case 'B':
    NumVectors = 2;
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'C':
    NumVectors = 3;
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'D':
    NumVectors = 4;
    if (!AppliedQuad)
      Bitwidth *= 2;
d915 1
a915 1
    llvm_unreachable("Unhandled character!");
d919 14
a932 27
//===----------------------------------------------------------------------===//
// Intrinsic implementation
//===----------------------------------------------------------------------===//

std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
  char typeCode = '\0';
  bool printNumber = true;

  if (CK == ClassB)
    return "";

  if (T.isPoly())
    typeCode = 'p';
  else if (T.isInteger())
    typeCode = T.isSigned() ? 's' : 'u';
  else
    typeCode = 'f';

  if (CK == ClassI) {
    switch (typeCode) {
    default:
      break;
    case 's':
    case 'u':
    case 'p':
      typeCode = 'i';
      break;
d935 1
a935 11
  if (CK == ClassB) {
    typeCode = '\0';
  }

  std::string S;
  if (typeCode != '\0')
    S.push_back(typeCode);
  if (printNumber)
    S += utostr(T.getElementSizeInBits());

  return S;
d938 6
a943 49
std::string Intrinsic::getBuiltinTypeStr() {
  ClassKind LocalCK = getClassKind(true);
  std::string S;

  Type RetT = getReturnType();
  if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
      !RetT.isFloating())
    RetT.makeInteger(RetT.getElementSizeInBits(), false);

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (RetT.getNumVectors() > 1) {
    S += "vv*"; // void result with void* first argument
  } else {
    if (RetT.isPoly())
      RetT.makeInteger(RetT.getElementSizeInBits(), false);
    if (!RetT.isScalar() && !RetT.isSigned())
      RetT.makeSigned();

    bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
    if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
      // Cast to vector of 8-bit elements.
      RetT.makeInteger(8, true);

    S += RetT.builtin_str();
  }

  for (unsigned I = 0; I < getNumParams(); ++I) {
    Type T = getParamType(I);
    if (T.isPoly())
      T.makeInteger(T.getElementSizeInBits(), false);

    bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
    if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
      T.makeInteger(8, true);
    // Halves always get converted to 8-bit elements.
    if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
      T.makeInteger(8, true);

    if (LocalCK == ClassI)
      T.makeSigned();

    // Constant indices are always just "int".
    if (hasImmediate() && getImmediateIdx() == I)
      T.makeInteger(32, true);

    S += T.builtin_str();
d945 1
a945 6

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (LocalCK == ClassB)
    S += "i";

  return S;
d948 9
a956 7
std::string Intrinsic::getMangledName(bool ForceClassS) {
  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  ClassKind LocalCK = CK;
  if (!protoHasScalar())
    LocalCK = ClassB;
d958 2
a959 2
  return mangleName(Name, ForceClassS ? ClassS : LocalCK);
}
d961 1
a961 3
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
  std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  std::string S = Name;
d963 1
a963 3
  if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
      Name == "vcvt_f64_f32")
    return Name;
d966 3
a968 4
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (Name.size() >= 3 && isdigit(Name.back()) &&
        Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
      S.insert(S.length() - 3, "_" + typeCode);
d970 1
a970 6
      S += "_" + typeCode;
  }

  if (BaseType != InBaseType) {
    // A reinterpret - out the input base type at the end.
    S += "_" + getInstTypeCode(InBaseType, LocalCK);
d973 2
a974 2
  if (LocalCK == ClassB)
    S += "_v";
d978 40
a1017 3
  if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
    size_t Pos = S.find('_');
    S.insert(Pos, "q");
d1019 1
d1021 26
a1046 8
  char Suffix = '\0';
  if (BaseType.isScalarForMangling()) {
    switch (BaseType.getElementSizeInBits()) {
    case 8: Suffix = 'b'; break;
    case 16: Suffix = 'h'; break;
    case 32: Suffix = 's'; break;
    case 64: Suffix = 'd'; break;
    default: llvm_unreachable("Bad suffix!");
d1048 2
a1049 5
  }
  if (Suffix != '\0') {
    size_t Pos = S.find('_');
    S.insert(Pos, &Suffix, 1);
  }
d1051 21
a1071 2
  return S;
}
d1073 7
a1079 6
std::string Intrinsic::replaceParamsIn(std::string S) {
  while (S.find('$') != std::string::npos) {
    size_t Pos = S.find('$');
    size_t End = Pos + 1;
    while (isalpha(S[End]))
      ++End;
d1081 1
a1081 4
    std::string VarName = S.substr(Pos + 1, End - Pos - 1);
    assert_with_loc(Variables.find(VarName) != Variables.end(),
                    "Variable not defined!");
    S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
a1082 2

  return S;
d1085 80
a1164 9
void Intrinsic::initVariables() {
  Variables.clear();

  // Modify the TypeSpec per-argument to get a concrete Type, and create
  // known variables for each.
  for (unsigned I = 1; I < Proto.size(); ++I) {
    char NameC = '0' + (I - 1);
    std::string Name = "p";
    Name.push_back(NameC);
d1166 33
a1198 1
    Variables[Name] = Variable(Types[I], Name + VariablePostfix);
a1199 1
  RetVar = Variable(Types[0], "ret" + VariablePostfix);
d1202 12
a1213 5
void Intrinsic::emitPrototype(StringRef NamePrefix) {
  if (UseMacro)
    OS << "#define ";
  else
    OS << "__ai " << Types[0].str() << " ";
d1215 1
a1215 1
  OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
d1217 1
a1217 3
  for (unsigned I = 0; I < getNumParams(); ++I) {
    if (I != 0)
      OS << ", ";
d1219 2
a1220 9
    char NameC = '0' + I;
    std::string Name = "p";
    Name.push_back(NameC);
    assert(Variables.find(Name) != Variables.end());
    Variable &V = Variables[Name];

    if (!UseMacro)
      OS << V.getType().str() << " ";
    OS << V.getName();
d1223 2
a1224 17
  OS << ")";
}

void Intrinsic::emitOpeningBrace() {
  if (UseMacro)
    OS << " __extension__ ({";
  else
    OS << " {";
  emitNewLine();
}

void Intrinsic::emitClosingBrace() {
  if (UseMacro)
    OS << "})";
  else
    OS << "}";
}
d1226 11
a1236 20
void Intrinsic::emitNewLine() {
  if (UseMacro)
    OS << " \\\n";
  else
    OS << "\n";
}

void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  if (Dest.getType().getNumVectors() > 1) {
    emitNewLine();

    for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
      OS << "  " << Dest.getName() << ".val[" << utostr(K) << "] = "
         << "__builtin_shufflevector("
         << Src.getName() << ".val[" << utostr(K) << "], "
         << Src.getName() << ".val[" << utostr(K) << "]";
      for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
        OS << ", " << utostr(J);
      OS << ");";
      emitNewLine();
d1238 1
d1240 18
a1257 8
    OS << "  " << Dest.getName()
       << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
    for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
      OS << ", " << utostr(J);
    OS << ");";
    emitNewLine();
  }
}
d1259 3
a1261 3
void Intrinsic::emitArgumentReversal() {
  if (BigEndianSafe)
    return;
d1263 3
a1265 4
  // Reverse all vector arguments.
  for (unsigned I = 0; I < getNumParams(); ++I) {
    std::string Name = "p" + utostr(I);
    std::string NewName = "rev" + utostr(I);
d1267 5
a1271 5
    Variable &V = Variables[Name];
    Variable NewV(V.getType(), NewName + VariablePostfix);

    if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
      continue;
d1273 2
a1274 3
    OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
    emitReverseVariable(NewV, V);
    V = NewV;
d1278 18
a1295 2
void Intrinsic::emitReturnReversal() {
  if (BigEndianSafe)
d1297 3
a1299 2
  if (!getReturnType().isVector() || getReturnType().isVoid() ||
      getReturnType().getNumElements() == 1)
d1301 1
a1301 2
  emitReverseVariable(RetVar, RetVar);
}
d1304 40
a1343 4
void Intrinsic::emitShadowedArgs() {
  // Macro arguments are not type-checked like inline function arguments,
  // so assign them to local temporaries to get the right type checking.
  if (!UseMacro)
d1345 1
d1347 11
a1357 9
  for (unsigned I = 0; I < getNumParams(); ++I) {
    // Do not create a temporary for an immediate argument.
    // That would defeat the whole point of using a macro!
    if (hasImmediate() && Proto[I+1] == 'i')
      continue;
    // Do not create a temporary for pointer arguments. The input
    // pointer may have an alignment hint.
    if (getParamType(I).isPointer())
      continue;
d1359 1
a1359 1
    std::string Name = "p" + utostr(I);
d1361 12
a1372 2
    assert(Variables.find(Name) != Variables.end());
    Variable &V = Variables[Name];
d1374 24
a1397 2
    std::string NewName = "s" + utostr(I);
    Variable V2(V.getType(), NewName + VariablePostfix);
d1399 7
a1405 3
    OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
       << V.getName() << ";";
    emitNewLine();
d1407 84
a1490 1
    V = V2;
a1491 1
}
d1493 2
a1494 10
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
bool Intrinsic::protoHasScalar() {
  return (Proto.find('s') != std::string::npos ||
          Proto.find('z') != std::string::npos ||
          Proto.find('r') != std::string::npos ||
          Proto.find('b') != std::string::npos ||
          Proto.find('$') != std::string::npos ||
          Proto.find('y') != std::string::npos ||
          Proto.find('o') != std::string::npos);
d1497 7
a1503 2
void Intrinsic::emitBodyAsBuiltinCall() {
  std::string S;
d1505 124
a1628 9
  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool SRet = getReturnType().getNumVectors() >= 2;

  StringRef N = Name;
  if (hasSplat()) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    assert(N.endswith("_n"));
    N = N.drop_back(2);
d1630 3
d1634 395
a2028 45
  ClassKind LocalCK = CK;
  if (!protoHasScalar())
    LocalCK = ClassB;

  if (!getReturnType().isVoid() && !SRet)
    S += "(" + RetVar.getType().str() + ") ";

  S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";

  if (SRet)
    S += "&" + RetVar.getName() + ", ";

  for (unsigned I = 0; I < getNumParams(); ++I) {
    Variable &V = Variables["p" + utostr(I)];
    Type T = V.getType();

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the builtin.
    if (T.getNumVectors() > 1) {
      // Check if an explicit cast is needed.
      std::string Cast;
      if (T.isChar() || T.isPoly() || !T.isSigned()) {
        Type T2 = T;
        T2.makeOneVector();
        T2.makeInteger(8, /*Signed=*/true);
        Cast = "(" + T2.str() + ")";
      }

      for (unsigned J = 0; J < T.getNumVectors(); ++J)
        S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
      continue;
    }

    std::string Arg;
    Type CastToType = T;
    if (hasSplat() && I == getSplatIdx()) {
      Arg = "(" + BaseType.str() + ") {";
      for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
        if (J != 0)
          Arg += ", ";
        Arg += V.getName();
      }
      Arg += "}";

      CastToType = BaseType;
d2030 1
a2030 1
      Arg = V.getName();
d2032 282
d2315 2
a2316 7
    // Check if an explicit cast is needed.
    if (CastToType.isVector()) {
      CastToType.makeInteger(8, true);
      Arg = "(" + CastToType.str() + ")" + Arg;
    }

    S += Arg + ", ";
d2318 2
d2321 2
a2322 7
  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (getClassKind(true) == ClassB) {
    Type ThisTy = getReturnType();
    if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
      ThisTy = getParamType(0);
    if (ThisTy.isPointer())
      ThisTy = getParamType(1);
d2324 2
a2325 7
    S += utostr(ThisTy.getNeonEnum());
  } else {
    // Remove extraneous ", ".
    S.pop_back();
    S.pop_back();
  }
  S += ");";
d2327 6
a2332 3
  std::string RetExpr;
  if (!SRet && !RetVar.getType().isVoid())
    RetExpr = RetVar.getName() + " = ";
d2334 2
a2335 3
  OS << "  " << RetExpr << S;
  emitNewLine();
}
d2337 2
a2338 2
void Intrinsic::emitBody(StringRef CallPrefix) {
  std::vector<std::string> Lines;
d2340 28
a2367 5
  assert(RetVar.getType() == Types[0]);
  // Create a return variable, if we're not void.
  if (!RetVar.getType().isVoid()) {
    OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
    emitNewLine();
d2369 3
d2373 12
a2384 5
  if (!Body || Body->getValues().size() == 0) {
    // Nothing specific to output - must output a builtin.
    emitBodyAsBuiltinCall();
    return;
  }
d2386 4
a2389 9
  // We have a list of "things to output". The last should be returned.
  for (auto *I : Body->getValues()) {
    if (StringInit *SI = dyn_cast<StringInit>(I)) {
      Lines.push_back(replaceParamsIn(SI->getAsString()));
    } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
      DagEmitter DE(*this, CallPrefix);
      Lines.push_back(DE.emitDag(DI).second + ";");
    }
  }
d2391 3
a2393 3
  assert(Lines.size() && "Empty def?");
  if (!RetVar.getType().isVoid())
    Lines.back().insert(0, RetVar.getName() + " = ");
d2395 1
a2395 5
  for (auto &L : Lines) {
    OS << "  " << L;
    emitNewLine();
  }
}
d2397 5
a2401 9
void Intrinsic::emitReturn() {
  if (RetVar.getType().isVoid())
    return;
  if (UseMacro)
    OS << "  " << RetVar.getName() << ";";
  else
    OS << "  return " << RetVar.getName() << ";";
  emitNewLine();
}
d2403 2
a2404 44
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  // At this point we should only be seeing a def.
  DefInit *DefI = cast<DefInit>(DI->getOperator());
  std::string Op = DefI->getAsString();

  if (Op == "cast" || Op == "bitcast")
    return emitDagCast(DI, Op == "bitcast");
  if (Op == "shuffle")
    return emitDagShuffle(DI);
  if (Op == "dup")
    return emitDagDup(DI);
  if (Op == "splat")
    return emitDagSplat(DI);
  if (Op == "save_temp")
    return emitDagSaveTemp(DI);
  if (Op == "op")
    return emitDagOp(DI);
  if (Op == "call")
    return emitDagCall(DI);
  if (Op == "name_replace")
    return emitDagNameReplace(DI);
  if (Op == "literal")
    return emitDagLiteral(DI);
  assert_with_loc(false, "Unknown operation!");
  return std::make_pair(Type::getVoid(), "");
}

std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  if (DI->getNumArgs() == 2) {
    // Unary op.
    std::pair<Type, std::string> R =
        emitDagArg(DI->getArg(1), DI->getArgName(1));
    return std::make_pair(R.first, Op + R.second);
  } else {
    assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
    std::pair<Type, std::string> R1 =
        emitDagArg(DI->getArg(1), DI->getArgName(1));
    std::pair<Type, std::string> R2 =
        emitDagArg(DI->getArg(2), DI->getArgName(2));
    assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
    return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
  }
}
d2406 7
a2412 55
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
  std::vector<Type> Types;
  std::vector<std::string> Values;
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    std::pair<Type, std::string> R =
        emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
    Types.push_back(R.first);
    Values.push_back(R.second);
  }

  // Look up the called intrinsic.
  std::string N;
  if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
    N = SI->getAsUnquotedString();
  else
    N = emitDagArg(DI->getArg(0), "").second;
  Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types);
  assert(Callee && "getIntrinsic should not return us nullptr!");

  // Make sure the callee is known as an early def.
  Callee->setNeededEarly();
  Intr.Dependencies.insert(Callee);

  // Now create the call itself.
  std::string S = CallPrefix.str() + Callee->getMangledName(true) + "(";
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    if (I != 0)
      S += ", ";
    S += Values[I];
  }
  S += ")";

  return std::make_pair(Callee->getReturnType(), S);
}

std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
                                                                bool IsBitCast){
  // (cast MOD* VAL) -> cast VAL to type given by MOD.
  std::pair<Type, std::string> R = emitDagArg(
      DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
  Type castToType = R.first;
  for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {

    // MOD can take several forms:
    //   1. $X - take the type of parameter / variable X.
    //   2. The value "R" - take the type of the return type.
    //   3. a type string
    //   4. The value "U" or "S" to switch the signedness.
    //   5. The value "H" or "D" to half or double the bitwidth.
    //   6. The value "8" to convert to 8-bit (signed) integer lanes.
    if (DI->getArgName(ArgIdx).size()) {
      assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) !=
                      Intr.Variables.end(),
                      "Variable not found");
      castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType();
d2414 1
a2414 19
      StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
      assert_with_loc(SI, "Expected string type or $Name for cast type");

      if (SI->getAsUnquotedString() == "R") {
        castToType = Intr.getReturnType();
      } else if (SI->getAsUnquotedString() == "U") {
        castToType.makeUnsigned();
      } else if (SI->getAsUnquotedString() == "S") {
        castToType.makeSigned();
      } else if (SI->getAsUnquotedString() == "H") {
        castToType.halveLanes();
      } else if (SI->getAsUnquotedString() == "D") {
        castToType.doubleLanes();
      } else if (SI->getAsUnquotedString() == "8") {
        castToType.makeInteger(8, true);
      } else {
        castToType = Type::fromTypedefName(SI->getAsUnquotedString());
        assert_with_loc(!castToType.isVoid(), "Unknown typedef");
      }
d2418 1
a2418 9
  std::string S;
  if (IsBitCast) {
    // Emit a reinterpret cast. The second operand must be an lvalue, so create
    // a temporary.
    std::string N = "reint";
    unsigned I = 0;
    while (Intr.Variables.find(N) != Intr.Variables.end())
      N = "reint" + utostr(++I);
    Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
d2420 5
a2424 5
    Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
            << R.second << ";";
    Intr.emitNewLine();

    S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
d2426 1
a2426 2
    // Emit a normal (static) cast.
    S = "(" + castToType.str() + ")(" + R.second + ")";
d2428 1
d2430 20
a2449 2
  return std::make_pair(castToType, S);
}
d2451 7
a2457 11
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.
  class LowHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~LowHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
a2458 14
  };
  class HighHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~HighHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;
d2460 9
a2468 17
  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
    virtual void anchor() {}
    virtual ~Rev() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
d2470 2
d2473 1
a2473 1
      Elts.insert(Revved.begin(), Revved.end());
a2474 28
  };
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}
    virtual void anchor() {}
    virtual ~MaskExpander() {}
    virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), DI->getArgName(1));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");
d2476 2
a2477 11
  SetTheory ST;
  LowHalf LH;
  HighHalf HH;
  MaskExpander ME(Arg1.first.getNumElements());
  Rev R(Arg1.first.getElementSizeInBits());
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", &LH);
  ST.addOperator("highhalf", &HH);
  ST.addOperator("rev", &R);
  ST.addExpander("MaskExpand", &ME);
  ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>());
d2479 10
a2488 8
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";
d2490 3
a2492 12
  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
d2495 3
a2497 2
  return std::make_pair(T, S);
}
d2499 1
a2499 4
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
d2501 5
a2505 7
  Type T = Intr.getBaseType();
  assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
  std::string S = "(" + T.str() + ") {";
  for (unsigned I = 0; I < T.getNumElements(); ++I) {
    if (I != 0)
      S += ", ";
    S += A.second;
d2507 1
a2507 3
  S += "}";

  return std::make_pair(T, S);
d2510 4
a2513 7
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1));

  assert_with_loc(B.first.isScalar(),
                  "splat() requires a scalar int as the second argument");
d2515 5
a2519 5
  std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
  for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
    S += ", " + B.second;
  }
  S += ")";
d2521 2
a2522 2
  return std::make_pair(Intr.getBaseType(), S);
}
d2524 2
a2525 3
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));
d2527 3
a2529 2
  assert_with_loc(!A.first.isVoid(),
                  "Argument to save_temp() must have non-void type!");
d2531 3
a2533 2
  std::string N = DI->getArgName(0);
  assert_with_loc(N.size(), "save_temp() expects a name as the first argument");
d2535 13
a2547 3
  assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
                  "Variable already defined!");
  Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
d2549 24
a2572 2
  std::string S =
      A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
d2574 10
a2583 1
  return std::make_pair(Type::getVoid(), S);
d2586 39
a2624 3
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  std::string S = Intr.Name;
d2626 2
a2627 3
  assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
d2629 3
a2631 1
  size_t Idx = S.find(ToReplace);
d2633 1
a2633 2
  assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
  S.replace(Idx, ToReplace.size(), ReplaceWith);
d2635 3
a2637 2
  return std::make_pair(Type::getVoid(), S);
}
d2639 3
a2641 5
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  return std::make_pair(Type::fromTypedefName(Ty), Value);
}
d2643 10
a2652 10
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  if (ArgName.size()) {
    assert_with_loc(!Arg->isComplete(),
                    "Arguments must either be DAGs or names, not both!");
    assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
                    "Variable not defined!");
    Variable &V = Intr.Variables[ArgName];
    return std::make_pair(V.getType(), V.getName());
  }
d2654 5
a2658 3
  assert(Arg && "Neither ArgName nor Arg?!");
  DagInit *DI = dyn_cast<DagInit>(Arg);
  assert_with_loc(DI, "Arguments must either be DAGs or names!");
d2660 21
a2680 2
  return emitDag(DI);
}
d2682 4
a2685 4
std::string Intrinsic::generate() {
  // Little endian intrinsics are simple and don't require any argument
  // swapping.
  OS << "#ifdef __LITTLE_ENDIAN__\n";
d2687 4
a2690 1
  generateImpl(false, "", "");
d2692 2
a2693 1
  OS << "#else\n";
a2694 13
  // Big endian intrinsics are more complex. The user intended these
  // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  // but we load as-if (V)LD1. So we should swap all arguments and
  // swap the return value too.
  //
  // If we call sub-intrinsics, we should call a version that does
  // not re-swap the arguments!
  generateImpl(true, "", "__noswap_");

  // If we're needed early, create a non-swapping variant for
  // big-endian.
  if (NeededEarly) {
    generateImpl(false, "__noswap_", "__noswap_");
d2696 4
a2699 1
  OS << "#endif\n\n";
d2701 20
a2720 2
  return OS.str();
}
d2722 14
a2735 3
void Intrinsic::generateImpl(bool ReverseArguments,
                             StringRef NamePrefix, StringRef CallPrefix) {
  CurrentRecord = R;
d2737 1
a2737 10
  // If we call a macro, our local variables may be corrupted due to
  // lack of proper lexical scoping. So, add a globally unique postfix
  // to every variable.
  //
  // indexBody() should have set up the Dependencies set by now.
  for (auto *I : Dependencies)
    if (I->UseMacro) {
      VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
      break;
    }
d2739 1
a2739 1
  initVariables();
d2741 25
a2765 1
  emitPrototype(NamePrefix);
d2767 1
a2767 12
  if (IsUnavailable) {
    OS << " __attribute__((unavailable));";
  } else {
    emitOpeningBrace();
    emitShadowedArgs();
    if (ReverseArguments)
      emitArgumentReversal();
    emitBody(CallPrefix);
    if (ReverseArguments)
      emitReturnReversal();
    emitReturn();
    emitClosingBrace();
a2768 1
  OS << "\n";
d2770 2
a2771 2
  CurrentRecord = nullptr;
}
d2773 2
a2774 8
void Intrinsic::indexBody() {
  CurrentRecord = R;

  initVariables();
  emitBody("");
  OS.str("");

  CurrentRecord = nullptr;
d2777 3
a2779 32
//===----------------------------------------------------------------------===//
// NeonEmitter implementation
//===----------------------------------------------------------------------===//

Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
  // First, look up the name in the intrinsic map.
  assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
                  ("Intrinsic '" + Name + "' not found!").str());
  std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()];
  std::vector<Intrinsic *> GoodVec;

  // Create a string to print if we end up failing.
  std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  for (unsigned I = 0; I < Types.size(); ++I) {
    if (I != 0)
      ErrMsg += ", ";
    ErrMsg += Types[I].str();
  }
  ErrMsg += ")'\n";
  ErrMsg += "Available overloads:\n";

  // Now, look through each intrinsic implementation and see if the types are
  // compatible.
  for (auto *I : V) {
    ErrMsg += "  - " + I->getReturnType().str() + " " + I->getMangledName();
    ErrMsg += "(";
    for (unsigned A = 0; A < I->getNumParams(); ++A) {
      if (A != 0)
        ErrMsg += ", ";
      ErrMsg += I->getParamType(A).str();
    }
    ErrMsg += ")\n";
d2781 6
a2786 2
    if (I->getNumParams() != Types.size())
      continue;
d2788 2
a2789 9
    bool Good = true;
    for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
      if (I->getParamType(Arg) != Types[Arg]) {
        Good = false;
        break;
      }
    }
    if (Good)
      GoodVec.push_back(I);
d2792 1
a2792 5
  assert_with_loc(GoodVec.size() > 0,
                  "No compatible intrinsic found - " + ErrMsg);
  assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);

  return GoodVec.front();
d2795 5
a2799 3
void NeonEmitter::createIntrinsic(Record *R,
                                  SmallVectorImpl<Intrinsic *> &Out) {
  std::string Name = R->getValueAsString("Name");
a2801 5
  Record *OperationRec = R->getValueAsDef("Operation");
  bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
  bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
  std::string Guard = R->getValueAsString("ArchGuard");
  bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
d2803 2
a2804 3
  // Set the global current record. This allows assert_with_loc to produce
  // decent location information even when highly nested.
  CurrentRecord = R;
d2806 1
a2806 1
  ListInit *Body = OperationRec->getValueAsListInit("Ops");
d2808 1
a2808 3
  std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);

  ClassKind CK = ClassNone;
d2810 18
a2827 10
    CK = ClassMap[R->getSuperClasses()[1]];

  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  for (auto TS : TypeSpecs) {
    if (CartesianProductOfTypes) {
      Type DefaultT(TS, 'd');
      for (auto SrcTS : TypeSpecs) {
        Type DefaultSrcT(SrcTS, 'd');
        if (TS == SrcTS ||
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
d2829 2
a2830 1
        NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
d2833 9
a2841 1
      NewTypeSpecs.push_back(std::make_pair(TS, TS));
d2844 8
d2853 18
a2870 2
  std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
  std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end());
d2872 5
a2876 3
  for (auto &I : NewTypeSpecs) {
    Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
                                  *this, Guard, IsUnavailable, BigEndianSafe);
d2878 16
a2893 2
    IntrinsicMap[Name].push_back(IT);
    Out.push_back(IT);
d2895 1
d2897 6
a2902 2
  CurrentRecord = nullptr;
}
d2904 2
a2905 5
/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_BUILTINS\n";
d2907 2
a2908 3
  // We only want to emit a builtin once, and we want to emit them in
  // alphabetical order, so use a std::set.
  std::set<std::string> Builtins;
d2910 2
a2911 2
  for (auto *Def : Defs) {
    if (Def->hasBody())
d2913 6
d2921 7
a2927 1
    if (Def->hasSplat())
d2930 67
a2996 1
    std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
d2998 2
a2999 2
    S += Def->getBuiltinTypeStr();
    S += "\", \"n\")";
d3001 33
a3033 1
    Builtins.insert(S);
a3034 3

  for (auto &S : Builtins)
    OS << S << "\n";
d3038 5
d3045 5
a3049 2
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                           SmallVectorImpl<Intrinsic *> &Defs) {
a3054 6
  struct OverloadInfo {
    uint64_t Mask;
    int PtrArgNum;
    bool HasConstPtr;
    OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  };
d3056 1
d3058 4
a3061 4
  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
d3063 6
d3071 1
a3071 1
    if (Def->hasSplat())
d3073 1
d3076 1
a3076 1
    if (Def->protoHasScalar())
d3079 5
a3083 7
    uint64_t Mask = 0ULL;
    Type Ty = Def->getReturnType();
    if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
        Def->getProto()[0] == 'F')
      Ty = Def->getParamType(0);
    if (Ty.isPointer())
      Ty = Def->getParamType(1);
d3085 15
a3099 1
    Mask |= 1ULL << Ty.getNeonEnum();
d3101 1
a3101 2
    // Check if the function has a pointer or const pointer argument.
    std::string Proto = Def->getProto();
d3104 2
a3105 2
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
      char ArgType = Proto[I + 1];
d3108 1
a3108 1
        PtrArgNum = I;
d3112 1
a3112 1
        PtrArgNum = I;
d3117 1
a3117 1
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
a3119 1
    std::string Name = Def->getName();
d3126 1
a3126 1
    if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
d3131 21
a3151 7
    if (Mask) {
      std::string Name = Def->getMangledName();
      OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
      OverloadInfo &OI = OverloadMap[Name];
      OI.Mask |= Mask;
      OI.PtrArgNum |= PtrArgNum;
      OI.HasConstPtr = HasConstPtr;
d3155 8
a3162 8
  for (auto &I : OverloadMap) {
    OverloadInfo &OI = I.second;

    OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
    OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
    if (OI.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << OI.PtrArgNum;
    if (OI.HasConstPtr)
d3166 1
d3170 14
a3183 4
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
d3185 3
a3187 1
  std::set<std::string> Emitted;
a3188 3
  for (auto *Def : Defs) {
    if (Def->hasBody())
      continue;
d3191 1
a3191 7
    if (Def->hasSplat())
      continue;
    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    if (!Def->hasImmediate())
      continue;
    if (Emitted.find(Def->getMangledName()) != Emitted.end())
d3194 20
a3213 1
    std::string LowerBound, UpperBound;
d3215 2
a3216 51
    Record *R = Def->getRecord();
    if (R->getValueAsBit("isVCVT_N")) {
      // VCVT between floating- and fixed-point values takes an immediate
      // in the range [1, 32) for f32 or [1, 64) for f64.
      LowerBound = "1";
      if (Def->getBaseType().getElementSizeInBits() == 32)
        UpperBound = "31";
      else
        UpperBound = "63";
    } else if (R->getValueAsBit("isScalarShift")) {
      // Right shifts have an 'r' in the name, left shifts do not. Convert
      // instructions have the same bounds and right shifts.
      if (Def->getName().find('r') != std::string::npos ||
          Def->getName().find("cvt") != std::string::npos)
        LowerBound = "1";

      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
    } else if (R->getValueAsBit("isShift")) {
      // Builtins which are overloaded by type will need to have their upper
      // bound computed at Sema time based on the type constant.

      // Right shifts have an 'r' in the name, left shifts do not.
      if (Def->getName().find('r') != std::string::npos)
        LowerBound = "1";
      UpperBound = "RFT(TV, true)";
    } else if (Def->getClassKind(true) == ClassB) {
      // ClassB intrinsics have a type (and hence lane number) that is only
      // known at runtime.
      if (R->getValueAsBit("isLaneQ"))
        UpperBound = "RFT(TV, false, true)";
      else
        UpperBound = "RFT(TV, false, false)";
    } else {
      // The immediate generally refers to a lane in the preceding argument.
      assert(Def->getImmediateIdx() > 0);
      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
      UpperBound = utostr(T.getNumElements() - 1);
    }

    // Calculate the index of the immediate that should be range checked.
    unsigned Idx = Def->getNumParams();
    if (Def->hasImmediate())
      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());

    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
       << "i = " << Idx << ";";
    if (LowerBound.size())
      OS << " l = " << LowerBound << ";";
    if (UpperBound.size())
      OS << " u = " << UpperBound << ";";
    OS << " break;\n";
d3218 4
a3221 2
    Emitted.insert(Def->getMangledName());
  }
a3232 4
  SmallVector<Intrinsic *, 128> Defs;
  for (auto *R : RV)
    createIntrinsic(R, Defs);

d3234 1
a3234 1
  genBuiltinsDef(OS, Defs);
d3237 1
a3237 1
  genOverloadTypeCheckCode(OS, Defs);
d3240 1
a3240 1
  genIntrinsicRangeCheckCode(OS, Defs);
d3243 42
a3284 93
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining "
        "a copy\n"
        " * of this software and associated documentation files (the "
        "\"Software\"),"
        " to deal\n"
        " * in the Software without restriction, including without limitation "
        "the "
        "rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
        "and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
        "is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#if !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "typedef __uint128_t poly128_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);

  // Emit vector typedefs.
  bool InIfdef = false;
  for (auto &TS : TDTypeVec) {
    bool IsA64 = false;
    Type T(TS, 'd');
    if (T.isDouble() || (T.isPoly() && T.isLong()))
      IsA64 = true;

    if (InIfdef && !IsA64) {
      OS << "#endif\n";
      InIfdef = false;
    }
    if (!InIfdef && IsA64) {
      OS << "#ifdef __aarch64__\n";
      InIfdef = true;
a3285 11

    if (T.isPoly())
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    Type T2 = T;
    T2.makeScalar();
    OS << utostr(T.getNumElements()) << "))) ";
    OS << T2.str();
    OS << " " << T.str() << ";\n";
a3286 3
  if (InIfdef)
    OS << "#endif\n";
  OS << "\n";
d3288 32
a3319 26
  // Emit struct typedefs.
  InIfdef = false;
  for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
    for (auto &TS : TDTypeVec) {
      bool IsA64 = false;
      Type T(TS, 'd');
      if (T.isDouble() || (T.isPoly() && T.isLong()))
        IsA64 = true;

      if (InIfdef && !IsA64) {
        OS << "#endif\n";
        InIfdef = false;
      }
      if (!InIfdef && IsA64) {
        OS << "#ifdef __aarch64__\n";
        InIfdef = true;
      }

      char M = '2' + (NumMembers - 2);
      Type VT(TS, M);
      OS << "typedef struct " << VT.str() << " {\n";
      OS << "  " << T.str() << " val";
      OS << "[" << utostr(NumMembers) << "]";
      OS << ";\n} ";
      OS << VT.str() << ";\n";
      OS << "\n";
d3321 2
d3324 3
a3326 3
  if (InIfdef)
    OS << "#endif\n";
  OS << "\n";
d3328 6
a3333 2
  OS << "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";
a3334 1
  SmallVector<Intrinsic *, 128> Defs;
d3336 53
a3388 37
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  for (auto *I : Defs)
    I->indexBody();

  std::stable_sort(
      Defs.begin(), Defs.end(),
      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

  // Only emit a def when its requirements have been met.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  bool MadeProgress = true;
  std::string InGuard = "";
  while (!Defs.empty() && MadeProgress) {
    MadeProgress = false;

    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
         I != Defs.end(); /*No step*/) {
      bool DependenciesSatisfied = true;
      for (auto *II : (*I)->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
          DependenciesSatisfied = false;
      }
      if (!DependenciesSatisfied) {
        // Try the next one.
        ++I;
        continue;
      }

      // Emit #endif/#if pair if needed.
      if ((*I)->getGuard() != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = (*I)->getGuard();
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
a3389 6

      // Actually generate the intrinsic code.
      OS << (*I)->generate();

      MadeProgress = true;
      I = Defs.erase(I);
d3392 2
a3393 2
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
d3395 17
d3413 1
a3413 3
  OS << "\n";
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
d3424 1
a3424 1
  llvm_unreachable("Neon test generation no longer implemented!");
@


1.1.1.7.2.1
log
@Update LLVM to 3.6.1, requested by joerg in ticket 824.
@
text
@d37 1
a37 2
#include <algorithm>
#include <map>
a38 1
#include <string>
d40 2
d1649 1
a1649 1
  ST.evaluate(DI->getArg(2), Elts, None);
@


1.1.1.8
log
@Import Clang 3.6RC1 r227398.
@
text
@d37 1
a37 2
#include <algorithm>
#include <map>
a38 1
#include <string>
d40 2
d1649 1
a1649 1
  ST.evaluate(DI->getArg(2), Elts, None);
@


1.1.1.9
log
@Import Clang 3.8.0rc3 r261930.
@
text
@a27 1
#include "llvm/ADT/STLExtras.h"
a37 1
#include <deque>
d133 1
a133 1
  bool Float, Signed, Immediate, Void, Poly, Constant, Pointer;
d142 3
a144 3
      : Float(false), Signed(false), Immediate(false), Void(true), Poly(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
d147 2
a148 2
      : TS(TS), Float(false), Signed(false), Immediate(false), Void(false),
        Poly(false), Constant(false), Pointer(false), ScalarForMangling(false),
a168 1
  bool isImmediate() const { return Immediate; }
a193 8
    Immediate = false;
    ElementBitwidth = ElemWidth;
  }
  void makeImmediate(unsigned ElemWidth) {
    Float = false;
    Poly = false;
    Signed = true;
    Immediate = true;
d339 1
a339 1
    Types.emplace_back(OutTS, Proto[0]);
d341 1
a341 1
      Types.emplace_back(InTS, Proto[I]);
d386 1
a386 1
  bool protoHasScalar() const;
d424 1
a424 1
  std::string getMangledName(bool ForceClassS = false) const;
d426 1
a426 1
  std::string getInstTypeCode(Type T, ClassKind CK) const;
d437 1
a437 1
  std::string mangleName(std::string Name, ClassKind CK) const;
d487 1
a487 1
  std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
d500 1
a500 1
  Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types);
a601 6
  // Constant indices are "int", but have the "constant expression" modifier.
  if (isImmediate()) {
    assert(isInteger() && isSigned());
    S = "I" + S;
  }

d823 4
a854 1
    Immediate = true;
a861 1
    Immediate = true;
d939 1
a939 1
std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
a976 4
static bool isFloatingPointProtoModifier(char Mod) {
  return Mod == 'F' || Mod == 'f';
}

d998 1
a998 1
    bool ForcedVectorFloatingType = isFloatingPointProtoModifier(Proto[0]);
d1011 1
a1011 1
    bool ForcedFloatingType = isFloatingPointProtoModifier(Proto[I + 1]);
d1021 1
d1023 1
a1023 1
      T.makeImmediate(32);
d1035 1
a1035 1
std::string Intrinsic::getMangledName(bool ForceClassS) const {
d1046 1
a1046 1
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
d1050 2
a1051 2
  if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
      Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32")
d1264 1
a1264 1
bool Intrinsic::protoHasScalar() const {
d1349 1
a1349 1
    if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0]))
d1396 1
a1396 1
  assert(!Lines.empty() && "Empty def?");
d1477 2
a1478 1
  Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types);
d1481 2
a1482 2
  Callee.setNeededEarly();
  Intr.Dependencies.insert(&Callee);
d1485 1
a1485 1
  std::string S = CallPrefix.str() + Callee.getMangledName(true) + "(";
d1493 1
a1493 1
  return std::make_pair(Callee.getReturnType(), S);
d1566 4
a1569 2
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
d1577 4
a1580 2
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
d1591 4
a1594 2
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
d1616 3
a1618 1
    void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
d1640 4
d1645 4
a1648 6
  ST.addOperator("lowhalf", llvm::make_unique<LowHalf>());
  ST.addOperator("highhalf", llvm::make_unique<HighHalf>());
  ST.addOperator("rev",
                 llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 llvm::make_unique<MaskExpander>(Arg1.first.getNumElements()));
d1851 1
a1851 1
Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
d1855 1
a1855 1
  auto &V = IntrinsicMap.find(Name.str())->second;
d1870 2
a1871 2
  for (auto &I : V) {
    ErrMsg += "  - " + I.getReturnType().str() + " " + I.getMangledName();
d1873 1
a1873 1
    for (unsigned A = 0; A < I.getNumParams(); ++A) {
d1876 1
a1876 1
      ErrMsg += I.getParamType(A).str();
d1880 1
a1880 1
    if (I.getNumParams() != Types.size())
d1885 1
a1885 1
      if (I.getParamType(Arg) != Types[Arg]) {
d1891 1
a1891 1
      GoodVec.push_back(&I);
d1898 1
a1898 1
  return *GoodVec.front();
d1941 1
a1941 3
  NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
		     NewTypeSpecs.end());
  auto &Entry = IntrinsicMap[Name];
d1944 5
a1948 3
    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
                       Guard, IsUnavailable, BigEndianSafe);
    Out.push_back(&Entry.back());
d2018 2
a2019 2
    if (Def->getProto()[0] == 'v' ||
        isFloatingPointProtoModifier(Def->getProto()[0]))
@


1.1.1.9.2.1
log
@Sync with HEAD
@
text
@a26 1
#include "llvm/ADT/ArrayRef.h"
d28 2
a29 1
#include "llvm/ADT/None.h"
a30 1
#include "llvm/ADT/STLExtras.h"
d32 1
a32 2
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
a33 1
#include "llvm/Support/raw_ostream.h"
d37 1
a38 4
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
a40 1
#include <set>
a42 1
#include <utility>
a43 1

a77 1

d93 1
d95 1
a95 2
} // end namespace NeonTypeFlags

d97 2
d149 3
a151 4
      : TS(std::move(TS)), Float(false), Signed(false), Immediate(false),
        Void(false), Poly(false), Constant(false), Pointer(false),
        ScalarForMangling(false), NoManglingQ(false), Bitwidth(0),
        ElementBitwidth(0), NumVectors(0) {
a192 1

a199 1

a206 1

a210 1

a214 1

a218 1

d260 1
a260 1
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
a369 1

a379 1

d415 1
a415 1
  bool hasBody() const { return Body && !Body->getValues().empty(); }
d488 1
d1072 1
a1072 1
  if (!typeCode.empty()) {
d1198 1
a1198 1
      OS << "  " << Dest.getName() << ".val[" << K << "] = "
d1200 2
a1201 2
         << Src.getName() << ".val[" << K << "], "
         << Src.getName() << ".val[" << K << "]";
d1203 1
a1203 1
        OS << ", " << J;
d1211 1
a1211 1
      OS << ", " << J;
d1247 1
d1398 1
a1398 1
  if (!Body || Body->getValues().empty()) {
d1466 1
a1466 1
        emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1471 1
a1471 1
        emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1473 1
a1473 1
        emitDagArg(DI->getArg(2), DI->getArgNameStr(2));
d1484 1
a1484 1
        emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1));
d1517 1
a1517 2
      DI->getArg(DI->getNumArgs() - 1),
      DI->getArgNameStr(DI->getNumArgs() - 1));
d1528 2
a1529 2
    if (!DI->getArgNameStr(ArgIdx).empty()) {
      assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
d1532 1
a1532 1
      castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType();
a1589 1

a1598 1

a1603 1

a1621 1

a1626 1

d1642 1
a1642 1
      emitDagArg(DI->getArg(0), DI->getArgNameStr(0));
d1644 1
a1644 1
      emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1686 1
a1686 2
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
                                              DI->getArgNameStr(0));
d1704 2
a1705 4
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
                                              DI->getArgNameStr(0));
  std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
                                              DI->getArgNameStr(1));
d1721 1
a1721 2
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(1),
                                              DI->getArgNameStr(1));
d1726 2
a1727 3
  std::string N = DI->getArgNameStr(0);
  assert_with_loc(!N.empty(),
                  "save_temp() expects a name as the first argument");
d1763 1
a1763 1
  if (!ArgName.empty()) {
d1901 1
a1901 1
  assert_with_loc(!GoodVec.empty(),
d1929 1
a1929 1
    CK = ClassMap[R->getSuperClasses()[1].first];
d2158 1
a2158 1
    if (!LowerBound.empty())
d2160 1
a2160 1
    if (!UpperBound.empty())
d2351 1
a2351 1
  std::string InGuard;
a2393 1

a2396 1

a2399 1

d2403 1
a2403 2

} // end namespace clang
@


1.1.1.10
log
@Import Clang pre-4.0.0 r291444.
@
text
@a26 1
#include "llvm/ADT/ArrayRef.h"
d28 2
a29 1
#include "llvm/ADT/None.h"
a30 1
#include "llvm/ADT/STLExtras.h"
d32 1
a32 2
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
a33 1
#include "llvm/Support/raw_ostream.h"
d37 1
a38 4
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
a40 1
#include <set>
a42 1
#include <utility>
a43 1

a77 1

d93 1
d95 1
a95 2
} // end namespace NeonTypeFlags

d97 2
d149 3
a151 4
      : TS(std::move(TS)), Float(false), Signed(false), Immediate(false),
        Void(false), Poly(false), Constant(false), Pointer(false),
        ScalarForMangling(false), NoManglingQ(false), Bitwidth(0),
        ElementBitwidth(0), NumVectors(0) {
a192 1

a199 1

a206 1

a210 1

a214 1

a218 1

d260 1
a260 1
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
a369 1

a379 1

d415 1
a415 1
  bool hasBody() const { return Body && !Body->getValues().empty(); }
d488 1
d1072 1
a1072 1
  if (!typeCode.empty()) {
d1198 1
a1198 1
      OS << "  " << Dest.getName() << ".val[" << K << "] = "
d1200 2
a1201 2
         << Src.getName() << ".val[" << K << "], "
         << Src.getName() << ".val[" << K << "]";
d1203 1
a1203 1
        OS << ", " << J;
d1211 1
a1211 1
      OS << ", " << J;
d1247 1
d1398 1
a1398 1
  if (!Body || Body->getValues().empty()) {
d1466 1
a1466 1
        emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1471 1
a1471 1
        emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1473 1
a1473 1
        emitDagArg(DI->getArg(2), DI->getArgNameStr(2));
d1484 1
a1484 1
        emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1));
d1517 1
a1517 2
      DI->getArg(DI->getNumArgs() - 1),
      DI->getArgNameStr(DI->getNumArgs() - 1));
d1528 2
a1529 2
    if (!DI->getArgNameStr(ArgIdx).empty()) {
      assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
d1532 1
a1532 1
      castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType();
a1589 1

a1598 1

a1603 1

a1621 1

a1626 1

d1642 1
a1642 1
      emitDagArg(DI->getArg(0), DI->getArgNameStr(0));
d1644 1
a1644 1
      emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
d1686 1
a1686 2
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
                                              DI->getArgNameStr(0));
d1704 2
a1705 4
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
                                              DI->getArgNameStr(0));
  std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
                                              DI->getArgNameStr(1));
d1721 1
a1721 2
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(1),
                                              DI->getArgNameStr(1));
d1726 2
a1727 3
  std::string N = DI->getArgNameStr(0);
  assert_with_loc(!N.empty(),
                  "save_temp() expects a name as the first argument");
d1763 1
a1763 1
  if (!ArgName.empty()) {
d1901 1
a1901 1
  assert_with_loc(!GoodVec.empty(),
d1929 1
a1929 1
    CK = ClassMap[R->getSuperClasses()[1].first];
d2158 1
a2158 1
    if (!LowerBound.empty())
d2160 1
a2160 1
    if (!UpperBound.empty())
d2351 1
a2351 1
  std::string InGuard;
a2393 1

a2396 1

a2399 1

d2403 1
a2403 2

} // end namespace clang
@


1.1.1.10.14.1
log
@Sync with HEAD
@
text
@d307 1
a307 1
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
a554 3
  // runFP16 - Emit arm_fp16.h.inc
  void runFP16(raw_ostream &o);

a555 1
	// and arm_fp16.h
d776 1
a776 1
      LLVM_FALLTHROUGH;
d782 1
a782 1
      LLVM_FALLTHROUGH;
d788 1
a788 1
      LLVM_FALLTHROUGH;
a854 29
  case 'Y':
    Bitwidth = ElementBitwidth = 16;
    NumVectors = 0;
    Float = true;
    break;
  case 'I':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'L':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'U':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
  case 'O':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
a862 4
  case 'H':
    Float = true;
    ElementBitwidth = 16;
    break;
d914 1
a914 1
    LLVM_FALLTHROUGH;
a960 13
  case '7':
    if (AppliedQuad)
      Bitwidth /= 2;
    ElementBitwidth = 8;
    break;
  case '8':
    ElementBitwidth = 8;
    break;
  case '9':
    if (!AppliedQuad)
      Bitwidth *= 2;
    ElementBitwidth = 8;
    break;
d1009 1
a1009 1
  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
d1973 1
a1973 1
  llvm::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
d2105 1
a2105 1
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
d2115 2
a2116 1
void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
d2141 1
a2141 1
      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
d2143 1
a2143 5
	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
		  Def->getName().find('h') != std::string::npos)
		// VCVTh operating on FP16 intrinsics in range [1, 16)
		UpperBound = "15";
	  else if (Def->getBaseType().getElementSizeInBits() == 32)
d2145 1
a2145 1
	  else
d2319 1
a2319 1
    OS << T.getNumElements() << "))) ";
d2349 1
a2349 1
      OS << "[" << NumMembers << "]";
a2418 109
/// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::runFP16(raw_ostream &OS) {
  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining a copy\n"
        " * of this software and associated documentation files (the "
				"\"Software\"), to deal\n"
        " * in the Software without restriction, including without limitation "
				"the rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
				"and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
				"is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  OS << "#ifndef __ARM_FP16_H\n";
  OS << "#define __ARM_FP16_H\n\n";

  OS << "#include <stdint.h>\n\n";

  OS << "typedef __fp16 float16_t;\n";

  OS << "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  SmallVector<Intrinsic *, 128> Defs;
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  for (auto *I : Defs)
    I->indexBody();

  std::stable_sort(
      Defs.begin(), Defs.end(),
      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

  // Only emit a def when its requirements have been met.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  bool MadeProgress = true;
  std::string InGuard;
  while (!Defs.empty() && MadeProgress) {
    MadeProgress = false;

    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
         I != Defs.end(); /*No step*/) {
      bool DependenciesSatisfied = true;
      for (auto *II : (*I)->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
          DependenciesSatisfied = false;
      }
      if (!DependenciesSatisfied) {
        // Try the next one.
        ++I;
        continue;
      }

      // Emit #endif/#if pair if needed.
      if ((*I)->getGuard() != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = (*I)->getGuard();
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
      }

      // Actually generate the intrinsic code.
      OS << (*I)->generate();

      MadeProgress = true;
      I = Defs.erase(I);
    }
  }
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
    OS << "#endif\n";

  OS << "\n";
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_FP16_H */\n";
}

a2424 4
void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runFP16(OS);
}

@


1.1.1.10.14.2
log
@Mostly merge changes from HEAD upto 20200411
@
text
@@


1.1.1.10.12.1
log
@Sync with HEAD
@
text
@d307 1
a307 1
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
a554 3
  // runFP16 - Emit arm_fp16.h.inc
  void runFP16(raw_ostream &o);

a555 1
	// and arm_fp16.h
d776 1
a776 1
      LLVM_FALLTHROUGH;
d782 1
a782 1
      LLVM_FALLTHROUGH;
d788 1
a788 1
      LLVM_FALLTHROUGH;
a854 29
  case 'Y':
    Bitwidth = ElementBitwidth = 16;
    NumVectors = 0;
    Float = true;
    break;
  case 'I':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'L':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'U':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
  case 'O':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
a862 4
  case 'H':
    Float = true;
    ElementBitwidth = 16;
    break;
d914 1
a914 1
    LLVM_FALLTHROUGH;
a960 13
  case '7':
    if (AppliedQuad)
      Bitwidth /= 2;
    ElementBitwidth = 8;
    break;
  case '8':
    ElementBitwidth = 8;
    break;
  case '9':
    if (!AppliedQuad)
      Bitwidth *= 2;
    ElementBitwidth = 8;
    break;
d1009 1
a1009 1
  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
d1973 1
a1973 1
  llvm::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
d2105 1
a2105 1
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
d2115 2
a2116 1
void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
d2141 1
a2141 1
      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
d2143 1
a2143 5
	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
		  Def->getName().find('h') != std::string::npos)
		// VCVTh operating on FP16 intrinsics in range [1, 16)
		UpperBound = "15";
	  else if (Def->getBaseType().getElementSizeInBits() == 32)
d2145 1
a2145 1
	  else
d2319 1
a2319 1
    OS << T.getNumElements() << "))) ";
d2349 1
a2349 1
      OS << "[" << NumMembers << "]";
a2418 109
/// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::runFP16(raw_ostream &OS) {
  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining a copy\n"
        " * of this software and associated documentation files (the "
				"\"Software\"), to deal\n"
        " * in the Software without restriction, including without limitation "
				"the rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
				"and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
				"is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  OS << "#ifndef __ARM_FP16_H\n";
  OS << "#define __ARM_FP16_H\n\n";

  OS << "#include <stdint.h>\n\n";

  OS << "typedef __fp16 float16_t;\n";

  OS << "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  SmallVector<Intrinsic *, 128> Defs;
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  for (auto *I : Defs)
    I->indexBody();

  std::stable_sort(
      Defs.begin(), Defs.end(),
      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

  // Only emit a def when its requirements have been met.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  bool MadeProgress = true;
  std::string InGuard;
  while (!Defs.empty() && MadeProgress) {
    MadeProgress = false;

    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
         I != Defs.end(); /*No step*/) {
      bool DependenciesSatisfied = true;
      for (auto *II : (*I)->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
          DependenciesSatisfied = false;
      }
      if (!DependenciesSatisfied) {
        // Try the next one.
        ++I;
        continue;
      }

      // Emit #endif/#if pair if needed.
      if ((*I)->getGuard() != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = (*I)->getGuard();
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
      }

      // Actually generate the intrinsic code.
      OS << (*I)->generate();

      MadeProgress = true;
      I = Defs.erase(I);
    }
  }
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
    OS << "#endif\n";

  OS << "\n";
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_FP16_H */\n";
}

a2424 4
void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runFP16(OS);
}

@


1.1.1.11
log
@Import clang r337282 from trunk
@
text
@d307 1
a307 1
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
a554 3
  // runFP16 - Emit arm_fp16.h.inc
  void runFP16(raw_ostream &o);

a555 1
	// and arm_fp16.h
d776 1
a776 1
      LLVM_FALLTHROUGH;
d782 1
a782 1
      LLVM_FALLTHROUGH;
d788 1
a788 1
      LLVM_FALLTHROUGH;
a854 29
  case 'Y':
    Bitwidth = ElementBitwidth = 16;
    NumVectors = 0;
    Float = true;
    break;
  case 'I':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'L':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'U':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
  case 'O':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
a862 4
  case 'H':
    Float = true;
    ElementBitwidth = 16;
    break;
d914 1
a914 1
    LLVM_FALLTHROUGH;
a960 13
  case '7':
    if (AppliedQuad)
      Bitwidth /= 2;
    ElementBitwidth = 8;
    break;
  case '8':
    ElementBitwidth = 8;
    break;
  case '9':
    if (!AppliedQuad)
      Bitwidth *= 2;
    ElementBitwidth = 8;
    break;
d1009 1
a1009 1
  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
d1973 1
a1973 1
  llvm::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
d2105 1
a2105 1
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
d2115 2
a2116 1
void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
d2141 1
a2141 1
      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
d2143 1
a2143 5
	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
		  Def->getName().find('h') != std::string::npos)
		// VCVTh operating on FP16 intrinsics in range [1, 16)
		UpperBound = "15";
	  else if (Def->getBaseType().getElementSizeInBits() == 32)
d2145 1
a2145 1
	  else
d2319 1
a2319 1
    OS << T.getNumElements() << "))) ";
d2349 1
a2349 1
      OS << "[" << NumMembers << "]";
a2418 109
/// runFP16 - Read the records in arm_fp16.td and output arm_fp16.h: a license
/// header, an include guard, the float16_t typedef and the intrinsics.
void NeonEmitter::runFP16(raw_ostream &OS) {
  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining a copy\n"
        " * of this software and associated documentation files (the "
        "\"Software\"), to deal\n"
        " * in the Software without restriction, including without limitation "
        "the rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
        "and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
        "is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  // Include guard, float16_t typedef and the __ai attribute shorthand.
  OS << "#ifndef __ARM_FP16_H\n"
        "#define __ARM_FP16_H\n\n"
        "#include <stdint.h>\n\n"
        "typedef __fp16 float16_t;\n"
        "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  // Instantiate every "Inst" record, then index the bodies (see indexBody()).
  SmallVector<Intrinsic *, 128> Defs;
  for (auto *Rec : Records.getAllDerivedDefinitions("Inst"))
    createIntrinsic(Rec, Defs);
  for (auto *Def : Defs)
    Def->indexBody();

  std::stable_sort(Defs.begin(), Defs.end(),
                   [](const Intrinsic *LHS, const Intrinsic *RHS) {
                     return *LHS < *RHS;
                   });

  // Repeatedly sweep the worklist, emitting each def whose dependencies have
  // all left the worklist already, until a sweep emits nothing.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  std::string InGuard;
  bool MadeProgress = true;
  while (MadeProgress && !Defs.empty()) {
    MadeProgress = false;
    auto It = Defs.begin();
    while (It != Defs.end()) {
      Intrinsic *Cur = *It;
      // A dependency still sitting in the worklist has not been emitted yet.
      bool Blocked = false;
      for (auto *Dep : Cur->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), Dep) != Defs.end()) {
          Blocked = true;
          break;
        }
      }
      if (Blocked) {
        ++It;
        continue;
      }

      // Close the previous #if block and open a new one when the guard changes.
      const std::string Guard = Cur->getGuard();
      if (Guard != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = Guard;
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
      }

      OS << Cur->generate();
      MadeProgress = true;
      It = Defs.erase(It);
    }
  }
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
    OS << "#endif\n";

  OS << "\n"
        "#undef __ai\n\n"
        "#endif /* __ARM_FP16_H */\n";
}

a2424 4
void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runFP16(OS); // Temporary emitter writes arm_fp16.h.
}

@


1.1.1.12
log
@Mark old LLVM instance as dead.
@
text
@@


1.1.1.7.4.1
log
@file NeonEmitter.cpp was added on branch tls-maxphys on 2014-08-19 23:49:29 +0000
@
text
@d1 2396
@


1.1.1.7.4.2
log
@Rebase to HEAD as of a few days ago.
@
text
@a0 2396
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
#include <sstream>
#include <vector>
#include <map>
#include <algorithm>
using namespace llvm;

namespace {

// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
static Record *CurrentRecord = nullptr;
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (Assertion)
    return; // Condition holds; nothing to report.
  if (!CurrentRecord)
    PrintFatalError(Str); // No def to anchor the diagnostic to.
  else
    PrintFatalError(CurrentRecord->getLoc(), Str);
}

enum ClassKind {
  ClassNone,  // no class; the first enumerator, so it has the value 0
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but for which we must not emit any suffix in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not true instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon builtins:
/// an EltType value under EltTypeMask, plus the unsigned and quad flag bits.
/// These must be kept in sync with include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };

enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64
};
} // end namespace NeonTypeFlags

class Intrinsic;
class NeonEmitter;
class Type;
class Variable;

//===----------------------------------------------------------------------===//
// TypeSpec
//===----------------------------------------------------------------------===//

/// A TypeSpec is just a simple wrapper around a string, but gets its own type
/// for strong typing purposes.
///
/// A TypeSpec can be used to create a type.
class TypeSpec : public std::string {
public:
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      if (islower(I)) {
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
};

//===----------------------------------------------------------------------===//
// Type
//===----------------------------------------------------------------------===//

/// A scalar, vector or multi-vector type described by a TypeSpec and modifier.
class Type {
private:
  TypeSpec TS;

  bool Float, Signed, Void, Poly, Constant, Pointer;
  // ScalarForMangling and NoManglingQ are really not suited to live here as
  // they are not related to the type. But they live in the TypeSpec (not the
  // prototype), so this is really the only place to store them.
  bool ScalarForMangling, NoManglingQ;
  unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
  Type()
      : Float(false), Signed(false), Void(true), Poly(false), Constant(false),
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

  Type(TypeSpec TS, char CharMod)
      : TS(TS), Float(false), Signed(false), Void(false), Poly(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    applyModifier(CharMod);
  }

  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions. getNumElements() divides by the element width, so it
  // must not be called on a default-constructed Type (element width 0).
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  bool isFloating() const { return Float; }
  bool isInteger() const { return !Float && !Poly; }
  bool isSigned() const { return Signed; }
  bool isScalar() const { return NumVectors == 0; }
  bool isVector() const { return NumVectors > 0; }
  bool isFloat() const { return Float && ElementBitwidth == 32; }
  bool isDouble() const { return Float && ElementBitwidth == 64; }
  bool isHalf() const { return Float && ElementBitwidth == 16; }
  bool isPoly() const { return Poly; }
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return !Float && ElementBitwidth == 16; }
  bool isInt() const { return !Float && ElementBitwidth == 32; }
  bool isLong() const { return !Float && ElementBitwidth == 64; }
  bool isVoid() const { return Void; }
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions. doubleLanes()/halveLanes() force the total width to
  // 128/64 bits and report a fatal error if it already has that value.
  void makeUnsigned() { Signed = false; }
  void makeSigned() { Signed = true; }
  void makeInteger(unsigned ElemWidth, bool Sign) {
    Float = false;
    Poly = false;
    Signed = Sign;
    ElementBitwidth = ElemWidth;
  }
  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }
  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }
  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }
  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
  unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
  /// Applies a prototype modifier to the type.
  void applyModifier(char Mod);
};

//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//

/// A variable is a simple class that just has a type and a name.
class Variable {
  Type T;
  std::string N;

public:
  Variable() : T(Type::getVoid()), N("") {}
  Variable(Type T, std::string N) : T(T), N(N) {}

  Type getType() const { return T; }
  std::string getName() const { return "__" + N; }
};

//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//

/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  friend class DagEmitter;

  /// The Record this intrinsic was created from.
  Record *R;
  /// The unmangled name and prototype.
  std::string Name, Proto;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  ListInit *Body;
  /// The architectural #ifdef guard.
  std::string Guard;
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian? or does it need its arguments
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  ///            function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type(OutTS, 'd'). InBaseType is only
  /// different if CartesianProductOfTypes = 1 (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
  std::string VariablePostfix;

  NeonEmitter &Emitter;
  std::stringstream OS;

public:
  Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
            StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
      : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
        CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
        BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
        BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
    // If this builtin takes an immediate argument, we need to #define it rather
    // than use a standard declaration, so that SemaChecking can range check
    // the immediate passed by the user.
    if (Proto.find('i') != std::string::npos)
      UseMacro = true;

    // Pointer arguments need to use macros to avoid hiding aligned attributes
    // from the pointer type.
    if (Proto.find('p') != std::string::npos ||
        Proto.find('c') != std::string::npos)
      UseMacro = true;

    // It is not permitted to pass or return an __fp16 by value, so intrinsics
    // taking a scalar float16_t must be implemented as macros.
    if (OutTS.find('h') != std::string::npos &&
        Proto.find('s') != std::string::npos)
      UseMacro = true;

    // Modify the TypeSpec per-argument to get a concrete Type, and create
    // known variables for each.
    // Types[0] is the return value.
    Types.push_back(Type(OutTS, Proto[0]));
    for (unsigned I = 1; I < Proto.size(); ++I)
      Types.push_back(Type(InTS, Proto[I]));
  }

  /// Get the Record that this intrinsic is based off.
  Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// this is the set of immediate dependencies, NOT the
  /// transitive closure.
  const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  /// Get the architectural guard string (#ifdef).
  std::string getGuard() const { return Guard; }
  /// Get the non-mangled name.
  std::string getName() const { return Name; }

  /// Return true if the intrinsic takes an immediate operand.
  bool hasImmediate() const {
    return Proto.find('i') != std::string::npos;
  }
  /// Return the parameter index of the immediate operand.
  unsigned getImmediateIdx() const {
    assert(hasImmediate());
    unsigned Idx = Proto.find('i');
    assert(Idx > 0 && "Can't return an immediate!");
    return Idx - 1;
  }

  /// Return true if the intrinsic takes a splat operand.
  bool hasSplat() const { return Proto.find('a') != std::string::npos; }
  /// Return the parameter index of the splat operand.
  unsigned getSplatIdx() const {
    assert(hasSplat());
    unsigned Idx = Proto.find('a');
    assert(Idx > 0 && "Can't return a splat!");
    return Idx - 1;
  }

  unsigned getNumParams() const { return Proto.size() - 1; }
  Type getReturnType() const { return Types[0]; }
  Type getParamType(unsigned I) const { return Types[I + 1]; }
  Type getBaseType() const { return BaseType; }
  /// Return the raw prototype string.
  std::string getProto() const { return Proto; }

  /// Return true if the prototype has a scalar argument.
  /// This does not return true for the "splat" code ('a').
  bool protoHasScalar();

  /// Return the index that parameter PIndex will sit at
  /// in a generated function call. This is often just PIndex,
  /// but may not be as things such as multiple-vector operands
  /// and sret parameters need to be taken into account.
  unsigned getGeneratedParamIdx(unsigned PIndex) {
    unsigned Idx = 0;
    if (getReturnType().getNumVectors() > 1)
      // Multiple vectors are passed as sret.
      ++Idx;

    for (unsigned I = 0; I < PIndex; ++I)
      Idx += std::max(1U, getParamType(I).getNumVectors());

    return Idx;
  }

  bool hasBody() const { return Body && Body->getValues().size() > 0; }

  void setNeededEarly() { NeededEarly = true; }

  bool operator<(const Intrinsic &Other) const {
    // Sort lexicographically on a two-tuple (Guard, Name)
    if (Guard != Other.Guard)
      return Guard < Other.Guard;
    return Name < Other.Name;
  }

  ClassKind getClassKind(bool UseClassBIfScalar = false) {
    if (UseClassBIfScalar && !protoHasScalar())
      return ClassB;
    return CK;
  }

  /// Return the name, mangled with type information.
  /// If ForceClassS is true, use ClassS (u32/s32) instead
  /// of the intrinsic's own type class.
  std::string getMangledName(bool ForceClassS = false);
  /// Return the type code for a builtin function call.
  std::string getInstTypeCode(Type T, ClassKind CK);
  /// Return the type string for a BUILTIN() macro in Builtins.def.
  std::string getBuiltinTypeStr();

  /// Generate the intrinsic, returning code.
  std::string generate();
  /// Perform type checking and populate the dependency graph, but
  /// don't generate code yet.
  void indexBody();

private:
  std::string mangleName(std::string Name, ClassKind CK);

  void initVariables();
  std::string replaceParamsIn(std::string S);

  void emitBodyAsBuiltinCall();

  void generateImpl(bool ReverseArguments,
                    StringRef NamePrefix, StringRef CallPrefix);
  void emitReturn();
  void emitBody(StringRef CallPrefix);
  void emitShadowedArgs();
  void emitArgumentReversal();
  void emitReturnReversal();
  void emitReverseVariable(Variable &Dest, Variable &Src);
  void emitNewLine();
  void emitClosingBrace();
  void emitOpeningBrace();
  void emitPrototype(StringRef NamePrefix);

  class DagEmitter {
    Intrinsic &Intr;
    StringRef CallPrefix;

  public:
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
      Intr(Intr), CallPrefix(CallPrefix) {
    }
    std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    std::pair<Type, std::string> emitDagDup(DagInit *DI);
    std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    std::pair<Type, std::string> emitDagCall(DagInit *DI);
    std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    std::pair<Type, std::string> emitDagOp(DagInit *DI);
    std::pair<Type, std::string> emitDag(DagInit *DI);
  };

};

//===----------------------------------------------------------------------===//
// NeonEmitter
//===----------------------------------------------------------------------===//

class NeonEmitter {
  RecordKeeper &Records;
  /// Maps an instruction class record to its ClassKind (see constructor).
  DenseMap<Record *, ClassKind> ClassMap;
  /// Intrinsics grouped by a string key (presumably the name; TODO confirm).
  std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
  /// Next value handed out by getUniqueNumber().
  unsigned UniqueNumber;

  void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                SmallVectorImpl<Intrinsic *> &Defs);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  SmallVectorImpl<Intrinsic *> &Defs);

public:
  /// Attempt to find an intrinsic called Name that takes the given types as
  /// arguments. Called by Intrinsic.
  Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);

  /// Return the current value of a per-emitter counter and advance it, so no
  /// two calls on the same emitter see the same number. Called by Intrinsic.
  unsigned getUniqueNumber() { return UniqueNumber++; }

  /// Associate each instruction class of the .td file with its ClassKind.
  /// A class and its "Op" counterpart share one kind.
  NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
    // Plain instruction classes.
    ClassMap[R.getClass("SInst")] = ClassS;
    ClassMap[R.getClass("IInst")] = ClassI;
    ClassMap[R.getClass("WInst")] = ClassW;

    // Their "Op" counterparts, plus two classes with kinds of their own.
    ClassMap[R.getClass("SOpInst")] = ClassS;
    ClassMap[R.getClass("IOpInst")] = ClassI;
    ClassMap[R.getClass("WOpInst")] = ClassW;
    ClassMap[R.getClass("LOpInst")] = ClassL;
    ClassMap[R.getClass("NoTestOpInst")] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Type implementation
//===----------------------------------------------------------------------===//

std::string Type::str() const {
  if (Void)
    return "void";
  std::string S;

  if (!Signed && isInteger())
    S += "u";

  if (Poly)
    S += "poly";
  else if (Float)
    S += "float";
  else
    S += "int";

  S += utostr(ElementBitwidth);
  if (isVector())
    S += "x" + utostr(getNumElements());
  if (NumVectors > 1)
    S += "x" + utostr(NumVectors);
  S += "_t";

  if (Constant)
    S += " const";
  if (Pointer)
    S += " *";

  return S;
}

std::string Type::builtin_str() const {
  if (isVoid())
    return "v";

  // Element code: all pointers are void pointers, others go by element width.
  std::string Code;
  if (Pointer) {
    Code = "v";
  } else if (isInteger()) {
    switch (ElementBitwidth) {
    case 8: Code = "c"; break;
    case 16: Code = "s"; break;
    case 32: Code = "i"; break;
    case 64: Code = "Wi"; break;
    case 128: Code = "LLLi"; break;
    default: llvm_unreachable("Unhandled case!");
    }
  } else {
    switch (ElementBitwidth) {
    case 16: Code = "h"; break;
    case 32: Code = "f"; break;
    case 64: Code = "d"; break;
    default: llvm_unreachable("Unhandled case!");
    }
  }

  // Non-pointers: chars are made explicitly signed, unsigned integers get "U".
  if (!Pointer) {
    if (isChar())
      Code = "S" + Code;
    else if (isInteger() && !Signed)
      Code = "U" + Code;
  }
  if (isScalar()) {
    if (Constant) Code += "C";
    if (Pointer) Code += "*";
    return Code;
  }
  std::string Ret; // One "V<lanes><code>" group per vector.
  for (unsigned I = 0; I != NumVectors; ++I)
    Ret += "V" + utostr(getNumElements()) + Code;
  return Ret;
}

unsigned Type::getNeonEnum() const {
  // Position of the element width within the sequence 8, 16, 32, 64, 128.
  unsigned WidthIdx;
  switch (ElementBitwidth) {
  case 8: WidthIdx = 0; break;
  case 16: WidthIdx = 1; break;
  case 32: WidthIdx = 2; break;
  case 64: WidthIdx = 3; break;
  case 128: WidthIdx = 4; break;
  default: llvm_unreachable("Unhandled element bitwidth!");
  }

  unsigned Flags = (unsigned)NeonTypeFlags::Int8 + WidthIdx;
  if (Poly) {
    // Poly32 doesn't exist, so the wider poly types sit one slot lower.
    WidthIdx -= (WidthIdx >= 2) ? 1 : 0;
    Flags = (unsigned)NeonTypeFlags::Poly8 + WidthIdx;
  }
  if (Float) {
    assert(WidthIdx != 0 && "Float8 doesn't exist!");
    Flags = (unsigned)NeonTypeFlags::Float16 + (WidthIdx - 1);
  }

  // OR in the quad and unsigned bits on top of the element type.
  Flags |= (Bitwidth == 128) ? (unsigned)NeonTypeFlags::QuadFlag : 0U;
  if (isInteger() && !Signed)
    Flags |= (unsigned)NeonTypeFlags::UnsignedFlag;

  return Flags;
}

/// Parse a stdint.h or arm_neon.h typedef name of the form
///   [u](int|float|poly)<bits>[x<lanes>[x<vectors>]]_t
/// into a Type. A name that ends early or lacks a number never reads past
/// the end of the string or uses an uninitialised value: a missing number
/// leaves the field it would set unchanged (the lane count defaults to 0).
Type Type::fromTypedefName(StringRef Name) {
  // Strip the leading run of decimal digits from Name and store its value in
  // Result. Result is left untouched when there is nothing to parse.
  auto ConsumeNumber = [&Name](unsigned &Result) {
    size_t Len = 0;
    while (Len < Name.size() &&
           isdigit(static_cast<unsigned char>(Name[Len])))
      ++Len;
    Name.substr(0, Len).getAsInteger(10, Result);
    Name = Name.drop_front(Len);
  };
  // Strip Prefix from the front of Name if present. Unlike front(), this is
  // well defined on an empty Name.
  auto ConsumePrefix = [&Name](StringRef Prefix) -> bool {
    if (!Name.startswith(Prefix))
      return false;
    Name = Name.drop_front(Prefix.size());
    return true;
  };

  Type T;
  T.Void = false;
  T.Float = false;
  T.Poly = false;

  // A leading 'u' marks an unsigned type; everything else is signed.
  T.Signed = !ConsumePrefix("u");

  if (ConsumePrefix("float")) {
    T.Float = true;
  } else if (ConsumePrefix("poly")) {
    T.Poly = true;
  } else {
    assert(Name.startswith("int"));
    ConsumePrefix("int");
  }

  // Element width; a scalar is exactly one element wide.
  ConsumeNumber(T.ElementBitwidth);
  T.Bitwidth = T.ElementBitwidth;

  if (ConsumePrefix("x")) {
    // "x<lanes>": one vector of NumLanes elements.
    unsigned NumLanes = 0;
    ConsumeNumber(NumLanes);
    T.Bitwidth = T.ElementBitwidth * NumLanes;
    T.NumVectors = 1;
  } else {
    // Was scalar.
    T.NumVectors = 0;
  }

  // "x<vectors>": several such vectors.
  if (ConsumePrefix("x"))
    ConsumeNumber(T.NumVectors);

  assert(Name.startswith("_t") && "Malformed typedef!");
  return T;
}

void Type::applyTypespec(bool &Quad) {
  std::string S = TS;
  ScalarForMangling = false;
  Void = false;
  Poly = Float = false;
  ElementBitwidth = ~0U;
  Signed = true;
  NumVectors = 1;

  for (char I : S) {
    switch (I) {
    case 'S':
      ScalarForMangling = true;
      break;
    case 'H':
      NoManglingQ = true;
      Quad = true;
      break;
    case 'Q':
      Quad = true;
      break;
    case 'P':
      Poly = true;
      break;
    case 'U':
      Signed = false;
      break;
    case 'c':
      ElementBitwidth = 8;
      break;
    case 'h':
      Float = true;
    // Fall through
    case 's':
      ElementBitwidth = 16;
      break;
    case 'f':
      Float = true;
    // Fall through
    case 'i':
      ElementBitwidth = 32;
      break;
    case 'd':
      Float = true;
    // Fall through
    case 'l':
      ElementBitwidth = 64;
      break;
    case 'k':
      ElementBitwidth = 128;
      // Poly doesn't have a 128x1 type.
      if (Poly)
        NumVectors = 0;
      break;
    default:
      llvm_unreachable("Unhandled type code!");
    }
  }
  assert(ElementBitwidth != ~0U && "Bad element bitwidth!");

  Bitwidth = Quad ? 128 : 64;
}

void Type::applyModifier(char Mod) {
  bool AppliedQuad = false; // Set by applyTypespec() on a 'Q' or 'H'.
  applyTypespec(AppliedQuad);

  switch (Mod) {
  case 'v': // void
    Void = true;
    break;
  case 't': // poly becomes an unsigned integer; other types are unchanged
    if (Poly) {
      Poly = false;
      Signed = false;
    }
    break;
  case 'b': // unsigned integer scalar of the element width
    Signed = false;
    Float = false;
    Poly = false;
    NumVectors = 0;
    Bitwidth = ElementBitwidth;
    break;
  case '$': // signed integer scalar of the element width
    Signed = true;
    Float = false;
    Poly = false;
    NumVectors = 0;
    Bitwidth = ElementBitwidth;
    break;
  case 'u': // unsigned integer of the same shape
    Signed = false;
    Poly = false;
    Float = false;
    break;
  case 'x': // signed integer of the same shape
    Signed = true;
    assert(!Poly && "'x' can't be used with poly types!");
    Float = false;
    break;
  case 'o': // 64-bit float scalar
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = true;
    break;
  case 'y': // 32-bit float scalar
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = true;
    break;
  case 'f': // 32-bit float elements
    // Special case - if we're half-precision, a floating
    // point argument needs to be 128-bits (double size).
    if (isHalf())
      Bitwidth = 128;
    Float = true;
    ElementBitwidth = 32;
    break;
  case 'F': // 64-bit float elements
    Float = true;
    ElementBitwidth = 64;
    break;
  case 'g': // halve the total width if the typespec asked for quad
    if (AppliedQuad)
      Bitwidth /= 2;
    break;
  case 'j': // double the total width unless the typespec asked for quad
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'w': // double both the element width and the total width
    ElementBitwidth *= 2;
    Bitwidth *= 2;
    break;
  case 'n': // double the element width only
    ElementBitwidth *= 2;
    break;
  case 'i': // signed 32-bit integer scalar
    Float = false;
    Poly = false;
    ElementBitwidth = Bitwidth = 32;
    NumVectors = 0;
    Signed = true;
    break;
  case 'l': // unsigned 64-bit integer scalar
    Float = false;
    Poly = false;
    ElementBitwidth = Bitwidth = 64;
    NumVectors = 0;
    Signed = false;
    break;
  case 'z': // scalar of half the element width
    ElementBitwidth /= 2;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 'r': // scalar of twice the element width
    ElementBitwidth *= 2;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 's': // scalar of the element type
  case 'a': // same effect on the type as 's'
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 'k': // double the total width
    Bitwidth *= 2;
    break;
  case 'c': // const pointer to the element type
    Constant = true;
  // Fall through
  case 'p': // pointer to the element type
    Pointer = true;
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
    break;
  case 'h': // halve the element width only
    ElementBitwidth /= 2;
    break;
  case 'q': // halve the element width, double the total width
    ElementBitwidth /= 2;
    Bitwidth *= 2;
    break;
  case 'e': // halve the element width and make unsigned
    ElementBitwidth /= 2;
    Signed = false;
    break;
  case 'm': // halve both the element width and the total width
    ElementBitwidth /= 2;
    Bitwidth /= 2;
    break;
  case 'd': // the type given by the typespec, unchanged
    break;
  case '2': // two vectors
    NumVectors = 2;
    break;
  case '3': // three vectors
    NumVectors = 3;
    break;
  case '4': // four vectors
    NumVectors = 4;
    break;
  case 'B': // two vectors, widened unless the typespec asked for quad
    NumVectors = 2;
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'C': // three vectors, widened unless the typespec asked for quad
    NumVectors = 3;
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  case 'D': // four vectors, widened unless the typespec asked for quad
    NumVectors = 4;
    if (!AppliedQuad)
      Bitwidth *= 2;
    break;
  default:
    llvm_unreachable("Unhandled character!");
  }
}

//===----------------------------------------------------------------------===//
// Intrinsic implementation
//===----------------------------------------------------------------------===//

/// Build the type suffix used when mangling an instruction name: a one-letter
/// kind code followed by the element size in bits (e.g. "s8").
///
/// \param T   the type to describe.
/// \param CK  the class kind; ClassB yields an empty string, and ClassI folds
///            the poly/signed/unsigned codes into a single 'i'.
std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
  // ClassB names carry no type code at all.
  if (CK == ClassB)
    return "";

  char Code;
  if (T.isPoly())
    Code = 'p';
  else if (!T.isInteger())
    Code = 'f';
  else if (T.isSigned())
    Code = 's';
  else
    Code = 'u';

  // Everything except the floating-point code collapses to 'i' for ClassI.
  if (CK == ClassI && Code != 'f')
    Code = 'i';

  std::string Result(1, Code);
  Result += utostr(T.getElementSizeInBits());
  return Result;
}

/// Build the builtin type-signature string for this intrinsic: the encoded
/// return type (or "vv*" when several vectors are returned), followed by the
/// encoded type of every parameter, plus a trailing "i" for ClassB builtins.
///
/// Types are adjusted on local copies before being encoded; the order of the
/// adjustments below matters because later ones can override earlier ones.
std::string Intrinsic::getBuiltinTypeStr() {
  ClassKind LocalCK = getClassKind(true);
  std::string S;

  Type RetT = getReturnType();
  // Non-floating scalar results of ClassI/ClassW intrinsics are encoded as
  // unsigned integers of the same element width.
  if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
      !RetT.isFloating())
    RetT.makeInteger(RetT.getElementSizeInBits(), false);

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (RetT.getNumVectors() > 1) {
    S += "vv*"; // void result with void* first argument
  } else {
    // Poly results are encoded as unsigned integers of the same width.
    if (RetT.isPoly())
      RetT.makeInteger(RetT.getElementSizeInBits(), false);
    // Non-scalar results are always encoded as signed.
    if (!RetT.isScalar() && !RetT.isSigned())
      RetT.makeSigned();

    // A return prototype code of 'F' or 'f' exempts the result from the
    // 8-bit-element cast applied to ClassB non-scalar results.
    bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
    if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
      // Cast to vector of 8-bit elements.
      RetT.makeInteger(8, true);

    S += RetT.builtin_str();
  }

  for (unsigned I = 0; I < getNumParams(); ++I) {
    Type T = getParamType(I);
    // Poly parameters are encoded as unsigned integers of the same width.
    if (T.isPoly())
      T.makeInteger(T.getElementSizeInBits(), false);

    // Proto[0] describes the return value, so parameter I uses Proto[I + 1].
    bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
    if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
      T.makeInteger(8, true);
    // Halves always get converted to 8-bit elements.
    if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
      T.makeInteger(8, true);

    if (LocalCK == ClassI)
      T.makeSigned();

    // Constant indices are always just "int".
    if (hasImmediate() && getImmediateIdx() == I)
      T.makeInteger(32, true);

    S += T.builtin_str();
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (LocalCK == ClassB)
    S += "i";

  return S;
}

std::string Intrinsic::getMangledName(bool ForceClassS) {
  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  ClassKind LocalCK = CK;
  if (!protoHasScalar())
    LocalCK = ClassB;

  return mangleName(Name, ForceClassS ? ClassS : LocalCK);
}

/// Decorate \p Name with the type code of the base type (and of the input
/// base type for reinterprets), the "_v" marker for ClassB, a 'q' for
/// 128-bit base types and a size letter for scalar base types.
///
/// \param Name     the undecorated intrinsic name.
/// \param LocalCK  the class kind used to pick the type code.
/// \returns the decorated name.
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
  // These conversion names are returned exactly as given.
  if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
      Name == "vcvt_f64_f32")
    return Name;

  std::string Mangled = Name;

  const std::string Code = getInstTypeCode(BaseType, LocalCK);
  if (!Code.empty()) {
    // A trailing "_xN" (N a digit) must stay last, so the type code is
    // inserted in front of it instead of being appended.
    const size_t Len = Name.length();
    const bool EndsWithXN = Len >= 3 && isdigit(Name[Len - 1]) &&
                            Name[Len - 2] == 'x' && Name[Len - 3] == '_';
    if (EndsWithXN)
      Mangled.insert(Mangled.length() - 3, "_" + Code);
    else
      Mangled += "_" + Code;
  }

  // A reinterpret: also append the type code of the input base type.
  if (BaseType != InBaseType)
    Mangled += "_" + getInstTypeCode(InBaseType, LocalCK);

  if (LocalCK == ClassB)
    Mangled += "_v";

  // The 'q' goes before the first '_' so that it ends up ahead of _lane or
  // _n on vector-scalar operations.
  if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ())
    Mangled.insert(Mangled.find('_'), "q");

  // Scalar base types get a size letter, again before the first '_'.
  if (BaseType.isScalarForMangling()) {
    char SizeLetter = '\0';
    switch (BaseType.getElementSizeInBits()) {
    case 8:
      SizeLetter = 'b';
      break;
    case 16:
      SizeLetter = 'h';
      break;
    case 32:
      SizeLetter = 's';
      break;
    case 64:
      SizeLetter = 'd';
      break;
    default:
      llvm_unreachable("Bad suffix!");
    }
    Mangled.insert(Mangled.find('_'), 1, SizeLetter);
  }

  return Mangled;
}

/// Substitute every "$name" reference in \p S with the emitted name of the
/// variable registered under "name".  A reference name is the run of
/// alphabetic characters following the '$'; referring to an unknown variable
/// is reported through assert_with_loc.
std::string Intrinsic::replaceParamsIn(std::string S) {
  for (size_t Dollar = S.find('$'); Dollar != std::string::npos;
       Dollar = S.find('$')) {
    // Find the end of the alphabetic run that forms the variable name.
    size_t NameEnd = Dollar + 1;
    while (isalpha(S[NameEnd]))
      ++NameEnd;

    const size_t RefLen = NameEnd - Dollar;
    std::string VarName = S.substr(Dollar + 1, RefLen - 1);
    assert_with_loc(Variables.find(VarName) != Variables.end(),
                    "Variable not defined!");
    S.replace(Dollar, RefLen, Variables.find(VarName)->second.getName());
  }

  return S;
}

/// Reset the variable table and register one variable per prototype
/// parameter ("p0", "p1", ...) plus the return variable "ret", each carrying
/// the current variable postfix in its emitted name.
void Intrinsic::initVariables() {
  Variables.clear();

  // Proto[0] / Types[0] describe the return value, so parameters start at 1.
  for (unsigned I = 1; I < Proto.size(); ++I) {
    std::string Key(1, 'p');
    Key += static_cast<char>('0' + (I - 1));

    Variables[Key] = Variable(Types[I], Key + VariablePostfix);
  }

  RetVar = Variable(Types[0], "ret" + VariablePostfix);
}

/// Emit the declaration head of the intrinsic: either "#define name(args)"
/// for macros, or "__ai rettype name(type arg, ...)" for inline functions.
///
/// \param NamePrefix  text placed directly in front of the mangled name.
void Intrinsic::emitPrototype(StringRef NamePrefix) {
  if (!UseMacro)
    OS << "__ai " << Types[0].str() << " ";
  else
    OS << "#define ";

  OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";

  for (unsigned Idx = 0; Idx < getNumParams(); ++Idx) {
    if (Idx > 0)
      OS << ", ";

    // Parameters are registered under the keys "p0", "p1", ...
    std::string Key(1, 'p');
    Key += static_cast<char>('0' + Idx);
    assert(Variables.find(Key) != Variables.end());
    Variable &Param = Variables[Key];

    // Macro parameters are untyped; function parameters carry their type.
    if (!UseMacro)
      OS << Param.getType().str() << " ";
    OS << Param.getName();
  }

  OS << ")";
}

/// Open the body: a statement expression for macros, a plain brace for
/// functions, followed by a line break.
void Intrinsic::emitOpeningBrace() {
  OS << (UseMacro ? " __extension__ ({" : " {");
  emitNewLine();
}

/// Close the body opened by emitOpeningBrace(): "})" for macros, "}" for
/// functions.
void Intrinsic::emitClosingBrace() {
  OS << (UseMacro ? "})" : "}");
}

/// End the current output line.  Inside a macro the newline is preceded by a
/// backslash so the definition continues on the next line.
void Intrinsic::emitNewLine() {
  OS << (UseMacro ? " \\\n" : "\n");
}

/// Emit code assigning to \p Dest the lanes of \p Src in reverse order, using
/// __builtin_shufflevector with a descending index list.  When the type
/// consists of several vectors, each ".val[K]" member is reversed separately.
void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  unsigned NumVecs = Dest.getType().getNumVectors();

  if (NumVecs <= 1) {
    // Single vector: one shuffle of the whole value.
    OS << "  " << Dest.getName() << " = __builtin_shufflevector("
       << Src.getName() << ", " << Src.getName();
    for (int Lane = Dest.getType().getNumElements() - 1; Lane >= 0; --Lane)
      OS << ", " << utostr(Lane);
    OS << ");";
    emitNewLine();
    return;
  }

  // Several vectors: one shuffle per member.
  emitNewLine();
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
    const std::string SrcMember = Src.getName() + ".val[" + utostr(Vec) + "]";
    OS << "  " << Dest.getName() << ".val[" << utostr(Vec) << "] = "
       << "__builtin_shufflevector(" << SrcMember << ", " << SrcMember;
    for (int Lane = Dest.getType().getNumElements() - 1; Lane >= 0; --Lane)
      OS << ", " << utostr(Lane);
    OS << ");";
    emitNewLine();
  }
}

void Intrinsic::emitArgumentReversal() {
  if (BigEndianSafe)
    return;

  // Reverse all vector arguments.
  for (unsigned I = 0; I < getNumParams(); ++I) {
    std::string Name = "p" + utostr(I);
    std::string NewName = "rev" + utostr(I);

    Variable &V = Variables[Name];
    Variable NewV(V.getType(), NewName + VariablePostfix);

    if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
      continue;

    OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
    emitReverseVariable(NewV, V);
    V = NewV;
  }
}

/// Emit an in-place lane reversal of the return variable when the return type
/// is a non-void vector with more than one element.  Does nothing for
/// big-endian-safe intrinsics.
void Intrinsic::emitReturnReversal() {
  if (BigEndianSafe)
    return;

  const bool Reversible = getReturnType().isVector() &&
                          !getReturnType().isVoid() &&
                          getReturnType().getNumElements() != 1;
  if (Reversible)
    emitReverseVariable(RetVar, RetVar);
}


void Intrinsic::emitShadowedArgs() {
  // Macro arguments are not type-checked like inline function arguments,
  // so assign them to local temporaries to get the right type checking.
  if (!UseMacro)
    return;

  for (unsigned I = 0; I < getNumParams(); ++I) {
    // Do not create a temporary for an immediate argument.
    // That would defeat the whole point of using a macro!
    if (hasImmediate() && Proto[I+1] == 'i')
      continue;
    // Do not create a temporary for pointer arguments. The input
    // pointer may have an alignment hint.
    if (getParamType(I).isPointer())
      continue;

    std::string Name = "p" + utostr(I);

    assert(Variables.find(Name) != Variables.end());
    Variable &V = Variables[Name];

    std::string NewName = "s" + utostr(I);
    Variable V2(V.getType(), NewName + VariablePostfix);

    OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
       << V.getName() << ";";
    emitNewLine();

    V = V2;
  }
}

/// Return true when the prototype string contains any of the codes
/// 's', 'z', 'r', 'b', '$', 'y' or 'o'.
///
/// The code 'a' is deliberately not part of the set: for a builtin function
/// the argument matching 'a' uses a vector type splatted from a scalar type.
bool Intrinsic::protoHasScalar() {
  return Proto.find_first_of("szrb$yo") != std::string::npos;
}

/// Emit the intrinsic body as a single call to the matching __builtin_neon_*
/// function.  Arguments are cast where needed, multi-vector arguments are
/// passed member by member, and the result is assigned to the return variable
/// unless it is void or handed back through an sret-like pointer argument.
void Intrinsic::emitBodyAsBuiltinCall() {
  std::string S;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool SRet = getReturnType().getNumVectors() >= 2;

  StringRef N = Name;
  if (hasSplat()) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    assert(N.endswith("_n"));
    N = N.drop_back(2);
  }

  // Same class-kind selection as getMangledName(): without a scalar operand
  // in the prototype the builtin is mangled as ClassB.
  ClassKind LocalCK = CK;
  if (!protoHasScalar())
    LocalCK = ClassB;

  // Cast the builtin's result back to the declared return type.
  if (!getReturnType().isVoid() && !SRet)
    S += "(" + RetVar.getType().str() + ") ";

  S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";

  if (SRet)
    S += "&" + RetVar.getName() + ", ";

  for (unsigned I = 0; I < getNumParams(); ++I) {
    Variable &V = Variables["p" + utostr(I)];
    Type T = V.getType();

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the builtin.
    if (T.getNumVectors() > 1) {
      // Check if an explicit cast is needed.
      std::string Cast;
      if (T.isChar() || T.isPoly() || !T.isSigned()) {
        Type T2 = T;
        T2.makeOneVector();
        T2.makeInteger(8, /*Signed=*/true);
        Cast = "(" + T2.str() + ")";
      }

      for (unsigned J = 0; J < T.getNumVectors(); ++J)
        S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
      continue;
    }

    std::string Arg;
    Type CastToType = T;
    if (hasSplat() && I == getSplatIdx()) {
      // Expand the splat argument into a vector literal of the base type with
      // the variable repeated once per element.
      Arg = "(" + BaseType.str() + ") {";
      for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
        if (J != 0)
          Arg += ", ";
        Arg += V.getName();
      }
      Arg += "}";

      CastToType = BaseType;
    } else {
      Arg = V.getName();
    }

    // Check if an explicit cast is needed.
    if (CastToType.isVector()) {
      CastToType.makeInteger(8, true);
      Arg = "(" + CastToType.str() + ")" + Arg;
    }

    S += Arg + ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (getClassKind(true) == ClassB) {
    // The enum is taken from the return type, except that a return prototype
    // code of 'v', 'f' or 'F' selects the first parameter instead, and a
    // pointer type selects the second parameter.
    Type ThisTy = getReturnType();
    if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
      ThisTy = getParamType(0);
    if (ThisTy.isPointer())
      ThisTy = getParamType(1);

    S += utostr(ThisTy.getNeonEnum());
  } else {
    // Remove extraneous ", ".
    S.pop_back();
    S.pop_back();
  }
  S += ");";

  std::string RetExpr;
  if (!SRet && !RetVar.getType().isVoid())
    RetExpr = RetVar.getName() + " = ";

  OS << "  " << RetExpr << S;
  emitNewLine();
}

/// Emit the statements of the intrinsic body.
///
/// A declaration of the return variable comes first (unless the return type
/// is void).  Without operation entries the body is a single builtin call;
/// otherwise each string or DAG entry becomes one line, and the last line is
/// turned into an assignment to the return variable.
///
/// \param CallPrefix  prefix handed to the DAG emitter for calls it generates.
void Intrinsic::emitBody(StringRef CallPrefix) {
  assert(RetVar.getType() == Types[0]);

  const bool ReturnsValue = !RetVar.getType().isVoid();
  if (ReturnsValue) {
    OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
    emitNewLine();
  }

  // No operation list means the intrinsic maps straight onto a builtin.
  if (Body == nullptr || Body->getValues().empty()) {
    emitBodyAsBuiltinCall();
    return;
  }

  // Collect the output lines first; the last one yields the result.
  std::vector<std::string> Statements;
  for (auto *Entry : Body->getValues()) {
    if (StringInit *Str = dyn_cast<StringInit>(Entry)) {
      Statements.push_back(replaceParamsIn(Str->getAsString()));
      continue;
    }
    if (DagInit *Dag = dyn_cast<DagInit>(Entry)) {
      DagEmitter DE(*this, CallPrefix);
      Statements.push_back(DE.emitDag(Dag).second + ";");
    }
  }

  assert(Statements.size() && "Empty def?");
  if (ReturnsValue)
    Statements.back() = RetVar.getName() + " = " + Statements.back();

  for (size_t Idx = 0; Idx < Statements.size(); ++Idx) {
    OS << "  " << Statements[Idx];
    emitNewLine();
  }
}

/// Emit the statement that yields the result: the bare return variable for a
/// macro, or a "return" statement for a function.  Emits nothing when the
/// return type is void.
void Intrinsic::emitReturn() {
  if (RetVar.getType().isVoid())
    return;

  OS << "  ";
  if (!UseMacro)
    OS << "return ";
  OS << RetVar.getName() << ";";
  emitNewLine();
}

/// Dispatch a DAG to the emitter for its operator and return the resulting
/// (type, expression text) pair.  The operator must be a def; an unrecognised
/// operator name is reported through assert_with_loc.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  DefInit *OpDef = cast<DefInit>(DI->getOperator());
  const std::string OpName = OpDef->getAsString();

  // "cast" and "bitcast" share one emitter, told apart by a flag.
  const bool IsBitCast = OpName == "bitcast";
  if (IsBitCast || OpName == "cast")
    return emitDagCast(DI, IsBitCast);

  if (OpName == "shuffle")
    return emitDagShuffle(DI);
  if (OpName == "dup")
    return emitDagDup(DI);
  if (OpName == "splat")
    return emitDagSplat(DI);
  if (OpName == "save_temp")
    return emitDagSaveTemp(DI);
  if (OpName == "op")
    return emitDagOp(DI);
  if (OpName == "call")
    return emitDagCall(DI);
  if (OpName == "name_replace")
    return emitDagNameReplace(DI);
  if (OpName == "literal")
    return emitDagLiteral(DI);

  assert_with_loc(false, "Unknown operation!");
  return std::make_pair(Type::getVoid(), "");
}

/// Emit a unary or binary operator expression.  Argument 0 holds the operator
/// text; one further argument makes a prefix unary expression, two make an
/// infix binary expression whose operands must have the same type.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  typedef std::pair<Type, std::string> TypedExpr;

  const std::string Operator =
      cast<StringInit>(DI->getArg(0))->getAsUnquotedString();

  if (DI->getNumArgs() == 2) {
    // Unary: the operator is written directly in front of its operand.
    TypedExpr Operand = emitDagArg(DI->getArg(1), DI->getArgName(1));
    return TypedExpr(Operand.first, Operator + Operand.second);
  }

  assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
  TypedExpr Lhs = emitDagArg(DI->getArg(1), DI->getArgName(1));
  TypedExpr Rhs = emitDagArg(DI->getArg(2), DI->getArgName(2));
  assert_with_loc(Lhs.first == Rhs.first, "Argument type mismatch!");
  return TypedExpr(Lhs.first, Lhs.second + " " + Operator + " " + Rhs.second);
}

/// Emit a call to another intrinsic.  Argument 0 names the callee (either a
/// string or a DAG producing the name); the remaining arguments are the call
/// operands, whose types select the overload.  The callee is marked as needed
/// early and recorded as a dependency of the calling intrinsic.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
  // Evaluate the operands first: their types drive the overload lookup.
  std::vector<Type> ArgTypes;
  std::string ArgList;
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    std::pair<Type, std::string> Operand =
        emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
    ArgTypes.push_back(Operand.first);
    if (I != 0)
      ArgList += ", ";
    ArgList += Operand.second;
  }

  // Work out the callee's name and resolve it to an intrinsic.
  std::string CalleeName;
  StringInit *NameInit = dyn_cast<StringInit>(DI->getArg(0));
  if (NameInit)
    CalleeName = NameInit->getAsUnquotedString();
  else
    CalleeName = emitDagArg(DI->getArg(0), "").second;

  Intrinsic *Callee = Intr.Emitter.getIntrinsic(CalleeName, ArgTypes);
  assert(Callee && "getIntrinsic should not return us nullptr!");

  // The callee has to be defined before the intrinsic that calls it.
  Callee->setNeededEarly();
  Intr.Dependencies.insert(Callee);

  std::string Call = CallPrefix.str() + Callee->getMangledName(true) + "(";
  Call += ArgList;
  Call += ")";

  return std::make_pair(Callee->getReturnType(), Call);
}

/// Emit "(cast MOD* VAL)" or "(bitcast MOD* VAL)": convert VAL, the last
/// argument, to the type obtained by applying the MOD arguments in order.
///
/// Each MOD is one of:
///   - $X   : take the type of parameter / variable X;
///   - "R"  : take the intrinsic's return type;
///   - "U" / "S" : make the current type unsigned / signed;
///   - "H" / "D" : halve / double the lanes of the current type;
///   - "8"  : convert to 8-bit signed integer lanes;
///   - any other string: a typedef name giving the type.
///
/// \param IsBitCast  emit a reinterpreting cast through a temporary instead
///                   of a normal cast.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
                                                                bool IsBitCast){
  const unsigned ValIdx = DI->getNumArgs() - 1;
  std::pair<Type, std::string> Val =
      emitDagArg(DI->getArg(ValIdx), DI->getArgName(ValIdx));

  // Start from the value's own type and let each modifier refine it.
  Type DestTy = Val.first;
  for (unsigned ModIdx = 0; ModIdx < ValIdx; ++ModIdx) {
    const std::string &VarName = DI->getArgName(ModIdx);
    if (!VarName.empty()) {
      assert_with_loc(Intr.Variables.find(VarName) != Intr.Variables.end(),
                      "Variable not found");
      DestTy = Intr.Variables[VarName].getType();
      continue;
    }

    StringInit *ModInit = dyn_cast<StringInit>(DI->getArg(ModIdx));
    assert_with_loc(ModInit, "Expected string type or $Name for cast type");

    const std::string Mod = ModInit->getAsUnquotedString();
    if (Mod == "R") {
      DestTy = Intr.getReturnType();
    } else if (Mod == "U") {
      DestTy.makeUnsigned();
    } else if (Mod == "S") {
      DestTy.makeSigned();
    } else if (Mod == "H") {
      DestTy.halveLanes();
    } else if (Mod == "D") {
      DestTy.doubleLanes();
    } else if (Mod == "8") {
      DestTy.makeInteger(8, true);
    } else {
      DestTy = Type::fromTypedefName(Mod);
      assert_with_loc(!DestTy.isVoid(), "Unknown typedef");
    }
  }

  if (!IsBitCast) {
    // A normal (static) cast.
    return std::make_pair(DestTy,
                          "(" + DestTy.str() + ")(" + Val.second + ")");
  }

  // A reinterpret cast needs an lvalue operand, so the value is first stored
  // in a fresh temporary named "reint", "reint1", "reint2", ...
  std::string TmpKey = "reint";
  unsigned Counter = 0;
  while (Intr.Variables.find(TmpKey) != Intr.Variables.end())
    TmpKey = "reint" + utostr(++Counter);

  Variable &Tmp = Intr.Variables[TmpKey];
  Tmp = Variable(Val.first, TmpKey + Intr.VariablePostfix);

  Intr.OS << Val.first.str() << " " << Tmp.getName() << " = " << Val.second
          << ";";
  Intr.emitNewLine();

  return std::make_pair(DestTy,
                        "*(" + DestTy.str() + " *) &" + Tmp.getName());
}

/// Emit "(shuffle arg1, arg2, sequence)" as a __builtin_shufflevector call.
///
/// Both vector arguments must have the same type.  The sequence argument is
/// evaluated with a SetTheory instance extended by the local operators and
/// expander defined below, and must yield records named "sv<N>"; the numeric
/// parts become the shuffle indices.  The returned type is the argument type,
/// with its lanes doubled or halved when the index list is twice or half as
/// long as the argument's element count.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.

  // "lowhalf": keeps the first half of the evaluated element list.
  class LowHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~LowHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };
  // "highhalf": keeps the second half of the evaluated element list.
  class HighHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~HighHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };
  // "rev": the first argument is a size that, divided by the element size,
  // gives a group length; the remaining arguments are evaluated and the
  // elements are reversed within each consecutive group of that length.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
    virtual void anchor() {}
    virtual ~Rev() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert(Revved.begin(), Revved.end());
    }
  };
  // Expands the record "mask0" to sv0..sv(N-1) and "mask1" to sv(N)..sv(2N-1);
  // any other record expands to nothing.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}
    virtual void anchor() {}
    virtual ~MaskExpander() {}
    virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), DI->getArgName(1));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the sequence with the helpers above, sized from the first
  // argument's element count and element width.
  SetTheory ST;
  LowHalf LH;
  HighHalf HH;
  MaskExpander ME(Arg1.first.getNumElements());
  Rev R(Arg1.first.getElementSizeInBits());
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", &LH);
  ST.addOperator("highhalf", &HH);
  ST.addOperator("rev", &R);
  ST.addExpander("MaskExpand", &ME);
  ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>());

  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    // Strip the "sv" prefix; what remains is the shuffle index.
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}

/// Emit "(dup scalar)": a vector literal of the intrinsic's base type whose
/// every element is the given scalar expression.  The base type must be a
/// vector and the argument a scalar.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  std::pair<Type, std::string> Elt =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  assert_with_loc(Elt.first.isScalar(), "dup() expects a scalar argument");

  Type VecTy = Intr.getBaseType();
  assert_with_loc(VecTy.isVector(), "dup() used but default type is scalar!");

  // "(type) {x, x, ..., x}" with one copy of the scalar per element.
  std::string Literal = "(" + VecTy.str() + ") {";
  for (unsigned Lane = 0; Lane < VecTy.getNumElements(); ++Lane) {
    Literal += (Lane == 0 ? "" : ", ");
    Literal += Elt.second;
  }
  Literal += "}";

  return std::make_pair(VecTy, Literal);
}

/// Emit "(splat vector, lane)": a __builtin_shufflevector of the vector with
/// itself whose index list repeats the lane expression once per element of
/// the intrinsic's base type.  The lane argument must be scalar.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  std::pair<Type, std::string> Vec =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> Lane =
      emitDagArg(DI->getArg(1), DI->getArgName(1));

  assert_with_loc(Lane.first.isScalar(),
                  "splat() requires a scalar int as the second argument");

  std::string Shuffle = "__builtin_shufflevector(";
  Shuffle += Vec.second + ", " + Vec.second;
  for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I)
    Shuffle += ", " + Lane.second;
  Shuffle += ")";

  return std::make_pair(Intr.getBaseType(), Shuffle);
}

/// Emit "(save_temp $name, value)": declare a new variable called name,
/// initialised with the value expression, and register it so that later
/// operations can refer to it.  The value must be non-void and the name must
/// not already be in use.  The result has void type.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  std::pair<Type, std::string> Val =
      emitDagArg(DI->getArg(1), DI->getArgName(1));

  assert_with_loc(!Val.first.isVoid(),
                  "Argument to save_temp() must have non-void type!");

  std::string Key = DI->getArgName(0);
  assert_with_loc(!Key.empty(),
                  "save_temp() expects a name as the first argument");

  assert_with_loc(Intr.Variables.find(Key) == Intr.Variables.end(),
                  "Variable already defined!");
  Variable &Tmp = Intr.Variables[Key];
  Tmp = Variable(Val.first, Key + Intr.VariablePostfix);

  // "type name = value"
  std::string Decl = Val.first.str();
  Decl += " " + Tmp.getName() + " = " + Val.second;

  return std::make_pair(Type::getVoid(), Decl);
}

/// Emit "(name_replace from, to)": the intrinsic's own name with the first
/// occurrence of "from" replaced by "to".  The name must contain "from".
/// The result has void type.
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  std::string NewName = Intr.Name;

  assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  const std::string From =
      cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  const std::string To =
      cast<StringInit>(DI->getArg(1))->getAsUnquotedString();

  const size_t At = NewName.find(From);
  assert_with_loc(At != std::string::npos,
                  "name should contain '" + From + "'!");
  NewName.replace(At, From.size(), To);

  return std::make_pair(Type::getVoid(), NewName);
}

/// Emit "(literal type, value)": the value text taken verbatim, typed by the
/// typedef name given as the first argument.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  const std::string TypedefName =
      cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  const std::string Text =
      cast<StringInit>(DI->getArg(1))->getAsUnquotedString();

  Type LiteralTy = Type::fromTypedefName(TypedefName);
  return std::make_pair(LiteralTy, Text);
}

/// Emit one DAG operand.  A named operand ($name) resolves to the variable
/// registered under that name; an unnamed operand must itself be a DAG and is
/// emitted recursively.
///
/// \param Arg      the operand's initializer.
/// \param ArgName  the operand's name, empty when it has none.
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  if (ArgName.empty()) {
    // Unnamed: it has to be a nested DAG.
    assert(Arg && "Neither ArgName nor Arg?!");
    DagInit *Nested = dyn_cast<DagInit>(Arg);
    assert_with_loc(Nested, "Arguments must either be DAGs or names!");
    return emitDag(Nested);
  }

  // Named: look the variable up.
  assert_with_loc(!Arg->isComplete(),
                  "Arguments must either be DAGs or names, not both!");
  assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
                  "Variable not defined!");
  Variable &Var = Intr.Variables[ArgName];
  return std::make_pair(Var.getType(), Var.getName());
}

/// Generate the complete text for this intrinsic: a little-endian definition
/// and, under "#else", the big-endian definition(s).  Returns everything
/// accumulated in the output stream.
std::string Intrinsic::generate() {
  // Little endian: no argument swapping is required.
  OS << "#ifdef __LITTLE_ENDIAN__\n";
  generateImpl(/*ReverseArguments=*/false, "", "");

  // Big endian: the user intended these intrinsics to operate on a vector
  // "as-if" loaded by (V)LDR, but we load as-if (V)LD1.  So all arguments and
  // the return value are swapped.  Sub-intrinsics that are called must be the
  // variants that do not swap again, hence the "__noswap_" call prefix.
  OS << "#else\n";
  generateImpl(/*ReverseArguments=*/true, "", "__noswap_");

  // An intrinsic that is needed early also gets a non-swapping big-endian
  // variant of its own.
  if (NeededEarly)
    generateImpl(/*ReverseArguments=*/false, "__noswap_", "__noswap_");

  OS << "#endif\n\n";

  return OS.str();
}

/// Emit one definition of the intrinsic.
///
/// \param ReverseArguments  emit reversal of the arguments before the body
///                          and of the result after it.
/// \param NamePrefix        prefix for the emitted intrinsic's own name.
/// \param CallPrefix        prefix for intrinsics called from the body.
void Intrinsic::generateImpl(bool ReverseArguments,
                             StringRef NamePrefix, StringRef CallPrefix) {
  CurrentRecord = R;

  // Calling a macro can corrupt our local variables because macros lack
  // proper lexical scoping, so in that case every variable gets a globally
  // unique postfix.  indexBody() should have filled in Dependencies by now.
  bool CallsMacro = false;
  for (auto *Dep : Dependencies) {
    if (Dep->UseMacro) {
      CallsMacro = true;
      break;
    }
  }
  if (CallsMacro)
    VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());

  initVariables();

  emitPrototype(NamePrefix);

  if (!IsUnavailable) {
    emitOpeningBrace();
    emitShadowedArgs();
    if (ReverseArguments)
      emitArgumentReversal();
    emitBody(CallPrefix);
    if (ReverseArguments)
      emitReturnReversal();
    emitReturn();
    emitClosingBrace();
  } else {
    // Unavailable intrinsics are declared only, with an attribute.
    OS << " __attribute__((unavailable));";
  }
  OS << "\n";

  CurrentRecord = nullptr;
}

void Intrinsic::indexBody() {
  CurrentRecord = R;

  initVariables();
  emitBody("");
  OS.str("");

  CurrentRecord = nullptr;
}

//===----------------------------------------------------------------------===//
// NeonEmitter implementation
//===----------------------------------------------------------------------===//

/// Find the single intrinsic called \p Name whose parameter types are exactly
/// \p Types.
///
/// An unknown name, no matching overload, or more than one matching overload
/// is reported through assert_with_loc; the latter two messages list the
/// requested signature and every available overload.
Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
  // The name itself must be known.
  assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
                  ("Intrinsic '" + Name + "' not found!").str());
  std::vector<Intrinsic *> &Overloads = IntrinsicMap[Name.str()];

  // Describe the request up front in case the lookup fails.
  std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  for (unsigned I = 0; I < Types.size(); ++I) {
    ErrMsg += (I == 0 ? "" : ", ");
    ErrMsg += Types[I].str();
  }
  ErrMsg += ")'\n";
  ErrMsg += "Available overloads:\n";

  // Check each overload, listing it in the message as we go.
  std::vector<Intrinsic *> Matches;
  for (auto *Candidate : Overloads) {
    ErrMsg += "  - " + Candidate->getReturnType().str() + " " +
              Candidate->getMangledName();
    ErrMsg += "(";
    for (unsigned A = 0; A < Candidate->getNumParams(); ++A) {
      ErrMsg += (A == 0 ? "" : ", ");
      ErrMsg += Candidate->getParamType(A).str();
    }
    ErrMsg += ")\n";

    if (Candidate->getNumParams() != Types.size())
      continue;

    // Walk the parameters until the first type that differs.
    unsigned Arg = 0;
    while (Arg < Types.size()) {
      if (Candidate->getParamType(Arg) != Types[Arg])
        break;
      ++Arg;
    }
    if (Arg == Types.size())
      Matches.push_back(Candidate);
  }

  assert_with_loc(Matches.size() > 0,
                  "No compatible intrinsic found - " + ErrMsg);
  assert_with_loc(Matches.size() == 1, "Multiple overloads found - " + ErrMsg);

  return Matches.front();
}

/// Create the Intrinsic objects described by record \p R: one per type
/// specification, or one per (type, source type) pair when the record asks
/// for the cartesian product of its types.
///
/// Every created intrinsic is registered in IntrinsicMap under the record's
/// name and appended to \p Out.
///
/// \param R    the record to read the name, prototype, types, operation,
///             guard and flags from.
/// \param Out  receives the newly created intrinsics.
void NeonEmitter::createIntrinsic(Record *R,
                                  SmallVectorImpl<Intrinsic *> &Out) {
  std::string Name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");
  Record *OperationRec = R->getValueAsDef("Operation");
  bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
  bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
  std::string Guard = R->getValueAsString("ArchGuard");
  bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");

  // Set the global current record. This allows assert_with_loc to produce
  // decent location information even when highly nested.
  CurrentRecord = R;

  ListInit *Body = OperationRec->getValueAsListInit("Ops");

  std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);

  // The class kind is looked up from the record's second superclass, when
  // there is one.
  ClassKind CK = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    CK = ClassMap[R->getSuperClasses()[1]];

  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  for (auto TS : TypeSpecs) {
    if (CartesianProductOfTypes) {
      // Pair each type with every *different* type of the same total size.
      Type DefaultT(TS, 'd');
      for (auto SrcTS : TypeSpecs) {
        Type DefaultSrcT(SrcTS, 'd');
        if (TS == SrcTS ||
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
          continue;
        NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
      }
    } else {
      NewTypeSpecs.push_back(std::make_pair(TS, TS));
    }
  }

  // Drop duplicate pairs.  std::unique only moves the unique elements to the
  // front and returns the new logical end; the leftover tail holds elements
  // in an unspecified state and must be erased explicitly, otherwise those
  // leftovers would be turned into intrinsics as well.
  std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
  NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
                     NewTypeSpecs.end());

  for (auto &I : NewTypeSpecs) {
    Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
                                  *this, Guard, IsUnavailable, BigEndianSafe);

    IntrinsicMap[Name].push_back(IT);
    Out.push_back(IT);
  }

  CurrentRecord = nullptr;
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
///
/// One "BUILTIN(__builtin_neon_<name>, "<types>", "n")" line is written per
/// distinct builtin, in sorted order, between "#ifdef GET_NEON_BUILTINS" and
/// "#endif".
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_BUILTINS\n";

  // A std::set both removes duplicates and keeps the lines in alphabetical
  // order.
  std::set<std::string> Lines;

  for (auto *Def : Defs) {
    // Intrinsics with a body get no builtin.  Neither do those with 'a' (the
    // splat code) in the prototype, as they use the non-splat variant.
    if (Def->hasBody() || Def->hasSplat())
      continue;

    std::string Line = "BUILTIN(__builtin_neon_";
    Line += Def->getMangledName();
    Line += ", \"";
    Line += Def->getBuiltinTypeStr();
    Line += "\", \"n\")";

    Lines.insert(Line);
  }

  for (std::set<std::string>::const_iterator It = Lines.begin(),
                                             End = Lines.end();
       It != End; ++It)
    OS << *It << "\n";
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// The output is wrapped in "#ifdef GET_NEON_OVERLOAD_CHECK" and consists of
/// one "case NEON::BI__builtin_neon_<mangled name>:" line per distinct mangled
/// name, assigning "mask" and, when applicable, "PtrArgNum" and
/// "HasConstPtr".
///
/// \param OS   Stream the generated code is written to.
/// \param Defs Intrinsics to consider; those with a body, with a splat, or
///             with a scalar in the prototype are skipped.
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                           SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  struct OverloadInfo {
    uint64_t Mask;
    int PtrArgNum;
    bool HasConstPtr;
    OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  };
  std::map<std::string, OverloadInfo> OverloadMap;

  for (auto *Def : Defs) {
    // If the def has a body (that is, it has Operation DAGs), it won't call
    // __builtin_neon_* so we don't need to generate a definition for it.
    if (Def->hasBody())
      continue;
    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Def->hasSplat())
      continue;
    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Def->protoHasScalar())
      continue;

    // Pick the type that determines the overload: the return type, unless the
    // prototype's first character is 'v', 'f' or 'F', in which case the first
    // parameter is used; if that type is a pointer, the second parameter is
    // used instead.
    uint64_t Mask = 0ULL;
    Type Ty = Def->getReturnType();
    if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
        Def->getProto()[0] == 'F')
      Ty = Def->getParamType(0);
    if (Ty.isPointer())
      Ty = Def->getParamType(1);

    Mask |= 1ULL << Ty.getNeonEnum();

    // Check if the function has a pointer or const pointer argument.
    // Proto[0] is the return slot, so parameter I is described by Proto[I + 1].
    // Only the first 'c' or 'p' parameter is recorded.
    std::string Proto = Def->getProto();
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
      char ArgType = Proto[I + 1];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = I;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = I;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
      PtrArgNum += 1;

    std::string Name = Def->getName();
    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (Mask) {
      // This inner Name (the mangled name) shadows the unmangled Name above.
      std::string Name = Def->getMangledName();
      // insert() leaves an existing entry untouched, so masks accumulate
      // across defs that share a mangled name.
      OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
      OverloadInfo &OI = OverloadMap[Name];
      OI.Mask |= Mask;
      // NOTE(review): PtrArgNum starts at 0 in OverloadInfo and is merged with
      // a bitwise OR rather than assigned, so indices from several defs with
      // the same mangled name are OR-ed together (and -1 sticks once seen).
      // Confirm this is intended.
      OI.PtrArgNum |= PtrArgNum;
      // Unlike Mask, HasConstPtr is overwritten by the most recent def.
      OI.HasConstPtr = HasConstPtr;
    }
  }

  // Emit one case line per mangled name, in the map's (sorted) key order.
  for (auto &I : OverloadMap) {
    OverloadInfo &OI = I.second;

    OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
    OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
    if (OI.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << OI.PtrArgNum;
    if (OI.HasConstPtr)
      OS << "; HasConstPtr = true";
    OS << "; break;\n";
  }
  OS << "#endif\n\n";
}

/// Emit the SemaChecking range-check cases for intrinsic immediates, wrapped
/// in "#ifdef GET_NEON_IMMEDIATE_CHECK".  Each mangled builtin name gets at
/// most one case line of the form "i = <idx>; [l = <lo>;] [u = <hi>;] break;".
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        SmallVectorImpl<Intrinsic *> &Defs) {
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  // Mangled names for which a case line has already been written.
  std::set<std::string> Seen;

  for (Intrinsic *I : Defs) {
    // No builtin call is generated for intrinsics with a body; functions with
    // 'a' (the splat code) in the prototype use the non-splat variant; and
    // functions without an immediate have nothing to range check.
    if (I->hasBody() || I->hasSplat() || !I->hasImmediate())
      continue;

    const std::string Mangled = I->getMangledName();
    if (Seen.count(Mangled) != 0)
      continue;

    std::string Lo, Hi;
    Record *Rec = I->getRecord();

    if (Rec->getValueAsBit("isVCVT_N")) {
      // VCVT between floating- and fixed-point values takes an immediate
      // in the range [1, 32) for f32 or [1, 64) for f64.
      Lo = "1";
      Hi = (I->getBaseType().getElementSizeInBits() == 32) ? "31" : "63";
    } else if (Rec->getValueAsBit("isScalarShift")) {
      // Right shifts have an 'r' in the name, left shifts do not. Convert
      // instructions have the same bounds as right shifts.
      const bool NeedsLowerBound =
          I->getName().find('r') != std::string::npos ||
          I->getName().find("cvt") != std::string::npos;
      if (NeedsLowerBound)
        Lo = "1";

      Hi = utostr(I->getReturnType().getElementSizeInBits() - 1);
    } else if (Rec->getValueAsBit("isShift")) {
      // Builtins which are overloaded by type need their upper bound computed
      // at Sema time from the type constant.  Right shifts have an 'r' in the
      // name, left shifts do not.
      if (I->getName().find('r') != std::string::npos)
        Lo = "1";
      Hi = "RFT(TV, true)";
    } else if (I->getClassKind(true) == ClassB) {
      // ClassB intrinsics have a type (and hence lane number) that is only
      // known at runtime.
      Hi = Rec->getValueAsBit("isLaneQ") ? "RFT(TV, false, true)"
                                         : "RFT(TV, false, false)";
    } else {
      // The immediate generally refers to a lane in the preceding argument.
      assert(I->getImmediateIdx() > 0);
      Type LaneSource = I->getParamType(I->getImmediateIdx() - 1);
      Hi = utostr(LaneSource.getNumElements() - 1);
    }

    // Index of the immediate that should be range checked.
    unsigned Idx = I->getNumParams();
    if (I->hasImmediate())
      Idx = I->getGeneratedParamIdx(I->getImmediateIdx());

    OS << "case NEON::BI__builtin_neon_" << Mangled << ": "
       << "i = " << Idx << ";";
    if (!Lo.empty())
      OS << " l = " << Lo << ";";
    if (!Hi.empty())
      OS << " u = " << Hi << ";";
    OS << " break;\n";

    Seen.insert(Mangled);
  }

  OS << "#endif\n\n";
}

/// runHeader - Emit a file made of three sections, in this order:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def;
/// 2. the SemaChecking code for the type overload checking;
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  // Turn every record derived from "Inst" into its intrinsics.
  std::vector<Record *> InstRecords = Records.getAllDerivedDefinitions("Inst");
  SmallVector<Intrinsic *, 128> Intrinsics;
  for (Record *Inst : InstRecords)
    createIntrinsic(Inst, Intrinsics);

  genBuiltinsDef(OS, Intrinsics);             // shared BuiltinsXXX.def
  genOverloadTypeCheckCode(OS, Intrinsics);   // overloaded type checking
  genIntrinsicRangeCheckCode(OS, Intrinsics); // shift/lane immediate ranges
}

/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
///
/// The emitted header contains, in order: the license banner, the include
/// guard and NEON availability check, scalar typedefs, vector typedefs,
/// struct-of-vector typedefs, the __ai helper macro, and finally every
/// intrinsic generated from the records derived from "Inst".
///
/// \param OS Stream the header text is written to.
void NeonEmitter::run(raw_ostream &OS) {
  // License banner, emitted verbatim at the top of the generated header.
  OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining "
        "a copy\n"
        " * of this software and associated documentation files (the "
        "\"Software\"),"
        " to deal\n"
        " * in the Software without restriction, including without limitation "
        "the "
        "rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
        "and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
        "is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  // Include guard; closed by the final "#endif" written at the end of run().
  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // Refuse to compile the header when NEON is not enabled.
  OS << "#if !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t is only provided for AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "typedef __uint128_t poly128_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);

  // Emit vector typedefs.
  // InIfdef tracks whether an "#ifdef __aarch64__" region is currently open,
  // so that consecutive AArch64-only types share a single region.
  bool InIfdef = false;
  for (auto &TS : TDTypeVec) {
    // Double and long-polynomial types are treated as AArch64-only.
    bool IsA64 = false;
    Type T(TS, 'd');
    if (T.isDouble() || (T.isPoly() && T.isLong()))
      IsA64 = true;

    if (InIfdef && !IsA64) {
      OS << "#endif\n";
      InIfdef = false;
    }
    if (!InIfdef && IsA64) {
      OS << "#ifdef __aarch64__\n";
      InIfdef = true;
    }

    if (T.isPoly())
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    // T2 is the scalar form of T and supplies the element type name.
    Type T2 = T;
    T2.makeScalar();
    OS << utostr(T.getNumElements()) << "))) ";
    OS << T2.str();
    OS << " " << T.str() << ";\n";
  }
  if (InIfdef)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs.
  // For each member count from 2 to 4, every vector type gets a struct
  // wrapping an array "val" of that many vectors.
  InIfdef = false;
  for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
    for (auto &TS : TDTypeVec) {
      bool IsA64 = false;
      Type T(TS, 'd');
      if (T.isDouble() || (T.isPoly() && T.isLong()))
        IsA64 = true;

      if (InIfdef && !IsA64) {
        OS << "#endif\n";
        InIfdef = false;
      }
      if (!InIfdef && IsA64) {
        OS << "#ifdef __aarch64__\n";
        InIfdef = true;
      }

      // Modifier character '2', '3' or '4', matching NumMembers.
      char M = '2' + (NumMembers - 2);
      Type VT(TS, M);
      OS << "typedef struct " << VT.str() << " {\n";
      OS << "  " << T.str() << " val";
      OS << "[" << utostr(NumMembers) << "]";
      OS << ";\n} ";
      OS << VT.str() << ";\n";
      OS << "\n";
    }
  }
  if (InIfdef)
    OS << "#endif\n";
  OS << "\n";

  // __ai is the attribute prefix for generated functions; it is #undef'd
  // again at the end of the header.
  OS << "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  // Build the intrinsics for every record derived from "Inst".
  // NOTE(review): createIntrinsic allocates each Intrinsic with new and
  // nothing in this function frees them.
  SmallVector<Intrinsic *, 128> Defs;
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  for (auto *I : Defs)
    I->indexBody();

  // Order the intrinsics using Intrinsic's operator<; stable_sort keeps the
  // relative order of intrinsics that compare equivalent.
  std::stable_sort(
      Defs.begin(), Defs.end(),
      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

  // Only emit a def when its requirements have been met.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  // Each pass emits every def none of whose dependencies is still pending in
  // Defs, and removes it; passes repeat until Defs is empty or a whole pass
  // emits nothing.
  bool MadeProgress = true;
  std::string InGuard = "";
  while (!Defs.empty() && MadeProgress) {
    MadeProgress = false;

    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
         I != Defs.end(); /*No step*/) {
      // A dependency that is still in Defs has not been emitted yet.
      bool DependenciesSatisfied = true;
      for (auto *II : (*I)->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
          DependenciesSatisfied = false;
      }
      if (!DependenciesSatisfied) {
        // Try the next one.
        ++I;
        continue;
      }

      // Emit #endif/#if pair if needed.
      // InGuard holds the guard expression of the currently open "#if"
      // region, or is empty when no region is open.
      if ((*I)->getGuard() != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = (*I)->getGuard();
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
      }

      // Actually generate the intrinsic code.
      OS << (*I)->generate();

      MadeProgress = true;
      // erase() yields the iterator to the following element, which is why
      // the for statement has no increment.
      I = Defs.erase(I);
    }
  }
  // Defs left over here could not be emitted because some dependency never
  // became satisfied.
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
    OS << "#endif\n";

  OS << "\n";
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

namespace clang {
/// Emit arm_neon.h by running NeonEmitter::run on \p Records.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.run(OS);
}

/// Emit the builtin definitions and Sema checking code by running
/// NeonEmitter::runHeader on \p Records.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runHeader(OS);
}

/// Test generation is no longer supported; reaching this is a programming
/// error and both arguments are ignored.
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  llvm_unreachable("Neon test generation no longer implemented!");
}
} // End namespace clang
@


1.1.1.5.4.1
log
@file NeonEmitter.cpp was added on branch yamt-pagecache on 2014-05-22 16:19:50 +0000
@
text
@d1 3426
@


1.1.1.5.4.2
log
@sync with head.

for a reference, the tree before this commit was tagged
as yamt-pagecache-tag8.

this commit was splitted into small chunks to avoid
a limitation of cvs.  ("Protocol error: too many arguments")
@
text
@a0 3426
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.  A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - Identifies the operation an intrinsic is implemented with.  The
/// NeonEmitter constructor fills OpMap with the "OP_*" string names that map
/// onto these values.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMullHiP64,
  OpMullHiN,
  OpMlalHi,
  OpMlalHiN,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMlslHiN,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpFMlaN,
  OpFMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMullHiN,
  OpQDMlalHi,
  OpQDMlalHiN,
  OpQDMlslHi,
  OpQDMlslHiN,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
};

/// ClassKind - Classifies an instruction by the kind of type suffix it takes.
/// The NeonEmitter constructor maps the TableGen instruction classes (SInst,
/// IInst, WInst and their *OpInst counterparts) onto these values through
/// ClassMap.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};

/// NeonTypeFlags - Compact encoding of the element type, signedness and
/// vector width used to identify the types of overloaded Neon builtins.
/// These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
public:
  /// Element kinds; the enumerator value is stored in the low bits of the
  /// flag word.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };

private:
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  /// Wrap an already encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode element type \p ET, setting the unsigned and quad bits on request.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - TableGen backend state for the Neon emitter.  It keeps a
/// reference to the record keeper plus two lookup tables that the constructor
/// fills in: OpMap ("OP_*" name -> OpKind) and ClassMap (instruction class
/// record -> ClassKind).
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  /// Populate OpMap and ClassMap; \p R is retained by reference.
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Operation names and the OpKind each one selects.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MULLHi_P64"]  = OpMullHiP64;
    OpMap["OP_MULLHi_N"]  = OpMullHiN;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_FMLA_N"] = OpFMlaN;
    OpMap["OP_FMLS_N"] = OpFMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"]  = OpTrn1;
    OpMap["OP_ZIP1"]  = OpZip1;
    OpMap["OP_UZP1"]  = OpUzp1;
    OpMap["OP_TRN2"]  = OpTrn2;
    OpMap["OP_ZIP2"]  = OpZip2;
    OpMap["OP_UZP2"]  = OpUzp2;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
    OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
    OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
    OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
    OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
    OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
    OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;

    // Look up the instruction classes by name.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    // Each *Inst class and its *OpInst counterpart share a ClassKind; LOpInst
    // and NoTestOpInst have kinds of their own.
    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // Helpers that are only declared here; their definitions live outside the
  // class body.
  void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
                            std::string &CurrentGuard, bool &InGuard,
                            StringMap<ClassKind> &EmittedMap);
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS);
  void genOverloadTypeCheckCode(raw_ostream &OS);
  void genIntrinsicRangeCheckCode(raw_ostream &OS);
  void genTargetTest(raw_ostream &OS);
};
} // end anonymous namespace

/// ParseTypes - split a type string such as "fQf" into one StringRef per
/// declared type and append them to TV.  Each piece is made of any leading
/// modifier letters (P, Q, U, H, S) followed by exactly one base-type letter,
/// so "fQf" yields "f" and "Qf" (2xfloat and 4xfloat respectively).  Any
/// other letter is reported as a fatal error at the location of record r.
/// The StringRefs point into s, which must therefore outlive TV's contents.
static void ParseTypes(Record *r, std::string &s,
                       SmallVectorImpl<StringRef> &TV) {
  const char *specBegin = s.data();
  const char *const strEnd = specBegin + s.size();

  for (const char *cur = specBegin; cur != strEnd; ++cur) {
    const char c = *cur;

    // Modifier letters extend the current type spec.
    if (c == 'P' || c == 'Q' || c == 'U' || c == 'H' || c == 'S')
      continue;

    // Anything else has to be one of the known base-type letters.
    const bool isBaseType = c == 'c' || c == 's' || c == 'i' || c == 'l' ||
                            c == 'k' || c == 'h' || c == 'f' || c == 'd';
    if (!isBaseType)
      PrintFatalError(r->getLoc(),
                      "Unexpected letter: " + std::string(1, c));

    // The base letter closes the spec; the next one starts right after it.
    TV.push_back(StringRef(specBegin,
                           static_cast<size_t>(cur - specBegin) + 1));
    specBegin = cur + 1;
  }
}

/// Widen - Map a type code to the code of the next wider type: char -> short,
/// short -> int, int -> long, long -> 'k', half -> float, float -> double.
/// Any other code is a fatal error.
static char Widen(const char t) {
  if (t == 'c')
    return 's';
  if (t == 's')
    return 'i';
  if (t == 'i')
    return 'l';
  if (t == 'l')
    return 'k';
  if (t == 'h')
    return 'f';
  if (t == 'f')
    return 'd';
  PrintFatalError("unhandled type in widen!");
}

/// Narrow - Map a type code to the code of the next smaller type:
/// short -> char, int -> short, long -> int, 'k' -> long, float -> half,
/// double -> float.  Any other code is a fatal error.
static char Narrow(const char t) {
  if (t == 's')
    return 'c';
  if (t == 'i')
    return 's';
  if (t == 'l')
    return 'i';
  if (t == 'k')
    return 'l';
  if (t == 'f')
    return 'h';
  if (t == 'd')
    return 'f';
  PrintFatalError("unhandled type in narrow!");
}

/// GetNarrowTypestr - Return a copy of ty in which each integer type letter
/// is replaced by the next narrower one ('s' -> 'c', 'i' -> 's', 'l' -> 'i',
/// 'k' -> 'l').  Every other character is copied through unchanged.
static std::string GetNarrowTypestr(StringRef ty)
{
  std::string narrowed;
  for (size_t pos = 0; pos != ty.size(); ++pos) {
    char c = ty[pos];
    // The chain is exclusive, so a letter is narrowed by one step only.
    if (c == 's')
      c = 'c';
    else if (c == 'i')
      c = 's';
    else if (c == 'l')
      c = 'i';
    else if (c == 'k')
      c = 'l';
    narrowed += c;
  }

  return narrowed;
}

/// ClassifyType - Return the base type letter of the type string ty.  Prefix
/// letters are consumed in the fixed order scalar ('S'), size ('Q' or 'H'),
/// poly ('P'), unsigned ('U'); quad, poly and usgn are set to true when the
/// matching prefix is present and are left untouched otherwise.
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
  // A leading scalar marker is skipped without being recorded.
  unsigned pos = (ty[0] == 'S') ? 1 : 0;

  // Both 'Q' and 'H' set the quad flag.
  const char sizeCode = ty[pos];
  if (sizeCode == 'Q' || sizeCode == 'H') {
    quad = true;
    ++pos;
  }

  if (ty[pos] == 'P') {
    poly = true;
    ++pos;
  }

  if (ty[pos] == 'U') {
    usgn = true;
    ++pos;
  }

  // Whatever follows the prefixes is the base type letter.
  return ty[pos];
}

/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
///
/// \param mod  Modifier character selecting the transformation.
/// \param type Base type letter to transform.
/// \param quad,poly,usgn,scal,cnst,pntr
///             Flags updated in place according to \p mod; flags a given mod
///             does not mention keep their incoming value.
/// \returns the possibly changed base type letter.  Mod codes not listed in
///          the switch leave the type and every flag untouched.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // Intentional fall through: 'b' is the scalar form of 'u'.
    case 'u':
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // Intentional fall through: '$' is the scalar form of 'x'.
    case 'x':
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // Intentional fall through: 'y' is the scalar form of 'f'.
    case 'f':
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'F':
      type = 'd';
      usgn = false;
      break;
    case 'g':
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      quad = true;
      break;
    case 'w':
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      type = Widen(type);
      break;
    case 'i':
      type = 'i';
      scal = true;
      break;
    case 'l':
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      scal = true;
      break;
    case 'k':
      quad = true;
      break;
    case 'c':
      cnst = true;
      // Intentional fall through: 'c' is 'p' with the const flag set.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}

/// IsMultiVecProto - Return true when the prototype modifier p is one of the
/// multi-vector codes '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
    case '2':
    case '3':
    case '4':
    case 'B':
    case 'C':
    case 'D':
      return true;
    default:
      return false;
  }
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type.  With a modifier that leaves the type unchanged, Uc ->
/// uint8x8_t and QUc -> uint8x16_t.
///
/// \param mod     Prototype modifier character; 'v' and 'i' short-circuit to
///                "void" and "int" without looking at \p typestr.
/// \param typestr Type string whose prefixes and base letter are decoded by
///                ClassifyType.
/// \returns the typedef name, followed by " const" and/or " *" when ModType
///          sets the const and pointer flags.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  // Unsigned types get a 'u' prefix in front of the element name.
  if (usgn)
    s.push_back('u');

  // Append the element name and, for non-scalar types, the lane count, which
  // depends on whether the vector is a quad vector.
  switch (type) {
    case 'c':
      s += poly ? "poly8" : "int8";
      if (scal)
        break;
      s += quad ? "x16" : "x8";
      break;
    case 's':
      s += poly ? "poly16" : "int16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'i':
      s += "int32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'l':
      s += (poly && !usgn)? "poly64" : "int64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;
    case 'k':
      // No lane count is ever appended for poly128.
      s += "poly128";
      break;
    case 'h':
      s += "float16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'f':
      s += "float32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'd':
      s += "float64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;

    default:
      PrintFatalError("unhandled type!");
  }

  // Multi-vector modifiers add the number of vectors in the struct.
  if (mod == '2' || mod == 'B')
    s += "x2";
  if (mod == '3' || mod == 'C')
    s += "x3";
  if (mod == '4' || mod == 'D')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                         scal && type != 'f' && type != 'd');

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "Wi";
    else if (type == 'k') // 128-bit long
      s = "LLLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (IsMultiVecProto(mod))
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (mod == 'F' || (ck != ClassB && type == 'd'))
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2Wi" : "V1Wi";

    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2' || mod == 'B')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3' || mod == 'C')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4' || mod == 'D')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (mod == 'F' || (ck != ClassB && type == 'd'))
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2Wi" : "V1Wi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Compute the ARM type code (e.g. "s8", "i16", "32",
/// "f32") for \p typeStr under class kind \p ck, and report through \p quad
/// what ClassifyType determined for it.
///
/// ClassS yields a signedness/poly/float prefix plus the width, ClassI yields
/// "i" (or "f" for floats) plus the width, and ClassW yields the bare width.
/// For any other class kind \p typeCode is left untouched, except for the
/// 128-bit polynomial type, which is always "p128".
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  const char type = ClassifyType(typeStr, quad, poly, usgn);

  const char *bits = "";  // element width in bits, as text
  bool isFloat = false;   // floating-point element
  bool mayBePoly = true;  // 32-bit integers have no polynomial spelling

  switch (type) {
  case 'c':
    bits = "8";
    break;
  case 's':
    bits = "16";
    break;
  case 'i':
    bits = "32";
    mayBePoly = false;
    break;
  case 'l':
    bits = "64";
    break;
  case 'h':
    bits = "16";
    isFloat = true;
    break;
  case 'f':
    bits = "32";
    isFloat = true;
    break;
  case 'k':
    assert(poly && "Unrecognized 128 bit integer.");
    typeCode = "p128";
    return;
  case 'd':
    // Doubles have no width-only (ClassW) form.
    if (ck == ClassW)
      PrintFatalError("unhandled type!");
    // NOTE(review): this appends where every other case assigns; the result
    // is the same only when the caller passes an empty typeCode.
    if (ck == ClassS || ck == ClassI)
      typeCode += "f64";
    return;
  default:
    PrintFatalError("unhandled type!");
  }

  switch (ck) {
  case ClassW:
    typeCode = bits;
    break;
  case ClassI:
    typeCode = std::string(isFloat ? "f" : "i") + bits;
    break;
  case ClassS:
    if (isFloat)
      typeCode = std::string("f") + bits;
    else if (poly && mayBePoly)
      typeCode = std::string("p") + bits;
    else
      typeCode = std::string(usgn ? "u" : "s") + bits;
    break;
  default:
    break;
  }
}

/// Insert_BHSD_Suffix - Return the size letter ('b', 'h', 's' or 'd') for a
/// type string that starts with the 'S' prefix, or 0 when no letter applies.
///
/// After the leading 'S', any run of 'Q', 'H', 'P' and 'U' modifiers is
/// skipped; the next character selects the letter:
///   'c' -> 'b',  's' -> 'h',  'i'/'f' -> 's',  'l'/'d' -> 'd'.
/// An empty type string, or one that ends before a base type character is
/// reached, yields 0 instead of indexing past the end of the string.
static char Insert_BHSD_Suffix(StringRef typestr){
  if (typestr.empty() || typestr[0] != 'S')
    return 0;

  // Skip the modifiers that may sit between 'S' and the base type.
  const size_t len = typestr.size();
  size_t off = 1;
  while (off < len &&
         (typestr[off] == 'Q' || typestr[off] == 'H' ||
          typestr[off] == 'P' || typestr[off] == 'U'))
    ++off;

  // Nothing but modifiers: there is no base type to derive a letter from.
  if (off == len)
    return 0;

  switch (typestr[off]) {
  default  : break;
  case 'c' : return 'b';
  case 's' : return 'h';
  case 'i' :
  case 'f' : return 's';
  case 'l' :
  case 'd' : return 'd';
  }
  return 0;
}

/// endsWith_xN - Return true if \p name is longer than three characters and
/// ends in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const std::string::size_type len = name.length();
  // The suffix must be preceded by at least one character.
  if (len <= 3)
    return false;
  if (name[len - 3] != '_' || name[len - 2] != 'x')
    return false;
  const char count = name[len - 1];
  return count >= '2' && count <= '4';
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if the type string contains
/// 'Q'.  E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Also insert the proper 'b', 'h', 's' or 'd' letter if prefix 'S' is used.
///
/// The names "vcvt_f32_f16", "vcvt_f32_f64" and "vcvt_f64_f32" are returned
/// unchanged.  When \p ck is ClassB, "_v" is appended after the type code.
/// The 'q' and size letters go in front of the first '_' of the mangled
/// name; if the name contains no '_' at all they are appended at the end.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
      name == "vcvt_f64_f32")
    return name;

  bool quad = false;
  std::string typeCode;

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (!typeCode.empty()) {
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (endsWith_xN(s))
      s.insert(s.length() - 3, "_" + typeCode);
    else
      s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    std::string::size_type pos = s.find('_');
    // std::string::insert throws for npos, so fall back to appending.
    if (pos == std::string::npos)
      pos = s.size();
    s.insert(pos, "q");
  }

  // The scalar size letter goes in front of the first '_' as well, i.e.
  // before a 'q' inserted above.
  const char ins = Insert_BHSD_Suffix(typestr);
  if (ins) {
    std::string::size_type pos = s.find('_');
    if (pos == std::string::npos)
      pos = s.size();
    s.insert(pos, 1, ins);
  }

  return s;
}

static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value.
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternForLoadStores - Append to \p RegisterSuffix a
/// FileCheck pattern for the register list of a v{ld,st}N instruction, where
/// \p Count is N.
///
/// The register list is "{...}" with one entry per vector.  Quad operations
/// with N == 3 or N == 4 get only the register list; all other cases list two
/// d registers per vector for non-lane quad operations and are followed by
/// the address operand "[rN]", which carries an alignment hint for vld1/vst1
/// lane or dup forms whose type code is not "8".
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so that case is handled specially.
  const bool IsSplitQuad = IsQuad && (Count == 3 || Count == 4);

  // Pattern for the register(s) holding one vector.
  std::string OneVector = "d{{[0-9]+}}";
  if (HasDupPostfix)
    OneVector += "[]";
  if (HasLanePostfix)
    OneVector += "[{{[0-9]+}}]";
  if (!IsSplitQuad && IsQuad && !HasLanePostfix) {
    // A full quad vector occupies a second d register.
    OneVector += ", d{{[0-9]+}}";
    if (HasDupPostfix)
      OneVector += "[]";
  }

  RegisterSuffix += "{";
  for (size_t i = 0; i < Count; i++) {
    if (i != 0)
      RegisterSuffix += ", ";
    RegisterSuffix += OneVector;
  }

  if (IsSplitQuad) {
    RegisterSuffix += "}";
    return;
  }

  // Normal loads and stores also check the address operand.
  RegisterSuffix += "}, [r{{[0-9]+}}";

  // The alignment hint is only included for vld1/vst1 dup/lane forms.
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  if (IsLDSTOne && (HasLanePostfix || HasDupPostfix) && OutTypeCode != "8")
    RegisterSuffix += ":" + OutTypeCode;

  RegisterSuffix += "]";
}

/// HasNPostfixAndScalarArgs - Return true if \p HasNPostfix is set and
/// \p NameRef contains one of the multiply / multiply-accumulate mnemonics
/// listed below.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  if (!HasNPostfix)
    return false;

  // Substring matches, so e.g. "vmla" also covers "vmlal".
  static const char *const Mnemonics[] = {
    "vmla", "vmlal", "vmlsl", "vmull", "vqdmlal",
    "vqdmlsl", "vqdmulh", "vqdmull", "vqrdmulh"
  };
  const size_t NumMnemonics = sizeof(Mnemonics) / sizeof(Mnemonics[0]);

  for (size_t i = 0; i != NumMnemonics; ++i)
    if (NameRef.count(Mnemonics[i]))
      return true;
  return false;
}

/// IsFiveOperandLaneAccumulator - Return true if \p HasLanePostfix is set and
/// \p NameRef contains one of the multiply / multiply-accumulate mnemonics
/// listed below.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  if (!HasLanePostfix)
    return false;

  // Substring matches.
  static const char *const Mnemonics[] = {
    "vmla", "vmls", "vmlal", "vmlsl",
    "vqdmlal", "vqdmlsl", "vqdmulh", "vqrdmulh"
  };
  const size_t NumMnemonics = sizeof(Mnemonics) / sizeof(Mnemonics[0]);

  for (size_t i = 0; i != NumMnemonics; ++i)
    if (NameRef.count(Mnemonics[i]))
      return true;

  // NOTE(review): a three character string cannot contain the four
  // character "vmul", so this test never succeeds.  The intended length is
  // unclear; confirm before changing it.
  if (NameRef.count("vmul") && NameRef.size() == 3)
    return true;

  return false;
}

/// IsSpecialLaneMultiply - Return true for lane operations whose name
/// contains "vmul" or "mulh" when quad, or "mull" when not quad.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  if (!HasLanePostfix)
    return false;

  if (IsQuad)
    return NameRef.count("vmul") || NameRef.count("mulh");

  return NameRef.count("mull") != 0;
}

/// NormalizeProtoForRegisterPatternCreation - Reduce the prototype string
/// \p Proto to the operand classes that GenerateRegisterCheckPattern emits
/// checks for, appending one character per recognised prototype character to
/// \p NormedProto:
///   'q' - a q register,
///   'd' - a d register,
///   'i' - an immediate,
///   'a' - a d register with a lane index.
/// Prototype characters not listed in the switch contribute nothing.  A set
/// of name-based special cases then drops or rearranges characters.
///
/// NOTE(review): \p HasDupPostfix is not read by this function.
/// NOTE(review): the special cases index NormedProto without checking its
/// length; they rely on the prototypes of the matching intrinsics being long
/// enough.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    // These follow the width of the instruction: q if quad, d otherwise.
    case 'u':
    case 'f':
    case 'F':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    // These are always q registers.
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    // These are always d registers.
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    // For lane operations this is treated as a lane operand, otherwise as an
    // immediate.
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    // A lane operand for lane operations, a register for the _n forms
    // accepted by HasNPostfixAndScalarArgs, and an immediate otherwise.
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.  The first matching case wins.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For the special lane multiplies, overwrite the third character with the
    // fourth and truncate the string to three characters.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, keep only the first two characters.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPattern - Build in \p RegisterSuffix the FileCheck
/// pattern that matches the operands of the instruction expected for an
/// intrinsic.  \p RegisterSuffix is reset first and stays empty for
/// vdup/vmov with an _n postfix and for vget/vset.
///
/// Loads and stores are delegated to
/// GenerateRegisterCheckPatternForLoadStores, table lookups get a register
/// list of \p TBNumber d registers, and everything else is derived from the
/// normalized prototype.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {
  RegisterSuffix.clear();

  const StringRef NameRef(Name);

  // No operand check is emitted for vdup_n / vmov_n.
  const bool IsDupOrMov = NameRef.count("vdup") || NameRef.count("vmov");
  if (IsDupOrMov && HasNPostfix)
    return;

  // Loads and stores.
  if (NameRef.count("vld") || NameRef.count("vst")) {
    // The character after "vld"/"vst" at the start of the name is the
    // digit N.
    const size_t Count = NameRef[3] - '0';

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
    return;
  }

  // Table lookups: destination, a list of TBNumber table registers, index.
  if (NameRef.count("vtbl") || NameRef.count("vtbx")) {
    RegisterSuffix += "d{{[0-9]+}}, {";
    const size_t AllButLast = TBNumber - 1;
    for (size_t Left = AllButLast; Left != 0; --Left)
      RegisterSuffix += "d{{[0-9]+}}, ";
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
    return;
  }

  // No operand check is emitted for vget / vset.
  if (NameRef.count("vget") || NameRef.count("vset"))
    return;

  // Normalize the proto first: only four kinds of checks are emitted, but
  // more than four proto characters map onto them.
  std::string NormalizedProto;
  NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                           HasLanePostfix, HasDupPostfix,
                                           NormalizedProto);

  for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
    switch (NormalizedProto[i]) {
    case 'q':
      RegisterSuffix += "q{{[0-9]+}}, ";
      break;
    case 'd':
      RegisterSuffix += "d{{[0-9]+}}, ";
      break;
    case 'i':
      RegisterSuffix += "#{{[0-9]+}}, ";
      break;
    case 'a':
      RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
      break;
    }
  }

  // Drop the trailing ", " left behind by the last operand.
  if (RegisterSuffix.size() >= 2)
    RegisterSuffix.resize(RegisterSuffix.size() - 2);
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
///
/// The patterns are appended to \p Result; nothing is appended for
/// ClassNoTest or for the 64-bit non-quad vdup/vext case.  If \p InstName is
/// non-empty it is used as the instruction mnemonic instead of the part of
/// \p Name before its first '_'.
///
/// NOTE(review): \p InTypeStr is not read by this function.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // This conversion is checked by mnemonic only, with no operand pattern.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  // NOTE(review): filled in by PreprocessInstruction but not read below.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.  The initial
  // -1 converts to the largest size_t value.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp, check for the bare Prefix (no type code) followed by the
  // operand pattern.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.  The check is built from the first three
    // characters of the name ("vld" or "vst") plus "1.64".
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul followed by a check for vadd (vmla) or vsub (vmls).
  // Note that the test below applies to every f32 vmla/vmls, quad or not.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.  The input type is the text after the last '_'.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Decide from the prototype string whether an intrinsic has to be
/// emitted as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto, StringRef typestr) {
  // Three prototype characters force a macro on their own:
  //  'i'      - an immediate argument; a #define lets SemaChecking range
  //             check the immediate passed by the user.
  //  'p', 'c' - pointer arguments; a macro avoids hiding aligned attributes
  //             from the pointer type.
  if (proto.find_first_of("ipc") != std::string::npos)
    return true;

  // It is not permitted to pass or return an __fp16 by value, so intrinsics
  // taking a scalar float16_t must be implemented as macros.
  const bool mentionsHalf = typestr.find('h') != StringRef::npos;
  const bool mentionsScalar = proto.find('s') != std::string::npos;
  return mentionsHalf && mentionsScalar;
}

/// MacroArgUsedDirectly - For an intrinsic defined as a macro, report whether
/// the argument described by prototype character \p i is used as written
/// instead of being copied into a local temporary first.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}

// Generate the string "(argtype a, argtype b, ...)"
static std::string GenArgs(const std::string &proto, StringRef typestr,
                           const std::string &name) {
  bool define = UseMacro(proto, typestr);
  char arg = 'a';

  std::string s;
  s += "(";

  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (define) {
      // Some macro arguments are used directly instead of being assigned
      // to local temporaries; prepend an underscore prefix to make their
      // names consistent with the local temporaries.
      if (MacroArgUsedDirectly(proto, i))
        s += "__";
    } else {
      s += TypeString(proto[i], typestr) + " __";
    }
    s.push_back(arg);
    if ((i + 1) < e)
      s += ", ";
  }

  s += ")";
  return s;
}

// Macro arguments are not type-checked like inline function arguments, so
// each one is assigned to a typed local temporary ("type __a = (a); ") to get
// the right type checking.  If at least one temporary was generated, a line
// continuation and indentation are appended.  'name' is not used.
static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
                                  const std::string &name ) {
  std::string locals;
  char letter = 'a';

  for (unsigned idx = 1, n = proto.size(); idx != n; ++idx, ++letter) {
    // Arguments that are used directly (such as immediates) must not get a
    // temporary: that would defeat the whole point of using a macro!
    if (MacroArgUsedDirectly(proto, idx))
      continue;

    locals += TypeString(proto[idx], typestr);
    locals += " __";
    locals += letter;
    locals += " = (";
    locals += letter;
    locals += "); ";
  }

  if (!locals.empty())
    locals += "\\\n  ";
  return locals;
}

// Emit a call to the vmovl intrinsic (vmovl_high when 'h' is set) on 'a', to
// sign-extend or zero-extend a vector.
static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
  std::string base = "vmovl";
  if (h)
    base += "_high";
  return MangleName(base, typestr, ClassS) + "(" + a + ")";
}

// Emit a call to the vget_high intrinsic on 'a', i.e. get the high 64-bit
// part of a vector.
static std::string GetHigh(const std::string &a, StringRef typestr) {
  return MangleName("vget_high", typestr, ClassS) + "(" + a + ")";
}

// Emit the statement "op(high(a), high(b));": a two operand operation applied
// to the high 64-bit halves of both operands.
static std::string Gen2OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b) {
  const std::string HighA = GetHigh(a, typestr);
  const std::string HighB = GetHigh(b, typestr);

  std::string call = MangleName(op, typestr, ClassS);
  call += "(";
  call += HighA;
  call += ", ";
  call += HighB;
  call += ");";
  return call;
}

// Emit the statement "op(a, high(b), high(c));": a three operand operation
// that uses the first operand as is and the high 64-bit halves of the latter
// two operands.
static std::string Gen3OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b,
                                   const std::string &c) {
  const std::string HighB = GetHigh(b, typestr);
  const std::string HighC = GetHigh(c, typestr);

  std::string call = MangleName(op, typestr, ClassS);
  call += "(";
  call += a;
  call += ", ";
  call += HighB;
  call += ", ";
  call += HighC;
  call += ");";
  return call;
}

// Emit a call to the vcombine intrinsic that puts a in the low 64 bits and b
// in the high 64 bits.
static std::string GenCombine(std::string typestr,
                              const std::string &a,
                              const std::string &b) {
  std::string call = MangleName("vcombine", typestr, ClassS);
  call += "(";
  call += a;
  call += ", ";
  call += b;
  call += ")";
  return call;
}

// Emit a compound literal "(type){ a, a, ... }" that repeats the expression
// 'a' nElts times, where type is TypeString('d', typestr).
static std::string Duplicate(unsigned nElts, StringRef typestr,
                             const std::string &a) {
  std::string literal = "(";
  literal += TypeString('d', typestr);
  literal += "){ ";

  for (unsigned i = 0; i != nElts; ++i) {
    if (i != 0)
      literal += ", ";
    literal += a;
  }

  literal += " }";
  return literal;
}

// Emit a __builtin_shufflevector call that takes 'vec' as both inputs and
// lists the index expression 'lane' nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call = "__builtin_shufflevector(";
  call += vec;
  call += ", ";
  call += vec;

  const std::string index = ", " + lane;
  for (unsigned left = nElts; left != 0; --left)
    call += index;

  call += ")";
  return call;
}

// Return 'name' with the first "_high" of a "_high_" occurrence removed, so
// that e.g. "x_high_y" becomes "x_y".  It is a fatal error if the name does
// not contain "_high_".
static std::string RemoveHigh(const std::string &name) {
  const std::string::size_type at = name.find("_high_");
  if (at == std::string::npos)
    PrintFatalError("name should contain \"_high_\" for high intrinsics");

  // Only five characters are dropped: the trailing '_' stays in place.
  std::string stripped = name;
  stripped.erase(at, 5);
  return stripped;
}

// Return the number of elements in the vector type described by typestr, and
// report through 'quad' what ClassifyType determined for it.  The per-type
// counts below are doubled when quad is set.
static unsigned GetNumElements(StringRef typestr, bool &quad) {
  quad = false;
  // The poly and unsigned results of the classification are not needed.
  bool ignored = false;
  const char type = ClassifyType(typestr, quad, ignored, ignored);

  unsigned count = 0;
  switch (type) {
  case 'c':
    count = 8;
    break;
  case 's':
  case 'h':
    count = 4;
    break;
  case 'i':
  case 'f':
    count = 2;
    break;
  case 'l':
  case 'k':
  case 'd':
    count = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }

  return quad ? count * 2 : count;
}

// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// Note that some intrinsic definitions around 'lane' are being implemented
// with macros, because they all contain constant integer argument, and we
// statically check the range of the lane index to meet the semantic
// requirement of different intrinsics.
//
// For intrinsics implemented as macros, if they expand to another intrinsic
// that is also implemented as a macro, we have to avoid using the same
// argument names for the nested intrinsic. For example, macro vfms_lane is
// implemented with another macro, vfma_lane, so we rename all arguments of
// vfms_lane by adding a suffix '1'.

// Parameters:
//   name    - intrinsic name; only consulted by the OpLongHi and OpNarrowHi
//             cases, which derive the non-"high" intrinsic name from it.
//   op      - operation kind selecting which expression text to emit.
//   proto   - prototype string; proto[0] supplies the result type and
//             proto[1..3] the types of the local temporaries some cases emit.
//   typestr - type string forwarded to the type and name-mangling helpers.
// Returns the generated body text. An OpKind without a case below is reported
// through PrintFatalError.
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto, typestr);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  // When UseMacro() is false the generated text starts with "return ".
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpFMlaN:
    s += MangleName("vfma", typestr, ClassS);
    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
    break;
  case OpFMlsN:
    s += MangleName("vfms", typestr, ClassS);
    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMullHiP64: {
    std::string Op1 = GetHigh("__a", typestr);
    std::string Op2 = GetHigh("__b", typestr);
    s += MangleName("vmull", typestr, ClassS);
    s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
    break;
  }
  case OpMullHiN:
    s += MangleName("vmull_n", typestr, ClassS);
    s += "(" + GetHigh("__a", typestr) + ", __b);";
    // Returns directly; nothing but "return s" follows the switch, so this is
    // equivalent to a break.
    return s;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlalHiN:
    s += MangleName("vmlal_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpFMSLane:
    // Copy the arguments into '1'-suffixed locals before calling vfma_lane
    // with the third operand negated.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpMlslHiN:
    s += MangleName("vmlsl_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // From here on 'ts' is the type of the first argument, used to cast the
    // other two operands.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Emit the indices of each group of two elements in descending order.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpXtnHi: {
    // Note the assignment (not +=): the "return " prefix is discarded and a
    // local declaration is emitted first, followed by an explicit return.
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpSqxtunHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpQxtnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtnHi: {
    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtlHi: {
    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
    break;
  }
  case OpFcvtxnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpUzp1:
    // Even-numbered indices 0, 2, 4, ...
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i);
    s += ");";
    break;
  case OpUzp2:
    // Odd-numbered indices 1, 3, 5, ...
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i+1);
    s += ");";
    break;
  case OpZip1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpZip2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = nElts/2; i < nElts; i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpTrn1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
    s += ");";
    break;
  case OpTrn2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
    s += ");";
    break;
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMullHiN:
    s += MangleName("vqdmull_n", typestr, ClassS);
    s += "(" + GetHigh("__a", typestr) + ", __b);";
    return s;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlalHiN:
    s += MangleName("vqdmlal_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpQDMlslHiN:
    s += MangleName("vqdmlsl_n", typestr, ClassS);
    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
    return s;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // The local declaration is placed in front of whatever prefix 's' already
    // holds, so any "return " ends up after the declaration.
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  case OpCopyLane: {
    // Read lane __d of __c, then write it into lane __b of __a.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += TypeString('s', typestr) + " __c2 = " +
         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n  " +
         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);";
    break;
  }
  case OpCopyQLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
         "(__c1, __d); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b);";
    break;
  }
  case OpCopyLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
         "(__c1, __d); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b);";
    break;
  }
  case OpScalarMulLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b, __c);\\\n  __a * __d1;";
    break;
  }
  case OpScalarMulLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
      "(__b1, __c);\\\n  __a1 * __d1;";
    break;
  }
  case OpScalarMulXLane: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    // The emitted scalar intrinsic name uses 's' where the type code is 'f'.
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b1, __c);\\\n  vmulx" + type + "_" +
      typeCode +  "(__a1, __d1);";
    break;
  }
  case OpScalarMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
      typeCode + "(__b1, __c);\\\n  vmulx" + type +
      "_" + typeCode +  "(__a1, __d1);";
    break;
  }

  case OpScalarVMulXLane: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a1, 0);\\\n" +
      "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
      typeCode + "(__b1, __c);\\\n" +
      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      "  " + TypeString('d', typestr) + " __g1;\\\n" +
      // NOTE(review): the result is stored at lane __c here, whereas the
      // OpScalarVMulXLaneQ case below stores at lane 0 - confirm intended.
      "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
    break;
  }

  case OpScalarVMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a1, 0);\\\n" +
      "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
      typeCode + "(__b1, __c);\\\n" +
      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      "  " + TypeString('d', typestr) + " __g1;\\\n" +
      "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
    break;
  }
  case OpScalarQDMullLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarQDMullLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarQDMulHiLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarQDMulHiLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarQRDMulHiLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
    "vget_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarQRDMulHiLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
    "vgetq_lane_" + typeCode + "(__b1, __c));";
    break;
  }
  case OpScalarGetLane:{
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";

    std::string intType = quad ? "int16x8_t" : "int16x4_t";
    std::string intName = quad ? "vgetq" : "vget";

    // reinterpret float16 vector as int16 vector
    s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";

    s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";

    // reinterpret int16 vector as float16 vector
    s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
    s += "  __a4;";
    break;
  }
  case OpScalarSetLane:{
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";

    std::string origType = quad ? "float16x8_t" : "float16x4_t";
    std::string intType = quad ? "int16x8_t" : "int16x4_t";
    std::string intName = quad ? "vsetq" : "vset";

    // reinterpret float16_t as int16_t
    s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
    // reinterpret float16 vector as int16 vector
    s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";

    s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";

    // reinterpret int16 vector as float16 vector
    s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
    s += "__b4;";
    break;
  }

  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

// Compute the NeonTypeFlags value for an intrinsic from its prototype and
// type string. Unknown type codes are a fatal error.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  // Start from the return-type modifier; for 'v', 'f' and 'F' the modifier of
  // the first argument is used instead.
  const char retMod = proto[0];
  unsigned mod =
      (retMod == 'v' || retMod == 'f' || retMod == 'F') ? proto[1] : retMod;

  bool quad = false, poly = false, usgn = false;
  bool scal = false, cnst = false, pntr = false;

  // Classify the base type, then let the modifier adjust type and width.
  char type = ClassifyType(typestr, quad, poly, usgn);
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
  case 'c': ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; break;
  case 's': ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; break;
  case 'i': ET = NeonTypeFlags::Int32; break;
  case 'l': ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; break;
  case 'k': ET = NeonTypeFlags::Poly128; break;
  case 'h': ET = NeonTypeFlags::Float16; break;
  case 'f': ET = NeonTypeFlags::Float32; break;
  case 'd': ET = NeonTypeFlags::Float64; break;
  default:
    PrintFatalError("unhandled type!");
  }

  // The quad flag is suppressed when the first argument modifier is 'g'.
  return NeonTypeFlags(ET, usgn, quad && proto[1] != 'g').getFlags();
}

// Return true if the prototype string contains at least one of the modifier
// characters 's', 'z', 'r', 'b', '$', 'y' or 'o'; callers use this to decide
// whether the prototype has a scalar operand.
//
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
//
// The prototype is taken by const reference so that no copy of the string is
// made on each call.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("szrb$yo") != std::string::npos;
}

// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// The emitted text is a call to the mangled __builtin_neon_* function with
// each argument named __a, __b, ... and cast where required. Multi-vector
// struct results are written through a local "r" that is passed by address.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  // A struct of 2, 3 or 4 vectors is returned through an implicit sret-like
  // leading argument.
  const bool sret = IsMultiVecProto(proto[0]);
  const bool define = UseMacro(proto, typestr);
  const bool returnsValue = proto[0] != 'v';

  // Without a scalar operand of the vector element type, bitcasting the
  // arguments takes care of argument checking; signedness etc. is conveyed by
  // the trailing type-class enum instead.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  std::string call;
  if (returnsValue) {
    const std::string retTy = TypeString(proto[0], typestr);
    if (sret)
      call += retTy + " r; ";
    else if (define)
      call += "(" + retTy + ")";
    else
      call += "return (" + retTy + ")";
  }

  const bool splat = proto.find('a') != std::string::npos;

  // A splat intrinsic calls the non-splat builtin, whose name is this one
  // without its two-character "_n" suffix.
  call += "__builtin_neon_";
  call += MangleName(splat ? name.substr(0, name.size() - 2) : name, typestr,
                     ck);
  call += "(";

  // sret-like builtins receive the address of the result variable first.
  if (sret)
    call += "&r, ";

  const unsigned numProto = proto.size();
  char letter = 'a';
  for (unsigned idx = 1; idx != numProto; ++idx, ++letter) {
    const bool isLast = (idx + 1) == numProto;

    // Refer to the local temporary (__a, __b, ...) rather than the macro
    // argument.
    std::string operand("__");
    operand += letter;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[idx], argType, argQuad, argPoly, argUsgn,
                      argScalar, dummy, dummy);

    // Multiple-vector values are expanded so that each subvector becomes its
    // own argument of the __builtin.
    const char code = proto[idx];
    unsigned vecCount = 0;
    if (code >= '2' && code <= '4')
      vecCount = code - '0';
    else if (code >= 'B' && code <= 'D')
      vecCount = code - 'A' + 1;

    if (vecCount > 0) {
      // Add an explicit cast unless the element type is already a plain
      // signed, non-polynomial 'c'.
      if (argType != 'c' || argPoly || argUsgn)
        operand = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + operand;

      for (unsigned v = 0; v != vecCount; ++v) {
        if (v != 0)
          call += ", ";
        call += operand + ".val[" + utostr(v) + "]";
      }
      if (!isLast)
        call += ", ";

      continue;
    }

    // For splat intrinsics the final argument is duplicated into a vector.
    // Note that GetNumElements also overwrites argQuad.
    if (splat && isLast)
      operand = Duplicate(GetNumElements(typestr, argQuad), typestr, operand);

    // Decide whether an explicit cast is needed.
    const bool needsCast =
        (splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn);
    if (needsCast) {
      std::string castCode =
          (ck == ClassB) ? std::string("c") : std::string(1, argType);
      if (argQuad)
        castCode = "Q" + castCode;
      operand = "(" + TypeString('d', castCode) + ")" + operand;
    }

    call += operand;
    if (!isLast)
      call += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    call += ", " + utostr(GetNeonEnum(proto, typestr));

  call += ");";

  // For sret results, finish with the result variable: as the macro value or
  // as an explicit return.
  if (returnsValue && sret)
    call += define ? " r;" : " return r;";
  return call;
}

static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking.  The actual signedness etc. will be taken care of with
  // special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  s += MangleName(name, typestr, ck);
  s += ", \"";

  for (unsigned i = 0, e = proto.size(); i != e; ++i)
    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += "i";

  s += "\", \"n\")";
  return s;
}

static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr, name);

  // Definition.
  if (define) {
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr, name);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}

/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
///
/// The text is written to \p OS in this order: license banner, include guard
/// and __ARM_NEON check, scalar typedefs, vector typedefs, struct-of-vector
/// typedefs, the __ai helper macro, the intrinsics named in EarlyDefs, and
/// then every remaining record derived from "Inst".
void NeonEmitter::run(raw_ostream &OS) {
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#if !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "typedef __uint128_t poly128_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.  TDTypeVec holds StringRefs, so TypedefTypes
  // has to stay alive for as long as TDTypeVec is used.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.  isA64 tracks whether an "#ifdef __aarch64__" is
  // currently open, so that consecutive guarded types share one guard.
  bool isA64 = false;
  bool preinsert;
  bool postinsert;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    preinsert = false;
    postinsert = false;

    // Types classified as 'd', or as 'l' with the poly flag set, are wrapped
    // in the __aarch64__ guard; every other type closes an open guard.
    if (type == 'd' || (type == 'l' && poly)) {
      preinsert = !isA64;
      isA64 = true;
    } else {
      postinsert = isA64;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the type names line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  // Close a guard left open by the last type.
  if (isA64)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs for arrays of 2, 3 and 4 vectors, using the same
  // guard tracking as for the vector typedefs above.
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      preinsert = false;
      postinsert = false;

      if (type == 'd' || (type == 'l' && poly)) {
        preinsert = !isA64;
        isA64 = true;
      } else {
        postinsert = isA64;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }
  if (isA64)
    OS << "#endif\n";
  OS << "\n";

  OS << "#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;
  std::string CurrentGuard = "";
  bool InGuard = false;

  // Some intrinsics are used to express others. These need to be emitted near
  // the beginning so that the declarations are present when needed. This is
  // rather an ugly, arbitrary list, but probably simpler than actually tracking
  // dependency info.
  static const char *EarlyDefsArr[] =
      { "VFMA",      "VQMOVN",    "VQMOVUN",  "VABD",    "VMOVL",
        "VABDL",     "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
        "VMULL",     "VMLAL_N",   "VMLSL_N",  "VMULL_N", "VMULL_P64",
        "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
  ArrayRef<const char *> EarlyDefs(EarlyDefsArr);

  for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
    Record *R = Records.getDef(EarlyDefs[i]);
    // The list above is hard-coded; report a record that is not defined
    // instead of dereferencing a null pointer in emitGuardedIntrinsic.
    if (!R)
      PrintFatalError(std::string("NEON early definition '") + EarlyDefs[i] +
                      "' not found");
    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
  }

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Skip the records that were already emitted from EarlyDefs.
    if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
        EarlyDefs.end())
      continue;

    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
  }

  // Close the guard opened for the last intrinsic, if any.
  if (InGuard)
    OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// Emit the intrinsic described by \p R, first switching the preprocessor
/// guard if the record's "ArchGuard" differs from \p CurrentGuard.  Both
/// \p CurrentGuard and \p InGuard are updated so that the caller can close
/// the last guard once every record has been emitted.
void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
                                       std::string &CurrentGuard, bool &InGuard,
                                       StringMap<ClassKind> &EmittedMap) {
  const std::string RecordGuard = R->getValueAsString("ArchGuard");

  if (RecordGuard != CurrentGuard) {
    // Leave the guard that is currently open.
    if (InGuard)
      OS << "#endif\n\n";

    // An empty guard string means the intrinsic is unconditional.
    const bool OpensGuard = RecordGuard.size() != 0;
    if (OpensGuard)
      OS << "#if " << RecordGuard << '\n';

    InGuard = OpensGuard;
    CurrentGuard = RecordGuard;
  }

  emitIntrinsic(OS, R, EmittedMap);
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
///
/// One definition is generated per entry of the record's "Types" list.  For
/// OpReinterpret records a definition is generated for every ordered pair of
/// distinct types whose quad flag matches.  Definitions whose text is already
/// in \p EmittedMap are not written again.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  // TypeVec holds StringRefs, so Types must outlive it.
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);
  const unsigned NumTypes = TypeVec.size();

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind is looked up from the second superclass, when present.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  if (kind != OpReinterpret) {
    for (unsigned ti = 0; ti != NumTypes; ++ti) {
      std::string def =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (!EmittedMap.count(def)) {
        EmittedMap[def] = classKind;
        OS << def;
      } else {
        errs() << "warning: duplicate definition: " << name
               << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
      }
    }
  } else {
    for (unsigned dst = 0; dst != NumTypes; ++dst) {
      bool dstQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[dst], dstQuad, dummy, dummy);

      for (unsigned src = 0; src != NumTypes; ++src) {
        bool srcQuad = false;
        (void)ClassifyType(TypeVec[src], srcQuad, dummy, dummy);

        // Only reinterpret between different types with the same quad flag.
        if (src != dst && srcQuad == dstQuad) {
          std::string def = GenIntrinsic(name, Proto, TypeVec[dst],
                                         TypeVec[src], OpCast, ClassS);
          if (!EmittedMap.count(def)) {
            EmittedMap[def] = ClassS;
            OS << def;
          }
        }
      }
    }
  }
  OS << "\n";
}

/// Return the largest immediate accepted for the type obtained by applying
/// the modifier \p mod to \p typestr.  The result is (N << quad) - 1, where N
/// is 8, 4, 2 or 1 depending on the type code, and 0 for type code 'k'.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // Resolve the type code the modifier refers to.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  unsigned base;
  switch (type) {
    case 'c':
      base = 8;
      break;
    case 'h':
    case 's':
      base = 4;
      break;
    case 'f':
    case 'i':
      base = 2;
      break;
    case 'd':
    case 'l':
      base = 1;
      break;
    case 'k':
      return 0;
    default:
      PrintFatalError("unhandled type!");
  }

  // The count doubles when the quad flag is set.
  return (base << (quad ? 1 : 0)) - 1;
}

/// Return the upper bound of a scalar shift immediate for the type obtained
/// by applying the modifier \p mod to \p typestr: 7, 15, 31, 63 or 127
/// depending on the resulting type code.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // Resolve the type code the modifier refers to.
  bool dummy = false;
  char type = ClassifyType(typestr, dummy, dummy, dummy);
  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);

  unsigned width;
  switch (type) {
    case 'c':
      width = 8;
      break;
    case 'h':
    case 's':
      width = 16;
      break;
    case 'f':
    case 'i':
      width = 32;
      break;
    case 'd':
    case 'l':
      width = 64;
      break;
    case 'k':
      width = 128;
      break;
    default:
      PrintFatalError("unhandled type!");
  }

  return width - 1;
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// The output is a list of "case NEON::BI__builtin_neon_<name>:" lines, placed
/// between "#ifdef GET_NEON_IMMEDIATE_CHECK" and "#endif".  Each line sets
/// "i" to the index of the immediate argument and "u" (and optionally "l") to
/// its bounds.  Only records without an operation (OpNone), without the splat
/// code 'a' and with an 'i' in their prototype produce a line.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    // TypeVec below holds StringRefs, so Types must outlive it.
    std::string Types = R->getValueAsString("Types");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (!ProtoHasScalar(Proto))
      ck = ClassB;

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");

      } else if (R->getValueAsBit("isScalarShift")) {
        // Right shifts have an 'r' in the name, left shifts do not.  Convert
        // instructions have the same bounds and right shifts.
        if (name.find('r') != std::string::npos ||
            name.find("cvt") != std::string::npos)
          rangestr = "l = 1; ";

        // The bound is derived from the argument preceding the immediate, so
        // the immediate must not be the first prototype character.
        assert(immPos > 0 && "unexpected immediate operand");
        unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
        // Narrow shift has half the upper bound
        if (R->getValueAsBit("isScalarNarrowShift"))
          upBound /= 2;

        rangestr += "u = " + utostr(upBound);
      } else if (R->getValueAsBit("isShift")) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        shiftstr = ", true";

        // Right shifts have an 'r' in the name, left shifts do not.
        if (name.find('r') != std::string::npos)
          rangestr = "l = 1; ";

        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else if (ck == ClassB) {
        // ClassB intrinsics have a type (and hence lane number) that is only
        // known at runtime.
        assert(immPos > 0 && "unexpected immediate operand");
        if (R->getValueAsBit("isLaneQ"))
          rangestr = "u = RFT(TV, false, true)";
        else
          rangestr = "u = RFT(TV, false, false)";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (IsMultiVecProto(Proto[0]))
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
        case 'B':
          immidx += 2;
          break;
        case '3':
        case 'C':
          immidx += 3;
          break;
        case '4':
        case 'D':
          immidx += 4;
          break;
        case 'i':
          // Stop after the immediate: this makes the loop condition fail on
          // the next iteration.
          ie = ii + 1;
          break;
        }
      }
      // namestr already holds the mangled builtin name computed above.
      OS << "case NEON::BI__builtin_neon_";
      OS << namestr << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Data collected for one overloaded builtin while generating the overload
/// type checking code.
struct OverloadInfo {
  /// Bit set of permitted types; bits are OR-ed in as records are processed.
  uint64_t Mask;
  /// Index of the pointer argument; negative when there is nothing to check.
  int PtrArgNum;
  /// True when the pointer argument is a const pointer.
  bool HasConstPtr;

  /// Start from an empty mask.  The values match what value-initialisation
  /// (OverloadInfo()) produced before this constructor existed, and they no
  /// longer depend on how the object is created.
  OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
};
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Generate the overloaded type checking code for SemaChecking.cpp
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  // We record each overload check line before emitting because subsequent Inst
  // definitions may extend the number of permitted types (i.e. augment the
  // Mask). Use std::map to avoid sorting the table by hash number.
  std::map<std::string, OverloadInfo> OverloadMap;
  typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator;

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (mask) {
      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
          MangleName(name, TypeVec[si], ClassB), OverloadInfo()));
      OverloadInfo &Record = I.first->second;
      if (!I.second)
        assert(Record.PtrArgNum == PtrArgNum &&
               Record.HasConstPtr == HasConstPtr);
      Record.Mask |= mask;
      Record.PtrArgNum = PtrArgNum;
      Record.HasConstPtr = HasConstPtr;
    }
    if (qmask) {
      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
          MangleName(name, TypeVec[qi], ClassB), OverloadInfo()));
      OverloadInfo &Record = I.first->second;
      if (!I.second)
        assert(Record.PtrArgNum == PtrArgNum &&
               Record.HasConstPtr == HasConstPtr);
      Record.Mask |= qmask;
      Record.PtrArgNum = PtrArgNum;
      Record.HasConstPtr = HasConstPtr;
    }
  }

  for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E;
       ++I) {
    OverloadInfo &BuiltinOverloads = I->second;
    OS << "case NEON::BI__builtin_neon_" << I->first << ": ";
    OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL";
    if (BuiltinOverloads.PtrArgNum >= 0)
      OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum;
    if (BuiltinOverloads.HasConstPtr)
      OS << "; HasConstPtr = true";
    OS << "; break;\n";
  }

  OS << "#endif\n\n";
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
///
/// The declarations are written between "#ifdef GET_NEON_BUILTINS" and
/// "#endif", one per line, in the sort order of their text.  Only records
/// without an operation (OpNone) and without the splat code 'a' in their
/// prototype contribute.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // We want to emit the intrinsics in alphabetical order, so use the more
  // expensive std::map to gather them together first.
  std::map<std::string, OpKind> EmittedMap;

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // TypeVec holds StringRefs, so Types must outlive it.
    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.  insert() leaves an existing entry untouched, so duplicates
      // are dropped with a single lookup.
      EmittedMap.insert(
          std::make_pair(GenBuiltinDef(name, Proto, TypeVec[ti], ck), OpNone));
    }
  }

  // Generate BuiltinsNEON.
  OS << "#ifdef GET_NEON_BUILTINS\n";

  for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
                                               E = EmittedMap.end();
       I != E; ++I)
    OS << I->first << "\n";

  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
///
/// Each of the three generators fetches the "Inst" records itself, so no
/// record list is built here.
void NeonEmitter::runHeader(raw_ostream &OS) {
  // Generate shared BuiltinsXXX.def
  genBuiltinsDef(OS);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
                           const std::string &proto,
                           StringRef outTypeStr, StringRef inTypeStr,
                           bool isShift, bool isHiddenLOp,
                           ClassKind ck, const std::string &InstName,
                           bool isA64,
                           std::string & testFuncProto) {
  assert(!proto.empty() && "");
  std::string s;

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }

  // todo: GenerateChecksForIntrinsic does not generate CHECK
  // for aarch64 instructions yet
  std::vector<std::string> FileCheckPatterns;
  if (!isA64) {
    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
                               isHiddenLOp, FileCheckPatterns);
    s+= "// CHECK_ARM: test_" + mangledName + "\n";
  }
  s += "// CHECK_AARCH64: test_" + mangledName + "\n";

  // Emit the FileCheck patterns.
  // If for any reason we do not want to emit a check, mangledInst
  // will be the empty string.
  if (FileCheckPatterns.size()) {
    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
                                                  e = FileCheckPatterns.end();
         i != e;
         ++i) {
      s += "// CHECK_ARM: " + *i + "\n";
    }
  }

  // Emit the start of the test function.

  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
  char arg = 'a';
  std::string comma;
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    // Do not create arguments for values that must be immediate constants.
    if (proto[i] == 'i')
      continue;
    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
    testFuncProto.push_back(arg);
    comma = ", ";
  }
  testFuncProto += ")";

  s+= testFuncProto;
  s+= " {\n  ";

  if (proto[0] != 'v')
    s += "return ";
  s += mangledName + "(";
  arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (proto[i] == 'i') {
      // For immediate operands, test the maximum value.
      if (isShift)
        s += "1"; // FIXME
      else
        // The immediate generally refers to a lane in the preceding argument.
        s += utostr(RangeFromType(proto[i-1], inTypeStr));
    } else {
      s.push_back(arg);
    }
    if ((i + 1) < e)
      s += ", ";
  }
  s += ");\n}\n\n";
  return s;
}

/// Write out all intrinsic tests for the specified target, checking
/// for intrinsic test uniqueness.
///
/// Tests are wrapped in "#if <ArchGuard>" / "#endif" following the guard of
/// each record.  Records of kind OpUnavailable produce no test.  Uniqueness
/// is only enforced for OpReinterpret tests, keyed on the signature of the
/// test function.
void NeonEmitter::genTargetTest(raw_ostream &OS) {
  StringMap<OpKind> EmittedMap;
  std::string CurrentGuard = "";
  bool InGuard = false;

  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    // TypeVec below holds StringRefs, so Types must outlive it.
    std::string Types = R->getValueAsString("Types");
    bool isShift = R->getValueAsBit("isShift");
    std::string InstName = R->getValueAsString("InstName");
    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");

    // Switch the preprocessor guard when the record's guard differs from the
    // one that is currently open.
    std::string NewGuard = R->getValueAsString("ArchGuard");
    if (NewGuard != CurrentGuard) {
      if (InGuard)
        OS << "#endif\n\n";
      if (NewGuard.size())
        OS << "#if " << NewGuard << '\n';

      CurrentGuard = NewGuard;
      InGuard = NewGuard.size() != 0;
    }

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    // As in emitIntrinsic, fall back to ClassNone when the record has fewer
    // than two superclasses instead of indexing past the end of the list.
    ClassKind ck = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      ck = ClassMap[R->getSuperClasses()[1]];
    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
    if (kind == OpUnavailable)
      continue;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      if (kind == OpReinterpret) {
        bool outQuad = false;
        bool dummy = false;
        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
        for (unsigned srcti = 0, srcte = TypeVec.size();
             srcti != srcte; ++srcti) {
          bool inQuad = false;
          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
          // Only reinterpret between different types with the same quad flag.
          if (srcti == ti || inQuad != outQuad)
            continue;
          std::string testFuncProto;
          // A non-empty guard is passed as GenTest's isA64 flag.
          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
                                  isShift, isHiddenLOp, ck, InstName,
                                  CurrentGuard.size(), testFuncProto);
          if (EmittedMap.count(testFuncProto))
            continue;
          EmittedMap[testFuncProto] = kind;
          OS << s << "\n";
        }
      } else {
        std::string testFuncProto;
        std::string s =
            GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
                    ck, InstName, CurrentGuard.size(), testFuncProto);
        OS << s << "\n";
      }
    }
  }

  // Close the guard opened for the last record, if any.
  if (InGuard)
    OS << "#endif\n";
}
/// runTests - Write out a complete set of tests for all of the Neon
/// intrinsics.
///
/// The output starts with two RUN commands (one checked with the CHECK_ARM
/// prefix, one with the CHECK_AARCH64 prefix), the long_tests requirement and
/// the arm_neon.h include, followed by the tests from genTargetTest.
void NeonEmitter::runTests(raw_ostream &OS) {
  // Every continuation line of a RUN command must itself start with "RUN:".
  // The second line of the AArch64 command used to lack the colon, so it was
  // not part of the command and its arguments were lost.
  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
        "apcs-gnu\\\n"
        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
        "\n"
        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
        "// RUN:  -target-feature +neon  -ffreestanding -S -o - %s \\\n"
        "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
        "\n"
        "// REQUIRES: long_tests\n"
        "\n"
        "#include <arm_neon.h>\n"
        "\n";

  genTargetTest(OS);
}

namespace clang {
/// Emit arm_neon.h, generated from \p Records, to \p OS.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.run(OS);
}

/// Emit the NEON builtin definitions and the Sema checking tables, generated
/// from \p Records, to \p OS.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runHeader(OS);
}

/// Emit the NEON intrinsic tests, generated from \p Records, to \p OS.
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runTests(OS);
}
} // End namespace clang
@


