head	1.1;
branch	1.1.1;
access;
symbols
	netbsd-11-0-RC4:1.1.1.11
	netbsd-11-0-RC3:1.1.1.11
	netbsd-11-0-RC2:1.1.1.11
	netbsd-11-0-RC1:1.1.1.11
	perseant-exfatfs-base-20250801:1.1.1.11
	netbsd-11:1.1.1.11.0.10
	netbsd-11-base:1.1.1.11
	netbsd-10-1-RELEASE:1.1.1.11
	perseant-exfatfs-base-20240630:1.1.1.11
	perseant-exfatfs:1.1.1.11.0.8
	perseant-exfatfs-base:1.1.1.11
	netbsd-8-3-RELEASE:1.1.1.8
	netbsd-9-4-RELEASE:1.1.1.10
	netbsd-10-0-RELEASE:1.1.1.11
	netbsd-10-0-RC6:1.1.1.11
	netbsd-10-0-RC5:1.1.1.11
	netbsd-10-0-RC4:1.1.1.11
	netbsd-10-0-RC3:1.1.1.11
	netbsd-10-0-RC2:1.1.1.11
	netbsd-10-0-RC1:1.1.1.11
	netbsd-10:1.1.1.11.0.6
	netbsd-10-base:1.1.1.11
	netbsd-9-3-RELEASE:1.1.1.10
	cjep_sun2x:1.1.1.11.0.4
	cjep_sun2x-base:1.1.1.11
	cjep_staticlib_x-base1:1.1.1.11
	netbsd-9-2-RELEASE:1.1.1.10
	cjep_staticlib_x:1.1.1.11.0.2
	cjep_staticlib_x-base:1.1.1.11
	netbsd-9-1-RELEASE:1.1.1.10
	phil-wifi-20200421:1.1.1.11
	phil-wifi-20200411:1.1.1.11
	phil-wifi-20200406:1.1.1.11
	netbsd-8-2-RELEASE:1.1.1.8
	netbsd-9-0-RELEASE:1.1.1.10
	netbsd-9-0-RC2:1.1.1.10
	netbsd-9-0-RC1:1.1.1.10
	netbsd-9:1.1.1.10.0.2
	netbsd-9-base:1.1.1.10
	phil-wifi-20190609:1.1.1.10
	netbsd-8-1-RELEASE:1.1.1.8
	netbsd-8-1-RC1:1.1.1.8
	pgoyette-compat-merge-20190127:1.1.1.9.2.1
	pgoyette-compat-20190127:1.1.1.10
	pgoyette-compat-20190118:1.1.1.10
	pgoyette-compat-1226:1.1.1.10
	pgoyette-compat-1126:1.1.1.10
	pgoyette-compat-1020:1.1.1.10
	pgoyette-compat-0930:1.1.1.10
	pgoyette-compat-0906:1.1.1.10
	netbsd-7-2-RELEASE:1.1.1.5.2.1
	pgoyette-compat-0728:1.1.1.10
	clang-337282:1.1.1.10
	netbsd-8-0-RELEASE:1.1.1.8
	phil-wifi:1.1.1.9.0.4
	phil-wifi-base:1.1.1.9
	pgoyette-compat-0625:1.1.1.9
	netbsd-8-0-RC2:1.1.1.8
	pgoyette-compat-0521:1.1.1.9
	pgoyette-compat-0502:1.1.1.9
	pgoyette-compat-0422:1.1.1.9
	netbsd-8-0-RC1:1.1.1.8
	pgoyette-compat-0415:1.1.1.9
	pgoyette-compat-0407:1.1.1.9
	pgoyette-compat-0330:1.1.1.9
	pgoyette-compat-0322:1.1.1.9
	pgoyette-compat-0315:1.1.1.9
	netbsd-7-1-2-RELEASE:1.1.1.5.2.1
	pgoyette-compat:1.1.1.9.0.2
	pgoyette-compat-base:1.1.1.9
	netbsd-7-1-1-RELEASE:1.1.1.5.2.1
	clang-319952:1.1.1.9
	matt-nb8-mediatek:1.1.1.8.0.10
	matt-nb8-mediatek-base:1.1.1.8
	clang-309604:1.1.1.9
	perseant-stdc-iso10646:1.1.1.8.0.8
	perseant-stdc-iso10646-base:1.1.1.8
	netbsd-8:1.1.1.8.0.6
	netbsd-8-base:1.1.1.8
	prg-localcount2-base3:1.1.1.8
	prg-localcount2-base2:1.1.1.8
	prg-localcount2-base1:1.1.1.8
	prg-localcount2:1.1.1.8.0.4
	prg-localcount2-base:1.1.1.8
	pgoyette-localcount-20170426:1.1.1.8
	bouyer-socketcan-base1:1.1.1.8
	pgoyette-localcount-20170320:1.1.1.8
	netbsd-7-1:1.1.1.5.2.1.0.6
	netbsd-7-1-RELEASE:1.1.1.5.2.1
	netbsd-7-1-RC2:1.1.1.5.2.1
	clang-294123:1.1.1.8
	netbsd-7-nhusb-base-20170116:1.1.1.5.2.1
	bouyer-socketcan:1.1.1.8.0.2
	bouyer-socketcan-base:1.1.1.8
	clang-291444:1.1.1.8
	pgoyette-localcount-20170107:1.1.1.7
	netbsd-7-1-RC1:1.1.1.5.2.1
	pgoyette-localcount-20161104:1.1.1.7
	netbsd-7-0-2-RELEASE:1.1.1.5.2.1
	localcount-20160914:1.1.1.7
	netbsd-7-nhusb:1.1.1.5.2.1.0.4
	netbsd-7-nhusb-base:1.1.1.5.2.1
	clang-280599:1.1.1.7
	pgoyette-localcount-20160806:1.1.1.7
	pgoyette-localcount-20160726:1.1.1.7
	pgoyette-localcount:1.1.1.7.0.2
	pgoyette-localcount-base:1.1.1.7
	netbsd-7-0-1-RELEASE:1.1.1.5.2.1
	clang-261930:1.1.1.7
	netbsd-7-0:1.1.1.5.2.1.0.2
	netbsd-7-0-RELEASE:1.1.1.5.2.1
	netbsd-7-0-RC3:1.1.1.5.2.1
	netbsd-7-0-RC2:1.1.1.5.2.1
	netbsd-7-0-RC1:1.1.1.5.2.1
	clang-237755:1.1.1.6
	clang-232565:1.1.1.6
	clang-227398:1.1.1.6
	tls-maxphys-base:1.1.1.5
	tls-maxphys:1.1.1.5.0.4
	netbsd-7:1.1.1.5.0.2
	netbsd-7-base:1.1.1.5
	clang-215315:1.1.1.5
	clang-209886:1.1.1.4
	yamt-pagecache:1.1.1.3.0.4
	yamt-pagecache-base9:1.1.1.3
	tls-earlyentropy:1.1.1.3.0.2
	tls-earlyentropy-base:1.1.1.4
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.1.1.3
	riastradh-drm2-base3:1.1.1.3
	clang-202566:1.1.1.3
	clang-201163:1.1.1.2
	clang-199312:1.1.1.1
	LLVM:1.1.1;
locks; strict;
comment	@// @;


1.1
date	2014.01.15.21.26.25;	author joerg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	NQXlzzA0SPkc5glx;

1.1.1.1
date	2014.01.15.21.26.25;	author joerg;	state Exp;
branches;
next	1.1.1.2;
commitid	NQXlzzA0SPkc5glx;

1.1.1.2
date	2014.02.14.20.07.10;	author joerg;	state Exp;
branches;
next	1.1.1.3;
commitid	annVkZ1sc17rF6px;

1.1.1.3
date	2014.03.04.19.53.46;	author joerg;	state Exp;
branches
	1.1.1.3.2.1
	1.1.1.3.4.1;
next	1.1.1.4;
commitid	29z1hJonZISIXprx;

1.1.1.4
date	2014.05.30.18.14.40;	author joerg;	state Exp;
branches;
next	1.1.1.5;
commitid	8q0kdlBlCn09GACx;

1.1.1.5
date	2014.08.10.17.08.36;	author joerg;	state Exp;
branches
	1.1.1.5.2.1
	1.1.1.5.4.1;
next	1.1.1.6;
commitid	N85tXAN6Ex9VZPLx;

1.1.1.6
date	2015.01.29.19.57.31;	author joerg;	state Exp;
branches;
next	1.1.1.7;
commitid	mlISSizlPKvepX7y;

1.1.1.7
date	2016.02.27.22.12.10;	author joerg;	state Exp;
branches
	1.1.1.7.2.1;
next	1.1.1.8;
commitid	tIimz3oDlh1NpBWy;

1.1.1.8
date	2017.01.11.10.33.36;	author joerg;	state Exp;
branches;
next	1.1.1.9;
commitid	CNnUNfII1jgNmxBz;

1.1.1.9
date	2017.08.01.19.35.19;	author joerg;	state Exp;
branches
	1.1.1.9.2.1
	1.1.1.9.4.1;
next	1.1.1.10;
commitid	pMuDy65V0VicSx1A;

1.1.1.10
date	2018.07.17.18.31.01;	author joerg;	state Exp;
branches;
next	1.1.1.11;
commitid	wDzL46ALjrCZgwKA;

1.1.1.11
date	2019.11.13.22.19.21;	author joerg;	state dead;
branches;
next	;
commitid	QD8YATxuNG34YJKB;

1.1.1.3.2.1
date	2014.08.10.07.08.07;	author tls;	state Exp;
branches;
next	;
commitid	t01A1TLTYxkpGMLx;

1.1.1.3.4.1
date	2014.03.04.19.53.46;	author yamt;	state dead;
branches;
next	1.1.1.3.4.2;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.3.4.2
date	2014.05.22.16.18.27;	author yamt;	state Exp;
branches;
next	;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.5.2.1
date	2015.06.04.20.04.28;	author snj;	state Exp;
branches;
next	;
commitid	yRnjq9fueSo6n9oy;

1.1.1.5.4.1
date	2014.08.10.17.08.36;	author tls;	state dead;
branches;
next	1.1.1.5.4.2;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.5.4.2
date	2014.08.19.23.47.27;	author tls;	state Exp;
branches;
next	;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.7.2.1
date	2017.03.20.06.52.37;	author pgoyette;	state Exp;
branches;
next	;
commitid	jjw7cAwgyKq7RfKz;

1.1.1.9.2.1
date	2018.07.28.04.33.18;	author pgoyette;	state Exp;
branches;
next	;
commitid	1UP1xAIUxv1ZgRLA;

1.1.1.9.4.1
date	2019.06.10.21.45.22;	author christos;	state Exp;
branches;
next	1.1.1.9.4.2;
commitid	jtc8rnCzWiEEHGqB;

1.1.1.9.4.2
date	2020.04.13.07.46.32;	author martin;	state dead;
branches;
next	;
commitid	X01YhRUPVUDaec4C;


desc
@@


1.1
log
@Initial revision
@
text
@//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profile-guided optimization
//
//===----------------------------------------------------------------------===//

#include "CodeGenPGO.h"
#include "CodeGenFunction.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/FileSystem.h"

using namespace clang;
using namespace CodeGen;

static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
  DiagnosticsEngine &Diags = CGM.getDiags();
  unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
  Diags.Report(DiagID);
}

PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
  : CGM(CGM) {
  if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
    ReportBadPGOData(CGM, "failed to open pgo data file");
    return;
  }

  if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
    ReportBadPGOData(CGM, "pgo data file too big");
    return;
  }

  // Scan through the data file and map each function to the corresponding
  // file offset where its counts are stored.
  const char *BufferStart = DataBuffer->getBufferStart();
  const char *BufferEnd = DataBuffer->getBufferEnd();
  const char *CurPtr = BufferStart;
  while (CurPtr < BufferEnd) {
    // Read the mangled function name.
    const char *FuncName = CurPtr;
    // FIXME: Something will need to be added to distinguish static functions.
    CurPtr = strchr(CurPtr, ' ');
    if (!CurPtr) {
      ReportBadPGOData(CGM, "pgo data file has malformed function entry");
      return;
    }
    StringRef MangledName(FuncName, CurPtr - FuncName);

    // Read the number of counters.
    char *EndPtr;
    unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
      ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
      return;
    }
    CurPtr = EndPtr;

    // There is one line for each counter; skip over those lines.
    for (unsigned N = 0; N < NumCounters; ++N) {
      CurPtr = strchr(++CurPtr, '\n');
      if (!CurPtr) {
        ReportBadPGOData(CGM, "pgo data file is missing some counter info");
        return;
      }
    }

    // Skip over the blank line separating functions.
    CurPtr += 2;

    DataOffsets[MangledName] = FuncName - BufferStart;
  }
}

bool PGOProfileData::getFunctionCounts(StringRef MangledName,
                                       std::vector<uint64_t> &Counts) {
  // Find the relevant section of the pgo-data file.
  llvm::StringMap<unsigned>::const_iterator OffsetIter =
    DataOffsets.find(MangledName);
  if (OffsetIter == DataOffsets.end())
    return true;
  const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();

  // Skip over the function name.
  CurPtr = strchr(CurPtr, ' ');
  assert(CurPtr && "pgo-data has corrupted function entry");

  // Read the number of counters.
  char *EndPtr;
  unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
  assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
         "pgo-data file has corrupted number of counters");
  CurPtr = EndPtr;

  Counts.reserve(NumCounters);

  for (unsigned N = 0; N < NumCounters; ++N) {
    // Read the count value.
    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n') {
      ReportBadPGOData(CGM, "pgo-data file has bad count value");
      return true;
    }
    Counts.push_back(Count);
    CurPtr = EndPtr + 1;
  }

  // Make sure the number of counters matches up.
  if (Counts.size() != NumCounters) {
    ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
    return true;
  }

  return false;
}

void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return;

  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);

  llvm::Function *WriteoutF =
    CGM.getModule().getFunction("__llvm_pgo_writeout");
  if (!WriteoutF) {
    llvm::FunctionType *WriteoutFTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
    WriteoutF = llvm::Function::Create(WriteoutFTy,
                                       llvm::GlobalValue::InternalLinkage,
                                       "__llvm_pgo_writeout", &CGM.getModule());
  }
  WriteoutF->setUnnamedAddr(true);
  WriteoutF->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);

  llvm::BasicBlock *BB = WriteoutF->empty() ?
    llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();

  CGBuilderTy PGOBuilder(BB);

  llvm::Instruction *I = BB->getTerminator();
  if (!I)
    I = PGOBuilder.CreateRetVoid();
  PGOBuilder.SetInsertPoint(I);

  llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
  llvm::Type *Args[] = {
    Int8PtrTy,                       // const char *MangledName
    Int32Ty,                         // uint32_t NumCounters
    Int64PtrTy                       // uint64_t *Counters
  };
  llvm::FunctionType *FTy =
    llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
  llvm::Constant *EmitFunc =
    CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);

  llvm::Constant *MangledName =
    CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
  MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
  PGOBuilder.CreateCall3(EmitFunc, MangledName,
                         PGOBuilder.getInt32(NumRegionCounters),
                         PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
}

llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
  llvm::Function *WriteoutF =
    CGM.getModule().getFunction("__llvm_pgo_writeout");
  if (!WriteoutF)
    return NULL;

  // Create a small bit of code that registers the "__llvm_pgo_writeout" to
  // be executed at exit.
  llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
  if (F)
    return NULL;

  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                                    false);
  F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
                             "__llvm_pgo_init", &CGM.getModule());
  F->setUnnamedAddr(true);
  F->setLinkage(llvm::GlobalValue::InternalLinkage);
  F->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    F->addFnAttr(llvm::Attribute::NoRedZone);

  llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
  CGBuilderTy PGOBuilder(BB);

  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
  llvm::Type *Params[] = {
    llvm::PointerType::get(FTy, 0)
  };
  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);

  // Inialize the environment and register the local writeout function.
  llvm::Constant *PGOInit =
    CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
  PGOBuilder.CreateCall(PGOInit, WriteoutF);
  PGOBuilder.CreateRetVoid();

  return F;
}

namespace {
  /// A StmtVisitor that fills a map of statements to PGO counters.
  struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
    /// The next counter value to assign.
    unsigned NextCounter;
    /// The map of statements to counters.
    llvm::DenseMap<const Stmt*, unsigned> *CounterMap;

    MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
      NextCounter(0), CounterMap(CounterMap) {
    }

    void VisitChildren(const Stmt *S) {
      for (Stmt::const_child_range I = S->children(); I; ++I)
        if (*I)
         this->Visit(*I);
    }
    void VisitStmt(const Stmt *S) { VisitChildren(S); }

    /// Assign a counter to track entry to the function body.
    void VisitFunctionDecl(const FunctionDecl *S) {
      (*CounterMap)[S->getBody()] = NextCounter++;
      Visit(S->getBody());
    }
    /// Assign a counter to track the block following a label.
    void VisitLabelStmt(const LabelStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign three counters - one for the body of the loop, one for breaks
    /// from the loop, and one for continues.
    ///
    /// The break and continue counters cover all such statements in this loop,
    /// and are used in calculations to find the number of times the condition
    /// and exit of the loop occur. They are needed so we can differentiate
    /// these statements from non-local exits like return and goto.
    void VisitWhileStmt(const WhileStmt *S) {
      (*CounterMap)[S] = NextCounter;
      NextCounter += 3;
      Visit(S->getCond());
      Visit(S->getBody());
    }
    /// Assign counters for the body of the loop, and for breaks and
    /// continues. See VisitWhileStmt.
    void VisitDoStmt(const DoStmt *S) {
      (*CounterMap)[S] = NextCounter;
      NextCounter += 3;
      Visit(S->getBody());
      Visit(S->getCond());
    }
    /// Assign counters for the body of the loop, and for breaks and
    /// continues. See VisitWhileStmt.
    void VisitForStmt(const ForStmt *S) {
      (*CounterMap)[S] = NextCounter;
      NextCounter += 3;
      const Expr *E;
      if ((E = S->getCond()))
        Visit(E);
      Visit(S->getBody());
      if ((E = S->getInc()))
        Visit(E);
    }
    /// Assign counters for the body of the loop, and for breaks and
    /// continues. See VisitWhileStmt.
    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
      (*CounterMap)[S] = NextCounter;
      NextCounter += 3;
      const Expr *E;
      if ((E = S->getCond()))
        Visit(E);
      Visit(S->getBody());
      if ((E = S->getInc()))
        Visit(E);
    }
    /// Assign counters for the body of the loop, and for breaks and
    /// continues. See VisitWhileStmt.
    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
      (*CounterMap)[S] = NextCounter;
      NextCounter += 3;
      Visit(S->getElement());
      Visit(S->getBody());
    }
    /// Assign a counter for the exit block of the switch statement.
    void VisitSwitchStmt(const SwitchStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getCond());
      Visit(S->getBody());
    }
    /// Assign a counter for a particular case in a switch. This counts jumps
    /// from the switch header as well as fallthrough from the case before this
    /// one.
    void VisitCaseStmt(const CaseStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign a counter for the default case of a switch statement. The count
    /// is the number of branches from the loop header to the default, and does
    /// not include fallthrough from previous cases. If we have multiple
    /// conditional branch blocks from the switch instruction to the default
    /// block, as with large GNU case ranges, this is the counter for the last
    /// edge in that series, rather than the first.
    void VisitDefaultStmt(const DefaultStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign a counter for the "then" part of an if statement. The count for
    /// the "else" part, if it exists, will be calculated from this counter.
    void VisitIfStmt(const IfStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getCond());
      Visit(S->getThen());
      if (S->getElse())
        Visit(S->getElse());
    }
    /// Assign a counter for the continuation block of a C++ try statement.
    void VisitCXXTryStmt(const CXXTryStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getTryBlock());
      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
        Visit(S->getHandler(I));
    }
    /// Assign a counter for a catch statement's handler block.
    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getHandlerBlock());
    }
    /// Assign a counter for the "true" part of a conditional operator. The
    /// count in the "false" part will be calculated from this counter.
    void VisitConditionalOperator(const ConditionalOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getCond());
      Visit(E->getTrueExpr());
      Visit(E->getFalseExpr());
    }
    /// Assign a counter for the right hand side of a logical and operator.
    void VisitBinLAnd(const BinaryOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getLHS());
      Visit(E->getRHS());
    }
    /// Assign a counter for the right hand side of a logical or operator.
    void VisitBinLOr(const BinaryOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getLHS());
      Visit(E->getRHS());
    }
  };
}

void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
  bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
  PGOProfileData *PGOData = CGM.getPGOData();
  if (!InstrumentRegions && !PGOData)
    return;
  const Decl *D = GD.getDecl();
  if (!D)
    return;
  mapRegionCounters(D);
  if (InstrumentRegions)
    emitCounterVariables();
  if (PGOData)
    loadRegionCounts(GD, PGOData);
}

void CodeGenPGO::mapRegionCounters(const Decl *D) {
  RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
  MapRegionCounters Walker(RegionCounterMap);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.VisitFunctionDecl(FD);
  NumRegionCounters = Walker.NextCounter;
}

void CodeGenPGO::emitCounterVariables() {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
                                                    NumRegionCounters);
  RegionCounters =
    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
                             llvm::GlobalVariable::PrivateLinkage,
                             llvm::Constant::getNullValue(CounterTy),
                             "__llvm_pgo_ctr");
}

void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return;
  llvm::Value *Addr =
    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
  Builder.CreateStore(Count, Addr);
}

void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
  // For now, ignore the counts from the PGO data file only if the number of
  // counters does not match. This could be tightened down in the future to
  // ignore counts when the input changes in various ways, e.g., by comparing a
  // hash value based on some characteristics of the input.
  RegionCounts = new std::vector<uint64_t>();
  if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
      RegionCounts->size() != NumRegionCounters) {
    delete RegionCounts;
    RegionCounts = 0;
  }
}

void CodeGenPGO::destroyRegionCounters() {
  if (RegionCounterMap != 0)
    delete RegionCounterMap;
  if (RegionCounts != 0)
    delete RegionCounts;
}

llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
                                              uint64_t FalseCount) {
  if (!TrueCount && !FalseCount)
    return 0;

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  // TODO: need to scale down to 32-bits
  // According to Laplace's Rule of Succession, it is better to compute the
  // weight based on the count plus 1.
  return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
}

llvm::MDNode *
CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  // TODO: need to scale down to 32-bits, instead of just truncating.
  // According to Laplace's Rule of Succession, it is better to compute the
  // weight based on the count plus 1.
  SmallVector<uint32_t, 16> ScaledWeights;
  ScaledWeights.reserve(Weights.size());
  for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
       WI != WE; ++WI) {
    ScaledWeights.push_back(*WI + 1);
  }
  return MDHelper.createBranchWeights(ScaledWeights);
}
@


1.1.1.1
log
@Import Clang 3.5svn r199312
@
text
@@


1.1.1.2
log
@Import Clang 3.5svn r201163.
@
text
@d27 2
a28 2
  unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0");
  Diags.Report(diagID) << Message;
a47 1
  uint64_t MaxCount = 0;
a67 10
    // Read function count.
    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n') {
      ReportBadPGOData(CGM, "pgo-data file has bad count value");
      return;
    }
    CurPtr = EndPtr + 1;
    FunctionCounts[MangledName] = Count;
    MaxCount = Count > MaxCount ? Count : MaxCount;

d69 1
a69 2
    // Since function count is already read, we start the loop from 1.
    for (unsigned N = 1; N < NumCounters; ++N) {
a81 27
  MaxFunctionCount = MaxCount;
}

/// Return true if a function is hot. If we know nothing about the function,
/// return false.
bool PGOProfileData::isHotFunction(StringRef MangledName) {
  llvm::StringMap<uint64_t>::const_iterator CountIter =
    FunctionCounts.find(MangledName);
  // If we know nothing about the function, return false.
  if (CountIter == FunctionCounts.end())
    return false;
  // FIXME: functions with >= 30% of the maximal function count are
  // treated as hot. This number is from preliminary tuning on SPEC.
  return CountIter->getValue() >= (uint64_t)(0.3 * (double)MaxFunctionCount);
}

/// Return true if a function is cold. If we know nothing about the function,
/// return false.
bool PGOProfileData::isColdFunction(StringRef MangledName) {
  llvm::StringMap<uint64_t>::const_iterator CountIter =
    FunctionCounts.find(MangledName);
  // If we know nothing about the function, return false.
  if (CountIter == FunctionCounts.end())
    return false;
  // FIXME: functions with <= 1% of the maximal function count are treated as
  // cold. This number is from preliminary tuning on SPEC.
  return CountIter->getValue() <= (uint64_t)(0.01 * (double)MaxFunctionCount);
@


1.1.1.3
log
@Import Clang 3.5svn r202566.
@
text
@d75 1
a75 1
    CurPtr = EndPtr; // Point to '\n'.
d287 7
a293 1
    /// Assign a counter for the body of a while loop.
d295 2
a296 1
      (*CounterMap)[S] = NextCounter++;
d300 2
a301 1
    /// Assign a counter for the body of a do-while loop.
d303 2
a304 1
      (*CounterMap)[S] = NextCounter++;
d308 2
a309 1
    /// Assign a counter for the body of a for loop.
d311 2
a312 3
      (*CounterMap)[S] = NextCounter++;
      if (S->getInit())
        Visit(S->getInit());
d316 1
a318 1
      Visit(S->getBody());
d320 2
a321 1
    /// Assign a counter for the body of a for-range loop.
d323 5
a327 5
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getRangeStmt());
      Visit(S->getBeginEndStmt());
      Visit(S->getCond());
      Visit(S->getLoopVarStmt());
d329 2
a330 1
      Visit(S->getInc());
d332 2
a333 1
    /// Assign a counter for the body of a for-collection loop.
d335 2
a336 1
      (*CounterMap)[S] = NextCounter++;
a404 337

  /// A StmtVisitor that propagates the raw counts through the AST and
  /// records the count at statements where the value may change.
  struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
    /// PGO state.
    CodeGenPGO &PGO;

    /// A flag that is set when the current count should be recorded on the
    /// next statement, such as at the exit of a loop.
    bool RecordNextStmtCount;

    /// The map of statements to count values.
    llvm::DenseMap<const Stmt*, uint64_t> *CountMap;

    /// BreakContinueStack - Keep counts of breaks and continues inside loops. 
    struct BreakContinue {
      uint64_t BreakCount;
      uint64_t ContinueCount;
      BreakContinue() : BreakCount(0), ContinueCount(0) {}
    };
    SmallVector<BreakContinue, 8> BreakContinueStack;

    ComputeRegionCounts(llvm::DenseMap<const Stmt*, uint64_t> *CountMap,
                        CodeGenPGO &PGO) :
      PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {
    }

    void RecordStmtCount(const Stmt *S) {
      if (RecordNextStmtCount) {
        (*CountMap)[S] = PGO.getCurrentRegionCount();
        RecordNextStmtCount = false;
      }
    }

    void VisitStmt(const Stmt *S) {
      RecordStmtCount(S);
      for (Stmt::const_child_range I = S->children(); I; ++I) {
        if (*I)
         this->Visit(*I);
      }
    }

    void VisitFunctionDecl(const FunctionDecl *S) {
      RegionCounter Cnt(PGO, S->getBody());
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
    }

    void VisitReturnStmt(const ReturnStmt *S) {
      RecordStmtCount(S);
      if (S->getRetValue())
        Visit(S->getRetValue());
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitGotoStmt(const GotoStmt *S) {
      RecordStmtCount(S);
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitLabelStmt(const LabelStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      (*CountMap)[S] = PGO.getCurrentRegionCount();
      Visit(S->getSubStmt());
    }

    void VisitBreakStmt(const BreakStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
      BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitContinueStmt(const ContinueStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
      BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitWhileStmt(const WhileStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first so the break/continue adjustments can be
      // included when visiting the condition.
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // ...then go back and propagate counts through the condition. The count
      // at the start of the condition is the sum of the incoming edges,
      // the backedge from the end of the loop body, and the edges from
      // continue statements.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitDoStmt(const DoStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();
      // The count at the start of the condition is equal to the count at the
      // end of the body. The adjusted count does not include either the
      // fall-through count coming into the loop or the continue count, so add
      // both of those separately. This is coincidentally the same equation as
      // with while loops but for different reasons.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitForStmt(const ForStmt *S) {
      RecordStmtCount(S);
      if (S->getInit())
        Visit(S->getInit());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      if (S->getInc()) {
        Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                  BreakContinueStack.back().ContinueCount);
        (*CountMap)[S->getInc()] = PGO.getCurrentRegionCount();
        Visit(S->getInc());
        Cnt.adjustForControlFlow();
      }

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      if (S->getCond()) {
        Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                  Cnt.getAdjustedCount() +
                                  BC.ContinueCount);
        (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
        Visit(S->getCond());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
      RecordStmtCount(S);
      Visit(S->getRangeStmt());
      Visit(S->getBeginEndStmt());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      (*CountMap)[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
      Visit(S->getLoopVarStmt());
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                BreakContinueStack.back().ContinueCount);
      (*CountMap)[S->getInc()] = PGO.getCurrentRegionCount();
      Visit(S->getInc());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() +
                                BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
      RecordStmtCount(S);
      Visit(S->getElement());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitSwitchStmt(const SwitchStmt *S) {
      RecordStmtCount(S);
      Visit(S->getCond());
      PGO.setCurrentRegionUnreachable();
      BreakContinueStack.push_back(BreakContinue());
      Visit(S->getBody());
      // If the switch is inside a loop, add the continue counts.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      if (!BreakContinueStack.empty())
        BreakContinueStack.back().ContinueCount += BC.ContinueCount;
      RegionCounter ExitCnt(PGO, S);
      ExitCnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCaseStmt(const CaseStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitDefaultStmt(const DefaultStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitIfStmt(const IfStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      Visit(S->getCond());

      Cnt.beginRegion();
      (*CountMap)[S->getThen()] = PGO.getCurrentRegionCount();
      Visit(S->getThen());
      Cnt.adjustForControlFlow();

      if (S->getElse()) {
        Cnt.beginElseRegion();
        (*CountMap)[S->getElse()] = PGO.getCurrentRegionCount();
        Visit(S->getElse());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitCXXTryStmt(const CXXTryStmt *S) {
      RecordStmtCount(S);
      Visit(S->getTryBlock());
      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
        Visit(S->getHandler(I));
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      (*CountMap)[S] = PGO.getCurrentRegionCount();
      Visit(S->getHandlerBlock());
    }

    void VisitConditionalOperator(const ConditionalOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getCond());

      Cnt.beginRegion();
      (*CountMap)[E->getTrueExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getTrueExpr());
      Cnt.adjustForControlFlow();

      Cnt.beginElseRegion();
      (*CountMap)[E->getFalseExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getFalseExpr());
      Cnt.adjustForControlFlow();

      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLAnd(const BinaryOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      (*CountMap)[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLOr(const BinaryOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      (*CountMap)[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }
  };
d418 1
a418 1
  if (PGOData) {
a419 2
    computeRegionCounts(D);
  }
a429 7
void CodeGenPGO::computeRegionCounts(const Decl *D) {
  StmtCountMap = new llvm::DenseMap<const Stmt*, uint64_t>();
  ComputeRegionCounts Walker(StmtCountMap, *this);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.VisitFunctionDecl(FD);
}

a466 2
  if (StmtCountMap != 0)
    delete StmtCountMap;
d483 2
a484 1
llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
a496 15

llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond,
                                            RegionCounter &Cnt) {
  if (!haveRegionCounts())
    return 0;
  uint64_t LoopCount = Cnt.getCount();
  uint64_t CondCount = 0;
  bool Found = getStmtCount(Cond, CondCount);
  assert(Found && "missing expected loop condition count");
  (void)Found;
  if (CondCount == 0)
    return 0;
  return createBranchWeights(LoopCount,
                             std::max(CondCount, LoopCount) - LoopCount);
}
@


1.1.1.3.2.1
log
@Rebase.
@
text
@d18 1
a19 2
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Endian.h"
a20 1
#include "llvm/Support/MD5.h"
d25 5
a29 2
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  RawFuncName = Fn->getName();
d31 6
a36 5
  // Function names may be prefixed with a binary '1' to indicate
  // that the backend should not modify the symbols due to any platform
  // naming convention. Do not include that '1' in the PGO profile name.
  if (RawFuncName[0] == '\1')
    RawFuncName = RawFuncName.substr(1);
d38 2
a39 2
  if (!Fn->hasLocalLinkage()) {
    PrefixedFuncName.reset(new std::string(RawFuncName));
d43 45
a87 34
  // For local symbols, prepend the main file name to distinguish them.
  // Do not include the full path in the file name since there's no guarantee
  // that it will stay the same, e.g., if the files are checked out from
  // version control in different locations.
  PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName));
  if (PrefixedFuncName->empty())
    PrefixedFuncName->assign("<unknown>");
  PrefixedFuncName->append(":");
  PrefixedFuncName->append(RawFuncName);
}

static llvm::Function *getRegisterFunc(CodeGenModule &CGM) {
  return CGM.getModule().getFunction("__llvm_profile_register_functions");
}

static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) {
  // Don't do this for Darwin.  compiler-rt uses linker magic.
  if (CGM.getTarget().getTriple().isOSDarwin())
    return nullptr;

  // Only need to insert this once per module.
  if (llvm::Function *RegisterF = getRegisterFunc(CGM))
    return &RegisterF->getEntryBlock();

  // Construct the function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false);
  auto *RegisterF = llvm::Function::Create(RegisterFTy,
                                           llvm::GlobalValue::InternalLinkage,
                                           "__llvm_profile_register_functions",
                                           &CGM.getModule());
  RegisterF->setUnnamedAddr(true);
  if (CGM.getCodeGenOpts().DisableRedZone)
    RegisterF->addFnAttr(llvm::Attribute::NoRedZone);
d89 2
a90 6
  // Construct and return the entry block.
  auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF);
  CGBuilderTy Builder(BB);
  Builder.CreateRetVoid();
  return BB;
}
d92 3
a94 6
static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) {
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
  return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function",
                                             RuntimeRegisterTy);
d97 58
a154 3
static bool isMachO(const CodeGenModule &CGM) {
  return CGM.getTarget().getTriple().isOSBinFormatMachO();
}
d156 5
a160 3
static StringRef getCountersSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
}
d162 1
a162 2
static StringRef getNameSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
d165 3
a167 3
static StringRef getDataSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
}
a168 2
llvm::GlobalVariable *CodeGenPGO::buildDataVar() {
  // Create name variable.
d170 12
a181 40
  auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(),
                                                     false);
  auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(),
                                        true, VarLinkage, VarName,
                                        getFuncVarName("name"));
  Name->setSection(getNameSection(CGM));
  Name->setAlignment(1);

  // Create data variable.
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Int64Ty = llvm::Type::getInt64Ty(Ctx);
  auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
  auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
  llvm::Type *DataTypes[] = {
    Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
  };
  auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
  llvm::Constant *DataVals[] = {
    llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
    llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
    llvm::ConstantInt::get(Int64Ty, FunctionHash),
    llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
    llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
  };
  auto *Data =
    new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
                             llvm::ConstantStruct::get(DataTy, DataVals),
                             getFuncVarName("data"));

  // All the data should be packed into an array in its own section.
  Data->setSection(getDataSection(CGM));
  Data->setAlignment(8);

  // Hide all these symbols so that we correctly get a copy for each
  // executable.  The profile format expects names and counters to be
  // contiguous, so references into shared objects would be invalid.
  if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) {
    Name->setVisibility(llvm::GlobalValue::HiddenVisibility);
    Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
d183 4
d188 2
a189 4
  // Make sure the data doesn't get deleted.
  CGM.addUsedGlobal(Data);
  return Data;
}
d191 1
a191 3
void CodeGenPGO::emitInstrumentationData() {
  if (!RegionCounters)
    return;
d193 22
a214 11
  // Build the data.
  auto *Data = buildDataVar();

  // Register the data.
  auto *RegisterBB = getOrInsertRegisterBB(CGM);
  if (!RegisterBB)
    return;
  CGBuilderTy Builder(RegisterBB->getTerminator());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  Builder.CreateCall(getOrInsertRuntimeRegister(CGM),
                     Builder.CreateBitCast(Data, VoidPtrTy));
d218 10
a227 2
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return nullptr;
d229 5
a233 13
  assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr &&
         "profile initialization already emitted");

  // Get the function to call at initialization.
  llvm::Constant *RegisterF = getRegisterFunc(CGM);
  if (!RegisterF)
    return nullptr;

  // Create the initialization function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false),
                                   llvm::GlobalValue::InternalLinkage,
                                   "__llvm_profile_init", &CGM.getModule());
d235 1
d240 14
a253 4
  // Add the basic block and the necessary calls.
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F));
  Builder.CreateCall(RegisterF);
  Builder.CreateRetVoid();
d259 2
a260 63
/// \brief Stable hasher for PGO region counters.
///
/// PGOHash produces a stable hash of a given function's control flow.
///
/// Changing the output of this hash will invalidate all previously generated
/// profiles -- i.e., don't do it.
///
/// \note  When this hash does eventually change (years?), we still need to
/// support old hashes.  We'll need to pull in the version number from the
/// profile data format and use the matching hash function.
class PGOHash {
  uint64_t Working;
  unsigned Count;
  llvm::MD5 MD5;

  static const int NumBitsPerType = 6;
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
  static const unsigned TooBig = 1u << NumBitsPerType;

public:
  /// \brief Hash values for AST nodes.
  ///
  /// Distinct values for AST nodes that have region counters attached.
  ///
  /// These values must be stable.  All new members must be added at the end,
  /// and no members should be removed.  Changing the enumeration value for an
  /// AST node will affect the hash of every function that contains that node.
  enum HashType : unsigned char {
    None = 0,
    LabelStmt = 1,
    WhileStmt,
    DoStmt,
    ForStmt,
    CXXForRangeStmt,
    ObjCForCollectionStmt,
    SwitchStmt,
    CaseStmt,
    DefaultStmt,
    IfStmt,
    CXXTryStmt,
    CXXCatchStmt,
    ConditionalOperator,
    BinaryOperatorLAnd,
    BinaryOperatorLOr,
    BinaryConditionalOperator,

    // Keep this last.  It's for the static assert that follows.
    LastHashType
  };
  static_assert(LastHashType <= TooBig, "Too many types in HashType");

  // TODO: When this format changes, take in a version number here, and use the
  // old hash calculation for file formats that used the old hash.
  PGOHash() : Working(0), Count(0) {}
  void combine(HashType Type);
  uint64_t finalize();
};
const int PGOHash::NumBitsPerType;
const unsigned PGOHash::NumTypesPerWord;
const unsigned PGOHash::TooBig;

  /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
  struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
a262 2
    /// The function hash.
    PGOHash Hash;
d264 1
a264 1
    llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
d266 3
a268 2
    MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
        : NextCounter(0), CounterMap(CounterMap) {}
d270 4
a273 22
    // Blocks and lambdas are handled as separate functions, so we need not
    // traverse them in the parent context.
    bool TraverseBlockExpr(BlockExpr *BE) { return true; }
    bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
    bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }

    bool VisitDecl(const Decl *D) {
      switch (D->getKind()) {
      default:
        break;
      case Decl::Function:
      case Decl::CXXMethod:
      case Decl::CXXConstructor:
      case Decl::CXXDestructor:
      case Decl::CXXConversion:
      case Decl::ObjCMethod:
      case Decl::Block:
      case Decl::Captured:
        CounterMap[D->getBody()] = NextCounter++;
        break;
      }
      return true;
d275 1
d277 107
a383 8
    bool VisitStmt(const Stmt *S) {
      auto Type = getHashType(S);
      if (Type == PGOHash::None)
        return true;

      CounterMap[S] = NextCounter++;
      Hash.combine(Type);
      return true;
d385 5
a389 42
    PGOHash::HashType getHashType(const Stmt *S) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::LabelStmtClass:
        return PGOHash::LabelStmt;
      case Stmt::WhileStmtClass:
        return PGOHash::WhileStmt;
      case Stmt::DoStmtClass:
        return PGOHash::DoStmt;
      case Stmt::ForStmtClass:
        return PGOHash::ForStmt;
      case Stmt::CXXForRangeStmtClass:
        return PGOHash::CXXForRangeStmt;
      case Stmt::ObjCForCollectionStmtClass:
        return PGOHash::ObjCForCollectionStmt;
      case Stmt::SwitchStmtClass:
        return PGOHash::SwitchStmt;
      case Stmt::CaseStmtClass:
        return PGOHash::CaseStmt;
      case Stmt::DefaultStmtClass:
        return PGOHash::DefaultStmt;
      case Stmt::IfStmtClass:
        return PGOHash::IfStmt;
      case Stmt::CXXTryStmtClass:
        return PGOHash::CXXTryStmt;
      case Stmt::CXXCatchStmtClass:
        return PGOHash::CXXCatchStmt;
      case Stmt::ConditionalOperatorClass:
        return PGOHash::ConditionalOperator;
      case Stmt::BinaryConditionalOperatorClass:
        return PGOHash::BinaryConditionalOperator;
      case Stmt::BinaryOperatorClass: {
        const BinaryOperator *BO = cast<BinaryOperator>(S);
        if (BO->getOpcode() == BO_LAnd)
          return PGOHash::BinaryOperatorLAnd;
        if (BO->getOpcode() == BO_LOr)
          return PGOHash::BinaryOperatorLOr;
        break;
      }
      }
      return PGOHash::None;
d404 1
a404 1
    llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
d406 1
a406 1
    /// BreakContinueStack - Keep counts of breaks and continues inside loops.
d414 4
a417 3
    ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
                        CodeGenPGO &PGO)
        : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
d421 1
a421 1
        CountMap[S] = PGO.getCurrentRegionCount();
d434 2
a435 3
    void VisitFunctionDecl(const FunctionDecl *D) {
      // Counter tracks entry to the function body.
      RegionCounter Cnt(PGO, D->getBody());
d437 2
a438 31
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    // Skip lambda expressions. We visit these as FunctionDecls when we're
    // generating them and aren't interested in the body when generating a
    // parent context.
    void VisitLambdaExpr(const LambdaExpr *LE) {}

    void VisitCapturedDecl(const CapturedDecl *D) {
      // Counter tracks entry to the capture body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
      // Counter tracks entry to the method body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitBlockDecl(const BlockDecl *D) {
      // Counter tracks entry to the block body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
a456 1
      // Counter tracks the block following the label.
d459 1
a459 1
      CountMap[S] = PGO.getCurrentRegionCount();
a480 1
      // Counter tracks the body of the loop.
d486 1
a486 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d497 1
a497 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a505 1
      // Counter tracks the body of the loop.
d509 1
a509 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d521 1
a521 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a531 1
      // Counter tracks the body of the loop.
d537 1
a537 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d546 1
a546 1
        CountMap[S->getInc()] = PGO.getCurrentRegionCount();
d558 1
a558 1
        CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a569 1
      // Counter tracks the body of the loop.
d575 1
a575 1
      CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
d584 1
a584 1
      CountMap[S->getInc()] = PGO.getCurrentRegionCount();
d594 1
a594 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a603 1
      // Counter tracks the body of the loop.
d607 1
a607 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
a624 1
      // Counter tracks the exit block of the switch.
a631 3
      // Counter for this particular case. This counts only jumps from the
      // switch header and does not include fallthrough from the case before
      // this one.
d634 1
a634 1
      CountMap[S] = Cnt.getCount();
a640 2
      // Counter for this default case. This does not include fallthrough from
      // the previous case.
d643 1
a643 1
      CountMap[S] = Cnt.getCount();
a649 2
      // Counter tracks the "then" part of an if statement. The count for
      // the "else" part, if it exists, will be calculated from this counter.
d654 1
a654 1
      CountMap[S->getThen()] = PGO.getCurrentRegionCount();
d660 1
a660 1
        CountMap[S->getElse()] = PGO.getCurrentRegionCount();
a672 1
      // Counter tracks the continuation block of the try statement.
a679 1
      // Counter tracks the catch statement's handler block.
d682 1
a682 1
      CountMap[S] = PGO.getCurrentRegionCount();
d686 1
a686 2
    void VisitAbstractConditionalOperator(
        const AbstractConditionalOperator *E) {
a687 2
      // Counter tracks the "true" part of a conditional operator. The
      // count in the "false" part will be calculated from this counter.
d692 1
a692 1
      CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
d697 1
a697 1
      CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
a706 1
      // Counter tracks the right hand side of a logical and operator.
d710 1
a710 1
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
a718 1
      // Counter tracks the right hand side of a logical or operator.
d722 1
a722 1
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
d731 1
a731 67
void PGOHash::combine(HashType Type) {
  // Check that we never combine 0 and only have six bits.
  assert(Type && "Hash is invalid: unexpected type 0");
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");

  // Pass through MD5 if enough work has built up.
  if (Count && Count % NumTypesPerWord == 0) {
    using namespace llvm::support;
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
    Working = 0;
  }

  // Accumulate the current type.
  ++Count;
  Working = Working << NumBitsPerType | Type;
}

uint64_t PGOHash::finalize() {
  // Use Working as the hash directly if we never used MD5.
  if (Count <= NumTypesPerWord)
    // No need to byte swap here, since none of the math was endian-dependent.
    // This number will be byte-swapped as required on endianness transitions,
    // so we will see the same value on the other side.
    return Working;

  // Check for remaining work in Working.
  if (Working)
    MD5.update(Working);

  // Finalize the MD5 and return the hash.
  llvm::MD5::MD5Result Result;
  MD5.final(Result);
  using namespace llvm::support;
  return endian::read<uint64_t, little, unaligned>(Result);
}

static void emitRuntimeHook(CodeGenModule &CGM) {
  const char *const RuntimeVarName = "__llvm_profile_runtime";
  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
  if (CGM.getModule().getGlobalVariable(RuntimeVarName))
    return;

  // Declare the runtime hook.
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false,
                                       llvm::GlobalValue::ExternalLinkage,
                                       nullptr, RuntimeVarName);

  // Make a function that uses it.
  auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false),
                                      llvm::GlobalValue::LinkOnceODRLinkage,
                                      RuntimeUserName, &CGM.getModule());
  User->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    User->addFnAttr(llvm::Attribute::NoRedZone);
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User));
  auto *Load = Builder.CreateLoad(Var);
  Builder.CreateRet(Load);

  // Create a use of the function.  Now the definition of the runtime variable
  // should get pulled in, along with any static initializears.
  CGM.addUsedGlobal(User);
}

void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) {
d733 2
a734 2
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (!InstrumentRegions && !PGOReader)
d736 2
a737 1
  if (D->isImplicit())
a738 17
  setFuncName(Fn);

  // Set the linkage for variables based on the function linkage.  Usually, we
  // want to match it, but available_externally and extern_weak both have the
  // wrong semantics.
  VarLinkage = Fn->getLinkage();
  switch (VarLinkage) {
  case llvm::GlobalValue::ExternalWeakLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
    break;
  case llvm::GlobalValue::AvailableExternallyLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
    break;
  default:
    break;
  }

d740 1
a740 2
  if (InstrumentRegions) {
    emitRuntimeHook(CGM);
d742 2
a743 3
  }
  if (PGOReader) {
    loadRegionCounts(PGOReader);
a744 1
    applyFunctionAttributes(PGOReader, Fn);
d749 2
a750 2
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
  MapRegionCounters Walker(*RegionCounterMap);
d752 1
a752 8
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
a753 1
  FunctionHash = Walker.Hash.finalize();
d757 2
a758 2
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
  ComputeRegionCounts Walker(*StmtCountMap, *this);
a760 24
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.VisitObjCMethodDecl(MD);
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.VisitBlockDecl(BD);
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
}

void
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
                                    llvm::Function *Fn) {
  if (!haveRegionCounts())
    return;

  uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount();
  uint64_t FunctionCount = getRegionCount(0);
  if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
    // Turn on InlineHint attribute for hot functions.
    // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::InlineHint);
  else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount))
    // Turn on Cold attribute for cold functions.
    // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::Cold);
d768 2
a769 1
    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage,
d771 1
a771 3
                             getFuncVarName("counters"));
  RegionCounters->setAlignment(8);
  RegionCounters->setSection(getCountersSection(CGM));
d775 1
a775 1
  if (!RegionCounters)
d784 10
a793 11
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader) {
  CGM.getPGOStats().Visited++;
  RegionCounts.reset(new std::vector<uint64_t>);
  uint64_t Hash;
  if (PGOReader->getFunctionCounts(getFuncName(), Hash, *RegionCounts)) {
    CGM.getPGOStats().Missing++;
    RegionCounts.reset();
  } else if (Hash != FunctionHash ||
             RegionCounts->size() != NumRegionCounters) {
    CGM.getPGOStats().Mismatched++;
    RegionCounts.reset();
d798 6
a803 28
  RegionCounterMap.reset();
  StmtCountMap.reset();
  RegionCounts.reset();
  RegionCounters = nullptr;
}

/// \brief Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}

/// \brief Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
/// greater than \c Weight.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return Scaled;
a807 1
  // Check for empty weights.
d809 1
a809 4
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
d812 4
a815 2
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
                                      scaleBranchWeight(FalseCount, Scale));
d819 4
a822 12
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Check for empty weights.
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
  if (MaxWeight == 0)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(MaxWeight);

d825 4
a828 4
  for (uint64_t W : Weights)
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
d835 1
a835 1
    return nullptr;
d842 1
a842 1
    return nullptr;
@


1.1.1.4
log
@Import Clang 3.5svn r209886.
@
text
@d18 1
a19 2
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Endian.h"
a20 1
#include "llvm/Support/MD5.h"
d25 5
a29 2
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  RawFuncName = Fn->getName();
d31 6
a36 5
  // Function names may be prefixed with a binary '1' to indicate
  // that the backend should not modify the symbols due to any platform
  // naming convention. Do not include that '1' in the PGO profile name.
  if (RawFuncName[0] == '\1')
    RawFuncName = RawFuncName.substr(1);
d38 2
a39 2
  if (!Fn->hasLocalLinkage()) {
    PrefixedFuncName.reset(new std::string(RawFuncName));
d43 45
a87 34
  // For local symbols, prepend the main file name to distinguish them.
  // Do not include the full path in the file name since there's no guarantee
  // that it will stay the same, e.g., if the files are checked out from
  // version control in different locations.
  PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName));
  if (PrefixedFuncName->empty())
    PrefixedFuncName->assign("<unknown>");
  PrefixedFuncName->append(":");
  PrefixedFuncName->append(RawFuncName);
}

static llvm::Function *getRegisterFunc(CodeGenModule &CGM) {
  return CGM.getModule().getFunction("__llvm_profile_register_functions");
}

static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) {
  // Don't do this for Darwin.  compiler-rt uses linker magic.
  if (CGM.getTarget().getTriple().isOSDarwin())
    return nullptr;

  // Only need to insert this once per module.
  if (llvm::Function *RegisterF = getRegisterFunc(CGM))
    return &RegisterF->getEntryBlock();

  // Construct the function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false);
  auto *RegisterF = llvm::Function::Create(RegisterFTy,
                                           llvm::GlobalValue::InternalLinkage,
                                           "__llvm_profile_register_functions",
                                           &CGM.getModule());
  RegisterF->setUnnamedAddr(true);
  if (CGM.getCodeGenOpts().DisableRedZone)
    RegisterF->addFnAttr(llvm::Attribute::NoRedZone);
d89 2
a90 6
  // Construct and return the entry block.
  auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF);
  CGBuilderTy Builder(BB);
  Builder.CreateRetVoid();
  return BB;
}
d92 3
a94 6
static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) {
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
  return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function",
                                             RuntimeRegisterTy);
d97 58
a154 3
static bool isMachO(const CodeGenModule &CGM) {
  return CGM.getTarget().getTriple().isOSBinFormatMachO();
}
d156 5
a160 3
static StringRef getCountersSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
}
d162 1
a162 2
static StringRef getNameSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
d165 3
a167 3
static StringRef getDataSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
}
a168 2
llvm::GlobalVariable *CodeGenPGO::buildDataVar() {
  // Create name variable.
d170 12
a181 40
  auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(),
                                                     false);
  auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(),
                                        true, VarLinkage, VarName,
                                        getFuncVarName("name"));
  Name->setSection(getNameSection(CGM));
  Name->setAlignment(1);

  // Create data variable.
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Int64Ty = llvm::Type::getInt64Ty(Ctx);
  auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
  auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
  llvm::Type *DataTypes[] = {
    Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
  };
  auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
  llvm::Constant *DataVals[] = {
    llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
    llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
    llvm::ConstantInt::get(Int64Ty, FunctionHash),
    llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
    llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
  };
  auto *Data =
    new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
                             llvm::ConstantStruct::get(DataTy, DataVals),
                             getFuncVarName("data"));

  // All the data should be packed into an array in its own section.
  Data->setSection(getDataSection(CGM));
  Data->setAlignment(8);

  // Hide all these symbols so that we correctly get a copy for each
  // executable.  The profile format expects names and counters to be
  // contiguous, so references into shared objects would be invalid.
  if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) {
    Name->setVisibility(llvm::GlobalValue::HiddenVisibility);
    Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
d183 4
d188 2
a189 4
  // Make sure the data doesn't get deleted.
  CGM.addUsedGlobal(Data);
  return Data;
}
d191 1
a191 3
void CodeGenPGO::emitInstrumentationData() {
  if (!RegionCounters)
    return;
d193 22
a214 11
  // Build the data.
  auto *Data = buildDataVar();

  // Register the data.
  auto *RegisterBB = getOrInsertRegisterBB(CGM);
  if (!RegisterBB)
    return;
  CGBuilderTy Builder(RegisterBB->getTerminator());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  Builder.CreateCall(getOrInsertRuntimeRegister(CGM),
                     Builder.CreateBitCast(Data, VoidPtrTy));
d218 10
a227 2
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return nullptr;
d229 5
a233 13
  assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr &&
         "profile initialization already emitted");

  // Get the function to call at initialization.
  llvm::Constant *RegisterF = getRegisterFunc(CGM);
  if (!RegisterF)
    return nullptr;

  // Create the initialization function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false),
                                   llvm::GlobalValue::InternalLinkage,
                                   "__llvm_profile_init", &CGM.getModule());
d235 1
d240 14
a253 4
  // Add the basic block and the necessary calls.
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F));
  Builder.CreateCall(RegisterF);
  Builder.CreateRetVoid();
d259 2
a260 63
/// \brief Stable hasher for PGO region counters.
///
/// PGOHash produces a stable hash of a given function's control flow.
///
/// Changing the output of this hash will invalidate all previously generated
/// profiles -- i.e., don't do it.
///
/// \note  When this hash does eventually change (years?), we still need to
/// support old hashes.  We'll need to pull in the version number from the
/// profile data format and use the matching hash function.
class PGOHash {
  uint64_t Working;
  unsigned Count;
  llvm::MD5 MD5;

  static const int NumBitsPerType = 6;
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
  static const unsigned TooBig = 1u << NumBitsPerType;

public:
  /// \brief Hash values for AST nodes.
  ///
  /// Distinct values for AST nodes that have region counters attached.
  ///
  /// These values must be stable.  All new members must be added at the end,
  /// and no members should be removed.  Changing the enumeration value for an
  /// AST node will affect the hash of every function that contains that node.
  enum HashType : unsigned char {
    None = 0,
    LabelStmt = 1,
    WhileStmt,
    DoStmt,
    ForStmt,
    CXXForRangeStmt,
    ObjCForCollectionStmt,
    SwitchStmt,
    CaseStmt,
    DefaultStmt,
    IfStmt,
    CXXTryStmt,
    CXXCatchStmt,
    ConditionalOperator,
    BinaryOperatorLAnd,
    BinaryOperatorLOr,
    BinaryConditionalOperator,

    // Keep this last.  It's for the static assert that follows.
    LastHashType
  };
  static_assert(LastHashType <= TooBig, "Too many types in HashType");

  // TODO: When this format changes, take in a version number here, and use the
  // old hash calculation for file formats that used the old hash.
  PGOHash() : Working(0), Count(0) {}
  void combine(HashType Type);
  uint64_t finalize();
};
const int PGOHash::NumBitsPerType;
const unsigned PGOHash::NumTypesPerWord;
const unsigned PGOHash::TooBig;

  /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
  struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
a262 2
    /// The function hash.
    PGOHash Hash;
d264 1
a264 1
    llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
d266 3
a268 2
    MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
        : NextCounter(0), CounterMap(CounterMap) {}
d270 4
a273 22
    // Blocks and lambdas are handled as separate functions, so we need not
    // traverse them in the parent context.
    bool TraverseBlockExpr(BlockExpr *BE) { return true; }
    bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
    bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }

    bool VisitDecl(const Decl *D) {
      switch (D->getKind()) {
      default:
        break;
      case Decl::Function:
      case Decl::CXXMethod:
      case Decl::CXXConstructor:
      case Decl::CXXDestructor:
      case Decl::CXXConversion:
      case Decl::ObjCMethod:
      case Decl::Block:
      case Decl::Captured:
        CounterMap[D->getBody()] = NextCounter++;
        break;
      }
      return true;
d275 1
d277 107
a383 8
    bool VisitStmt(const Stmt *S) {
      auto Type = getHashType(S);
      if (Type == PGOHash::None)
        return true;

      CounterMap[S] = NextCounter++;
      Hash.combine(Type);
      return true;
d385 5
a389 42
    PGOHash::HashType getHashType(const Stmt *S) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::LabelStmtClass:
        return PGOHash::LabelStmt;
      case Stmt::WhileStmtClass:
        return PGOHash::WhileStmt;
      case Stmt::DoStmtClass:
        return PGOHash::DoStmt;
      case Stmt::ForStmtClass:
        return PGOHash::ForStmt;
      case Stmt::CXXForRangeStmtClass:
        return PGOHash::CXXForRangeStmt;
      case Stmt::ObjCForCollectionStmtClass:
        return PGOHash::ObjCForCollectionStmt;
      case Stmt::SwitchStmtClass:
        return PGOHash::SwitchStmt;
      case Stmt::CaseStmtClass:
        return PGOHash::CaseStmt;
      case Stmt::DefaultStmtClass:
        return PGOHash::DefaultStmt;
      case Stmt::IfStmtClass:
        return PGOHash::IfStmt;
      case Stmt::CXXTryStmtClass:
        return PGOHash::CXXTryStmt;
      case Stmt::CXXCatchStmtClass:
        return PGOHash::CXXCatchStmt;
      case Stmt::ConditionalOperatorClass:
        return PGOHash::ConditionalOperator;
      case Stmt::BinaryConditionalOperatorClass:
        return PGOHash::BinaryConditionalOperator;
      case Stmt::BinaryOperatorClass: {
        const BinaryOperator *BO = cast<BinaryOperator>(S);
        if (BO->getOpcode() == BO_LAnd)
          return PGOHash::BinaryOperatorLAnd;
        if (BO->getOpcode() == BO_LOr)
          return PGOHash::BinaryOperatorLOr;
        break;
      }
      }
      return PGOHash::None;
d404 1
a404 1
    llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
d406 1
a406 1
    /// BreakContinueStack - Keep counts of breaks and continues inside loops.
d414 4
a417 3
    ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
                        CodeGenPGO &PGO)
        : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
d421 1
a421 1
        CountMap[S] = PGO.getCurrentRegionCount();
d434 2
a435 3
    void VisitFunctionDecl(const FunctionDecl *D) {
      // Counter tracks entry to the function body.
      RegionCounter Cnt(PGO, D->getBody());
d437 2
a438 31
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    // Skip lambda expressions. We visit these as FunctionDecls when we're
    // generating them and aren't interested in the body when generating a
    // parent context.
    void VisitLambdaExpr(const LambdaExpr *LE) {}

    void VisitCapturedDecl(const CapturedDecl *D) {
      // Counter tracks entry to the capture body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
      // Counter tracks entry to the method body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitBlockDecl(const BlockDecl *D) {
      // Counter tracks entry to the block body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
a456 1
      // Counter tracks the block following the label.
d459 1
a459 1
      CountMap[S] = PGO.getCurrentRegionCount();
a480 1
      // Counter tracks the body of the loop.
d486 1
a486 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d497 1
a497 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a505 1
      // Counter tracks the body of the loop.
d509 1
a509 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d521 1
a521 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a531 1
      // Counter tracks the body of the loop.
d537 1
a537 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
d546 1
a546 1
        CountMap[S->getInc()] = PGO.getCurrentRegionCount();
d558 1
a558 1
        CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a569 1
      // Counter tracks the body of the loop.
d575 1
a575 1
      CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
d584 1
a584 1
      CountMap[S->getInc()] = PGO.getCurrentRegionCount();
d594 1
a594 1
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
a603 1
      // Counter tracks the body of the loop.
d607 1
a607 1
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
a624 1
      // Counter tracks the exit block of the switch.
a631 3
      // Counter for this particular case. This counts only jumps from the
      // switch header and does not include fallthrough from the case before
      // this one.
d634 1
a634 1
      CountMap[S] = Cnt.getCount();
a640 2
      // Counter for this default case. This does not include fallthrough from
      // the previous case.
d643 1
a643 1
      CountMap[S] = Cnt.getCount();
a649 2
      // Counter tracks the "then" part of an if statement. The count for
      // the "else" part, if it exists, will be calculated from this counter.
d654 1
a654 1
      CountMap[S->getThen()] = PGO.getCurrentRegionCount();
d660 1
a660 1
        CountMap[S->getElse()] = PGO.getCurrentRegionCount();
a672 1
      // Counter tracks the continuation block of the try statement.
a679 1
      // Counter tracks the catch statement's handler block.
d682 1
a682 1
      CountMap[S] = PGO.getCurrentRegionCount();
d686 1
a686 2
    void VisitAbstractConditionalOperator(
        const AbstractConditionalOperator *E) {
a687 2
      // Counter tracks the "true" part of a conditional operator. The
      // count in the "false" part will be calculated from this counter.
d692 1
a692 1
      CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
d697 1
a697 1
      CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
a706 1
      // Counter tracks the right hand side of a logical and operator.
d710 1
a710 1
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
a718 1
      // Counter tracks the right hand side of a logical or operator.
d722 1
a722 1
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
d731 1
a731 67
void PGOHash::combine(HashType Type) {
  // Check that we never combine 0 and only have six bits.
  assert(Type && "Hash is invalid: unexpected type 0");
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");

  // Pass through MD5 if enough work has built up.
  if (Count && Count % NumTypesPerWord == 0) {
    using namespace llvm::support;
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
    Working = 0;
  }

  // Accumulate the current type.
  ++Count;
  Working = Working << NumBitsPerType | Type;
}

uint64_t PGOHash::finalize() {
  // Use Working as the hash directly if we never used MD5.
  if (Count <= NumTypesPerWord)
    // No need to byte swap here, since none of the math was endian-dependent.
    // This number will be byte-swapped as required on endianness transitions,
    // so we will see the same value on the other side.
    return Working;

  // Check for remaining work in Working.
  if (Working)
    MD5.update(Working);

  // Finalize the MD5 and return the hash.
  llvm::MD5::MD5Result Result;
  MD5.final(Result);
  using namespace llvm::support;
  return endian::read<uint64_t, little, unaligned>(Result);
}

static void emitRuntimeHook(CodeGenModule &CGM) {
  const char *const RuntimeVarName = "__llvm_profile_runtime";
  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
  if (CGM.getModule().getGlobalVariable(RuntimeVarName))
    return;

  // Declare the runtime hook.
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false,
                                       llvm::GlobalValue::ExternalLinkage,
                                       nullptr, RuntimeVarName);

  // Make a function that uses it.
  auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false),
                                      llvm::GlobalValue::LinkOnceODRLinkage,
                                      RuntimeUserName, &CGM.getModule());
  User->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    User->addFnAttr(llvm::Attribute::NoRedZone);
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User));
  auto *Load = Builder.CreateLoad(Var);
  Builder.CreateRet(Load);

  // Create a use of the function.  Now the definition of the runtime variable
  // should get pulled in, along with any static initializears.
  CGM.addUsedGlobal(User);
}

void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) {
d733 2
a734 2
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (!InstrumentRegions && !PGOReader)
d736 2
a737 1
  if (D->isImplicit())
a738 17
  setFuncName(Fn);

  // Set the linkage for variables based on the function linkage.  Usually, we
  // want to match it, but available_externally and extern_weak both have the
  // wrong semantics.
  VarLinkage = Fn->getLinkage();
  switch (VarLinkage) {
  case llvm::GlobalValue::ExternalWeakLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
    break;
  case llvm::GlobalValue::AvailableExternallyLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
    break;
  default:
    break;
  }

d740 1
a740 2
  if (InstrumentRegions) {
    emitRuntimeHook(CGM);
d742 2
a743 3
  }
  if (PGOReader) {
    loadRegionCounts(PGOReader);
a744 1
    applyFunctionAttributes(PGOReader, Fn);
d749 2
a750 2
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
  MapRegionCounters Walker(*RegionCounterMap);
d752 1
a752 8
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
a753 1
  FunctionHash = Walker.Hash.finalize();
d757 2
a758 2
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
  ComputeRegionCounts Walker(*StmtCountMap, *this);
a760 24
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.VisitObjCMethodDecl(MD);
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.VisitBlockDecl(BD);
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
}

void
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
                                    llvm::Function *Fn) {
  if (!haveRegionCounts())
    return;

  uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount();
  uint64_t FunctionCount = getRegionCount(0);
  if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
    // Turn on InlineHint attribute for hot functions.
    // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::InlineHint);
  else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount))
    // Turn on Cold attribute for cold functions.
    // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::Cold);
d768 2
a769 1
    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage,
d771 1
a771 3
                             getFuncVarName("counters"));
  RegionCounters->setAlignment(8);
  RegionCounters->setSection(getCountersSection(CGM));
d775 1
a775 1
  if (!RegionCounters)
d784 10
a793 11
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader) {
  CGM.getPGOStats().Visited++;
  RegionCounts.reset(new std::vector<uint64_t>);
  uint64_t Hash;
  if (PGOReader->getFunctionCounts(getFuncName(), Hash, *RegionCounts)) {
    CGM.getPGOStats().Missing++;
    RegionCounts.reset();
  } else if (Hash != FunctionHash ||
             RegionCounts->size() != NumRegionCounters) {
    CGM.getPGOStats().Mismatched++;
    RegionCounts.reset();
d798 6
a803 28
  RegionCounterMap.reset();
  StmtCountMap.reset();
  RegionCounts.reset();
  RegionCounters = nullptr;
}

/// \brief Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}

/// \brief Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
/// greater than \c Weight.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return Scaled;
a807 1
  // Check for empty weights.
d809 1
a809 4
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
d812 4
a815 2
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
                                      scaleBranchWeight(FalseCount, Scale));
d819 4
a822 12
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Check for empty weights.
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
  if (MaxWeight == 0)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(MaxWeight);

d825 4
a828 4
  for (uint64_t W : Weights)
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
d835 1
a835 1
    return nullptr;
d842 1
a842 1
    return nullptr;
@


1.1.1.5
log
@Import clang 3.6svn r215315.
@
text
@a15 1
#include "CoverageMappingGen.h"
d27 2
a28 3
void CodeGenPGO::setFuncName(StringRef Name,
                             llvm::GlobalValue::LinkageTypes Linkage) {
  RawFuncName = Name;
d36 1
a36 1
  if (!llvm::GlobalValue::isLocalLinkage(Linkage)) {
a51 21
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  setFuncName(Fn->getName(), Fn->getLinkage());
}

void CodeGenPGO::setVarLinkage(llvm::GlobalValue::LinkageTypes Linkage) {
  // Set the linkage for variables based on the function linkage.  Usually, we
  // want to match it, but available_externally and extern_weak both have the
  // wrong semantics.
  VarLinkage = Linkage;
  switch (VarLinkage) {
  case llvm::GlobalValue::ExternalWeakLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
    break;
  case llvm::GlobalValue::AvailableExternallyLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
    break;
  default:
    break;
  }
}

d123 19
a141 28
  llvm::GlobalVariable *Data = nullptr;
  if (RegionCounters) {
    llvm::Type *DataTypes[] = {
      Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
    };
    auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
    llvm::Constant *DataVals[] = {
      llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
      llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
      llvm::ConstantInt::get(Int64Ty, FunctionHash),
      llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
      llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
    };
    Data =
      new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
                               llvm::ConstantStruct::get(DataTy, DataVals),
                               getFuncVarName("data"));

    // All the data should be packed into an array in its own section.
    Data->setSection(getDataSection(CGM));
    Data->setAlignment(8);
  }

  // Create coverage mapping data variable.
  if (!CoverageMapping.empty())
    CGM.getCoverageMapping()->addFunctionMappingRecord(Name,
                                                       getFuncName(),
                                                       CoverageMapping);
d148 2
a149 4
    if (Data) {
      Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
      RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
    }
d153 1
a153 1
  if (Data) CGM.addUsedGlobal(Data);
a809 14
void CodeGenPGO::checkGlobalDecl(GlobalDecl GD) {
  // Make sure we only emit coverage mapping for one constructor/destructor.
  // Clang emits several functions for the constructor and the destructor of
  // a class. Every function is instrumented, but we only want to provide
  // coverage for one of them. Because of that we only emit the coverage mapping
  // for the base constructor/destructor.
  if ((isa<CXXConstructorDecl>(GD.getDecl()) &&
       GD.getCtorType() != Ctor_Base) ||
      (isa<CXXDestructorDecl>(GD.getDecl()) &&
       GD.getDtorType() != Dtor_Base)) {
    SkipCoverageMapping = true;
  }
}

a816 1
  CGM.ClearUnusedCoverageMapping(D);
d818 15
a832 1
  setVarLinkage(Fn->getLinkage());
a837 2
    if (CGM.getCodeGenOpts().CoverageMapping)
      emitCounterRegionMapping(D);
d840 1
a840 2
    SourceManager &SM = CGM.getContext().getSourceManager();
    loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
a861 39
void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
  if (SkipCoverageMapping)
    return;
  // Don't map the functions inside the system headers
  auto Loc = D->getBody()->getLocStart();
  if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    return;

  llvm::raw_string_ostream OS(CoverageMapping);
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                CGM.getContext().getSourceManager(),
                                CGM.getLangOpts(), RegionCounterMap.get(),
                                NumRegionCounters);
  MappingGen.emitCounterMapping(D, OS);
  OS.flush();
}

void
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
                                    llvm::GlobalValue::LinkageTypes Linkage) {
  if (SkipCoverageMapping)
    return;
  setFuncName(FuncName, Linkage);
  setVarLinkage(Linkage);

  // Don't map the functions inside the system headers
  auto Loc = D->getBody()->getLocStart();
  if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    return;

  llvm::raw_string_ostream OS(CoverageMapping);
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                CGM.getContext().getSourceManager(),
                                CGM.getLangOpts());
  MappingGen.emitEmptyMapping(D, OS);
  OS.flush();
  buildDataVar();
}

d915 2
a916 3
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
                                  bool IsInMainFile) {
  CGM.getPGOStats().addVisited(IsInMainFile);
d918 7
a924 9
  if (std::error_code EC = PGOReader->getFunctionCounts(
          getFuncName(), FunctionHash, *RegionCounts)) {
    if (EC == llvm::instrprof_error::unknown_function)
      CGM.getPGOStats().addMissing(IsInMainFile);
    else if (EC == llvm::instrprof_error::hash_mismatch)
      CGM.getPGOStats().addMismatched(IsInMainFile);
    else if (EC == llvm::instrprof_error::malformed)
      // TODO: Consider a more specific warning for this case.
      CGM.getPGOStats().addMismatched(IsInMainFile);
@


1.1.1.5.2.1
log
@Update LLVM to 3.6.1, requested by joerg in ticket 824.
@
text
@a18 1
#include "llvm/IR/Intrinsics.h"
d30 1
a30 1
  StringRef RawFuncName = Name;
d38 3
a40 10
  FuncName = RawFuncName;
  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
    // For local symbols, prepend the main file name to distinguish them.
    // Do not include the full path in the file name since there's no guarantee
    // that it will stay the same, e.g., if the files are checked out from
    // version control in different locations.
    if (CGM.getCodeGenOpts().MainFileName.empty())
      FuncName = FuncName.insert(0, "<unknown>:");
    else
      FuncName = FuncName.insert(0, CGM.getCodeGenOpts().MainFileName + ":");
d43 9
a51 3
  // If we're generating a profile, create a variable for the name.
  if (CGM.getCodeGenOpts().ProfileInstrGenerate)
    createFuncNameVar(Linkage);
d58 178
a235 17
void CodeGenPGO::createFuncNameVar(llvm::GlobalValue::LinkageTypes Linkage) {
  // Usually, we want to match the function's linkage, but
  // available_externally and extern_weak both have the wrong semantics.
  if (Linkage == llvm::GlobalValue::ExternalWeakLinkage)
    Linkage = llvm::GlobalValue::LinkOnceAnyLinkage;
  else if (Linkage == llvm::GlobalValue::AvailableExternallyLinkage)
    Linkage = llvm::GlobalValue::LinkOnceODRLinkage;

  auto *Value =
      llvm::ConstantDataArray::getString(CGM.getLLVMContext(), FuncName, false);
  FuncNameVar =
      new llvm::GlobalVariable(CGM.getModule(), Value->getType(), true, Linkage,
                               Value, "__llvm_profile_name_" + FuncName);

  // Hide the symbol so that we correctly get a copy for each executable.
  if (!llvm::GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
    FuncNameVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
d815 29
d867 1
d870 6
a875 2
  if (CGM.getCodeGenOpts().CoverageMapping)
    emitCounterRegionMapping(D);
a907 1
  std::string CoverageMapping;
d911 2
a912 1
                                CGM.getLangOpts(), RegionCounterMap.get());
a914 6

  if (CoverageMapping.empty())
    return;

  CGM.getCoverageMapping()->addFunctionMappingRecord(
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
d923 1
a929 1
  std::string CoverageMapping;
d936 1
a936 6

  if (CoverageMapping.empty())
    return;

  CGM.getCoverageMapping()->addFunctionMappingRecord(
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
d970 12
d983 1
a983 1
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
d985 5
a989 8
  if (!Builder.GetInsertPoint())
    return;
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  Builder.CreateCall4(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
                      llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
                      Builder.getInt64(FunctionHash),
                      Builder.getInt32(NumRegionCounters),
                      Builder.getInt32(Counter));
d995 3
a997 3
  RegionCounts.clear();
  if (std::error_code EC =
          PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
d1005 1
a1005 1
    RegionCounts.clear();
d1009 7
@


1.1.1.6
log
@Import Clang 3.6RC1 r227398.
@
text
@a18 1
#include "llvm/IR/Intrinsics.h"
d30 1
a30 1
  StringRef RawFuncName = Name;
d38 3
a40 10
  FuncName = RawFuncName;
  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
    // For local symbols, prepend the main file name to distinguish them.
    // Do not include the full path in the file name since there's no guarantee
    // that it will stay the same, e.g., if the files are checked out from
    // version control in different locations.
    if (CGM.getCodeGenOpts().MainFileName.empty())
      FuncName = FuncName.insert(0, "<unknown>:");
    else
      FuncName = FuncName.insert(0, CGM.getCodeGenOpts().MainFileName + ":");
d43 9
a51 3
  // If we're generating a profile, create a variable for the name.
  if (CGM.getCodeGenOpts().ProfileInstrGenerate)
    createFuncNameVar(Linkage);
d58 178
a235 17
void CodeGenPGO::createFuncNameVar(llvm::GlobalValue::LinkageTypes Linkage) {
  // Usually, we want to match the function's linkage, but
  // available_externally and extern_weak both have the wrong semantics.
  if (Linkage == llvm::GlobalValue::ExternalWeakLinkage)
    Linkage = llvm::GlobalValue::LinkOnceAnyLinkage;
  else if (Linkage == llvm::GlobalValue::AvailableExternallyLinkage)
    Linkage = llvm::GlobalValue::LinkOnceODRLinkage;

  auto *Value =
      llvm::ConstantDataArray::getString(CGM.getLLVMContext(), FuncName, false);
  FuncNameVar =
      new llvm::GlobalVariable(CGM.getModule(), Value->getType(), true, Linkage,
                               Value, "__llvm_profile_name_" + FuncName);

  // Hide the symbol so that we correctly get a copy for each executable.
  if (!llvm::GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
    FuncNameVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
d815 29
d867 1
d870 6
a875 2
  if (CGM.getCodeGenOpts().CoverageMapping)
    emitCounterRegionMapping(D);
a907 1
  std::string CoverageMapping;
d911 2
a912 1
                                CGM.getLangOpts(), RegionCounterMap.get());
a914 6

  if (CoverageMapping.empty())
    return;

  CGM.getCoverageMapping()->addFunctionMappingRecord(
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
d923 1
a929 1
  std::string CoverageMapping;
d936 1
a936 6

  if (CoverageMapping.empty())
    return;

  CGM.getCoverageMapping()->addFunctionMappingRecord(
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
d970 12
d983 1
a983 1
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
d985 5
a989 8
  if (!Builder.GetInsertPoint())
    return;
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  Builder.CreateCall4(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
                      llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
                      Builder.getInt64(FunctionHash),
                      Builder.getInt32(NumRegionCounters),
                      Builder.getInt32(Counter));
d995 3
a997 3
  RegionCounts.clear();
  if (std::error_code EC =
          PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
d1005 1
a1005 1
    RegionCounts.clear();
d1009 7
@


1.1.1.7
log
@Import Clang 3.8.0rc3 r261930.
@
text
@d31 19
a49 4
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  FuncName = llvm::getPGOFuncName(
      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
      PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
d53 1
a53 1
    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
d60 19
d141 34
a174 32
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
  /// The next counter value to assign.
  unsigned NextCounter;
  /// The function hash.
  PGOHash Hash;
  /// The map of statements to counters.
  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;

  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
      : NextCounter(0), CounterMap(CounterMap) {}

  // Blocks and lambdas are handled as separate functions, so we need not
  // traverse them in the parent context.
  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
  bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }

  bool VisitDecl(const Decl *D) {
    switch (D->getKind()) {
    default:
      break;
    case Decl::Function:
    case Decl::CXXMethod:
    case Decl::CXXConstructor:
    case Decl::CXXDestructor:
    case Decl::CXXConversion:
    case Decl::ObjCMethod:
    case Decl::Block:
    case Decl::Captured:
      CounterMap[D->getBody()] = NextCounter++;
      break;
a175 2
    return true;
  }
d177 7
a183 3
  bool VisitStmt(const Stmt *S) {
    auto Type = getHashType(S);
    if (Type == PGOHash::None)
d185 45
d231 30
a260 43
    CounterMap[S] = NextCounter++;
    Hash.combine(Type);
    return true;
  }
  PGOHash::HashType getHashType(const Stmt *S) {
    switch (S->getStmtClass()) {
    default:
      break;
    case Stmt::LabelStmtClass:
      return PGOHash::LabelStmt;
    case Stmt::WhileStmtClass:
      return PGOHash::WhileStmt;
    case Stmt::DoStmtClass:
      return PGOHash::DoStmt;
    case Stmt::ForStmtClass:
      return PGOHash::ForStmt;
    case Stmt::CXXForRangeStmtClass:
      return PGOHash::CXXForRangeStmt;
    case Stmt::ObjCForCollectionStmtClass:
      return PGOHash::ObjCForCollectionStmt;
    case Stmt::SwitchStmtClass:
      return PGOHash::SwitchStmt;
    case Stmt::CaseStmtClass:
      return PGOHash::CaseStmt;
    case Stmt::DefaultStmtClass:
      return PGOHash::DefaultStmt;
    case Stmt::IfStmtClass:
      return PGOHash::IfStmt;
    case Stmt::CXXTryStmtClass:
      return PGOHash::CXXTryStmt;
    case Stmt::CXXCatchStmtClass:
      return PGOHash::CXXCatchStmt;
    case Stmt::ConditionalOperatorClass:
      return PGOHash::ConditionalOperator;
    case Stmt::BinaryConditionalOperatorClass:
      return PGOHash::BinaryConditionalOperator;
    case Stmt::BinaryOperatorClass: {
      const BinaryOperator *BO = cast<BinaryOperator>(S);
      if (BO->getOpcode() == BO_LAnd)
        return PGOHash::BinaryOperatorLAnd;
      if (BO->getOpcode() == BO_LOr)
        return PGOHash::BinaryOperatorLOr;
      break;
d262 7
a269 3
    return PGOHash::None;
  }
};
d271 7
a277 23
/// A StmtVisitor that propagates the raw counts through the AST and
/// records the count at statements where the value may change.
struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
  /// PGO state.
  CodeGenPGO &PGO;

  /// A flag that is set when the current count should be recorded on the
  /// next statement, such as at the exit of a loop.
  bool RecordNextStmtCount;

  /// The count at the current location in the traversal.
  uint64_t CurrentCount;

  /// The map of statements to count values.
  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;

  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
  struct BreakContinue {
    uint64_t BreakCount;
    uint64_t ContinueCount;
    BreakContinue() : BreakCount(0), ContinueCount(0) {}
  };
  SmallVector<BreakContinue, 8> BreakContinueStack;
d279 11
a289 8
  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
                      CodeGenPGO &PGO)
      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}

  void RecordStmtCount(const Stmt *S) {
    if (RecordNextStmtCount) {
      CountMap[S] = CurrentCount;
      RecordNextStmtCount = false;
a290 1
  }
d292 7
a298 5
  /// Set and return the current count.
  uint64_t setCount(uint64_t Count) {
    CurrentCount = Count;
    return Count;
  }
d300 7
a306 6
  void VisitStmt(const Stmt *S) {
    RecordStmtCount(S);
    for (const Stmt *Child : S->children())
      if (Child)
        this->Visit(Child);
  }
d308 7
a314 6
  void VisitFunctionDecl(const FunctionDecl *D) {
    // Counter tracks entry to the function body.
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    CountMap[D->getBody()] = BodyCount;
    Visit(D->getBody());
  }
d316 5
a320 11
  // Skip lambda expressions. We visit these as FunctionDecls when we're
  // generating them and aren't interested in the body when generating a
  // parent context.
  void VisitLambdaExpr(const LambdaExpr *LE) {}

  void VisitCapturedDecl(const CapturedDecl *D) {
    // Counter tracks entry to the capture body.
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    CountMap[D->getBody()] = BodyCount;
    Visit(D->getBody());
  }
d322 8
a329 6
  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
    // Counter tracks entry to the method body.
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    CountMap[D->getBody()] = BodyCount;
    Visit(D->getBody());
  }
d331 7
a337 6
  void VisitBlockDecl(const BlockDecl *D) {
    // Counter tracks entry to the block body.
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    CountMap[D->getBody()] = BodyCount;
    Visit(D->getBody());
  }
d339 7
a345 7
  void VisitReturnStmt(const ReturnStmt *S) {
    RecordStmtCount(S);
    if (S->getRetValue())
      Visit(S->getRetValue());
    CurrentCount = 0;
    RecordNextStmtCount = true;
  }
d347 25
a371 7
  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
    RecordStmtCount(E);
    if (E->getSubExpr())
      Visit(E->getSubExpr());
    CurrentCount = 0;
    RecordNextStmtCount = true;
  }
d373 24
a396 5
  void VisitGotoStmt(const GotoStmt *S) {
    RecordStmtCount(S);
    CurrentCount = 0;
    RecordNextStmtCount = true;
  }
d398 38
a435 7
  void VisitLabelStmt(const LabelStmt *S) {
    RecordNextStmtCount = false;
    // Counter tracks the block following the label.
    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
    CountMap[S] = BlockCount;
    Visit(S->getSubStmt());
  }
d437 22
a458 7
  void VisitBreakStmt(const BreakStmt *S) {
    RecordStmtCount(S);
    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
    BreakContinueStack.back().BreakCount += CurrentCount;
    CurrentCount = 0;
    RecordNextStmtCount = true;
  }
d460 1
a460 7
  void VisitContinueStmt(const ContinueStmt *S) {
    RecordStmtCount(S);
    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
    BreakContinueStack.back().ContinueCount += CurrentCount;
    CurrentCount = 0;
    RecordNextStmtCount = true;
  }
d462 10
a471 24
  void VisitWhileStmt(const WhileStmt *S) {
    RecordStmtCount(S);
    uint64_t ParentCount = CurrentCount;

    BreakContinueStack.push_back(BreakContinue());
    // Visit the body region first so the break/continue adjustments can be
    // included when visiting the condition.
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    CountMap[S->getBody()] = CurrentCount;
    Visit(S->getBody());
    uint64_t BackedgeCount = CurrentCount;

    // ...then go back and propagate counts through the condition. The count
    // at the start of the condition is the sum of the incoming edges,
    // the backedge from the end of the loop body, and the edges from
    // continue statements.
    BreakContinue BC = BreakContinueStack.pop_back_val();
    uint64_t CondCount =
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    CountMap[S->getCond()] = CondCount;
    Visit(S->getCond());
    setCount(BC.BreakCount + CondCount - BodyCount);
    RecordNextStmtCount = true;
  }
d473 13
a485 43
  void VisitDoStmt(const DoStmt *S) {
    RecordStmtCount(S);
    uint64_t LoopCount = PGO.getRegionCount(S);

    BreakContinueStack.push_back(BreakContinue());
    // The count doesn't include the fallthrough from the parent scope. Add it.
    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
    CountMap[S->getBody()] = BodyCount;
    Visit(S->getBody());
    uint64_t BackedgeCount = CurrentCount;

    BreakContinue BC = BreakContinueStack.pop_back_val();
    // The count at the start of the condition is equal to the count at the
    // end of the body, plus any continues.
    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
    CountMap[S->getCond()] = CondCount;
    Visit(S->getCond());
    setCount(BC.BreakCount + CondCount - LoopCount);
    RecordNextStmtCount = true;
  }

  void VisitForStmt(const ForStmt *S) {
    RecordStmtCount(S);
    if (S->getInit())
      Visit(S->getInit());

    uint64_t ParentCount = CurrentCount;

    BreakContinueStack.push_back(BreakContinue());
    // Visit the body region first. (This is basically the same as a while
    // loop; see further comments in VisitWhileStmt.)
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    CountMap[S->getBody()] = BodyCount;
    Visit(S->getBody());
    uint64_t BackedgeCount = CurrentCount;
    BreakContinue BC = BreakContinueStack.pop_back_val();

    // The increment is essentially part of the body but it needs to include
    // the count for all the continue statements.
    if (S->getInc()) {
      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
      CountMap[S->getInc()] = IncCount;
      Visit(S->getInc());
d488 2
a489 5
    // ...then go back and propagate counts through the condition.
    uint64_t CondCount =
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    if (S->getCond()) {
      CountMap[S->getCond()] = CondCount;
d491 11
a502 3
    setCount(BC.BreakCount + CondCount - BodyCount);
    RecordNextStmtCount = true;
  }
d504 11
a514 30
  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
    RecordStmtCount(S);
    Visit(S->getLoopVarStmt());
    Visit(S->getRangeStmt());
    Visit(S->getBeginEndStmt());

    uint64_t ParentCount = CurrentCount;
    BreakContinueStack.push_back(BreakContinue());
    // Visit the body region first. (This is basically the same as a while
    // loop; see further comments in VisitWhileStmt.)
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    CountMap[S->getBody()] = BodyCount;
    Visit(S->getBody());
    uint64_t BackedgeCount = CurrentCount;
    BreakContinue BC = BreakContinueStack.pop_back_val();

    // The increment is essentially part of the body but it needs to include
    // the count for all the continue statements.
    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
    CountMap[S->getInc()] = IncCount;
    Visit(S->getInc());

    // ...then go back and propagate counts through the condition.
    uint64_t CondCount =
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    CountMap[S->getCond()] = CondCount;
    Visit(S->getCond());
    setCount(BC.BreakCount + CondCount - BodyCount);
    RecordNextStmtCount = true;
  }
d516 10
a525 16
  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
    RecordStmtCount(S);
    Visit(S->getElement());
    uint64_t ParentCount = CurrentCount;
    BreakContinueStack.push_back(BreakContinue());
    // Counter tracks the body of the loop.
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    CountMap[S->getBody()] = BodyCount;
    Visit(S->getBody());
    uint64_t BackedgeCount = CurrentCount;
    BreakContinue BC = BreakContinueStack.pop_back_val();

    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
             BodyCount);
    RecordNextStmtCount = true;
  }
d527 6
a532 14
  void VisitSwitchStmt(const SwitchStmt *S) {
    RecordStmtCount(S);
    Visit(S->getCond());
    CurrentCount = 0;
    BreakContinueStack.push_back(BreakContinue());
    Visit(S->getBody());
    // If the switch is inside a loop, add the continue counts.
    BreakContinue BC = BreakContinueStack.pop_back_val();
    if (!BreakContinueStack.empty())
      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
    // Counter tracks the exit block of the switch.
    setCount(PGO.getRegionCount(S));
    RecordNextStmtCount = true;
  }
d534 14
a547 13
  void VisitSwitchCase(const SwitchCase *S) {
    RecordNextStmtCount = false;
    // Counter for this particular case. This counts only jumps from the
    // switch header and does not include fallthrough from the case before
    // this one.
    uint64_t CaseCount = PGO.getRegionCount(S);
    setCount(CurrentCount + CaseCount);
    // We need the count without fallthrough in the mapping, so it's more useful
    // for branch probabilities.
    CountMap[S] = CaseCount;
    RecordNextStmtCount = true;
    Visit(S->getSubStmt());
  }
d549 10
a558 23
  void VisitIfStmt(const IfStmt *S) {
    RecordStmtCount(S);
    uint64_t ParentCount = CurrentCount;
    Visit(S->getCond());

    // Counter tracks the "then" part of an if statement. The count for
    // the "else" part, if it exists, will be calculated from this counter.
    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
    CountMap[S->getThen()] = ThenCount;
    Visit(S->getThen());
    uint64_t OutCount = CurrentCount;

    uint64_t ElseCount = ParentCount - ThenCount;
    if (S->getElse()) {
      setCount(ElseCount);
      CountMap[S->getElse()] = ElseCount;
      Visit(S->getElse());
      OutCount += CurrentCount;
    } else
      OutCount += ElseCount;
    setCount(OutCount);
    RecordNextStmtCount = true;
  }
d560 8
a567 9
  void VisitCXXTryStmt(const CXXTryStmt *S) {
    RecordStmtCount(S);
    Visit(S->getTryBlock());
    for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
      Visit(S->getHandler(I));
    // Counter tracks the continuation block of the try statement.
    setCount(PGO.getRegionCount(S));
    RecordNextStmtCount = true;
  }
d569 17
a585 7
  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
    RecordNextStmtCount = false;
    // Counter tracks the catch statement's handler block.
    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
    CountMap[S] = CatchCount;
    Visit(S->getHandlerBlock());
  }
d587 3
a589 16
  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
    RecordStmtCount(E);
    uint64_t ParentCount = CurrentCount;
    Visit(E->getCond());

    // Counter tracks the "true" part of a conditional operator. The
    // count in the "false" part will be calculated from this counter.
    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
    CountMap[E->getTrueExpr()] = TrueCount;
    Visit(E->getTrueExpr());
    uint64_t OutCount = CurrentCount;

    uint64_t FalseCount = setCount(ParentCount - TrueCount);
    CountMap[E->getFalseExpr()] = FalseCount;
    Visit(E->getFalseExpr());
    OutCount += CurrentCount;
d591 12
a602 3
    setCount(OutCount);
    RecordNextStmtCount = true;
  }
d604 14
a617 25
  void VisitBinLAnd(const BinaryOperator *E) {
    RecordStmtCount(E);
    uint64_t ParentCount = CurrentCount;
    Visit(E->getLHS());
    // Counter tracks the right hand side of a logical and operator.
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
    CountMap[E->getRHS()] = RHSCount;
    Visit(E->getRHS());
    setCount(ParentCount + RHSCount - CurrentCount);
    RecordNextStmtCount = true;
  }

  void VisitBinLOr(const BinaryOperator *E) {
    RecordStmtCount(E);
    uint64_t ParentCount = CurrentCount;
    Visit(E->getLHS());
    // Counter tracks the right hand side of a logical or operator.
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
    CountMap[E->getRHS()] = RHSCount;
    Visit(E->getRHS());
    setCount(ParentCount + RHSCount - CurrentCount);
    RecordNextStmtCount = true;
  }
};
} // end anonymous namespace
d656 15
a670 2
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
  const Decl *D = GD.getDecl();
a676 10
  // Constructors and destructors may be represented by several functions in IR.
  // If so, instrument only base variant, others are implemented by delegation
  // to the base one, it would be counted twice otherwise.
  if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
      ((isa<CXXConstructorDecl>(GD.getDecl()) &&
        GD.getCtorType() != Ctor_Base) ||
       (isa<CXXDestructorDecl>(GD.getDecl()) &&
        GD.getDtorType() != Dtor_Base))) {
      return;
  }
d731 1
a731 1
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
d735 2
a752 1
  setFuncName(Name, Linkage);
d754 1
a754 1
      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
d776 10
a785 2
  uint64_t FunctionCount = getRegionCount(nullptr);
  Fn->setEntryCount(FunctionCount);
d788 1
a788 1
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
d791 1
a791 1
  if (!Builder.GetInsertBlock())
a792 2

  unsigned Counter = (*RegionCounterMap)[S];
d794 2
a795 2
  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
                     {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
d798 1
a798 1
                      Builder.getInt32(Counter)});
d842 2
a843 2
llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
                                                    uint64_t FalseCount) {
d856 1
a856 2
llvm::MDNode *
CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
d878 3
a880 3
llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
                                                           uint64_t LoopCount) {
  if (!PGO.haveRegionCounts())
d882 6
a887 3
  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
  assert(CondCount.hasValue() && "missing expected loop condition count");
  if (*CondCount == 0)
d889 2
a890 2
  return createProfileWeights(LoopCount,
                              std::max(*CondCount, LoopCount) - LoopCount);
@


1.1.1.7.2.1
log
@Sync with HEAD
@
text
@d21 1
a25 4
static llvm::cl::opt<bool> EnableValueProfiling(
  "enable-value-profiling", llvm::cl::ZeroOrMore,
  llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));

d37 1
a37 1
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
a42 2
  // Create PGOFuncName meta data.
  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
d409 1
a409 2
    Visit(S->getBeginStmt());
    Visit(S->getEndStmt());
a454 2
    if (S->getInit())
      Visit(S->getInit());
a484 2
    if (S->getInit())
      Visit(S->getInit());
d610 1
a610 1
  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
d656 1
a656 1
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
d658 2
a659 4
    return true;

  // Don't map the functions in system headers.
  const auto &SM = CGM.getContext().getSourceManager();
d661 1
a661 5
  return SM.isInSystemHeader(Loc);
}

void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
  if (skipRegionMappingForDecl(D))
d682 5
a686 1
  if (skipRegionMappingForDecl(D))
d729 1
a729 1
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
a742 50
// This method either inserts a call to the profile run-time during
// instrumentation or puts profile data into metadata for PGO use.
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {

  if (!EnableValueProfiling)
    return;

  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
    return;

  if (isa<llvm::Constant>(ValuePtr))
    return;

  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
  if (InstrumentValueSites && RegionCounterMap) {
    auto BuilderInsertPoint = Builder.saveIP();
    Builder.SetInsertPoint(ValueSite);
    llvm::Value *Args[5] = {
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
        Builder.getInt64(FunctionHash),
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
        Builder.getInt32(ValueKind),
        Builder.getInt32(NumValueSites[ValueKind]++)
    };
    Builder.CreateCall(
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
    Builder.restoreIP(BuilderInsertPoint);
    return;
  }

  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (PGOReader && haveRegionCounts()) {
    // We record the top most called three functions at each call site.
    // Profile metadata contains "VP" string identifying this metadata
    // as value profiling data, then a uint32_t value for the value profiling
    // kind, a uint64_t value for the total number of times the call is
    // executed, followed by the function hash and execution count (uint64_t)
    // pairs for each function.
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
      return;

    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
                            (llvm::InstrProfValueKind)ValueKind,
                            NumValueSites[ValueKind]);

    NumValueSites[ValueKind]++;
  }
}

d747 3
a749 5
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
  if (auto E = RecordExpected.takeError()) {
    auto IPE = llvm::InstrProfError::take(std::move(E));
    if (IPE == llvm::instrprof_error::unknown_function)
d751 1
a751 1
    else if (IPE == llvm::instrprof_error::hash_mismatch)
d753 1
a753 1
    else if (IPE == llvm::instrprof_error::malformed)
d756 1
a756 1
    return;
a757 3
  ProfRecord =
      llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
  RegionCounts = ProfRecord->Counts;
@


1.1.1.8
log
@Import Clang pre-4.0.0 r291444.
@
text
@d21 1
a25 4
static llvm::cl::opt<bool> EnableValueProfiling(
  "enable-value-profiling", llvm::cl::ZeroOrMore,
  llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));

d37 1
a37 1
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
a42 2
  // Create PGOFuncName meta data.
  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
d409 1
a409 2
    Visit(S->getBeginStmt());
    Visit(S->getEndStmt());
a454 2
    if (S->getInit())
      Visit(S->getInit());
a484 2
    if (S->getInit())
      Visit(S->getInit());
d610 1
a610 1
  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
d656 1
a656 1
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
d658 2
a659 4
    return true;

  // Don't map the functions in system headers.
  const auto &SM = CGM.getContext().getSourceManager();
d661 1
a661 5
  return SM.isInSystemHeader(Loc);
}

void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
  if (skipRegionMappingForDecl(D))
d682 5
a686 1
  if (skipRegionMappingForDecl(D))
d729 1
a729 1
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
a742 50
// This method either inserts a call to the profile run-time during
// instrumentation or puts profile data into metadata for PGO use.
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {

  if (!EnableValueProfiling)
    return;

  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
    return;

  if (isa<llvm::Constant>(ValuePtr))
    return;

  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
  if (InstrumentValueSites && RegionCounterMap) {
    auto BuilderInsertPoint = Builder.saveIP();
    Builder.SetInsertPoint(ValueSite);
    llvm::Value *Args[5] = {
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
        Builder.getInt64(FunctionHash),
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
        Builder.getInt32(ValueKind),
        Builder.getInt32(NumValueSites[ValueKind]++)
    };
    Builder.CreateCall(
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
    Builder.restoreIP(BuilderInsertPoint);
    return;
  }

  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (PGOReader && haveRegionCounts()) {
    // We record the top most called three functions at each call site.
    // Profile metadata contains "VP" string identifying this metadata
    // as value profiling data, then a uint32_t value for the value profiling
    // kind, a uint64_t value for the total number of times the call is
    // executed, followed by the function hash and execution count (uint64_t)
    // pairs for each function.
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
      return;

    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
                            (llvm::InstrProfValueKind)ValueKind,
                            NumValueSites[ValueKind]);

    NumValueSites[ValueKind]++;
  }
}

d747 3
a749 5
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
  if (auto E = RecordExpected.takeError()) {
    auto IPE = llvm::InstrProfError::take(std::move(E));
    if (IPE == llvm::instrprof_error::unknown_function)
d751 1
a751 1
    else if (IPE == llvm::instrprof_error::hash_mismatch)
d753 1
a753 1
    else if (IPE == llvm::instrprof_error::malformed)
d756 1
a756 1
    return;
a757 3
  ProfRecord =
      llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
  RegionCounts = ProfRecord->Counts;
@


1.1.1.9
log
@Import clang r309604 from branches/release_50
@
text
@d615 1
a615 1
  return Result.low();
a619 3
  if (!D->hasBody())
    return;

d629 5
a633 2
  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
    if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
a634 5

    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
      if (GD.getCtorType() != Ctor_Base &&
          CodeGenFunction::IsConstructorDelegationValid(CCD))
        return;
d667 1
a667 1
  if (!D->getBody())
d740 1
a740 2
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
                                      llvm::Value *StepV) {
d748 5
a752 12

  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
                         Builder.getInt64(FunctionHash),
                         Builder.getInt32(NumRegionCounters),
                         Builder.getInt32(Counter), StepV};
  if (!StepV)
    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
                       makeArrayRef(Args, 4));
  else
    Builder.CreateCall(
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
        makeArrayRef(Args));
@


1.1.1.9.4.1
log
@Sync with HEAD
@
text
@d25 3
a27 4
static llvm::cl::opt<bool>
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
                         llvm::cl::desc("Enable value profiling"),
                         llvm::cl::Hidden, llvm::cl::init(false));
a49 9
/// The version of the PGO hash algorithm.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1,
  PGO_HASH_V2,

  // Keep this set to the latest hash version.
  PGO_HASH_LATEST = PGO_HASH_V2
};

d51 1
a51 1
/// Stable hasher for PGO region counters.
a63 1
  PGOHashVersion HashVersion;
d71 1
a71 1
  /// Hash values for AST nodes.
a95 19
    // The preceding values are available with PGO_HASH_V1.

    EndOfScope,
    IfThenBranch,
    IfElseBranch,
    GotoStmt,
    IndirectGotoStmt,
    BreakStmt,
    ContinueStmt,
    ReturnStmt,
    ThrowExpr,
    UnaryOperatorLNot,
    BinaryOperatorLT,
    BinaryOperatorGT,
    BinaryOperatorLE,
    BinaryOperatorGE,
    BinaryOperatorEQ,
    BinaryOperatorNE,
    // The preceding values are available with PGO_HASH_V2.
d102 3
a104 2
  PGOHash(PGOHashVersion HashVersion)
      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
a106 1
  PGOHashVersion getHashVersion() const { return HashVersion; }
a111 8
/// Get the PGO hash version used in the given indexed profile.
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
                                        CodeGenModule &CGM) {
  if (PGOReader->getVersion() <= 4)
    return PGO_HASH_V1;
  return PGO_HASH_V2;
}

a113 2
  using Base = RecursiveASTVisitor<MapRegionCounters>;

d121 2
a122 3
  MapRegionCounters(PGOHashVersion HashVersion,
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
d148 4
a151 18
  /// If \p S gets a fresh counter, update the counter mappings. Return the
  /// V1 hash of \p S.
  PGOHash::HashType updateCounterMappings(Stmt *S) {
    auto Type = getHashType(PGO_HASH_V1, S);
    if (Type != PGOHash::None)
      CounterMap[S] = NextCounter++;
    return Type;
  }

  /// Include \p S in the function hash.
  bool VisitStmt(Stmt *S) {
    auto Type = updateCounterMappings(S);
    if (Hash.getHashVersion() != PGO_HASH_V1)
      Type = getHashType(Hash.getHashVersion(), S);
    if (Type != PGOHash::None)
      Hash.combine(Type);
    return true;
  }
d153 2
a154 17
  bool TraverseIfStmt(IfStmt *If) {
    // If we used the V1 hash, use the default traversal.
    if (Hash.getHashVersion() == PGO_HASH_V1)
      return Base::TraverseIfStmt(If);

    // Otherwise, keep track of which branch we're in while traversing.
    VisitStmt(If);
    for (Stmt *CS : If->children()) {
      if (!CS)
        continue;
      if (CS == If->getThen())
        Hash.combine(PGOHash::IfThenBranch);
      else if (CS == If->getElse())
        Hash.combine(PGOHash::IfElseBranch);
      TraverseStmt(CS);
    }
    Hash.combine(PGOHash::EndOfScope);
d157 1
a157 22

// If the statement type \p N is nestable, and its nesting impacts profile
// stability, define a custom traversal which tracks the end of the statement
// in the hash (provided we're not using the V1 hash).
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
  bool Traverse##N(N *S) {                                                     \
    Base::Traverse##N(S);                                                      \
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
      Hash.combine(PGOHash::EndOfScope);                                       \
    return true;                                                               \
  }

  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)

  /// Get version \p HashVersion of the PGO hash for \p S.
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
a194 18
      if (HashVersion == PGO_HASH_V2) {
        switch (BO->getOpcode()) {
        default:
          break;
        case BO_LT:
          return PGOHash::BinaryOperatorLT;
        case BO_GT:
          return PGOHash::BinaryOperatorGT;
        case BO_LE:
          return PGOHash::BinaryOperatorLE;
        case BO_GE:
          return PGOHash::BinaryOperatorGE;
        case BO_EQ:
          return PGOHash::BinaryOperatorEQ;
        case BO_NE:
          return PGOHash::BinaryOperatorNE;
        }
      }
a197 26

    if (HashVersion == PGO_HASH_V2) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::GotoStmtClass:
        return PGOHash::GotoStmt;
      case Stmt::IndirectGotoStmtClass:
        return PGOHash::IndirectGotoStmt;
      case Stmt::BreakStmtClass:
        return PGOHash::BreakStmt;
      case Stmt::ContinueStmtClass:
        return PGOHash::ContinueStmt;
      case Stmt::ReturnStmtClass:
        return PGOHash::ReturnStmt;
      case Stmt::CXXThrowExprClass:
        return PGOHash::ThrowExpr;
      case Stmt::UnaryOperatorClass: {
        const UnaryOperator *UO = cast<UnaryOperator>(S);
        if (UO->getOpcode() == UO_LNot)
          return PGOHash::UnaryOperatorLNot;
        break;
      }
      }
    }

a655 6
  // Use the latest hash version when inserting instrumentation, but use the
  // version in the indexed profile if we're reading PGO data.
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
  if (auto *PGOReader = CGM.getPGOReader())
    HashVersion = getPGOHashVersion(PGOReader, CGM);

d657 1
a657 1
  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
d840 1
a840 1
/// Calculate what to divide by to scale weights.
d848 1
a848 1
/// Scale an individual branch weight (and add 1).
@


1.1.1.9.4.2
log
@Mostly merge changes from HEAD upto 20200411
@
text
@@


1.1.1.9.2.1
log
@Sync with HEAD
@
text
@d25 3
a27 4
static llvm::cl::opt<bool>
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
                         llvm::cl::desc("Enable value profiling"),
                         llvm::cl::Hidden, llvm::cl::init(false));
a49 9
/// The version of the PGO hash algorithm.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1,
  PGO_HASH_V2,

  // Keep this set to the latest hash version.
  PGO_HASH_LATEST = PGO_HASH_V2
};

d51 1
a51 1
/// Stable hasher for PGO region counters.
a63 1
  PGOHashVersion HashVersion;
d71 1
a71 1
  /// Hash values for AST nodes.
a95 19
    // The preceding values are available with PGO_HASH_V1.

    EndOfScope,
    IfThenBranch,
    IfElseBranch,
    GotoStmt,
    IndirectGotoStmt,
    BreakStmt,
    ContinueStmt,
    ReturnStmt,
    ThrowExpr,
    UnaryOperatorLNot,
    BinaryOperatorLT,
    BinaryOperatorGT,
    BinaryOperatorLE,
    BinaryOperatorGE,
    BinaryOperatorEQ,
    BinaryOperatorNE,
    // The preceding values are available with PGO_HASH_V2.
d102 3
a104 2
  PGOHash(PGOHashVersion HashVersion)
      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
a106 1
  PGOHashVersion getHashVersion() const { return HashVersion; }
a111 8
/// Get the PGO hash version used in the given indexed profile.
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
                                        CodeGenModule &CGM) {
  if (PGOReader->getVersion() <= 4)
    return PGO_HASH_V1;
  return PGO_HASH_V2;
}

a113 2
  using Base = RecursiveASTVisitor<MapRegionCounters>;

d121 2
a122 3
  MapRegionCounters(PGOHashVersion HashVersion,
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
d148 4
a151 18
  /// If \p S gets a fresh counter, update the counter mappings. Return the
  /// V1 hash of \p S.
  PGOHash::HashType updateCounterMappings(Stmt *S) {
    auto Type = getHashType(PGO_HASH_V1, S);
    if (Type != PGOHash::None)
      CounterMap[S] = NextCounter++;
    return Type;
  }

  /// Include \p S in the function hash.
  bool VisitStmt(Stmt *S) {
    auto Type = updateCounterMappings(S);
    if (Hash.getHashVersion() != PGO_HASH_V1)
      Type = getHashType(Hash.getHashVersion(), S);
    if (Type != PGOHash::None)
      Hash.combine(Type);
    return true;
  }
d153 2
a154 17
  bool TraverseIfStmt(IfStmt *If) {
    // If we used the V1 hash, use the default traversal.
    if (Hash.getHashVersion() == PGO_HASH_V1)
      return Base::TraverseIfStmt(If);

    // Otherwise, keep track of which branch we're in while traversing.
    VisitStmt(If);
    for (Stmt *CS : If->children()) {
      if (!CS)
        continue;
      if (CS == If->getThen())
        Hash.combine(PGOHash::IfThenBranch);
      else if (CS == If->getElse())
        Hash.combine(PGOHash::IfElseBranch);
      TraverseStmt(CS);
    }
    Hash.combine(PGOHash::EndOfScope);
d157 1
a157 22

// If the statement type \p N is nestable, and its nesting impacts profile
// stability, define a custom traversal which tracks the end of the statement
// in the hash (provided we're not using the V1 hash).
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
  bool Traverse##N(N *S) {                                                     \
    Base::Traverse##N(S);                                                      \
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
      Hash.combine(PGOHash::EndOfScope);                                       \
    return true;                                                               \
  }

  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)

  /// Get version \p HashVersion of the PGO hash for \p S.
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
a194 18
      if (HashVersion == PGO_HASH_V2) {
        switch (BO->getOpcode()) {
        default:
          break;
        case BO_LT:
          return PGOHash::BinaryOperatorLT;
        case BO_GT:
          return PGOHash::BinaryOperatorGT;
        case BO_LE:
          return PGOHash::BinaryOperatorLE;
        case BO_GE:
          return PGOHash::BinaryOperatorGE;
        case BO_EQ:
          return PGOHash::BinaryOperatorEQ;
        case BO_NE:
          return PGOHash::BinaryOperatorNE;
        }
      }
a197 26

    if (HashVersion == PGO_HASH_V2) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::GotoStmtClass:
        return PGOHash::GotoStmt;
      case Stmt::IndirectGotoStmtClass:
        return PGOHash::IndirectGotoStmt;
      case Stmt::BreakStmtClass:
        return PGOHash::BreakStmt;
      case Stmt::ContinueStmtClass:
        return PGOHash::ContinueStmt;
      case Stmt::ReturnStmtClass:
        return PGOHash::ReturnStmt;
      case Stmt::CXXThrowExprClass:
        return PGOHash::ThrowExpr;
      case Stmt::UnaryOperatorClass: {
        const UnaryOperator *UO = cast<UnaryOperator>(S);
        if (UO->getOpcode() == UO_LNot)
          return PGOHash::UnaryOperatorLNot;
        break;
      }
      }
    }

a655 6
  // Use the latest hash version when inserting instrumentation, but use the
  // version in the indexed profile if we're reading PGO data.
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
  if (auto *PGOReader = CGM.getPGOReader())
    HashVersion = getPGOHashVersion(PGOReader, CGM);

d657 1
a657 1
  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
d840 1
a840 1
/// Calculate what to divide by to scale weights.
d848 1
a848 1
/// Scale an individual branch weight (and add 1).
@


1.1.1.10
log
@Import clang r337282 from trunk
@
text
@d25 3
a27 4
static llvm::cl::opt<bool>
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
                         llvm::cl::desc("Enable value profiling"),
                         llvm::cl::Hidden, llvm::cl::init(false));
a49 9
/// The version of the PGO hash algorithm.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1,
  PGO_HASH_V2,

  // Keep this set to the latest hash version.
  PGO_HASH_LATEST = PGO_HASH_V2
};

d51 1
a51 1
/// Stable hasher for PGO region counters.
a63 1
  PGOHashVersion HashVersion;
d71 1
a71 1
  /// Hash values for AST nodes.
a95 19
    // The preceding values are available with PGO_HASH_V1.

    EndOfScope,
    IfThenBranch,
    IfElseBranch,
    GotoStmt,
    IndirectGotoStmt,
    BreakStmt,
    ContinueStmt,
    ReturnStmt,
    ThrowExpr,
    UnaryOperatorLNot,
    BinaryOperatorLT,
    BinaryOperatorGT,
    BinaryOperatorLE,
    BinaryOperatorGE,
    BinaryOperatorEQ,
    BinaryOperatorNE,
    // The preceding values are available with PGO_HASH_V2.
d102 3
a104 2
  PGOHash(PGOHashVersion HashVersion)
      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
a106 1
  PGOHashVersion getHashVersion() const { return HashVersion; }
a111 8
/// Get the PGO hash version used in the given indexed profile.
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
                                        CodeGenModule &CGM) {
  if (PGOReader->getVersion() <= 4)
    return PGO_HASH_V1;
  return PGO_HASH_V2;
}

a113 2
  using Base = RecursiveASTVisitor<MapRegionCounters>;

d121 2
a122 3
  MapRegionCounters(PGOHashVersion HashVersion,
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
d148 4
a151 18
  /// If \p S gets a fresh counter, update the counter mappings. Return the
  /// V1 hash of \p S.
  PGOHash::HashType updateCounterMappings(Stmt *S) {
    auto Type = getHashType(PGO_HASH_V1, S);
    if (Type != PGOHash::None)
      CounterMap[S] = NextCounter++;
    return Type;
  }

  /// Include \p S in the function hash.
  bool VisitStmt(Stmt *S) {
    auto Type = updateCounterMappings(S);
    if (Hash.getHashVersion() != PGO_HASH_V1)
      Type = getHashType(Hash.getHashVersion(), S);
    if (Type != PGOHash::None)
      Hash.combine(Type);
    return true;
  }
d153 2
a154 17
  bool TraverseIfStmt(IfStmt *If) {
    // If we used the V1 hash, use the default traversal.
    if (Hash.getHashVersion() == PGO_HASH_V1)
      return Base::TraverseIfStmt(If);

    // Otherwise, keep track of which branch we're in while traversing.
    VisitStmt(If);
    for (Stmt *CS : If->children()) {
      if (!CS)
        continue;
      if (CS == If->getThen())
        Hash.combine(PGOHash::IfThenBranch);
      else if (CS == If->getElse())
        Hash.combine(PGOHash::IfElseBranch);
      TraverseStmt(CS);
    }
    Hash.combine(PGOHash::EndOfScope);
d157 1
a157 22

// If the statement type \p N is nestable, and its nesting impacts profile
// stability, define a custom traversal which tracks the end of the statement
// in the hash (provided we're not using the V1 hash).
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
  bool Traverse##N(N *S) {                                                     \
    Base::Traverse##N(S);                                                      \
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
      Hash.combine(PGOHash::EndOfScope);                                       \
    return true;                                                               \
  }

  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)

  /// Get version \p HashVersion of the PGO hash for \p S.
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
a194 18
      if (HashVersion == PGO_HASH_V2) {
        switch (BO->getOpcode()) {
        default:
          break;
        case BO_LT:
          return PGOHash::BinaryOperatorLT;
        case BO_GT:
          return PGOHash::BinaryOperatorGT;
        case BO_LE:
          return PGOHash::BinaryOperatorLE;
        case BO_GE:
          return PGOHash::BinaryOperatorGE;
        case BO_EQ:
          return PGOHash::BinaryOperatorEQ;
        case BO_NE:
          return PGOHash::BinaryOperatorNE;
        }
      }
a197 26

    if (HashVersion == PGO_HASH_V2) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::GotoStmtClass:
        return PGOHash::GotoStmt;
      case Stmt::IndirectGotoStmtClass:
        return PGOHash::IndirectGotoStmt;
      case Stmt::BreakStmtClass:
        return PGOHash::BreakStmt;
      case Stmt::ContinueStmtClass:
        return PGOHash::ContinueStmt;
      case Stmt::ReturnStmtClass:
        return PGOHash::ReturnStmt;
      case Stmt::CXXThrowExprClass:
        return PGOHash::ThrowExpr;
      case Stmt::UnaryOperatorClass: {
        const UnaryOperator *UO = cast<UnaryOperator>(S);
        if (UO->getOpcode() == UO_LNot)
          return PGOHash::UnaryOperatorLNot;
        break;
      }
      }
    }

a655 6
  // Use the latest hash version when inserting instrumentation, but use the
  // version in the indexed profile if we're reading PGO data.
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
  if (auto *PGOReader = CGM.getPGOReader())
    HashVersion = getPGOHashVersion(PGOReader, CGM);

d657 1
a657 1
  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
d840 1
a840 1
/// Calculate what to divide by to scale weights.
d848 1
a848 1
/// Scale an individual branch weight (and add 1).
@


1.1.1.11
log
@Mark old LLVM instance as dead.
@
text
@@


1.1.1.5.4.1
log
@file CodeGenPGO.cpp was added on branch tls-maxphys on 2014-08-19 23:47:27 +0000
@
text
@d1 1089
@


1.1.1.5.4.2
log
@Rebase to HEAD as of a few days ago.
@
text
@a0 1089
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profile-guided optimization
//
//===----------------------------------------------------------------------===//

#include "CodeGenPGO.h"
#include "CodeGenFunction.h"
#include "CoverageMappingGen.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"

using namespace clang;
using namespace CodeGen;

void CodeGenPGO::setFuncName(StringRef Name,
                             llvm::GlobalValue::LinkageTypes Linkage) {
  RawFuncName = Name;

  // Function names may be prefixed with a binary '1' to indicate
  // that the backend should not modify the symbols due to any platform
  // naming convention. Do not include that '1' in the PGO profile name.
  if (RawFuncName[0] == '\1')
    RawFuncName = RawFuncName.substr(1);

  if (!llvm::GlobalValue::isLocalLinkage(Linkage)) {
    PrefixedFuncName.reset(new std::string(RawFuncName));
    return;
  }

  // For local symbols, prepend the main file name to distinguish them.
  // Do not include the full path in the file name since there's no guarantee
  // that it will stay the same, e.g., if the files are checked out from
  // version control in different locations.
  PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName));
  if (PrefixedFuncName->empty())
    PrefixedFuncName->assign("<unknown>");
  PrefixedFuncName->append(":");
  PrefixedFuncName->append(RawFuncName);
}

void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  setFuncName(Fn->getName(), Fn->getLinkage());
}

void CodeGenPGO::setVarLinkage(llvm::GlobalValue::LinkageTypes Linkage) {
  // Set the linkage for variables based on the function linkage.  Usually, we
  // want to match it, but available_externally and extern_weak both have the
  // wrong semantics.
  VarLinkage = Linkage;
  switch (VarLinkage) {
  case llvm::GlobalValue::ExternalWeakLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
    break;
  case llvm::GlobalValue::AvailableExternallyLinkage:
    VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
    break;
  default:
    break;
  }
}

static llvm::Function *getRegisterFunc(CodeGenModule &CGM) {
  return CGM.getModule().getFunction("__llvm_profile_register_functions");
}

static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) {
  // Don't do this for Darwin.  compiler-rt uses linker magic.
  if (CGM.getTarget().getTriple().isOSDarwin())
    return nullptr;

  // Only need to insert this once per module.
  if (llvm::Function *RegisterF = getRegisterFunc(CGM))
    return &RegisterF->getEntryBlock();

  // Construct the function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false);
  auto *RegisterF = llvm::Function::Create(RegisterFTy,
                                           llvm::GlobalValue::InternalLinkage,
                                           "__llvm_profile_register_functions",
                                           &CGM.getModule());
  RegisterF->setUnnamedAddr(true);
  if (CGM.getCodeGenOpts().DisableRedZone)
    RegisterF->addFnAttr(llvm::Attribute::NoRedZone);

  // Construct and return the entry block.
  auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF);
  CGBuilderTy Builder(BB);
  Builder.CreateRetVoid();
  return BB;
}

static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) {
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
  return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function",
                                             RuntimeRegisterTy);
}

static bool isMachO(const CodeGenModule &CGM) {
  return CGM.getTarget().getTriple().isOSBinFormatMachO();
}

static StringRef getCountersSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
}

static StringRef getNameSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
}

static StringRef getDataSection(const CodeGenModule &CGM) {
  return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
}

llvm::GlobalVariable *CodeGenPGO::buildDataVar() {
  // Create name variable.
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(),
                                                     false);
  auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(),
                                        true, VarLinkage, VarName,
                                        getFuncVarName("name"));
  Name->setSection(getNameSection(CGM));
  Name->setAlignment(1);

  // Create data variable.
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Int64Ty = llvm::Type::getInt64Ty(Ctx);
  auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
  auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
  llvm::GlobalVariable *Data = nullptr;
  if (RegionCounters) {
    llvm::Type *DataTypes[] = {
      Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
    };
    auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
    llvm::Constant *DataVals[] = {
      llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
      llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
      llvm::ConstantInt::get(Int64Ty, FunctionHash),
      llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
      llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
    };
    Data =
      new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
                               llvm::ConstantStruct::get(DataTy, DataVals),
                               getFuncVarName("data"));

    // All the data should be packed into an array in its own section.
    Data->setSection(getDataSection(CGM));
    Data->setAlignment(8);
  }

  // Create coverage mapping data variable.
  if (!CoverageMapping.empty())
    CGM.getCoverageMapping()->addFunctionMappingRecord(Name,
                                                       getFuncName(),
                                                       CoverageMapping);

  // Hide all these symbols so that we correctly get a copy for each
  // executable.  The profile format expects names and counters to be
  // contiguous, so references into shared objects would be invalid.
  if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) {
    Name->setVisibility(llvm::GlobalValue::HiddenVisibility);
    if (Data) {
      Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
      RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
    }
  }

  // Make sure the data doesn't get deleted.
  if (Data) CGM.addUsedGlobal(Data);
  return Data;
}

void CodeGenPGO::emitInstrumentationData() {
  if (!RegionCounters)
    return;

  // Build the data.
  auto *Data = buildDataVar();

  // Register the data.
  auto *RegisterBB = getOrInsertRegisterBB(CGM);
  if (!RegisterBB)
    return;
  CGBuilderTy Builder(RegisterBB->getTerminator());
  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
  Builder.CreateCall(getOrInsertRuntimeRegister(CGM),
                     Builder.CreateBitCast(Data, VoidPtrTy));
}

llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return nullptr;

  assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr &&
         "profile initialization already emitted");

  // Get the function to call at initialization.
  llvm::Constant *RegisterF = getRegisterFunc(CGM);
  if (!RegisterF)
    return nullptr;

  // Create the initialization function.
  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
  auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false),
                                   llvm::GlobalValue::InternalLinkage,
                                   "__llvm_profile_init", &CGM.getModule());
  F->setUnnamedAddr(true);
  F->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    F->addFnAttr(llvm::Attribute::NoRedZone);

  // Add the basic block and the necessary calls.
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F));
  Builder.CreateCall(RegisterF);
  Builder.CreateRetVoid();

  return F;
}

namespace {
/// \brief Stable hasher for PGO region counters.
///
/// PGOHash produces a stable hash of a given function's control flow.
///
/// Changing the output of this hash will invalidate all previously generated
/// profiles -- i.e., don't do it.
///
/// \note  When this hash does eventually change (years?), we still need to
/// support old hashes.  We'll need to pull in the version number from the
/// profile data format and use the matching hash function.
class PGOHash {
  uint64_t Working;
  unsigned Count;
  llvm::MD5 MD5;

  static const int NumBitsPerType = 6;
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
  static const unsigned TooBig = 1u << NumBitsPerType;

public:
  /// \brief Hash values for AST nodes.
  ///
  /// Distinct values for AST nodes that have region counters attached.
  ///
  /// These values must be stable.  All new members must be added at the end,
  /// and no members should be removed.  Changing the enumeration value for an
  /// AST node will affect the hash of every function that contains that node.
  enum HashType : unsigned char {
    None = 0,
    LabelStmt = 1,
    WhileStmt,
    DoStmt,
    ForStmt,
    CXXForRangeStmt,
    ObjCForCollectionStmt,
    SwitchStmt,
    CaseStmt,
    DefaultStmt,
    IfStmt,
    CXXTryStmt,
    CXXCatchStmt,
    ConditionalOperator,
    BinaryOperatorLAnd,
    BinaryOperatorLOr,
    BinaryConditionalOperator,

    // Keep this last.  It's for the static assert that follows.
    LastHashType
  };
  static_assert(LastHashType <= TooBig, "Too many types in HashType");

  // TODO: When this format changes, take in a version number here, and use the
  // old hash calculation for file formats that used the old hash.
  PGOHash() : Working(0), Count(0) {}
  void combine(HashType Type);
  uint64_t finalize();
};
const int PGOHash::NumBitsPerType;
const unsigned PGOHash::NumTypesPerWord;
const unsigned PGOHash::TooBig;

  /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
  struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
    /// The next counter value to assign.
    unsigned NextCounter;
    /// The function hash.
    PGOHash Hash;
    /// The map of statements to counters.
    llvm::DenseMap<const Stmt *, unsigned> &CounterMap;

    MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
        : NextCounter(0), CounterMap(CounterMap) {}

    // Blocks and lambdas are handled as separate functions, so we need not
    // traverse them in the parent context.
    bool TraverseBlockExpr(BlockExpr *BE) { return true; }
    bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
    bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }

    bool VisitDecl(const Decl *D) {
      switch (D->getKind()) {
      default:
        break;
      case Decl::Function:
      case Decl::CXXMethod:
      case Decl::CXXConstructor:
      case Decl::CXXDestructor:
      case Decl::CXXConversion:
      case Decl::ObjCMethod:
      case Decl::Block:
      case Decl::Captured:
        CounterMap[D->getBody()] = NextCounter++;
        break;
      }
      return true;
    }

    bool VisitStmt(const Stmt *S) {
      auto Type = getHashType(S);
      if (Type == PGOHash::None)
        return true;

      CounterMap[S] = NextCounter++;
      Hash.combine(Type);
      return true;
    }
    PGOHash::HashType getHashType(const Stmt *S) {
      switch (S->getStmtClass()) {
      default:
        break;
      case Stmt::LabelStmtClass:
        return PGOHash::LabelStmt;
      case Stmt::WhileStmtClass:
        return PGOHash::WhileStmt;
      case Stmt::DoStmtClass:
        return PGOHash::DoStmt;
      case Stmt::ForStmtClass:
        return PGOHash::ForStmt;
      case Stmt::CXXForRangeStmtClass:
        return PGOHash::CXXForRangeStmt;
      case Stmt::ObjCForCollectionStmtClass:
        return PGOHash::ObjCForCollectionStmt;
      case Stmt::SwitchStmtClass:
        return PGOHash::SwitchStmt;
      case Stmt::CaseStmtClass:
        return PGOHash::CaseStmt;
      case Stmt::DefaultStmtClass:
        return PGOHash::DefaultStmt;
      case Stmt::IfStmtClass:
        return PGOHash::IfStmt;
      case Stmt::CXXTryStmtClass:
        return PGOHash::CXXTryStmt;
      case Stmt::CXXCatchStmtClass:
        return PGOHash::CXXCatchStmt;
      case Stmt::ConditionalOperatorClass:
        return PGOHash::ConditionalOperator;
      case Stmt::BinaryConditionalOperatorClass:
        return PGOHash::BinaryConditionalOperator;
      case Stmt::BinaryOperatorClass: {
        const BinaryOperator *BO = cast<BinaryOperator>(S);
        if (BO->getOpcode() == BO_LAnd)
          return PGOHash::BinaryOperatorLAnd;
        if (BO->getOpcode() == BO_LOr)
          return PGOHash::BinaryOperatorLOr;
        break;
      }
      }
      return PGOHash::None;
    }
  };

  /// A StmtVisitor that propagates the raw counts through the AST and
  /// records the count at statements where the value may change.
  struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
    /// PGO state.
    CodeGenPGO &PGO;

    /// A flag that is set when the current count should be recorded on the
    /// next statement, such as at the exit of a loop.
    bool RecordNextStmtCount;

    /// The map of statements to count values.
    llvm::DenseMap<const Stmt *, uint64_t> &CountMap;

    /// BreakContinueStack - Keep counts of breaks and continues inside loops.
    struct BreakContinue {
      uint64_t BreakCount;
      uint64_t ContinueCount;
      BreakContinue() : BreakCount(0), ContinueCount(0) {}
    };
    SmallVector<BreakContinue, 8> BreakContinueStack;

    ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
                        CodeGenPGO &PGO)
        : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}

    void RecordStmtCount(const Stmt *S) {
      if (RecordNextStmtCount) {
        CountMap[S] = PGO.getCurrentRegionCount();
        RecordNextStmtCount = false;
      }
    }

    void VisitStmt(const Stmt *S) {
      RecordStmtCount(S);
      for (Stmt::const_child_range I = S->children(); I; ++I) {
        if (*I)
         this->Visit(*I);
      }
    }

    void VisitFunctionDecl(const FunctionDecl *D) {
      // Counter tracks entry to the function body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    // Skip lambda expressions. We visit these as FunctionDecls when we're
    // generating them and aren't interested in the body when generating a
    // parent context.
    void VisitLambdaExpr(const LambdaExpr *LE) {}

    void VisitCapturedDecl(const CapturedDecl *D) {
      // Counter tracks entry to the capture body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
      // Counter tracks entry to the method body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitBlockDecl(const BlockDecl *D) {
      // Counter tracks entry to the block body.
      RegionCounter Cnt(PGO, D->getBody());
      Cnt.beginRegion();
      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
      Visit(D->getBody());
    }

    void VisitReturnStmt(const ReturnStmt *S) {
      RecordStmtCount(S);
      if (S->getRetValue())
        Visit(S->getRetValue());
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitGotoStmt(const GotoStmt *S) {
      RecordStmtCount(S);
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitLabelStmt(const LabelStmt *S) {
      RecordNextStmtCount = false;
      // Counter tracks the block following the label.
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      CountMap[S] = PGO.getCurrentRegionCount();
      Visit(S->getSubStmt());
    }

    void VisitBreakStmt(const BreakStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
      BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitContinueStmt(const ContinueStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
      BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitWhileStmt(const WhileStmt *S) {
      RecordStmtCount(S);
      // Counter tracks the body of the loop.
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first so the break/continue adjustments can be
      // included when visiting the condition.
      Cnt.beginRegion();
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // ...then go back and propagate counts through the condition. The count
      // at the start of the condition is the sum of the incoming edges,
      // the backedge from the end of the loop body, and the edges from
      // continue statements.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitDoStmt(const DoStmt *S) {
      RecordStmtCount(S);
      // Counter tracks the body of the loop.
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();
      // The count at the start of the condition is equal to the count at the
      // end of the body. The adjusted count does not include either the
      // fall-through count coming into the loop or the continue count, so add
      // both of those separately. This is coincidentally the same equation as
      // with while loops but for different reasons.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitForStmt(const ForStmt *S) {
      RecordStmtCount(S);
      if (S->getInit())
        Visit(S->getInit());
      // Counter tracks the body of the loop.
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      if (S->getInc()) {
        Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                  BreakContinueStack.back().ContinueCount);
        CountMap[S->getInc()] = PGO.getCurrentRegionCount();
        Visit(S->getInc());
        Cnt.adjustForControlFlow();
      }

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      if (S->getCond()) {
        Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                  Cnt.getAdjustedCount() +
                                  BC.ContinueCount);
        CountMap[S->getCond()] = PGO.getCurrentRegionCount();
        Visit(S->getCond());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
      RecordStmtCount(S);
      Visit(S->getRangeStmt());
      Visit(S->getBeginEndStmt());
      // Counter tracks the body of the loop.
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
      Visit(S->getLoopVarStmt());
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                BreakContinueStack.back().ContinueCount);
      CountMap[S->getInc()] = PGO.getCurrentRegionCount();
      Visit(S->getInc());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() +
                                BC.ContinueCount);
      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
      RecordStmtCount(S);
      Visit(S->getElement());
      // Counter tracks the body of the loop.
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion();
      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitSwitchStmt(const SwitchStmt *S) {
      RecordStmtCount(S);
      Visit(S->getCond());
      PGO.setCurrentRegionUnreachable();
      BreakContinueStack.push_back(BreakContinue());
      Visit(S->getBody());
      // If the switch is inside a loop, add the continue counts.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      if (!BreakContinueStack.empty())
        BreakContinueStack.back().ContinueCount += BC.ContinueCount;
      // Counter tracks the exit block of the switch.
      RegionCounter ExitCnt(PGO, S);
      ExitCnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCaseStmt(const CaseStmt *S) {
      RecordNextStmtCount = false;
      // Counter for this particular case. This counts only jumps from the
      // switch header and does not include fallthrough from the case before
      // this one.
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      CountMap[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitDefaultStmt(const DefaultStmt *S) {
      RecordNextStmtCount = false;
      // Counter for this default case. This does not include fallthrough from
      // the previous case.
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      CountMap[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitIfStmt(const IfStmt *S) {
      RecordStmtCount(S);
      // Counter tracks the "then" part of an if statement. The count for
      // the "else" part, if it exists, will be calculated from this counter.
      RegionCounter Cnt(PGO, S);
      Visit(S->getCond());

      Cnt.beginRegion();
      CountMap[S->getThen()] = PGO.getCurrentRegionCount();
      Visit(S->getThen());
      Cnt.adjustForControlFlow();

      if (S->getElse()) {
        Cnt.beginElseRegion();
        CountMap[S->getElse()] = PGO.getCurrentRegionCount();
        Visit(S->getElse());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitCXXTryStmt(const CXXTryStmt *S) {
      RecordStmtCount(S);
      Visit(S->getTryBlock());
      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
        Visit(S->getHandler(I));
      // Counter tracks the continuation block of the try statement.
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
      RecordNextStmtCount = false;
      // Counter tracks the catch statement's handler block.
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      CountMap[S] = PGO.getCurrentRegionCount();
      Visit(S->getHandlerBlock());
    }

    void VisitAbstractConditionalOperator(
        const AbstractConditionalOperator *E) {
      RecordStmtCount(E);
      // Counter tracks the "true" part of a conditional operator. The
      // count in the "false" part will be calculated from this counter.
      RegionCounter Cnt(PGO, E);
      Visit(E->getCond());

      Cnt.beginRegion();
      CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getTrueExpr());
      Cnt.adjustForControlFlow();

      Cnt.beginElseRegion();
      CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getFalseExpr());
      Cnt.adjustForControlFlow();

      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLAnd(const BinaryOperator *E) {
      RecordStmtCount(E);
      // Counter tracks the right hand side of a logical and operator.
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLOr(const BinaryOperator *E) {
      RecordStmtCount(E);
      // Counter tracks the right hand side of a logical or operator.
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }
  };
}

void PGOHash::combine(HashType Type) {
  // Check that we never combine 0 and only have six bits.
  assert(Type && "Hash is invalid: unexpected type 0");
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");

  // Pass through MD5 if enough work has built up.
  if (Count && Count % NumTypesPerWord == 0) {
    using namespace llvm::support;
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
    Working = 0;
  }

  // Accumulate the current type.
  ++Count;
  Working = Working << NumBitsPerType | Type;
}

uint64_t PGOHash::finalize() {
  // Use Working as the hash directly if we never used MD5.
  if (Count <= NumTypesPerWord)
    // No need to byte swap here, since none of the math was endian-dependent.
    // This number will be byte-swapped as required on endianness transitions,
    // so we will see the same value on the other side.
    return Working;

  // Check for remaining work in Working.
  if (Working)
    MD5.update(Working);

  // Finalize the MD5 and return the hash.
  llvm::MD5::MD5Result Result;
  MD5.final(Result);
  using namespace llvm::support;
  return endian::read<uint64_t, little, unaligned>(Result);
}

static void emitRuntimeHook(CodeGenModule &CGM) {
  const char *const RuntimeVarName = "__llvm_profile_runtime";
  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
  if (CGM.getModule().getGlobalVariable(RuntimeVarName))
    return;

  // Declare the runtime hook.
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false,
                                       llvm::GlobalValue::ExternalLinkage,
                                       nullptr, RuntimeVarName);

  // Make a function that uses it.
  auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false),
                                      llvm::GlobalValue::LinkOnceODRLinkage,
                                      RuntimeUserName, &CGM.getModule());
  User->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    User->addFnAttr(llvm::Attribute::NoRedZone);
  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User));
  auto *Load = Builder.CreateLoad(Var);
  Builder.CreateRet(Load);

  // Create a use of the function.  Now the definition of the runtime variable
  // should get pulled in, along with any static initializears.
  CGM.addUsedGlobal(User);
}

void CodeGenPGO::checkGlobalDecl(GlobalDecl GD) {
  // Make sure we only emit coverage mapping for one constructor/destructor.
  // Clang emits several functions for the constructor and the destructor of
  // a class. Every function is instrumented, but we only want to provide
  // coverage for one of them. Because of that we only emit the coverage mapping
  // for the base constructor/destructor.
  if ((isa<CXXConstructorDecl>(GD.getDecl()) &&
       GD.getCtorType() != Ctor_Base) ||
      (isa<CXXDestructorDecl>(GD.getDecl()) &&
       GD.getDtorType() != Dtor_Base)) {
    SkipCoverageMapping = true;
  }
}

void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) {
  bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (!InstrumentRegions && !PGOReader)
    return;
  if (D->isImplicit())
    return;
  CGM.ClearUnusedCoverageMapping(D);
  setFuncName(Fn);
  setVarLinkage(Fn->getLinkage());

  mapRegionCounters(D);
  if (InstrumentRegions) {
    emitRuntimeHook(CGM);
    emitCounterVariables();
    if (CGM.getCodeGenOpts().CoverageMapping)
      emitCounterRegionMapping(D);
  }
  if (PGOReader) {
    SourceManager &SM = CGM.getContext().getSourceManager();
    loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
    computeRegionCounts(D);
    applyFunctionAttributes(PGOReader, Fn);
  }
}

void CodeGenPGO::mapRegionCounters(const Decl *D) {
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
  MapRegionCounters Walker(*RegionCounterMap);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
  NumRegionCounters = Walker.NextCounter;
  FunctionHash = Walker.Hash.finalize();
}

void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
  if (SkipCoverageMapping)
    return;
  // Don't map the functions inside the system headers
  auto Loc = D->getBody()->getLocStart();
  if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    return;

  llvm::raw_string_ostream OS(CoverageMapping);
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                CGM.getContext().getSourceManager(),
                                CGM.getLangOpts(), RegionCounterMap.get(),
                                NumRegionCounters);
  MappingGen.emitCounterMapping(D, OS);
  OS.flush();
}

void
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
                                    llvm::GlobalValue::LinkageTypes Linkage) {
  if (SkipCoverageMapping)
    return;
  setFuncName(FuncName, Linkage);
  setVarLinkage(Linkage);

  // Don't map the functions inside the system headers
  auto Loc = D->getBody()->getLocStart();
  if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    return;

  llvm::raw_string_ostream OS(CoverageMapping);
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                CGM.getContext().getSourceManager(),
                                CGM.getLangOpts());
  MappingGen.emitEmptyMapping(D, OS);
  OS.flush();
  buildDataVar();
}

void CodeGenPGO::computeRegionCounts(const Decl *D) {
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
  ComputeRegionCounts Walker(*StmtCountMap, *this);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.VisitFunctionDecl(FD);
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    Walker.VisitObjCMethodDecl(MD);
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    Walker.VisitBlockDecl(BD);
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
}

void
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
                                    llvm::Function *Fn) {
  if (!haveRegionCounts())
    return;

  uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount();
  uint64_t FunctionCount = getRegionCount(0);
  if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
    // Turn on InlineHint attribute for hot functions.
    // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::InlineHint);
  else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount))
    // Turn on Cold attribute for cold functions.
    // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
    Fn->addFnAttr(llvm::Attribute::Cold);
}

void CodeGenPGO::emitCounterVariables() {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
                                                    NumRegionCounters);
  RegionCounters =
    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage,
                             llvm::Constant::getNullValue(CounterTy),
                             getFuncVarName("counters"));
  RegionCounters->setAlignment(8);
  RegionCounters->setSection(getCountersSection(CGM));
}

void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
  if (!RegionCounters)
    return;
  llvm::Value *Addr =
    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
  Builder.CreateStore(Count, Addr);
}

void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
                                  bool IsInMainFile) {
  CGM.getPGOStats().addVisited(IsInMainFile);
  RegionCounts.reset(new std::vector<uint64_t>);
  if (std::error_code EC = PGOReader->getFunctionCounts(
          getFuncName(), FunctionHash, *RegionCounts)) {
    if (EC == llvm::instrprof_error::unknown_function)
      CGM.getPGOStats().addMissing(IsInMainFile);
    else if (EC == llvm::instrprof_error::hash_mismatch)
      CGM.getPGOStats().addMismatched(IsInMainFile);
    else if (EC == llvm::instrprof_error::malformed)
      // TODO: Consider a more specific warning for this case.
      CGM.getPGOStats().addMismatched(IsInMainFile);
    RegionCounts.reset();
  }
}

void CodeGenPGO::destroyRegionCounters() {
  RegionCounterMap.reset();
  StmtCountMap.reset();
  RegionCounts.reset();
  RegionCounters = nullptr;
}

/// \brief Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}

/// \brief Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
/// greater than \c Weight.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return Scaled;
}

llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
                                              uint64_t FalseCount) {
  // Check for empty weights.
  if (!TrueCount && !FalseCount)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
                                      scaleBranchWeight(FalseCount, Scale));
}

llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Check for empty weights.
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
  if (MaxWeight == 0)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(MaxWeight);

  SmallVector<uint32_t, 16> ScaledWeights;
  ScaledWeights.reserve(Weights.size());
  for (uint64_t W : Weights)
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(ScaledWeights);
}

llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond,
                                            RegionCounter &Cnt) {
  if (!haveRegionCounts())
    return nullptr;
  uint64_t LoopCount = Cnt.getCount();
  uint64_t CondCount = 0;
  bool Found = getStmtCount(Cond, CondCount);
  assert(Found && "missing expected loop condition count");
  (void)Found;
  if (CondCount == 0)
    return nullptr;
  return createBranchWeights(LoopCount,
                             std::max(CondCount, LoopCount) - LoopCount);
}
@


1.1.1.3.4.1
log
@file CodeGenPGO.cpp was added on branch yamt-pagecache on 2014-05-22 16:18:27 +0000
@
text
@d1 845
@


1.1.1.3.4.2
log
@sync with head.

for a reference, the tree before this commit was tagged
as yamt-pagecache-tag8.

this commit was splitted into small chunks to avoid
a limitation of cvs.  ("Protocol error: too many arguments")
@
text
@a0 845
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profile-guided optimization
//
//===----------------------------------------------------------------------===//

#include "CodeGenPGO.h"
#include "CodeGenFunction.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/FileSystem.h"

using namespace clang;
using namespace CodeGen;

static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
  DiagnosticsEngine &Diags = CGM.getDiags();
  unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0");
  Diags.Report(diagID) << Message;
}

PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
  : CGM(CGM) {
  if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
    ReportBadPGOData(CGM, "failed to open pgo data file");
    return;
  }

  if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
    ReportBadPGOData(CGM, "pgo data file too big");
    return;
  }

  // Scan through the data file and map each function to the corresponding
  // file offset where its counts are stored.
  const char *BufferStart = DataBuffer->getBufferStart();
  const char *BufferEnd = DataBuffer->getBufferEnd();
  const char *CurPtr = BufferStart;
  uint64_t MaxCount = 0;
  while (CurPtr < BufferEnd) {
    // Read the mangled function name.
    const char *FuncName = CurPtr;
    // FIXME: Something will need to be added to distinguish static functions.
    CurPtr = strchr(CurPtr, ' ');
    if (!CurPtr) {
      ReportBadPGOData(CGM, "pgo data file has malformed function entry");
      return;
    }
    StringRef MangledName(FuncName, CurPtr - FuncName);

    // Read the number of counters.
    char *EndPtr;
    unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
      ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
      return;
    }
    CurPtr = EndPtr;

    // Read function count.
    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n') {
      ReportBadPGOData(CGM, "pgo-data file has bad count value");
      return;
    }
    CurPtr = EndPtr; // Point to '\n'.
    FunctionCounts[MangledName] = Count;
    MaxCount = Count > MaxCount ? Count : MaxCount;

    // There is one line for each counter; skip over those lines.
    // Since function count is already read, we start the loop from 1.
    for (unsigned N = 1; N < NumCounters; ++N) {
      CurPtr = strchr(++CurPtr, '\n');
      if (!CurPtr) {
        ReportBadPGOData(CGM, "pgo data file is missing some counter info");
        return;
      }
    }

    // Skip over the blank line separating functions.
    CurPtr += 2;

    DataOffsets[MangledName] = FuncName - BufferStart;
  }
  MaxFunctionCount = MaxCount;
}

/// Return true if a function is hot. If we know nothing about the function,
/// return false.
bool PGOProfileData::isHotFunction(StringRef MangledName) {
  llvm::StringMap<uint64_t>::const_iterator CountIter =
    FunctionCounts.find(MangledName);
  // If we know nothing about the function, return false.
  if (CountIter == FunctionCounts.end())
    return false;
  // FIXME: functions with >= 30% of the maximal function count are
  // treated as hot. This number is from preliminary tuning on SPEC.
  return CountIter->getValue() >= (uint64_t)(0.3 * (double)MaxFunctionCount);
}

/// Return true if a function is cold. If we know nothing about the function,
/// return false.
bool PGOProfileData::isColdFunction(StringRef MangledName) {
  llvm::StringMap<uint64_t>::const_iterator CountIter =
    FunctionCounts.find(MangledName);
  // If we know nothing about the function, return false.
  if (CountIter == FunctionCounts.end())
    return false;
  // FIXME: functions with <= 1% of the maximal function count are treated as
  // cold. This number is from preliminary tuning on SPEC.
  return CountIter->getValue() <= (uint64_t)(0.01 * (double)MaxFunctionCount);
}

bool PGOProfileData::getFunctionCounts(StringRef MangledName,
                                       std::vector<uint64_t> &Counts) {
  // Find the relevant section of the pgo-data file.
  llvm::StringMap<unsigned>::const_iterator OffsetIter =
    DataOffsets.find(MangledName);
  if (OffsetIter == DataOffsets.end())
    return true;
  const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();

  // Skip over the function name.
  CurPtr = strchr(CurPtr, ' ');
  assert(CurPtr && "pgo-data has corrupted function entry");

  // Read the number of counters.
  char *EndPtr;
  unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
  assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
         "pgo-data file has corrupted number of counters");
  CurPtr = EndPtr;

  Counts.reserve(NumCounters);

  for (unsigned N = 0; N < NumCounters; ++N) {
    // Read the count value.
    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
    if (EndPtr == CurPtr || *EndPtr != '\n') {
      ReportBadPGOData(CGM, "pgo-data file has bad count value");
      return true;
    }
    Counts.push_back(Count);
    CurPtr = EndPtr + 1;
  }

  // Make sure the number of counters matches up.
  if (Counts.size() != NumCounters) {
    ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
    return true;
  }

  return false;
}

void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return;

  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);

  llvm::Function *WriteoutF =
    CGM.getModule().getFunction("__llvm_pgo_writeout");
  if (!WriteoutF) {
    llvm::FunctionType *WriteoutFTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
    WriteoutF = llvm::Function::Create(WriteoutFTy,
                                       llvm::GlobalValue::InternalLinkage,
                                       "__llvm_pgo_writeout", &CGM.getModule());
  }
  WriteoutF->setUnnamedAddr(true);
  WriteoutF->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);

  llvm::BasicBlock *BB = WriteoutF->empty() ?
    llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();

  CGBuilderTy PGOBuilder(BB);

  llvm::Instruction *I = BB->getTerminator();
  if (!I)
    I = PGOBuilder.CreateRetVoid();
  PGOBuilder.SetInsertPoint(I);

  llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
  llvm::Type *Args[] = {
    Int8PtrTy,                       // const char *MangledName
    Int32Ty,                         // uint32_t NumCounters
    Int64PtrTy                       // uint64_t *Counters
  };
  llvm::FunctionType *FTy =
    llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
  llvm::Constant *EmitFunc =
    CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);

  llvm::Constant *MangledName =
    CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
  MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
  PGOBuilder.CreateCall3(EmitFunc, MangledName,
                         PGOBuilder.getInt32(NumRegionCounters),
                         PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
}

llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
  llvm::Function *WriteoutF =
    CGM.getModule().getFunction("__llvm_pgo_writeout");
  if (!WriteoutF)
    return NULL;

  // Create a small bit of code that registers the "__llvm_pgo_writeout" to
  // be executed at exit.
  llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
  if (F)
    return NULL;

  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                                    false);
  F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
                             "__llvm_pgo_init", &CGM.getModule());
  F->setUnnamedAddr(true);
  F->setLinkage(llvm::GlobalValue::InternalLinkage);
  F->addFnAttr(llvm::Attribute::NoInline);
  if (CGM.getCodeGenOpts().DisableRedZone)
    F->addFnAttr(llvm::Attribute::NoRedZone);

  llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
  CGBuilderTy PGOBuilder(BB);

  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
  llvm::Type *Params[] = {
    llvm::PointerType::get(FTy, 0)
  };
  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);

  // Inialize the environment and register the local writeout function.
  llvm::Constant *PGOInit =
    CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
  PGOBuilder.CreateCall(PGOInit, WriteoutF);
  PGOBuilder.CreateRetVoid();

  return F;
}

namespace {
  /// A StmtVisitor that fills a map of statements to PGO counters.
  struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
    /// The next counter value to assign.
    unsigned NextCounter;
    /// The map of statements to counters.
    llvm::DenseMap<const Stmt*, unsigned> *CounterMap;

    MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
      NextCounter(0), CounterMap(CounterMap) {
    }

    void VisitChildren(const Stmt *S) {
      for (Stmt::const_child_range I = S->children(); I; ++I)
        if (*I)
         this->Visit(*I);
    }
    void VisitStmt(const Stmt *S) { VisitChildren(S); }

    /// Assign a counter to track entry to the function body.
    void VisitFunctionDecl(const FunctionDecl *S) {
      (*CounterMap)[S->getBody()] = NextCounter++;
      Visit(S->getBody());
    }
    /// Assign a counter to track the block following a label.
    void VisitLabelStmt(const LabelStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign a counter for the body of a while loop.
    void VisitWhileStmt(const WhileStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getCond());
      Visit(S->getBody());
    }
    /// Assign a counter for the body of a do-while loop.
    void VisitDoStmt(const DoStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getBody());
      Visit(S->getCond());
    }
    /// Assign a counter for the body of a for loop.
    void VisitForStmt(const ForStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      if (S->getInit())
        Visit(S->getInit());
      const Expr *E;
      if ((E = S->getCond()))
        Visit(E);
      if ((E = S->getInc()))
        Visit(E);
      Visit(S->getBody());
    }
    /// Assign a counter for the body of a for-range loop.
    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getRangeStmt());
      Visit(S->getBeginEndStmt());
      Visit(S->getCond());
      Visit(S->getLoopVarStmt());
      Visit(S->getBody());
      Visit(S->getInc());
    }
    /// Assign a counter for the body of a for-collection loop.
    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getElement());
      Visit(S->getBody());
    }
    /// Assign a counter for the exit block of the switch statement.
    void VisitSwitchStmt(const SwitchStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getCond());
      Visit(S->getBody());
    }
    /// Assign a counter for a particular case in a switch. This counts jumps
    /// from the switch header as well as fallthrough from the case before this
    /// one.
    void VisitCaseStmt(const CaseStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign a counter for the default case of a switch statement. The count
    /// is the number of branches from the loop header to the default, and does
    /// not include fallthrough from previous cases. If we have multiple
    /// conditional branch blocks from the switch instruction to the default
    /// block, as with large GNU case ranges, this is the counter for the last
    /// edge in that series, rather than the first.
    void VisitDefaultStmt(const DefaultStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getSubStmt());
    }
    /// Assign a counter for the "then" part of an if statement. The count for
    /// the "else" part, if it exists, will be calculated from this counter.
    void VisitIfStmt(const IfStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getCond());
      Visit(S->getThen());
      if (S->getElse())
        Visit(S->getElse());
    }
    /// Assign a counter for the continuation block of a C++ try statement.
    void VisitCXXTryStmt(const CXXTryStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getTryBlock());
      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
        Visit(S->getHandler(I));
    }
    /// Assign a counter for a catch statement's handler block.
    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
      (*CounterMap)[S] = NextCounter++;
      Visit(S->getHandlerBlock());
    }
    /// Assign a counter for the "true" part of a conditional operator. The
    /// count in the "false" part will be calculated from this counter.
    void VisitConditionalOperator(const ConditionalOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getCond());
      Visit(E->getTrueExpr());
      Visit(E->getFalseExpr());
    }
    /// Assign a counter for the right hand side of a logical and operator.
    void VisitBinLAnd(const BinaryOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getLHS());
      Visit(E->getRHS());
    }
    /// Assign a counter for the right hand side of a logical or operator.
    void VisitBinLOr(const BinaryOperator *E) {
      (*CounterMap)[E] = NextCounter++;
      Visit(E->getLHS());
      Visit(E->getRHS());
    }
  };

  /// A StmtVisitor that propagates the raw counts through the AST and
  /// records the count at statements where the value may change.
  struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
    /// PGO state.
    CodeGenPGO &PGO;

    /// A flag that is set when the current count should be recorded on the
    /// next statement, such as at the exit of a loop.
    bool RecordNextStmtCount;

    /// The map of statements to count values.
    llvm::DenseMap<const Stmt*, uint64_t> *CountMap;

    /// BreakContinueStack - Keep counts of breaks and continues inside loops. 
    struct BreakContinue {
      uint64_t BreakCount;
      uint64_t ContinueCount;
      BreakContinue() : BreakCount(0), ContinueCount(0) {}
    };
    SmallVector<BreakContinue, 8> BreakContinueStack;

    ComputeRegionCounts(llvm::DenseMap<const Stmt*, uint64_t> *CountMap,
                        CodeGenPGO &PGO) :
      PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {
    }

    void RecordStmtCount(const Stmt *S) {
      if (RecordNextStmtCount) {
        (*CountMap)[S] = PGO.getCurrentRegionCount();
        RecordNextStmtCount = false;
      }
    }

    void VisitStmt(const Stmt *S) {
      RecordStmtCount(S);
      for (Stmt::const_child_range I = S->children(); I; ++I) {
        if (*I)
         this->Visit(*I);
      }
    }

    void VisitFunctionDecl(const FunctionDecl *S) {
      RegionCounter Cnt(PGO, S->getBody());
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
    }

    void VisitReturnStmt(const ReturnStmt *S) {
      RecordStmtCount(S);
      if (S->getRetValue())
        Visit(S->getRetValue());
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitGotoStmt(const GotoStmt *S) {
      RecordStmtCount(S);
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitLabelStmt(const LabelStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      (*CountMap)[S] = PGO.getCurrentRegionCount();
      Visit(S->getSubStmt());
    }

    void VisitBreakStmt(const BreakStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
      BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitContinueStmt(const ContinueStmt *S) {
      RecordStmtCount(S);
      assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
      BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
      PGO.setCurrentRegionUnreachable();
      RecordNextStmtCount = true;
    }

    void VisitWhileStmt(const WhileStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first so the break/continue adjustments can be
      // included when visiting the condition.
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // ...then go back and propagate counts through the condition. The count
      // at the start of the condition is the sum of the incoming edges,
      // the backedge from the end of the loop body, and the edges from
      // continue statements.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitDoStmt(const DoStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();
      // The count at the start of the condition is equal to the count at the
      // end of the body. The adjusted count does not include either the
      // fall-through count coming into the loop or the continue count, so add
      // both of those separately. This is coincidentally the same equation as
      // with while loops but for different reasons.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() + BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitForStmt(const ForStmt *S) {
      RecordStmtCount(S);
      if (S->getInit())
        Visit(S->getInit());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      if (S->getInc()) {
        Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                  BreakContinueStack.back().ContinueCount);
        (*CountMap)[S->getInc()] = PGO.getCurrentRegionCount();
        Visit(S->getInc());
        Cnt.adjustForControlFlow();
      }

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      if (S->getCond()) {
        Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                  Cnt.getAdjustedCount() +
                                  BC.ContinueCount);
        (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
        Visit(S->getCond());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
      RecordStmtCount(S);
      Visit(S->getRangeStmt());
      Visit(S->getBeginEndStmt());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      // Visit the body region first. (This is basically the same as a while
      // loop; see further comments in VisitWhileStmt.)
      Cnt.beginRegion();
      (*CountMap)[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
      Visit(S->getLoopVarStmt());
      Visit(S->getBody());
      Cnt.adjustForControlFlow();

      // The increment is essentially part of the body but it needs to include
      // the count for all the continue statements.
      Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                BreakContinueStack.back().ContinueCount);
      (*CountMap)[S->getInc()] = PGO.getCurrentRegionCount();
      Visit(S->getInc());
      Cnt.adjustForControlFlow();

      BreakContinue BC = BreakContinueStack.pop_back_val();

      // ...then go back and propagate counts through the condition.
      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
                                Cnt.getAdjustedCount() +
                                BC.ContinueCount);
      (*CountMap)[S->getCond()] = PGO.getCurrentRegionCount();
      Visit(S->getCond());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
      RecordStmtCount(S);
      Visit(S->getElement());
      RegionCounter Cnt(PGO, S);
      BreakContinueStack.push_back(BreakContinue());
      Cnt.beginRegion();
      (*CountMap)[S->getBody()] = PGO.getCurrentRegionCount();
      Visit(S->getBody());
      BreakContinue BC = BreakContinueStack.pop_back_val();
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
      RecordNextStmtCount = true;
    }

    void VisitSwitchStmt(const SwitchStmt *S) {
      RecordStmtCount(S);
      Visit(S->getCond());
      PGO.setCurrentRegionUnreachable();
      BreakContinueStack.push_back(BreakContinue());
      Visit(S->getBody());
      // If the switch is inside a loop, add the continue counts.
      BreakContinue BC = BreakContinueStack.pop_back_val();
      if (!BreakContinueStack.empty())
        BreakContinueStack.back().ContinueCount += BC.ContinueCount;
      RegionCounter ExitCnt(PGO, S);
      ExitCnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCaseStmt(const CaseStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitDefaultStmt(const DefaultStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
      (*CountMap)[S] = Cnt.getCount();
      RecordNextStmtCount = true;
      Visit(S->getSubStmt());
    }

    void VisitIfStmt(const IfStmt *S) {
      RecordStmtCount(S);
      RegionCounter Cnt(PGO, S);
      Visit(S->getCond());

      Cnt.beginRegion();
      (*CountMap)[S->getThen()] = PGO.getCurrentRegionCount();
      Visit(S->getThen());
      Cnt.adjustForControlFlow();

      if (S->getElse()) {
        Cnt.beginElseRegion();
        (*CountMap)[S->getElse()] = PGO.getCurrentRegionCount();
        Visit(S->getElse());
        Cnt.adjustForControlFlow();
      }
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitCXXTryStmt(const CXXTryStmt *S) {
      RecordStmtCount(S);
      Visit(S->getTryBlock());
      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
        Visit(S->getHandler(I));
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      RecordNextStmtCount = true;
    }

    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
      RecordNextStmtCount = false;
      RegionCounter Cnt(PGO, S);
      Cnt.beginRegion();
      (*CountMap)[S] = PGO.getCurrentRegionCount();
      Visit(S->getHandlerBlock());
    }

    void VisitConditionalOperator(const ConditionalOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getCond());

      Cnt.beginRegion();
      (*CountMap)[E->getTrueExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getTrueExpr());
      Cnt.adjustForControlFlow();

      Cnt.beginElseRegion();
      (*CountMap)[E->getFalseExpr()] = PGO.getCurrentRegionCount();
      Visit(E->getFalseExpr());
      Cnt.adjustForControlFlow();

      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLAnd(const BinaryOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      (*CountMap)[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }

    void VisitBinLOr(const BinaryOperator *E) {
      RecordStmtCount(E);
      RegionCounter Cnt(PGO, E);
      Visit(E->getLHS());
      Cnt.beginRegion();
      (*CountMap)[E->getRHS()] = PGO.getCurrentRegionCount();
      Visit(E->getRHS());
      Cnt.adjustForControlFlow();
      Cnt.applyAdjustmentsToRegion(0);
      RecordNextStmtCount = true;
    }
  };
}

void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
  bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
  PGOProfileData *PGOData = CGM.getPGOData();
  if (!InstrumentRegions && !PGOData)
    return;
  const Decl *D = GD.getDecl();
  if (!D)
    return;
  mapRegionCounters(D);
  if (InstrumentRegions)
    emitCounterVariables();
  if (PGOData) {
    loadRegionCounts(GD, PGOData);
    computeRegionCounts(D);
  }
}

void CodeGenPGO::mapRegionCounters(const Decl *D) {
  RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
  MapRegionCounters Walker(RegionCounterMap);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.VisitFunctionDecl(FD);
  NumRegionCounters = Walker.NextCounter;
}

void CodeGenPGO::computeRegionCounts(const Decl *D) {
  StmtCountMap = new llvm::DenseMap<const Stmt*, uint64_t>();
  ComputeRegionCounts Walker(StmtCountMap, *this);
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    Walker.VisitFunctionDecl(FD);
}

void CodeGenPGO::emitCounterVariables() {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
                                                    NumRegionCounters);
  RegionCounters =
    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
                             llvm::GlobalVariable::PrivateLinkage,
                             llvm::Constant::getNullValue(CounterTy),
                             "__llvm_pgo_ctr");
}

void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
    return;
  llvm::Value *Addr =
    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
  Builder.CreateStore(Count, Addr);
}

void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
  // For now, ignore the counts from the PGO data file only if the number of
  // counters does not match. This could be tightened down in the future to
  // ignore counts when the input changes in various ways, e.g., by comparing a
  // hash value based on some characteristics of the input.
  RegionCounts = new std::vector<uint64_t>();
  if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
      RegionCounts->size() != NumRegionCounters) {
    delete RegionCounts;
    RegionCounts = 0;
  }
}

void CodeGenPGO::destroyRegionCounters() {
  if (RegionCounterMap != 0)
    delete RegionCounterMap;
  if (StmtCountMap != 0)
    delete StmtCountMap;
  if (RegionCounts != 0)
    delete RegionCounts;
}

llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
                                              uint64_t FalseCount) {
  if (!TrueCount && !FalseCount)
    return 0;

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  // TODO: need to scale down to 32-bits
  // According to Laplace's Rule of Succession, it is better to compute the
  // weight based on the count plus 1.
  return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
}

llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  // TODO: need to scale down to 32-bits, instead of just truncating.
  // According to Laplace's Rule of Succession, it is better to compute the
  // weight based on the count plus 1.
  SmallVector<uint32_t, 16> ScaledWeights;
  ScaledWeights.reserve(Weights.size());
  for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
       WI != WE; ++WI) {
    ScaledWeights.push_back(*WI + 1);
  }
  return MDHelper.createBranchWeights(ScaledWeights);
}

llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond,
                                            RegionCounter &Cnt) {
  if (!haveRegionCounts())
    return 0;
  uint64_t LoopCount = Cnt.getCount();
  uint64_t CondCount = 0;
  bool Found = getStmtCount(Cond, CondCount);
  assert(Found && "missing expected loop condition count");
  (void)Found;
  if (CondCount == 0)
    return 0;
  return createBranchWeights(LoopCount,
                             std::max(CondCount, LoopCount) - LoopCount);
}
@


