head	1.1;
branch	1.1.1;
access;
symbols
	netbsd-11-0-RC7:1.1.1.11
	netbsd-11-0-RC6:1.1.1.11
	netbsd-11-0-RC5:1.1.1.11
	netbsd-11-0-RC4:1.1.1.11
	netbsd-11-0-RC3:1.1.1.11
	netbsd-11-0-RC2:1.1.1.11
	netbsd-11-0-RC1:1.1.1.11
	perseant-exfatfs-base-20250801:1.1.1.11
	netbsd-11:1.1.1.11.0.10
	netbsd-11-base:1.1.1.11
	netbsd-10-1-RELEASE:1.1.1.11
	perseant-exfatfs-base-20240630:1.1.1.11
	perseant-exfatfs:1.1.1.11.0.8
	perseant-exfatfs-base:1.1.1.11
	netbsd-8-3-RELEASE:1.1.1.8
	netbsd-9-4-RELEASE:1.1.1.10
	netbsd-10-0-RELEASE:1.1.1.11
	netbsd-10-0-RC6:1.1.1.11
	netbsd-10-0-RC5:1.1.1.11
	netbsd-10-0-RC4:1.1.1.11
	netbsd-10-0-RC3:1.1.1.11
	netbsd-10-0-RC2:1.1.1.11
	netbsd-10-0-RC1:1.1.1.11
	netbsd-10:1.1.1.11.0.6
	netbsd-10-base:1.1.1.11
	netbsd-9-3-RELEASE:1.1.1.10
	cjep_sun2x:1.1.1.11.0.4
	cjep_sun2x-base:1.1.1.11
	cjep_staticlib_x-base1:1.1.1.11
	netbsd-9-2-RELEASE:1.1.1.10
	cjep_staticlib_x:1.1.1.11.0.2
	cjep_staticlib_x-base:1.1.1.11
	netbsd-9-1-RELEASE:1.1.1.10
	phil-wifi-20200421:1.1.1.11
	phil-wifi-20200411:1.1.1.11
	phil-wifi-20200406:1.1.1.11
	netbsd-8-2-RELEASE:1.1.1.8
	netbsd-9-0-RELEASE:1.1.1.10
	netbsd-9-0-RC2:1.1.1.10
	netbsd-9-0-RC1:1.1.1.10
	netbsd-9:1.1.1.10.0.2
	netbsd-9-base:1.1.1.10
	phil-wifi-20190609:1.1.1.10
	netbsd-8-1-RELEASE:1.1.1.8
	netbsd-8-1-RC1:1.1.1.8
	pgoyette-compat-merge-20190127:1.1.1.9.2.1
	pgoyette-compat-20190127:1.1.1.10
	pgoyette-compat-20190118:1.1.1.10
	pgoyette-compat-1226:1.1.1.10
	pgoyette-compat-1126:1.1.1.10
	pgoyette-compat-1020:1.1.1.10
	pgoyette-compat-0930:1.1.1.10
	pgoyette-compat-0906:1.1.1.10
	netbsd-7-2-RELEASE:1.1.1.5.2.1
	pgoyette-compat-0728:1.1.1.10
	clang-337282:1.1.1.10
	netbsd-8-0-RELEASE:1.1.1.8
	phil-wifi:1.1.1.9.0.4
	phil-wifi-base:1.1.1.9
	pgoyette-compat-0625:1.1.1.9
	netbsd-8-0-RC2:1.1.1.8
	pgoyette-compat-0521:1.1.1.9
	pgoyette-compat-0502:1.1.1.9
	pgoyette-compat-0422:1.1.1.9
	netbsd-8-0-RC1:1.1.1.8
	pgoyette-compat-0415:1.1.1.9
	pgoyette-compat-0407:1.1.1.9
	pgoyette-compat-0330:1.1.1.9
	pgoyette-compat-0322:1.1.1.9
	pgoyette-compat-0315:1.1.1.9
	netbsd-7-1-2-RELEASE:1.1.1.5.2.1
	pgoyette-compat:1.1.1.9.0.2
	pgoyette-compat-base:1.1.1.9
	netbsd-7-1-1-RELEASE:1.1.1.5.2.1
	clang-319952:1.1.1.9
	matt-nb8-mediatek:1.1.1.8.0.10
	matt-nb8-mediatek-base:1.1.1.8
	clang-309604:1.1.1.9
	perseant-stdc-iso10646:1.1.1.8.0.8
	perseant-stdc-iso10646-base:1.1.1.8
	netbsd-8:1.1.1.8.0.6
	netbsd-8-base:1.1.1.8
	prg-localcount2-base3:1.1.1.8
	prg-localcount2-base2:1.1.1.8
	prg-localcount2-base1:1.1.1.8
	prg-localcount2:1.1.1.8.0.4
	prg-localcount2-base:1.1.1.8
	pgoyette-localcount-20170426:1.1.1.8
	bouyer-socketcan-base1:1.1.1.8
	pgoyette-localcount-20170320:1.1.1.8
	netbsd-7-1:1.1.1.5.2.1.0.6
	netbsd-7-1-RELEASE:1.1.1.5.2.1
	netbsd-7-1-RC2:1.1.1.5.2.1
	clang-294123:1.1.1.8
	netbsd-7-nhusb-base-20170116:1.1.1.5.2.1
	bouyer-socketcan:1.1.1.8.0.2
	bouyer-socketcan-base:1.1.1.8
	clang-291444:1.1.1.8
	pgoyette-localcount-20170107:1.1.1.7
	netbsd-7-1-RC1:1.1.1.5.2.1
	pgoyette-localcount-20161104:1.1.1.7
	netbsd-7-0-2-RELEASE:1.1.1.5.2.1
	localcount-20160914:1.1.1.7
	netbsd-7-nhusb:1.1.1.5.2.1.0.4
	netbsd-7-nhusb-base:1.1.1.5.2.1
	clang-280599:1.1.1.7
	pgoyette-localcount-20160806:1.1.1.7
	pgoyette-localcount-20160726:1.1.1.7
	pgoyette-localcount:1.1.1.7.0.2
	pgoyette-localcount-base:1.1.1.7
	netbsd-7-0-1-RELEASE:1.1.1.5.2.1
	clang-261930:1.1.1.7
	netbsd-7-0:1.1.1.5.2.1.0.2
	netbsd-7-0-RELEASE:1.1.1.5.2.1
	netbsd-7-0-RC3:1.1.1.5.2.1
	netbsd-7-0-RC2:1.1.1.5.2.1
	netbsd-7-0-RC1:1.1.1.5.2.1
	clang-237755:1.1.1.6
	clang-232565:1.1.1.6
	clang-227398:1.1.1.6
	tls-maxphys-base:1.1.1.5
	tls-maxphys:1.1.1.5.0.4
	netbsd-7:1.1.1.5.0.2
	netbsd-7-base:1.1.1.5
	clang-215315:1.1.1.5
	clang-209886:1.1.1.5
	yamt-pagecache:1.1.1.4.0.4
	yamt-pagecache-base9:1.1.1.4
	tls-earlyentropy:1.1.1.4.0.2
	tls-earlyentropy-base:1.1.1.5
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.1.1.4
	riastradh-drm2-base3:1.1.1.4
	clang-202566:1.1.1.4
	clang-201163:1.1.1.4
	clang-199312:1.1.1.3
	clang-198450:1.1.1.2
	clang-196603:1.1.1.1
	clang-195771:1.1.1.1
	LLVM:1.1.1;
locks; strict;
comment	@// @;


1.1
date	2013.11.28.14.14.49;	author joerg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	ow8OybrawrB1f3fx;

1.1.1.1
date	2013.11.28.14.14.49;	author joerg;	state Exp;
branches;
next	1.1.1.2;
commitid	ow8OybrawrB1f3fx;

1.1.1.2
date	2014.01.05.15.38.28;	author joerg;	state Exp;
branches;
next	1.1.1.3;
commitid	wh3aCSIWykURqWjx;

1.1.1.3
date	2014.01.15.21.26.19;	author joerg;	state Exp;
branches;
next	1.1.1.4;
commitid	NQXlzzA0SPkc5glx;

1.1.1.4
date	2014.02.14.20.07.06;	author joerg;	state Exp;
branches
	1.1.1.4.2.1
	1.1.1.4.4.1;
next	1.1.1.5;
commitid	annVkZ1sc17rF6px;

1.1.1.5
date	2014.05.30.18.14.40;	author joerg;	state Exp;
branches
	1.1.1.5.2.1
	1.1.1.5.4.1;
next	1.1.1.6;
commitid	8q0kdlBlCn09GACx;

1.1.1.6
date	2015.01.29.19.57.32;	author joerg;	state Exp;
branches;
next	1.1.1.7;
commitid	mlISSizlPKvepX7y;

1.1.1.7
date	2016.02.27.22.11.52;	author joerg;	state Exp;
branches
	1.1.1.7.2.1;
next	1.1.1.8;
commitid	tIimz3oDlh1NpBWy;

1.1.1.8
date	2017.01.11.10.35.42;	author joerg;	state Exp;
branches;
next	1.1.1.9;
commitid	CNnUNfII1jgNmxBz;

1.1.1.9
date	2017.08.01.19.34.57;	author joerg;	state Exp;
branches
	1.1.1.9.2.1
	1.1.1.9.4.1;
next	1.1.1.10;
commitid	pMuDy65V0VicSx1A;

1.1.1.10
date	2018.07.17.18.31.04;	author joerg;	state Exp;
branches;
next	1.1.1.11;
commitid	wDzL46ALjrCZgwKA;

1.1.1.11
date	2019.11.13.22.19.20;	author joerg;	state dead;
branches;
next	;
commitid	QD8YATxuNG34YJKB;

1.1.1.4.2.1
date	2014.08.10.07.08.07;	author tls;	state Exp;
branches;
next	;
commitid	t01A1TLTYxkpGMLx;

1.1.1.4.4.1
date	2014.02.14.20.07.06;	author yamt;	state dead;
branches;
next	1.1.1.4.4.2;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.4.4.2
date	2014.05.22.16.18.26;	author yamt;	state Exp;
branches;
next	;
commitid	WSrDtL5nYAUyiyBx;

1.1.1.5.2.1
date	2015.06.04.20.04.27;	author snj;	state Exp;
branches;
next	;
commitid	yRnjq9fueSo6n9oy;

1.1.1.5.4.1
date	2014.05.30.18.14.40;	author tls;	state dead;
branches;
next	1.1.1.5.4.2;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.5.4.2
date	2014.08.19.23.47.26;	author tls;	state Exp;
branches;
next	;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.7.2.1
date	2017.03.20.06.52.36;	author pgoyette;	state Exp;
branches;
next	;
commitid	jjw7cAwgyKq7RfKz;

1.1.1.9.2.1
date	2018.07.28.04.33.16;	author pgoyette;	state Exp;
branches;
next	;
commitid	1UP1xAIUxv1ZgRLA;

1.1.1.9.4.1
date	2019.06.10.21.45.20;	author christos;	state Exp;
branches;
next	1.1.1.9.4.2;
commitid	jtc8rnCzWiEEHGqB;

1.1.1.9.4.2
date	2020.04.13.07.46.30;	author martin;	state dead;
branches;
next	;
commitid	X01YhRUPVUDaec4C;


desc
@@


1.1
log
@Initial revision
@
text
@//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//

#include <string>
#include <vector>

#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/Twine.h"

namespace clang {
namespace ast_matchers {
namespace dynamic {

/// \brief One token produced by the tokenizer: spelling, kind, range, value.
struct Parser::TokenInfo {
  /// \brief Every kind of token the tokenizer can produce.
  enum TokenKind {
    TK_Eof = 0,          // input exhausted
    TK_OpenParen = 1,    // '('
    TK_CloseParen = 2,   // ')'
    TK_Comma = 3,        // ','
    TK_Period = 4,       // '.'
    TK_Literal = 5,      // quoted string or unsigned number; see Value
    TK_Ident = 6,        // alphanumeric run that does not start with a digit
    TK_InvalidChar = 7,  // any other single character
    TK_Error = 8         // malformed literal; error already reported
  };

  /// \brief Identifier that must follow '.' after a matcher ("bind").
  static const char* const ID_Bind;

  TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}  // defaults to Eof

  StringRef Text;      // spelling as it appears in the input
  TokenKind Kind;
  SourceRange Range;   // source range of the token
  VariantValue Value;  // payload; the tokenizer assigns it for TK_Literal only
};

const char* const Parser::TokenInfo::ID_Bind = "bind";

/// \brief Tokenizer for matcher code; keeps exactly one token of lookahead.
class Parser::CodeTokenizer {
public:
  explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
    NextToken = getNextToken();  // lex the first token up front
  }

  /// \brief Returns but doesn't consume the next token.
  const TokenInfo &peekNextToken() const { return NextToken; }

  /// \brief Consumes and returns the next token, then lexes its successor.
  TokenInfo consumeNextToken() {
    TokenInfo ThisToken = NextToken;
    NextToken = getNextToken();
    return ThisToken;
  }

  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }

private:
  TokenInfo getNextToken() {  // lexes one token from the front of Code
    consumeWhitespace();
    TokenInfo Result;
    Result.Range.Start = currentLocation();

    if (Code.empty()) {
      Result.Kind = TokenInfo::TK_Eof;  // Range.End is left unassigned here
      Result.Text = "";
      return Result;
    }

    switch (Code[0]) {
    case ',':
      Result.Kind = TokenInfo::TK_Comma;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '.':
      Result.Kind = TokenInfo::TK_Period;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '(':
      Result.Kind = TokenInfo::TK_OpenParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case ')':
      Result.Kind = TokenInfo::TK_CloseParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;

    case '"':
    case '\'':
      // Either quote character opens a string literal.
      consumeStringLiteral(&Result);
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      // A leading digit starts an unsigned literal.
      consumeUnsignedLiteral(&Result);
      break;

    default:
      if (isAlphanumeric(Code[0])) {
        // Identifier: the longest run of alphanumeric characters.
        size_t TokenLength = 1;
        while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
          ++TokenLength;
        Result.Kind = TokenInfo::TK_Ident;
        Result.Text = Code.substr(0, TokenLength);
        Code = Code.drop_front(TokenLength);
      } else {
        Result.Kind = TokenInfo::TK_InvalidChar;  // reported by the parser
        Result.Text = Code.substr(0, 1);
        Code = Code.drop_front(1);
      }
      break;
    }

    Result.Range.End = currentLocation();
    return Result;
  }

  /// \brief Consume an unsigned literal; sets TK_Error if it does not convert.
  void consumeUnsignedLiteral(TokenInfo *Result) {
    unsigned Length = 1;
    if (Code.size() > 1) {
      // Consume the 'x' or 'b' radix modifier, if present.
      switch (toLowercase(Code[1])) {
      case 'x': case 'b': Length = 2;
      }
    }
    while (Length < Code.size() && isHexDigit(Code[Length]))
      ++Length;  // hex digits are taken here whatever the radix

    Result->Text = Code.substr(0, Length);
    Code = Code.drop_front(Length);

    unsigned Value;
    if (!Result->Text.getAsInteger(0, Value)) {  // false is the success case
      Result->Kind = TokenInfo::TK_Literal;
      Result->Value = Value;
    } else {
      SourceRange Range;
      Range.Start = Result->Range.Start;
      Range.End = currentLocation();
      Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
      Result->Kind = TokenInfo::TK_Error;
    }
  }

  /// \brief Consume a string literal; sets TK_Error if it is unterminated.
  ///
  /// \c Code must start at the opening quote. The literal ends at the next
  /// unescaped copy of that quote; a backslash skips the character after it.
  void consumeStringLiteral(TokenInfo *Result) {
    bool InEscape = false;
    const char Marker = Code[0];
    for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
      if (InEscape) {
        InEscape = false;
        continue;
      }
      if (Code[Length] == '\\') {
        InEscape = true;
        continue;
      }
      if (Code[Length] == Marker) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Text = Code.substr(0, Length + 1);
        Result->Value = Code.substr(1, Length - 1).str();  // quotes dropped
        Code = Code.drop_front(Length + 1);
        return;
      }
    }

    StringRef ErrorText = Code;  // no closing quote was found
    Code = Code.drop_front(Code.size());  // consume the rest of the input
    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
    Result->Kind = TokenInfo::TK_Error;
  }

  /// \brief Consume leading whitespace from \c Code and keep track of lines.
  void consumeWhitespace() {
    while (!Code.empty() && isWhitespace(Code[0])) {
      if (Code[0] == '\n') {
        ++Line;
        StartOfLine = Code.drop_front();  // next line starts after the newline
      }
      Code = Code.drop_front();
    }
  }

  SourceLocation currentLocation() {  // position of the front of Code
    SourceLocation Location;
    Location.Line = Line;
    Location.Column = Code.data() - StartOfLine.data() + 1;  // 1-based column
    return Location;
  }

  StringRef Code;         // input that has not been lexed yet
  StringRef StartOfLine;  // input from the start of the current line onwards
  unsigned Line;          // current line number; the first line is 1
  Diagnostics *Error;     // receives the tokenizer's literal errors
  TokenInfo NextToken;    // the single token of lookahead
};

Parser::Sema::~Sema() {}  // Out-of-line definition; the body is empty.

/// \brief Parse `Name(Arg, ...)`, optionally followed by `.bind("id")`.
/// \return \c true on success, in which case \c Value holds the matcher that
///   the Sema built. Returns \c false on a syntax error, when an argument
///   fails to parse, or when the Sema hands back a null matcher.
bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();
  assert(NameToken.Kind == TokenInfo::TK_Ident);
  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
        << OpenToken.Text;
    return false;
  }

  std::vector<ParserValue> Args;
  TokenInfo EndToken;  // Kind stays TK_Eof until ')' is consumed
  while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
    if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
      // End of args.
      EndToken = Tokenizer->consumeNextToken();
      break;
    }
    if (Args.size() > 0) {
      // Every argument after the first must be preceded by a comma.
      const TokenInfo CommaToken = Tokenizer->consumeNextToken();
      if (CommaToken.Kind != TokenInfo::TK_Comma) {
        Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
            << CommaToken.Text;
        return false;
      }
    }

    Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                             NameToken.Text, NameToken.Range, Args.size() + 1);
    ParserValue ArgValue;
    ArgValue.Text = Tokenizer->peekNextToken().Text;  // argument's first token
    ArgValue.Range = Tokenizer->peekNextToken().Range;
    if (!parseExpressionImpl(&ArgValue.Value)) return false;

    Args.push_back(ArgValue);
  }

  if (EndToken.Kind == TokenInfo::TK_Eof) {  // input ended before any ')'
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  std::string BindID;  // stays empty when there is no .bind(...)
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Parse .bind("foo"): all four tokens are consumed, then validated in order.
    Tokenizer->consumeNextToken();  // consume the period.
    const TokenInfo BindToken = Tokenizer->consumeNextToken();
    const TokenInfo OpenToken = Tokenizer->consumeNextToken();
    const TokenInfo IDToken = Tokenizer->consumeNextToken();
    const TokenInfo CloseToken = Tokenizer->consumeNextToken();

    // TODO: We could use different error codes for each/some to be more
    //       explicit about the syntax error.
    if (BindToken.Kind != TokenInfo::TK_Ident ||
        BindToken.Text != TokenInfo::ID_Bind) {
      Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
      Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
      Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
      Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    BindID = IDToken.Value.getString();
  }

  // The matcher range runs from the name to the closing parenthesis.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;
  VariantMatcher Result = S->actOnMatcherExpression(
      NameToken.Text, MatcherRange, BindID, Args, Error);
  if (Result.isNull()) return false;

  *Value = Result;
  return true;
}

/// \brief Parse an <Expression>: a literal or a matcher expression.
bool Parser::parseExpressionImpl(VariantValue *Value) {
  switch (Tokenizer->nextTokenKind()) {
  case TokenInfo::TK_Literal:
    *Value = Tokenizer->consumeNextToken().Value;
    return true;

  case TokenInfo::TK_Ident:
    return parseMatcherExpressionImpl(Value);  // it consumes the identifier

  case TokenInfo::TK_Eof:
    Error->addError(Tokenizer->consumeNextToken().Range,
                    Error->ET_ParserNoCode);
    return false;

  case TokenInfo::TK_Error:
    // This error was already reported by the tokenizer.
    return false;  // the error token itself is not consumed

  case TokenInfo::TK_OpenParen:
  case TokenInfo::TK_CloseParen:
  case TokenInfo::TK_Comma:
  case TokenInfo::TK_Period:
  case TokenInfo::TK_InvalidChar:  // none of these can start an expression
    const TokenInfo Token = Tokenizer->consumeNextToken();
    Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
    return false;
  }

  llvm_unreachable("Unknown token kind.");
}

Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
               Diagnostics *Error)  // only stores the three pointers
    : Tokenizer(Tokenizer), S(S), Error(Error) {}

class RegistrySema : public Parser::Sema {  // Sema that defers to Registry
public:
  virtual ~RegistrySema() {}
  VariantMatcher actOnMatcherExpression(StringRef MatcherName,
                                        const SourceRange &NameRange,
                                        StringRef BindID,
                                        ArrayRef<ParserValue> Args,
                                        Diagnostics *Error) {
    if (BindID.empty()) {  // an empty ID requests an unbound matcher
      return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
    } else {
      return Registry::constructBoundMatcher(MatcherName, NameRange, BindID,
                                             Args, Error);
    }
  }
};

bool Parser::parseExpression(StringRef Code, VariantValue *Value,
                             Diagnostics *Error) {
  RegistrySema S;  // this overload always uses the Registry-backed Sema
  return parseExpression(Code, &S, Value, Error);
}

bool Parser::parseExpression(StringRef Code, Sema *S,
                             VariantValue *Value, Diagnostics *Error) {
  CodeTokenizer Tokenizer(Code, Error);  // tokenizer errors also go to Error
  if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
  if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
    Error->addError(Tokenizer.peekNextToken().Range,
                    Error->ET_ParserTrailingCode);
    return false;  // input continued after a complete expression
  }
  return true;
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
  RegistrySema S;  // this overload always uses the Registry-backed Sema
  return parseMatcherExpression(Code, &S, Error);
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
                               Diagnostics *Error) {
  VariantValue Value;
  if (!parseExpression(Code, S, &Value, Error))
    return llvm::Optional<DynTypedMatcher>();  // parsing failed
  if (!Value.isMatcher()) {  // e.g. the input was a bare literal
    Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
    return llvm::Optional<DynTypedMatcher>();
  }
  llvm::Optional<DynTypedMatcher> Result =
      Value.getMatcher().getSingleMatcher();
  if (!Result.hasValue()) {  // error recorded; the empty Optional is returned
    Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
        << Value.getTypeAsString();
  }
  return Result;
}

}  // namespace dynamic
}  // namespace ast_matchers
}  // namespace clang
@


1.1.1.1
log
@Import Clang 3.4rc1 r195771.
@
text
@@


1.1.1.2
log
@Import clang 3.5svn r198450.
@
text
@a20 1
#include "llvm/ADT/Optional.h"
a244 3
  llvm::Optional<MatcherCtor> Ctor =
      S->lookupMatcherCtor(NameToken.Text, NameToken.Range, Error);

a308 3
  if (!Ctor)
    return false;

d315 1
a315 1
      *Ctor, MatcherRange, BindID, Args, Error);
d361 1
a361 6
  llvm::Optional<MatcherCtor> lookupMatcherCtor(StringRef MatcherName,
                                                const SourceRange &NameRange,
                                                Diagnostics *Error) {
    return Registry::lookupMatcherCtor(MatcherName, NameRange, Error);
  }
  VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
d367 1
a367 1
      return Registry::constructMatcher(Ctor, NameRange, Args, Error);
d369 2
a370 2
      return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                             Error);
@


1.1.1.3
log
@Import Clang 3.5svn r199312
@
text
@d15 3
a22 2
#include <string>
#include <vector>
@


1.1.1.4
log
@Import Clang 3.5svn r201163.
@
text
@d31 9
a39 10
    TK_Eof,
    TK_OpenParen,
    TK_CloseParen,
    TK_Comma,
    TK_Period,
    TK_Literal,
    TK_Ident,
    TK_InvalidChar,
    TK_Error,
    TK_CodeCompletion
d59 1
a59 9
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(0) {
    NextToken = getNextToken();
  }

  CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
                unsigned CodeCompletionOffset)
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
a80 7
    if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
      Result.Kind = TokenInfo::TK_CodeCompletion;
      Result.Text = StringRef(CodeCompletionLocation, 0);
      CodeCompletionLocation = 0;
      return Result;
    }

d125 1
a125 13
        while (1) {
          // A code completion location in/immediately after an identifier will
          // cause the portion of the identifier before the code completion
          // location to become a code completion token.
          if (CodeCompletionLocation == Code.data() + TokenLength) {
            CodeCompletionLocation = 0;
            Result.Kind = TokenInfo::TK_CodeCompletion;
            Result.Text = Code.substr(0, TokenLength);
            Code = Code.drop_front(TokenLength);
            return Result;
          }
          if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
            break;
a126 1
        }
a226 1
  const char *CodeCompletionLocation;
a230 16
struct Parser::ScopedContextEntry {
  Parser *P;

  ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
    P->ContextStack.push_back(std::make_pair(C, 0u));
  }

  ~ScopedContextEntry() {
    P->ContextStack.pop_back();
  }

  void nextArg() {
    ++P->ContextStack.back().second;
  }
};

d247 1
d250 13
a262 18

  {
    ScopedContextEntry SCE(this, Ctor ? *Ctor : 0);

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (Args.size() > 0) {
        // We must find a , token to continue.
        const TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
d264 1
d266 6
a271 9
      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;
      ArgValue.Text = Tokenizer->peekNextToken().Text;
      ArgValue.Range = Tokenizer->peekNextToken().Range;
      if (!parseExpressionImpl(&ArgValue.Value)) {
        return false;
      }
d273 1
a273 3
      Args.push_back(ArgValue);
      SCE.nextArg();
    }
a285 5
    if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(BindToken, "bind(\"", "bind");
      return false;
    }

a327 33
// If the prefix of this completion matches the completion token, add it to
// Completions minus the prefix.
void Parser::addCompletion(const TokenInfo &CompToken, StringRef TypedText,
                           StringRef Decl) {
  if (TypedText.size() >= CompToken.Text.size() &&
      TypedText.substr(0, CompToken.Text.size()) == CompToken.Text) {
    Completions.push_back(
        MatcherCompletion(TypedText.substr(CompToken.Text.size()), Decl));
  }
}

void Parser::addExpressionCompletions() {
  const TokenInfo CompToken = Tokenizer->consumeNextToken();
  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);

  // We cannot complete code if there is an invalid element on the context
  // stack.
  for (ContextStackTy::iterator I = ContextStack.begin(),
                                E = ContextStack.end();
       I != E; ++I) {
    if (!I->first)
      return;
  }

  std::vector<MatcherCompletion> RegCompletions =
      Registry::getCompletions(ContextStack);
  for (std::vector<MatcherCompletion>::iterator I = RegCompletions.begin(),
                                                E = RegCompletions.end();
       I != E; ++I) {
    addCompletion(CompToken, I->TypedText, I->MatcherDecl);
  }
}

a337 4
  case TokenInfo::TK_CodeCompletion:
    addExpressionCompletions();
    return false;

a403 12
std::vector<MatcherCompletion>
Parser::completeExpression(StringRef Code, unsigned CompletionOffset) {
  Diagnostics Error;
  CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
  RegistrySema S;
  Parser P(&Tokenizer, &S, &Error);
  VariantValue Dummy;
  P.parseExpressionImpl(&Dummy);

  return P.Completions;
}

@


1.1.1.4.2.1
log
@Rebase.
@
text
@d61 1
a61 1
        CodeCompletionLocation(nullptr) {
d93 1
a93 1
      CodeCompletionLocation = nullptr;
d146 1
a146 1
            CodeCompletionLocation = nullptr;
a260 4
VariantValue Parser::Sema::getNamedValue(StringRef Name) {
  return VariantValue();
}

a276 31
/// \brief Parse expressions that start with an identifier.
///
/// This function can parse named values and matchers.
/// In case of failure it will try to determine the user's intent to give
/// an appropriate error message.
bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();

  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
    // Parse as a named value.
    if (const VariantValue NamedValue = S->getNamedValue(NameToken.Text)) {
      *Value = NamedValue;
      return true;
    }
    // If the syntax is correct and the name is not a matcher either, report
    // unknown named value.
    if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
        !S->lookupMatcherCtor(NameToken.Text)) {
      Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
          << NameToken.Text;
      return false;
    }
    // Otherwise, fallback to the matcher parser.
  }

  // Parse as a matcher expression.
  return parseMatcherExpressionImpl(NameToken, Value);
}

d281 2
a282 2
bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
                                        VariantValue *Value) {
d291 2
a292 8
  llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);

  if (!Ctor) {
    Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
        << NameToken.Text;
    // Do not return here. We need to continue to give completion suggestions.
  }

d297 1
a297 1
    ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
d428 1
a428 1
    return parseIdentifierPrefixImpl(Value);
d460 19
a478 15
Parser::RegistrySema::~RegistrySema() {}

llvm::Optional<MatcherCtor>
Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
  return Registry::lookupMatcherCtor(MatcherName);
}

VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
    MatcherCtor Ctor, const SourceRange &NameRange, StringRef BindID,
    ArrayRef<ParserValue> Args, Diagnostics *Error) {
  if (BindID.empty()) {
    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
  } else {
    return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                           Error);
d480 1
a480 1
}
@


1.1.1.5
log
@Import Clang 3.5svn r209886.
@
text
@d61 1
a61 1
        CodeCompletionLocation(nullptr) {
d93 1
a93 1
      CodeCompletionLocation = nullptr;
d146 1
a146 1
            CodeCompletionLocation = nullptr;
a260 4
VariantValue Parser::Sema::getNamedValue(StringRef Name) {
  return VariantValue();
}

a276 31
/// \brief Parse expressions that start with an identifier.
///
/// This function can parse named values and matchers.
/// In case of failure it will try to determine the user's intent to give
/// an appropriate error message.
bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();

  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
    // Parse as a named value.
    if (const VariantValue NamedValue = S->getNamedValue(NameToken.Text)) {
      *Value = NamedValue;
      return true;
    }
    // If the syntax is correct and the name is not a matcher either, report
    // unknown named value.
    if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
        !S->lookupMatcherCtor(NameToken.Text)) {
      Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
          << NameToken.Text;
      return false;
    }
    // Otherwise, fallback to the matcher parser.
  }

  // Parse as a matcher expression.
  return parseMatcherExpressionImpl(NameToken, Value);
}

d281 2
a282 2
bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
                                        VariantValue *Value) {
d291 2
a292 8
  llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);

  if (!Ctor) {
    Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
        << NameToken.Text;
    // Do not return here. We need to continue to give completion suggestions.
  }

d297 1
a297 1
    ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
d428 1
a428 1
    return parseIdentifierPrefixImpl(Value);
d460 19
a478 15
Parser::RegistrySema::~RegistrySema() {}

llvm::Optional<MatcherCtor>
Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
  return Registry::lookupMatcherCtor(MatcherName);
}

VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
    MatcherCtor Ctor, const SourceRange &NameRange, StringRef BindID,
    ArrayRef<ParserValue> Args, Diagnostics *Error) {
  if (BindID.empty()) {
    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
  } else {
    return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                           Error);
d480 1
a480 1
}
@


1.1.1.5.2.1
log
@Update LLVM to 3.6.1, requested by joerg in ticket 824.
@
text
@a19 1
#include "llvm/Support/ManagedStatic.h"
d261 2
a262 8
std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
    llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return std::vector<ArgKind>();
}

std::vector<MatcherCompletion>
Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
  return std::vector<MatcherCompletion>();
d291 1
a291 3
    if (const VariantValue NamedValue =
            NamedValues ? NamedValues->lookup(NameToken.Text)
                        : VariantValue()) {
d382 1
a382 1
      addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
d430 6
a435 20
void Parser::addCompletion(const TokenInfo &CompToken,
                           const MatcherCompletion& Completion) {
  if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
      Completion.Specificity > 0) {
    Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
                             Completion.MatcherDecl, Completion.Specificity);
  }
}

std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  if (!NamedValues) return std::vector<MatcherCompletion>();
  std::vector<MatcherCompletion> Result;
  for (const auto &Entry : *NamedValues) {
    unsigned Specificity;
    if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
      std::string Decl =
          (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
      Result.emplace_back(Entry.getKey(), Decl, Specificity);
    }
a436 1
  return Result;
d452 6
a457 7
  auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
  for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
  }

  for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
a496 2
static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;

d498 2
a499 3
               const NamedValueMap *NamedValues, Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
      NamedValues(NamedValues), Error(Error) {}
d519 4
a522 8
std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
    ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return Registry::getAcceptedCompletionTypes(Context);
}

std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  return Registry::getMatcherCompletions(AcceptedTypes);
a525 1
                             const NamedValueMap *NamedValues,
d528 1
a528 2
  if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
    return false;
d538 1
a538 2
Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
                           const NamedValueMap *NamedValues) {
d541 2
a542 1
  Parser P(&Tokenizer, S, NamedValues, &Error);
d546 2
a547 7
  // Sort by specificity, then by name.
  std::sort(P.Completions.begin(), P.Completions.end(),
            [](const MatcherCompletion &A, const MatcherCompletion &B) {
    if (A.Specificity != B.Specificity)
      return A.Specificity > B.Specificity;
    return A.TypedText < B.TypedText;
  });
d549 4
a552 1
  return P.Completions;
d556 1
a556 2
Parser::parseMatcherExpression(StringRef Code, Sema *S,
                               const NamedValueMap *NamedValues,
d559 1
a559 1
  if (!parseExpression(Code, S, NamedValues, &Value, Error))
@


1.1.1.6
log
@Import Clang 3.6RC1 r227398.
@
text
@a19 1
#include "llvm/Support/ManagedStatic.h"
d261 2
a262 8
std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
    llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return std::vector<ArgKind>();
}

std::vector<MatcherCompletion>
Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
  return std::vector<MatcherCompletion>();
d291 1
a291 3
    if (const VariantValue NamedValue =
            NamedValues ? NamedValues->lookup(NameToken.Text)
                        : VariantValue()) {
d382 1
a382 1
      addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
d430 6
a435 20
void Parser::addCompletion(const TokenInfo &CompToken,
                           const MatcherCompletion& Completion) {
  if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
      Completion.Specificity > 0) {
    Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
                             Completion.MatcherDecl, Completion.Specificity);
  }
}

std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  if (!NamedValues) return std::vector<MatcherCompletion>();
  std::vector<MatcherCompletion> Result;
  for (const auto &Entry : *NamedValues) {
    unsigned Specificity;
    if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
      std::string Decl =
          (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
      Result.emplace_back(Entry.getKey(), Decl, Specificity);
    }
a436 1
  return Result;
d452 6
a457 7
  auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
  for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
  }

  for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
    addCompletion(CompToken, Completion);
a496 2
static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;

d498 2
a499 3
               const NamedValueMap *NamedValues, Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
      NamedValues(NamedValues), Error(Error) {}
d519 4
a522 8
std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
    ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return Registry::getAcceptedCompletionTypes(Context);
}

std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  return Registry::getMatcherCompletions(AcceptedTypes);
a525 1
                             const NamedValueMap *NamedValues,
d528 1
a528 2
  if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
    return false;
d538 1
a538 2
Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
                           const NamedValueMap *NamedValues) {
d541 2
a542 1
  Parser P(&Tokenizer, S, NamedValues, &Error);
d546 2
a547 7
  // Sort by specificity, then by name.
  std::sort(P.Completions.begin(), P.Completions.end(),
            [](const MatcherCompletion &A, const MatcherCompletion &B) {
    if (A.Specificity != B.Specificity)
      return A.Specificity > B.Specificity;
    return A.TypedText < B.TypedText;
  });
d549 4
a552 1
  return P.Completions;
d556 1
a556 2
Parser::parseMatcherExpression(StringRef Code, Sema *S,
                               const NamedValueMap *NamedValues,
d559 1
a559 1
  if (!parseExpression(Code, S, NamedValues, &Value, Error))
@


1.1.1.7
log
@Import Clang 3.8.0rc3 r261930.
@
text
@d219 1
a219 1
        Result->Value = Code.substr(1, Length - 1);
d537 1
a537 1
    MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
@


1.1.1.7.2.1
log
@Sync with HEAD
@
text
@d19 1
@


1.1.1.8
log
@Import Clang pre-4.0.0 r291444.
@
text
@d19 1
@


1.1.1.9
log
@Import clang r309604 from branches/release_50
@
text
@d133 2
a134 2
      // Parse an unsigned and float literal.
      consumeNumberLiteral(&Result);
d156 2
a157 10
        if (TokenLength == 4 && Code.startswith("true")) {
          Result.Kind = TokenInfo::TK_Literal;
          Result.Value = true;
        } else if (TokenLength == 5 && Code.startswith("false")) {
          Result.Kind = TokenInfo::TK_Literal;
          Result.Value = false;
        } else {
          Result.Kind = TokenInfo::TK_Ident;
          Result.Text = Code.substr(0, TokenLength);
        }
d171 2
a172 3
  /// \brief Consume an unsigned and float literal.
  void consumeNumberLiteral(TokenInfo *Result) {
    bool isFloatingLiteral = false;
a182 11
    // Try to recognize a floating point literal.
    while (Length < Code.size()) {
      char c = Code[Length];
      if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
        isFloatingLiteral = true;
        Length++;
      } else {
        break;
      }
    }

d186 4
a189 10
    if (isFloatingLiteral) {
      char *end;
      errno = 0;
      std::string Text = Result->Text.str();
      double doubleValue = strtod(Text.c_str(), &end);
      if (*end == 0 && errno == 0) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Value = doubleValue;
        return;
      }
d191 5
a195 6
      unsigned Value;
      if (!Result->Text.getAsInteger(0, Value)) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Value = Value;
        return;
      }
a196 6

    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
    Result->Kind = TokenInfo::TK_Error;
@


1.1.1.9.4.1
log
@Sync with HEAD
@
text
@d1 1
a1 1
//===- Parser.cpp - Matcher expression parser -----------------------------===//
d11 1
a11 1
/// Recursive parser implementation for the matcher expression grammar.
a15 2
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
a18 2
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
a19 5
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdlib>
a20 1
#include <utility>
d27 1
a27 1
/// Simple structure to hold information for one token from the parser.
d29 1
a29 1
  /// Different possible tokens.
d43 1
a43 1
  /// Some known identifiers.
d46 1
a46 1
  TokenInfo() = default;
d49 1
a49 1
  TokenKind Kind = TK_Eof;
d56 1
a56 1
/// Simple tokenizer for the parser.
d60 2
a61 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
d67 1
a67 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
d72 1
a72 1
  /// Returns but doesn't consume the next token.
d75 1
a75 1
  /// Consumes and returns the next token.
d141 1
a141 1
        while (true) {
d179 1
a179 1
  /// Consume an unsigned and float literal.
d232 1
a232 1
  /// Consume a string literal.
d266 1
a266 1
  /// Consume all leading whitespace from \c Code.
d286 1
a286 1
  unsigned Line = 1;
d289 1
a289 1
  const char *CodeCompletionLocation = nullptr;
d292 1
a292 1
Parser::Sema::~Sema() = default;
d296 1
a296 1
  return {};
d301 1
a301 1
  return {};
d320 1
a320 1
/// Parse expressions that start with an identifier.
d353 1
a353 1
/// Parse and validate a matcher expression.
d387 1
a387 1
      if (!Args.empty()) {
d518 1
a518 1
/// Parse an <Expression>
d561 1
a561 1
Parser::RegistrySema::~RegistrySema() = default;
d613 2
a614 2
  llvm::sort(P.Completions.begin(), P.Completions.end(),
             [](const MatcherCompletion &A, const MatcherCompletion &B) {
d643 3
a645 3
} // namespace dynamic
} // namespace ast_matchers
} // namespace clang
@


1.1.1.9.4.2
log
@Mostly merge changes from HEAD upto 20200411
@
text
@@


1.1.1.9.2.1
log
@Sync with HEAD
@
text
@d1 1
a1 1
//===- Parser.cpp - Matcher expression parser -----------------------------===//
d11 1
a11 1
/// Recursive parser implementation for the matcher expression grammar.
a15 2
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
a18 2
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
a19 5
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdlib>
a20 1
#include <utility>
d27 1
a27 1
/// Simple structure to hold information for one token from the parser.
d29 1
a29 1
  /// Different possible tokens.
d43 1
a43 1
  /// Some known identifiers.
d46 1
a46 1
  TokenInfo() = default;
d49 1
a49 1
  TokenKind Kind = TK_Eof;
d56 1
a56 1
/// Simple tokenizer for the parser.
d60 2
a61 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
d67 1
a67 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
d72 1
a72 1
  /// Returns but doesn't consume the next token.
d75 1
a75 1
  /// Consumes and returns the next token.
d141 1
a141 1
        while (true) {
d179 1
a179 1
  /// Consume an unsigned and float literal.
d232 1
a232 1
  /// Consume a string literal.
d266 1
a266 1
  /// Consume all leading whitespace from \c Code.
d286 1
a286 1
  unsigned Line = 1;
d289 1
a289 1
  const char *CodeCompletionLocation = nullptr;
d292 1
a292 1
Parser::Sema::~Sema() = default;
d296 1
a296 1
  return {};
d301 1
a301 1
  return {};
d320 1
a320 1
/// Parse expressions that start with an identifier.
d353 1
a353 1
/// Parse and validate a matcher expression.
d387 1
a387 1
      if (!Args.empty()) {
d518 1
a518 1
/// Parse an <Expression>
d561 1
a561 1
Parser::RegistrySema::~RegistrySema() = default;
d613 2
a614 2
  llvm::sort(P.Completions.begin(), P.Completions.end(),
             [](const MatcherCompletion &A, const MatcherCompletion &B) {
d643 3
a645 3
} // namespace dynamic
} // namespace ast_matchers
} // namespace clang
@


1.1.1.10
log
@Import clang r337282 from trunk
@
text
@d1 1
a1 1
//===- Parser.cpp - Matcher expression parser -----------------------------===//
d11 1
a11 1
/// Recursive parser implementation for the matcher expression grammar.
a15 2
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
a18 2
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
a19 5
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdlib>
a20 1
#include <utility>
d27 1
a27 1
/// Simple structure to hold information for one token from the parser.
d29 1
a29 1
  /// Different possible tokens.
d43 1
a43 1
  /// Some known identifiers.
d46 1
a46 1
  TokenInfo() = default;
d49 1
a49 1
  TokenKind Kind = TK_Eof;
d56 1
a56 1
/// Simple tokenizer for the parser.
d60 2
a61 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
d67 1
a67 1
      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
d72 1
a72 1
  /// Returns but doesn't consume the next token.
d75 1
a75 1
  /// Consumes and returns the next token.
d141 1
a141 1
        while (true) {
d179 1
a179 1
  /// Consume an unsigned and float literal.
d232 1
a232 1
  /// Consume a string literal.
d266 1
a266 1
  /// Consume all leading whitespace from \c Code.
d286 1
a286 1
  unsigned Line = 1;
d289 1
a289 1
  const char *CodeCompletionLocation = nullptr;
d292 1
a292 1
Parser::Sema::~Sema() = default;
d296 1
a296 1
  return {};
d301 1
a301 1
  return {};
d320 1
a320 1
/// Parse expressions that start with an identifier.
d353 1
a353 1
/// Parse and validate a matcher expression.
d387 1
a387 1
      if (!Args.empty()) {
d518 1
a518 1
/// Parse an <Expression>
d561 1
a561 1
Parser::RegistrySema::~RegistrySema() = default;
d613 2
a614 2
  llvm::sort(P.Completions.begin(), P.Completions.end(),
             [](const MatcherCompletion &A, const MatcherCompletion &B) {
d643 3
a645 3
} // namespace dynamic
} // namespace ast_matchers
} // namespace clang
@


1.1.1.11
log
@Mark old LLVM instance as dead.
@
text
@@


1.1.1.5.4.1
log
@file Parser.cpp was added on branch tls-maxphys on 2014-08-19 23:47:26 +0000
@
text
@d1 576
@


1.1.1.5.4.2
log
@Rebase to HEAD as of a few days ago.
@
text
@a0 576
//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//

#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include <string>
#include <vector>

namespace clang {
namespace ast_matchers {
namespace dynamic {

/// \brief Simple structure to hold information for one token from the parser.
struct Parser::TokenInfo {
  /// \brief Different possible tokens.
  enum TokenKind {
    TK_Eof,
    TK_OpenParen,
    TK_CloseParen,
    TK_Comma,
    TK_Period,
    TK_Literal,
    TK_Ident,
    TK_InvalidChar,
    TK_Error,
    TK_CodeCompletion
  };

  /// \brief Some known identifiers.
  static const char* const ID_Bind;

  TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}

  StringRef Text;
  TokenKind Kind;
  SourceRange Range;
  VariantValue Value;
};

const char* const Parser::TokenInfo::ID_Bind = "bind";

/// \brief Simple tokenizer for the parser.
class Parser::CodeTokenizer {
public:
  explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(nullptr) {
    NextToken = getNextToken();
  }

  CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
                unsigned CodeCompletionOffset)
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
    NextToken = getNextToken();
  }

  /// \brief Returns but doesn't consume the next token.
  const TokenInfo &peekNextToken() const { return NextToken; }

  /// \brief Consumes and returns the next token.
  TokenInfo consumeNextToken() {
    TokenInfo ThisToken = NextToken;
    NextToken = getNextToken();
    return ThisToken;
  }

  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }

private:
  TokenInfo getNextToken() {
    consumeWhitespace();
    TokenInfo Result;
    Result.Range.Start = currentLocation();

    if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
      Result.Kind = TokenInfo::TK_CodeCompletion;
      Result.Text = StringRef(CodeCompletionLocation, 0);
      CodeCompletionLocation = nullptr;
      return Result;
    }

    if (Code.empty()) {
      Result.Kind = TokenInfo::TK_Eof;
      Result.Text = "";
      return Result;
    }

    switch (Code[0]) {
    case ',':
      Result.Kind = TokenInfo::TK_Comma;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '.':
      Result.Kind = TokenInfo::TK_Period;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '(':
      Result.Kind = TokenInfo::TK_OpenParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case ')':
      Result.Kind = TokenInfo::TK_CloseParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;

    case '"':
    case '\'':
      // Parse a string literal.
      consumeStringLiteral(&Result);
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      // Parse an unsigned literal.
      consumeUnsignedLiteral(&Result);
      break;

    default:
      if (isAlphanumeric(Code[0])) {
        // Parse an identifier
        size_t TokenLength = 1;
        while (1) {
          // A code completion location in/immediately after an identifier will
          // cause the portion of the identifier before the code completion
          // location to become a code completion token.
          if (CodeCompletionLocation == Code.data() + TokenLength) {
            CodeCompletionLocation = nullptr;
            Result.Kind = TokenInfo::TK_CodeCompletion;
            Result.Text = Code.substr(0, TokenLength);
            Code = Code.drop_front(TokenLength);
            return Result;
          }
          if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
            break;
          ++TokenLength;
        }
        Result.Kind = TokenInfo::TK_Ident;
        Result.Text = Code.substr(0, TokenLength);
        Code = Code.drop_front(TokenLength);
      } else {
        Result.Kind = TokenInfo::TK_InvalidChar;
        Result.Text = Code.substr(0, 1);
        Code = Code.drop_front(1);
      }
      break;
    }

    Result.Range.End = currentLocation();
    return Result;
  }

  /// \brief Consume an unsigned literal.
  void consumeUnsignedLiteral(TokenInfo *Result) {
    unsigned Length = 1;
    if (Code.size() > 1) {
      // Consume the 'x' or 'b' radix modifier, if present.
      switch (toLowercase(Code[1])) {
      case 'x': case 'b': Length = 2;
      }
    }
    while (Length < Code.size() && isHexDigit(Code[Length]))
      ++Length;

    Result->Text = Code.substr(0, Length);
    Code = Code.drop_front(Length);

    unsigned Value;
    if (!Result->Text.getAsInteger(0, Value)) {
      Result->Kind = TokenInfo::TK_Literal;
      Result->Value = Value;
    } else {
      SourceRange Range;
      Range.Start = Result->Range.Start;
      Range.End = currentLocation();
      Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
      Result->Kind = TokenInfo::TK_Error;
    }
  }

  /// \brief Consume a string literal.
  ///
  /// \c Code must be positioned at the start of the literal (the opening
  /// quote). Consumed until it finds the same closing quote character.
  void consumeStringLiteral(TokenInfo *Result) {
    bool InEscape = false;
    const char Marker = Code[0];
    for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
      if (InEscape) {
        InEscape = false;
        continue;
      }
      if (Code[Length] == '\\') {
        InEscape = true;
        continue;
      }
      if (Code[Length] == Marker) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Text = Code.substr(0, Length + 1);
        Result->Value = Code.substr(1, Length - 1).str();
        Code = Code.drop_front(Length + 1);
        return;
      }
    }

    StringRef ErrorText = Code;
    Code = Code.drop_front(Code.size());
    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
    Result->Kind = TokenInfo::TK_Error;
  }

  /// \brief Consume all leading whitespace from \c Code.
  void consumeWhitespace() {
    while (!Code.empty() && isWhitespace(Code[0])) {
      if (Code[0] == '\n') {
        ++Line;
        StartOfLine = Code.drop_front();
      }
      Code = Code.drop_front();
    }
  }

  SourceLocation currentLocation() {
    SourceLocation Location;
    Location.Line = Line;
    Location.Column = Code.data() - StartOfLine.data() + 1;
    return Location;
  }

  StringRef Code;
  StringRef StartOfLine;
  unsigned Line;
  Diagnostics *Error;
  TokenInfo NextToken;
  const char *CodeCompletionLocation;
};

Parser::Sema::~Sema() {}

VariantValue Parser::Sema::getNamedValue(StringRef Name) {
  return VariantValue();
}

struct Parser::ScopedContextEntry {
  Parser *P;

  ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
    P->ContextStack.push_back(std::make_pair(C, 0u));
  }

  ~ScopedContextEntry() {
    P->ContextStack.pop_back();
  }

  void nextArg() {
    ++P->ContextStack.back().second;
  }
};

/// \brief Parse expressions that start with an identifier.
///
/// This function can parse named values and matchers.
/// In case of failure it will try to determine the user's intent to give
/// an appropriate error message.
bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();

  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
    // Parse as a named value.
    if (const VariantValue NamedValue = S->getNamedValue(NameToken.Text)) {
      *Value = NamedValue;
      return true;
    }
    // If the syntax is correct and the name is not a matcher either, report
    // unknown named value.
    if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
         Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
        !S->lookupMatcherCtor(NameToken.Text)) {
      Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
          << NameToken.Text;
      return false;
    }
    // Otherwise, fallback to the matcher parser.
  }

  // Parse as a matcher expression.
  return parseMatcherExpressionImpl(NameToken, Value);
}

/// \brief Parse and validate a matcher expression.
/// \return \c true on success, in which case \c Value has the matcher parsed.
///   If the input is malformed, or some argument has an error, it
///   returns \c false.
bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
                                        VariantValue *Value) {
  assert(NameToken.Kind == TokenInfo::TK_Ident);
  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
        << OpenToken.Text;
    return false;
  }

  llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);

  if (!Ctor) {
    Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
        << NameToken.Text;
    // Do not return here. We need to continue to give completion suggestions.
  }

  std::vector<ParserValue> Args;
  TokenInfo EndToken;

  {
    ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (Args.size() > 0) {
        // We must find a , token to continue.
        const TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
      }

      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;
      ArgValue.Text = Tokenizer->peekNextToken().Text;
      ArgValue.Range = Tokenizer->peekNextToken().Range;
      if (!parseExpressionImpl(&ArgValue.Value)) {
        return false;
      }

      Args.push_back(ArgValue);
      SCE.nextArg();
    }
  }

  if (EndToken.Kind == TokenInfo::TK_Eof) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  std::string BindID;
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Parse .bind("foo")
    Tokenizer->consumeNextToken();  // consume the period.
    const TokenInfo BindToken = Tokenizer->consumeNextToken();
    if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(BindToken, "bind(\"", "bind");
      return false;
    }

    const TokenInfo OpenToken = Tokenizer->consumeNextToken();
    const TokenInfo IDToken = Tokenizer->consumeNextToken();
    const TokenInfo CloseToken = Tokenizer->consumeNextToken();

    // TODO: We could use different error codes for each/some to be more
    //       explicit about the syntax error.
    if (BindToken.Kind != TokenInfo::TK_Ident ||
        BindToken.Text != TokenInfo::ID_Bind) {
      Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
      Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
      Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
      Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    BindID = IDToken.Value.getString();
  }

  if (!Ctor)
    return false;

  // Merge the start and end infos.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;
  VariantMatcher Result = S->actOnMatcherExpression(
      *Ctor, MatcherRange, BindID, Args, Error);
  if (Result.isNull()) return false;

  *Value = Result;
  return true;
}

// If the prefix of this completion matches the completion token, add it to
// Completions minus the prefix.
void Parser::addCompletion(const TokenInfo &CompToken, StringRef TypedText,
                           StringRef Decl) {
  if (TypedText.size() >= CompToken.Text.size() &&
      TypedText.substr(0, CompToken.Text.size()) == CompToken.Text) {
    Completions.push_back(
        MatcherCompletion(TypedText.substr(CompToken.Text.size()), Decl));
  }
}

void Parser::addExpressionCompletions() {
  const TokenInfo CompToken = Tokenizer->consumeNextToken();
  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);

  // We cannot complete code if there is an invalid element on the context
  // stack.
  for (ContextStackTy::iterator I = ContextStack.begin(),
                                E = ContextStack.end();
       I != E; ++I) {
    if (!I->first)
      return;
  }

  std::vector<MatcherCompletion> RegCompletions =
      Registry::getCompletions(ContextStack);
  for (std::vector<MatcherCompletion>::iterator I = RegCompletions.begin(),
                                                E = RegCompletions.end();
       I != E; ++I) {
    addCompletion(CompToken, I->TypedText, I->MatcherDecl);
  }
}

/// \brief Parse an <Expresssion>
bool Parser::parseExpressionImpl(VariantValue *Value) {
  switch (Tokenizer->nextTokenKind()) {
  case TokenInfo::TK_Literal:
    *Value = Tokenizer->consumeNextToken().Value;
    return true;

  case TokenInfo::TK_Ident:
    return parseIdentifierPrefixImpl(Value);

  case TokenInfo::TK_CodeCompletion:
    addExpressionCompletions();
    return false;

  case TokenInfo::TK_Eof:
    Error->addError(Tokenizer->consumeNextToken().Range,
                    Error->ET_ParserNoCode);
    return false;

  case TokenInfo::TK_Error:
    // This error was already reported by the tokenizer.
    return false;

  case TokenInfo::TK_OpenParen:
  case TokenInfo::TK_CloseParen:
  case TokenInfo::TK_Comma:
  case TokenInfo::TK_Period:
  case TokenInfo::TK_InvalidChar:
    const TokenInfo Token = Tokenizer->consumeNextToken();
    Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
    return false;
  }

  llvm_unreachable("Unknown token kind.");
}

Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
               Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S), Error(Error) {}

Parser::RegistrySema::~RegistrySema() {}

llvm::Optional<MatcherCtor>
Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
  return Registry::lookupMatcherCtor(MatcherName);
}

VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
    MatcherCtor Ctor, const SourceRange &NameRange, StringRef BindID,
    ArrayRef<ParserValue> Args, Diagnostics *Error) {
  if (BindID.empty()) {
    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
  } else {
    return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                           Error);
  }
}

bool Parser::parseExpression(StringRef Code, VariantValue *Value,
                             Diagnostics *Error) {
  RegistrySema S;
  return parseExpression(Code, &S, Value, Error);
}

bool Parser::parseExpression(StringRef Code, Sema *S,
                             VariantValue *Value, Diagnostics *Error) {
  CodeTokenizer Tokenizer(Code, Error);
  if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
  if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
    Error->addError(Tokenizer.peekNextToken().Range,
                    Error->ET_ParserTrailingCode);
    return false;
  }
  return true;
}

std::vector<MatcherCompletion>
Parser::completeExpression(StringRef Code, unsigned CompletionOffset) {
  Diagnostics Error;
  CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
  RegistrySema S;
  Parser P(&Tokenizer, &S, &Error);
  VariantValue Dummy;
  P.parseExpressionImpl(&Dummy);

  return P.Completions;
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
  RegistrySema S;
  return parseMatcherExpression(Code, &S, Error);
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
                               Diagnostics *Error) {
  VariantValue Value;
  if (!parseExpression(Code, S, &Value, Error))
    return llvm::Optional<DynTypedMatcher>();
  if (!Value.isMatcher()) {
    Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
    return llvm::Optional<DynTypedMatcher>();
  }
  llvm::Optional<DynTypedMatcher> Result =
      Value.getMatcher().getSingleMatcher();
  if (!Result.hasValue()) {
    Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
        << Value.getTypeAsString();
  }
  return Result;
}

}  // namespace dynamic
}  // namespace ast_matchers
}  // namespace clang
@


1.1.1.4.4.1
log
@file Parser.cpp was added on branch yamt-pagecache on 2014-05-22 16:18:26 +0000
@
text
@d1 539
@


1.1.1.4.4.2
log
@sync with head.

for a reference, the tree before this commit was tagged
as yamt-pagecache-tag8.

this commit was splitted into small chunks to avoid
a limitation of cvs.  ("Protocol error: too many arguments")
@
text
@a0 539
//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//

#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include <string>
#include <vector>

namespace clang {
namespace ast_matchers {
namespace dynamic {

/// \brief A single lexed token together with its source range and value.
struct Parser::TokenInfo {
  /// \brief The kinds of token the tokenizer can produce.
  enum TokenKind {
    TK_Eof,            // End of the input.
    TK_OpenParen,      // '('
    TK_CloseParen,     // ')'
    TK_Comma,          // ','
    TK_Period,         // '.'
    TK_Literal,        // String or unsigned literal; payload is in Value.
    TK_Ident,          // Identifier.
    TK_InvalidChar,    // A character that starts no valid token.
    TK_Error,          // Malformed literal; already reported by the tokenizer.
    TK_CodeCompletion  // Marks the code completion location.
  };

  StringRef Text;      // Spelling of the token.
  TokenKind Kind;
  SourceRange Range;
  VariantValue Value;  // Set by the tokenizer for TK_Literal tokens.

  /// \brief Spelling of the identifier that introduces a bind expression.
  static const char *const ID_Bind;

  TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
};

const char *const Parser::TokenInfo::ID_Bind = "bind";

/// \brief Tokenizer for matcher code that always keeps one token of lookahead.
class Parser::CodeTokenizer {
public:
  explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(0) {  // 0 means no completion was requested.
    NextToken = getNextToken();  // Prime the one-token lookahead.
  }

  CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
                unsigned CodeCompletionOffset)  // Offset into MatcherCode.
      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
        CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
    NextToken = getNextToken();  // Prime the one-token lookahead.
  }

  /// \brief Returns but doesn't consume the next token.
  const TokenInfo &peekNextToken() const { return NextToken; }

  /// \brief Consumes and returns the next token.
  TokenInfo consumeNextToken() {
    TokenInfo ThisToken = NextToken;
    NextToken = getNextToken();  // Refill the lookahead.
    return ThisToken;
  }
  /// \brief Returns the kind of the next token without consuming it.
  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }

private:
  TokenInfo getNextToken() {  // Lexes one token from the front of Code.
    consumeWhitespace();
    TokenInfo Result;
    Result.Range.Start = currentLocation();
    // Completion location reached or passed: emit an empty completion token.
    if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
      Result.Kind = TokenInfo::TK_CodeCompletion;
      Result.Text = StringRef(CodeCompletionLocation, 0);
      CodeCompletionLocation = 0;  // Emit the completion token only once.
      return Result;
    }

    if (Code.empty()) {
      Result.Kind = TokenInfo::TK_Eof;
      Result.Text = "";
      return Result;
    }

    switch (Code[0]) {  // Dispatch on the first remaining character.
    case ',':
      Result.Kind = TokenInfo::TK_Comma;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '.':
      Result.Kind = TokenInfo::TK_Period;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case '(':
      Result.Kind = TokenInfo::TK_OpenParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;
    case ')':
      Result.Kind = TokenInfo::TK_CloseParen;
      Result.Text = Code.substr(0, 1);
      Code = Code.drop_front();
      break;

    case '"':
    case '\'':
      // Parse a string literal; either quote character may delimit it.
      consumeStringLiteral(&Result);
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      // Parse an unsigned literal.
      consumeUnsignedLiteral(&Result);
      break;

    default:
      if (isAlphanumeric(Code[0])) {
        // Parse an identifier: a run of alphanumeric characters.
        size_t TokenLength = 1;
        while (1) {
          // A code completion location in/immediately after an identifier will
          // cause the portion of the identifier before the code completion
          // location to become a code completion token.
          if (CodeCompletionLocation == Code.data() + TokenLength) {
            CodeCompletionLocation = 0;
            Result.Kind = TokenInfo::TK_CodeCompletion;
            Result.Text = Code.substr(0, TokenLength);
            Code = Code.drop_front(TokenLength);
            return Result;
          }
          if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
            break;
          ++TokenLength;
        }
        Result.Kind = TokenInfo::TK_Ident;
        Result.Text = Code.substr(0, TokenLength);
        Code = Code.drop_front(TokenLength);
      } else {  // Anything else is consumed as a one-character invalid token.
        Result.Kind = TokenInfo::TK_InvalidChar;
        Result.Text = Code.substr(0, 1);
        Code = Code.drop_front(1);
      }
      break;
    }
    // Early returns above (completion, Eof) never assign Range.End.
    Result.Range.End = currentLocation();
    return Result;
  }

  /// \brief Consume an unsigned literal; reports an error if it is invalid.
  void consumeUnsignedLiteral(TokenInfo *Result) {
    unsigned Length = 1;
    if (Code.size() > 1) {
      // Consume the 'x' or 'b' radix modifier, if present.
      switch (toLowercase(Code[1])) {
      case 'x': case 'b': Length = 2;
      }
    }
    while (Length < Code.size() && isHexDigit(Code[Length]))
      ++Length;

    Result->Text = Code.substr(0, Length);
    Code = Code.drop_front(Length);
    // getAsInteger returns true on failure; radix 0 presumably auto-detects.
    unsigned Value;
    if (!Result->Text.getAsInteger(0, Value)) {
      Result->Kind = TokenInfo::TK_Literal;
      Result->Value = Value;
    } else {
      SourceRange Range;
      Range.Start = Result->Range.Start;
      Range.End = currentLocation();
      Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
      Result->Kind = TokenInfo::TK_Error;
    }
  }

  /// \brief Consume a string literal.
  ///
  /// \c Code must start at the opening quote; consumes through the matching
  /// closing quote. Escape sequences are skipped over but not decoded.
  void consumeStringLiteral(TokenInfo *Result) {
    bool InEscape = false;  // Set right after an unescaped backslash.
    const char Marker = Code[0];
    for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
      if (InEscape) {
        InEscape = false;
        continue;
      }
      if (Code[Length] == '\\') {
        InEscape = true;
        continue;
      }
      if (Code[Length] == Marker) {
        Result->Kind = TokenInfo::TK_Literal;
        Result->Text = Code.substr(0, Length + 1);
        Result->Value = Code.substr(1, Length - 1).str();
        Code = Code.drop_front(Length + 1);
        return;
      }
    }
    // No closing quote: consume the rest of the input and report an error.
    StringRef ErrorText = Code;
    Code = Code.drop_front(Code.size());
    SourceRange Range;
    Range.Start = Result->Range.Start;
    Range.End = currentLocation();
    Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
    Result->Kind = TokenInfo::TK_Error;
  }

  /// \brief Consume all leading whitespace from \c Code.
  void consumeWhitespace() {
    while (!Code.empty() && isWhitespace(Code[0])) {
      if (Code[0] == '\n') {  // Track line starts for currentLocation().
        ++Line;
        StartOfLine = Code.drop_front();
      }
      Code = Code.drop_front();
    }
  }

  SourceLocation currentLocation() {  // Line and 1-based column of Code's front.
    SourceLocation Location;
    Location.Line = Line;
    Location.Column = Code.data() - StartOfLine.data() + 1;
    return Location;
  }

  StringRef Code;  // Input that has not been consumed yet.
  StringRef StartOfLine;  // Input starting at the current line's first char.
  unsigned Line;  // Current line number, starting at 1.
  Diagnostics *Error;  // Receives errors for malformed literals.
  TokenInfo NextToken;  // Lookahead token returned by peek/consume.
  const char *CodeCompletionLocation;  // Pending completion point, or 0.
};

Parser::Sema::~Sema() {}  // Out-of-line definition of the empty destructor.

/// \brief Pushes a (constructor, 0) entry onto the parser's ContextStack when
/// constructed and pops the top entry again when destroyed.
struct Parser::ScopedContextEntry {
  // Parser whose ContextStack this entry manipulates.
  Parser *P;

  ScopedContextEntry(Parser *Owner, MatcherCtor Ctor) : P(Owner) {
    Owner->ContextStack.push_back(std::make_pair(Ctor, 0u));
  }

  ~ScopedContextEntry() { P->ContextStack.pop_back(); }

  /// \brief Increments the counter stored in the top ContextStack entry.
  void nextArg() { ++P->ContextStack.back().second; }
};

/// \brief Parse and validate a matcher expression with an optional .bind().
/// \return \c true on success, in which case \c Value has the matcher parsed.
///   If the input is malformed, some argument has an error, no matcher
///   constructor is found, or the matcher cannot be built, it returns \c false.
bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
  const TokenInfo NameToken = Tokenizer->consumeNextToken();
  assert(NameToken.Kind == TokenInfo::TK_Ident);
  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
        << OpenToken.Text;
    return false;
  }
  // A failed lookup is only acted on after the arguments have been parsed.
  llvm::Optional<MatcherCtor> Ctor =
      S->lookupMatcherCtor(NameToken.Text, NameToken.Range, Error);
  std::vector<ParserValue> Args;
  TokenInfo EndToken;  // Stays TK_Eof unless a ')' is consumed below.

  {  // Scope of the context stack entry for this matcher.
    ScopedContextEntry SCE(this, Ctor ? *Ctor : 0);  // 0 if lookup failed.

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (Args.size() > 0) {
        // We must find a , token to continue.
        const TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
      }

      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;  // Text/Range describe the argument's first token.
      ArgValue.Text = Tokenizer->peekNextToken().Text;
      ArgValue.Range = Tokenizer->peekNextToken().Range;
      if (!parseExpressionImpl(&ArgValue.Value)) {
        return false;
      }

      Args.push_back(ArgValue);
      SCE.nextArg();  // Keep the context entry's argument index in step.
    }
  }

  if (EndToken.Kind == TokenInfo::TK_Eof) {  // No ')' was consumed.
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  std::string BindID;  // Left empty when there is no .bind() suffix.
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Parse .bind("foo")
    Tokenizer->consumeNextToken();  // consume the period.
    const TokenInfo BindToken = Tokenizer->consumeNextToken();
    if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(BindToken, "bind(\"", "bind");
      return false;
    }
    // Consume '(' ID ')' up front; this OpenToken shadows the outer one.
    const TokenInfo OpenToken = Tokenizer->consumeNextToken();
    const TokenInfo IDToken = Tokenizer->consumeNextToken();
    const TokenInfo CloseToken = Tokenizer->consumeNextToken();

    // TODO: We could use different error codes for each/some to be more
    //       explicit about the syntax error.
    if (BindToken.Kind != TokenInfo::TK_Ident ||
        BindToken.Text != TokenInfo::ID_Bind) {
      Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
      Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
      Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
      Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
      return false;
    }
    BindID = IDToken.Value.getString();
  }

  if (!Ctor)  // The matcher name lookup failed above.
    return false;

  // Merge the start and end infos.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;  // Range ends at the closing paren.
  VariantMatcher Result = S->actOnMatcherExpression(
      *Ctor, MatcherRange, BindID, Args, Error);
  if (Result.isNull()) return false;

  *Value = Result;
  return true;
}

// If the prefix of this completion matches the completion token, add it to
// Completions minus the prefix.
void Parser::addCompletion(const TokenInfo &CompToken, StringRef TypedText,
                           StringRef Decl) {
  if (TypedText.size() >= CompToken.Text.size() &&
      TypedText.substr(0, CompToken.Text.size()) == CompToken.Text) {
    Completions.push_back(
        MatcherCompletion(TypedText.substr(CompToken.Text.size()), Decl));
  }
}

void Parser::addExpressionCompletions() {
  const TokenInfo CompToken = Tokenizer->consumeNextToken();
  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);

  // We cannot complete code if there is an invalid element on the context
  // stack.
  for (ContextStackTy::iterator I = ContextStack.begin(),
                                E = ContextStack.end();
       I != E; ++I) {
    if (!I->first)
      return;
  }

  std::vector<MatcherCompletion> RegCompletions =
      Registry::getCompletions(ContextStack);
  for (std::vector<MatcherCompletion>::iterator I = RegCompletions.begin(),
                                                E = RegCompletions.end();
       I != E; ++I) {
    addCompletion(CompToken, I->TypedText, I->MatcherDecl);
  }
}

/// \brief Parses an <Expression>: either a literal or a matcher expression.
bool Parser::parseExpressionImpl(VariantValue *Value) {
  const TokenInfo::TokenKind NextKind = Tokenizer->nextTokenKind();
  switch (NextKind) {
  case TokenInfo::TK_Ident:
    // An identifier can only start a matcher expression.
    return parseMatcherExpressionImpl(Value);
  case TokenInfo::TK_Literal: {
    const TokenInfo Literal = Tokenizer->consumeNextToken();
    *Value = Literal.Value;
    return true;
  }
  case TokenInfo::TK_CodeCompletion:
    addExpressionCompletions();
    return false;
  case TokenInfo::TK_Error:
    // The tokenizer has already reported this error; nothing is consumed.
    return false;
  case TokenInfo::TK_Eof: {
    const TokenInfo EofToken = Tokenizer->consumeNextToken();
    Error->addError(EofToken.Range, Error->ET_ParserNoCode);
    return false;
  }
  case TokenInfo::TK_OpenParen:
  case TokenInfo::TK_CloseParen:
  case TokenInfo::TK_Comma:
  case TokenInfo::TK_Period:
  case TokenInfo::TK_InvalidChar: {
    const TokenInfo Bad = Tokenizer->consumeNextToken();
    Error->addError(Bad.Range, Error->ET_ParserInvalidToken) << Bad.Text;
    return false;
  }
  }
  llvm_unreachable("Unknown token kind.");
}

Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
               Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S), Error(Error) {}

/// \brief Sema implementation that forwards every request to the Registry.
class RegistrySema : public Parser::Sema {
public:
  virtual ~RegistrySema() {}
  llvm::Optional<MatcherCtor> lookupMatcherCtor(StringRef MatcherName,
                                                const SourceRange &NameRange,
                                                Diagnostics *Error) {
    return Registry::lookupMatcherCtor(MatcherName, NameRange, Error);
  }
  // An empty BindID requests a plain matcher, anything else a bound one.
  VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
                                        const SourceRange &NameRange,
                                        StringRef BindID,
                                        ArrayRef<ParserValue> Args,
                                        Diagnostics *Error) {
    if (!BindID.empty())
      return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
                                             Error);
    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
  }
};

bool Parser::parseExpression(StringRef Code, VariantValue *Value,
                             Diagnostics *Error) {
  RegistrySema DefaultSema;  // Sema used when the caller supplies none.
  return Parser::parseExpression(Code, &DefaultSema, Value, Error);
}

bool Parser::parseExpression(StringRef Code, Sema *S,
                             VariantValue *Value, Diagnostics *Error) {
  CodeTokenizer Tokenizer(Code, Error);
  if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
  if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
    Error->addError(Tokenizer.peekNextToken().Range,
                    Error->ET_ParserTrailingCode);
    return false;
  }
  return true;
}

std::vector<MatcherCompletion>
Parser::completeExpression(StringRef Code, unsigned CompletionOffset) {
  // Diagnostics and the parsed value are discarded; only the completions
  // gathered during the parse are returned.
  Diagnostics IgnoredDiags;
  VariantValue IgnoredValue;
  RegistrySema RegSema;
  CodeTokenizer Lexer(Code, &IgnoredDiags, CompletionOffset);
  Parser CompletionParser(&Lexer, &RegSema, &IgnoredDiags);
  CompletionParser.parseExpressionImpl(&IgnoredValue);
  return CompletionParser.Completions;
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
  RegistrySema DefaultSema;  // Sema used when the caller supplies none.
  return Parser::parseMatcherExpression(Code, &DefaultSema, Error);
}

llvm::Optional<DynTypedMatcher>
Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
                               Diagnostics *Error) {
  typedef llvm::Optional<DynTypedMatcher> OptMatcher;
  VariantValue Parsed;
  if (!parseExpression(Code, S, &Parsed, Error))
    return OptMatcher();
  // The parsed value must hold a matcher.
  if (!Parsed.isMatcher()) {
    Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
    return OptMatcher();
  }
  // An empty Optional here is reported as an overloaded-type error.
  OptMatcher Single = Parsed.getMatcher().getSingleMatcher();
  if (!Single.hasValue())
    Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
        << Parsed.getTypeAsString();
  return Single;
}

}  // namespace dynamic
}  // namespace ast_matchers
}  // namespace clang
@