diff options
Diffstat (limited to 'scripts/qapi/parser.py')
-rw-r--r-- | scripts/qapi/parser.py | 232 |
1 files changed, 173 insertions, 59 deletions
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index ca5e8e18e0..f03ba2cfec 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -17,14 +17,26 @@ from collections import OrderedDict import os import re - +from typing import ( + Dict, + List, + Optional, + Set, + Union, +) + +from .common import must_match from .error import QAPISemError, QAPISourceError from .source import QAPISourceInfo +# Return value alias for get_expr(). +_ExprValue = Union[List[object], Dict[str, object], str, bool] + + class QAPIParseError(QAPISourceError): """Error class for all QAPI schema parsing errors.""" - def __init__(self, parser, msg): + def __init__(self, parser: 'QAPISchemaParser', msg: str): col = 1 for ch in parser.src[parser.line_pos:parser.pos]: if ch == '\t': @@ -35,31 +47,69 @@ class QAPIParseError(QAPISourceError): class QAPISchemaParser: + """ + Parse QAPI schema source. - def __init__(self, fname, previously_included=None, incl_info=None): - previously_included = previously_included or set() - previously_included.add(os.path.abspath(fname)) + Parse a JSON-esque schema file and process directives. See + qapi-code-gen.txt section "Schema Syntax" for the exact syntax. + Grammatical validation is handled later by `expr.check_exprs()`. - try: - fp = open(fname, 'r', encoding='utf-8') - self.src = fp.read() - except IOError as e: - raise QAPISemError(incl_info or QAPISourceInfo(None, None, None), - "can't read %s file '%s': %s" - % ("include" if incl_info else "schema", - fname, - e.strerror)) + :param fname: Source file name. + :param previously_included: + The absolute names of previously included source files, + if being invoked from another parser. + :param incl_info: + `QAPISourceInfo` belonging to the parent module. + ``None`` implies this is the root module. - if self.src == '' or self.src[-1] != '\n': - self.src += '\n' + :ivar exprs: Resulting parsed expressions. + :ivar docs: Resulting parsed documentation blocks. + + :raise OSError: For problems reading the root schema document. + :raise QAPIError: For errors in the schema source. + """ + def __init__(self, + fname: str, + previously_included: Optional[Set[str]] = None, + incl_info: Optional[QAPISourceInfo] = None): + self._fname = fname + self._included = previously_included or set() + self._included.add(os.path.abspath(self._fname)) + self.src = '' + + # Lexer state (see `accept` for details): + self.info = QAPISourceInfo(self._fname, incl_info) + self.tok: Union[None, str] = None + self.pos = 0 self.cursor = 0 - self.info = QAPISourceInfo(fname, 1, incl_info) + self.val: Optional[Union[bool, str]] = None self.line_pos = 0 - self.exprs = [] - self.docs = [] - self.accept() + + # Parser output: + self.exprs: List[Dict[str, object]] = [] + self.docs: List[QAPIDoc] = [] + + # Showtime! + self._parse() + + def _parse(self) -> None: + """ + Parse the QAPI schema document. + + :return: None. Results are stored in ``.exprs`` and ``.docs``. + """ cur_doc = None + # May raise OSError; allow the caller to handle it. + with open(self._fname, 'r', encoding='utf-8') as fp: + self.src = fp.read() + if self.src == '' or self.src[-1] != '\n': + self.src += '\n' + + # Prime the lexer: + self.accept() + + # Parse until done: while self.tok is not None: info = self.info if self.tok == '#': @@ -68,7 +118,11 @@ class QAPISchemaParser: self.docs.append(cur_doc) continue - expr = self.get_expr(False) + expr = self.get_expr() + if not isinstance(expr, dict): + raise QAPISemError( + info, "top-level expression must be an object") + if 'include' in expr: self.reject_expr_doc(cur_doc) if len(expr) != 1: @@ -77,12 +131,12 @@ class QAPISchemaParser: if not isinstance(include, str): raise QAPISemError(info, "value of 'include' must be a string") - incl_fname = os.path.join(os.path.dirname(fname), + incl_fname = os.path.join(os.path.dirname(self._fname), include) self.exprs.append({'expr': {'include': incl_fname}, 'info': info}) exprs_include = self._include(include, info, incl_fname, - previously_included) + self._included) if exprs_include: self.exprs.extend(exprs_include.exprs) self.docs.extend(exprs_include.docs) @@ -109,17 +163,22 @@ class QAPISchemaParser: self.reject_expr_doc(cur_doc) @staticmethod - def reject_expr_doc(doc): + def reject_expr_doc(doc: Optional['QAPIDoc']) -> None: if doc and doc.symbol: raise QAPISemError( doc.info, "documentation for '%s' is not followed by the definition" % doc.symbol) - def _include(self, include, info, incl_fname, previously_included): + @staticmethod + def _include(include: str, + info: QAPISourceInfo, + incl_fname: str, + previously_included: Set[str] + ) -> Optional['QAPISchemaParser']: incl_abs_fname = os.path.abspath(incl_fname) # catch inclusion cycle - inf = info + inf: Optional[QAPISourceInfo] = info while inf: if incl_abs_fname == os.path.abspath(inf.fname): raise QAPISemError(info, "inclusion loop for %s" % include) @@ -129,34 +188,86 @@ class QAPISchemaParser: if incl_abs_fname in previously_included: return None - return QAPISchemaParser(incl_fname, previously_included, info) - - def _check_pragma_list_of_str(self, name, value, info): - if (not isinstance(value, list) - or any([not isinstance(elt, str) for elt in value])): + try: + return QAPISchemaParser(incl_fname, previously_included, info) + except OSError as err: raise QAPISemError( info, - "pragma %s must be a list of strings" % name) + f"can't read include file '{incl_fname}': {err.strerror}" + ) from err + + @staticmethod + def _pragma(name: str, value: object, info: QAPISourceInfo) -> None: + + def check_list_str(name: str, value: object) -> List[str]: + if (not isinstance(value, list) or + any(not isinstance(elt, str) for elt in value)): + raise QAPISemError( + info, + "pragma %s must be a list of strings" % name) + return value + + pragma = info.pragma - def _pragma(self, name, value, info): if name == 'doc-required': if not isinstance(value, bool): raise QAPISemError(info, "pragma 'doc-required' must be boolean") - info.pragma.doc_required = value + pragma.doc_required = value elif name == 'command-name-exceptions': - self._check_pragma_list_of_str(name, value, info) - info.pragma.command_name_exceptions = value + pragma.command_name_exceptions = check_list_str(name, value) elif name == 'command-returns-exceptions': - self._check_pragma_list_of_str(name, value, info) - info.pragma.command_returns_exceptions = value + pragma.command_returns_exceptions = check_list_str(name, value) elif name == 'member-name-exceptions': - self._check_pragma_list_of_str(name, value, info) - info.pragma.member_name_exceptions = value + pragma.member_name_exceptions = check_list_str(name, value) else: raise QAPISemError(info, "unknown pragma '%s'" % name) - def accept(self, skip_comment=True): + def accept(self, skip_comment: bool = True) -> None: + """ + Read and store the next token. + + :param skip_comment: + When false, return COMMENT tokens ("#"). + This is used when reading documentation blocks. + + :return: + None. Several instance attributes are updated instead: + + - ``.tok`` represents the token type. See below for values. + - ``.info`` describes the token's source location. + - ``.val`` is the token's value, if any. See below. + - ``.pos`` is the buffer index of the first character of + the token. + + * Single-character tokens: + + These are "{", "}", ":", ",", "[", and "]". + ``.tok`` holds the single character and ``.val`` is None. + + * Multi-character tokens: + + * COMMENT: + + This token is not normally returned by the lexer, but it can + be when ``skip_comment`` is False. ``.tok`` is "#", and + ``.val`` is a string including all chars until end-of-line, + including the "#" itself. + + * STRING: + + ``.tok`` is "'", the single quote. ``.val`` contains the + string, excluding the surrounding quotes. + + * TRUE and FALSE: + + ``.tok`` is either "t" or "f", ``.val`` will be the + corresponding bool value. + + * EOF: + + ``.tok`` and ``.val`` will both be None at EOF. + """ while True: self.tok = self.src[self.cursor] self.pos = self.cursor @@ -216,12 +327,12 @@ class QAPISchemaParser: elif not self.tok.isspace(): # Show up to next structural, whitespace or quote # character - match = re.match('[^[\\]{}:,\\s\'"]+', - self.src[self.cursor-1:]) + match = must_match('[^[\\]{}:,\\s\'"]+', + self.src[self.cursor-1:]) raise QAPIParseError(self, "stray '%s'" % match.group(0)) - def get_members(self): - expr = OrderedDict() + def get_members(self) -> Dict[str, object]: + expr: Dict[str, object] = OrderedDict() if self.tok == '}': self.accept() return expr @@ -229,13 +340,15 @@ class QAPISchemaParser: raise QAPIParseError(self, "expected string or '}'") while True: key = self.val + assert isinstance(key, str) # Guaranteed by tok == "'" + self.accept() if self.tok != ':': raise QAPIParseError(self, "expected ':'") self.accept() if key in expr: raise QAPIParseError(self, "duplicate key '%s'" % key) - expr[key] = self.get_expr(True) + expr[key] = self.get_expr() if self.tok == '}': self.accept() return expr @@ -245,16 +358,16 @@ class QAPISchemaParser: if self.tok != "'": raise QAPIParseError(self, "expected string") - def get_values(self): - expr = [] + def get_values(self) -> List[object]: + expr: List[object] = [] if self.tok == ']': self.accept() return expr - if self.tok not in "{['tf": + if self.tok not in tuple("{['tf"): raise QAPIParseError( self, "expected '{', '[', ']', string, or boolean") while True: - expr.append(self.get_expr(True)) + expr.append(self.get_expr()) if self.tok == ']': self.accept() return expr @@ -262,16 +375,16 @@ class QAPISchemaParser: raise QAPIParseError(self, "expected ',' or ']'") self.accept() - def get_expr(self, nested): - if self.tok != '{' and not nested: - raise QAPIParseError(self, "expected '{'") + def get_expr(self) -> _ExprValue: + expr: _ExprValue if self.tok == '{': self.accept() expr = self.get_members() elif self.tok == '[': self.accept() expr = self.get_values() - elif self.tok in "'tf": + elif self.tok in tuple("'tf"): + assert isinstance(self.val, (str, bool)) expr = self.val self.accept() else: @@ -279,7 +392,7 @@ class QAPISchemaParser: self, "expected '{', '[', string, or boolean") return expr - def get_doc(self, info): + def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']: if self.val != '##': raise QAPIParseError( self, "junk after '##' at start of documentation comment") @@ -288,6 +401,7 @@ class QAPISchemaParser: cur_doc = QAPIDoc(self, info) self.accept(False) while self.tok == '#': + assert isinstance(self.val, str) if self.val.startswith('##'): # End of doc comment if self.val != '##': @@ -346,7 +460,7 @@ class QAPIDoc: # Strip leading spaces corresponding to the expected indent level # Blank lines are always OK. if line: - indent = re.match(r'\s*', line).end() + indent = must_match(r'\s*', line).end() if indent < self._indent: raise QAPIParseError( self._parser, @@ -482,7 +596,7 @@ class QAPIDoc: # from line and replace it with spaces so that 'f' has the # same index as it did in the original line and can be # handled the same way we will handle following lines. - indent = re.match(r'@\S*:\s*', line).end() + indent = must_match(r'@\S*:\s*', line).end() line = line[indent:] if not line: # Line was just the "@arg:" header; following lines @@ -517,7 +631,7 @@ class QAPIDoc: # from line and replace it with spaces so that 'f' has the # same index as it did in the original line and can be # handled the same way we will handle following lines. - indent = re.match(r'@\S*:\s*', line).end() + indent = must_match(r'@\S*:\s*', line).end() line = line[indent:] if not line: # Line was just the "@arg:" header; following lines @@ -563,7 +677,7 @@ class QAPIDoc: # from line and replace it with spaces so that 'f' has the # same index as it did in the original line and can be # handled the same way we will handle following lines. - indent = re.match(r'\S*:\s*', line).end() + indent = must_match(r'\S*:\s*', line).end() line = line[indent:] if not line: # Line was just the "Section:" header; following lines |