"""
:mod:`sqlparams` is a utility package for converting between various SQL
parameter styles.
"""
from __future__ import annotations
import re
from typing import (
Any,
Dict, # Replaced by `dict` in 3.9.
Iterable, # Replaced by `collections.abc.Iterable` in 3.9.
List, # Replaced by `list` in 3.9.
Optional, # Replaced by `X | None` in 3.10.
Pattern, # Replaced by `re.Pattern` in 3.9.
Sequence, # Replaced by `collections.abc.Sequence` in 3.9.
Tuple, # Replaced by `tuple` in 3.9.
Type, # Replaced by `type` in 3.9.
Union) # Replaced by `X | Y` in 3.10.
from . import _converting
from . import _styles
from . import _util
from .typing import (
TSqlStr)
from ._meta import (
__author__,
__copyright__,
__credits__,
__license__,
__version__)
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte query string.
"""
DEFAULT_COMMENTS: Sequence[Union[str, Tuple[str, str]]] = (
("/*", "*/"),
"--",
)
"""
The default comment styles to strip. This strips single line comments beginning
with ``"--"`` and multiline comments beginning with ``"/*"`` and ending with
``"*/"``.
"""
[docs]
class SQLParams(object):
"""
The :class:`.SQLParams` class is used to support named parameters in
SQL queries where they are not otherwise supported (e.g., pyodbc).
This is done by converting from one parameter style query to another
parameter style query.
By default, when converting to a numeric or ordinal style any
:class:`tuple` parameter will be expanded into "(?,?,...)" to support
the widely used "IN {tuple}" SQL expression without leaking any
unescaped values.
"""
[docs]
def __init__(
self,
in_style: str,
out_style: str,
escape_char: Union[str, bool, None] = None,
expand_tuples: Optional[bool] = None,
strip_comments: Union[Sequence[Union[str, Tuple[str, str]]], bool, None] = None,
allow_out_quotes: Optional[bool] = None,
) -> None:
"""
Instantiates the :class:`.SQLParams` instance.
*in_style* (:class:`str`) is the parameter style that will be used in an SQL
query before being parsed and converted to :attr:`.SQLParams.out_style`.
*out_style* (:class:`str`) is the parameter style that the SQL query will be
converted to.
*escape_char* (:class:`str`, :class:`bool`, or ``None``) is the escape
character used to prevent matching an in-style parameter. If ``True``, use
the default escape character (repeat the initial character to escape it;
e.g., "%%"). If ``False``, do not use an escape character. Default is
``None`` for ``False``.
*expand_tuples* (:class:`bool` or ``None``) is whether to expand tuples into
a sequence of parameters. Default is ``None`` to let it be determined by
*out_style* (to maintain backward compatibility). If *out_style* is a
numeric or ordinal style, expand tuples by default (``True``). If
*out_style* is a named style, do not expand tuples by default (``False``).
.. NOTE:: Empty tuples will be safely expanded to ``(NULL)`` to prevent SQL
syntax errors.
*strip_comments* (:class:`~collections.abc.Sequence`, :class:`bool`, or
``None``) whether to strip out comments and what style of comments to
remove. If a :class:`~collections.abc.Sequence`, this defines the comment
styles. A single line comment is defined using a :class:`str` (e.g., ``"--"``
or ``"#"``). A multiline comment is defined using a :class:`tuple` of
:class:`str` (e.g., ``("/*", "*/")``). In order for a comment to be matched,
it must be the first string of non-whitespace characters on the line.
Trailing comments are not supported and will be ignored. A multiline comment
will consume characters until the ending string is matched. If ``True``,
``DEFAULT_COMMENTS`` will be used (``"--"`` and ``("/*", "*/")`` styles).
Default is ``None`` to not remove comments.
*allow_out_quotes* (:class:`bool` or ``None``) is whether to quote the out
parameters when *out_style* supports it. Default is ``None`` for ``False``.
The following parameter styles are supported by both *in_style* and
*out_style*:
- For all named styles the parameter keys must be valid `Python identifiers`_.
They cannot start with a digit. This is to help prevent incorrectly
matching common strings such as date-times.
Named styles:
- "named" indicates parameters will use the named style::
... WHERE name = :name
- "named_dollar" indicates parameters will use the named dollar sign
style::
... WHERE name = $name
.. NOTE:: This is not defined by `PEP 249`_.
- "named_oracle" indicates parameters will use the named style supported
by Oracle which allows for case-insensitive unquoted parameters and
case-sensitive quoted parameters::
... WHERE name = :name OR name = :"name"
.. NOTE:: This is not defined by `PEP 249`_.
- "named_sqlserver" indicates parameters will use the named at-sign style
supported by Microsoft SQL Server::
... WHERE name = @name
.. NOTE:: This is not defined by `PEP 249`_.
- "pyformat" indicates parameters will use the named Python extended
format style::
... WHERE name = %(name)s
.. NOTE:: Strictly speaking, `PEP 249`_ only specifies "%(name)s" for
the "pyformat" parameter style so only that form (without any other
conversions or flags) is supported.
- All numeric styles start at ``1``. When using a :class:`~collections.abc.Sequence`
for the parameters, the 1st parameter (e.g., ":1") will correspond to the
1st element of the sequence (i.e., index ``0``). When using a
:class:`~collections.abc.Mapping` for the parameters, the 1st parameter
(e.g., ":1") will correspond to the matching key (i.e., ``1`` or ``"1"``).
Numeric styles:
- "numeric" indicates parameters will use the numeric style::
... WHERE name = :1
- "numeric_dollar" indicates parameters will use the numeric dollar sign
style (starts at ``1``)::
... WHERE name = $1
.. NOTE:: This is not defined by `PEP 249`_.
- Ordinal styles:
- "format" indicates parameters will use the ordinal Python format style::
... WHERE name = %s
.. NOTE:: Strictly speaking, `PEP 249`_ only specifies "%s" for the
"format" parameter styles so only that form (without any other
conversions or flags) is supported.
- "qmark" indicates parameters will use the ordinal question mark style::
... WHERE name = ?
.. _`PEP 249`: http://www.python.org/dev/peps/pep-0249/
.. _`Python identifiers`: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
"""
if not isinstance(in_style, str):
raise TypeError(f"{in_style=!r} is not a string.")
if not isinstance(out_style, str):
raise TypeError(f"{out_style=!r} is not a string.")
in_obj = _styles.STYLES[in_style]
out_obj = _styles.STYLES[out_style]
if escape_char is True:
use_char = in_obj.escape_char
elif not escape_char:
use_char = None
elif isinstance(escape_char, str):
use_char = escape_char
else:
raise TypeError(f"{escape_char=!r} is not a string or bool.")
if expand_tuples is None:
expand_tuples = not isinstance(out_obj, _styles.NamedStyle)
else:
expand_tuples = bool(expand_tuples)
if strip_comments is True:
strip_comments = DEFAULT_COMMENTS
elif strip_comments is False:
strip_comments = None
in_regex = self.__create_in_regex(
escape_char=use_char,
in_obj=in_obj,
out_obj=out_obj,
)
self.__converter: _converting.Converter = self.__create_converter(
escape_char=use_char,
expand_tuples=expand_tuples,
in_obj=in_obj,
in_regex=in_regex,
in_style=in_style,
out_obj=out_obj,
out_style=out_style,
allow_out_quotes=bool(allow_out_quotes),
)
"""
*__converter* (:class:`._converting.Converter`) is the parameter converter
to use.
"""
self.__escape_char: Optional[str] = use_char
"""
*__escape_char* (:class:`str` or ``None``) is the escape character used to
prevent matching an in-style parameter.
"""
self.__expand_tuples: bool = expand_tuples
"""
*__expand_tuples* (:class:`bool`) is whether to convert tuples into a
sequence of parameters.
"""
self.__in_obj: _styles.Style = in_obj
"""
*__in_obj* (:class:`._styles.Style`) is the in-style parameter object.
"""
self.__in_regex: Pattern = in_regex
"""
*__in_regex* (:class:`re.Pattern`) is the regular expression used to extract
the in-style parameters.
"""
self.__in_style: str = in_style
"""
*__in_style* (:class:`str`) is the parameter style that will be used in an
SQL query before being parsed and converted to :attr:`.SQLParams.out_style`.
"""
self.__out_obj: _styles.Style = out_obj
"""
*__out_obj* (:class:`._styles.Style`) is the out-style parameter object.
"""
self.__out_style: str = out_style
"""
*__out_style* (:class:`str`) is the parameter style that the SQL query will
be converted to.
"""
self.__strip_comment_regexes: List[Pattern] = self.__create_strip_comment_regexes(
strip_comments=strip_comments,
)
"""
*__strip_comment_regexes* (:class:`list` of :class:`Pattern`) contains the
regular expressions to strip out comments.
"""
self.__strip_comments: Optional[Sequence[Union[str, Tuple[str, str]]]] = strip_comments
"""
*__strip_comments* (:class:`~collections.abc.Sequence` or ``None``) contains
the comment styles to remove.
"""
def __repr__(self) -> str:
"""
Returns the canonical string representation (:class:`str`) of this instance.
"""
return "{}.{}({!r}, {!r})".format(
self.__class__.__module__,
self.__class__.__name__,
self.__in_style,
self.__out_style,
)
@staticmethod
def __create_converter(
escape_char: Optional[str],
expand_tuples: bool,
in_obj: _styles.Style,
in_regex: Pattern,
in_style: str,
out_obj: _styles.Style,
out_style: str,
allow_out_quotes: bool,
) -> _converting.Converter:
"""
Create the parameter style converter.
*escape_char* (:class:`str` or ``None``) is the escape character used to
prevent matching an in-style parameter.
*expand_tuples* (:class:`bool`) is whether to convert tuples into a sequence
of parameters.
*in_obj* (:class:`._styles.Style`) is the in-style parameter object.
*in_style* (:class:`str`) is the in-style name.
*in_regex* (:class:`re.Pattern`) is the regular expression used to extract
the in-style parameters.
*out_obj* (:class:`._styles.Style`) is the out-style parameter object.
*out_style* (:class:`str`) is the out-style name.
Returns the parameter style converter (:class:`._converting.Converter`).
"""
# Determine converter class.
converter_class: Type[_converting.Converter]
if isinstance(in_obj, _styles.NamedStyle):
if isinstance(out_obj, _styles.NamedStyle):
converter_class = _converting.NamedToNamedConverter
elif isinstance(out_obj, _styles.NumericStyle):
converter_class = _converting.NamedToNumericConverter
elif isinstance(out_obj, _styles.OrdinalStyle):
converter_class = _converting.NamedToOrdinalConverter
else:
raise TypeError(f"{out_style=!r} maps to an unexpected type: {out_obj!r}")
elif isinstance(in_obj, _styles.NumericStyle):
if isinstance(out_obj, _styles.NamedStyle):
converter_class = _converting.NumericToNamedConverter
elif isinstance(out_obj, _styles.NumericStyle):
converter_class = _converting.NumericToNumericConverter
elif isinstance(out_obj, _styles.OrdinalStyle):
converter_class = _converting.NumericToOrdinalConverter
else:
raise TypeError(f"{out_style=!r} maps to an unexpected type: {out_obj!r}")
elif isinstance(in_obj, _styles.OrdinalStyle):
if isinstance(out_obj, _styles.NamedStyle):
converter_class = _converting.OrdinalToNamedConverter
elif isinstance(out_obj, _styles.NumericStyle):
converter_class = _converting.OrdinalToNumericConverter
elif isinstance(out_obj, _styles.OrdinalStyle):
converter_class = _converting.OrdinalToOrdinalConverter
else:
raise TypeError(f"{out_style=!r} maps to an unexpected type: {out_obj!r}")
else:
raise TypeError(f"{in_style=!r} maps to an unexpected type: {in_obj!r}")
# Create converter.
converter = converter_class(
escape_char=escape_char,
expand_tuples=expand_tuples,
in_regex=in_regex,
in_style=in_obj,
out_style=out_obj,
allow_out_quotes=allow_out_quotes
)
return converter
@staticmethod
def __create_in_regex(
escape_char: str,
in_obj: _styles.Style,
out_obj: _styles.Style,
) -> Pattern:
"""
Create the in-style parameter regular expression.
*escape_char* (:class:`str` or ``None``) is the escape character to prevent
matching an in-style parameter.
*in_obj* (:class:`._styles.Style`) is the in-style parameter object.
*out_obj* (:class:`._styles.Style`) is the out-style parameter object.
Returns the in-style parameter regular expression (:class:`re.Pattern`).
"""
regex_parts = []
if in_obj.escape_char != "%" and out_obj.escape_char == "%":
regex_parts.append("(?P<out_percent>%)")
if escape_char:
# Escaping is enabled.
escape = in_obj.escape_regex.format(char=re.escape(escape_char))
regex_parts.append(escape)
regex_parts.append(in_obj.param_regex)
return re.compile("|".join(regex_parts))
@staticmethod
def __create_strip_comment_regexes(
strip_comments: Optional[Sequence[Union[str, Tuple[str, str]]]],
) -> List[Pattern]:
"""
Create the regular expressions to strip comments.
*strip_comments* (:class:`~collections.abc.Sequence` or ``None``) contains
the comment styles to remove.
Returns the regular expressions (:class:`list` of :class:`re.Pattern`).
"""
if strip_comments is None:
return []
out_regexes = []
for i, comment_style in enumerate(strip_comments):
if isinstance(comment_style, str):
# Compile regular expression to strip single line comment.
out_regexes.append(re.compile("^[ \t]*{comment}.*(?:\n|\r\n)?".format(
comment=re.escape(comment_style),
), re.M))
elif _util.is_sequence(comment_style):
# Compile regular expression to strip multiline comment.
start_comment, end_comment = comment_style # type: str
out_regexes.append(re.compile("^[ \t]*{start}.*?{end}(?:\n|\r\n)?".format(
start=re.escape(start_comment),
end=re.escape(end_comment),
), re.DOTALL | re.M))
pass
else:
raise TypeError(f"strip_comments[{i}]={comment_style!r} must be either a str or tuple.")
return out_regexes
@property
def escape_char(self) -> Optional[str]:
"""
*escape_char* (:class:`str` or ``None``) is the escape character used to
prevent matching an in-style parameter.
"""
return self.__escape_char
@property
def expand_tuples(self) -> bool:
"""
*expand_tuples* (:class:`bool`) is whether to convert tuples into a sequence
of parameters.
"""
return self.__expand_tuples
@property
def in_style(self) -> str:
"""
*in_style* (:class:`str`) is the parameter style to expect in an SQL query
when being parsed.
"""
return self.__in_style
@property
def out_style(self) -> str:
"""
*out_style* (:class:`str`) is the parameter style that the SQL query will be
converted to.
"""
return self.__out_style
@property
def strip_comments(self) -> Optional[Sequence[Union[str, Tuple[str, str]]]]:
"""
*strip_comments* (:class:`~collections.abc.Sequence` or ``None``) contains
the comment styles to remove.
"""
return self.__strip_comments
def __strip_comments_from_sql(self, sql: str) -> str:
"""
Strip comments from the SQL.
*sql* (:class:`str`) is the SQL query.
Returns the stripped SQL query (:class:`str`).
"""
out_sql = sql
for comment_regex in self.__strip_comment_regexes:
out_sql = comment_regex.sub("", out_sql)
return out_sql