Version: 0.17.19

PandasS3Datasource

class great_expectations.datasource.fluent.PandasS3Datasource(*, type: Literal['pandas_s3'] = 'pandas_s3', name: str, id: Optional[uuid.UUID] = None, assets: List[great_expectations.datasource.fluent.file_path_data_asset._FilePathDataAsset] = [], bucket: str, boto3_options: Dict[str, Union[great_expectations.datasource.fluent.config_str.ConfigStr, Any]] = {})
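
PandasS3Datasource reads file-based data assets from an Amazon S3 bucket using pandas readers. A minimal sketch of creating one through a Data Context follows; the datasource name, bucket, and region are placeholders, and credentials are assumed to come from the usual boto3 environment:

    import great_expectations as gx

    context = gx.get_context()
    datasource = context.sources.add_pandas_s3(
        name="my_s3_datasource",                     # hypothetical name
        bucket="my-data-bucket",                     # hypothetical bucket
        boto3_options={"region_name": "us-east-1"},  # assumed region
    )

Each add_*_asset method below registers a group of keys in the bucket as a data asset: batching_regex selects matching files (named capture groups become batch parameters), and the remaining keyword arguments are forwarded to the corresponding pandas reader.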
add_csv_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, sep: Optional[str] = None, delimiter: Optional[str] = None, header: Union[int, Sequence[int], None, Literal['infer']] = 'infer', names: Union[Sequence[str], None] = None, index_col: Union[IndexLabel, Literal[False], None] = None, usecols: Optional[Union[int, str, Sequence[int]]] = None, dtype: Optional[dict] = None, engine: Union[CSVEngine, None] = None, converters: Any = None, true_values: Any = None, false_values: Any = None, skipinitialspace: bool = False, skiprows: Optional[Union[Sequence[int], int]] = None, skipfooter: int = 0, nrows: Optional[int] = None, na_values: Any = None, keep_default_na: bool = True, na_filter: bool = True, verbose: bool = False, skip_blank_lines: bool = True, parse_dates: Union[bool, Sequence[str], None] = None, infer_datetime_format: bool = None, keep_date_col: bool = False, date_parser: Any = None, date_format: Optional[str] = None, dayfirst: bool = False, cache_dates: bool = True, iterator: bool = False, chunksize: Optional[int] = None, compression: CompressionOptions = 'infer', thousands: Optional[str] = None, decimal: str = '.', lineterminator: Optional[str] = None, quotechar: str = '"', quoting: int = 0, doublequote: bool = True, escapechar: Optional[str] = None, comment: Optional[str] = None, encoding: Optional[str] = None, encoding_errors: Optional[str] = 'strict', dialect: Optional[str] = None, on_bad_lines: str = 'error', delim_whitespace: bool = False, low_memory: Any = True, memory_map: bool = False, float_precision: Union[Literal['high', 'legacy'], None] = None, storage_options: StorageOptions = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
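
For example, a sketch of registering a CSV asset, continuing from the datasource above; the asset name and key layout are assumptions, and the (?P<year>...) capture group becomes a batch parameter:

    csv_asset = datasource.add_csv_asset(
        name="taxi_csv",                                    # hypothetical asset name
        batching_regex=r"data/taxi_(?P<year>\d{4})\.csv",   # one batch per matching key
        sep=",",
        header="infer",
    )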
add_excel_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, sheet_name: Optional[Union[str, int, List[Union[int, str]]]] = 0, header: Union[int, Sequence[int], None] = 0, names: Optional[List[str]] = None, index_col: Union[int, Sequence[int], None] = None, usecols: Optional[Union[int, str, Sequence[int]]] = None, dtype: Optional[dict] = None, engine: Union[Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb'], None] = None, true_values: Union[Iterable[str], None] = None, false_values: Union[Iterable[str], None] = None, skiprows: Optional[Union[Sequence[int], int]] = None, nrows: Optional[int] = None, na_values: Any = None, keep_default_na: bool = True, na_filter: bool = True, verbose: bool = False, parse_dates: Union[List, Dict, bool] = False, date_format: Optional[str] = None, thousands: Optional[str] = None, decimal: str = '.', comment: Optional[str] = None, skipfooter: int = 0, storage_options: StorageOptions = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
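
A sketch for an Excel asset (asset name and sheet layout assumed); the reader keyword arguments mirror pandas.read_excel:

    excel_asset = datasource.add_excel_asset(
        name="monthly_reports",              # hypothetical
        batching_regex=r"reports/.*\.xlsx",
        sheet_name=0,                        # first sheet
        header=0,
    )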
add_feather_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, columns: Union[Sequence[str], None] = None, use_threads: bool = True, storage_options: StorageOptions = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
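
A sketch for a Feather asset; the column names are assumptions about the files:

    feather_asset = datasource.add_feather_asset(
        name="events_feather",               # hypothetical
        batching_regex=r".*\.feather",
        columns=["event_id", "ts"],          # read only these assumed columns
    )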
add_fwf_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, colspecs: Union[Sequence[Tuple[int, int]], str, None] = 'infer', widths: Union[Sequence[int], None] = None, infer_nrows: int = 100, dtype_backend: DtypeBackend = None, kwargs: Optional[dict] = None, **data) → pydantic.BaseModel
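
A sketch for a fixed-width-file asset; the colspecs character ranges are assumptions about the file layout:

    fwf_asset = datasource.add_fwf_asset(
        name="legacy_fixed_width",           # hypothetical
        batching_regex=r"exports/.*\.txt",
        colspecs=[(0, 10), (10, 25)],        # assumed column boundaries
    )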
add_hdf_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, key: Any = None, mode: str = 'r', errors: str = 'strict', where: Optional[Union[str, List]] = None, start: Optional[int] = None, stop: Optional[int] = None, columns: Optional[List[str]] = None, iterator: bool = False, chunksize: Optional[int] = None, kwargs: Optional[dict] = None, **data) → pydantic.BaseModel
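
A sketch for an HDF5 asset; the store key is an assumption, and note that pandas.read_hdf generally expects a seekable file, so reading HDF5 directly from S3 depends on your pandas/fsspec setup:

    hdf_asset = datasource.add_hdf_asset(
        name="sensor_readings",              # hypothetical
        batching_regex=r".*\.h5",
        key="/readings",                     # assumed HDF5 key
    )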
add_html_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, match: Union[str, Pattern] = '.+', flavor: Optional[str] = None, header: Union[int, Sequence[int], None] = None, index_col: Union[int, Sequence[int], None] = None, skiprows: Optional[Union[Sequence[int], int]] = None, attrs: Optional[Dict[str, str]] = None, parse_dates: bool = False, thousands: Optional[str] = ',', encoding: Optional[str] = None, decimal: str = '.', converters: Optional[Dict] = None, na_values: Union[Iterable[object], None] = None, keep_default_na: bool = True, displayed_only: bool = True, extract_links: Literal[None, 'header', 'footer', 'body', 'all'] = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
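
A sketch for an HTML-table asset; match keeps only tables whose text matches the given pattern (the pattern here is an assumption):

    html_asset = datasource.add_html_asset(
        name="price_tables",                 # hypothetical
        batching_regex=r"pages/.*\.html",
        match="Price",                       # assumed table text
    )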
add_json_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, orient: Optional[str] = None, typ: Literal['frame', 'series'] = 'frame', dtype: Optional[dict] = None, convert_axes: Any = None, convert_dates: Union[bool, List[str]] = True, keep_default_dates: bool = True, precise_float: bool = False, date_unit: Optional[str] = None, encoding: Optional[str] = None, encoding_errors: Optional[str] = 'strict', lines: bool = False, chunksize: Optional[int] = None, compression: CompressionOptions = 'infer', nrows: Optional[int] = None, storage_options: StorageOptions = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
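
A sketch for a JSON asset reading newline-delimited records (file layout assumed):

    json_asset = datasource.add_json_asset(
        name="api_logs",                     # hypothetical
        batching_regex=r"logs/.*\.jsonl",
        lines=True,                          # one JSON record per line
    )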
add_orc_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, columns: Optional[List[str]] = None, dtype_backend: DtypeBackend = None, kwargs: Optional[dict] = None, **data) → pydantic.BaseModel
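
A sketch for an ORC asset; the column list is an assumption:

    orc_asset = datasource.add_orc_asset(
        name="warehouse_orc",                # hypothetical
        batching_regex=r".*\.orc",
        columns=["user_id", "amount"],       # assumed columns
    )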
add_parquet_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, engine: str = 'auto', columns: Optional[List[str]] = None, storage_options: StorageOptions = None, use_nullable_dtypes: bool = None, dtype_backend: DtypeBackend = None, kwargs: Optional[dict] = None, **data) → pydantic.BaseModel
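
A sketch for a Parquet asset with a month capture group as a batch parameter (key layout assumed):

    parquet_asset = datasource.add_parquet_asset(
        name="trips_parquet",                              # hypothetical
        batching_regex=r"trips/(?P<month>\d{2})\.parquet",
        engine="auto",                                     # let pandas pick the Parquet engine
    )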
add_pickle_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, compression: CompressionOptions = 'infer', storage_options: StorageOptions = None, **data) → pydantic.BaseModel
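
A sketch for a pickle asset; only unpickle data you trust, since pickle can execute arbitrary code on load:

    pickle_asset = datasource.add_pickle_asset(
        name="cached_frames",                # hypothetical
        batching_regex=r"cache/.*\.pkl",
        compression="infer",                 # inferred from the file extension
    )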
add_sas_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, format: Optional[str] = None, index: Optional[str] = None, encoding: Optional[str] = None, chunksize: Optional[int] = None, iterator: bool = False, compression: CompressionOptions = 'infer', **data) → pydantic.BaseModel
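
A sketch for a SAS asset; format is normally inferred from the extension, so passing it explicitly, as here, is optional:

    sas_asset = datasource.add_sas_asset(
        name="clinical_sas",                 # hypothetical
        batching_regex=r".*\.sas7bdat",
        format="sas7bdat",                   # assumed file format
    )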
add_spss_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, usecols: Optional[Union[int, str, Sequence[int]]] = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
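
A sketch for an SPSS asset; convert_categoricals maps labeled values to pandas categoricals:

    spss_asset = datasource.add_spss_asset(
        name="survey_responses",             # hypothetical
        batching_regex=r".*\.sav",
        convert_categoricals=True,
    )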
add_stata_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, convert_dates: bool = True, convert_categoricals: bool = True, index_col: Optional[str] = None, convert_missing: bool = False, preserve_dtypes: bool = True, columns: Union[Sequence[str], None] = None, order_categoricals: bool = True, chunksize: Optional[int] = None, iterator: bool = False, compression: CompressionOptions = 'infer', storage_options: StorageOptions = None, **data) → pydantic.BaseModel
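
A sketch for a Stata asset:

    stata_asset = datasource.add_stata_asset(
        name="census_stata",                 # hypothetical
        batching_regex=r".*\.dta",
        convert_dates=True,                  # map Stata dates to datetime64
    )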
add_xml_asset(name: str, *, id: Optional[uuid.UUID] = None, order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None, batch_metadata: Dict[str, Any] = None, batching_regex: Pattern = re.compile('.*'), connect_options: Mapping = None, splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None, xpath: str = './*', namespaces: Optional[Dict[str, str]] = None, elems_only: bool = False, attrs_only: bool = False, names: Union[Sequence[str], None] = None, dtype: Optional[dict] = None, encoding: Optional[str] = 'utf-8', stylesheet: Union[FilePath, None] = None, iterparse: Optional[Dict[str, List[str]]] = None, compression: CompressionOptions = 'infer', storage_options: StorageOptions = None, dtype_backend: DtypeBackend = None, **data) → pydantic.BaseModel
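
A sketch for an XML asset; the default xpath './*' parses the direct children of the document root:

    xml_asset = datasource.add_xml_asset(
        name="product_catalog",              # hypothetical
        batching_regex=r".*\.xml",
        xpath="./*",
    )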