55from typing import Any , Optional
66
77from pydantic import BaseModel
8+ from pyspark .errors .exceptions .base import AnalysisException
89from pyspark .sql import DataFrame , SparkSession
910from pyspark .sql import functions as sf
1011from pyspark .sql .column import Column
1112from pyspark .sql .types import StringType , StructField , StructType
12- from pyspark .sql .utils import AnalysisException
1313from typing_extensions import Literal
1414
1515from dve .core_engine .backends .base .reader import read_function
16- from dve .core_engine .backends .exceptions import EmptyFileError
16+ from dve .core_engine .backends .exceptions import EmptyFileError , MessageBearingError
1717from dve .core_engine .backends .implementations .spark .spark_helpers import (
1818 df_is_empty ,
1919 get_type_from_annotation ,
2020 spark_write_parquet ,
2121)
2222from dve .core_engine .backends .readers .xml import BasicXMLFileReader , XMLStreamReader
23- from dve .core_engine .backends .utilities import dump_errors
2423from dve .core_engine .type_hints import URI , EntityName
25- from dve .parser .file_handling import get_content_length , get_parent
24+ from dve .parser .file_handling import get_content_length
2625from dve .parser .file_handling .service import open_stream
2726
2827SparkXMLMode = Literal ["PERMISSIVE" , "FAILFAST" , "DROPMALFORMED" ]
@@ -44,7 +43,7 @@ def read_to_dataframe(
4443 ) -> DataFrame :
4544 """Stream an XML file into a Spark data frame"""
4645 if not self .spark :
47- self .spark = SparkSession .builder .getOrCreate ()
46+ self .spark = SparkSession .builder .getOrCreate () # type: ignore
4847 spark_schema = get_type_from_annotation (schema )
4948 return self .spark .createDataFrame ( # type: ignore
5049 list (self .read_to_py_iterator (resource , entity_name , schema )),
@@ -90,7 +89,7 @@ def __init__(
9089 rules_location = rules_location ,
9190 )
9291
93- self .spark_session = spark_session or SparkSession .builder .getOrCreate ()
92+ self .spark_session = spark_session or SparkSession .builder .getOrCreate () # type: ignore
9493 self .sampling_ratio = sampling_ratio
9594 self .exclude_attribute = exclude_attribute
9695 self .mode = mode
@@ -122,9 +121,9 @@ def read_to_dataframe(
122121 if self .xsd_location :
123122 msg = self ._run_xmllint (file_uri = resource )
124123 if msg :
125- working_folder = get_parent ( resource )
126- dump_errors (
127- working_folder = working_folder , step_name = "file_transformation" , messages = [msg ]
124+ raise MessageBearingError (
125+ "Submitted file failed XSD validation." ,
126+ messages = [msg ],
128127 )
129128
130129 spark_schema : StructType = get_type_from_annotation (schema )
0 commit comments