Commit f9fb3a1

[BUG] Temporarily fix issue #1586 by marking some failed tests as non-strict expected fail. (#1587)
#### Metadata

* Reference Issue: Temporarily fix issue #1586

#### Details

- Running pytest locally, I found only one failing test:
  `tests/test_runs/test_run_functions.py::test__run_task_get_arffcontent_2`
- However, when going through the failed tests in recently run CI jobs across several recent PRs, I found many other failing tests. I sampled some of them and analysed the errors; here are my findings:

##### Primary Failure Patterns

1. OpenML Test Server Issues (most common). The majority of failures are caused by:
   - `OpenMLServerError: Unexpected server error when calling https://test.openml.org/... with Status code: 500`
   - Database connection errors: `Database connection error. Usually due to high server load. Please wait N seconds and try again.`
   - Timeout errors: `TIMEOUT: Failed to fetch uploaded dataset`
2. Cache/Filesystem Issues
   - `ValueError: Cannot remove faulty tasks cache directory ... Please do this manually!`
   - `FileNotFoundError: No such file or directory`
3. Data Format Issues
   - `KeyError: ['type'] not found in axis`
   - `KeyError: ['class'] not found in axis`
   - `KeyError: ['Class'] not found in axis`
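For reviewers unfamiliar with the marker: with `strict=False`, a marked test that fails is reported as `XFAIL`, and one that unexpectedly passes is reported as `XPASS`; neither outcome fails the suite, so the server-dependent tests stop breaking CI while issue #1586 is open. A minimal, self-contained sketch of that behaviour (hypothetical tests, not part of this commit):

```python
# Minimal sketch of non-strict xfail semantics (hypothetical tests, not part of this commit).
import pytest


@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_intermittent_server_error():
    # Simulates the kind of server-side failure described above; pytest reports XFAIL.
    raise RuntimeError("Unexpected server error ... Status code: 500")


@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_sometimes_healthy():
    # When the call succeeds, the test is reported as XPASS; because strict=False,
    # this does not fail the run either.
    assert 2 + 2 == 4
```

Running `pytest -ra` then lists these outcomes in the short summary under `XFAIL`/`XPASS` instead of as failures.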
1 parent bd8ae77 commit f9fb3a1

11 files changed, +59 -0 lines changed

tests/test_datasets/test_dataset_functions.py

Lines changed: 9 additions & 0 deletions
@@ -280,6 +280,7 @@ def test_dataset_by_name_cannot_access_private_data(self):
         self.use_production_server()
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_dataset_lazy_all_functions(self):
         """Test that all expected functionality is available without downloading the dataset."""
         dataset = openml.datasets.get_dataset(1)
@@ -664,6 +665,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
         with pytest.raises(ValueError, match=err_msg):
             attributes_arff_from_df(df)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_numpy(self):
         data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
@@ -751,6 +753,7 @@ def test_create_dataset_list(self):
         ), "Uploaded ARFF does not match original one"
         assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_sparse(self):
         # test the scipy.sparse.coo_matrix
         sparse_data = scipy.sparse.coo_matrix(
@@ -868,6 +871,7 @@ def test_get_online_dataset_arff(self):
             return_type=arff.DENSE if d_format == "arff" else arff.COO,
         ), "ARFF files are not equal"

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_topic_api_error(self):
         # Check server exception when non-admin accessses apis
         self.assertRaisesRegex(
@@ -895,6 +899,7 @@ def test_get_online_dataset_format(self):
             dataset_id
         ), "The format of the ARFF files is different"

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1119,6 +1124,7 @@ def test_ignore_attributes_dataset(self):
             paper_url=paper_url,
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_fetch_ignore_attribute(self):
         """Test to upload and retrieve dataset and check ignore_attributes"""
         data = [
@@ -1237,6 +1243,7 @@ def test_create_dataset_row_id_attribute_error(self):
             paper_url=paper_url,
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_row_id_attribute_inference(self):
         # meta-information
         name = f"{self._get_sentinel()}-pandas_testing_dataset"
@@ -1400,6 +1407,7 @@ def test_data_edit_non_critical_field(self):
         edited_dataset = openml.datasets.get_dataset(did)
         assert edited_dataset.description == desc

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_data_edit_critical_field(self):
         # Case 2
         # only owners (or admin) can edit all critical fields of datasets
@@ -1448,6 +1456,7 @@ def test_data_edit_requires_valid_dataset(self):
             description="xor operation dataset",
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
         # Need to own a dataset to be able to edit meta-data
         # Will be creating a forked version of an existing dataset to allow the unit test user

tests/test_flows/test_flow.py

Lines changed: 5 additions & 0 deletions
@@ -178,6 +178,7 @@ def test_to_xml_from_xml(self):
         assert new_flow is not flow

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow(self):
         flow = openml.OpenMLFlow(
             name="sklearn.dummy.DummyClassifier",
@@ -219,6 +220,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
         )

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow_with_similar_components(self):
         clf = sklearn.ensemble.VotingClassifier(
             [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
@@ -269,6 +271,7 @@ def test_publish_flow_with_similar_components(self):
         TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_semi_legal_flow(self):
         # TODO: Test if parameters are set correctly!
         # should not throw error as it contains two differentiable forms of
@@ -377,6 +380,7 @@ def get_sentinel():
         assert not flow_id

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_existing_flow_exists(self):
         # create a flow
         nb = sklearn.naive_bayes.GaussianNB()
@@ -417,6 +421,7 @@ def test_existing_flow_exists(self):
         assert downloaded_flow_id == flow.flow_id

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_sklearn_to_upload_to_flow(self):
         iris = sklearn.datasets.load_iris()
         X = iris.data

tests/test_flows/test_flow_functions.py

Lines changed: 2 additions & 0 deletions
@@ -274,6 +274,7 @@ def test_are_flows_equal_ignore_if_older(self):
         assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None)

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
         reason="OrdinalEncoder introduced in 0.20. "
@@ -388,6 +389,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self):
         assert "sklearn==0.19.1" not in flow.dependencies

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_flow_id(self):
         if self.long_version:
             list_all = openml.utils._list_all

tests/test_runs/test_run.py

Lines changed: 5 additions & 0 deletions
@@ -118,6 +118,7 @@ def _check_array(array, type_):
         assert run_prime_trace_content is None

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_vanilla(self):
         model = Pipeline(
             [
@@ -153,6 +154,7 @@ def test_to_from_filesystem_vanilla(self):

     @pytest.mark.sklearn()
     @pytest.mark.flaky()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_search(self):
         model = Pipeline(
             [
@@ -187,6 +189,7 @@ def test_to_from_filesystem_search(self):
         )

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_to_from_filesystem_no_model(self):
         model = Pipeline(
             [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -292,6 +295,7 @@ def assert_run_prediction_data(task, run, model):
             assert_method(y_test, saved_y_test)

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_with_local_loaded_flow(self):
         """
         Publish a run tied to a local flow after it has first been saved to
@@ -335,6 +339,7 @@ def test_publish_with_local_loaded_flow(self):
         openml.runs.get_run(loaded_run.run_id)

     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_offline_and_online_run_identical(self):
         extension = SklearnExtension()

tests/test_runs/test_run_functions.py

Lines changed: 22 additions & 0 deletions
@@ -413,6 +413,7 @@ def test_run_regression_on_classif_task(self):
                 task=task,
             )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_check_erronous_sklearn_flow_fails(self):
         task_id = 115  # diabetes; crossvalidation
@@ -881,6 +882,7 @@ def test_run_and_upload_maskedarrays(self):

     ##########################################################################

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_learning_curve_task_1(self):
         task_id = 801  # diabates dataset
@@ -905,6 +907,7 @@ def test_learning_curve_task_1(self):
         )
         self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_learning_curve_task_2(self):
         task_id = 801  # diabates dataset
@@ -941,6 +944,7 @@ def test_learning_curve_task_2(self):
         )
         self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.21"),
@@ -1019,6 +1023,7 @@ def _test_local_evaluations(self, run):
             assert alt_scores[idx] >= 0
             assert alt_scores[idx] <= 1

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_local_run_swapped_parameter_order_model(self):
         clf = DecisionTreeClassifier()
@@ -1034,6 +1039,7 @@ def test_local_run_swapped_parameter_order_model(self):

         self._test_local_evaluations(run)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1062,6 +1068,7 @@ def test_local_run_swapped_parameter_order_flow(self):

         self._test_local_evaluations(run)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1099,6 +1106,7 @@ def test_online_run_metric_score(self):

         self._test_local_evaluations(run)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1160,6 +1168,7 @@ def test_initialize_model_from_run(self):
         assert flowS.components["Imputer"].parameters["strategy"] == '"most_frequent"'
         assert flowS.components["VarianceThreshold"].parameters["threshold"] == "0.05"

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1219,6 +1228,7 @@ def test__run_exists(self):
             run_ids = run_exists(task.task_id, setup_exists)
             assert run_ids, (run_ids, clf)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id(self):
         # check the case where the user adds an illegal flow id to a
@@ -1238,6 +1248,7 @@ def test_run_with_illegal_flow_id(self):
             avoid_duplicate_runs=True,
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id_after_load(self):
         # Same as `test_run_with_illegal_flow_id`, but test this error is also
@@ -1294,6 +1305,7 @@ def test_run_with_illegal_flow_id_1(self):
             avoid_duplicate_runs=True,
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_with_illegal_flow_id_1_after_load(self):
         # Same as `test_run_with_illegal_flow_id_1`, but test this error is
@@ -1332,6 +1344,7 @@ def test_run_with_illegal_flow_id_1_after_load(self):
             loaded_run.publish,
         )

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1559,6 +1572,7 @@ def test_get_runs_list_by_tag(self):
         runs = openml.runs.list_runs(tag="curves", size=2)
         assert len(runs) >= 1

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1595,6 +1609,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
             # repeat, fold, row_id, 6 confidences, prediction and correct label
             assert len(row) == 12

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -1647,6 +1662,7 @@ def test_get_uncached_run(self):
         with pytest.raises(openml.exceptions.OpenMLCacheException):
             openml.runs.functions._get_cached_run(10)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_run_flow_on_task_downloaded_flow(self):
         model = sklearn.ensemble.RandomForestClassifier(n_estimators=33)
@@ -1687,6 +1703,7 @@ def test_format_prediction_classification_no_probabilities(self):
         with pytest.raises(ValueError, match="`proba` is required for classification task"):
             format_prediction(classification, *ignored_input, proba=None)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_classification_incomplete_probabilities(self):
         classification = openml.tasks.get_task(
             self.TEST_SERVER_TASK_SIMPLE["task_id"],
@@ -1707,13 +1724,15 @@ def test_format_prediction_task_without_classlabels_set(self):
         with pytest.raises(ValueError, match="The classification task must have class labels set"):
             format_prediction(classification, *ignored_input, proba={})

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_task_learning_curve_sample_not_set(self):
         learning_curve = openml.tasks.get_task(801, download_data=False)  # diabetes;crossvalidation
         probabilities = {c: 0.2 for c in learning_curve.class_labels}
         ignored_input = [0] * 5
         with pytest.raises(ValueError, match="`sample` can not be none for LearningCurveTask"):
             format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_format_prediction_task_regression(self):
         task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"]
         _task_id = check_task_existence(**task_meta_data)
@@ -1743,6 +1762,7 @@ def test_format_prediction_task_regression(self):



+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @unittest.skipIf(
     Version(sklearn.__version__) < Version("0.20"),
     reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1843,6 +1863,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):


 @pytest.mark.sklearn()
+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @unittest.skipIf(
     Version(sklearn.__version__) < Version("0.21"),
     reason="couldn't perform local tests successfully w/o bloating RAM",
@@ -1919,6 +1940,7 @@ def test__run_task_get_arffcontent_2(parallel_mock):
     )


+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @pytest.mark.sklearn()
 @unittest.skipIf(
     Version(sklearn.__version__) < Version("0.21"),

tests/test_setups/test_setup_functions.py

Lines changed: 4 additions & 0 deletions
@@ -34,6 +34,7 @@ def setUp(self):
         self.extension = SklearnExtension()
         super().setUp()

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_nonexisting_setup_exists(self):
         # first publish a non-existing flow
@@ -81,6 +82,7 @@ def _existing_setup_exists(self, classif):
         setup_id = openml.setups.setup_exists(flow)
         assert setup_id == run.setup_id

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_existing_setup_exists_1(self):
         def side_effect(self):
@@ -96,11 +98,13 @@ def side_effect(self):
             nb = sklearn.naive_bayes.GaussianNB()
             self._existing_setup_exists(nb)

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_exisiting_setup_exists_2(self):
         # Check a flow with one hyperparameter
         self._existing_setup_exists(sklearn.naive_bayes.GaussianNB())

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     @pytest.mark.sklearn()
     def test_existing_setup_exists_3(self):
         # Check a flow with many hyperparameters

tests/test_tasks/test_classification_task.py

Lines changed: 3 additions & 0 deletions
@@ -18,18 +18,21 @@ def setUp(self, n_levels: int = 1):
         self.task_type = TaskType.SUPERVISED_CLASSIFICATION
         self.estimation_procedure = 5

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_download_task(self):
         task = super().test_download_task()
         assert task.task_id == self.task_id
         assert task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION
         assert task.dataset_id == 20
         assert task.estimation_procedure_id == self.estimation_procedure

+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_class_labels(self):
         task = get_task(self.task_id)
         assert task.class_labels == ["tested_negative", "tested_positive"]


+@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 @pytest.mark.server()
 def test_get_X_and_Y():
     task = get_task(119)