Source: scikit-learn
Version: 1.4.2+dfsg-7
Severity: normal

scikit-learn is failing tests with scipy 1.15 from experimental:
https://ci.debian.net/data/autopkgtest/unstable/amd64/s/scikit-learn/57873442/log.gz 256s FAILED ../../../../usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_polynomial.py::test_csr_polynomial_expansion_index_overflow[csr_array-False-True-2-65535] 256s FAILED ../../../../usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_polynomial.py::test_csr_polynomial_expansion_index_overflow[csr_array-False-True-3-2344] 256s = 2 failed, 29260 passed, 3388 skipped, 88 xfailed, 45 xpassed, 8173 warnings in 129.25s (0:02:09) = 255s __ test_csr_polynomial_expansion_index_overflow[csr_array-False-True-2-65535] __ 255s [gw51] linux -- Python 3.12.9 /usr/bin/python3.12 255s 255s degree = 2, n_features = 65535, interaction_only = True, include_bias = False 255s csr_container = <class 'scipy.sparse._csr.csr_array'> 255s 255s @pytest.mark.parametrize( 255s "degree, n_features", 255s [ 255s # Needs promotion to int64 when interaction_only=False 255s (2, 65535), 255s (3, 2344), 255s # This guarantees that the intermediate operation when calculating 255s # output columns would overflow a C-long, hence checks that python- 255s # longs are being used. 255s (2, int(np.sqrt(np.iinfo(np.int64).max) + 1)), 255s (3, 65535), 255s # This case tests the second clause of the overflow check which 255s # takes into account the value of `n_features` itself. 255s (2, int(np.sqrt(np.iinfo(np.int64).max))), 255s ], 255s ) 255s @pytest.mark.parametrize("interaction_only", [True, False]) 255s @pytest.mark.parametrize("include_bias", [True, False]) 255s @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) 255s def test_csr_polynomial_expansion_index_overflow( 255s degree, n_features, interaction_only, include_bias, csr_container 255s ): 255s """Tests known edge-cases to the dtype promotion strategy and custom 255s Cython code, including a current bug in the upstream 255s `scipy.sparse.hstack`. 
255s """ 255s data = [1.0] 255s row = [0] 255s col = [n_features - 1] 255s 255s # First degree index 255s expected_indices = [ 255s n_features - 1 + int(include_bias), 255s ] 255s # Second degree index 255s expected_indices.append(n_features * (n_features + 1) // 2 + expected_indices[0]) 255s # Third degree index 255s expected_indices.append( 255s n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1] 255s ) 255s 255s X = csr_container((data, (row, col))) 255s pf = PolynomialFeatures( 255s interaction_only=interaction_only, include_bias=include_bias, degree=degree 255s ) 255s 255s # Calculate the number of combinations a-priori, and if needed check for 255s # the correct ValueError and terminate the test early. 255s num_combinations = pf._num_combinations( 255s n_features=n_features, 255s min_degree=0, 255s max_degree=degree, 255s interaction_only=pf.interaction_only, 255s include_bias=pf.include_bias, 255s ) 255s if num_combinations > np.iinfo(np.intp).max: 255s msg = ( 255s r"The output that would result from the current configuration would have" 255s r" \d* features which is too large to be indexed" 255s ) 255s with pytest.raises(ValueError, match=msg): 255s pf.fit(X) 255s return 255s 255s # In SciPy < 1.8, a bug occurs when an intermediate matrix in 255s # `to_stack` in `hstack` fits within int32 however would require int64 when 255s # combined with all previous matrices in `to_stack`. 
255s if sp_version < parse_version("1.8.0"): 255s has_bug = False 255s max_int32 = np.iinfo(np.int32).max 255s cumulative_size = n_features + include_bias 255s for deg in range(2, degree + 1): 255s max_indptr = _calc_total_nnz(X.indptr, interaction_only, deg) 255s max_indices = _calc_expanded_nnz(n_features, interaction_only, deg) - 1 255s cumulative_size += max_indices + 1 255s needs_int64 = max(max_indices, max_indptr) > max_int32 255s has_bug |= not needs_int64 and cumulative_size > max_int32 255s if has_bug: 255s msg = r"In scipy versions `<1.8.0`, the function `scipy.sparse.hstack`" 255s with pytest.raises(ValueError, match=msg): 255s X_trans = pf.fit_transform(X) 255s return 255s 255s # When `n_features>=65535`, `scipy.sparse.hstack` may not use the right 255s # dtype for representing indices and indptr if `n_features` is still 255s # small enough so that each block matrix's indices and indptr arrays 255s # can be represented with `np.int32`. We test `n_features==65535` 255s # since it is guaranteed to run into this bug. 
255s if ( 255s sp_version < parse_version("1.9.2") 255s and n_features == 65535 255s and degree == 2 255s and not interaction_only 255s ): # pragma: no cover 255s msg = r"In scipy versions `<1.9.2`, the function `scipy.sparse.hstack`" 255s with pytest.raises(ValueError, match=msg): 255s X_trans = pf.fit_transform(X) 255s return 255s X_trans = pf.fit_transform(X) 255s 255s expected_dtype = np.int64 if num_combinations > np.iinfo(np.int32).max else np.int32 255s # Terms higher than first degree 255s non_bias_terms = 1 + (degree - 1) * int(not interaction_only) 255s expected_nnz = int(include_bias) + non_bias_terms 255s assert X_trans.dtype == X.dtype 255s assert X_trans.shape == (1, pf.n_output_features_) 255s > assert X_trans.indptr.dtype == X_trans.indices.dtype == expected_dtype 255s E AssertionError: assert dtype('int64') == <class 'numpy.int32'> 255s E + where dtype('int64') = array([65534]).dtype 255s E + where array([65534]) = <Compressed Sparse Row sparse array of dtype 'float64'\n with 1 stored elements and shape (1, 2147450880)>.indices 255s 255s /usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_polynomial.py:1132: AssertionError 255s __ test_csr_polynomial_expansion_index_overflow[csr_array-False-True-3-2344] ___ 255s [gw51] linux -- Python 3.12.9 /usr/bin/python3.12 255s 255s degree = 3, n_features = 2344, interaction_only = True, include_bias = False 255s csr_container = <class 'scipy.sparse._csr.csr_array'> 255s 255s @pytest.mark.parametrize( 255s "degree, n_features", 255s [ 255s # Needs promotion to int64 when interaction_only=False 255s (2, 65535), 255s (3, 2344), 255s # This guarantees that the intermediate operation when calculating 255s # output columns would overflow a C-long, hence checks that python- 255s # longs are being used. 255s (2, int(np.sqrt(np.iinfo(np.int64).max) + 1)), 255s (3, 65535), 255s # This case tests the second clause of the overflow check which 255s # takes into account the value of `n_features` itself. 
255s (2, int(np.sqrt(np.iinfo(np.int64).max))), 255s ], 255s ) 255s @pytest.mark.parametrize("interaction_only", [True, False]) 255s @pytest.mark.parametrize("include_bias", [True, False]) 255s @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) 255s def test_csr_polynomial_expansion_index_overflow( 255s degree, n_features, interaction_only, include_bias, csr_container 255s ): 255s """Tests known edge-cases to the dtype promotion strategy and custom 255s Cython code, including a current bug in the upstream 255s `scipy.sparse.hstack`. 255s """ 255s data = [1.0] 255s row = [0] 255s col = [n_features - 1] 255s 255s # First degree index 255s expected_indices = [ 255s n_features - 1 + int(include_bias), 255s ] 255s # Second degree index 255s expected_indices.append(n_features * (n_features + 1) // 2 + expected_indices[0]) 255s # Third degree index 255s expected_indices.append( 255s n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1] 255s ) 255s 255s X = csr_container((data, (row, col))) 255s pf = PolynomialFeatures( 255s interaction_only=interaction_only, include_bias=include_bias, degree=degree 255s ) 255s 255s # Calculate the number of combinations a-priori, and if needed check for 255s # the correct ValueError and terminate the test early. 255s num_combinations = pf._num_combinations( 255s n_features=n_features, 255s min_degree=0, 255s max_degree=degree, 255s interaction_only=pf.interaction_only, 255s include_bias=pf.include_bias, 255s ) 255s if num_combinations > np.iinfo(np.intp).max: 255s msg = ( 256s r"The output that would result from the current configuration would have" 256s r" \d* features which is too large to be indexed" 256s ) 256s with pytest.raises(ValueError, match=msg): 256s pf.fit(X) 256s return 256s 256s # In SciPy < 1.8, a bug occurs when an intermediate matrix in 256s # `to_stack` in `hstack` fits within int32 however would require int64 when 256s # combined with all previous matrices in `to_stack`. 
256s if sp_version < parse_version("1.8.0"): 256s has_bug = False 256s max_int32 = np.iinfo(np.int32).max 256s cumulative_size = n_features + include_bias 256s for deg in range(2, degree + 1): 256s max_indptr = _calc_total_nnz(X.indptr, interaction_only, deg) 256s max_indices = _calc_expanded_nnz(n_features, interaction_only, deg) - 1 256s cumulative_size += max_indices + 1 256s needs_int64 = max(max_indices, max_indptr) > max_int32 256s has_bug |= not needs_int64 and cumulative_size > max_int32 256s if has_bug: 256s msg = r"In scipy versions `<1.8.0`, the function `scipy.sparse.hstack`" 256s with pytest.raises(ValueError, match=msg): 256s X_trans = pf.fit_transform(X) 256s return 256s 256s # When `n_features>=65535`, `scipy.sparse.hstack` may not use the right 256s # dtype for representing indices and indptr if `n_features` is still 256s # small enough so that each block matrix's indices and indptr arrays 256s # can be represented with `np.int32`. We test `n_features==65535` 256s # since it is guaranteed to run into this bug. 
256s if ( 256s sp_version < parse_version("1.9.2") 256s and n_features == 65535 256s and degree == 2 256s and not interaction_only 256s ): # pragma: no cover 256s msg = r"In scipy versions `<1.9.2`, the function `scipy.sparse.hstack`" 256s with pytest.raises(ValueError, match=msg): 256s X_trans = pf.fit_transform(X) 256s return 256s X_trans = pf.fit_transform(X) 256s 256s expected_dtype = np.int64 if num_combinations > np.iinfo(np.int32).max else np.int32 256s # Terms higher than first degree 256s non_bias_terms = 1 + (degree - 1) * int(not interaction_only) 256s expected_nnz = int(include_bias) + non_bias_terms 256s assert X_trans.dtype == X.dtype 256s assert X_trans.shape == (1, pf.n_output_features_) 256s > assert X_trans.indptr.dtype == X_trans.indices.dtype == expected_dtype 256s E AssertionError: assert dtype('int64') == <class 'numpy.int32'> 256s E + where dtype('int64') = array([2343]).dtype 256s E + where array([2343]) = <Compressed Sparse Row sparse array of dtype 'float64'\n with 1 stored elements and shape (1, 2146455884)>.indices 256s 256s /usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_polynomial.py:1132: AssertionError -- debian-science-maintainers mailing list debian-science-maintainers@alioth-lists.debian.net https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/debian-science-maintainers