Coverage for NeuralTSNE/NeuralTSNE/Utils/Preprocessing/preprocessing.py: 100%
13 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-18 16:32 +0000
1import numpy as np
2import torch
4from NeuralTSNE.Utils.Preprocessing.Normalizers import normalize_columns
5from NeuralTSNE.Utils.Preprocessing.Filters import (
6 filter_data_by_variance,
7)
8from NeuralTSNE.Utils.Writers.StatWriters import save_means_and_vars
def prepare_data(variance_threshold: float, data: np.ndarray) -> torch.Tensor:
    """
    Filter `data` by variance, record its statistics, and return it as a
    column-normalized float tensor.

    Parameters
    ----------
    `variance_threshold` : `float`
        Threshold forwarded to `filter_data_by_variance`.
    `data` : `np.ndarray`
        Input data array.

    Returns
    -------
    `torch.Tensor`
        Result of `normalize_columns` applied to the float tensor built
        from the selected array.

    Note
    ----
    `save_means_and_vars` is always called with both the original `data` and
    the filtering result, even when that result is `None`. When the filter
    returns `None`, the original `data` is the array that gets converted and
    normalized; otherwise the filtered array is used.
    """
    reduced = filter_data_by_variance(data, variance_threshold)

    # Hand over the untouched input together with the filter output before
    # deciding which of the two continues through the pipeline.
    save_means_and_vars(data, reduced)

    selected = data if reduced is None else reduced
    return normalize_columns(torch.from_numpy(selected).float())