Coverage for NeuralTSNE/NeuralTSNE/TSNE/neural_tsne.py: 19%
92 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-18 16:32 +0000
1import io
2import sys
3import os
4from typing import Tuple
5import argparse
6from argparse_range import range_action
8import numpy as np
9import torch
11import pytorch_lightning as L
12from pytorch_lightning.callbacks import EarlyStopping
13from pytorch_lightning.tuner import Tuner
15from NeuralTSNE.DatasetLoader import get_datasets
16from NeuralTSNE.Utils.Validators.FileTypeValidators import (
17 FileTypeWithExtensionCheck,
18 FileTypeWithExtensionCheckWithPredefinedDatasets,
19)
20from NeuralTSNE.Utils.Writers.StatWriters import save_results
21from NeuralTSNE.Utils.Writers.LabelWriters import save_labels_data
22from NeuralTSNE.Utils.Loaders.LabelLoaders import load_labels
23from NeuralTSNE.Utils.Loaders.FileLoaders import (
24 load_npy_file,
25 load_text_file,
26 load_torch_dataset,
27)
29from NeuralTSNE.TSNE.ParametricTSNE import ParametricTSNE
30from NeuralTSNE.TSNE.Modules import DimensionalityReduction
def run_tsne(
    input_file,
    iter=1000,
    labels=None,
    no_dims=2,
    perplexity=30.0,
    exclude_cols=None,
    step=1,
    exaggeration_iter=0,
    exaggeration_value=12,
    o="result.txt",
    model_save=None,
    model_load=None,
    shuffle=False,
    train_size=None,
    test_size=None,
    jobs=1,
    batch_size=1000,
    header=False,
    net_multipliers=None,
    variance_threshold=None,
    cpu=False,
    early_stopping_delta=1e-5,
    early_stopping_patience=3,
    lr=1e-3,
    auto_lr=False,
):
    """Train (or load) a parametric t-SNE model and write the resulting embedding.

    Parameter names intentionally mirror the CLI flags built by ``parse_args``
    (the entry point calls ``run_tsne(**vars(args))``), which is also why
    ``iter`` and ``o`` keep their short, builtin-shadowing names.

    Args:
        input_file: Open text file / filename with the data, or the name of a
            predefined torch dataset (matched case-insensitively).
        iter: Maximum number of training epochs.
        labels: Optional labels file, forwarded to ``load_labels``.
        no_dims: Dimensionality of the output embedding.
        perplexity: Perplexity of the Gaussian kernel.
        exclude_cols: Columns to drop from text/npy input.
        step: Keep every ``step``-th sample when loading data.
        exaggeration_iter: Number of early-exaggeration epochs.
        exaggeration_value: Early-exaggeration multiplier.
        o: Output filename (also used to derive label-output names).
        model_save: If set, save the trained model to this file.
        model_load: If set, skip training and load a model from this file.
        shuffle: Shuffle training data.
        train_size: Fraction of data used for training when splitting.
        test_size: Fraction of data used for testing when splitting.
        jobs: Number of workers/devices passed to ``ParametricTSNE``.
        batch_size: Training batch size.
        header: Whether text input has a header row.
        net_multipliers: Per-layer width multipliers for the network; defaults
            to ``[0.75, 0.75, 0.75]``.
        variance_threshold: Optional variance filter applied while loading.
        cpu: Force CPU execution even if CUDA is available.
        early_stopping_delta: Minimum loss improvement for early stopping.
        early_stopping_patience: Epochs without improvement before stopping.
        lr: Learning rate.
        auto_lr: Use the Lightning tuner to find a learning rate.
    """
    available_datasets = []
    if "NeuralTSNE.DatasetLoader.get_datasets" in sys.modules:
        available_datasets = get_datasets._get_available_datasets()

    # Avoid the mutable-default-argument pitfall: the CLI always passes a
    # list, but direct callers may rely on this default.
    if net_multipliers is None:
        net_multipliers = [0.75, 0.75, 0.75]

    skip_data_splitting = False
    if (
        not isinstance(input_file, io.TextIOWrapper)
        and len(available_datasets) > 0
        and (name := input_file.lower()) in available_datasets
    ):
        # Predefined torch dataset: it arrives already split into train/test.
        train, test = load_torch_dataset(name, step, o)
        skip_data_splitting = True
        features = np.prod(train.dataset.data.shape[1:])
    else:
        labels = load_labels(labels)
        # NOTE(review): `endswith` assumes `input_file` is a path string here
        # rather than an open file object -- confirm against the FileType
        # validators used by `parse_args`.
        if input_file.endswith(".npy"):
            data = load_npy_file(input_file, step, exclude_cols, variance_threshold)
        else:
            data = load_text_file(
                input_file, step, header, exclude_cols, variance_threshold
            )
        features = data.shape[1]

    tsne = ParametricTSNE(
        loss_fn="kl_divergence",
        n_components=no_dims,
        perplexity=perplexity,
        batch_size=batch_size,
        early_exaggeration_epochs=exaggeration_iter,
        early_exaggeration_value=exaggeration_value,
        max_iterations=iter,
        features=features,
        multipliers=net_multipliers,
        n_jobs=jobs,
        force_cpu=cpu,
    )

    early_stopping = EarlyStopping(
        "train_loss_epoch",
        min_delta=early_stopping_delta,
        patience=early_stopping_patience,
    )

    is_gpu = tsne.device == torch.device("cuda:0")

    trainer = L.Trainer(
        accelerator="gpu" if is_gpu else "cpu",
        devices=1 if is_gpu else tsne.n_jobs,
        log_every_n_steps=1,
        max_epochs=tsne.max_iterations,
        callbacks=[early_stopping],
    )

    classifier = DimensionalityReduction(tsne, shuffle, lr=lr)

    # BUGFIX: `Y` was previously assigned only inside conditional branches, so
    # the unconditional `save_results(..., test, Y)` raised NameError whenever
    # the training branch produced no test split. Initialize it and guard the
    # final save instead.
    Y = None
    if model_load:
        # Inference only: load the model and send everything to the test split.
        tsne.read_model(model_load)
        train, test = (
            tsne.split_dataset(data, y=labels, test_size=1)
            if not skip_data_splitting
            else tsne.create_dataloaders(train, test)
        )
        if not skip_data_splitting:
            save_labels_data({"o": o}, test)
        Y = trainer.predict(classifier, test)
    else:
        train, test = (
            tsne.split_dataset(
                data, y=labels, train_size=train_size, test_size=test_size
            )
            if not skip_data_splitting
            else tsne.create_dataloaders(train, test)
        )
        if auto_lr:
            tuner = Tuner(trainer)
            tuner.lr_find(classifier, train)
            # lr_find runs partial training steps; reset exaggeration state
            # before the real fit.
            classifier.reset_exaggeration_status()
        if not skip_data_splitting:
            save_labels_data({"o": o}, test)
        trainer.fit(classifier, train, [test])
        if model_save:
            tsne.save_model(model_save)
        if test is not None:
            Y = trainer.predict(classifier, test)

    if Y is not None:
        save_results({"o": o, "step": step}, test, Y)
def parse_args():
    """Build the CLI parser and parse ``sys.argv`` for the t-SNE runner.

    Flag names must stay in sync with the keyword parameters of ``run_tsne``,
    since ``main`` forwards them via ``run_tsne(**vars(args))``.
    """
    datasets = []
    if "NeuralTSNE.DatasetLoader.get_datasets" in sys.modules:
        datasets = get_datasets._get_available_datasets()

    p = argparse.ArgumentParser(description="t-SNE Algorithm")

    # Positional input: a data file or the name of a predefined dataset.
    p.add_argument(
        "input_file",
        type=FileTypeWithExtensionCheckWithPredefinedDatasets(
            valid_extensions=("txt", "data", "npy"),
            available_datasets=datasets,
        ),
        help="Input file",
    )

    # Training / embedding options.
    p.add_argument("-iter", type=int, default=1000, required=False,
                   help="Number of iterations")
    p.add_argument("-labels",
                   type=FileTypeWithExtensionCheck(valid_extensions=("txt", "data")),
                   required=False, help="Labels file")
    p.add_argument("-no_dims", type=int, required=True, default=2,
                   help="Number of dimensions")
    p.add_argument("-perplexity", type=float, required=True, default=30.0,
                   help="Perplexity of the Gaussian kernel")
    p.add_argument("-exclude_cols", type=int, nargs="+", required=False,
                   help="Columns to exclude")
    p.add_argument("-step", type=int, required=False, default=1,
                   help="Step between samples")
    p.add_argument("-exaggeration_iter", type=int, required=False, default=0,
                   help="Early exaggeration end")
    p.add_argument("-exaggeration_value", type=float, required=False, default=12,
                   help="Early exaggeration value")
    p.add_argument("-o", type=str, required=False, default="result.txt",
                   help="Output filename")
    p.add_argument("-model_save", type=str, required=False,
                   help="Filename to save model to")
    p.add_argument("-model_load", type=str, required=False,
                   help="Filename to load model from")
    p.add_argument("-shuffle", action="store_true", help="Shuffle data")
    p.add_argument("-train_size", type=float, action=range_action(0, 1),
                   required=False, help="Train size")
    p.add_argument("-test_size", type=float, action=range_action(0, 1),
                   required=False, help="Test size")
    # -jobs is intentionally disabled for now; run_tsne falls back to its
    # default of 1 worker.
    p.add_argument("-batch_size", type=int, required=False, default=1000,
                   help="Batch size")
    p.add_argument("-header", action="store_true", help="Data has header")
    p.add_argument("-net_multipliers", type=float, nargs="+",
                   default=[0.75, 0.75, 0.75], help="Network multipliers")
    p.add_argument("-variance_threshold", type=float, help="Variance threshold")
    p.add_argument("-cpu", action="store_true", help="Use CPU")
    p.add_argument("-early_stopping_delta", type=float, default=1e-5,
                   help="Early stopping delta")
    p.add_argument("-early_stopping_patience", type=int, default=3,
                   help="Early stopping patience")
    p.add_argument("-lr", type=float, default=1e-3, help="Learning rate")
    p.add_argument("-auto_lr", action="store_true", help="Auto learning rate")

    return p.parse_args()
def main():
    """CLI entry point: parse arguments and forward them to ``run_tsne``."""
    run_tsne(**vars(parse_args()))