Skip to content

Commit a9b5315

Browse files
authored
Merge pull request #32 from CESNET/dev
Version 2.1.0 release
2 parents 8e71cbb + 166f17d commit a9b5315

File tree

312 files changed

+7013
-37088
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

312 files changed

+7013
-37088
lines changed

.github/workflows/pypi_publish.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Builds the package and uploads it to PyPI on every push to `main`.
name: Publish to PyPI on merge to main

on:
  push:
    branches:
      - main

jobs:
  # NOTE(review): the job id says "testpypi", but the upload step below uses
  # twine's default repository (production PyPI). The id is kept unchanged so
  # anything referring to it keeps working.
  build-and-upload-testpypi:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout source
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12.3'

      - name: Install build tools
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install --upgrade build
          python3 -m pip install --upgrade twine

      - name: Build package
        run: python3 -m build

      # A version cannot be re-uploaded to PyPI, so validate the built
      # distributions before the irreversible upload step.
      - name: Check distribution metadata
        run: python3 -m twine check dist/*

      - name: Upload to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python3 -m twine upload dist/*

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ The goal of `cesnet-tszoo` project is to provide time series datasets with usefu
1414
- Data can be split into train/val/test sets. Split can be done by time series or by time periods.
1515
- Transforming of data with built-in transformers or with custom transformers.
1616
- Handling missing values with built-in fillers or with custom fillers.
17+
- Applying custom handlers.
18+
- Changing the order in which preprocessing steps are applied/fitted.
1719
- Creation and import of benchmarks, for easy reproducibility of experiments.
1820
- Creation and import of annotations. Can create annotations for specific time series, specific time or specific time in specific time series.
1921

@@ -45,8 +47,23 @@ or for editable install with:
4547
pip install -e git+https://github.com/CESNET/cesnet-tszoo#egg=cesnet-tszoo
4648
```
4749

50+
## Citation
51+
52+
If you use CESNET TS-Zoo, please cite our paper:
53+
54+
```
55+
@inproceedings{kures2025,
56+
title={CESNET TS-Zoo: A Library for Reproducible Analysis of Network Traffic Time Series},
57+
author={Milan Kureš and Josef Koumar and Karel Hynek},
58+
booktitle={2025 21st International Conference on Network and Service Management (CNSM)},
59+
year={2025}
60+
}
61+
```
62+
4863
## Examples
4964

65+
For detailed examples refer to [`Tutorial notebooks`](https://github.com/CESNET/cesnet-ts-zoo-tutorials)
66+
5067
### Initialize dataset to create train, validation, and test dataframes
5168

5269
#### Using [`TimeBasedCesnetDataset`](https://cesnet.github.io/cesnet-tszoo/reference_time_based_cesnet_dataset/) dataset
@@ -132,6 +149,4 @@ val_dataframe = dataset.get_val_df()
132149
test_dataframe = dataset.get_test_df()
133150
```
134151

135-
Whether loaded dataset is series-based or time-based depends on the benchmark. What can be loaded corresponds to previous datasets.
136-
137-
## Papers
152+
The loaded dataset can be any of the dataset types shown above.

cesnet_tszoo/benchmarks.py

Lines changed: 46 additions & 37 deletions
Large diffs are not rendered by default.

cesnet_tszoo/configs/base_config.py

Lines changed: 389 additions & 154 deletions
Large diffs are not rendered by default.

cesnet_tszoo/configs/config_editors/__init__.py

Whitespace-only changes.
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from abc import ABC, abstractmethod
2+
from dataclasses import dataclass, field
3+
from typing import Literal
4+
from numbers import Number
5+
import logging
6+
7+
import numpy as np
8+
9+
from cesnet_tszoo.data_models.dataset_metadata import DatasetMetadata
10+
from cesnet_tszoo.configs.base_config import DatasetConfig
11+
from cesnet_tszoo.utils.enums import FillerType, TransformerType, AnomalyHandlerType
12+
from cesnet_tszoo.utils.transformer import Transformer
13+
import cesnet_tszoo.utils.filler.factory as filler_factories
14+
import cesnet_tszoo.utils.transformer.factory as transformer_factories
15+
import cesnet_tszoo.utils.anomaly_handler.factory as anomaly_handler_factories
16+
17+
18+
@dataclass
class ConfigEditor(ABC):
    """Used for choosing which values in config to modify.

    Every editable field accepts the string sentinel ``"config"``, meaning
    "keep the value currently held by ``default_config``". ``__post_init__``
    replaces each sentinel with that value and sets ``requires_init`` when any
    field applied by ``_hard_modify`` was passed explicitly.
    """

    default_config: DatasetConfig
    default_values: list[Number] | dict[str, Number] | Number | Literal["default"] | None | Literal["config"]
    train_batch_size: int | Literal["config"]
    val_batch_size: int | Literal["config"]
    test_batch_size: int | Literal["config"]
    all_batch_size: int | Literal["config"]
    preprocess_order: list[str | type] | Literal["config"]
    fill_missing_with: type | FillerType | Literal["mean_filler", "forward_filler", "linear_interpolation_filler"] | None | Literal["config"]
    transform_with: type | list[Transformer] | np.ndarray[Transformer] | TransformerType | Transformer | Literal["min_max_scaler", "standard_scaler", "max_abs_scaler", "log_transformer", "robust_scaler", "power_transformer", "quantile_transformer", "l2_normalizer"] | None | Literal["config"]
    handle_anomalies_with: type | AnomalyHandlerType | Literal["z-score", "interquartile_range"] | None | Literal["config"]
    create_transformer_per_time_series: bool | Literal["config"]
    partial_fit_initialized_transformers: bool | Literal["config"]
    train_workers: int | Literal["config"]
    val_workers: int | Literal["config"]
    test_workers: int | Literal["config"]
    all_workers: int | Literal["config"]
    init_workers: int | Literal["config"]
    # Set by __post_init__ (never by the caller): True when `_hard_modify` and
    # config validation have to run in `modify_dataset_config`.
    requires_init: bool = field(default=False, init=False)

    @staticmethod
    def _is_config_sentinel(value) -> bool:
        """Return True only when `value` is the literal string ``"config"``.

        The isinstance guard matters: a bare ``value == "config"`` on a NumPy
        array (allowed for `transform_with`) is evaluated element-wise and the
        resulting array cannot be used as an ``if`` condition.
        """
        return isinstance(value, str) and value == "config"

    def _resolve_from_default(self, names: tuple[str, ...], forces_init: bool) -> None:
        """Replace ``"config"`` sentinels in `names` with the same-named `default_config` attribute.

        When `forces_init` is True, any explicitly passed (non-sentinel) value
        sets `requires_init`.
        """
        for name in names:
            if self._is_config_sentinel(getattr(self, name)):
                setattr(self, name, getattr(self.default_config, name))
            elif forces_init:
                self.requires_init = True

    def __post_init__(self):
        """Resolve all ``"config"`` sentinels and work out whether a hard modify is needed."""
        self.logger = logging.getLogger("config_editor")

        self._resolve_from_default(("default_values", "preprocess_order"), forces_init=True)

        # Batch sizes are applied by `_soft_modify` only, so they never set requires_init.
        self._resolve_from_default(
            ("train_batch_size", "val_batch_size", "test_batch_size", "all_batch_size"),
            forces_init=False,
        )

        if self._is_config_sentinel(self.fill_missing_with):
            self.fill_missing_with = self.default_config.filler_factory.filler_type
        else:
            self.requires_init = True

        self._resolve_from_default(
            ("create_transformer_per_time_series", "partial_fit_initialized_transformers"),
            forces_init=True,
        )

        if self._is_config_sentinel(self.transform_with):
            transformer_factory = self.default_config.transformer_factory
            # Reuse already initialized transformers when the factory has them,
            # otherwise fall back to the transformer type.
            if transformer_factory.has_already_initialized:
                self.transform_with = transformer_factory.initialized_transformers
            else:
                self.transform_with = transformer_factory.transformer_type
        else:
            self.requires_init = True

        if self._is_config_sentinel(self.handle_anomalies_with):
            self.handle_anomalies_with = self.default_config.anomaly_handler_factory.anomaly_handler_type
        else:
            self.requires_init = True

        # Worker counts are applied by `_soft_modify` only, so they never set requires_init.
        self._resolve_from_default(
            ("train_workers", "val_workers", "test_workers", "all_workers", "init_workers"),
            forces_init=False,
        )

    def modify_dataset_config(self, dataset_config: DatasetConfig, metadata: DatasetMetadata):
        """Modifies dataset config based on passed values in constructor. Used by CesnetDataset classes when editing config values. """

        # The soft modify always runs first; the hard modify and validation
        # only run when some hard-modified value was given explicitly.
        self._soft_modify(dataset_config, metadata)

        if self.requires_init:
            self._hard_modify(dataset_config, metadata)
            dataset_config._validate_construction()

    @abstractmethod
    def _hard_modify(self, config: DatasetConfig, dataset_metadata: DatasetMetadata):
        """Write the resolved values onto `config` and rebuild its filler, transformer and anomaly handler factories.

        Abstract: subclasses must override it and may call this base implementation via ``super()``.
        """
        config.default_values = self.default_values
        config.preprocess_order = self.preprocess_order
        config.partial_fit_initialized_transformers = self.partial_fit_initialized_transformers
        config.create_transformer_per_time_series = self.create_transformer_per_time_series
        config.filler_factory = filler_factories.get_filler_factory(self.fill_missing_with)
        config.transformer_factory = transformer_factories.get_transformer_factory(self.transform_with, self.create_transformer_per_time_series, self.partial_fit_initialized_transformers)
        config.anomaly_handler_factory = anomaly_handler_factories.get_anomaly_handler_factory(self.handle_anomalies_with)

    @abstractmethod
    def _soft_modify(self, config: DatasetConfig, dataset_metadata: DatasetMetadata):
        """Push the resolved batch sizes and worker counts to `config`.

        Abstract: subclasses must override it and may call this base implementation via ``super()``.
        """
        config._update_batch_sizes(self.train_batch_size, self.val_batch_size, self.test_batch_size, self.all_batch_size)
        config._update_workers(self.train_workers, self.val_workers, self.test_workers, self.all_workers, self.init_workers)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from dataclasses import dataclass, field
2+
from typing import Literal
3+
import logging
4+
5+
from cesnet_tszoo.data_models.dataset_metadata import DatasetMetadata
6+
from cesnet_tszoo.configs.config_editors.config_editor import ConfigEditor
7+
from cesnet_tszoo.configs import DisjointTimeBasedConfig
8+
9+
10+
@dataclass
class DisjointTimeBasedConfigEditor(ConfigEditor):
    """Config editor for `DisjointTimeBasedConfig`; used for choosing which values in config to modify.

    Each sliding-window field and `set_shared_size` accepts ``"config"`` to keep
    the value stored in `default_config`.
    """

    default_config: DisjointTimeBasedConfig
    sliding_window_size: int | None | Literal["config"]
    sliding_window_prediction_size: int | None | Literal["config"]
    sliding_window_step: int | Literal["config"]
    set_shared_size: float | int | Literal["config"]

    # Not constructor arguments in this editor: both are fixed to 1.
    all_batch_size: int = field(default=1, init=False)
    all_workers: int = field(default=1, init=False)

    def __post_init__(self):
        """Resolve the ``"config"`` sentinels of this editor, then those of the base editor."""
        source = self.default_config

        for window_attr in ("sliding_window_size", "sliding_window_prediction_size", "sliding_window_step"):
            if getattr(self, window_attr) == "config":
                setattr(self, window_attr, getattr(source, window_attr))

        # Only an explicitly passed shared size sets requires_init here.
        if self.set_shared_size != "config":
            self.requires_init = True
        else:
            self.set_shared_size = source.set_shared_size

        super().__post_init__()

        # Assigned after the base __post_init__, so this logger name is the one that remains.
        self.logger = logging.getLogger("disjoint_time_based_config_editor")

    def _hard_modify(self, config: DisjointTimeBasedConfig, dataset_metadata: DatasetMetadata):
        """Delegate entirely to the base implementation."""
        super()._hard_modify(config, dataset_metadata)

    def _soft_modify(self, config: DisjointTimeBasedConfig, dataset_metadata: DatasetMetadata):
        """Run the base soft modify, then update the sliding window settings of `config`."""
        super()._soft_modify(config, dataset_metadata)
        config._update_sliding_window(
            self.sliding_window_size,
            self.sliding_window_prediction_size,
            self.sliding_window_step,
            self.set_shared_size,
            dataset_metadata.time_indices,
        )
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from dataclasses import dataclass
2+
import logging
3+
4+
from cesnet_tszoo.data_models.dataset_metadata import DatasetMetadata
5+
from cesnet_tszoo.configs.config_editors.config_editor import ConfigEditor
6+
from cesnet_tszoo.configs import SeriesBasedConfig
7+
8+
9+
@dataclass
class SeriesBasedConfigEditor(ConfigEditor):
    """Config editor for `SeriesBasedConfig`; used for choosing which values in config to modify.

    Adds no fields of its own; it only narrows the `default_config` type and
    supplies concrete versions of the abstract modify hooks.
    """

    default_config: SeriesBasedConfig

    def __post_init__(self):
        """Run the base sentinel resolution, then install this editor's logger."""
        super().__post_init__()

        editor_logger = logging.getLogger("series_based_config_editor")
        self.logger = editor_logger

    def _hard_modify(self, config: SeriesBasedConfig, dataset_metadata: DatasetMetadata):
        """Delegate entirely to the base implementation."""
        super()._hard_modify(config, dataset_metadata)

    def _soft_modify(self, config: SeriesBasedConfig, dataset_metadata: DatasetMetadata):
        """Delegate entirely to the base implementation."""
        super()._soft_modify(config, dataset_metadata)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from dataclasses import dataclass
2+
from typing import Literal
3+
import logging
4+
5+
from cesnet_tszoo.data_models.dataset_metadata import DatasetMetadata
6+
from cesnet_tszoo.configs.config_editors.config_editor import ConfigEditor
7+
from cesnet_tszoo.configs import TimeBasedConfig
8+
9+
10+
@dataclass
class TimeBasedConfigEditor(ConfigEditor):
    """Config editor for `TimeBasedConfig`; used for choosing which values in config to modify.

    Each sliding-window field and `set_shared_size` accepts ``"config"`` to keep
    the value stored in `default_config`.
    """

    default_config: TimeBasedConfig
    sliding_window_size: int | None | Literal["config"]
    sliding_window_prediction_size: int | None | Literal["config"]
    sliding_window_step: int | Literal["config"]
    set_shared_size: float | int | Literal["config"]

    def __post_init__(self):
        """Resolve the ``"config"`` sentinels of this editor, then those of the base editor."""
        source = self.default_config

        for window_attr in ("sliding_window_size", "sliding_window_prediction_size", "sliding_window_step"):
            if getattr(self, window_attr) == "config":
                setattr(self, window_attr, getattr(source, window_attr))

        # Only an explicitly passed shared size sets requires_init here.
        if self.set_shared_size != "config":
            self.requires_init = True
        else:
            self.set_shared_size = source.set_shared_size

        super().__post_init__()

        # Assigned after the base __post_init__, so this logger name is the one that remains.
        self.logger = logging.getLogger("time_based_config_editor")

    def _hard_modify(self, config: TimeBasedConfig, dataset_metadata: DatasetMetadata):
        """Delegate entirely to the base implementation."""
        super()._hard_modify(config, dataset_metadata)

    def _soft_modify(self, config: TimeBasedConfig, dataset_metadata: DatasetMetadata):
        """Run the base soft modify, then update the sliding window settings of `config`."""
        super()._soft_modify(config, dataset_metadata)
        config._update_sliding_window(
            self.sliding_window_size,
            self.sliding_window_prediction_size,
            self.sliding_window_step,
            self.set_shared_size,
            dataset_metadata.time_indices,
        )

0 commit comments

Comments
 (0)