ValueError: setting an array element with a sequence. The requested array has an inhomogeneous #3135

Open
moghadas76 opened this issue Feb 28, 2024 · 1 comment
Labels: bug (Something isn't working)

Comments

@moghadas76

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (32,) + inhomogeneous part.
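
For context, this is the error recent numpy versions raise when ragged arrays are stacked into one rectangular array. The `(32,)` in the message matches the batch size used below, so the stack over a batch of 32 instances appears to contain entries of mismatched shapes. A minimal illustration of the failure mode (not taken from the lag-llama code):

```python
import numpy as np

# Two per-instance arrays of different lengths; numpy >= 1.24 refuses to
# stack them and raises exactly this "inhomogeneous shape" ValueError.
a = np.ones(10)
b = np.ones(12)
np.array([a, b])
```

The code I am running: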

```python
from typing import Any, Dict, Iterable, Optional
from gluonts.dataset.loader import TrainDataLoader
from gluonts.itertools import Cached
from gluonts.torch.batchify import batchify
import pytorch_lightning as pl
import torch
from gluonts.core.component import validated
from gluonts.dataset.common import Dataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import as_stacked_batches
from gluonts.dataset.stat import calculate_dataset_statistics
from gluonts.itertools import Cyclic
from gluonts.time_feature import (
    get_lags_for_frequency,
    time_features_from_frequency_str,
)
from gluonts.torch.model.estimator import PyTorchLightningEstimator
from gluonts.torch.model.predictor import PyTorchPredictor
from gluonts.torch.modules.loss import DistributionLoss, NegativeLogLikelihood
from gluonts.transform import (
    AddObservedValuesIndicator,
    AddTimeFeatures,
    Chain,
    DummyValueImputation,
    ExpectedNumInstanceSampler,
    InstanceSampler,
    InstanceSplitter,
    TestSplitSampler,
    Transformation,
    ValidationSplitSampler,
    VstackFeatures,
)
from peft import LoraConfig, get_peft_model

from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.torch.distributions import StudentTOutput, NormalOutput
from gluon_utils.gluon_ts_distributions.implicit_quantile_network import (
    ImplicitQuantileNetworkOutput,
)

from lag_llama.gluon.lightning_module import LagLlamaLightningModule

PREDICTION_INPUT_NAMES = [
    "past_target",
    "past_observed_values",
]
TRAINING_INPUT_NAMES = PREDICTION_INPUT_NAMES + [
    "future_target",
    "future_observed_values",
]

class LagLlamaEstimator(PyTorchLightningEstimator):
"""
An estimator training a Lag-Llama model for forecasting.

This class wraps the model into a ``LagLlamaLightningModule`` for training
purposes: training is performed using PyTorch Lightning's ``pl.Trainer``
class.

Parameters
----------
prediction_length
    Length of the prediction horizon.
context_length
    Number of time steps prior to prediction time that the model
    takes as inputs (default: ``10 * prediction_length``).
lr
    Learning rate (default: ``1e-3``).
weight_decay
    Weight decay regularization parameter (default: ``1e-8``).
distr_output
    Distribution to use to evaluate observations and sample predictions
    (default: StudentTOutput()).
loss
    Loss to be optimized during training
    (default: ``NegativeLogLikelihood()``).
batch_norm
    Whether to apply batch normalization.
batch_size
    The size of the batches to be used for training (default: 32).
num_batches_per_epoch
    Number of batches to be processed in each training epoch
        (default: 50).
trainer_kwargs
    Additional arguments to provide to ``pl.Trainer`` for construction.
train_sampler
    Controls the sampling of windows during training.
validation_sampler
    Controls the sampling of windows during validation.
"""

@validated()
def __init__(
    self,
    prediction_length: int,
    context_length: Optional[int] = None,
    input_size: int = 1,
    n_layer: int = 1,
    n_embd_per_head: int = 32,
    n_head: int = 4,
    max_context_length: int = 2048,
    rope_scaling=None,
    scaling: Optional[str] = "mean",
    lr: float = 1e-3,
    weight_decay: float = 1e-8,
    # Augmentations arguments
    aug_prob: float = 0.1,
    freq_mask_rate: float = 0.1,
    freq_mixing_rate: float = 0.1,
    jitter_prob: float = 0.0,
    jitter_sigma: float = 0.03,
    scaling_prob: float = 0.0,
    scaling_sigma: float = 0.1,
    rotation_prob: float = 0.0,
    permutation_prob: float = 0.0,
    permutation_max_segments: int = 5,
    permutation_seg_mode: str = "equal",
    magnitude_warp_prob: float = 0.0,
    magnitude_warp_sigma: float = 0.2,
    magnitude_warp_knot: int = 4,
    time_warp_prob: float = 0.0,
    time_warp_sigma: float = 0.2,
    time_warp_knot: int = 4,
    window_slice_prob: float = 0.0,
    window_slice_reduce_ratio: float = 0.9,
    window_warp_prob: float = 0.0,
    window_warp_window_ratio: float = 0.1,
    window_warp_scales: list = [0.5, 2.0],
    # Continuing model arguments
    distr_output: str = "studentT",
    loss: DistributionLoss = NegativeLogLikelihood(),
    num_parallel_samples: int = 100,
    batch_size: int = 32,
    num_batches_per_epoch: int = 50,
    trainer_kwargs: Optional[Dict[str, Any]] = None,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    time_feat: bool = False,
    dropout: float = 0.0,
    lags_seq: list = ["Q", "M", "W", "D", "H", "T", "S"],
    data_id_to_name_map: dict = {},
    use_cosine_annealing_lr: bool = False,
    cosine_annealing_lr_args: dict = {},
    track_loss_per_series: bool = False,
    ckpt_path: Optional[str] = None,
    use_feat_dynamic_real=True,
) -> None:
    default_trainer_kwargs = {"max_epochs": 100}
    if trainer_kwargs is not None:
        default_trainer_kwargs.update(trainer_kwargs)
    super().__init__(trainer_kwargs=default_trainer_kwargs)

    self.scaling = scaling
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.context_length = context_length
    self.max_context_length = max_context_length

    lag_indices = []
    for freq in lags_seq:
        lag_indices.extend(
            get_lags_for_frequency(freq_str=freq, num_default_lags=1)
        )

    if len(lag_indices):
        self.lags_seq = sorted(set(lag_indices))
        self.lags_seq = [lag_index - 1 for lag_index in self.lags_seq] # len 83, max: 1092
    else:
        self.lags_seq = []

    self.n_head = n_head
    self.n_layer = n_layer
    self.n_embd_per_head = n_embd_per_head
    self.rope_scaling = rope_scaling

    self.lr = lr
    self.weight_decay = weight_decay
    if distr_output == "studentT":
        distr_output = StudentTOutput()
    elif distr_output == "iqn":
        distr_output = ImplicitQuantileNetworkOutput()
    self.distr_output = distr_output
    self.num_parallel_samples = num_parallel_samples
    self.loss = loss
    self.batch_size = batch_size # 32
    self.num_batches_per_epoch = num_batches_per_epoch # 50

    self.train_sampler = train_sampler or ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = validation_sampler or ValidationSplitSampler(
        min_future=prediction_length
    )

    self.aug_prob = aug_prob
    self.freq_mask_rate = freq_mask_rate
    self.freq_mixing_rate = freq_mixing_rate
    self.jitter_prob = jitter_prob
    self.jitter_sigma = jitter_sigma
    self.scaling_prob = scaling_prob
    self.scaling_sigma = scaling_sigma
    self.rotation_prob = rotation_prob
    self.permutation_prob = permutation_prob
    self.permutation_max_segments = permutation_max_segments
    self.permutation_seg_mode = permutation_seg_mode
    self.magnitude_warp_prob = magnitude_warp_prob
    self.magnitude_warp_sigma = magnitude_warp_sigma
    self.magnitude_warp_knot = magnitude_warp_knot
    self.time_warp_prob = time_warp_prob
    self.time_warp_sigma = time_warp_sigma
    self.time_warp_knot = time_warp_knot
    self.window_slice_prob = window_slice_prob
    self.window_slice_reduce_ratio = window_slice_reduce_ratio
    self.window_warp_prob = window_warp_prob
    self.window_warp_window_ratio = window_warp_window_ratio
    self.window_warp_scales = window_warp_scales
    self.track_loss_per_series = track_loss_per_series

    self.time_feat = time_feat
    self.dropout = dropout
    self.data_id_to_name_map = data_id_to_name_map
    self.ckpt_path = ckpt_path

    self.use_cosine_annealing_lr = use_cosine_annealing_lr
    self.cosine_annealing_lr_args = cosine_annealing_lr_args
    # self.transformation = self.create_transformation()

@classmethod
def derive_auto_fields(cls, train_iter):
    stats = calculate_dataset_statistics(train_iter)

    return {
        "num_feat_dynamic_real": stats.num_feat_dynamic_real,
        "num_feat_static_cat": len(stats.feat_static_cat),
        "cardinality": [len(cats) for cats in stats.feat_static_cat],
    }

def create_transformation(self) -> Transformation:
    if self.time_feat:
        return Chain(
            [
                AddTimeFeatures(
                    start_field=FieldName.START,
                    target_field=FieldName.TARGET,
                    output_field=FieldName.FEAT_TIME,
                    time_features=time_features_from_frequency_str("S"),
                    pred_length=self.prediction_length,
                ),
                # VstackFeatures(
                #     output_field=FieldName.FEAT_TIME,
                #     input_fields=[FieldName.FEAT_TIME] + [FieldName.FEAT_DYNAMIC_REAL]
                # ),
                # FilterTransformation(lambda x: sum(abs(x[FieldName.TARGET])) > 0),
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                    imputation_method=DummyValueImputation(0.0),
                ),
                
            ]
        )
    else:
        return Chain(
            [
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                    imputation_method=DummyValueImputation(0.0),
                ),
            ]
        )

def create_lightning_module(self, use_kv_cache: bool = False) -> pl.LightningModule:
    model_kwargs = {
        "input_size": self.input_size,
        "context_length": self.context_length,
        "max_context_length": self.max_context_length,
        "lags_seq": self.lags_seq,
        "n_layer": self.n_layer,
        "n_embd_per_head": self.n_embd_per_head,
        "n_head": self.n_head,
        "scaling": self.scaling,
        "distr_output": self.distr_output,
        "num_parallel_samples": self.num_parallel_samples,
        "rope_scaling": self.rope_scaling,
        "time_feat": self.time_feat,
        "dropout": self.dropout,
    }
    if self.ckpt_path is not None:
        module = LagLlamaLightningModule.load_from_checkpoint(
            checkpoint_path=self.ckpt_path,
            loss=self.loss,
            lr=self.lr,
            weight_decay=self.weight_decay,
            context_length=self.context_length,
            prediction_length=self.prediction_length,
            model_kwargs=model_kwargs,
            # Augmentations
            aug_prob=self.aug_prob,
            freq_mask_rate=self.freq_mask_rate,
            freq_mixing_rate=self.freq_mixing_rate,
            jitter_prob=self.jitter_prob,
            jitter_sigma=self.jitter_sigma,
            scaling_prob=self.scaling_prob,
            scaling_sigma=self.scaling_sigma,
            rotation_prob=self.rotation_prob,
            permutation_prob=self.permutation_prob,
            permutation_max_segments=self.permutation_max_segments,
            permutation_seg_mode=self.permutation_seg_mode,
            magnitude_warp_prob=self.magnitude_warp_prob,
            magnitude_warp_sigma=self.magnitude_warp_sigma,
            magnitude_warp_knot=self.magnitude_warp_knot,
            time_warp_prob=self.time_warp_prob,
            time_warp_sigma=self.time_warp_sigma,
            time_warp_knot=self.time_warp_knot,
            window_slice_prob=self.window_slice_prob,
            window_slice_reduce_ratio=self.window_slice_reduce_ratio,
            window_warp_prob=self.window_warp_prob,
            window_warp_window_ratio=self.window_warp_window_ratio,
            window_warp_scales=self.window_warp_scales,
            use_kv_cache=use_kv_cache,
            data_id_to_name_map=self.data_id_to_name_map,
            use_cosine_annealing_lr=self.use_cosine_annealing_lr,
            cosine_annealing_lr_args=self.cosine_annealing_lr_args,
            track_loss_per_series=self.track_loss_per_series,
        )
        # config = LoraConfig(
        #     r=16,
        #     lora_alpha=16,
        #     target_modules=["q_proj", "kv_proj"],
        #     lora_dropout=0.1,
        #     # use_original_init=False,
        #     bias="none",
        #     modules_to_save=["classifier"],
        # )
        # lora_model = get_peft_model(module.model, config)
        # module.model = lora_model
        return module
    else:
        return LagLlamaLightningModule(
            loss=self.loss,
            lr=self.lr,
            weight_decay=self.weight_decay,
            context_length=self.context_length,
            prediction_length=self.prediction_length,
            model_kwargs=model_kwargs,
            # Augmentations
            aug_prob=self.aug_prob,
            freq_mask_rate=self.freq_mask_rate,
            freq_mixing_rate=self.freq_mixing_rate,
            jitter_prob=self.jitter_prob,
            jitter_sigma=self.jitter_sigma,
            scaling_prob=self.scaling_prob,
            scaling_sigma=self.scaling_sigma,
            rotation_prob=self.rotation_prob,
            permutation_prob=self.permutation_prob,
            permutation_max_segments=self.permutation_max_segments,
            permutation_seg_mode=self.permutation_seg_mode,
            magnitude_warp_prob=self.magnitude_warp_prob,
            magnitude_warp_sigma=self.magnitude_warp_sigma,
            magnitude_warp_knot=self.magnitude_warp_knot,
            time_warp_prob=self.time_warp_prob,
            time_warp_sigma=self.time_warp_sigma,
            time_warp_knot=self.time_warp_knot,
            window_slice_prob=self.window_slice_prob,
            window_slice_reduce_ratio=self.window_slice_reduce_ratio,
            window_warp_prob=self.window_warp_prob,
            window_warp_window_ratio=self.window_warp_window_ratio,
            window_warp_scales=self.window_warp_scales,
            use_kv_cache=use_kv_cache,
            data_id_to_name_map=self.data_id_to_name_map,
            use_cosine_annealing_lr=self.use_cosine_annealing_lr,
            cosine_annealing_lr_args=self.cosine_annealing_lr_args,
            track_loss_per_series=self.track_loss_per_series,
        )

def _create_instance_splitter(self, module: LagLlamaLightningModule, mode: str):
    assert mode in ["training", "validation", "test"]

    instance_sampler = {
        "training": self.train_sampler,
        "validation": self.validation_sampler,
        "test": TestSplitSampler(),
    }[mode]

    return InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=instance_sampler,
        past_length=self.context_length + max(self.lags_seq),
        future_length=self.prediction_length,
        time_series_fields=[FieldName.FEAT_TIME, FieldName.OBSERVED_VALUES]
        if self.time_feat
        else [FieldName.OBSERVED_VALUES],
        dummy_value=self.distr_output.value_in_support,
    )

def create_training_data_loader(
    self,
    data: Dataset,
    module: LagLlamaLightningModule,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> Iterable:
    data = Cyclic(data).stream()
    instances = self._create_instance_splitter(module, "training").apply(
        data, is_train=True
    )
    if self.time_feat:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            shuffle_buffer_length=shuffle_buffer_length,
            field_names=TRAINING_INPUT_NAMES
            + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
            # + ["past_time_feat", "future_time_feat"],
            output_type=torch.tensor,
            num_batches_per_epoch=self.num_batches_per_epoch,
        )

    else:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            shuffle_buffer_length=shuffle_buffer_length,
            # field_names=TRAINING_INPUT_NAMES,
            field_names=TRAINING_INPUT_NAMES + ["data_id", "item_id"],
            output_type=torch.tensor,
            num_batches_per_epoch=self.num_batches_per_epoch,
        )

def create_validation_data_loader(
    self,
    data: Dataset,
    module: LagLlamaLightningModule,
    **kwargs,
) -> Iterable:
    instances = self._create_instance_splitter(module, "validation").apply(
        data, is_train=True
    )
    if self.time_feat:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            field_names=TRAINING_INPUT_NAMES
            + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
            # + ["past_time_feat", "future_time_feat"],
            output_type=torch.tensor,
        )
    else:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            field_names=TRAINING_INPUT_NAMES + ["data_id", "item_id"],
            # field_names=TRAINING_INPUT_NAMES,
            output_type=torch.tensor,
        )

def create_trainer_dl(self, dataset, module):
    # instances = self._create_instance_splitter(module, "training").apply(
    #     dataset, is_train=True
    # )
    if self.time_feat:
        # return as_stacked_batches(
        #     instances,
        #     batch_size=self.batch_size,
        #     field_names=TRAINING_INPUT_NAMES
        #     + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
        #     # + ["past_time_feat", "future_time_feat"],
        #     output_type=torch.tensor,
        # )
        data_loader = TrainDataLoader(
            # We cache the dataset to make training faster
            Cached(dataset),
            batch_size=self.batch_size,
            stack_fn=batchify,
            transform=self.create_transformation(),
            num_batches_per_epoch=100,
        )
        return data_loader



def create_predictor(
    self,
    transformation: Transformation,
    module,
) -> PyTorchPredictor:
    prediction_splitter = self._create_instance_splitter(module, "test")
    if self.time_feat:
        return PyTorchPredictor(
            input_transform=transformation + prediction_splitter,
            input_names=PREDICTION_INPUT_NAMES
            + ["past_time_feat", "future_time_feat"],
            prediction_net=module,
            batch_size=self.batch_size,
            prediction_length=self.prediction_length,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )
    else:
        return PyTorchPredictor(
            input_transform=transformation + prediction_splitter,
            input_names=PREDICTION_INPUT_NAMES,
            prediction_net=module,
            batch_size=self.batch_size,
            prediction_length=self.prediction_length,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )

```
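
For completeness, a hypothetical driver for the estimator defined above (the dataset name, checkpoint path, and hyperparameter values below are placeholders, not taken from the issue); the ValueError presumably surfaces once training starts stacking batches from the data loader:

```python
from gluonts.dataset.repository.datasets import get_dataset

# Placeholder dataset; any GluonTS-style dataset can stand in here.
train_ds = get_dataset("electricity").train

estimator = LagLlamaEstimator(
    prediction_length=24,
    context_length=32,
    batch_size=32,
    time_feat=True,
    ckpt_path="lag-llama.ckpt",  # placeholder checkpoint path
    trainer_kwargs={"max_epochs": 1},
)
predictor = estimator.train(train_ds)  # assumption: the error is raised during this call
```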

moghadas76 added the bug label on Feb 28, 2024

Usama-Samad commented Apr 24, 2024

Hello, I'm also getting the same error, can someone please check?

```python
# Imports for the snippet below.
from gluonts.dataset.repository.datasets import get_dataset
from gluonts.dataset.multivariate_grouper import MultivariateGrouper

dataset = get_dataset("solar_nips", regenerate=False)
dataset.metadata

train_grouper = MultivariateGrouper(
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality)
)
test_grouper = MultivariateGrouper(
    num_test_dates=int(len(dataset.test) / len(dataset.train)),
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),
)
dataset_train = train_grouper(dataset.train)
dataset_test = test_grouper(dataset.test)
```


```
ValueError                                Traceback (most recent call last)
Cell In[7], line 2
      1 dataset_train = train_grouper(dataset.train)
----> 2 dataset_test = test_grouper(dataset.test)

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:87, in MultivariateGrouper.__call__(self, dataset)
     85 def __call__(self, dataset: Dataset) -> Dataset:
     86     self._preprocess(dataset)
---> 87     return self._group_all(dataset)

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:125, in MultivariateGrouper._group_all(self, dataset)
    123     grouped_dataset = self._prepare_train_data(dataset)
    124 else:
--> 125     grouped_dataset = self._prepare_test_data(dataset)
    126 return grouped_dataset

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:152, in MultivariateGrouper._prepare_test_data(self, dataset)
    148 assert self.num_test_dates is not None
    150 logging.info("group test time series to datasets")
--> 152 grouped_data = self._transform_target(self._left_pad_data, dataset)
    153 # splits test dataset with rolling date into N R^d time series where
    154 # N is the number of rolling evaluation dates
    155 split_dataset = np.split(
    156     grouped_data[FieldName.TARGET], self.num_test_dates
    157 )

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:205, in MultivariateGrouper._transform_target(funcs, dataset)
    203 @staticmethod
    204 def _transform_target(funcs, dataset: Dataset) -> DataEntry:
--> 205     return {FieldName.TARGET: np.array([funcs(data) for data in dataset])}

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (959,) + inhomogeneous part.
```
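
The failing line, `np.array([funcs(data) for data in dataset])`, is handed one left-padded target per test series; if those do not all end up the same length (which can easily happen with rolling test windows), whether the call works depends on the numpy version. Older numpy silently built a ragged object array here, while numpy >= 1.24 raises exactly this ValueError, so the installed version is worth checking (illustrative sketch, not a confirmed fix):

```python
import numpy as np

print(np.__version__)  # numpy >= 1.24 rejects ragged np.array(...) calls like the one above

# The old ragged behaviour can still be requested explicitly on any version:
ragged = np.array([np.ones(5), np.ones(7)], dtype=object)
print(ragged.shape)  # (2,), an object array holding arrays of different lengths
```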
