How can I use context features like movie title text in listwise ranking? #344

Open
ravichoudhary33 opened this issue Feb 6, 2023 · 0 comments

In the tutorial documentation we have a basic implementation that does not use the movie title text (or any other movie feature) in listwise ranking. I was trying to implement that. I used the movie title text as a feature in the listwise ranking model; here is the model definition (I am also using query features):

import tensorflow as tf
import tensorflow_recommenders as tfrs
import tensorflow_ranking as tfr


class RankingModel(tfrs.Model):

  def __init__(self, loss):
    super().__init__()
    embedding_dimension = 32
    max_tokens = 10_000

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids),
      tf.keras.layers.Embedding(len(unique_user_ids) + 2, embedding_dimension)
    ])

    # Compute query embeddings.
    self.query_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=max_tokens)

    self.query_text_embedding = tf.keras.Sequential([
      self.query_vectorizer,
      tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
      tf.keras.layers.GlobalAveragePooling1D(),
    ])

    self.query_vectorizer.adapt(queries)

    # Compute embeddings for movies.
    self.movie_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_movie_titles),
      tf.keras.layers.Embedding(len(unique_movie_titles) + 2, embedding_dimension)
    ])

    # Compute embeddings for movie title text.
    self.movie_title_text_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=max_tokens)

    self.movie_title_text_embedding = tf.keras.Sequential([
      # tf.keras.layers.Flatten(),
      self.movie_title_text_vectorizer,
      tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
      # We average the embedding of individual words to get one embedding vector
      # per title.
      tf.keras.layers.GlobalAveragePooling1D()
    ])

    self.movie_title_text_vectorizer.adapt(movies)

    # Compute predictions.
    self.score_model = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
    ])

    self.task = tfrs.tasks.Ranking(
      loss=loss,
      metrics=[
        tfr.keras.metrics.NDCGMetric(name="ndcg_metric"),
        tf.keras.metrics.RootMeanSquaredError()
      ]
    )

  def call(self, features):
    # We first convert the id features into embeddings.
    # User embeddings are a [batch_size, embedding_dim] tensor.
    user_embeddings = self.user_embeddings(features["user_id"])

    # Query embeddings are a [batch_size, embedding_dim] tensor.
    query_embeddings = self.query_text_embedding(features["query"])

    # Movie embeddings are a [batch_size, num_movies_in_list, embedding_dim]
    # tensor.
    movie_embeddings = self.movie_embeddings(features["movie_title"])
    
    # Movie title text embeddings (TextVectorization receives the rank-2
    # movie_title tensor here).
    movie_title_embeddings = self.movie_title_text_embedding(features["movie_title"])

    # Print the shapes of the movie embeddings and the movie title text embeddings.
    print(f"movie embedding shape: {movie_embeddings.shape} & movie title embedding shape: {movie_title_embeddings.shape}")
    
    # We want to concatenate user embeddings with movie embeddings to pass
    # them into the ranking model. To do so, we need to reshape the user
    # embeddings to match the shape of movie embeddings.
    list_length = features["movie_title"].shape[1]
    user_embedding_repeated = tf.repeat(
        tf.expand_dims(user_embeddings, 1), [list_length], axis=1)
    
    # Repeat the query embeddings the same way as the user embeddings.
    query_embedding_repeated = tf.repeat(
        tf.expand_dims(query_embeddings, 1), [list_length], axis=1)
    

    # Print the repeated and non-repeated query embedding shapes.
    print(f"query embeddings shape: {query_embeddings.shape} & query embedding repeated shape: {query_embedding_repeated.shape}")

    # Once reshaped, we concatenate and pass into the dense layers to generate
    # predictions.
    concatenated_embeddings = tf.concat(
        [user_embedding_repeated, query_embedding_repeated,
         movie_title_embeddings, movie_embeddings], 2)
    
    return self.score_model(concatenated_embeddings)

  def compute_loss(self, features, training=False):
    labels = features.pop("user_rating")

    scores = self(features)

    return self.task(
        labels=labels,
        predictions=tf.squeeze(scores, axis=-1),
    )
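
For reference, a model like this would typically be compiled and fit the same way as in the TFRS listwise ranking tutorial (a sketch only; cached_train is a placeholder for the batched listwise training set):

# Sketch following the TFRS listwise ranking tutorial; `cached_train` is a
# placeholder for the batched listwise training dataset.
model = RankingModel(tfr.keras.losses.ListMLELoss())
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(cached_train, epochs=30, verbose=False)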

Here is what one example from my input dataset looks like:

{'movie_title': <tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'Monolith by Monoprice THX Certified Satellite Speakers (Pair)',
       b'Monolith by Monoprice M-OW1 THX Certified On-Wall Speaker (Pair)',
       b'Monolith by Monoprice THX-365C THX Certified Ultra Center Channel Speaker (Each)',
       b'Monolith by Monoprice Encore B5 Bookshelf Speakers (Each)',
       b'Monolith by Monoprice Encore T6 Tower Speaker (Each)'],
      dtype=object)>,
 'query': <tf.Tensor: shape=(), dtype=string, numpy=b'Floorstanding and Bookshelf Speakers'>,
 'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'uid-1652799522552-93464'>,
 'user_rating': <tf.Tensor: shape=(5,), dtype=float64, numpy=array([0.3030303 , 0.15151515, 0.3030303 , 0.15151515, 0.15151515])>}
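
Note that this is a single unbatched example: movie_title has shape (5,). Once the dataset is batched for training, those per-example lists stack into a rank-2 string tensor, which is what reaches the TextVectorization layer. A sketch, where ds stands in for a dataset yielding examples like the one above:

# Batching stacks each example's (5,) title tensor into a rank-2
# (batch_size, 5) string tensor; a batch size of 32 is assumed here.
train = ds.batch(32)  # features["movie_title"] now has shape (32, 5)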

But when I try to train the model, it fails with: "When using TextVectorization to tokenize strings, the input rank must be 1 or the last shape dimension must be 1. Received: inputs.shape=(None, 5) with rank=2." Any help or pointers would be much appreciated. Thanks.
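
One possible workaround (a sketch only, using the attributes defined in the model above and not verified end to end) is to flatten the rank-2 title tensor to rank 1 inside call, embed it, and then restore the list dimension:

    # Inside call(): TextVectorization accepts rank-1 string input, so flatten
    # the [batch_size, list_length] titles first, then reshape the embeddings.
    titles = features["movie_title"]               # [batch_size, list_length]
    shape = tf.shape(titles)
    flat_titles = tf.reshape(titles, [-1])         # [batch_size * list_length]
    flat_embeddings = self.movie_title_text_embedding(flat_titles)
    movie_title_embeddings = tf.reshape(
        flat_embeddings, [shape[0], shape[1], -1]) # [batch, list, embed_dim]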
