You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Separate the features (X) from the prediction target (y).
# `columns=` already selects the column axis, so the extra `axis=1` was redundant.
X = df.drop(columns=['Shipping Company'])
y = df['Shipping Company']
# Import requirements
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense
# Get some insights about the shape of the ocean DataFrame:
print(f"The shape of the Ocean DataFrame is: {X.shape}")
print(f"The number of unique shipping companies is: {len(y.unique())}")
print(f"The number of unique loading countries is: {len(X['Loading Country'].unique())}")
print(f"The number of unique loading ports is: {len(X['Loading Port'].unique())}")
print(f"The number of unique destination countries is: {len(X['Destination Country'].unique())}")
print(f"The number of unique destination ports is: {len(X['Destination Port'].unique())}")
Define a ModelCheckpoint callback that saves the model after every epoch
# Checkpoint callback: writes one model file per epoch (no best-only filter).
# NOTE(review): the .h5 extension selects the legacy HDF5 format; recent Keras
# versions prefer the native .keras format — confirm which your TF version needs.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
'model_ocean_{epoch:02d}.h5', # Saves the model with the epoch number
save_best_only=False, # Saves all models
verbose=1 # Print out messages when saving the model
)
loading_port_encoded = lookup_loading_port(tf.constant([loading_port]))
loading_country_encoded = lookup_loading_country(tf.constant([loading_country]))
destination_port_encoded = lookup_destination_port(tf.constant([destination_port]))
destination_country_encoded = lookup_destination_country(tf.constant([destination_country]))
numerical_features = np.zeros((1, 17)) # Assuming you have 25 other numerical features
numerical_features[0, 16] = legs # Set the last element to 'legs'
predictions = model_selected.predict([
loading_port_encoded,
loading_country_encoded,
destination_port_encoded,
destination_country_encoded,
numerical_features
])
# Find the indices of the top 3 predictions
top_indices = np.argsort(predictions[0])[-1:][::-1]
top_confidences = [predictions[0][i] for i in top_indices]
# Adjust confidences based on model's overall accuracy
model_accuracy = 0.9112
adjusted_confidences = [conf * model_accuracy for conf in top_confidences]
# Get the shipping companies names
top_companies = [lookup_shipping_company.get_vocabulary()[i] for i in top_indices]
return top_companies[0]
# Create a results table
#results_table = pd.DataFrame({
# 'Shipping Company': top_companies,
# 'Confidence (%)': [f"{conf * 100:.2f}%" for conf in adjusted_confidences]
#})
#return results_table
This is a model and a function to predict the top shipping companies based on the user's input.
Everything works up to the predict_top_companies function.
The only thing that I haven't been able to get working, even after disabling TF eager mode, is the SHAP DeepExplainer on my model.
Any thoughts on how to accomplish this? deepexplainer_tf.txt
I have uploaded the code as a txt file if someone who is eager to help wants to play with it.
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
I have this code:
# -*- coding: utf-8 -*-
"""Ocean/
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1tYi3EA30eKxdlJeU0njY3_sTtKvQN73J
"""
# Mount Google Drive so the dataset can be read from /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Core data / modeling / explainability imports.
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
import shap
# Render SHAP's interactive JS visualizations inline in the notebook.
shap.initjs()
# Make sure all data is in float32 format for TensorFlow compatibility.
int64_columns_df = df.select_dtypes(include=['int'])
df[int64_columns_df.columns] = df[int64_columns_df.columns].astype('float32', copy=False)
# Separate the features (X) from the prediction target (y).
# `columns=` already selects the column axis, so the extra `axis=1` was redundant.
X = df.drop(columns=['Shipping Company'])
y = df['Shipping Company']
# Import requirements
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense
# Get some insights about the shape of the ocean DataFrame:
print(f"The shape of the Ocean DataFrame is: {X.shape}")
print(f"The number of unique shipping companies is: {len(y.unique())}")
print(f"The number of unique loading countries is: {len(X['Loading Country'].unique())}")
print(f"The number of unique loading ports is: {len(X['Loading Port'].unique())}")
print(f"The number of unique destination countries is: {len(X['Destination Country'].unique())}")
print(f"The number of unique destination ports is: {len(X['Destination Port'].unique())}")
# Initialize and adapt StringLookup layers for the categorical columns.
# Each layer learns a vocabulary from its column and maps strings to integer
# indices (index 0 is reserved for out-of-vocabulary values by default).
lookup_loading_country = tf.keras.layers.StringLookup()
lookup_loading_country.adapt(X['Loading Country'])
lookup_loading_port = tf.keras.layers.StringLookup()
lookup_loading_port.adapt(X['Loading Port'])
lookup_destination_country = tf.keras.layers.StringLookup()
lookup_destination_country.adapt(X['Destination Country'])
lookup_destination_port = tf.keras.layers.StringLookup()
lookup_destination_port.adapt(X['Destination Port'])
# The target (shipping company) also gets a lookup so predictions can be
# mapped back to names via get_vocabulary().
lookup_shipping_company = tf.keras.layers.StringLookup()
lookup_shipping_company.adapt(y)
# Create the TensorFlow dataset.
def map_features(row):
    """Encode one raw row into the model's named input dict.

    Categorical columns are passed through their adapted StringLookup
    layers; the selected numerical columns are packed, in model order,
    into 'input_numerical' (23 values, matching the model's input shape).
    Commented-out names below were deliberately excluded from the
    feature set by the original author.
    """
    numerical_columns = [
        # 'Time elapsed ATA-ATD',   #1  (excluded)
        'Average_Delay_Per_Route',  #2
        'Time elapsed ATD-ETD',     #3
        # 'Time elapsed ETD-ETA',   #4  (excluded)
        'LateEarly',                #5
        'On_Time_Percentage_per_Ship_Comp',  #6
        # 'ATA_day',                #7  (excluded)
        # 'ATA_weekday',            #8  (excluded)
        'ATD_weekday',              #9
        # 'ATA_month',              #10 (excluded)
        'ATD_month',                #11
        'ATD_year',                 #12
        # 'ATA_year',               #13 (excluded)
        'ATD_day',                  #14
        'Gross Weight',             #15
        'Billable Weight',          #16
        'Delay',                    #17
        'EarlyDelivery',            #18
        'ETAAccuracy',              #19
        'Hazard',                   #20
        'Legs',                     #21
        'Volume',                   #22
        'Pack Qty',                 #23
        'DelayedDeparture',         #24
        'OnTimeArrival',            #25
        'DeliveryDelay',            #26
        'Average_Delay',            #27
        'Amount Containers 20',     #28
        'Amount Containers 40',     #29
    ]
    return {
        'input_loading_country': lookup_loading_country(row['Loading Country']),
        'input_loading_port': lookup_loading_port(row['Loading Port']),
        'input_destination_country': lookup_destination_country(row['Destination Country']),
        'input_destination_port': lookup_destination_port(row['Destination Port']),
        'input_numerical': [row[name] for name in numerical_columns],
    }
def process_dataframe(features_df, target_df):
# Apply conversion to dataset
full_dataset = process_dataframe(X, y)

# Shuffle once, then batch. reshuffle_each_iteration=False keeps the order
# fixed across epochs — with the default (True), the take()/skip() split
# below would draw a *different* shuffle every epoch, leaking validation
# rows into training.
full_dataset = full_dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=False).batch(32)

# Calculate the number of batches to split into training and validation.
train_size = int(0.8 * len(X))
val_size = len(X) - train_size
train_dataset = full_dataset.take(train_size // 32)  # train_size in units of 32-row batches
val_dataset = full_dataset.skip(train_size // 32)
# Define the model inputs (functional API) to handle multiple inputs.
# NOTE(review): the categorical inputs carry integer lookup indices but are
# declared float32; Embedding casts them back to int — confirm this is intended.
input_loading_country = tf.keras.Input(shape=(1,), name='input_loading_country', dtype=tf.float32)
input_loading_port = tf.keras.Input(shape=(1,), name='input_loading_port', dtype=tf.float32)
input_destination_country = tf.keras.Input(shape=(1,), name='input_destination_country', dtype=tf.float32)
input_destination_port = tf.keras.Input(shape=(1,), name='input_destination_port', dtype=tf.float32)
# 23 numerical features, matching the list built in map_features.
input_numerical = tf.keras.Input(shape=(23,), name='input_numerical')
# Choose an embedding output_dim for each input using the common
# sqrt(vocabulary_size) heuristic.
import math

vocabulary_size_loading_country = lookup_loading_country.vocabulary_size()
vocabulary_size_loading_port = lookup_loading_port.vocabulary_size()
vocabulary_size_destination_country = lookup_destination_country.vocabulary_size()
vocabulary_size_destination_port = lookup_destination_port.vocabulary_size()
vocabulary_size_shipping = lookup_shipping_company.vocabulary_size()

loading_country_dim = int(math.sqrt(vocabulary_size_loading_country))
loading_port_dim = int(math.sqrt(vocabulary_size_loading_port))
destination_country_dim = int(math.sqrt(vocabulary_size_destination_country))
destination_port_dim = int(math.sqrt(vocabulary_size_destination_port))
shipping_dim = int(math.sqrt(vocabulary_size_shipping))
# Embeddings for categorical inputs. Reuse the vocabulary sizes computed
# above rather than querying each lookup layer a second time.
loading_embedding_country = tf.keras.layers.Embedding(
    input_dim=vocabulary_size_loading_country,
    output_dim=loading_country_dim)(input_loading_country)
loading_embedding_port = tf.keras.layers.Embedding(
    input_dim=vocabulary_size_loading_port,
    output_dim=loading_port_dim)(input_loading_port)
destination_embedding_country = tf.keras.layers.Embedding(
    input_dim=vocabulary_size_destination_country,
    output_dim=destination_country_dim)(input_destination_country)
destination_embedding_port = tf.keras.layers.Embedding(
    input_dim=vocabulary_size_destination_port,
    output_dim=destination_port_dim)(input_destination_port)

# Flatten the (batch, 1, dim) embeddings and concatenate with numerical inputs.
loading_country_flat = tf.keras.layers.Flatten()(loading_embedding_country)
loading_port_flat = tf.keras.layers.Flatten()(loading_embedding_port)
destination_country_flat = tf.keras.layers.Flatten()(destination_embedding_country)
destination_port_flat = tf.keras.layers.Flatten()(destination_embedding_port)
concatenated = tf.keras.layers.Concatenate()([loading_country_flat,
                                              loading_port_flat,
                                              destination_country_flat,
                                              destination_port_flat,
                                              input_numerical
                                              ])
x = tf.keras.layers.Dense(256, activation='relu')(concatenated)
# Softmax over the shipping-company vocabulary (sparse integer labels expected).
output = tf.keras.layers.Dense(vocabulary_size_shipping, activation='softmax')(x)
# Assemble the model: four categorical index inputs plus the numerical block.
model = tf.keras.Model(inputs=[
    input_loading_country,
    input_loading_port,
    input_destination_country,
    input_destination_port,
    input_numerical
], outputs=output)
#from tensorflow.keras.callbacks import ModelCheckpoint
# Checkpoint callback: writes one HDF5 file per epoch (no best-only filter).
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
'model_ocean_{epoch:02d}.h5', # Saves the model with the epoch number
save_best_only=False, # Saves all models
verbose=1 # Print out messages when saving the model
)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Fit the model with the checkpoint callback.
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    callbacks=[model_checkpoint]
)
# Visualize the model graph left-to-right, with tensor shapes.
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")
def predict_top_companies(loading_port,
                          loading_country,
                          destination_port,
                          destination_country,
                          legs):
    """Predict the most likely shipping company for a route.

    Encodes the categorical arguments with the adapted StringLookup layers,
    fills the 23-wide numerical input with zeros except for 'Legs' (index 14
    in map_features' ordering), and returns the top-predicted company name.

    NOTE(review): body reconstructed from the inline fragment earlier in this
    file (the def had lost its body in the paste); all numerical features
    other than legs default to 0 — confirm that matches the trained model.
    """
    loading_port_encoded = lookup_loading_port(tf.constant([loading_port]))
    loading_country_encoded = lookup_loading_country(tf.constant([loading_country]))
    destination_port_encoded = lookup_destination_port(tf.constant([destination_port]))
    destination_country_encoded = lookup_destination_country(tf.constant([destination_country]))

    numerical_features = np.zeros((1, 23))
    numerical_features[0, 14] = legs
    predictions = model.predict([
        loading_port_encoded,
        loading_country_encoded,
        destination_port_encoded,
        destination_country_encoded,
        numerical_features
    ])
    # Indices of the top 3 predictions, highest probability first.
    top_indices = np.argsort(predictions[0])[-3:][::-1]
    top_confidences = [predictions[0][i] for i in top_indices]
    # Scale raw confidences by the model's overall accuracy.
    model_accuracy = 0.9112
    adjusted_confidences = [conf * model_accuracy for conf in top_confidences]
    # Map vocabulary indices back to shipping company names.
    top_companies = [lookup_shipping_company.get_vocabulary()[i] for i in top_indices]
    return top_companies[0]

# Example usage of the function (the continuation lines were previously
# uncommented, which made them syntax errors):
# results = predict_top_companies(loading_port='Tel-Aviv',
#                                 loading_country='Israel',
#                                 destination_port='Dallas',
#                                 destination_country="USA",
#                                 legs=5)
# print("Top 1 Shipping Companies Ocean:")
# print(results)
# SHAP DeepExplainer setup.
# DeepExplainer expects (model, background_data) where background_data matches
# the model's inputs: a list of numpy arrays, one per input head — NOT the
# (input_tensor, output_tensor) pair used before, and shap_values() cannot
# consume a tf.data.Dataset either. (The old line also used `-` instead of `=`
# and had a stray `?`.)
# NOTE(review): the background below is a minimal all-zeros sample; for
# meaningful attributions draw ~100 real encoded rows from the training data.
background = [
    np.zeros((1, 1), dtype=np.float32),   # input_loading_country
    np.zeros((1, 1), dtype=np.float32),   # input_loading_port
    np.zeros((1, 1), dtype=np.float32),   # input_destination_country
    np.zeros((1, 1), dtype=np.float32),   # input_destination_port
    np.zeros((1, 23), dtype=np.float32),  # input_numerical
]
explainer = shap.DeepExplainer(model, background)
shap_values = explainer.shap_values(background)
This is a model and a function to predict the top shipping companies based on the user's input.
Everything works up to the predict_top_companies function.
The only thing that I haven't been able to get working, even after disabling TF eager mode, is the SHAP DeepExplainer on my model.
Any thoughts on how to accomplish this?
deepexplainer_tf.txt
I have uploaded the code as a txt file if someone who is eager to help wants to play with it.
Beta Was this translation helpful? Give feedback.
All reactions