文章/答案/技术大牛

发布

社区首页 >问答首页 >用tensorflow训练深层神经推荐模型时的Nan损失

问用tensorflow训练深层神经推荐模型时的Nan损失
EN

Stack Overflow用户

提问于 2022-04-08 02:46:02

回答 2查看 311关注 0票数 1

我试图遵循tensorflow文档，并将同样的技术应用于一个玩具数据集。

在训练过程中，我和南一样，失去了一切。我尝试过使用Debugger V2调试相同的值，并且我可以看到，由于除以0，tf.keras.layers.GlobalAveragePooling1D给出了Nan，这使得所有值在反向传播过程中都是Nan。但是，调试器V2图形用户界面中不清楚的是，为什么和变成0。我确实试图减少特性的数量和数据集的大小，但是每次活动都会给我带来新的错误(可能以后我会为每个问题启动一个单独的问题线程)。

下面是参考代码。我提供数据集以及这里。我在Google上尝试了下面的代码。

import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

tf.debugging.experimental.enable_dump_debug_info(
    "./tfdbg2_logdir",
    tensor_debug_mode="FULL_HEALTH",
    circular_buffer_size=-1)

!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs

准备数据

ds=pd.read_csv('train_recom.csv')
ds['year'].replace(0,1,inplace=True)
ds_song=ds.groupby(['song_id','title','release','artist_name','year']).size().reset_index().rename(columns={0:'count'})
ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')

读取数据到tensorflow数据集

ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=['user_id', 'song_id', 'listen_count', 'title', 'release', 'artist_name',
       'year'],
    header=True,
    num_epochs=1,
    ignore_errors=False,)
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id','title','release','artist_name','year'],
    num_epochs=1,
    ignore_errors=True,)
ratings = ratings.unbatch().map(lambda x: {
    "song_id": x["song_id"],
    "user_id": x["user_id"],
    "release" : x["release"],
    "artist_name" : x["artist_name"],
    "title" : x["title"],
    "year" : x["year"],
    "listen_count": x["listen_count"]
})
songs = songs.unbatch().map(lambda x: x["song_id"])

准备训练和测试数据集

tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()

title = songs.batch(1000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
year_data=np.concatenate(list(ratings.map(lambda x: x['year']).batch(4000)))

用户模型类

class UserModel(tf.keras.Model):

    def __init__(self):
        super().__init__()

        max_tokens = 1_000_000

        embedding_dimension = 32
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
          ])

        self.release_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        
        self.release_text_embedding = tf.keras.Sequential([
          self.release_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True,input_length=144),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])

        self.release_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['release']).batch(4000))))

        self.artist_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.artist_text_embedding = tf.keras.Sequential([
          self.artist_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.artist_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['artist_name']).batch(4000))))
        
        self.title_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.title_text_embedding = tf.keras.Sequential([
          self.title_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.title_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['title']).batch(4000))))
        
        self.year_embedding = tf.keras.Sequential([
              tf.keras.layers.Embedding(len(year_data) + 1, 32),
            ])

    def call(self, inputs):
      return tf.concat([
          self.user_embedding(inputs['user_id']),
          self.release_text_embedding(inputs['release'])
          ,
          self.year_embedding(inputs['year']), 
          self.artist_text_embedding(inputs['artist_name']),
          self.title_text_embedding(inputs['title']),
             ], axis=1)

项目模型

class ItemModel(tf.keras.Model):

    def __init__(self):
        super().__init__()

        max_tokens = 10_000

        embedding_dimension = 32

        ## embed title from unique_song_titles
        self.title_embedding = tf.keras.Sequential([
        tf.keras.layers.StringLookup(
            vocabulary=unique_song_titles, mask_token=None),
        tf.keras.layers.Embedding(len(unique_song_titles) + 1, embedding_dimension)
      ])

    def call(self, inputs):
      return self.title_embedding(inputs)

查询模型创建深度模型

class QueryModel(tf.keras.Model):
  """Model for encoding user queries."""

  def __init__(self, layer_sizes):
    """Model for encoding user queries.

    Args:
      layer_sizes:
        A list of integers where the i-th entry represents the number of units
        the i-th layer contains.
    """
    super().__init__()

    # We first use the user model for generating embeddings.
    self.embedding_model = UserModel()

    # Then construct the layers.
    self.dense_layers = tf.keras.Sequential()

    # Use the ReLU activation for all but the last layer.
    for layer_size in layer_sizes[:-1]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

    # No activation for the last layer.
    for layer_size in layer_sizes[-1:]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size))

  def call(self, inputs):
    feature_embedding = self.embedding_model(inputs)
    return self.dense_layers(feature_embedding)

为项目模型创建深度模型

class CandidateModel(tf.keras.Model):
  """Model for encoding movies."""

  def __init__(self, layer_sizes):
    """Model for encoding movies.

    Args:
      layer_sizes:
        A list of integers where the i-th entry represents the number of units
        the i-th layer contains.
    """
    super().__init__()

    self.embedding_model = ItemModel()

    # Then construct the layers.
    self.dense_layers = tf.keras.Sequential()

    # Use the ReLU activation for all but the last layer.
    for layer_size in layer_sizes[:-1]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

    # No activation for the last layer.
    for layer_size in layer_sizes[-1:]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size))

  def call(self, inputs):
    feature_embedding = self.embedding_model(inputs)
    return self.dense_layers(feature_embedding)

将查询和候选模型结合起来

class SongModel(tfrs.models.Model):

    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        self.task = tfrs.tasks.Retrieval(
          metrics=tfrs.metrics.FactorizedTopK(
              candidates=songs.batch(128).map(self.candidate_model),
          ),
      )

    def compute_loss(self, features, training=False):
        print('type of feature ----',type(features))

        query_embeddings = self.query_model({
            "user_id": features["user_id"]
            ,
                "release" : features["release"]
                ,
                "artist_name" : features["artist_name"],
                "title": features["title"],
                "year" : features["year"],
        })

        item_embeddings = self.candidate_model(features["song_id"])

        return self.task(query_embeddings, item_embeddings)

训练模型

model = SongModel([32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model_hist = model.fit(cached_train, epochs=9)

下面是我得到的

WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-26-fdc864fc30cf>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-26-fdc864fc30cf>
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-25-e3009db55439>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-25-e3009db55439>
Epoch 1/9
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
10/10 [==============================] - 63s 1s/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0022 - factorized_top_k/top_10_categorical_accuracy: 0.0033 - factorized_top_k/top_50_categorical_accuracy: 0.0073 - factorized_top_k/top_100_categorical_accuracy: 0.0103 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 2/9
10/10 [==============================] - 9s 945ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 3/9
10/10 [==============================] - 10s 953ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 4/9
10/10 [==============================] - 9s 948ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 5/9
10/10 [==============================] - 10s 966ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 6/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 7/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 8/9
10/10 [==============================] - 10s 958ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 9/9
10/10 [==============================] - 10s 971ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan

python

tensorflow

keras

deep-learning

recommendation-engine

回答 2

Stack Overflow用户

回答已采纳

发布于 2022-05-31 11:16:31

在自定义数据集上使用tfrs时，我也遇到了类似的错误。结果发现我的数据中没有打印字符和sysmbols。我只需搜索和删除符号(手动，一些regex)，我还将dataframe中的文本列限制为仅可打印字符。

from string import printable as pt

allowed_set = set(pt)
df[col] = df[col].apply(lambda x:  ''.join([' ' if  s not in  allowed_set else s for s in x]))

希望能帮上忙。

票数 1

Stack Overflow用户

发布于 2022-06-06 16:14:49

问题是，当我们用空格替换特殊字符时，对于一条记录，整个数据变为空(对于release字段)。总之，这是数据问题，而不是代码问题。然后，我们在下面添加了两行代码来处理这种情况，ds.replace(r'^\s*$', 'None', regex=True)。下面是包含所有更改的整个代码

import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs  

ds=pd.read_csv('train_recom.csv')

print(ds['release'].isnull().sum())
print(ds['title'].isnull().sum())
print(ds['artist_name'].isnull().sum())
print(ds['year'].isnull().sum())
print(ds.isna().any(axis=None))
print(any(ds[c].hasnans for c in ds))
for c in ds:
  if ds[c].hasnans:
    print(c)

ds['year'].replace(0,1,inplace=True)
ds.release.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds.artist_name.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds.title.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds2=ds.replace(r'^\s*$', np.nan, regex=True)
ds2['release']=ds2['release'].fillna('None')
ds=ds2
ds_song=ds.groupby(['song_id','title','release','artist_name','year']).size().reset_index().rename(columns={0:'count'})

ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')

ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=['user_id', 'song_id', 'listen_count', 'title', 'release', 'artist_name',
       'year'],
    header=True,
    num_epochs=1,
    ignore_errors=False,)
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id','title','release','artist_name','year'],
    num_epochs=1,
    ignore_errors=True,)
ratings = ratings.unbatch().map(lambda x: {
    "song_id": x["song_id"],
    "user_id": x["user_id"],
    "release" : x["release"],
    "artist_name" : x["artist_name"],
    "title" : x["title"],
    "year" : x["year"],
    "listen_count": x["listen_count"]
})
songs = songs.unbatch().map(lambda x: {
    "song_id":x["song_id"],
    "release":x["release"],
    "artist_name":x["artist_name"],
    "title":x["title"],
    "year":x["year"],
}) 

tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()

title = songs.batch(1000).map(lambda x: x["title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
year_data=list(songs.map(lambda x: x['year']))

class UserModel(tf.keras.Model):

    def __init__(self):
        super().__init__()

        max_tokens = 1_000_000

        embedding_dimension = 32
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
          ])



    def call(self, inputs):
      return self.user_embedding(inputs['user_id'])

class ItemModel(tf.keras.Model):

    def __init__(self):
        super().__init__()

        max_tokens = 10_000_00

        embedding_dimension = 32

        ## embed title from unique_song_titles
        self.title_embedding = tf.keras.Sequential([
        tf.keras.layers.StringLookup(
            vocabulary=unique_song_titles, mask_token=None),
        tf.keras.layers.Embedding(len(unique_song_titles) + 1, embedding_dimension)
      ])

        self.release_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        
        self.release_text_embedding = tf.keras.Sequential([
          self.release_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True,input_length=144),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])

        self.release_vectorizer.adapt(songs.map(lambda x: x['release']))

        self.artist_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.artist_text_embedding = tf.keras.Sequential([
          self.artist_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])

        self.artist_vectorizer.adapt(songs.map(lambda x: x['artist_name']))
        
        self.title_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.title_text_embedding = tf.keras.Sequential([
          self.title_vectorizer,
          tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
          tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.title_vectorizer.adapt(songs.map(lambda x: x['title']))
        
        self.year_embedding = tf.keras.Sequential([
              tf.keras.layers.Embedding(len(year_data) + 1, 32),
              # tf.keras.layers.Embedding(2501, 32),
            ])
        
    def call(self, inputs):
      # return self.title_embedding(inputs['title'])
      return tf.concat([
    self.title_embedding(inputs['title']),
    self.release_text_embedding(inputs['release'])
    ,
    self.year_embedding(inputs['year']), 
    self.artist_text_embedding(inputs['artist_name']),
    self.title_text_embedding(inputs['title']),
        ], axis=1)

class QueryModel(tf.keras.Model):
  """Model for encoding user queries."""

  def __init__(self, layer_sizes):
    """Model for encoding user queries.

    Args:
      layer_sizes:
        A list of integers where the i-th entry represents the number of units
        the i-th layer contains.
    """
    super().__init__()

    # We first use the user model for generating embeddings.
    self.embedding_model = UserModel()

    # Then construct the layers.
    self.dense_layers = tf.keras.Sequential()

    # Use the ReLU activation for all but the last layer.
    for layer_size in layer_sizes[:-1]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

    # No activation for the last layer.
    for layer_size in layer_sizes[-1:]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size))

  def call(self, inputs):
    feature_embedding = self.embedding_model(inputs)
    return self.dense_layers(feature_embedding)

class CandidateModel(tf.keras.Model):
  """Model for encoding movies."""

  def __init__(self, layer_sizes):
    """Model for encoding movies.

    Args:
      layer_sizes:
        A list of integers where the i-th entry represents the number of units
        the i-th layer contains.
    """
    super().__init__()

    self.embedding_model = ItemModel()

    # Then construct the layers.
    self.dense_layers = tf.keras.Sequential()

    # Use the ReLU activation for all but the last layer.
    for layer_size in layer_sizes[:-1]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

    # No activation for the last layer.
    for layer_size in layer_sizes[-1:]:
      self.dense_layers.add(tf.keras.layers.Dense(layer_size))

  def call(self, inputs):
    feature_embedding = self.embedding_model(inputs)
    return self.dense_layers(feature_embedding)

class SongModel(tfrs.models.Model):

    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        self.task = tfrs.tasks.Retrieval(
          metrics=tfrs.metrics.FactorizedTopK(
              candidates=songs.batch(128).map(self.candidate_model),
          ),
      )

    def compute_loss(self, features, training=False):
        print('type of feature ----',type(features))

        query_embeddings = self.query_model({
            "user_id": features["user_id"]
            ,
        })

        item_embeddings = self.candidate_model({            
            "song_id": features["song_id"],
                "title" : features["title"],
                 "release" : features["release"]
                ,
                "artist_name" : features["artist_name"],
                "title": features["title"],
                "year" : features["year"],

        })

        return self.task(query_embeddings, item_embeddings)

model = SongModel([32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model_hist = model.fit(cached_train, epochs=9)

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/71791115

复制

相似问题

问用tensorflow训练深层神经推荐模型时的Nan损失
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用tensorflow训练深层神经推荐模型时的Nan损失EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用tensorflow训练深层神经推荐模型时的Nan损失
EN