首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >用tensorflow训练深层神经推荐模型时的Nan损失

用tensorflow训练深层神经推荐模型时的Nan损失
EN

Stack Overflow用户
提问于 2022-04-08 02:46:02
回答 2查看 311关注 0票数 1

我试图遵循tensorflow文档,并将同样的技术应用于一个玩具数据集。

在训练过程中,损失(loss)变成了 NaN。我尝试使用 Debugger V2 进行调试,发现 tf.keras.layers.GlobalAveragePooling1D 由于除以 0 而输出 NaN,进而导致反向传播过程中所有值都变成 NaN。但是,从 Debugger V2 的图形界面中看不出来的是:为什么求和的结果会变成 0。我也尝试过减少特征数量和数据集大小,但每次尝试都会引出新的错误(也许以后我会为每个错误单独开一个问题)。

下面是供参考的代码。我也在这里提供了数据集。我是在 Google Colab 上运行下面这段代码的。

代码语言:javascript
复制
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

# Dump TensorFlow debug information into ./tfdbg2_logdir so the run can be
# inspected with Debugger V2. "FULL_HEALTH" is the tensor debug mode requested
# here; circular_buffer_size=-1 is passed through as-is (presumably disables
# the circular buffer so no events are dropped -- confirm against the TF docs).
tf.debugging.experimental.enable_dump_debug_info(
    "./tfdbg2_logdir",
    tensor_debug_mode="FULL_HEALTH",
    circular_buffer_size=-1)

!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs  

准备数据

代码语言:javascript
复制
# Load the raw listening log.
ds = pd.read_csv('train_recom.csv')

# Map year 0 to 1. The result is assigned back rather than calling
# replace(..., inplace=True) on the ds['year'] selection: that chained
# in-place form is deprecated and leaves ds untouched under pandas
# Copy-on-Write.
ds['year'] = ds['year'].replace(0, 1)

# One row per distinct (song_id, title, release, artist_name, year) with the
# number of log rows for that combination in the 'count' column.
ds_song = (
    ds.groupby(['song_id', 'title', 'release', 'artist_name', 'year'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

# Persist both tables; they are read back from these CSV files later.
ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')

读取数据到tensorflow数据集

代码语言:javascript
复制
# Interaction log: read in batches of 5 rows, one pass, failing on bad rows.
ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=[
        'user_id', 'song_id', 'listen_count', 'title', 'release',
        'artist_name', 'year',
    ],
    header=True,
    num_epochs=1,
    ignore_errors=False,
)

# Song catalogue: read in batches of 128 rows, one pass, skipping bad rows.
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id', 'title', 'release', 'artist_name', 'year'],
    num_epochs=1,
    ignore_errors=True,
)

# Flatten back to one example per element and keep only the named features.
ratings = ratings.unbatch().map(
    lambda row: {
        name: row[name]
        for name in (
            "song_id", "user_id", "release", "artist_name", "title", "year",
            "listen_count",
        )
    }
)

# The catalogue is reduced to the bare song_id value of each row.
songs = songs.unbatch().map(lambda row: row["song_id"])

准备训练和测试数据集

代码语言:javascript
复制
# Fix the global TensorFlow seed, then shuffle the ratings once with a fixed
# op-level seed; reshuffle_each_iteration=False keeps the same order on every
# pass so that take()/skip() below always select the same examples.
tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)

# First 12000 shuffled examples are used for training, the next 4000 for testing.
train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
# Training data is shuffled again, batched by 1200 and cached; test data is
# batched by 400 and cached.
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()

# NOTE: `songs` was mapped to x["song_id"] when it was created, so despite the
# names `title` / `unique_song_titles` these hold song ids, not titles.
title = songs.batch(1000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
# Materialise the datasets and de-duplicate to obtain lookup vocabularies.
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
# Every 'year' value in the ratings (one entry per rating, duplicates included).
year_data=np.concatenate(list(ratings.map(lambda x: x['year']).batch(4000)))

用户模型类

代码语言:javascript
复制
class UserModel(tf.keras.Model):
    """Query-side feature encoder.

    Concatenates five 32-dimensional embeddings along axis 1: the user id,
    the mean-pooled token embeddings of the ``release``, ``artist_name`` and
    ``title`` text, and an embedding of the integer ``year``.

    Relies on the module-level ``unique_user_ids``, ``ratings`` and
    ``year_data`` objects.
    """

    def __init__(self):
        super().__init__()

        max_tokens = 1_000_000
        embedding_dimension = 32

        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_user_ids) + 1, embedding_dimension),
        ])

        # The three free-text features share one construction recipe.
        self.release_vectorizer, self.release_text_embedding = (
            self._build_text_tower('release', max_tokens, embedding_dimension))
        self.artist_vectorizer, self.artist_text_embedding = (
            self._build_text_tower('artist_name', max_tokens, embedding_dimension))
        self.title_vectorizer, self.title_text_embedding = (
            self._build_text_tower('title', max_tokens, embedding_dimension))

        # The raw year value is used directly as the embedding index, so the
        # table must cover the largest year, not the number of rating rows
        # (the original len(year_data) + 1 only worked when there happened to
        # be more rows than the largest year).
        self.year_embedding = tf.keras.Sequential([
            tf.keras.layers.Embedding(
                int(np.max(year_data)) + 1, embedding_dimension),
        ])

    @staticmethod
    def _build_text_tower(column, max_tokens, embedding_dimension):
        """Return ``(vectorizer, tower)`` for one text column of ``ratings``.

        The vectorizer is adapted on every value of ``column``; the tower
        vectorizes the text, embeds the tokens and mean-pools them.
        """
        vectorizer = tf.keras.layers.TextVectorization(max_tokens=max_tokens)
        tower = tf.keras.Sequential([
            vectorizer,
            tf.keras.layers.Embedding(
                max_tokens, embedding_dimension, mask_zero=True),
            # NOTE: with mask_zero=True the mean is taken over non-padding
            # tokens only. A text that vectorizes to zero tokens leaves
            # nothing to average and was reported to produce NaN (division by
            # zero) here; clean such rows in the input data.
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        vectorizer.adapt(
            np.concatenate(list(ratings.map(lambda x: x[column]).batch(4000))))
        return vectorizer, tower

    def call(self, inputs):
        """Embed each feature of ``inputs`` and concatenate the results.

        Args:
          inputs: mapping with the keys ``user_id``, ``release``, ``year``,
            ``artist_name`` and ``title``.

        Returns:
          The five per-feature embeddings concatenated along axis 1.
        """
        return tf.concat([
            self.user_embedding(inputs['user_id']),
            self.release_text_embedding(inputs['release']),
            self.year_embedding(inputs['year']),
            self.artist_text_embedding(inputs['artist_name']),
            self.title_text_embedding(inputs['title']),
        ], axis=1)

项目模型

代码语言:javascript
复制
class ItemModel(tf.keras.Model):
    """Candidate-side feature encoder: a single string-id embedding.

    Relies on the module-level ``unique_song_titles`` vocabulary.
    """

    def __init__(self):
        super().__init__()

        embedding_dimension = 32

        # Look the incoming string up in ``unique_song_titles`` and embed the
        # resulting index. The attribute keeps its historical name
        # ``title_embedding``; what it embeds is whatever that vocabulary holds.
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_song_titles, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_song_titles) + 1, embedding_dimension),
        ])

    def call(self, inputs):
        """Return the embedding of each string in ``inputs``."""
        return self.title_embedding(inputs)

为查询模型创建深度模型

代码语言:javascript
复制
class QueryModel(tf.keras.Model):
  """Query tower: ``UserModel`` embeddings followed by a stack of dense layers."""

  def __init__(self, layer_sizes):
    """Build the embedding model and the dense stack.

    Args:
      layer_sizes:
        A list of integers; entry i is the number of units in dense layer i.
        Every layer except the last uses ReLU; the last has no activation.
    """
    super().__init__()

    # Feature embeddings come from the user model.
    self.embedding_model = UserModel()

    # Dense stack applied on top of the concatenated embeddings.
    self.dense_layers = tf.keras.Sequential()
    final_index = len(layer_sizes) - 1
    for index, units in enumerate(layer_sizes):
      activation = "relu" if index < final_index else None
      self.dense_layers.add(
          tf.keras.layers.Dense(units, activation=activation))

  def call(self, inputs):
    """Embed ``inputs`` and pass the result through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

为项目模型创建深度模型

代码语言:javascript
复制
class CandidateModel(tf.keras.Model):
  """Candidate tower: ``ItemModel`` embeddings followed by a stack of dense layers."""

  def __init__(self, layer_sizes):
    """Build the embedding model and the dense stack.

    Args:
      layer_sizes:
        A list of integers; entry i is the number of units in dense layer i.
        Every layer except the last uses ReLU; the last has no activation.
    """
    super().__init__()

    self.embedding_model = ItemModel()

    # Split the sizes into the ReLU-activated layers and the linear output.
    hidden_sizes, output_sizes = layer_sizes[:-1], layer_sizes[-1:]

    self.dense_layers = tf.keras.Sequential()
    for units in hidden_sizes:
      self.dense_layers.add(tf.keras.layers.Dense(units, activation="relu"))
    for units in output_sizes:
      self.dense_layers.add(tf.keras.layers.Dense(units))

  def call(self, inputs):
    """Embed ``inputs`` and pass the result through the dense stack."""
    embedded = self.embedding_model(inputs)
    return self.dense_layers(embedded)

将查询和候选模型结合起来

代码语言:javascript
复制
class SongModel(tfrs.models.Model):
    """Retrieval model pairing a ``QueryModel`` with a ``CandidateModel``."""

    def __init__(self, layer_sizes):
        """Create both towers and the retrieval task.

        Args:
          layer_sizes: dense-layer sizes forwarded to both towers.
        """
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)

        # Top-K metrics are computed against the embedded `songs` dataset.
        candidate_embeddings = songs.batch(128).map(self.candidate_model)
        top_k_metrics = tfrs.metrics.FactorizedTopK(
            candidates=candidate_embeddings)
        self.task = tfrs.tasks.Retrieval(metrics=top_k_metrics)

    def compute_loss(self, features, training=False):
        """Return the retrieval task loss for one batch of ``features``."""
        print('type of feature ----',type(features))

        # Only these features are forwarded to the query tower.
        query_feature_names = (
            "user_id", "release", "artist_name", "title", "year")
        query_inputs = {name: features[name] for name in query_feature_names}

        query_embeddings = self.query_model(query_inputs)
        item_embeddings = self.candidate_model(features["song_id"])

        return self.task(query_embeddings, item_embeddings)

训练模型

代码语言:javascript
复制
# A single dense layer of 32 units per tower.
model = SongModel([32])
# Adagrad optimizer with learning rate 0.1.
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
# Train for 9 epochs on the cached training batches; fit() returns the History object.
model_hist = model.fit(cached_train, epochs=9)

下面是我得到的

代码语言:javascript
复制
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-26-fdc864fc30cf>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-26-fdc864fc30cf>
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-25-e3009db55439>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-25-e3009db55439>
Epoch 1/9
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
10/10 [==============================] - 63s 1s/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0022 - factorized_top_k/top_10_categorical_accuracy: 0.0033 - factorized_top_k/top_50_categorical_accuracy: 0.0073 - factorized_top_k/top_100_categorical_accuracy: 0.0103 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 2/9
10/10 [==============================] - 9s 945ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 3/9
10/10 [==============================] - 10s 953ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 4/9
10/10 [==============================] - 9s 948ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 5/9
10/10 [==============================] - 10s 966ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 6/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 7/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 8/9
10/10 [==============================] - 10s 958ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 9/9
10/10 [==============================] - 10s 971ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
EN

回答 2

Stack Overflow用户

回答已采纳

发布于 2022-05-31 11:16:31

在自定义数据集上使用 tfrs 时,我也遇到了类似的错误。结果发现我的数据中含有不可打印字符和符号(symbols)。我只是搜索并删除了这些符号(手动处理,再加上一些正则表达式),并且把 dataframe 中的文本列限制为只包含可打印字符。

代码语言:javascript
复制
from string import printable as pt

# Characters that are kept: everything in string.printable.
allowed_set = set(pt)


def _to_printable(value):
    """Return *value* with each character outside ``allowed_set`` replaced by a space.

    Non-string cells (for example NaN for a missing value) are returned
    unchanged; iterating over them, as the bare lambda did, raises TypeError.
    """
    if not isinstance(value, str):
        return value
    return ''.join(ch if ch in allowed_set else ' ' for ch in value)


# NOTE: a value made up only of non-printable characters becomes all spaces;
# handle such blank strings separately if they matter downstream.
df[col] = df[col].apply(_to_printable)

希望能帮上忙。

票数 1
EN

Stack Overflow用户

发布于 2022-06-06 16:14:49

问题在于:当我们用空格替换特殊字符后,有一条记录的 release 字段整个变成了空白。总之,这是数据问题,而不是代码问题。于是我们添加了下面两行代码来处理这种情况(其作用相当于 ds.replace(r'^\s*$', 'None', regex=True))。下面是包含所有改动的完整代码:

代码语言:javascript
复制
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs  

ds = pd.read_csv('train_recom.csv')

# Diagnostics: missing-value counts per feature, then whether any NaN exists
# at all, then the name of every column containing NaN.
for column in ('release', 'title', 'artist_name', 'year'):
    print(ds[column].isnull().sum())
print(ds.isna().any(axis=None))
print(any(ds[c].hasnans for c in ds))
for c in ds:
    if ds[c].hasnans:
        print(c)

text_columns = ['release', 'artist_name', 'title']

# Results are assigned back instead of using replace(..., inplace=True) on a
# column selection: that chained in-place form is deprecated and does not
# update ds under pandas Copy-on-Write.
ds['year'] = ds['year'].replace(0, 1)
for column in text_columns:
    # Strip everything except ASCII letters, digits and spaces.
    ds[column] = ds[column].replace({r'[^a-zA-Z0-9 ]+': ''}, regex=True)

# Turn empty / whitespace-only cells into NaN, then give every text column a
# placeholder token. Previously only 'release' was filled, so blank titles and
# artist names stayed NaN: groupby() silently dropped those songs and the CSV
# carried empty strings for them.
ds2 = ds.replace(r'^\s*$', np.nan, regex=True)
ds2[text_columns] = ds2[text_columns].fillna('None')
ds = ds2

# One row per distinct song with its number of log rows in 'count'.
ds_song = (
    ds.groupby(['song_id', 'title', 'release', 'artist_name', 'year'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')

# Interaction log: read in batches of 5 rows, one pass, failing on bad rows.
ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=[
        'user_id', 'song_id', 'listen_count', 'title', 'release',
        'artist_name', 'year',
    ],
    header=True,
    num_epochs=1,
    ignore_errors=False,
)

# Song catalogue: read in batches of 128 rows, one pass, skipping bad rows.
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id', 'title', 'release', 'artist_name', 'year'],
    num_epochs=1,
    ignore_errors=True,
)

# Flatten back to one example per element and keep only the named features.
ratings = ratings.unbatch().map(
    lambda row: {
        name: row[name]
        for name in (
            "song_id", "user_id", "release", "artist_name", "title", "year",
            "listen_count",
        )
    }
)
songs = songs.unbatch().map(
    lambda row: {
        name: row[name]
        for name in ("song_id", "release", "artist_name", "title", "year")
    }
)

# Fix the global TensorFlow seed, then shuffle the ratings once with a fixed
# op-level seed; reshuffle_each_iteration=False keeps the same order on every
# pass so that take()/skip() below always select the same examples.
tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)

# First 12000 shuffled examples are used for training, the next 4000 for testing.
train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
# Training data is shuffled again, batched by 1200 and cached; test data is
# batched by 400 and cached.
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()

# Materialise the title and user_id columns and de-duplicate them to obtain
# the lookup vocabularies.
title = songs.batch(1000).map(lambda x: x["title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
# One 'year' element per row of `songs` (a Python list, duplicates included).
year_data=list(songs.map(lambda x: x['year']))

class UserModel(tf.keras.Model):
    """Query-side feature encoder: embeds the ``user_id`` only.

    Relies on the module-level ``unique_user_ids`` vocabulary.
    """

    def __init__(self):
        super().__init__()

        embedding_dimension = 32

        # Look the user id up in the vocabulary and embed the resulting index.
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_user_ids) + 1, embedding_dimension),
        ])

    def call(self, inputs):
        """Return the embedding of ``inputs['user_id']``."""
        return self.user_embedding(inputs['user_id'])

class ItemModel(tf.keras.Model):
    """Candidate-side feature encoder.

    Concatenates five 32-dimensional embeddings along axis 1: the title as a
    whole string, the mean-pooled token embeddings of the ``release``,
    ``artist_name`` and ``title`` text, and an embedding of the integer
    ``year``.

    Relies on the module-level ``unique_song_titles``, ``songs`` and
    ``year_data`` objects.
    """

    def __init__(self):
        super().__init__()

        # Same value as the original literal 10_000_00, with conventional
        # digit grouping.
        max_tokens = 1_000_000
        embedding_dimension = 32

        # Embed the title as one categorical value from unique_song_titles.
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_song_titles, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_song_titles) + 1, embedding_dimension),
        ])

        # The three free-text features share one construction recipe.
        self.release_vectorizer, self.release_text_embedding = (
            self._build_text_tower('release', max_tokens, embedding_dimension))
        self.artist_vectorizer, self.artist_text_embedding = (
            self._build_text_tower('artist_name', max_tokens, embedding_dimension))
        self.title_vectorizer, self.title_text_embedding = (
            self._build_text_tower('title', max_tokens, embedding_dimension))

        # The raw year value is used directly as the embedding index, so the
        # table must cover the largest year. The original len(year_data) + 1
        # is the number of songs, which is too small whenever the catalogue
        # has fewer rows than the largest year.
        largest_year = max((int(year) for year in year_data), default=0)
        self.year_embedding = tf.keras.Sequential([
            tf.keras.layers.Embedding(largest_year + 1, embedding_dimension),
        ])

    @staticmethod
    def _build_text_tower(column, max_tokens, embedding_dimension):
        """Return ``(vectorizer, tower)`` for one text column of ``songs``.

        The vectorizer is adapted on every value of ``column``; the tower
        vectorizes the text, embeds the tokens and mean-pools them.
        """
        vectorizer = tf.keras.layers.TextVectorization(max_tokens=max_tokens)
        tower = tf.keras.Sequential([
            vectorizer,
            tf.keras.layers.Embedding(
                max_tokens, embedding_dimension, mask_zero=True),
            # NOTE: with mask_zero=True the mean is taken over non-padding
            # tokens only. A text that vectorizes to zero tokens leaves
            # nothing to average and was reported to produce NaN (division by
            # zero) here, so blank text must be replaced upstream.
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        # adapt() is documented to take a batched dataset; batching also
        # avoids one adapt step per song.
        vectorizer.adapt(songs.map(lambda x: x[column]).batch(1024))
        return vectorizer, tower

    def call(self, inputs):
        """Embed each feature of ``inputs`` and concatenate the results.

        Args:
          inputs: mapping with the keys ``title``, ``release``, ``year`` and
            ``artist_name``.

        Returns:
          The five per-feature embeddings concatenated along axis 1.
        """
        return tf.concat([
            self.title_embedding(inputs['title']),
            self.release_text_embedding(inputs['release']),
            self.year_embedding(inputs['year']),
            self.artist_text_embedding(inputs['artist_name']),
            self.title_text_embedding(inputs['title']),
        ], axis=1)

class QueryModel(tf.keras.Model):
  """Query tower: ``UserModel`` embeddings followed by a stack of dense layers."""

  def __init__(self, layer_sizes):
    """Build the embedding model and the dense stack.

    Args:
      layer_sizes:
        A list of integers; entry i is the number of units in dense layer i.
        Every layer except the last uses ReLU; the last has no activation.
    """
    super().__init__()

    # Feature embeddings come from the user model.
    self.embedding_model = UserModel()

    # Dense stack applied on top of the embeddings.
    self.dense_layers = tf.keras.Sequential()
    final_index = len(layer_sizes) - 1
    for index, units in enumerate(layer_sizes):
      activation = "relu" if index < final_index else None
      self.dense_layers.add(
          tf.keras.layers.Dense(units, activation=activation))

  def call(self, inputs):
    """Embed ``inputs`` and pass the result through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

class CandidateModel(tf.keras.Model):
  """Candidate tower: ``ItemModel`` embeddings followed by a stack of dense layers."""

  def __init__(self, layer_sizes):
    """Build the embedding model and the dense stack.

    Args:
      layer_sizes:
        A list of integers; entry i is the number of units in dense layer i.
        Every layer except the last uses ReLU; the last has no activation.
    """
    super().__init__()

    self.embedding_model = ItemModel()

    # Split the sizes into the ReLU-activated layers and the linear output.
    hidden_sizes, output_sizes = layer_sizes[:-1], layer_sizes[-1:]

    self.dense_layers = tf.keras.Sequential()
    for units in hidden_sizes:
      self.dense_layers.add(tf.keras.layers.Dense(units, activation="relu"))
    for units in output_sizes:
      self.dense_layers.add(tf.keras.layers.Dense(units))

  def call(self, inputs):
    """Embed ``inputs`` and pass the result through the dense stack."""
    embedded = self.embedding_model(inputs)
    return self.dense_layers(embedded)

class SongModel(tfrs.models.Model):
    """Retrieval model pairing a ``QueryModel`` with a ``CandidateModel``."""

    def __init__(self, layer_sizes):
        """Create both towers and the retrieval task.

        Args:
          layer_sizes: dense-layer sizes forwarded to both towers.
        """
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        # Top-K metrics are computed against the embedded `songs` dataset.
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=songs.batch(128).map(self.candidate_model),
            ),
        )

    def compute_loss(self, features, training=False):
        """Return the retrieval task loss for one batch of ``features``.

        Args:
          features: mapping with the keys ``user_id``, ``song_id``, ``title``,
            ``release``, ``artist_name`` and ``year``.
          training: accepted for interface compatibility; not used here.
        """
        print('type of feature ----', type(features))

        query_embeddings = self.query_model({
            "user_id": features["user_id"],
        })

        # The original literal listed "title" twice; the duplicate key is
        # removed (the resulting dict is the same).
        item_embeddings = self.candidate_model({
            "song_id": features["song_id"],
            "title": features["title"],
            "release": features["release"],
            "artist_name": features["artist_name"],
            "year": features["year"],
        })

        return self.task(query_embeddings, item_embeddings)

# A single dense layer of 32 units per tower.
model = SongModel([32])
# Adagrad optimizer with learning rate 0.1.
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
# Train for 9 epochs on the cached training batches; fit() returns the History object.
model_hist = model.fit(cached_train, epochs=9)
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/71791115

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档