首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >在 .fit_generator() 函数之外打印由 DataGenerator 类发送的一批训练数据

在 .fit_generator() 函数之外打印由 DataGenerator 类发送的一批训练数据
EN

Stack Overflow用户
提问于 2019-12-14 18:11:53
回答 2查看 1.1K关注 0票数 1

我已经成功地实现了自定义 DataGenerator 类,并且由于数据量大,我在 Keras 中使用了 .fit_generator() 函数。但是为了调试,我想在自定义 DataGenerator 类之外打印该类隐式发送给 .fit_generator() 函数的一批训练数据(在 DataGenerator 类内部很容易在 __getitem__ 方法中打印,但我想在类外打印)。main.py 文件是我想打印这些值的地方,而 my_classes.py 是我的自定义 DataGenerator 类所在的文件。

main.py

代码语言:javascript
复制
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Model
from my_classes import DataGenerator

# Generator configuration shared by the training and validation generators.
gen_kwargs = {
    'dim': (224, 224),
    'batch_size': 4,
    'n_classes': 3,
    'n_channels': 3,
    'shuffle': True,
}

# Sample IDs: 'id-1'..'id-16' for training, 'id-17'..'id-20' for validation.
id_split = {
    'train': ['id-%d' % i for i in range(1, 17)],
    'validation': ['id-%d' % i for i in range(17, 21)],
}

# Integer class label (0..2) for every sample ID.
label_map = {
    'id-1': 0, 'id-2': 0, 'id-3': 0, 'id-4': 2, 'id-5': 2,
    'id-6': 0, 'id-7': 2, 'id-8': 1, 'id-9': 1, 'id-10': 1,
    'id-11': 2, 'id-12': 0, 'id-13': 1, 'id-14': 2, 'id-15': 1,
    'id-16': 0, 'id-17': 2, 'id-18': 2, 'id-19': 1, 'id-20': 1,
}

# Generators
train_gen = DataGenerator(id_split['train'], label_map, **gen_kwargs)
val_gen = DataGenerator(id_split['validation'], label_map, **gen_kwargs)

# Small VGG-style convnet: two conv blocks, then a dense classifier head.
inputs = Input(shape=(224, 224, 3))
net = Conv2D(32, (3, 3), activation='relu')(inputs)
net = Conv2D(32, (3, 3), activation='relu')(net)
net = MaxPooling2D(pool_size=(2, 2))(net)
net = Dropout(0.25)(net)
net = Conv2D(64, (3, 3), activation='relu')(net)
net = Conv2D(64, (3, 3), activation='relu')(net)
net = MaxPooling2D(pool_size=(2, 2))(net)
net = Dropout(0.25)(net)
net = Flatten()(net)
net = Dense(256, activation='relu')(net)
net = Dropout(0.5)(net)
predictions = Dense(3, activation='softmax')(net)

model = Model(inputs=inputs, outputs=predictions)
optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# THIS IS WHERE I WANT TO PRINT THE BATCHES OF TRAINING DATA THAT ARE BEING
# FED BELOW BY THE DATAGENERATOR CLASS.

# Train from the generators; batches are produced lazily by DataGenerator.
model.fit_generator(generator=train_gen, validation_data=val_gen, use_multiprocessing=True, workers=6)

my_classes.py

代码语言:javascript
复制
import numpy as np
import keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from numpy import expand_dims

class DataGenerator(keras.utils.Sequence):
    """Generate batches of (image, one-hot label) pairs for Keras.

    Images are loaded lazily from ``data/<ID>.jpg`` so the whole dataset
    never needs to fit in memory; ``fit_generator`` pulls batches on demand.
    """

    def __init__(self, list_IDs, labels, batch_size=4, dim=(224, 224), n_channels=3,
                 n_classes=3, shuffle=True):
        """
        Args:
            list_IDs: sample IDs; ``data/<ID>.jpg`` must exist for each.
            labels: dict mapping each ID to an integer class in [0, n_classes).
            batch_size: samples per batch; the trailing remainder is dropped.
            dim: (height, width) every image is resized to.
            n_channels: image channels (3 = RGB).
            n_classes: number of classes for one-hot encoding.
            shuffle: reshuffle the sample order at the end of every epoch.
        """
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()  # build the initial index order

    def __len__(self):
        """Number of complete batches per epoch (partial batch dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as (X, y)."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        # Batches can be printed here for debugging.
        return X, y

    def on_epoch_end(self):
        """Rebuild (and optionally shuffle) the sample index order."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the images for ``list_IDs_temp`` and one-hot encode their labels."""
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size,), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            # Resize to self.dim (was hard-coded to (224, 224), which would
            # break any non-default ``dim`` since X is allocated with self.dim).
            img = load_img('data/' + ID + '.jpg', target_size=self.dim)
            # A per-row assignment takes the (H, W, C) array directly; the
            # previous expand_dims to (1, H, W, C) was redundant.
            X[i] = img_to_array(img)
            y[i] = self.labels[ID]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
EN

回答 2

Stack Overflow用户

发布于 2019-12-15 09:48:03

在这里,我发布了一个简单的变通方法来解决我自己的问题,希望它能帮助遇到同样问题的人。以下是改动:

因此,我没有保持这两个文件分离,而是把它们合并成一个文件,以避免如果它们是两个单独的文件时,下面第二点改动可能引起的循环导入问题。然后,我对 __getitem__() 做了一个调整:在其中额外调用一次在类之外声明的 printing(a, b) 函数,并把当前批次作为参数传过去。

请注意 ;) 其余部分保持不变,下面是完整的合并代码:

代码语言:javascript
复制
# Deduplicated: the merged file imported numpy and keras twice.
import numpy as np
from numpy import expand_dims

import keras
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

class DataGenerator(keras.utils.Sequence):
    """Generate batches of (image, one-hot label) pairs for Keras.

    Images are loaded lazily from ``data/<ID>.jpg``. Each batch is also
    passed to the module-level ``printing()`` hook for debugging.
    """

    def __init__(self, list_IDs, labels, batch_size=4, dim=(224, 224), n_channels=3, n_classes=3, shuffle=True):
        """
        Args:
            list_IDs: sample IDs; ``data/<ID>.jpg`` must exist for each.
            labels: dict mapping each ID to an integer class in [0, n_classes).
            batch_size: samples per batch; the trailing remainder is dropped.
            dim: (height, width) every image is resized to.
            n_channels: image channels (3 = RGB).
            n_classes: number of classes for one-hot encoding.
            shuffle: reshuffle the sample order at the end of every epoch.
        """
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()  # build the initial index order

    def __len__(self):
        """Number of complete batches per epoch (partial batch dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as (X, y), forwarding it to ``printing()``."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        # Debug hook defined at module level, outside this class.
        # NOTE(review): with use_multiprocessing=True this runs in worker
        # processes, so the output appears on the workers' stdout — confirm
        # this is acceptable for the intended debugging.
        printing(X, y)
        return X, y

    def on_epoch_end(self):
        """Rebuild (and optionally shuffle) the sample index order."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the images for ``list_IDs_temp`` and one-hot encode their labels."""
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size,), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            # Resize to self.dim (was hard-coded to (224, 224), which would
            # break any non-default ``dim`` since X is allocated with self.dim).
            img = load_img('data/' + ID + '.jpg', target_size=self.dim)
            # A per-row assignment takes the (H, W, C) array directly; the
            # previous expand_dims to (1, H, W, C) was redundant.
            X[i] = img_to_array(img)
            y[i] = self.labels[ID]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

#MERGING POINT HERE------------------------------------------------------------------------------------>

# Debug hook: DataGenerator.__getitem__ calls this with every batch it
# yields, so batches can be inspected outside the class. Defined up front,
# before any generator is built.
def printing(a, b):
    print("the value being sent by __getitem__ here is", a, b)

# Generator configuration shared by the training and validation generators.
gen_kwargs = {
    'dim': (224, 224),
    'batch_size': 4,
    'n_classes': 3,
    'n_channels': 3,
    'shuffle': True,
}

# Sample IDs: 'id-1'..'id-16' for training, 'id-17'..'id-20' for validation.
id_split = {
    'train': ['id-%d' % i for i in range(1, 17)],
    'validation': ['id-%d' % i for i in range(17, 21)],
}

# Integer class label (0..2) for every sample ID.
label_map = {
    'id-1': 0, 'id-2': 0, 'id-3': 0, 'id-4': 2, 'id-5': 2,
    'id-6': 0, 'id-7': 2, 'id-8': 1, 'id-9': 1, 'id-10': 1,
    'id-11': 2, 'id-12': 0, 'id-13': 1, 'id-14': 2, 'id-15': 1,
    'id-16': 0, 'id-17': 2, 'id-18': 2, 'id-19': 1, 'id-20': 1,
}

# Generators
train_gen = DataGenerator(id_split['train'], label_map, **gen_kwargs)
val_gen = DataGenerator(id_split['validation'], label_map, **gen_kwargs)

# Small VGG-style convnet: two conv blocks, then a dense classifier head.
inputs = Input(shape=(224, 224, 3))
net = Conv2D(32, (3, 3), activation='relu')(inputs)
net = Conv2D(32, (3, 3), activation='relu')(net)
net = MaxPooling2D(pool_size=(2, 2))(net)
net = Dropout(0.25)(net)
net = Conv2D(64, (3, 3), activation='relu')(net)
net = Conv2D(64, (3, 3), activation='relu')(net)
net = MaxPooling2D(pool_size=(2, 2))(net)
net = Dropout(0.25)(net)
net = Flatten()(net)
net = Dense(256, activation='relu')(net)
net = Dropout(0.5)(net)
predictions = Dense(3, activation='softmax')(net)

model = Model(inputs=inputs, outputs=predictions)
optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Batches flow DataGenerator.__getitem__ -> printing() during training.
model.fit_generator(generator=train_gen, validation_data=val_gen, use_multiprocessing=True, workers=6)

如果有人改进了答案,或者更好地想出了一个更好的解决方案,我会很高兴的,但是现在你开始了!

票数 1
EN

Stack Overflow用户

发布于 2020-05-03 05:37:46

与其在 my_classes.py 之外添加一个打印函数,不如按以下方式直接在 __getitem__() 中打印 list_IDs_temp:

代码语言:javascript
复制
def __getitem__(self, index):
    """Return one batch (X, y), printing the sample IDs it was built from."""
    # Slice this batch's positions out of the (possibly shuffled) index order.
    start = index * self.batch_size
    batch_indexes = self.indexes[start:start + self.batch_size]

    # Map index positions back to sample IDs.
    list_IDs_temp = [self.list_IDs[k] for k in batch_indexes]

    # Build the batch, then log which samples went into it.
    X, y = self.__data_generation(list_IDs_temp)
    print(list_IDs_temp)
    return X, y
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/59337836

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档