我试图使用Django部署我的多愁善感分析模型,但得到了以下错误。
views.py文件
from django.shortcuts import render
from django.http import HttpResponse
from django.contrib.auth import authenticate
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from string import punctuation
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
# Create your views here.
badfood=pickle.load(open('mymodel/pkl/BadFood.pickle','rb'))
def index(request):
return render(request,'mymodel/index.html')
def remove_non_ascii_1(text):
return ''.join(i for i in text if ord(i)<128)
def clean_text(input_str):
lemmatizer= WordNetLemmatizer()
input_str=input_str.lower()
remove_num=re.sub(r'\d+','',input_str)
remove_punc=remove_num.translate(str.maketrans("","",punctuation))
remove_white=remove_punc.strip()
stop_words=set(stopwords.words('english'))
tokens=word_tokenize(remove_white)
result=[i for i in tokens if not i in stop_words]
lemmatized_words=[lemmatizer.lemmatize(word) for word in result]
review=' '.join(lemmatized_words)
return review
def predict(request):
# Grabbing data from user
if request.method=="POST":
review=request.POST.get('review','')
city=request.POST.get('city','')
input_data=[{'review':review,'city':city}]
dataset=pd.DataFrame(input_data)
dataset = dataset.replace(r'\r',' ', regex=True)
dataset['review']=dataset['review'].apply(lambda x:remove_non_ascii_1(x))
dataset['review']=dataset['review'].apply(lambda x:clean_text(x))
# Bag of words
vectorizer=CountVectorizer()
features_data=
pd.DataFrame(vectorizer.fit_transform(dataset.review).toarray())
features_data.columns=vectorizer.get_feature_names()
features_data.insert(0,'city_x',dataset['city'])
# Label Encoding the city column
labelencoder=LabelEncoder()
features_data['city_x']=labelencoder.fit_transform(features_data.city_x)
features_data['city_x']=features_data['city_x'].astype('category')
regressor=badfood.predict(features_data)
return render(request,'mymodel/result.html',{'res':regressor})模型的/ValueError/数量特性必须与输入匹配。模型n_features为7397,输入n_features为12。
我设计了一个用户界面,在其中我给出了一个文本框字段,用户可以在其中输入评论和下拉列表,用户可以从中选择城市。上面是我的views.py文件,它包含了所有必要的预测代码。现在我如何使我的输入数据特征与训练的模型特征( 7397 )相等,因为每次进行不同的检查时,所有在字标记化后形成的列都会有所不同。我不知道words.My将形成哪些列,目的是从它所培训的用户输入中创建完全相同的数据格式,以便进行正确的预测。代码的缩进在粘贴代码时受到干扰,请不要认为它是错误的。
非常感谢
发布于 2019-08-01 13:10:56
因为你已经训练了7397特征的模型,并且对12个特征做了预测,所以你会有错误。
您还需要保存矢量器,并且需要使用vectorizer.transform() (它将根据所学的词汇表转换数据),而不是使用fit_transform
#example
from sklearn.feature_extraction.text import CountVectorizer
import pickle
vect = CountVectorizer()
vect.fit(['how are you','have a nice day', 'good morning'])
#saving vectorizer
with open('vectorizer.pkl', 'wb') as file:
pickle.dump(vect, file)
# loading vectorizer
with open('vectorizer.pkl', 'rb') as file: #Now use vect to vectorize your new text
vect = pickle.load(file)
vect.transform(['new comment'])https://stackoverflow.com/questions/57302522
复制相似问题