我是机器学习的新手。我注意到以前也有人提出过类似的问题,但没有得到合适的解决方案。下面是计算语义相似度的代码,我想在此基础上使用 LIME 来解释模型的结果。请帮帮我。
from sentence_transformers import SentenceTransformer, util
# Load the pretrained sentence-embedding model used for both sentence lists.
model = SentenceTransformer('paraphrase-distilroberta-base-v1')

# Two parallel lists of sentences: sentences1[i] is scored against sentences2[i].
sentences1 = [
    'The cat sits outside',
    'A man is playing guitar',
    'The new movie is awesome',
]
sentences2 = [
    'The cat sits outside',
    'A woman watches TV',
    'The new movie is so great',
]

# Compute embeddings for both lists.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

# Compute the cosine similarities between the two sets of embeddings.
cosine_scores = util.pytorch_cos_sim(embeddings1, embeddings2)

# Output the aligned pairs with their score; only the [i][i] entries are
# needed because the lists are compared position by position.
for i, (first, second) in enumerate(zip(sentences1, sentences2)):
    print("{} \t\t {} \t\t Score: {:.4f}".format(first, second, cosine_scores[i][i]))
# Page metadata that was fused onto the print line above: posted on 2021-03-24 11:23:16
我不了解 BERT,不过您可以试试下面这段示例代码,看看它是否对您有帮助。
import pandas as pd
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.metrics
from sklearn.utils import shuffle
from io import StringIO
import re
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import lime
from lime import lime_text
from lime.lime_text import LimeTextExplainer
from sklearn.pipeline import make_pipeline
# Load the Amazon consumer reviews dataset (the path is specific to the
# original author's machine).
df = pd.read_csv('C:\\Users\\ryans\\OneDrive\\Desktop\\Briefcase\\PDFs\\1-ALL PYTHON & R CODE SAMPLES\\A - GITHUB\\Natural Language Processing - Amazon Reviews\\Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv')

# let's experiment with some sentiment analysis concepts
# first we need to clean up the stuff in the independent field of the DF we are working with
df.replace('\'','', regex=True, inplace=True)


def _clean_review_column(column):
    """Return a cleaned, lower-cased string copy of a review column.

    Each step operates on the result of the previous one. The earlier version
    re-read the raw column at every step, so only the final lower-casing
    actually took effect and missing values were never cast to strings.

    Args:
        column: pandas Series holding the raw review titles or texts.

    Returns:
        A pandas Series of strings with digits and special characters
        removed, whitespace runs collapsed to one space, and all text
        lower-cased.
    """
    # Cast first so missing values become strings instead of NaN, which the
    # vectorizer downstream cannot tokenize.
    cleaned = column.astype(str)
    # regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a
    # literal string by default.
    cleaned = cleaned.str.replace(r'\d+', '', regex=True)
    # get rid of special characters
    cleaned = cleaned.str.replace(r'[^\w\s]+', '', regex=True)
    # get rid of double spaces (collapse any whitespace run to one space)
    cleaned = cleaned.str.replace(r'\s+', ' ', regex=True)
    # convert all case to lower
    return cleaned.str.lower()


df['review_title'] = _clean_review_column(df['reviews.title'])
df['review_text'] = _clean_review_column(df['reviews.text'])
# Model inputs are the cleaned review texts; targets come from the
# `reviews.rating` column.
list_corpus = df["review_text"].tolist()
list_labels = df["reviews.rating"].tolist()

# Hold out 20% of the documents for evaluation; the seed is fixed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    list_corpus,
    list_labels,
    test_size=0.2,
    random_state=40,
)

# Binary bag-of-words features over word 1- to 3-grams, English stop words removed.
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 3),
    stop_words='english',
    binary=True,
)
train_vectors = vectorizer.fit_transform(X_train)
test_vectors = vectorizer.transform(X_test)

# Fit the classifier on the training vectors and predict the held-out set.
logreg = LogisticRegression(n_jobs=1, C=1e5)
logreg.fit(train_vectors, y_train)
pred = logreg.predict(test_vectors)

# Accuracy plus weighted-average precision, recall and F1 on the test split.
accuracy = accuracy_score(y_test, pred)
precision, recall, f1 = (
    metric(y_test, pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)
print("accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" % (accuracy, precision, recall, f1))
# Notebook leftover: shows the fourth document when run as a cell; it has no
# effect when the file is executed as a script.
list_corpus[3]

# Chain the vectorizer and the classifier so LIME can pass raw text and get
# class probabilities back.
c = make_pipeline(vectorizer, logreg)

# LIME looks up class names by predict_proba column index, so the names must
# follow the classifier's own class order (the ratings). The earlier version
# used the unique review titles, which have nothing to do with the classes.
class_names = [str(label) for label in logreg.classes_]
explainer = LimeTextExplainer(class_names=class_names)

idx = 3
# Explain the class at probability column 1; listing it once is enough.
exp = explainer.explain_instance(X_test[idx], c.predict_proba, num_features=6, labels=(1,))
print('Document id: %d' % idx)
# predict() returns the rating itself, not a position in class_names, so the
# predicted and true ratings are printed directly instead of being used as indices.
print('Predicted class =', logreg.predict(test_vectors[idx])[0])
print('True class: %s' % y_test[idx])
print ('Explanation for class %s' % class_names[1])
print ('\n'.join(map(str, exp.as_list(label=1))))

# Second explanation: let LIME pick the two most probable classes itself.
exp = explainer.explain_instance(X_test[idx], c.predict_proba, num_features=6, top_labels=2)
print(exp.available_labels())
exp.show_in_notebook(text=False)

https://towardsdatascience.com/explain-nlp-models-with-lime-shap-5c5a9f84d59b
https://marcotcr.github.io/lime/tutorials/Lime%20-%20multiclass.html
https://towardsdatascience.com/understanding-model-predictions-with-lime-a582fdff3a3b
https://stackoverflow.com/questions/66753434
复制相似问题