from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics, MulticlassMetrics
def print_performance_metrics(predictions):
    """Print binary and multiclass classification metrics for a predictions DataFrame.

    Args:
        predictions: Spark DataFrame holding "rawPrediction", "prediction" and
            "label" columns (e.g. the output of a fitted classifier's
            ``transform()``). Assumes a binary problem with labels 0.0/1.0.
    """
    # DataFrame-based evaluator: AUC / AUPR computed from the raw prediction column.
    evaluator = BinaryClassificationEvaluator(rawPredictionCol="rawPrediction")
    auc = evaluator.evaluate(predictions, {evaluator.metricName: "areaUnderROC"})
    aupr = evaluator.evaluate(predictions, {evaluator.metricName: "areaUnderPR"})
    print("auc = {}".format(auc))
    print("aupr = {}".format(aupr))
    # RDD of (prediction, label) pairs for the RDD-based mllib metrics.
    # Map Row objects to plain float tuples, as the mllib metrics classes expect.
    predictionAndLabels = predictions.select("prediction", "label").rdd.map(
        lambda row: (float(row[0]), float(row[1]))
    )
    # Instantiate metrics objects
    binary_metrics = BinaryClassificationMetrics(predictionAndLabels)
    multi_metrics = MulticlassMetrics(predictionAndLabels)
    # Area under precision-recall curve
    print("Area under PR = {}".format(binary_metrics.areaUnderPR))
    # Area under ROC curve
    print("Area under ROC = {}".format(binary_metrics.areaUnderROC))
    # Accuracy
    print("Accuracy = {}".format(multi_metrics.accuracy))
    # Confusion Matrix
    print(multi_metrics.confusionMatrix())
    # fMeasure / precision / recall / FPR / TPR are label-centric: each scores
    # one class, so the class label must be passed explicitly. Calling
    # fMeasure() with no argument raises:
    #   TypeError: fMeasure() missing 1 required positional argument: 'label'
    # F1 of the positive class
    print("F1 = {}".format(multi_metrics.fMeasure(1.0)))
    # Precision of the positive class
    print("Precision = {}".format(multi_metrics.precision(1.0)))
    # Recall of the positive class
    print("Recall = {}".format(multi_metrics.recall(1.0)))
    # FPR of the negative class
    print("FPR = {}".format(multi_metrics.falsePositiveRate(0.0)))
    # TPR of the negative class
    print("TPR = {}".format(multi_metrics.truePositiveRate(0.0)))
我正在为模型分数编写上述功能。它给了我 auc 和准确性评分，但它不适用于多类度量。我无法获得 f1 和精确度分数。
fMeasure() 缺少一个必需的位置参数：'label'
发布于 2021-07-19 14:39:54
如果你看一下F1评分公式,你会发现它只对一个类的真/假标记感兴趣。因此,F1评分是以标签为中心的。它考虑了某一类模型的真假标号,对一种模型的准确性进行了评估。MultiClassMetrics的正式文档下的“示例”部分清楚地表明,您需要在现有类中选择一个标签来调用fMeasure,当您意识到它是以标签为中心的度量时,这是合理的,正如我所解释的。
https://stackoverflow.com/questions/68441073
复制相似问题