AttributeError: 'MulticlassMetrics' object has no attribute '_sc'

Azhuvath, RajeevX Sun, 23 Jun 2024 22:02:45 -0700

Getting the error "AttributeError: 'MulticlassMetrics' object has no attribute 
'_sc'" while executing the standalone attached code in a bare metal system.


Thanks and Regards,
Rajeev

from pyspark import SparkContext, SparkConf
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import RandomForest
from pyspark.mllib.evaluation import MulticlassMetrics
from urllib.request import urlopen
import numpy as np

# Set Spark configuration
conf = SparkConf() \
    .setAppName("Iris Flower Classification with PySpark") \
    .setMaster("local[10]")  # Using 10 threads to simulate 10 executors in 
local mode

# Initialize Spark context
sc = SparkContext(conf=conf)

# Map string labels to numeric labels
label_map = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2
}

# Function to parse Iris dataset lines
def parse_line(line):
    parts = line.strip().split(',')
    features = np.array(parts[:4], dtype=float)
    label = label_map[parts[4]]
    return LabeledPoint(label, features)

# Download Iris dataset (if not already downloaded)
iris_data_url = 
"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data";
data = urlopen(iris_data_url).read().decode('utf-8').strip().split('\n')

# Create RDD from data
iris_rdd = sc.parallelize(data)

# Parse and transform the data
parsed_rdd = iris_rdd.map(parse_line)

# Split the data into training (80%) and test (20%)
training_rdd, test_rdd = parsed_rdd.randomSplit([0.8, 0.2], seed=42)

# Train the Random Forest model
model = RandomForest.trainClassifier(training_rdd, numClasses=3, 
categoricalFeaturesInfo={},
                                     numTrees=10, featureSubsetStrategy="auto",
                                     impurity='gini', maxDepth=4, maxBins=32)

# Make predictions on the test data
predictions_and_labels = test_rdd.map(lambda lp: 
(float(model.predict(lp.features)), lp.label))

# Evaluate the model
metrics = MulticlassMetrics(predictions_and_labels)

# Print confusion matrix and F1 score
print("Confusion Matrix:")
print(metrics.confusionMatrix().toArray())
print("F1 Score (weighted):", metrics.weightedFMeasure())

# Stop Spark context
sc.stop()

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscr...@spark.apache.org

AttributeError: 'MulticlassMetrics' object has no attribute '_sc'

Reply via email to