from pyspark.sql import SparkSession
def create_session():
    """Create (or reuse) the SparkSession for the statewise corona-cases app.

    Returns:
        SparkSession: the active session.
    """
    # The fluent builder chain must be one expression; the original split it
    # across bare lines (a SyntaxError). Parentheses make the chain parse.
    spk = (
        SparkSession.builder
        .appName("Corona_cases_statewise.com")
        .getOrCreate()
    )
    return spk
def create_RDD(sc_obj, data):
    """Distribute *data* into an RDD via the given SparkContext.

    Args:
        sc_obj: SparkContext used to parallelize the data.
        data: iterable of records to distribute.

    Returns:
        RDD: the parallelized collection.
    """
    # Bug fix: the original called the module-level global ``sc`` instead of
    # the ``sc_obj`` parameter, so the argument was silently ignored.
    df = sc_obj.parallelize(data)
    return df
if __name__ == "__main__":
    # Sample records: (state, confirmed, recovered, deaths).
    # NOTE(review): column meanings inferred from typical corona-case data —
    # confirm against the data source.
    input_data = [
        ("Uttar Pradesh", 122000, 89600, 12238),
        ("Maharashtra", 454000, 380000, 67985),
        ("Tamil Nadu", 115000, 102000, 13933),
        ("Karnataka", 147000, 111000, 15306),
        ("Kerala", 153000, 124000, 5259),
    ]
    spark = create_session()
    sc = spark.sparkContext
    rd_df = create_RDD(sc, input_data)
    # Show the resulting object's type (expected: pyspark RDD).
    print(type(rd_df))