/**
 * Code snippets, each identified by a unique string `id`.
 *
 * Every entry has the shape `{ id: string, code: string }`, where `code` is
 * the raw snippet text kept in a template literal so that line breaks and
 * indentation are preserved exactly as written.
 *
 * NOTE(review): ids look like `m<module>c<snippet>` (e.g. "m3c2") — confirm
 * against whatever looks snippets up by id.
 * NOTE(review): entries whose code is just `echo` / `echo Hello World` appear
 * to be placeholders awaiting real content — confirm before removing them.
 */
const codeblock = [
  {
    id: "m1c1",
    code: `echo Hello World`,
  },
  {
    id: "m1c2",
    code: `# Spark SQL
# 1. create the temporal view from the dataframe
food_df.createTempView("foods_view")

# 2. select the columns from sql view
spark_sql = self.session.sql(
    "select id, amount, date, description from foods_view"
)

# Dataframe API
# 1. select the columns from the dataframe
dataframe_api = food_df.select(
    col("id"), col("amount"), col("date"), col("description")
)`,
  },
  {
    id: "m2c1",
    code: `# Read the data csv, json, parquet
_csv_df = spark.read.csv('_file_name.csv')
_json_df = spark.read.json('_file_name.json')
_parquet_df = spark.read.parquet('_file_name.parquet')`,
  },
  {
    id: "m2c2",
    // Chain is parenthesised so the multi-line call is valid Python.
    code: `# Handling Schema Evolution for Parquet Files
_parquet_df = (
    spark.read
    .option("mergeSchema", "true")
    .parquet('_file_name.parquet')
)`,
  },
  {
    id: "m2c3",
    // Written with the PySpark types API to match the other (Python) snippets.
    code: `# Explicit Schema Definition for Data Type Inference
from pyspark.sql.types import (
    StructType,
    StructField,
    IntegerType,
    DoubleType,
    DateType,
    StringType,
)

# Define the schema explicitly
schema = StructType([
    StructField("id", IntegerType(), False),
    StructField("amount", DoubleType(), False),
    StructField("date", DateType(), False),
    StructField("description", StringType(), True),
])

# Use the defined schema to read a CSV file
_csv_df = spark.read.schema(schema).csv('_file_name.csv')

# Similarly, apply the schema for JSON and Parquet if needed
_json_df = spark.read.schema(schema).json('_file_name.json')
_parquet_df = spark.read.schema(schema).parquet('_file_name.parquet')`,
  },
  {
    id: "m2c4",
    code: `echo`,
  },
  {
    id: "m3c1",
    // Both variants use the same ">=" date predicate so they stay equivalent.
    code: `# Spark SQL
spark_sql = self.sql("SELECT sum(amount), date FROM foods_view WHERE date >= '2023-08-13' GROUP BY date")

# Dataframe API
dataframe_api = food_df.filter(col("date") >= "2023-08-13").select(
            col("id"), col("amount"), col("date"), col("description")
).groupby(col("date")).agg({"amount": "sum"})`,
  },
  {
    id: "m3c2",
    // The DataFrame variant mirrors the SQL one: join, then filter, then order.
    code: `# Joining two dataframes, filtering and ordering
# Spark SQL
spark_sql = self.sql(
    "SELECT * FROM foods_view "
    "JOIN people_view ON foods_view.people_id = people_view.id "
    "WHERE foods_view.date >= '2023-08-13' ORDER BY people_view.id"
)

# Dataframe API
dataframe_api = (
    food_df.alias("j1")
    .join(people_df.alias("j2"), col("j1.people_id") == col("j2.id"), "inner")
    .filter(col("j1.date") >= "2023-08-13")
    .orderBy(col("j2.id"))
)`,
  },
  {
    id: "m3c3",
    code: `echo`,
  },
  {
    id: "m4c1",
    code: `echo`,
  },
  {
    id: "m4c2",
    code: `echo Hello World`,
  },
  {
    id: "m4c3",
    // The strategy list is kept as "#" comments so the snippet stays valid Python.
    code: `# Caching the dataframe
food_df.cache()

# Persisting the dataframe
food_df.persist(StorageLevel.MEMORY_AND_DISK)

# Unpersisting the dataframe
food_df.unpersist()

# Data Persistence Strategies:
# * MEMORY_ONLY
# * MEMORY_AND_DISK
# * DISK_ONLY
# * OFF_HEAP`,
  },
  {
    id: "m4c4",
    code: `echo`,
  },
  {
    id: "m5c1",
    code: `echo`,
  },
  {
    id: "m5c2",
    code: `echo Hello World`,
  },
  {
    id: "m5c3",
    code: `echo Hello World`,
  },
  {
    id: "m6c1",
    code: `echo`,
  },
  {
    id: "m6c2",
    code: `echo Hello World`,
  },
  {
    id: "m6c3",
    code: `echo Hello World`,
  },
];

export default codeblock;
