"""Expand list-valued Landsat spectral-band columns in a parcel CSV and
prepare scaled train/test splits for machine learning.

The input CSV stores each spectral band (SR_B2..SR_B7, ST_B10) as a
stringified Python list per row; those are unpacked into numbered numeric
columns before the ML preparation step.
"""

import ast

import pandas as pd

# Path to the input CSV of parcel observations.
DATA_PATH = "/mnt/data/B21BYAlayli107_225_H.csv"

# Spectral band columns whose cells hold stringified lists (e.g. "[0.1, 0.2]").
SPECTRAL_BANDS = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'ST_B10']

# Identifier / free-text columns that carry no ML signal and are dropped.
NON_FEATURE_COLUMNS = [
    'system:index', 'adaNo', 'alan', 'il', 'ilce', 'mahalle',
    'mevkii', 'nitelik', 'pafta', 'parselNo', '.geo',
]

# TODO(review): placeholder — replace with the actual target column name.
TARGET_COLUMN = 'target_column'


def expand_column(df, column):
    """Return a copy of *df* with *column* (stringified lists) expanded.

    Each cell of *column* is parsed with ``ast.literal_eval`` (safe: only
    Python literals, never arbitrary code) and spread into new columns
    named ``{column}_1 .. {column}_k``; the original column is dropped.

    Raises ``ValueError``/``SyntaxError`` if a cell is not a valid literal.
    """
    expanded_df = df.copy()
    parsed = expanded_df[column].apply(ast.literal_eval)
    wide = pd.DataFrame(parsed.tolist(), index=expanded_df.index)
    wide.columns = [f"{column}_{i + 1}" for i in range(wide.shape[1])]
    return expanded_df.drop(columns=[column]).join(wide)


def expand_bands(df, bands=SPECTRAL_BANDS):
    """Expand every band column in *bands*, returning a new DataFrame."""
    for band in bands:
        df = expand_column(df, band)
    return df


def prepare_datasets(df, target=TARGET_COLUMN, test_size=0.2, random_state=42):
    """Split *df* into scaled train/test feature matrices and target vectors.

    Returns ``(X_train_scaled, X_test_scaled, y_train, y_test)``.  The
    scaler is fit on the training fold only to avoid test-set leakage.
    """
    # Imported here (not at module top) so that expand_column/expand_bands
    # remain usable in environments without scikit-learn installed.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # errors="ignore": tolerate ID columns that are absent from this export.
    data_ml = df.drop(columns=NON_FEATURE_COLUMNS, errors="ignore")

    X = data_ml.drop(columns=[target])
    y = data_ml[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test


def main():
    """Load the CSV, expand the spectral bands, and report split shapes."""
    data = pd.read_csv(DATA_PATH)
    data = expand_bands(data)

    # Preview the expanded frame (the original bare expression was
    # notebook residue and had no effect in a script).
    print(data.head())

    X_train_s, X_test_s, y_train, y_test = prepare_datasets(data)
    print(X_train_s.shape, X_test_s.shape, y_train.shape, y_test.shape)


if __name__ == "__main__":
    main()