"""Expand list-valued Landsat spectral-band columns in a parcel CSV and
prepare scaled train/test splits for machine learning.

The input CSV stores each spectral band (SR_B2..SR_B7, ST_B10) as a
stringified Python list per row; those are unpacked into numbered numeric
columns before the ML preparation step.
"""

import ast

import pandas as pd

# Path to the input CSV of parcel observations.
DATA_PATH = "/mnt/data/B21BYAlayli107_225_H.csv"

# Spectral band columns whose cells hold stringified lists (e.g. "[0.1, 0.2]").
SPECTRAL_BANDS = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'ST_B10']

# Identifier / free-text columns that carry no ML signal and are dropped.
NON_FEATURE_COLUMNS = [
    'system:index', 'adaNo', 'alan', 'il', 'ilce', 'mahalle',
    'mevkii', 'nitelik', 'pafta', 'parselNo', '.geo',
]

# TODO(review): placeholder — replace with the actual target column name.
TARGET_COLUMN = 'target_column'


def expand_column(df, column):
    """Return a copy of *df* with *column* (stringified lists) expanded.

    Each cell of *column* is parsed with ``ast.literal_eval`` (safe: only
    Python literals, never arbitrary code) and spread into new columns
    named ``{column}_1 .. {column}_k``; the original column is dropped.

    Raises ``ValueError``/``SyntaxError`` if a cell is not a valid literal.
    """
    expanded_df = df.copy()
    parsed = expanded_df[column].apply(ast.literal_eval)
    wide = pd.DataFrame(parsed.tolist(), index=expanded_df.index)
    wide.columns = [f"{column}_{i + 1}" for i in range(wide.shape[1])]
    return expanded_df.drop(columns=[column]).join(wide)


def expand_bands(df, bands=SPECTRAL_BANDS):
    """Expand every band column in *bands*, returning a new DataFrame."""
    for band in bands:
        df = expand_column(df, band)
    return df


def prepare_datasets(df, target=TARGET_COLUMN, test_size=0.2, random_state=42):
    """Split *df* into scaled train/test feature matrices and target vectors.

    Returns ``(X_train_scaled, X_test_scaled, y_train, y_test)``.  The
    scaler is fit on the training fold only to avoid test-set leakage.
    """
    # Imported here (not at module top) so that expand_column/expand_bands
    # remain usable in environments without scikit-learn installed.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # errors="ignore": tolerate ID columns that are absent from this export.
    data_ml = df.drop(columns=NON_FEATURE_COLUMNS, errors="ignore")

    X = data_ml.drop(columns=[target])
    y = data_ml[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test


def main():
    """Load the CSV, expand the spectral bands, and report split shapes."""
    data = pd.read_csv(DATA_PATH)
    data = expand_bands(data)

    # Preview the expanded frame (the original bare expression was
    # notebook residue and had no effect in a script).
    print(data.head())

    X_train_s, X_test_s, y_train, y_test = prepare_datasets(data)
    print(X_train_s.shape, X_test_s.shape, y_train.shape, y_test.shape)


if __name__ == "__main__":
    main()