정보통신기술(ICT)

부산의 연간 강수량 예측(1904년 ~ 2023년)

해머슴 2024. 10. 1. 12:31

 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import tkinter as tk
from tkinter import filedialog, messagebox
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

# 엑셀 파일을 불러오는 함수
def load_file():
    """Ask the user for an Excel workbook and load it into a DataFrame.

    Opens a file-selection dialog limited to ``*.xlsx`` / ``*.xls`` files,
    reads the chosen file with ``pandas.read_excel`` and trims surrounding
    whitespace from the column labels.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the dialog is
        cancelled or the file cannot be read (an error dialog is shown in
        the latter case).
    """
    file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])
    if not file_path:
        return None
    try:
        df = pd.read_excel(file_path)
        # Trim only string labels. The ``.str`` accessor raises when every
        # header is non-string (e.g. numeric headers) and turns non-string
        # labels into NaN in a mixed header row, so strip label by label.
        df.columns = [
            col.strip() if isinstance(col, str) else col
            for col in df.columns
        ]
        return df
    except Exception as e:
        # GUI boundary: report any read/parse failure instead of crashing.
        messagebox.showerror("Error", f"Failed to load or process the file.\nError: {str(e)}")
        return None

# 데이터 전처리 및 모델 평가 함수
def process_data(df):
    """Fit candidate regressors on Year -> Rainfall and return the best one.

    The frame must contain the columns ``'Year'`` and ``'Rainfall'``. If any
    cell in the frame is missing, a warning dialog is shown and the missing
    values of numeric columns are replaced by the column mean. The data is
    split 80/20 into train and test sets, a Ridge regressor and a random
    forest are fitted, their test MSE and R2 are printed, and the model with
    the lowest test MSE is selected.

    Args:
        df: DataFrame holding at least the ``'Year'`` and ``'Rainfall'``
            columns. It is modified in place when missing values are filled.

    Returns:
        A tuple ``(best_model, X_test, y_test)``, or ``None`` when a required
        column is absent or any processing step fails (an error dialog is
        shown in both cases).
    """
    try:
        # Check for required columns
        required_columns = ['Year', 'Rainfall']
        if not all(col in df.columns for col in required_columns):
            messagebox.showerror("Error", f"DataFrame must contain the following columns: {', '.join(required_columns)}")
            return None

        # Check for missing values
        if df.isnull().values.any():
            messagebox.showwarning("Warning", "Dataset contains missing values. Filling them with mean values.")
            # numeric_only=True: without it, recent pandas versions raise a
            # TypeError as soon as the sheet has any non-numeric column.
            # The fill stays in place so the caller's frame is updated too.
            df.fillna(df.mean(numeric_only=True), inplace=True)

        df = df[required_columns]  # Extract relevant columns
        X = df[['Year']].values
        y = df['Rainfall'].values

        # Split into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Candidate models to compare. The forest is seeded like the split so
        # that repeated runs on the same file select the same model.
        models = {
            'Ridge': Ridge(alpha=1.0),  # Adjust alpha for Ridge regression
            'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        }

        best_model = None
        best_mse = float('inf')

        for name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            print(f'{name} MSE: {mse}, R2: {r2}')

            # Keep the model with the lowest test-set MSE seen so far.
            if mse < best_mse:
                best_mse = mse
                best_model = model

        return best_model, X_test, y_test
    except Exception as e:
        # GUI boundary: surface any failure to the user instead of crashing.
        messagebox.showerror("Error", f"Error processing data: {str(e)}")
        return None

# 그래프 그리는 함수
def plot_graph(root, df, model, X_test, y_test):
    """Plot the rainfall data with a degree-9 polynomial trend inside *root*.

    Draws every (Year, Rainfall) point of *df*, highlights the held-out test
    points, overlays a 9th-degree least-squares polynomial and embeds the
    figure in the Tkinter container. A plot embedded by an earlier call on
    the same *root* is replaced rather than stacked underneath.

    Args:
        root: Tkinter container that hosts the embedded canvas.
        df: DataFrame with ``'Year'`` and ``'Rainfall'`` columns.
        model: Fitted regressor. Not used for drawing; kept so existing
            callers keep working.
        X_test: Test-set years, drawn as the green points.
        y_test: Test-set rainfall values matching *X_test*.

    Any exception is reported through an error dialog; nothing is raised.
    """
    try:
        # Function-scope import: a plain Figure (instead of plt.subplots)
        # avoids registering a pyplot-managed figure that is never closed.
        from matplotlib.figure import Figure

        years = df['Year'].to_numpy(dtype=float)
        rainfall = df['Rainfall'].to_numpy(dtype=float)

        fig = Figure(figsize=(8, 6))
        ax = fig.add_subplot(111)
        ax.scatter(years, rainfall, color='blue', label='Actual Rainfall')
        ax.scatter(X_test, y_test, color='green', label='Test Data')  # Separate the test data

        # Fit a 9th degree polynomial to the data. Polynomial.fit maps the
        # years onto [-1, 1] before solving; fitting raw calendar years
        # (values near 2000 raised to the 9th power) is badly conditioned.
        polynomial = np.polynomial.Polynomial.fit(years, rainfall, 9)

        # Generate x values for the polynomial line
        x_range = np.linspace(years.min(), years.max(), 100)
        y_range = polynomial(x_range)

        ax.plot(x_range, y_range, color='red', label='9th Degree Polynomial Fit')
        ax.set_title('Rainfall Prediction Over the Years')
        ax.set_xlabel('Year')
        ax.set_ylabel('Rainfall (mm)')
        ax.legend()
        ax.grid(True)

        # Embed the plot in Tkinter
        canvas = FigureCanvasTkAgg(fig, master=root)
        canvas.draw()

        # Remove the canvas from a previous call only once the new one has
        # been built, so a failed redraw leaves the old plot visible.
        previous_canvas = getattr(root, '_rainfall_canvas', None)
        if previous_canvas is not None:
            previous_canvas.get_tk_widget().destroy()

        canvas.get_tk_widget().pack()
        root._rainfall_canvas = canvas
    except Exception as e:
        messagebox.showerror("Error", f"Error plotting graph: {str(e)}")

# GUI 프로그램
def main():
    """Build the rainfall-prediction window and run the Tk event loop.

    The window offers a button that loads an Excel file, trains the models
    and plots the data, an entry plus button that predicts the rainfall for
    a typed year with the selected model, and a quit button.
    """
    root = tk.Tk()
    root.title("Rainfall Prediction")

    # Model selected by the most recent successful load; shared by the
    # two button callbacks below.
    best_model = None

    def on_load():
        """Load a file, train the models and show the plot."""
        nonlocal best_model  # Rebinds the variable owned by main()
        df = load_file()
        if df is None:
            return
        result = process_data(df)
        if result is None:
            return
        best_model, X_test, y_test = result
        plot_graph(root, df, best_model, X_test, y_test)  # Pass root to plot_graph

    def predict_rainfall():
        """Predict rainfall for the year typed into the entry field."""
        if best_model is None:
            # Without this the button silently did nothing before a load.
            messagebox.showwarning("Warning", "Please load an Excel file before predicting.")
            return
        try:
            year = int(year_entry.get())
            prediction = best_model.predict([[year]])
            result_label.config(text=f"Predicted Rainfall for {year}: {prediction[0]:.2f} mm")
        except ValueError:
            messagebox.showerror("Error", "Please enter a valid year.")

    load_button = tk.Button(root, text="Load Excel File", command=on_load)
    load_button.pack(pady=20)

    # Input for year prediction
    year_entry = tk.Entry(root)
    year_entry.pack(pady=10)
    year_entry.insert(0, "Enter year (e.g., 2035)")

    predict_button = tk.Button(root, text="Predict Rainfall", command=predict_rainfall)
    predict_button.pack(pady=10)

    # Label to display prediction results
    result_label = tk.Label(root, text="")
    result_label.pack(pady=10)

    # Quit Button
    quit_button = tk.Button(root, text="Quit", command=root.quit)
    quit_button.pack(pady=20)

    print("Starting the GUI...")  # This will print to the console
    root.mainloop()

# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

 

 

'정보통신기술(ICT)' 카테고리의 다른 글

시그모이드 함수  (0) 2024.10.04
로그 함수  (0) 2024.10.02
날씨 소프트웨어 코딩  (1) 2024.09.30
단위 계단 함수  (0) 2024.09.27
가우시안 델타 함수  (0) 2024.09.26