import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import tkinter as tk
from tkinter import filedialog, messagebox
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
# Load an Excel file chosen by the user
def load_file():
    """Ask the user for an Excel file and return its contents as a DataFrame.

    A file-open dialog restricted to ``*.xlsx`` / ``*.xls`` is shown. The
    selected file is read with ``pd.read_excel`` and surrounding whitespace
    is stripped from the column names.

    Returns:
        The loaded DataFrame, or ``None`` when the dialog is cancelled or the
        file cannot be read (an error dialog is shown in the latter case).
    """
    chosen_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])
    if chosen_path:
        try:
            frame = pd.read_excel(chosen_path)
            # Headers typed with stray spaces should still match by name.
            frame.columns = frame.columns.str.strip()
        except Exception as err:
            messagebox.showerror("Error", f"Failed to load or process the file.\nError: {str(err)}")
        else:
            return frame
    return None
# Preprocess the data and evaluate the models
def process_data(df):
    """Fit the candidate regressors on Year -> Rainfall and return the best one.

    The frame must contain ``Year`` and ``Rainfall`` columns. Missing values
    in those two columns are replaced by the column mean after a warning
    dialog. The data is then split 80/20 with a fixed seed, every candidate
    model is fitted on the training part, and the model with the lowest
    test-set MSE is selected. MSE and R2 of each model are printed to stdout.

    Args:
        df: DataFrame loaded from the user's file. It is modified in place
            when ``Year`` or ``Rainfall`` contain missing values, because the
            caller plots the same frame afterwards.

    Returns:
        ``(best_model, X_test, y_test)`` on success, or ``None`` after an
        error dialog when a required column is missing or any step raises.
    """
    try:
        # Check for required columns
        required_columns = ['Year', 'Rainfall']
        if not all(col in df.columns for col in required_columns):
            messagebox.showerror("Error", f"DataFrame must contain the following columns: {', '.join(required_columns)}")
            return None

        # Only the modelled columns are checked and averaged: DataFrame.mean()
        # over the whole frame raises TypeError on text columns in
        # pandas >= 2.0, and gaps in unrelated columns do not affect the fit.
        if df[required_columns].isnull().values.any():
            messagebox.showwarning("Warning", "Dataset contains missing values. Filling them with mean values.")
            # fillna with a Series fills only the columns named in its index.
            df.fillna(df[required_columns].mean(), inplace=True)

        data = df[required_columns]  # Extract relevant columns
        X = data[['Year']].values
        y = data['Rainfall'].values

        # Split into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Compare several models; both the split and the forest are seeded so
        # that the same file always selects the same model.
        models = {
            'Ridge': Ridge(alpha=1.0),  # Adjust alpha for Ridge regression
            'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        }

        best_model = None
        best_mse = float('inf')
        for name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            print(f'{name} MSE: {mse}, R2: {r2}')
            if mse < best_mse:
                best_mse = mse
                best_model = model
        return best_model, X_test, y_test
    except Exception as e:
        # GUI boundary: report the failure to the user instead of crashing.
        messagebox.showerror("Error", f"Error processing data: {str(e)}")
        return None
# Draw the graph
def plot_graph(root, df, model, X_test, y_test):
    """Embed a rainfall-versus-year chart with a polynomial trend line in *root*.

    Draws every row of ``df`` (blue), the held-out test points (green) and a
    9th-degree least-squares polynomial through the full data (red). A chart
    left by an earlier call is removed first, so loading another file
    replaces the plot instead of stacking a second one below it.

    Args:
        root: Tk container that hosts the canvas. The current canvas is
            remembered on it as ``_rainfall_canvas``.
        df: DataFrame with ``Year`` and ``Rainfall`` columns.
        model: Fitted regressor. Not used for drawing; kept so existing
            callers keep working.
        X_test: Test-set years to highlight.
        y_test: Test-set rainfall values matching ``X_test``.

    Any exception is reported through an error dialog rather than raised.
    """
    fig = None
    try:
        # Remove the previous chart and release its figure; pyplot keeps every
        # figure it creates alive until it is closed explicitly.
        previous = getattr(root, "_rainfall_canvas", None)
        if previous is not None:
            previous.get_tk_widget().destroy()
            plt.close(previous.figure)
            root._rainfall_canvas = None

        # Create a Matplotlib figure
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(df['Year'], df['Rainfall'], color='blue', label='Actual Rainfall')
        ax.scatter(X_test, y_test, color='green', label='Test Data')  # Separate the test data

        years = df['Year'].to_numpy(dtype=float)
        rainfall = df['Rainfall'].to_numpy(dtype=float)

        # Fit a 9th degree polynomial to the data. Polynomial.fit maps the
        # years onto [-1, 1] before solving, which keeps the least-squares
        # problem well conditioned; powers of raw year values up to the 9th
        # are not.
        polynomial = np.polynomial.Polynomial.fit(years, rainfall, 9)

        # Generate x values for the polynomial line
        x_range = np.linspace(years.min(), years.max(), 100)
        y_range = polynomial(x_range)
        ax.plot(x_range, y_range, color='red', label='9th Degree Polynomial Fit')

        ax.set_title('Rainfall Prediction Over the Years')
        ax.set_xlabel('Year')
        ax.set_ylabel('Rainfall (mm)')
        ax.legend()
        ax.grid(True)

        # Embed the plot in Tkinter
        canvas = FigureCanvasTkAgg(fig, master=root)
        canvas.draw()
        canvas.get_tk_widget().pack()
        root._rainfall_canvas = canvas
    except Exception as e:
        # Do not leave a half-built figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
        messagebox.showerror("Error", f"Error plotting graph: {str(e)}")
# GUI program
def main():
    """Build the Tkinter window, wire up its buttons and run the event loop.

    The window offers a button that loads an Excel file, trains the models
    and plots the data, an entry plus button that predicts rainfall for a
    typed year using the selected model, and a quit button.
    """
    root = tk.Tk()
    root.title("Rainfall Prediction")

    # Model chosen by the most recent successful load; None until then.
    best_model = None
    placeholder = "Enter year (e.g., 2035)"

    def on_load():
        """Load a file, train the models and show the plot."""
        nonlocal best_model  # Allow access to the outer variable
        df = load_file()
        if df is None:
            return
        result = process_data(df)
        if result is None:
            return
        best_model, X_test, y_test = result
        plot_graph(root, df, best_model, X_test, y_test)  # Pass root to plot_graph

    def predict_rainfall():
        """Predict rainfall for the year typed into the entry field."""
        if best_model is None:
            # Without this the button silently did nothing before a load.
            messagebox.showinfo("Info", "Please load an Excel file first.")
            return
        # Only the parse is guarded, so a failure inside predict() is not
        # misreported as a bad year.
        try:
            year = int(year_entry.get())
        except ValueError:
            messagebox.showerror("Error", "Please enter a valid year.")
            return
        prediction = best_model.predict([[year]])
        result_label.config(text=f"Predicted Rainfall for {year}: {prediction[0]:.2f} mm")

    def clear_placeholder(_event):
        """Drop the hint text the first time the entry gets focus."""
        if year_entry.get() == placeholder:
            year_entry.delete(0, tk.END)

    load_button = tk.Button(root, text="Load Excel File", command=on_load)
    load_button.pack(pady=20)

    # Input for year prediction
    year_entry = tk.Entry(root)
    year_entry.pack(pady=10)
    year_entry.insert(0, placeholder)
    year_entry.bind("<FocusIn>", clear_placeholder)

    predict_button = tk.Button(root, text="Predict Rainfall", command=predict_rainfall)
    predict_button.pack(pady=10)

    # Label to display prediction results
    result_label = tk.Label(root, text="")
    result_label.pack(pady=10)

    # Quit Button
    quit_button = tk.Button(root, text="Quit", command=root.quit)
    quit_button.pack(pady=20)

    print("Starting the GUI...")  # This will print to the console
    root.mainloop()
# Start the GUI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
'정보통신기술(ICT)' 카테고리의 다른 글
시그모이드 함수 (0) | 2024.10.04 |
---|---|
로그 함수 (0) | 2024.10.02 |
날씨 소프트웨어 코딩 (1) | 2024.09.30 |
단위 계단 함수 (0) | 2024.09.27 |
가우시안 델타 함수 (0) | 2024.09.26 |