% Journal article: landslide susceptibility mapping for Penang Island using
% Random Forest and XGBoost with Bayesian hyperparameter optimization
% (Journal of Computer Science, vol. 21, no. 10, 2025).
% Notes: month uses the predefined bare macro; proper nouns in the title are
% brace-protected against sentence-casing styles; percent signs in the
% abstract are escaped so the field is safe if a style prints it.
@article{10.3844/jcssp.2025.2273.2291,
  author       = {Atok, Dorothy Anak Martin and Chai, Soo See and Goh, Kok Luong and Gautam, Neha and Chin, Kim On},
  title        = {Random Forest and Extreme Gradient Boosting with {Bayesian} Hyperparameter Optimization for Landslide Susceptibility Mapping in {Penang Island}, {Malaysia}},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {21},
  number       = {10},
  pages        = {2273--2291},
  year         = {2025},
  month        = dec,
  doi          = {10.3844/jcssp.2025.2273.2291},
  url          = {https://thescipub.com/abstract/jcssp.2025.2273.2291},
  article_type = {journal},
  abstract     = {Landslide susceptibility models often face challenges of overfitting and overestimation. This research focuses on improving the predictive capabilities of the Extreme Gradient Boosting (XGBoost) and Random Forest (RF) algorithms by applying Bayesian Hyperparameter Optimization (BayesOpt). Penang Island, a region in Malaysia prone to frequent landslides, was chosen as the study area. Ten Landslide Conditioning Factors (LCFs), including elevation, slope angle, NDVI, and proximity to streams and roads, were derived using Geographic Information Systems (GIS). From the total of 886 landslide and non-landslide data points, a 70:30 split was employed for training and testing, respectively. BayesOpt-RF emerged as the top-performing model among all those assessed with an AUC of 99.50\% (Success Rate) and 95.80\% (Prediction Rate). RF (SR: 100.00\%, PR: 95.60\%), XGBoost (SR: 100.00\%, PR: 95.20\%), and BayesOpt-XGBoost (SR: 96.70\%, PR: 93.00\%) followed. While BayesOpt did not consistently improve prediction performance, it effectively minimized overfitting and ensured optimal model operation. The generated landslide susceptibility maps are significant for effective site selection, infrastructure planning, and disaster mitigation.},
}