% Journal article record; the citation key is the article's DOI (same value as the doi field).
% article_type is not a standard BibTeX field and is kept as-is from the original record.
@article{10.3844/jcssp.2026.1476.1483,
  author       = {Kadu, Lukesh Rameshpant and Deshpande, Manoj and Pawar, Vijaykumar},
  title        = {{Vision Tune}: A Deep Learning Framework for Sentiment Driven Video, Image and Music Creation},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {22},
  number       = {4},
  pages        = {1476--1483},
  year         = {2026},
  month        = apr,
  doi          = {10.3844/jcssp.2026.1476.1483},
  url          = {https://thescipub.com/abstract/jcssp.2026.1476.1483},
  article_type = {journal},
  abstract     = {Artificial intelligence has enabled powerful generative models for text, images, video, and music, yet most tools still operate independently without a unified, multi-modal workflow. This article proposes an integrated AI framework, Vision Tune, that consolidates these isolated capabilities into a single, sentiment-aware platform for end-to-end media creation. The system leverages deep learning and multi-scope AI models to automatically generate written content, images, videos, and music for both creative and analytical applications, while emphasizing scalability, modular design, and user-centric interaction. By supporting cross-domain media synthesis and sentiment-driven customization, the framework targets real-world use cases in marketing, education, entertainment, and content production, where coordinated multi-modal outputs can enhance engagement and productivity. Beyond unification, the work highlights how the proposed architecture advances current AI media pipelines by reducing tool fragmentation, enabling cross-modal consistency, and providing a foundation for future extensions such as real-time generation, personalization, and human-AI collaborative creation.},
}