% Journal article: "Vision Tune" -- Journal of Computer Science, vol. 22, no. 4 (2026).
% The citation key is the article's DOI; keep it unchanged so existing \cite commands resolve.
@article{10.3844/jcssp.2026.1476.1483,
  author       = {Kadu, Lukesh Rameshpant and Deshpande, Manoj and Pawar, Vijaykumar},
  title        = {{Vision Tune}: A Deep Learning Framework for Sentiment Driven Video, Image and Music Creation},
  journal      = {Journal of Computer Science},
  volume       = {22},
  number       = {4},
  pages        = {1476--1483},
  year         = {2026},
  month        = apr,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2026.1476.1483},
  url          = {https://thescipub.com/abstract/jcssp.2026.1476.1483},
  abstract     = {Artificial intelligence has enabled powerful generative models for text, images, video, and music, yet most tools still operate independently without a unified, multi-modal workflow. This article proposes an integrated AI framework, Vision Tune, that consolidates these isolated capabilities into a single, sentiment-aware platform for end-to-end media creation. The system leverages deep learning and multi-scope AI models to automatically generate written content, images, videos, and music for both creative and analytical applications, while emphasizing scalability, modular design, and user-centric interaction. By supporting cross-domain media synthesis and sentiment-driven customization, the framework targets real-world use cases in marketing, education, entertainment, and content production, where coordinated multi-modal outputs can enhance engagement and productivity. Beyond unification, the work highlights how the proposed architecture advances current AI media pipelines by reducing tool fragmentation, enabling cross-modal consistency, and providing a foundation for future extensions such as real-time generation, personalization, and human-AI collaborative creation.},
  article_type = {journal},
}