EXPERIMENT-8
Natural Language Processing Lab
Write a Python program to convert an audio file to text and a text file to an
audio file using the NLTK package.
Requirements
To run this program, you'll need to install the following packages:
pip install nltk SpeechRecognition gtts pydub
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import speech_recognition as sr
from gtts import gTTS
import os
# Download the NLTK tokenizer data used by word_tokenize/sent_tokenize
# (only needed once). NLTK 3.8.2 and later load the 'punkt_tab' resource,
# while older releases load 'punkt', so fetch both; nltk.download() just
# reports an error and returns False for an id the installed index lacks.
for _tokenizer_resource in ('punkt', 'punkt_tab'):
    nltk.download(_tokenizer_resource)
from pydub import AudioSegment
import os
def mp3_to_wav(mp3_file_path, wav_file_path=None):
    """
    Convert an MP3 file to WAV format using pydub.

    Args:
        mp3_file_path (str): Path to the input MP3 file.
        wav_file_path (str): Path to save the output WAV file (optional).
            If not provided, the input path with its extension replaced
            by ``.wav`` is used.

    Returns:
        str: Path to the created WAV file, or None if the conversion
        failed. The reason for a failure is printed, never raised.
    """
    try:
        # Fail early with a readable message instead of a decoder error.
        if not os.path.isfile(mp3_file_path):
            raise FileNotFoundError(f"input file not found: {mp3_file_path}")

        # If output path not specified, create one by replacing extension
        if wav_file_path is None:
            wav_file_path = os.path.splitext(mp3_file_path)[0] + '.wav'

        # Exporting onto the input path would overwrite the source audio.
        # This happens with the default output name whenever the input
        # already ends in ".wav".
        source_path = os.path.normcase(os.path.abspath(mp3_file_path))
        target_path = os.path.normcase(os.path.abspath(wav_file_path))
        if source_path == target_path:
            raise ValueError(
                f"output path is the same as the input path: {wav_file_path}"
            )

        # Load MP3 file
        audio = AudioSegment.from_mp3(mp3_file_path)
        # Export as WAV
        audio.export(wav_file_path, format="wav")
        print(f"Successfully converted {mp3_file_path} to {wav_file_path}")
        return wav_file_path
    except Exception as e:
        # Broad on purpose: callers rely on a None return for any failure
        # (missing file, decode error, missing FFmpeg, unwritable output).
        print(f"Error converting MP3 to WAV: {e}")
        return None
# Example usage
if __name__ == "__main__":
    input_mp3 = "input.mp3"  # Change to your MP3 file path
    output_wav = "output.wav"  # Change to desired WAV file path
    # This demo runs on every script launch, so only attempt the sample
    # conversion when the sample file is actually present; otherwise each
    # run would start with a spurious "Error converting MP3 to WAV" message.
    if os.path.isfile(input_mp3):
        mp3_to_wav(input_mp3, output_wav)
def audio_to_text(mp3_file_path):
    """
    Transcribe an audio file with Google Speech Recognition.

    WAV, AIFF and FLAC files are handed to the recognizer as they are.
    Any other input (typically MP3) is first converted to WAV with
    mp3_to_wav. The recognized text is also tokenized with NLTK and the
    tokens are printed.

    Args:
        mp3_file_path (str): Path to the audio file to transcribe.

    Returns:
        str: The recognized text, or None if any step failed. The reason
        for a failure is printed, never raised.
    """
    # File extensions the recognizer's AudioFile reader accepts directly,
    # so no conversion (and no FFmpeg) is needed for them.
    native_extensions = ('.wav', '.aiff', '.aif', '.aifc', '.flac')
    try:
        if not os.path.isfile(mp3_file_path):
            raise FileNotFoundError(f"audio file not found: {mp3_file_path}")

        extension = os.path.splitext(mp3_file_path)[1].lower()
        if extension in native_extensions:
            wav_file = mp3_file_path
        else:
            # First convert MP3 to WAV
            wav_file = mp3_to_wav(mp3_file_path)
            if wav_file is None:
                # mp3_to_wav already printed the cause; stop here rather
                # than passing None on to the recognizer.
                raise RuntimeError("could not convert the input to WAV")

        # Then do speech recognition
        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_file) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)

        # NLP processing with NLTK
        tokens = word_tokenize(text)
        print("Recognized text tokens:", tokens)
        return text
    except Exception as e:
        # Broad on purpose: callers rely on a None return for any failure
        # (missing file, unreadable audio, network error, no speech found).
        print(f"Error in MP3 to text conversion: {e}")
        return None
def text_to_audio(text, output_file="output.mp3", language='en'):
    """
    Convert text to speech and save it as an audio file using gTTS.

    The text is split into sentences with NLTK and re-joined with single
    spaces before being sent to gTTS.

    Args:
        text (str): The text to speak.
        output_file (str): Path of the audio file to write.
        language (str): Language code passed to gTTS.

    Returns:
        str: ``output_file`` on success, or None if the conversion failed.
        The reason for a failure is printed, never raised.
    """
    try:
        # Reject empty input up front with a clear message instead of
        # relying on an error from deeper inside the speech library.
        if not text or not text.strip():
            raise ValueError("no text to convert")

        # Tokenize text into sentences for better processing
        sentences = sent_tokenize(text)
        processed_text = ' '.join(sentences)
        tts = gTTS(text=processed_text, lang=language, slow=False)
        tts.save(output_file)
        print(f"Audio file saved as {output_file}")
        return output_file
    except Exception as e:
        # Broad on purpose: callers rely on a None return for any failure
        # (bad input, network error, unwritable output path).
        print(f"Error in text-to-speech conversion: {e}")
        return None
def text_file_to_audio(text_file_path, output_file="output.mp3", language='en'):
    """
    Speak the contents of a UTF-8 text file into an audio file.

    The file is read in full and passed to text_to_audio together with
    ``output_file`` and ``language``.

    Returns:
        Whatever text_to_audio returns for the file's contents, or None
        if an exception was raised along the way (the error is printed).
    """
    try:
        with open(text_file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
        saved_path = text_to_audio(contents, output_file, language)
    except Exception as err:
        print(f"Error reading text file: {err}")
        saved_path = None
    return saved_path
def _open_with_default_app(path):
    """Open *path* with the operating system's default application."""
    # Imported here so this block does not depend on the file's import list.
    import subprocess
    import sys

    try:
        if os.name == 'nt':
            os.startfile(path)
        else:
            opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
            # Argument list, not a shell string: the file name was typed by
            # the user and may contain spaces or shell metacharacters.
            subprocess.run([opener, path], check=False)
    except OSError as e:
        print(f"Could not open {path}: {e}")


def main():
    """
    Run the interactive menu for the conversion tool.

    Option 1 transcribes an audio file with audio_to_text and optionally
    saves the text to a file. Option 2 converts a text file to speech with
    text_file_to_audio and optionally opens the resulting audio file.
    Any other input prints "Invalid choice".
    """
    print("Audio and Text Conversion Tool")
    print("1. Audio file to Text")
    print("2. Text file to Audio")
    choice = input("Enter your choice (1 or 2): ").strip()

    if choice == '1':
        audio_file = input("Enter audio file path (MP3, WAV, AIFF, FLAC): ").strip()
        text = audio_to_text(audio_file)
        if text:
            print("\nConverted Text:")
            print(text)
            # Save to file
            save_choice = input("Save to text file? (y/n): ").strip().lower()
            if save_choice == 'y':
                output_file = input(
                    "Enter output text file name (e.g., output.txt): "
                ).strip()
                try:
                    with open(output_file, 'w', encoding='utf-8') as f:
                        f.write(text)
                except OSError as e:
                    # Report a bad path instead of crashing the menu.
                    print(f"Could not save text to {output_file}: {e}")
                else:
                    print(f"Text saved to {output_file}")
    elif choice == '2':
        text_file = input("Enter text file path: ").strip()
        output_audio = input(
            "Enter output audio file name (e.g., output.mp3): "
        ).strip()
        result = text_file_to_audio(text_file, output_audio)
        if result:
            print(f"Successfully created audio file: {result}")
            # Option to play the audio
            play_choice = input("Play the audio file? (y/n): ").strip().lower()
            if play_choice == 'y':
                _open_with_default_app(result)
    else:
        print("Invalid choice")


if __name__ == "__main__":
    main()
Additionally, you'll need FFmpeg installed on your system (pydub uses it to decode MP3 files):
FFmpeg Installation Guide for Windows
1. Download FFmpeg:
   - Direct download link: https://www.gyan.dev/ffmpeg/builds/
   - Choose: ffmpeg-release-essentials.zip (latest version)
   - Alternative official source: https://ffmpeg.org/download.html
2. Install FFmpeg:
   - Extract the ZIP file to a permanent location (e.g., C:\ffmpeg)
   - Copy the path to the bin folder (e.g., C:\ffmpeg\bin)
3. Add FFmpeg to System PATH:
   - Press Win + R, type sysdm.cpl, and press Enter
   - Go to the "Advanced" tab → "Environment Variables"
   - Under "System variables", find and select "Path" → click "Edit"
   - Click "New" and paste your FFmpeg bin path (e.g., C:\ffmpeg\bin)
   - Click "OK" on all windows to save
4. Verify Installation:
   - Open a new Command Prompt (Win + R, type cmd) so the updated PATH is picked up
   - Run: ffmpeg -version
   - You should see version information if FFmpeg is installed correctly
Notes:
1. Audio to Text:
   - Uses the Google Speech Recognition API (free, but requires an internet connection)
   - Recognition works on WAV, AIFF, or FLAC audio
   - MP3 input is converted to WAV with pydub before recognition; other formats need to be converted first
2. Text to Audio:
   - Uses Google Text-to-Speech (gTTS), which requires an internet connection
   - Outputs MP3 by default
   - Includes NLTK sentence tokenization for better speech flow