# Since I'm having problems with R, I'm leaving the Python code instead.
# Import necessary libraries
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Fetch the NLTK corpora used further down: the stop-word lists and the
# WordNet database that backs the lemmatizer.
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)
# Toy corpus: five short sentences about clean-energy technologies, each
# paired with a sequential integer id.
data = {
    'id': list(range(1, 6)),
    'contents': [
        "Solar energy is one of the most promising renewable energy sources available today.",
        "Wind power has the potential to provide a significant portion of the world's electricity needs.",
        "Hydropower is a well-established clean energy source that harnesses the power of moving water.",
        "Geothermal energy offers a sustainable and reliable energy source by tapping into the heat of the Earth's core.",
        "Bioenergy involves the use of organic materials, such as plants and waste, to generate electricity and heat.",
    ],
}
# Tabular view of the corpus with columns 'id' and 'contents'
df = pd.DataFrame.from_dict(data)
# Data Cleaning
def clean_text(text):
    """Return *text* normalised for token-level processing.

    Steps, in order:
      1. HTML tags are replaced by a space so that words on either side of
         a tag (``"a<br>b"``) do not fuse together.
      2. Apostrophes are dropped (``"world's"`` -> ``"worlds"``).
      3. Every other non-word, non-space character is replaced by a space,
         so hyphenated or slash-separated words stay separate tokens.
      4. The text is lower-cased.
      5. Runs of whitespace are collapsed to a single space and the ends
         are trimmed. This runs last because the earlier substitutions
         introduce extra spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned, lower-cased string with single-space separators.
    """
    text = re.sub(r'<[^>]+>', ' ', text)       # HTML tags -> separator
    text = re.sub(r"['\u2019]", '', text)      # straight and curly apostrophes
    text = re.sub(r'[^\w\s]', ' ', text)       # remaining punctuation -> separator
    text = text.lower()
    # Collapse whitespace only after all substitutions have been applied.
    return re.sub(r'\s+', ' ', text).strip()
# Store the normalised form of every document in its own column
df['cleaned'] = df['contents'].map(clean_text)
# Text Preprocessing: shared resources used by preprocess()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def preprocess(text):
    """Drop stop words from *text* and lemmatize the remaining tokens.

    The text is split on whitespace. Tokens found in the module-level
    ``stop_words`` set are skipped; the others are passed through
    ``lemmatizer.lemmatize`` and re-joined with single spaces.
    """
    kept = []
    for token in text.split():
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return ' '.join(kept)
# Run stop-word removal and lemmatization over every cleaned document
df['processed'] = df['cleaned'].map(preprocess)
# The word cloud takes a single string, so concatenate all documents
text = ' '.join(df['processed'].tolist())
# Build the word cloud; generate() fills the object in place from the text
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud.generate(text)
# Render the cloud as an image without axes, then show it
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('Word Cloud of Clean Energy Texts')
plt.axis('off')
plt.show()

