import numpy as np
from PIL import Image
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import matplotlib.font_manager as fm
# Build a word cloud from the headlines on a Naver News section page and
# display it, shaped by an image mask.

# Korean font setup: the font file passed to WordCloud for rendering text.
font_path = './NanumGothic.ttf'  # Adjust this path to match your environment.

# Image mask: load it inside a context manager so the file handle is closed
# once the pixel data has been copied into the array.
with Image.open('./cloud.png') as mask_image:
    alice_mask = np.array(mask_image)

# Web crawling: fetch the news section page with a browser-like User-Agent.
url = "https://news.naver.com/section/105"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as if it were news.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
title_text = soup.find_all(class_='sa_text_strong')

# Join the headlines with a space. Concatenating them with no separator would
# glue the last word of one headline to the first word of the next and
# distort the word frequencies.
concatStr = ' '.join(
    dsc_area_element.text.strip() for dsc_area_element in title_text
)
if not concatStr:
    # Fail early with a clear message instead of handing empty text to WordCloud.
    raise ValueError("No headlines found on the page; the page layout may have changed.")

# Stopword setup: start from the library defaults and add custom entries.
stopword = set(STOPWORDS)
stopword.add('said')

# Word cloud generation.
wc = WordCloud(
    font_path=font_path,
    background_color='white',
    max_words=2000,
    mask=alice_mask,
    stopwords=stopword,
)
wc = wc.generate(concatStr)

# Show the mask image on its own first.
plt.figure(figsize=(8, 8))
plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis('off')
plt.show()  # Must be called; a bare `plt.show` is a no-op expression.

# Visualize the resulting word cloud.
plt.figure(figsize=(12, 12))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()
<Result>
'Analysis' 카테고리의 다른 글

| 제목 | 작성일 |
|---|---|
| [데이터마이닝] Lab8-지도그리기 (4) | 2024.12.05 |
| [데이터마이닝] Lab7-aggregation group by (0) | 2024.12.05 |
| [데이터마이닝] pandas-taxis 5lab (0) | 2024.12.05 |
| [데이터마이닝] Scikit-learn Winedata Lab4-sample (0) | 2024.12.05 |
| [데이터마이닝] matplotlib - random 3 lap 과제 (0) | 2024.12.05 |