# 반응형  (leftover blog page-layout marker; not part of the script)
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from datetime import datetime
import pandas as pd
import time
import re
# Location of the ChromeDriver executable; this path is specific to the
# machine the script was written on and must be adjusted for other setups.
CHROMEDRIVER_PATH = '/Users/손학규/Downloads/chromedriver_win32 (1)/chromedriver'

# Single shared browser session used by the scraping code below.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
def getCmt(url, *, initial_pagedowns=7, comment_pagedowns=20,
           scroll_pause=2, output_path='log.xlsx'):
    """Scrape the comments of a YouTube video page and save them to Excel.

    The page is opened in the module-level ``driver``, scrolled so that the
    comment section is loaded, switched to the "newest first" ordering when
    the sort menu can be found, scrolled further to load more comments, and
    finally parsed with BeautifulSoup.  Each comment is stored as a
    ``[text, timestamp]`` row and the rows are written to ``output_path``.

    Args:
        url: Address of the YouTube video page.
        initial_pagedowns: Number of PAGE_DOWN key presses sent before the
            sort menu is used (enough to bring the comment section in).
        comment_pagedowns: Number of PAGE_DOWN key presses sent afterwards to
            load additional comments.
        scroll_pause: Seconds to wait after every key press so the page can
            load new content.
        output_path: Destination of the Excel file.

    Returns:
        None.  Progress, the video title and every comment are printed.
    """
    # Imported locally so this function does not depend on edits to the
    # file's import block.  ``find_element(By..., ...)`` replaces the
    # ``find_element_by_*`` helpers, which newer Selenium releases removed.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    driver.get(url)
    page_body = driver.find_element(By.TAG_NAME, "body")
    print('시작')

    # Scroll down so the lazily-loaded comment section appears.
    _scroll_down(page_body, initial_pagedowns, scroll_pause)

    # Best effort: switch the comment ordering to "newest first".  The menu
    # entry is matched by its Korean label, so on other UI languages (or if
    # the page layout changed) this step is skipped rather than aborting.
    try:
        driver.find_element(By.XPATH, '//*[@id="sort-menu"]').click()
        driver.find_element(
            By.XPATH,
            '//*[@id="menu"]/a[2]/paper-item/paper-item-body/div[text()="최근 날짜순"]',
        ).click()
    except WebDriverException as exc:
        print('could not change comment sort order: {}'.format(exc))

    # Keep scrolling to load more comments.
    _scroll_down(page_body, comment_pagedowns, scroll_pause)

    comments = _parse_comments(driver.page_source)
    print(len(comments))

    # No ``encoding`` argument: pandas never used it for Excel output and
    # recent pandas versions reject it.
    pd.DataFrame(comments).to_excel(output_path)


def _scroll_down(element, presses, pause):
    """Send PAGE_DOWN to ``element`` ``presses`` times, sleeping ``pause`` seconds after each."""
    for _ in range(presses):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(pause)


def _parse_comments(html):
    """Print the video title and return ``[text, timestamp]`` rows parsed from ``html``."""
    soup = BeautifulSoup(html, 'html.parser')
    root = soup.find("body")
    if root is None:
        # Fragment without a <body> element: search the whole document.
        root = soup

    titles = root.find_all(
        'yt-formatted-string',
        attrs={'class': 'style-scope ytd-video-primary-info-renderer'},
    )
    # The title element may be absent (e.g. page not fully loaded); do not
    # let that abort the comment extraction.
    if titles:
        print(titles[0].get_text())

    threads = root.find_all(
        'ytd-comment-renderer',
        attrs={'class': 'style-scope ytd-comment-thread-renderer'},
    )
    rows = []
    for thread in threads:
        # Comment text nodes.
        contents = thread.find_all('yt-formatted-string', attrs={'id': 'content-text'})
        # Posting time (relative timestamp link); empty when it is missing.
        stamp_links = thread.select('yt-formatted-string > a')
        stamp = stamp_links[0].get_text() if stamp_links else ''

        for node in contents:
            # get_text() also handles comments with nested markup (links,
            # line breaks), for which ``.string`` would be None.
            cleaned = re.sub(r'[^\w]', ' ', node.get_text())
            rows.append([cleaned, stamp])
            print(cleaned)

        print(stamp)
        print('-' * 50)
    return rows
# To process several videos in one run, call getCmt() once per URL here
# (an earlier draft looped over the entries of urllist['0'] from index 7 on).
# Each call writes log.xlsx, so later runs overwrite earlier results.

# Placeholder: replace with the URL of the video whose comments to extract.
url = '댓글을 추출할 url 입력'
try:
    getCmt(url)
    print(url)
    print('finished')
finally:
    # Always close the browser so no Chrome/chromedriver process is left
    # running, even when scraping fails part-way through.
    driver.quit()
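# 반응형  (leftover blog page-layout marker; not part of the script)
# 댓글  (leftover blog "comments" section heading; not part of the script)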