Post

네이버 jtbc 코멘트 크롤링하기 (selenium만 사용해서!)

뉴스 코멘트 크롤링 예시(jtbc)

바쁜 사람들을 위한 코드


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from selenium import webdriver
import time
import pandas as pd
from selenium.webdriver.common.by import By
import selenium

def scroll_down(driver, pause=0.5):
    """Scroll the browser window to the bottom of the current page.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver
            in this script).
        pause: Seconds to sleep after scrolling, to give the page time to
            update (presumably for lazily loaded content). Defaults to
            0.5, the delay that used to be hard-coded.
    """
    # document.body.scrollHeight is the full height of the page content
    # (not the window size), so scrolling to it lands at the very bottom.
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
    time.sleep(pause)

# Crawl the comments of a single Naver news article and save them as a
# tab-separated text file.
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(options=chrome_options)
try:
    # The implicit wait is a session-wide setting, so it is configured once
    # instead of being re-issued after every click.
    driver.implicitly_wait(10)

    url = "https://n.news.naver.com/article/comment/437/0000366181"  # comment page to crawl
    driver.get(url)
    scroll_down(driver=driver)

    all_comment_count_str = driver.find_element(By.CLASS_NAME, 'u_cbox_count').text
    print(all_comment_count_str)
    # Large counts may be rendered with thousands separators ("1,234"),
    # which int() rejects, so strip them before converting.
    all_comment_count = int(all_comment_count_str.replace(',', '').strip())

    # Number of "more" clicks to attempt; assumes each click reveals 20
    # further comments (page size taken from the original script).
    scroll_down_comment = all_comment_count // 20

    for t in range(1, scroll_down_comment + 1):
        try:
            driver.find_element(By.CLASS_NAME, 'u_cbox_more_wrap').click()
        except (NoSuchElementException, ElementNotInteractableException):
            # The "more" button is gone or hidden: everything that can be
            # loaded is loaded, so stop clicking instead of crashing and
            # losing the comments collected so far.
            break
        scroll_down(driver=driver)
        print(t)
    scroll_down(driver=driver)

    date = driver.find_elements(By.CLASS_NAME, 'u_cbox_date')
    comment = driver.find_elements(By.CLASS_NAME, 'u_cbox_contents')

    # Pair timestamps and bodies positionally; zip() stops at the shorter
    # list, which keeps both columns the same length for the DataFrame.
    date_text = []
    comment_text = []
    for da, co in zip(date, comment):
        date_text.append(da.text)
        comment_text.append(co.text)

    print(date_text)
    print(comment_text)

    df = pd.DataFrame()
    df['시간'] = date_text
    df['코멘트'] = comment_text

    df.to_csv("crawling_single_news_comments.txt", index=False, sep='\t')

    # Short pause before shutting the browser down (kept from the original script).
    time.sleep(5)
finally:
    # Always release the browser process, even when a step above fails.
    driver.quit()

코드 설명

라이브러리 가져오기

라이브러리
1
2
3
4
5
from selenium import webdriver
import time
import pandas as pd
from selenium.webdriver.common.by import By
import selenium


페이지 내리기

scroll down
1
2
3
4
def scroll_down(driver, pause=0.5):
    """Scroll the browser window to the bottom of the current page.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver
            in this script).
        pause: Seconds to sleep after scrolling, to give the page time to
            update (presumably for lazily loaded content). Defaults to
            0.5, the delay that used to be hard-coded.
    """
    # document.body.scrollHeight is the full height of the page content
    # (not the window size), so scrolling to it lands at the very bottom.
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
    time.sleep(pause)
This post is licensed under CC BY 4.0 by the author.