728x90
1. pip install selenium
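2. Download "chromedriver.exe" (the version matching your Chrome browser) and copy it into the script's folder or another folder on your PATH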
3. Run the code below: python kpop_crawling.py
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import urllib.request
import os
# Search targets: each group name maps to the list of member names that are
# used both as the image search query and as the download directory name.
kpop_dict = {
    "BTS": [
        "RM",
        "Jin",
        "Suga",
        "J-Hope",
        "Jimin",
        "V",
        "Jungkook",
    ],
    "Black Pink": [
        "Jisoo",
        "Jennie",
        "Rosé",
        "Lisa",
    ],
}
def crawling(target_name, number_of_pictures=50, scroll_pause_time=3):
    """Search Google Images for ``target_name`` and save the results locally.

    Uses the module-level ``driver`` (a Selenium WebDriver). Images are
    written to the current working directory as ``<target_name><index>.jpg``
    with ``index`` starting at 0.

    Args:
        target_name: Search query; also used as the file-name prefix.
        number_of_pictures: Maximum number of images to save. Increase it to
            collect more pictures.
        scroll_pause_time: Seconds to wait after each scroll so that more
            results can load. Increase it on a slow network.

    Returns:
        The number of images actually saved (at most ``number_of_pictures``).
    """
    # Function-scope imports keep this block self-contained. The
    # ``find_element(By...)`` form works on Selenium 3 and 4, whereas the
    # ``find_element_by_*`` helpers were removed in Selenium 4.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    # Absolute XPath of the enlarged image shown after a thumbnail is clicked.
    # NOTE(review): tied to the page markup at the time of writing - confirm
    # it still matches if no images are being saved.
    preview_xpath = (
        '/html/body/div[2]/c-wiz/div[3]/div[2]/div[3]/div/div/div[3]'
        '/div[2]/c-wiz/div[1]/div[1]/div/div[2]/a/img'
    )

    driver.get("https://www.google.co.kr/imghp?hl=ko&tab=wi&ogbl")
    elem = driver.find_element(By.NAME, "q")
    elem.send_keys(target_name)
    elem.send_keys(Keys.RETURN)

    count = 0      # images saved so far
    processed = 0  # thumbnails already tried, so no thumbnail is clicked twice
    last_height = driver.execute_script("return document.body.scrollHeight")

    while count < number_of_pictures:
        # Work through the thumbnails that appeared since the previous pass.
        thumbnails = driver.find_elements(By.CSS_SELECTOR, ".rg_i.Q4LuWd")
        for image in thumbnails[processed:]:
            if count >= number_of_pictures:
                break
            try:
                image.click()
                time.sleep(2)  # give the enlarged preview time to load
                img_url = driver.find_element(
                    By.XPATH, preview_xpath
                ).get_attribute("src")
                if not img_url:
                    continue
                urllib.request.urlretrieve(
                    img_url, target_name + str(count) + ".jpg"
                )
            except (WebDriverException, OSError, ValueError):
                # Best effort: skip thumbnails that cannot be clicked, whose
                # preview is missing, or whose download fails.
                continue
            count += 1
        processed = len(thumbnails)

        if count >= number_of_pictures:
            break

        # Not enough images yet: scroll to the bottom to load more results.
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);"
        )
        time.sleep(scroll_pause_time)
        new_height = driver.execute_script(
            "return document.body.scrollHeight"
        )
        if new_height == last_height:
            # The page stopped growing; presumably ".mye4qd" is the
            # "show more results" button. If it cannot be clicked there is
            # nothing more to load.
            try:
                driver.find_element(By.CSS_SELECTOR, ".mye4qd").click()
            except WebDriverException:
                break
        last_height = new_height

    return count
# Script entry: create one directory per group and per member, then download
# each member's pictures into that member's directory.
# ``driver`` stays a module-level name because crawling() reads it as a global.
driver = webdriver.Chrome()
start_dir = os.getcwd()
try:
    for group, members in kpop_dict.items():
        group_dir = os.path.join(start_dir, group)
        # exist_ok lets the script be re-run without failing on directories
        # left over from an earlier run.
        os.makedirs(group_dir, exist_ok=True)
        for member in members:
            member_dir = os.path.join(group_dir, member)
            os.makedirs(member_dir, exist_ok=True)
            # crawling() saves files relative to the current directory.
            os.chdir(member_dir)
            crawling(member)
finally:
    # Always restore the working directory and shut the browser session down,
    # even when a crawl raises. quit() ends the whole WebDriver session,
    # whereas close() only closes the current window.
    os.chdir(start_dir)
    driver.quit()
'Python' 카테고리의 다른 글
Python 정리 노트 (0) | 2021.11.24 |
---|---|
연도별 디렉터리 만들어서 파일 옮기는 코드 (0) | 2021.02.08 |
연예인 사진을 다운로드 받으면서 동시에 얼굴만 오려내서 따로 저장하는 코드 (0) | 2021.01.22 |
python web server (0) | 2021.01.04 |