728x90
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import urllib.request
import os
import cv2
import sys
# Group name -> member names. The main script creates one directory per group
# and per member, and passes each member name to crawling() as the search term.
kwansang = {
    "BTS": [
        "RM",
        "Jin",
        "Suga",
        "J-Hope",
        "Jimin",
        "V",
        "Jungkook",
    ],
    "Black Pink": [
        "Jisoo",
        "Jennie",
        "Rosé",
        "Lisa",
    ],
}
def face_crop(image_name):
    """Detect frontal faces in an image file and save each crop under ./faces/.

    The first detected face is written to ``./faces/<image_name>``. Any further
    faces are written next to it with a numeric suffix before the extension
    (``<stem>_1<ext>``, ``<stem>_2<ext>``, ...) so that one crop does not
    overwrite another.

    Args:
        image_name: Path of the image to read, relative to the current working
            directory. The same name is reused for the output file(s), so the
            ``./faces/`` directory must already exist.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with a message that names the file.
        raise ValueError("Could not read image: {0}".format(image_name))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faceCascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.3,
        minNeighbors=3,
        minSize=(30, 30),
    )
    print("[INFO] Found {0} Faces.".format(len(faces)))

    stem, ext = os.path.splitext(image_name)
    for index, (x, y, w, h) in enumerate(faces):
        roi_color = image[y:y + h, x:x + w]
        # Keep the plain name for the first face; suffix the rest so that
        # several faces in one picture are all kept.
        if index == 0:
            face_name = image_name
        else:
            face_name = "{0}_{1}{2}".format(stem, index, ext)
        cv2.imwrite(os.path.join('./faces/', face_name), roi_color)
def crawling(target_name, number_of_pictures=50, scroll_pause_time=3):
    """Search Google Images for a term and download the full-size results.

    Uses the module-level ``driver``. Each downloaded picture is saved in the
    current working directory as ``<n>.jpg`` (n starting at 0) and then passed
    to ``face_crop``. A picture only counts once both the download and the
    face crop succeeded; a failed thumbnail is reported and skipped.

    Args:
        target_name: Search term typed into the image search box.
        number_of_pictures: Maximum number of pictures to save.
        scroll_pause_time: Seconds to wait after each scroll for the page to
            load more results. Increase it on a slow network.

    Returns:
        The number of pictures that were saved.
    """
    # Local import: the file's import block does not provide ``By``.
    from selenium.webdriver.common.by import By

    driver.get("https://www.google.co.kr/imghp?hl=ko&tab=wi&ogbl")
    search_box = driver.find_element(By.NAME, "q")
    search_box.send_keys(target_name)
    search_box.send_keys(Keys.RETURN)

    # NOTE(review): these selectors mirror Google's generated markup and will
    # break whenever the page layout changes - confirm against the live page.
    thumbnail_selector = ".rg_i.Q4LuWd"
    more_results_selector = ".mye4qd"  # presumably the "show more results" button
    full_image_xpath = '/html/body/div[2]/c-wiz/div[3]/div[2]/div[3]/div/div/div[3]/div[2]/c-wiz/div[1]/div[1]/div/div[2]/a/img'

    last_height = driver.execute_script("return document.body.scrollHeight")
    count = 0      # pictures saved so far; also the next file number
    processed = 0  # thumbnails already tried, so none is clicked twice

    while count < number_of_pictures:
        thumbnails = driver.find_elements(By.CSS_SELECTOR, thumbnail_selector)

        if processed >= len(thumbnails):
            # Every loaded thumbnail has been tried: scroll to load more.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause_time)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # The page stopped growing; try the extra button, and give up
                # when it is missing or cannot be clicked.
                try:
                    driver.find_element(By.CSS_SELECTOR, more_results_selector).click()
                except Exception:
                    break
            last_height = new_height
            continue

        thumbnail = thumbnails[processed]
        processed += 1
        file_name = str(count) + ".jpg"
        try:
            thumbnail.click()
            time.sleep(2)  # give the full-size preview time to load
            imgUrl = driver.find_element(By.XPATH, full_image_xpath).get_attribute("src")
            urllib.request.urlretrieve(imgUrl, file_name)
            face_crop(file_name)
        except Exception as err:
            # Best effort: one bad thumbnail must not abort the whole crawl,
            # but report it instead of hiding it. Ctrl-C still propagates.
            print("[WARN] Skipping image {0}: {1}".format(processed, err))
            continue
        count += 1

    return count
# Entry point: for every group and member in ``kwansang`` create
# ``<group>/<member>/faces`` and crawl that member's pictures into it.
driver = webdriver.Chrome()
start_dir = os.getcwd()
try:
    for group, members in kwansang.items():
        group_dir = os.path.join(start_dir, group)
        os.makedirs(group_dir, exist_ok=True)
        for member in members:
            member_dir = os.path.join(group_dir, member)
            os.makedirs(os.path.join(member_dir, 'faces'), exist_ok=True)
            # crawling() and face_crop() write relative to the current
            # directory, so move into the member's directory first.
            os.chdir(member_dir)
            crawling(member)
finally:
    # Always restore the working directory and end the browser session, even
    # when a crawl fails. quit() shuts down the whole session, whereas close()
    # only closes the current window.
    os.chdir(start_dir)
    driver.quit()
'Python' 카테고리의 다른 글
Python 정리 노트 (0) | 2021.11.24 |
---|---|
연도별 디렉터리 만들어서 파일 옮기는 코드 (0) | 2021.02.08 |
python web server (0) | 2021.01.04 |
방탄소년단, 블랙핑크 멤버별 사진 다운로드 프로그램 => Crawling BTS & Black Pink members' pictures (with Python, Selenium) (0) | 2021.01.04 |