Python

연예인 사진을 다운로드 받으면서 동시에 얼굴만 오려내서 따로 저장하는 코드

EasyCoding 2021. 1. 22. 18:02
728x90
import os
import sys
import time
import urllib.request

import cv2
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys



# Crawl targets: group name -> member names.
# The driver loop at the bottom of the file creates one directory per group
# and per member, and passes each member name to crawling() as the search term.
kwansang = {
    "BTS": [
        "RM",
        "Jin",
        "Suga",
        "J-Hope",
        "Jimin",
        "V",
        "Jungkook",
    ],
    "Black Pink": [
        "Jisoo",
        "Jennie",
        "Rosé",
        "Lisa",
    ],
}

def face_crop(image_name):
    """Detect frontal faces in an image file and save each one as a crop.

    Crops are written under ``./faces/`` relative to the current working
    directory. The first detected face is saved under the original file
    name; every additional face gets a ``_<index>`` suffix before the
    extension so that several faces in one picture no longer overwrite
    each other.

    Args:
        image_name: Path of the image file to scan. It is also used as the
            file name of the saved crop(s).

    Returns:
        None. Progress is reported on stdout.
    """
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread returns None (instead of raising) when the file is
        # missing or cannot be decoded; cvtColor would crash on it.
        print("[WARN] Could not read image: {0}".format(image_name))
        return

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    face_cascade = cv2.CascadeClassifier(cascade_path)
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.3,
        minNeighbors=3,
        minSize=(30, 30),
    )
    print("[INFO] Found {0} Faces.".format(len(faces)))

    # Make sure the output directory exists before writing any crop.
    os.makedirs('./faces/', exist_ok=True)

    stem, ext = os.path.splitext(image_name)
    for index, (x, y, w, h) in enumerate(faces):
        # Keep the historical name for the first face; disambiguate the rest.
        if index == 0:
            crop_name = image_name
        else:
            crop_name = "{0}_{1}{2}".format(stem, index, ext)
        roi_color = image[y:y + h, x:x + w]
        cv2.imwrite(os.path.join('./faces/', crop_name), roi_color)

   


def crawling(target_name, number_of_pictures=50, scroll_pause_time=3):
    """Download Google Images results for a search term and crop faces.

    Uses the module-level ``driver`` to search Google Images for
    ``target_name``, scrolls the result page to load thumbnails, clicks each
    thumbnail, and saves the enlarged image into the current working
    directory as ``0.jpg``, ``1.jpg``, ... Each saved file is then passed to
    ``face_crop``.

    Args:
        target_name: Search term typed into the Google Images search box.
        number_of_pictures: Maximum number of pictures to save. Increase
            this to get more pictures.
        scroll_pause_time: Seconds to wait after each scroll so the page can
            load. Increase this if the network is slow.

    Returns:
        None.
    """
    driver.get("https://www.google.co.kr/imghp?hl=ko&tab=wi&ogbl")
    elem = driver.find_element(By.NAME, "q")
    elem.send_keys(target_name)
    elem.send_keys(Keys.RETURN)

    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")

    count = 0      # pictures saved so far; also the next file number
    processed = 0  # thumbnails already attempted, so none is clicked twice
    while count < number_of_pictures:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait to load page
        time.sleep(scroll_pause_time)

        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        reached_end = False
        if new_height == last_height:
            # The page did not grow; try the ".mye4qd" element (presumably
            # the "show more results" button -- confirm against the page).
            try:
                driver.find_element(By.CSS_SELECTOR, ".mye4qd").click()
            except Exception:
                # Nothing more can be loaded. Still process the thumbnails
                # that are already on the page before leaving the loop.
                reached_end = True
        last_height = new_height

        images = driver.find_elements(By.CSS_SELECTOR, ".rg_i.Q4LuWd")

        # Only visit thumbnails that were not attempted in an earlier pass.
        for image in images[processed:]:
            if count >= number_of_pictures:
                break
            processed += 1
            file_name = str(count) + ".jpg"
            try:
                image.click()
                time.sleep(2)
                # NOTE(review): absolute XPath tied to the page layout at the
                # time of writing; it will break when the layout changes.
                img_url = driver.find_element(
                    By.XPATH,
                    '/html/body/div[2]/c-wiz/div[3]/div[2]/div[3]/div/div/div[3]/div[2]/c-wiz/div[1]/div[1]/div/div[2]/a/img',
                ).get_attribute("src")
                urllib.request.urlretrieve(img_url, file_name)
                face_crop(file_name)
            except Exception as exc:
                # Best effort: skip this thumbnail but say why, and let
                # KeyboardInterrupt / SystemExit propagate.
                print("[WARN] Skipping thumbnail {0}: {1}".format(processed, exc))
                continue
            count += 1

        if reached_end:
            break

    

# Script entry: crawl every member of every group in ``kwansang``.
# Output layout: <start dir>/<group>/<member>/<n>.jpg, with face crops in
# <start dir>/<group>/<member>/faces/.
driver = webdriver.Chrome()
try:
    root_dir = os.getcwd()
    for group, members in kwansang.items():
        os.makedirs(os.path.join(root_dir, group), exist_ok=True)
        for member in members:
            member_dir = os.path.join(root_dir, group, member)
            os.makedirs(os.path.join(member_dir, 'faces'), exist_ok=True)

            # crawling() and face_crop() write relative to the current
            # working directory, so switch into the member's directory.
            os.chdir(member_dir)
            try:
                # NOTE(review): only the member name is searched; short names
                # such as "V" may return unrelated pictures.
                crawling(member)
            finally:
                # Always return to the start directory, even on failure.
                os.chdir(root_dir)
finally:
    # quit() ends the whole browser session; close() only closed the
    # current window and was skipped if anything above raised.
    driver.quit()