Collectives™ on Stack Overflow

Find centralized, trusted content and collaborate around the technologies you use most.

Learn more about Collectives

Teams

Q&A for work

Connect and share knowledge within a single location that is structured and easy to search.

Learn more about Teams

Selenium MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError

Ask Question chrome_driver = Service(ChromeDriverManager().install()) options = Options() options.add_experimental_option('detach',True) options.add_experimental_option('excludeSwitches',['enable-logging']) # options.add_argument('--headless') # options.add_argument('--window-size = x, y') # options.add_argument('--start-maximazed') # options.add_argument('--start-fullscreen') # options.add_argument('--mute-audio') self.driver = webdriver.Chrome(options=options,service=chrome_driver) self.now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S (%a)') self.hour = datetime.datetime.now().strftime('%H시_%M_분') self.today = date.today() self.folder = None self.today_file = None self.kakao_talk = kakao() def connect(self): url = 'http://minishop.gmarket.co.kr/meritblog' # url = 'http://minishop.gmarket.co.kr/hanvitis' self.driver.get(url) return url def shopping_mall(self): mall_name = self.driver.find_element(By.CSS_SELECTOR,'a.shop_title_ui_txt').text self.folder = f'./메리트몰_데이터베이스/지마켓' self.today_file = f'{self.today}_{mall_name}_지마켓.json' return mall_name def soup(self,url_param): response = requests.get(url_param) if response.status_code == 200: sp = BeautifulSoup(response.text, 'html.parser') return sp except requests.packages.urllib3.exceptions.MaxRetryError as e: print(str(e)) def total_product(self): total_items = 0 products = self.driver.find_element(By.ID,'ulCategory').find_elements(By.CSS_SELECTOR,'span.data_num') for product in products: items = int(product.text.replace('(',"").replace(')',"")) total_items += items # 391개 return total_items def paging(self,total_items,url): page_list = [] # 전체상품보기 클릭 self.driver.execute_script('arguments[0].click();',self.driver.find_element(By.CSS_SELECTOR,'.allList_view > a')) time.sleep(2) # 한 페이지의 상품 수 view_limit = int(self.driver.find_element(By.CSS_SELECTOR,'div.limit').text.replace("개씩","")) # 페이지 수 구하기 if total_items % view_limit == 0: page = total_items // view_limit else: page = total_items // view_limit + 1 # 페이지 리스트 for cnt in range(page): page_url = f'{url}/List?CategoryType=General&SortType=FocusRank&DisplayType=SmallImage&Page={cnt+1}&PageSize=60' page_list.append(page_url) # self.driver.quit() return page_list def data_one(self,page_list): """상품 url 리스트 정상가/할인가/할인율 딕셔너리""" url_list = [] price_dic = {} for page in page_list: html = self.soup(page) for items in html.find('ul',class_='type2').find_all('li'): # url item_url = items.find('a')['href'] # 상품코드 item_code = item_url[-10:] # 가격 및 할인율 if items.find('p',class_='prd_price').find('span',class_='del_important'): original_price = items.find('p',class_='prd_price').find('span',class_='del_important').text.replace("원","") discount_price = items.find('p',class_='prd_price').find('strong').text.replace("원","") sale_rate = items.find('p',class_='prd_price').find('span',class_='splt_ico usr_clr').text else: original_price = items.find('p',class_='prd_price').find('strong').text.replace("원","") discount_price = "없음" sale_rate = "없음" url_list.append(item_url) price_dic[item_code]={"정상가":original_price,"할인가":discount_price,"할인율":sale_rate} time.sleep(randint(1,10)) self.driver.quit() return url_list , price_dic def check_start(self): url = self.connect() mall_name = self.shopping_mall() total_items = self.total_product() page_list = self.paging(total_items,url) url_list,price_dic = self.data_one(page_list) if __name__ == "__main__": g_market = gmarket_sales() # g_market.check_start() schedule.every().hour.at(":20").do(g_market.check_start) while True: schedule.run_pending() time.sleep(1)

Hello, I am a student practicing web page crawling. I'm making a code that scrapes data by parsing a website with selenium.

I wrote the program so that it runs at regular intervals using the final schedule module. However, if chrome_driver is initialized in the init of the class and the driver.quit() command is put in the execution process, the MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError code is displayed when the second code is executed..

Below is the code I wrote. I would really appreciate it if you could point out any problems.

Thanks for contributing an answer to Stack Overflow!

  • Please be sure to answer the question . Provide details and share your research!

But avoid

  • Asking for help, clarification, or responding to other answers.
  • Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers .