From 60bc95b4a50adcac50ac4a94d2f63a79c092649c Mon Sep 17 00:00:00 2001
From: blacknon
Date: Mon, 28 Aug 2023 23:54:40 +0900
Subject: [PATCH] update. Implemented using a forced-paging approach.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pydork/engine_google.py | 51 ++++++++---------------------------------
 1 file changed, 10 insertions(+), 41 deletions(-)

diff --git a/pydork/engine_google.py b/pydork/engine_google.py
index 9e5e501..a4d12b4 100644
--- a/pydork/engine_google.py
+++ b/pydork/engine_google.py
@@ -10,7 +10,7 @@
     * Module containing the search class for Google.
 """
 
-import sys
+# import sys
 import json
 import os
 
@@ -19,7 +19,7 @@
 from json.decoder import JSONDecodeError
 from urllib import parse
 from lxml import etree
-from bs4 import BeautifulSoup
+# from bs4 import BeautifulSoup
 
 from .common import Color
 from .recaptcha import TwoCaptcha
@@ -79,9 +79,9 @@ def gen_search_url(self, keyword: str, type: str):
         url_param = {
             'q': keyword,  # search keyword
             'oq': keyword,  # search keyword
-            'num': '100',  # number of results shown per page.
-            'filter': '0',  # similar-page filtering (0...disabled, 1...enabled)
-            'nfpr': '1'  # disable the "did you mean" (escape hatch) suggestions
+            'num': 100,  # number of results shown per page.
+            'filter': 0,  # similar-page filtering (0...disabled, 1...enabled)
+            'nfpr': 1  # disable the "did you mean" (escape hatch) suggestions
         }
 
         # if lang/locale is set
@@ -106,17 +106,11 @@ def gen_search_url(self, keyword: str, type: str):
         page = 0
         while True:
-            if page == 0:
-                # set the page start offset in the parameters
-                url_param['start'] = str(page * 100)
-                params = parse.urlencode(url_param)
-
-                target_url = search_url + '?' + params
+            # set the page start offset in the parameters
+            url_param['start'] = str(page * 100)
+            params = parse.urlencode(url_param)
 
-            else:
-                target_url = self.SEARCH_NEXT_URL
-                if self.SEARCH_NEXT_URL is None:
-                    break
+            target_url = search_url + '?' + params
 
             yield 'GET', target_url, None
             page += 1
@@ -221,7 +215,7 @@ def get_links(self, url: str, html: str, type: str):
             self.SOUP_SELECT_NEXT_URL = '.AaVjTc > tbody > tr > td > a'
 
             # TODO: rewrite SEARCH_NEXT_URL
-            self.get_nextpage_url(html)
+            # self.get_nextpage_url(html)
 
         # call CommonEngine's processing
         links = super().get_links(url, html, type)
@@ -307,31 +301,6 @@ def get_suggest_list(self, suggests: list, char: str, html: str):
 
         return suggests
 
-    def get_nextpage_url(self, html: str):
-        # parse with BeautifulSoup
-        soup = BeautifulSoup(html, 'lxml')
-
-        # check the next-url elements with BeautifulSoup
-        elements = soup.select(self.SOUP_SELECT_NEXT_URL)
-
-        print(elements, file=sys.stderr)
-        # get the list of next urls
-        elinks = [e['href'] for e in elements]
-
-        if len(elinks) == 0:
-            self.SEARCH_NEXT_URL = None
-
-        elif len(elinks) == 1:
-            next_url = parse.urljoin(
-                self.ENGINE_TOP_URL, elinks[0])  # type: ignore
-            self.SEARCH_NEXT_URL = next_url
-
-        elif len(elinks) > 1:
-            # DEBUG: something is off here; check the html and handle it
-            next_url = parse.urljoin(
-                self.ENGINE_TOP_URL, elinks[1])  # type: ignore
-            self.SEARCH_NEXT_URL = next_url
-
     def processings_elist(self, elinks, etitles, etexts: list):
         """processings_elist
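
For illustration, the paging logic introduced above boils down to a small standalone generator: instead of scraping a "next" link out of each result page (the removed get_nextpage_url / SEARCH_NEXT_URL flow), every page URL is computed up front from a start offset (page * 100, matching num=100). A minimal sketch of that forced-paging approach follows; the gen_search_url name, the url_param keys, and the ('GET', url, None) tuple shape come from the patch, but passing search_url in as a plain argument (rather than reading it from engine state) is an assumption made here to keep the sketch self-contained.

    from itertools import islice
    from urllib import parse


    def gen_search_url(search_url: str, keyword: str):
        # Query parameters as in the patch; parse.urlencode() stringifies the ints.
        url_param = {
            'q': keyword,    # search keyword
            'oq': keyword,   # search keyword
            'num': 100,      # number of results shown per page
            'filter': 0,     # similar-page filtering (0...disabled, 1...enabled)
            'nfpr': 1,       # disable the "did you mean" (escape hatch) suggestions
        }

        page = 0
        while True:
            # Forced paging: derive each page's offset directly instead of
            # following a next-page link scraped from the previous response.
            url_param['start'] = str(page * 100)
            target_url = search_url + '?' + parse.urlencode(url_param)
            yield 'GET', target_url, None
            page += 1


    # Usage: take the first three page requests (the generator is infinite).
    for method, url, body in islice(
            gen_search_url('https://www.google.com/search', 'site:example.com'), 3):
        print(method, url)

One design consequence visible in the diff: with the "if self.SEARCH_NEXT_URL is None: break" branch removed, the generator no longer terminates on its own, so the caller becomes responsible for stopping iteration (for example, once a fetched page yields no links).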