Commit d95d53d

update.

blacknon committed Jul 17, 2023
1 parent 561cec8 commit d95d53d
Showing 5 changed files with 48 additions and 31 deletions.
2 changes: 1 addition & 1 deletion pydork/__init__.py
@@ -140,7 +140,7 @@ def main():
         },
         {
             "args": ["--delete-cookies"],
-            "type": bool,
+            "action": "store_true",
             "help": messages.help_message_op_delete_cookies,
         },
     ]
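
A note on why this hunk matters: with argparse, `type=bool` is a classic trap, because `bool()` is applied to the raw option string, so any non-empty value (even "false") parses as True. `action="store_true"` turns the option into a genuine flag. A minimal standalone sketch, independent of the pydork codebase:

```python
# Hypothetical, self-contained demo of the type=bool pitfall fixed above.
import argparse

parser = argparse.ArgumentParser()
# Old style: bool("false") == True, so the value is almost always True.
parser.add_argument("--old-delete-cookies", type=bool, default=False)
# Fixed style: False unless the flag is present on the command line.
parser.add_argument("--delete-cookies", action="store_true")

args = parser.parse_args(["--old-delete-cookies", "false", "--delete-cookies"])
print(args.old_delete_cookies)  # True -- surprising, but correct for type=bool
print(args.delete_cookies)      # True because the flag was passed
```
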
37 changes: 22 additions & 15 deletions pydork/engine.py
@@ -113,7 +113,7 @@ def set_is_debug(self, is_debug: bool):
             debug (bool): debug flag(Enable debug with `True`).
         """
 
-        self.ENGINE.IS_DEBUG = is_debug
+        self.ENGINE.IS_DEBUG = is_debug  # type: ignore
 
     # Function that enables the command flag (execution in command mode)
     def set_is_command(self, is_command: bool):
@@ -125,7 +125,7 @@ def set_is_command(self, is_command: bool):
         Args:
             is_command (bool): command flag(Enable command mode with `True`).
         """
-        self.ENGINE.IS_COMMAND = is_command
+        self.ENGINE.IS_COMMAND = is_command  # type: ignore
 
     # Whether color output is enabled
     def set_is_color(self, is_color: bool = False):
@@ -163,7 +163,7 @@ def set_disable_headless(self, disable_headless: bool):
         """
 
-        self.ENGINE.IS_DISABLE_HEADLESS = disable_headless
+        self.ENGINE.IS_DISABLE_HEADLESS = disable_headless  # type: ignore
 
     # Function that takes the directory holding the cookie files and determines the cookie file to use
     def set_cookie_files(self, cookie_dir: str):
@@ -177,8 +177,8 @@ def set_cookie_files(self, cookie_dir: str):
         """
 
         # Convert to a full path
-        cookie_dir = pathlib.Path(cookie_dir).expanduser()
-        cookie_dir = pathlib.Path(cookie_dir).resolve()
+        cookie_dir = pathlib.Path(cookie_dir).expanduser()  # type: ignore
+        cookie_dir = pathlib.Path(cookie_dir).resolve()  # type: ignore
 
         # Check for existence and create the directory if it does not exist
         if not os.path.exists(cookie_dir):
@@ -205,7 +205,7 @@ def set_cookie_files(self, cookie_dir: str):
             open(cookie_file, 'a').close()
 
         # Set it on the ENGINE instance
-        self.ENGINE.COOKIE_FILE = cookie_file
+        self.ENGINE.COOKIE_FILE = cookie_file  # type: ignore
 
     # Function that specifies whether the cookie is deleted and recreated on every query
     def set_cookie_files_delete(self, is_delete_cookie: bool):
@@ -218,7 +218,7 @@ def set_cookie_files_delete(self, is_delete_cookie: bool):
         """
 
         # Set it on the ENGINE instance
-        self.ENGINE.COOKIE_FILE_DELETE = is_delete_cookie
+        self.ENGINE.COOKIE_FILE_DELETE = is_delete_cookie  # type: ignore
 
     # Accept the language/country settings passed to the search engine
     def set_lang(self, lang: str = "ja", locale: str = "JP"):
@@ -257,7 +257,7 @@ def set_proxy(self, proxy: str):
         self.ENGINE.set_proxy(proxy)
 
     # Enable Selenium
-    def set_selenium(self, uri: str = None, browser: str = None):
+    def set_selenium(self, uri: str = None, browser: str = None):  # type: ignore
         """set_selenium
         Use Selenium (priority over Splash).
@@ -282,7 +282,7 @@ def set_splash(self, splash_url: str):
         self.ENGINE.set_splash(splash_url)
 
     # Accept the user_agent setting
-    def set_user_agent(self, useragent: str = None):
+    def set_user_agent(self, useragent: str = None):  # type: ignore
         """set_user_agent
         Specify the UserAgent.
@@ -304,7 +304,7 @@ def set_ignore_ssl(self, verify: bool):
         Args:
             verify (bool): bool.
         """
-        self.ENGINE.set_ignore_ssl = verify
+        self.ENGINE.set_ignore_ssl = verify  # type: ignore
 
     # Perform the search
     def search(self, keyword: str, search_type='text', maximum=100):
@@ -384,7 +384,8 @@ def search(self, keyword: str, search_type='text', maximum=100):
         )
 
         # Fetch the search results
-        html = self.ENGINE.get_result(url, method=method, data=data)
+        html = self.ENGINE.get_result(
+            url, method=method, data=data)  # type: ignore
 
         # debug
         self.ENGINE.MESSAGE.print_text(
@@ -395,6 +396,9 @@ def search(self, keyword: str, search_type='text', maximum=100):
             Color.GRAY + '[DEBUG]: [Response]' + Color.END
         )
 
+        # Initial value
+        is_recaptcha = False
+
         while True:
             # Determine whether the page is a ReCaptcha page
             if html is not None:
@@ -414,7 +418,8 @@ def search(self, keyword: str, search_type='text', maximum=100):
                # When using a headless browser
                if self.ENGINE.USE_SELENIUM or self.ENGINE.USE_SPLASH:
                    # Pass it to the bypass function
-                   html = self.ENGINE.bypass_recaptcha(url, html)
+                   html = self.ENGINE.bypass_recaptcha(
+                       url, html)  # type: ignore
 
                    if html is not None:
                        # debug
@@ -447,7 +452,8 @@ def search(self, keyword: str, search_type='text', maximum=100):
 
             # TODO: also pass result to the function so it can check for duplicates
             # Parse the search results and obtain the list of urls
-            links = self.ENGINE.get_links(url, html, search_type)
+            links = self.ENGINE.get_links(
+                url, html, search_type)  # type: ignore
 
             # Act according to the number of links
             if not len(links):
@@ -461,7 +467,7 @@ def search(self, keyword: str, search_type='text', maximum=100):
 
             # Exit the loop
             if self.ENGINE.NAME == "Google":
-                if self.ENGINE.SEARCH_NEXT_URL is None:
+                if self.ENGINE.SEARCH_NEXT_URL is None:  # type: ignore
                     break
             else:
                 break
@@ -548,7 +554,8 @@ def suggest(self, keyword: str, jap=False, alph=False, num=False):
             html = self.ENGINE.get_result(url)
 
             # TODO: each engine needs its own json/text conversion handling implemented separately
-            suggests = self.ENGINE.get_suggest_list(suggests, char, html)
+            suggests = self.ENGINE.get_suggest_list(
+                suggests, char, html)  # type: ignore
 
             sleep(0.5)
 
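
Two things stand out in this file: most edits add `# type: ignore`, which silences mypy on just that line, and one edit initializes `is_recaptcha = False` before the `while True:` loop. Reading the diff, the initializer looks like a guard against reading the variable before it is ever assigned, for instance when the first response is `None`. A minimal sketch of that pattern, with illustrative names rather than the pydork code:

```python
# Hypothetical reduction of the loop above: without the initializer, the
# first read of is_recaptcha would raise UnboundLocalError when html is None,
# because the only assignment sits inside a conditional branch.
def fetch_all(pages):
    is_recaptcha = False  # initial value, set before the loop ever reads it

    for html in pages:
        if html is not None:
            is_recaptcha = "recaptcha" in html  # only assigned on this branch

        if is_recaptcha:  # safe to read even when html was None
            print("recaptcha detected; would invoke the bypass here")

fetch_all([None, "<html>recaptcha challenge</html>"])
```
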
25 changes: 16 additions & 9 deletions pydork/engine_common.py
@@ -98,7 +98,7 @@ def set_range(self, start: datetime, end: datetime):
         self.RANGE_END = end
 
     # Accept the user_agent setting (random when omitted; with Selenium, the agent of the browser in use is applied automatically)
-    def set_user_agent(self, user_agent: str = None, browser: str = None):
+    def set_user_agent(self, user_agent: str = None, browser: str = None):  # type: ignore
         """set_user_agent
         Accept the user_agent value.
@@ -138,7 +138,7 @@ def set_user_agent(self, user_agent: str = None, browser: str = None):
     # - Takes priority over Splash
     # - host and browser fall back to their defaults when not specified (no host; browser defaults to chrome)
     # - browser accepts only `chrome` or `firefox`
-    def set_selenium(self, uri: str = None, browser: str = None):
+    def set_selenium(self, uri: str = None, browser: str = None):  # type: ignore
         """set_selenium
         Communicate via Selenium when searching.
@@ -200,6 +200,13 @@ def read_cookies(self):
         Currently works only with Selenium.
         """
 
+        # If the cookie file does not exist, create it as an empty file
+        exist_cookie_file = os.path.isfile(self.COOKIE_FILE)
+        if not exist_cookie_file:
+            cookie_file = open(self.COOKIE_FILE, 'w')
+            cookie_file.write('')
+            cookie_file.close()
+
         # Get the size of the cookie file
         file_size = os.path.getsize(self.COOKIE_FILE)
 
@@ -211,7 +218,7 @@ def read_cookies(self):
         # When using Selenium
         if self.USE_SELENIUM:
             # Prior access is required, so visit the top page of the target search domain first
-            self.driver.get(self.ENGINE_TOP_URL)
+            self.driver.get(self.ENGINE_TOP_URL)  # type: ignore
 
             # Set the cookies one by one
             for cookie in cookies:
@@ -397,7 +404,7 @@ def request_selenium(self, url: str, method='GET', data=None):
                 EC.presence_of_all_elements_located)
 
             # wait 5 seconds(wait DOM)
-            if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'):
+            if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'):  # type: ignore
                 self.driver.implicitly_wait(20)
 
             # get result
@@ -411,7 +418,7 @@ def request_selenium(self, url: str, method='GET', data=None):
                 EC.presence_of_all_elements_located)
 
             # wait 5 seconds(wait DOM)
-            if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'):
+            if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'):  # type: ignore
                 self.driver.implicitly_wait(20)
 
             # get result
@@ -452,7 +459,7 @@ def request_splash(self, url: str, method='GET', data=None):
 
         # NOTE: Splash cannot render the POST used by Google image search, so requests is used as a special-case workaround.
         # TODO: rewrite once Splash can render it as well.
-        elif method == 'POST' and self.NAME == 'Google' and self.IMAGE_URL in url:
+        elif method == 'POST' and self.NAME == 'Google' and self.IMAGE_URL in url:  # type: ignore
             # create session
             session = requests.session()
 
@@ -478,7 +485,7 @@ def request_splash(self, url: str, method='GET', data=None):
         elif method == 'POST':
             headers = {'Content-Type': 'application/json'}
             params['http_method'] = 'POST'
-            params['body'] = parse.urlencode(data)
+            params['body'] = parse.urlencode(data)  # type: ignore
 
             result = self.session.post(
                 splash_url,
@@ -631,7 +638,7 @@ def get_links(self, source_url, html: str, type: str):
 
         # before processing elists
         self.MESSAGE.print_text(
-            ','.join(elinks),
+            ','.join(elinks),  # type: ignore
             header=self.MESSAGE.HEADER + ': ' + Color.BLUE +
             '[BeforeProcessing elinks]' + Color.END,
             separator=" :",
@@ -653,7 +660,7 @@ def get_links(self, source_url, html: str, type: str):
 
         # after processing elists
         self.MESSAGE.print_text(
-            ','.join(elinks),
+            ','.join(elinks),  # type: ignore
             header=self.MESSAGE.HEADER + ': ' +
             Color.GREEN + '[AfterProcessing elinks]' + Color.END,
             separator=" :",
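
The substantive change in this file is the guard at the top of read_cookies: os.path.getsize raises FileNotFoundError for a missing path, so the cookie file is now created empty before its size is checked. A hedged sketch of the same guard, using a hypothetical path (pathlib offers a terser spelling than open/write/close):

```python
# Hypothetical path; the real COOKIE_FILE is set elsewhere in pydork.
from pathlib import Path

cookie_file = Path("~/.pydork_cookies/google.cookie").expanduser()
cookie_file.parent.mkdir(parents=True, exist_ok=True)  # ensure the directory
cookie_file.touch(exist_ok=True)         # create an empty file if missing
size = cookie_file.stat().st_size        # now safe: the file always exists
print(size)                              # 0 on first run
```
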
8 changes: 5 additions & 3 deletions pydork/engine_google.py
@@ -304,7 +304,7 @@ def get_suggest_list(self, suggests: list, char: str, html: str):
         sug_data = sug_root.xpath("//suggestion")
         data = [s.get("data") for s in sug_data]
 
-        suggests[char if char == '' else char[-1]] = data
+        suggests[char if char == '' else char[-1]] = data  # type: ignore
 
         return suggests
 
@@ -322,11 +322,13 @@ def get_nextpage_url(self, html: str):
             self.SEARCH_NEXT_URL = None
 
         elif len(elinks) == 1:
-            next_url = parse.urljoin(self.ENGINE_TOP_URL, elinks[0])
+            next_url = parse.urljoin(
+                self.ENGINE_TOP_URL, elinks[0])  # type: ignore
             self.SEARCH_NEXT_URL = next_url
 
         elif len(elinks) > 1:
-            next_url = parse.urljoin(self.ENGINE_TOP_URL, elinks[1])
+            next_url = parse.urljoin(
+                self.ENGINE_TOP_URL, elinks[1])  # type: ignore
             self.SEARCH_NEXT_URL = next_url
 
     def processings_elist(self, elinks, etitles, etexts: list):
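
For readers unfamiliar with it, parse.urljoin (from the standard library's urllib) is what resolves the scraped next-page href against the engine's top URL; only the line wrapping and the # type: ignore changed here. A quick standalone illustration with made-up URLs:

```python
# Standard-library behavior only; the URLs are invented for the demo.
from urllib import parse

base = "https://www.google.com"
print(parse.urljoin(base, "/search?q=pydork&start=10"))
# -> https://www.google.com/search?q=pydork&start=10

# Absolute hrefs pass through untouched, which is why urljoin is safer
# than plain string concatenation for scraped links:
print(parse.urljoin(base, "https://example.com/next"))
# -> https://example.com/next
```
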
7 changes: 4 additions & 3 deletions pydork/engine_yahoo.py
@@ -188,10 +188,11 @@ def get_links(self, url: str, html: str, type: str):
             if self.IS_DEBUG:
                 print(Color.PURPLE + '[JsonElement]' + Color.END,
                       file=sys.stderr)
-                print(Color.PURPLE + element + Color.END, file=sys.stderr)
+                print(Color.PURPLE + element + Color.END,
+                      file=sys.stderr)  # type: ignore
 
             # Extract the data from the json
-            j = json.loads(element)
+            j = json.loads(element)  # type: ignore
 
             # debug
             if self.IS_DEBUG:
@@ -272,7 +273,7 @@ def get_suggest_list(self, suggests: list, char: str, html: str):
         soup = BeautifulSoup(html, features="lxml")
         html = soup.find("pre").text
         data = json.loads(html)
-        suggests[char if char == '' else char[-1]] = [e['key']
+        suggests[char if char == '' else char[-1]] = [e['key']  # type: ignore
                                                       for e in data['gossip']['results']]
 
         return suggests
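
The suggest parser above pulls a pre element out of the response and then reads keys from a gossip/results JSON structure. A self-contained sketch of that flow with stubbed data (the JSON shape is taken from the diff itself; the values are invented):

```python
# Stubbed response body; the real one comes from Yahoo's suggest endpoint.
import json

from bs4 import BeautifulSoup

raw = '<pre>{"gossip": {"results": [{"key": "pydork"}, {"key": "python"}]}}</pre>'
soup = BeautifulSoup(raw, features="lxml")
payload = soup.find("pre").text          # the JSON sits inside the <pre> tag
data = json.loads(payload)
print([e['key'] for e in data['gossip']['results']])
# -> ['pydork', 'python']
```
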
