diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml new file mode 100644 index 0000000000000..9b472131a0663 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + language_version: python3 diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile new file mode 100644 index 0000000000000..7da024542a5ac --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. 
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files ||true + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md new file mode 100644 index 0000000000000..b47374277229a --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md @@ -0,0 +1,44 @@ +# LlamaIndex Readers Integration: Youtube-Metadata + +```bash +pip install llama-index-readers-youtube-metadata +``` + +This loader fetches the metadata of YouTube videos using the YouTube Data API v3 (https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={videos_string}&key={api_key}). You must have a Google API key to use it. + +Transcripts of YouTube videos are fetched using the `youtube_transcript_api` Python package. + +## Usage + +Simply pass a list of YouTube video IDs to `load_data`. + +```python +from llama_index.readers.youtube_metadata import YouTubeMetaData + +api_key = "Axxxxx" # youtube API Key + +video_ids = ["S_0hBL4ILAg", "a2skIq6hFiY"] + +youtube_meta = YouTubeMetaData(api_key=api_key) +details = youtube_meta.load_data(video_ids) +``` + +This can be combined with the YoutubeTranscriptReader to provide more information for RAG AI inquiries. 
+ +```python +from llama_index.readers.youtube_transcript import YoutubeTranscriptReader +from llama_index.readers.youtube_metadata import YouTubeMetaData, YouTubeMetaDataAndTranscript + +video_ids = ["S_0hBL4ILAg", "a2skIq6hFiY"] # Example video IDs +yt_metadata = YouTubeMetaData(api_key=api_key) +print("Testing YouTubeMetaData...") +print(yt_metadata.load_data(video_ids)) + +yt_meta_transcript = YouTubeMetaDataAndTranscript(api_key=api_key) +print("Testing YouTubeMetaDataAndTranscript...") +print(yt_meta_transcript.load_data(video_ids)) +``` + +The video ID of a YouTube video is the value of the `v` query parameter in its URL. For example, in this URL: https://www.youtube.com/watch?v=a2skIq6hFiY&t=60s + +The video ID is `a2skIq6hFiY` (`&t=60s` is a separate parameter and not part of the ID). diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py new file mode 100644 index 0000000000000..258d374441600 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py @@ -0,0 +1,6 @@ +from llama_index.readers.youtube_metadata.base import ( + YouTubeMetaData, + YouTubeMetaDataAndTranscript, +) + +__all__ = ["YouTubeMetaData", "YouTubeMetaDataAndTranscript"] diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/base.py 
# YoutubeMetaData.py
# Readers that return YouTube metadata (and optionally transcripts) for video IDs.
import requests
from pydantic import Field
from typing import Any, Dict, Iterator, List, Optional
from youtube_transcript_api import YouTubeTranscriptApi
from llama_index.core.readers.base import BasePydanticReader

# Endpoint queried for video metadata (YouTube Data API v3, `videos` resource).
_VIDEOS_ENDPOINT = "https://www.googleapis.com/youtube/v3/videos"
# Number of video IDs sent in a single request.
_BATCH_SIZE = 20
# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the caller indefinitely.
_REQUEST_TIMEOUT = 30


def _chunks(items: List[str], size: int) -> Iterator[List[str]]:
    """Yield successive ``size``-sized slices of ``items``."""
    for start in range(0, len(items), size):
        yield items[start : start + size]


class YouTubeMetaData(BasePydanticReader):
    """Reader that fetches snippet and statistics metadata for YouTube videos."""

    api_key: str

    def load_data(self, video_ids: List[str]) -> Dict[str, Dict[str, Any]]:
        """Fetch metadata for the given video IDs.

        The IDs are sent to the API in batches of ``_BATCH_SIZE``. A batch whose
        response has no ``"items"`` key is reported on stdout and skipped, so
        the IDs in that batch are simply absent from the result.

        Args:
            video_ids: YouTube video IDs to look up.

        Returns:
            A dict keyed by video ID. Each value holds ``title``,
            ``description``, ``publishDate``, ``statistics``, ``tags`` (empty
            list when the video has none) and ``url``.

        Raises:
            requests.RequestException: If a request fails or times out.
        """
        details: Dict[str, Dict[str, Any]] = {}

        for chunk in _chunks(list(video_ids), _BATCH_SIZE):
            # Pass query values via `params` so that requests URL-encodes the
            # key and the ID list instead of splicing them into the URL raw.
            response = requests.get(
                _VIDEOS_ENDPOINT,
                params={
                    "part": "snippet,statistics",
                    "id": ",".join(chunk),
                    "key": self.api_key,
                },
                timeout=_REQUEST_TIMEOUT,
            ).json()
            if "items" not in response:
                print("Error in API response:", response)
                continue

            for item in response["items"]:
                video_id = item["id"]
                snippet = item["snippet"]
                details[video_id] = {
                    "title": snippet["title"],
                    "description": snippet["description"],
                    "publishDate": snippet["publishedAt"],
                    "statistics": item["statistics"],
                    "tags": snippet.get("tags", []),
                    "url": f"https://www.youtube.com/watch?v={video_id}",
                }

        return details


class YouTubeMetaDataAndTranscript(BasePydanticReader):
    """Reader that returns both metadata and transcript for YouTube videos."""

    api_key: str = Field(..., description="API key for YouTube data access")
    # Created lazily by initialize_loaders() so it can reuse `api_key`.
    metadata_loader: Optional[YouTubeMetaData] = None
    # Any object exposing `get_transcript(video_id)`.
    transcript_loader: Any = YouTubeTranscriptApi

    def initialize_loaders(self) -> None:
        """Create the metadata loader on first use if none was supplied."""
        if self.metadata_loader is None:
            self.metadata_loader = YouTubeMetaData(api_key=self.api_key)

    def load_data(self, video_ids: List[str]) -> Dict[str, Any]:
        """Fetch metadata and transcript for each video ID.

        Args:
            video_ids: YouTube video IDs to look up.

        Returns:
            A dict keyed by video ID. Each value has ``"metadata"`` (the
            metadata dict, or ``{}`` when none was returned for that ID) and
            ``"transcript"`` (the transcript loader's result, or the error
            message as a string when fetching the transcript failed).
        """
        self.initialize_loaders()  # Make sure loaders are initialized
        video_ids = list(video_ids)

        # Fetch all metadata up front so the metadata loader can batch the
        # IDs, rather than issuing one HTTP request per video.
        metadata = self.metadata_loader.load_data(video_ids)

        all_details: Dict[str, Any] = {}
        for video_id in video_ids:
            try:
                transcripts = self.transcript_loader.get_transcript(video_id)
            except Exception as e:
                # Deliberate best-effort: a missing or disabled transcript must
                # not discard the metadata, so the error text is stored instead.
                transcripts = str(e)
            all_details[video_id] = {
                "metadata": metadata.get(video_id, {}),
                "transcript": transcripts,
            }
        return all_details
+# +# E.g., PANTS_SHA=725fdaf504237190f6787dda3d72c39010a4c574 ./pants --version +# +# You can also use PANTS_VERSION= to override the config version that is in the pants.toml file. +# +# E.g., PANTS_VERSION=2.13.0 ./pants --version + +PYTHON_BIN_NAME="${PYTHON:-unspecified}" + +# Set this to specify a non-standard location for this script to read the Pants version from. +# NB: This will *not* cause Pants itself to use this location as a config file. +# You can use PANTS_CONFIG_FILES or --pants-config-files to do so. +PANTS_TOML=${PANTS_TOML:-pants.toml} + +PANTS_BIN_NAME="${PANTS_BIN_NAME:-$0}" + +PANTS_SETUP_CACHE="${PANTS_SETUP_CACHE:-${XDG_CACHE_HOME:-$HOME/.cache}/pants/setup}" +# If given a relative path, we fix it to be absolute. +if [[ "$PANTS_SETUP_CACHE" != /* ]]; then + PANTS_SETUP_CACHE="${PWD}/${PANTS_SETUP_CACHE}" +fi + +PANTS_BOOTSTRAP="${PANTS_SETUP_CACHE}/bootstrap-$(uname -s)-$(uname -m)" + +_PEX_VERSION=2.1.103 +_PEX_URL="https://github.com/pantsbuild/pex/releases/download/v${_PEX_VERSION}/pex" +_PEX_EXPECTED_SHA256="4d45336511484100ae4e2bab24542a8b86b12c8cb89230463593c60d08c4b8d3" + +VIRTUALENV_VERSION=20.4.7 +VIRTUALENV_REQUIREMENTS=$( +cat << EOF +virtualenv==${VIRTUALENV_VERSION} --hash sha256:2b0126166ea7c9c3661f5b8e06773d28f83322de7a3ff7d06f0aed18c9de6a76 +filelock==3.0.12 --hash sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836 +six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 +distlib==0.3.2 --hash sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c +appdirs==1.4.4 --hash sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 +importlib-resources==5.1.4; python_version < "3.7" --hash sha256:e962bff7440364183203d179d7ae9ad90cb1f2b74dcb84300e88ecc42dca3351 +importlib-metadata==4.5.0; python_version < "3.8" --hash sha256:833b26fb89d5de469b24a390e9df088d4e52e4ba33b01dc5e0e4f41b81a16c00 +zipp==3.4.1; python_version < "3.10" --hash 
sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098 +typing-extensions==3.10.0.0; python_version < "3.8" --hash sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84 +EOF +) + +COLOR_RED="\x1b[31m" +COLOR_GREEN="\x1b[32m" +COLOR_YELLOW="\x1b[33m" +COLOR_RESET="\x1b[0m" + +INSTALL_URL="https://www.pantsbuild.org/docs/installation" + +function log() { + echo -e "$@" 1>&2 +} + +function die() { + (($# > 0)) && log "${COLOR_RED}$*${COLOR_RESET}" + exit 1 +} + +function green() { + (($# > 0)) && log "${COLOR_GREEN}$*${COLOR_RESET}" +} + +function warn() { + (($# > 0)) && log "${COLOR_YELLOW}$*${COLOR_RESET}" +} + +function tempdir { + mkdir -p "$1" + mktemp -d "$1"/pants.XXXXXX +} + +function get_exe_path_or_die { + local exe="$1" + if ! command -v "${exe}"; then + die "Could not find ${exe}. Please ensure ${exe} is on your PATH." + fi +} + +function get_pants_config_string_value { + local config_key="$1" + local optional_space="[[:space:]]*" + local prefix="^${config_key}${optional_space}=${optional_space}" + local raw_value + raw_value="$(sed -ne "/${prefix}/ s|${prefix}||p" "${PANTS_TOML}")" + local optional_suffix="${optional_space}(#.*)?$" + echo "${raw_value}" \ + | sed -E \ + -e "s|^'([^']*)'${optional_suffix}|\1|" \ + -e 's|^"([^"]*)"'"${optional_suffix}"'$|\1|' \ + && return 0 + return 0 +} + +function get_python_major_minor_version { + local python_exe="$1" + "$python_exe" </dev/null 2>&1; then + continue + fi + if [[ -n "$(check_python_exe_compatible_version "${interpreter_path}")" ]]; then + echo "${interpreter_path}" && return 0 + fi + done +} + +function determine_python_exe { + local pants_version="$1" + set_supported_python_versions "${pants_version}" + local requirement_str="For \`pants_version = \"${pants_version}\"\`, Pants requires Python ${supported_message} to run." 
+ + local python_exe + if [[ "${PYTHON_BIN_NAME}" != 'unspecified' ]]; then + python_exe="$(get_exe_path_or_die "${PYTHON_BIN_NAME}")" || exit 1 + if [[ -z "$(check_python_exe_compatible_version "${python_exe}")" ]]; then + die "Invalid Python interpreter version for ${python_exe}. ${requirement_str}" + fi + else + python_exe="$(determine_default_python_exe)" + if [[ -z "${python_exe}" ]]; then + die "No valid Python interpreter found. ${requirement_str} Please check that a valid interpreter is installed and on your \$PATH." + fi + fi + echo "${python_exe}" +} + +function compute_sha256 { + local python="$1" + local path="$2" + + "$python" <&2 || exit 1 + fi + echo "${bootstrapped}" +} + +function scrub_env_vars { + # Ensure the virtualenv PEX runs as shrink-wrapped. + # See: https://github.com/pantsbuild/setup/issues/105 + local -r pex_env_vars=(${!PEX_@}) + if [[ ! ${#pex_env_vars[@]} -eq 0 ]]; then + local -r pex_env_vars_to_scrub="${pex_env_vars[@]/PEX_ROOT}" + if [[ -n "${pex_env_vars_to_scrub[@]}" ]]; then + warn "Scrubbing ${pex_env_vars_to_scrub[@]}" + unset ${pex_env_vars_to_scrub[@]} + fi + fi + # Also ensure pip doesn't think packages on PYTHONPATH + # are already installed. + if [ -n "${PYTHONPATH:-}" ]; then + warn "Scrubbing PYTHONPATH" + unset PYTHONPATH + fi +} + +function bootstrap_virtualenv { + local python="$1" + local bootstrapped="${PANTS_BOOTSTRAP}/virtualenv-${VIRTUALENV_VERSION}/virtualenv.pex" + if [[ ! -f "${bootstrapped}" ]]; then + ( + green "Creating the virtualenv PEX." 
+ pex_path="$(bootstrap_pex "${python}")" || exit 1 + mkdir -p "${PANTS_BOOTSTRAP}" + local staging_dir + staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") + echo "${VIRTUALENV_REQUIREMENTS}" > "${staging_dir}/requirements.txt" + ( + scrub_env_vars + "${python}" "${pex_path}" -r "${staging_dir}/requirements.txt" -c virtualenv -o "${staging_dir}/virtualenv.pex" + ) + mkdir -p "$(dirname "${bootstrapped}")" + mv -f "${staging_dir}/virtualenv.pex" "${bootstrapped}" + rm -rf "${staging_dir}" + ) 1>&2 || exit 1 + fi + echo "${bootstrapped}" +} + +function find_links_url { + local pants_version="$1" + local pants_sha="$2" + echo -n "https://binaries.pantsbuild.org/wheels/pantsbuild.pants/${pants_sha}/${pants_version/+/%2B}/index.html" +} + +function get_version_for_sha { + local sha="$1" + + # Retrieve the Pants version associated with this commit. + local pants_version + pants_version="$(curl --proto "=https" \ + --tlsv1.2 \ + --fail \ + --silent \ + --location \ + "https://raw.githubusercontent.com/pantsbuild/pants/${sha}/src/python/pants/VERSION")" + + # Construct the version as the release version from src/python/pants/VERSION, plus the string `+gitXXXXXXXX`, + # where the XXXXXXXX is the first 8 characters of the SHA. 
+ echo "${pants_version}+git${sha:0:8}" +} + +function bootstrap_pants { + local pants_version="$1" + local python="$2" + local pants_sha="${3:-}" + local pants_debug="${4:-}" + + local pants_requirements=(pantsbuild.pants==${pants_version}) + local maybe_find_links + if [[ -z "${pants_sha}" ]]; then + maybe_find_links="" + else + maybe_find_links="--find-links=$(find_links_url "${pants_version}" "${pants_sha}")" + fi + + local debug_suffix + if [[ -z "${pants_debug}" ]]; then + debug_suffix="" + else + debug_suffix="-debug" + pants_requirements+=(debugpy==1.6.0) + fi + + local python_major_minor_version + python_major_minor_version="$(get_python_major_minor_version "${python}")" + local target_folder_name="${pants_version}_py${python_major_minor_version}${debug_suffix}" + local bootstrapped="${PANTS_BOOTSTRAP}/${target_folder_name}" + + if [[ ! -d "${bootstrapped}" ]]; then + ( + green "Bootstrapping Pants using ${python}" + local staging_dir + staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") + local virtualenv_path + virtualenv_path="$(bootstrap_virtualenv "${python}")" || exit 1 + green "Installing ${pants_requirements[@]} into a virtual environment at ${bootstrapped}" + ( + scrub_env_vars + # shellcheck disable=SC2086 + "${python}" "${virtualenv_path}" --quiet --no-download "${staging_dir}/install" && \ + # Grab the latest pip, but don't advance setuptools past 58 which drops support for the + # `setup` kwarg `use_2to3` which Pants 1.x sdist dependencies (pystache) use. + "${staging_dir}/install/bin/pip" install --quiet -U pip "setuptools<58" && \ + "${staging_dir}/install/bin/pip" install ${maybe_find_links} --quiet --progress-bar off "${pants_requirements[@]}" + ) && \ + ln -s "${staging_dir}/install" "${staging_dir}/${target_folder_name}" && \ + mv "${staging_dir}/${target_folder_name}" "${bootstrapped}" && \ + green "New virtual environment successfully created at ${bootstrapped}." 
+ ) 1>&2 || exit 1 + fi + echo "${bootstrapped}" +} + +function run_bootstrap_tools { + # functionality for introspecting the bootstrapping process, without actually doing it + if [[ "${PANTS_BOOTSTRAP_TOOLS}" -gt "${SCRIPT_VERSION}" ]]; then + die "$0 script (bootstrap version ${SCRIPT_VERSION}) is too old for this invocation (with PANTS_BOOTSTRAP_TOOLS=${PANTS_BOOTSTRAP_TOOLS}). +Please update it by following ${INSTALL_URL}" + fi + + case "${1:-}" in + bootstrap-cache-key) + local pants_version=$(determine_pants_version) + local python="$(determine_python_exe "${pants_version}")" + # the python above may be a shim (e.g. pyenv or homebrew), so let's get an estimate of the + # actual path, as will be symlinked in the virtualenv. (NB. virtualenv does more complicated + # things, but we at least emulate the symlink-resolution that it does.) + local python_executable_path="$("${python}" -c 'import os, sys; print(os.path.realpath(sys.executable))')" + + local requirements_file="$(mktemp)" + echo "${VIRTUALENV_REQUIREMENTS}" > "${requirements_file}" + local virtualenv_requirements_sha256="$(compute_sha256 "${python}" "${requirements_file}")" + rm "${requirements_file}" + + local parts=( + "os_name=$(uname -s)" + "arch=$(uname -m)" + "python_path=${python}" + "python_executable_path=${python_executable_path}" + # the full interpreter information, for maximum compatibility + "python_version=$("$python" --version)" + "pex_version=${_PEX_VERSION}" + "virtualenv_requirements_sha256=${virtualenv_requirements_sha256}" + "pants_version=${pants_version}" + ) + echo "${parts[*]}" + ;; + bootstrap-version) + echo "${SCRIPT_VERSION}" + ;; + help|"") + cat <"] +description = "llama-index readers youtube-metadata integration" +license = "MIT" +name = "llama-index-readers-youtube-metadata" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.0" +youtube-transcript-api = 
"^0.6.2" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +flake8 = "^7.0.0" +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD new file mode 100644 index 0000000000000..adca2b1c7549a --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD @@ -0,0 +1,5 @@ +python_sources() + +python_tests( + name="tests0", +) diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py new file mode 100644 index 0000000000000..7f8dee48fdf40 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py @@ -0,0 +1,12 @@ +from llama_index.core.readers.base import BaseReader +from llama_index.readers.youtube_metadata import ( + YouTubeMetaData, + YouTubeMetaDataAndTranscript, +) + + +def test_class(): + names_of_base_classes = [b.__name__ for b in YouTubeMetaData.__mro__] + assert BaseReader.__name__ in names_of_base_classes + 
names_of_base_classes = [b.__name__ for b in YouTubeMetaDataAndTranscript.__mro__] + assert BaseReader.__name__ in names_of_base_classes