May '24 (edited) • CrewAI
Help Needed!!
I'm trying to create an alternative to OpenAI when using YoutubeChannelSearchTool from crewai_tools.
The main objective is to use Hugging Face models instead of the OpenAI API. So far I have come up with this code, which implements a CustomRagTool using Hugging Face models for the LLM and the embeddings, but I hit an error and can't identify where the problem is so that I can address it properly. Can someone help?
The whole process of analyzing the channel and processing each URL completes correctly: it creates the embeddings and the ChromaDB vector store. I think the problem arises when trying to retrieve the results from the embeddings.
```
from crewai_tools import YoutubeChannelSearchTool
from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
# from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
# from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain_groq import ChatGroq
from embedchain import App
from embedchain.config import AppConfig, BaseEmbedderConfig
from embedchain.embedder.huggingface import HuggingFaceEmbedder
from typing import Dict, Any
from decouple import config
import os
# Copy the credentials returned by decouple's config() into the process
# environment; the os.environ[...] lookups further down read them back.
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = config("HUGGINGFACE_ACCESS_TOKEN")
os.environ["GROQ_API_KEY"] = config("GROQ_API_KEY")
# Disabled alternative: a HuggingFaceEndpoint-backed chat model. It is wrapped
# in a bare string literal, so nothing inside it executes (the matching
# imports near the top of the snippet are commented out as well).
""" model = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.2",
task="text_generation",
max_new_tokens=8192,
temperature=0.1,
top_k=5,
huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
repetition_penalty=1.03,
)
llm = ChatHuggingFace(llm=model) """
# Chat model served through Groq. The key is read from os.environ, where the
# assignment earlier in this snippet stored it.
llm = ChatGroq(
    model_name="llama3-8b-8192",
    groq_api_key=os.environ["GROQ_API_KEY"],
    temperature=0,
    max_tokens=8192,
)
class CustomRagTool(EmbedchainAdapter):
    """Embedchain adapter that builds its own App with a HuggingFace embedder.

    The constructor creates a HuggingFace embedder, wraps it in an Embedchain
    ``App`` and hands that app to ``EmbedchainAdapter``. Despite the name this
    class is an *adapter* (it subclasses ``EmbedchainAdapter``), so it is
    meant to be passed as ``adapter=...`` to a RAG-style tool.
    """

    def __init__(
        self,
        name: str = "Knowledge base",
        description: str = "A knowledge base that can be used to answer questions.",
        summarize: bool = False,
        chunk_size: int = 500,
        vector_store_type: str = "chromadb",
        embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
        **kwargs: Any,
    ):
        """Create the Embedchain app and initialise the adapter with it.

        Args:
            name: Accepted for call compatibility; not used by this class.
            description: Accepted for call compatibility; not used by this
                class.
            summarize: Forwarded to ``EmbedchainAdapter``.
            chunk_size: Accepted for call compatibility; not used by this
                class.
            vector_store_type: Accepted for call compatibility; not used by
                this class.
            embedding_model: Model identifier given to the HuggingFace
                embedder configuration.
            **kwargs: Accepted and ignored.

        Raises:
            KeyError: If ``HUGGINGFACE_ACCESS_TOKEN`` is not set in the
                environment.
        """
        # Create HuggingFace embedder config.
        hf_config = BaseEmbedderConfig(
            model=embedding_model,
            api_key=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
        )
        embedder = HuggingFaceEmbedder(config=hf_config)

        # NOTE(review): no `llm` is handed to App, so Embedchain falls back to
        # whatever its default LLM is - presumably an OpenAI-backed one, which
        # would defeat the goal of avoiding the OpenAI API. Confirm against
        # the Embedchain docs and pass an explicit LLM if needed.
        app_config = AppConfig()
        app = App(config=app_config, embedding_model=embedder)

        # Initialize the EmbedchainAdapter with the Embedchain app.
        super().__init__(embedchain_app=app, summarize=summarize)

    def _run(self, query: str, **kwargs: Any) -> str:
        """Return the knowledge-base content retrieved for ``query``.

        Args:
            query: The question / search text to look up.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            The retrieved content prefixed with ``"Relevant Content:\\n"``.
        """
        # Retrieval goes through the adapter's own `query`. `self.adapter`
        # and `self._before_run` are RagTool members, not adapter members, so
        # calling them on this class would raise AttributeError.
        relevant_content = self.query(query)
        return f"Relevant Content:\n{relevant_content}"
# Build the YouTube channel search tool for the "@1littlecoder" channel.
# NOTE(review): `llm` is a LangChain ChatGroq object, while the crewAI tool
# docs show config["llm"] as a provider/config mapping shaped like the
# "embedder" entry below - confirm this value is accepted.
# NOTE(review): also confirm whether `config` is consulted at all when an
# explicit `adapter` is supplied.
yt_search_tool = YoutubeChannelSearchTool(
    config={
        "llm": llm,
        "embedder": {
            "provider": "huggingface",
            "config": {
                "model": "sentence-transformers/all-MiniLM-L6-v2",
                "api_key": os.environ["HUGGINGFACE_ACCESS_TOKEN"],
            },
        },
    },
    youtube_channel_handle="@1littlecoder",
    adapter=CustomRagTool(
        name="YouTube Search Tool",
        description="Tool to search and summarize YouTube channel content.",
    ),
)
```
0
9 comments
Herminio Henriques
1
Help Needed!!
AI Developer Accelerator
skool.com/ai-developer-accelerator
Master AI & software development to build apps and unlock new income streams. Transform ideas into profits. 💡➕🤖➕👨‍💻🟰💰
Leaderboard (30-day)
Powered by