Help Needed!! · AI Developer Accelerator

May '24 (edited) • CrewAI

Help Needed!!

I'm trying to create an alternative to OpenAI when using YoutubeChannelSearchTool from crewai_tools.

The main objective is to use Hugging Face models instead of the OpenAI API. So far I have come up with this code, which implements a CustomRagTool using Hugging Face models for the LLM and the embeddings, but I encounter an error and I can't identify where the problem is so that I can address it properly. Can someone help?

The whole process of analyzing the channel and processing each URL is done correctly: it creates the embeddings and the ChromaDB vector store. I think the problem arises when it tries to retrieve the results from the embeddings.

```

from crewai_tools import YoutubeChannelSearchTool

from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter

# from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint

# from langchain_community.chat_models.huggingface import ChatHuggingFace

from langchain_groq import ChatGroq

from embedchain import App

from embedchain.config import AppConfig, BaseEmbedderConfig

from embedchain.embedder.huggingface import HuggingFaceEmbedder

from typing import Dict, Any

from decouple import config

import os

# Copy both API credentials from the decouple `config` lookup into the
# process environment; the code below reads them back via os.environ.
for _credential in ("HUGGINGFACE_ACCESS_TOKEN", "GROQ_API_KEY"):
    os.environ[_credential] = config(_credential)

# Disabled alternative, kept as a bare string literal so it is never executed:
# it would build the LLM from a HuggingFaceEndpoint wrapped in ChatHuggingFace.
# The matching imports near the top of the snippet are commented out as well.
""" model = HuggingFaceEndpoint(

repo_id="mistralai/Mistral-7B-Instruct-v0.2",

task="text_generation",

max_new_tokens=8192,

temperature=0.1,

top_k=5,

huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],

repetition_penalty=1.03,

)

llm = ChatHuggingFace(llm=model) """

# Chat model served through Groq; it is handed to the YouTube search tool's
# `config` further down. The API key is read from the environment.
llm = ChatGroq(
    model_name="llama3-8b-8192",
    groq_api_key=os.environ["GROQ_API_KEY"],
    temperature=0,
    max_tokens=8192,
)

class CustomRagTool(EmbedchainAdapter):
    """A custom RagTool that manages Embedchain configuration internally.

    The constructor builds a HuggingFace embedder and an Embedchain ``App``
    around it, then hands that app to ``EmbedchainAdapter``.
    """

    def __init__(
        self,
        name: str = "Knowledge base",
        description: str = "A knowledge base that can be used to answer questions.",
        summarize: bool = False,
        chunk_size: int = 500,
        vector_store_type: str = "chromadb",
        embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
        **kwargs: Any,
    ) -> None:
        """Create the Embedchain app and initialise the adapter with it.

        Args:
            name: Accepted for compatibility; not used by this constructor.
            description: Accepted for compatibility; not used by this
                constructor.
            summarize: Forwarded to ``EmbedchainAdapter.__init__``.
            chunk_size: Accepted for compatibility; not used by this
                constructor.
            vector_store_type: Accepted for compatibility; not used by this
                constructor.
            embedding_model: Model identifier passed to the HuggingFace
                embedder configuration.
            **kwargs: Extra keyword arguments; accepted and ignored.

        Raises:
            KeyError: If ``HUGGINGFACE_ACCESS_TOKEN`` is not set in the
                environment.
        """
        # Create HuggingFace embedder config
        hf_config = BaseEmbedderConfig(
            model=embedding_model,
            api_key=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
        )
        embedder = HuggingFaceEmbedder(config=hf_config)

        # Create Embedchain app with explicit configuration
        app_config = AppConfig()
        app = App(config=app_config, embedding_model=embedder)

        # Initialize the EmbedchainAdapter with the Embedchain app
        super().__init__(embedchain_app=app, summarize=summarize)

    def _run(self, query: str, **kwargs: Any) -> str:
        """Fetch relevant content from the knowledge base for ``query``.

        Args:
            query: The question or search text to look up.
            **kwargs: Extra keyword arguments forwarded to ``_before_run``.

        Returns:
            The retrieved content prefixed with ``"Relevant Content:\\n"``.
        """
        # NOTE(review): `_before_run` and `adapter` are not defined in this
        # class, so they must come from the base class. They look like RagTool
        # members rather than adapter members -- confirm that
        # EmbedchainAdapter really provides them, otherwise this raises
        # AttributeError at query time.
        self._before_run(query, **kwargs)  # Perform any pre-processing if needed

        # Fetch relevant content using the adapter
        relevant_content = self.adapter.query(query)
        return f"Relevant Content:\n{relevant_content}"

# Initialize the tool

# Build the YouTube channel search tool with a HuggingFace embedder and the
# custom adapter. The original paste was missing the comma after `api_key=...`
# and the closing parentheses of the three nested `dict(` calls and of the
# outer call; the nesting below is reconstructed from the argument names.
# NOTE(review): the crewai_tools examples pass `llm` as a
# dict(provider=..., config=...) rather than a chat-model object -- confirm
# that passing the ChatGroq instance here is accepted.
yt_search_tool = YoutubeChannelSearchTool(
    config=dict(
        llm=llm,
        embedder=dict(
            provider="huggingface",
            config=dict(
                model="sentence-transformers/all-MiniLM-L6-v2",
                api_key=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
            ),
        ),
    ),
    youtube_channel_handle="@1littlecoder",
    adapter=CustomRagTool(
        name="YouTube Search Tool",
        description="Tool to search and summarize YouTube channel content.",
    ),
)

```

9 comments

AI Developer Accelerator

skool.com/ai-developer-accelerator

Master AI & software development to build apps and unlock new income streams. Transform ideas into profits. 💡➕🤖➕👨‍💻🟰💰

Leaderboard (30-day)

+52

+30

+20

+14

+12