Soru Üreteci – Question Generator

Kod tanıtımı için https://youtu.be/43mdH4IuTyM adresindeki videoyu izleyebilirsiniz.
# https://medium.com/@foadmk/optimizing-everyday-tasks-with-crewai-fc655ca08944

# summarizer.py

# from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process
# from langchain_openai import ChatOpenAI
from langchain_community.tools import tool
import requests
from PyPDF2 import PdfReader
import re, time, datetime
from langchain_community.llms import Ollama

def show_time(message, t1):
    """
    Print the time elapsed since t1 and return the current timestamp.

    message: label printed in front of the elapsed time
    t1: start time, in seconds as produced by time.time()

    Returns the finishing time, taken with time.time() when this
    function is called.
    """
    finished = time.time()
    elapsed = round(finished - t1, 4)
    print(f"{message}: {elapsed} sec")
    return finished

# Ollama-served model that every agent defined below is given as its llm.
llm = Ollama(model = "mistral:latest")
# llm = Ollama(model='mixtral')   # quite slow because there is not enough memory

# Load your OPENAI_API_KEY from your .env file
# load_dotenv()

# Choose the model for the agents
# model = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0.2)

# Tool to fetch and preprocess PDF content
@tool
def fetch_pdf_content(url: str) -> str:
    """
    Fetches and preprocesses content from a PDF given its URL.
    Returns the text of the PDF.
    """
    # NOTE: the @tool decorator presumably hands this docstring to the agent
    # as the tool description, so implementation notes are kept in comments.
    from io import BytesIO

    # Bound the wait on an unresponsive server and fail loudly on an HTTP
    # error instead of feeding an error page to the PDF parser.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse straight from memory: no fixed 'temp.pdf' is left behind in the
    # working directory and concurrent calls cannot overwrite each other.
    pdf = PdfReader(BytesIO(response.content))

    # Extract every page exactly once and skip pages that yield no text.
    page_texts = (page.extract_text() for page in pdf.pages)
    text = '\n'.join(chunk for chunk in page_texts if chunk)

    # Collapse each run of whitespace (including newlines) into one space.
    processed_text = re.sub(r'\s+', ' ', text).strip()
    return processed_text

# Tool to read and preprocess the content of a local PDF file
@tool
def get_pdf_content(addr: str) -> str:
    """
    Reads and preprocesses content from a PDF given its local file path.
    Returns the text of the PDF.
    """
    # NOTE: the @tool decorator presumably hands this docstring to the agent
    # as the tool description, so it must say "file path" (the code below
    # calls open() on addr) rather than "URL".
    with open(addr, 'rb') as f:
        pdf = PdfReader(f)
        # Extract every page exactly once, while the file is still open.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Skip pages that yielded no text, then collapse each run of whitespace
    # (including newlines) into one space.
    text = '\n'.join(chunk for chunk in page_texts if chunk)
    processed_text = re.sub(r'\s+', ' ', text).strip()
    print(" text extracted and processed.")
    return processed_text

# Agents
# Keyword arguments that are identical for every agent below.
_shared_agent_options = dict(verbose=True, allow_delegation=False, llm=llm)

# PDF Reader Agent -- the only agent that is given a tool
pdf_reader = Agent(
    role='PDF Content Extractor',
    goal='Extract and preprocess text from a PDF',
    backstory='Specializes in handling and interpreting PDF documents',
    tools=[get_pdf_content],
    **_shared_agent_options,
)

# Article Writer Agent
article_writer = Agent(
    role='Article Creator',
    goal='Write a concise and engaging article',
    backstory='Expert in creating informative and engaging articles',
    **_shared_agent_options,
)

# Title Creator Agent
title_creator = Agent(
    role='Title Generator',
    goal='Generate a compelling title for the article',
    backstory='Skilled in crafting engaging and relevant titles',
    **_shared_agent_options,
)

# Question Generator Agent
question_generator = Agent(
    role='Question Generator',
    goal='Extract meaningful questions from the extracted PDF content',
    backstory='Expert in generating informative and mind opening questions',
    **_shared_agent_options,
)


# Tasks
def pdf_reading_task(pdf_url):
    """
    Build the PDF reading task for the pdf_reader agent.

    pdf_url: location of the PDF; it is interpolated verbatim into the
             task description.
             NOTE(review): this script passes a local file path here even
             though the parameter name and the prompt say "URL" -- confirm
             that this is intended.

    Returns a new Task assigned to pdf_reader.
    """
    instructions = f"Read and preprocess the text from the PDF at this URL: {pdf_url}"
    return Task(
        agent=pdf_reader,
        description=instructions,
        expected_output='extracted text from the given pdf',
    )

# Fixed tasks: each pairs one agent with a prompt (description) and a
# statement of the output expected from it.
# NOTE(review): none of these tasks sets an explicit context, so what each
# one receives depends on how the crew chains task outputs -- confirm that
# the question task actually sees the extracted PDF text.

# Article written by article_writer
task_article_drafting = Task(
    agent=article_writer,
    description="Create a concise article with 8-10 paragraphs based on the extracted PDF content.",
    expected_output="a concise article with 8-10 paragraphs based on the extracted PDF content",
)

# Title produced by title_creator
task_title_generation = Task(
    agent=title_creator,
    description="Generate an engaging and relevant title for the article.",
    expected_output="an engaging and relevant title for the article",
)

# Questions produced by question_generator
question_generation_task = Task(
    agent=question_generator,
    description="Generate questions from the extracted PDF content in markdown format.",
    expected_output="at least 10 questions based on the extracted PDF content",
)


# USER INPUTS
# pdf_url = input("Enter the PDF URL: ")
# twitter_url = input("Enter your Twitter URL: ")
# pdf_url = 'https://arxiv.org/pdf/2401.03462.pdf'
# Path of the PDF to process, relative to the working directory
addr = "data/aliceeng.pdf"


# Start the clock before the crew is built, so the reported total also
# covers crew construction.
t1 = time.time()

# Assemble the crew; tasks are listed in the order
# reading -> article -> title -> questions.
crew = Crew(
    agents=[
        pdf_reader,
        article_writer,
        title_creator,
        question_generator,
    ],
    tasks=[
        pdf_reading_task(addr),
        task_article_drafting,
        task_title_generation,
        question_generation_task,
    ],
    verbose=2,
)

# Run the crew; the individual task outputs are read from the task objects
# afterwards.
result = crew.kickoff()

# Combine the title, the article and the questions into one report.
# After kickoff each task's result is read from its .output.exported_output.
final_article = f"""
{task_title_generation.output.exported_output}\n\n
{task_article_drafting.output.exported_output}\n\n
Questions\n
{question_generation_task.output.exported_output}
"""

separator = "--------------------------"

print(separator)
print(final_article)
print(separator)

# The timestamp in the file name gives each run its own output file.
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# UTF-8 is requested explicitly: model output may contain characters that
# the platform's default encoding cannot represent.
with open(f"qg02-{now}.txt", "w", encoding="utf-8") as f:
    print(separator, file=f)
    print(final_article, file=f)
    print(separator, file=f)

# Report the total run time exactly once, after the file has been written.
# show_time appends ": " itself, so the label carries no trailing colon.
show_time("Total time", t1)
Post Views: 73
Yorum bırakın Yanıtı iptal et