# Kod tanıtımı için https://youtu.be/43mdH4IuTyM adresindeki videoyu izleyebilirsiniz.
# (For a walkthrough of this code, watch the video at the address above.)
# https://medium.com/@foadmk/optimizing-everyday-tasks-with-crewai-fc655ca08944
# summarizer.py
# from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process
# from langchain_openai import ChatOpenAI
from langchain_community.tools import tool
import requests
from PyPDF2 import PdfReader
import re, time, datetime
from langchain_community.llms import Ollama
def show_time(message, t1):
    """
    Print how long has passed since t1 and return the current timestamp.
    message: label printed in front of the elapsed time
    t1: start time, as returned by time.time()
    Returns the finishing time, so it can be used as the next start time.
    """
    finished_at = time.time()
    elapsed = round(finished_at - t1, 4)
    print(f"{message}: {elapsed} sec")
    return finished_at
# Language model shared by the agents in this script: each Agent below is
# constructed with llm=llm. Uses the Ollama wrapper with the "mistral:latest" model.
llm = Ollama(model = "mistral:latest")
# llm = Ollama(model='mixtral') # quite slow because there is not enough memory
# Disabled alternative: an OpenAI chat model instead of the Ollama one.
# Load your OPENAI_API_KEY from your .env file
# load_dotenv()
# Choose the model for the agents
# model = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0.2)
# Tool to download a PDF over HTTP and return its whitespace-normalized text.
# Raises requests.HTTPError on a non-2xx response and requests.Timeout when the
# server does not answer in time.
# NOTE(review): the docstring is deliberately left short and unchanged --
# presumably the @tool decorator exposes it to the agent as the tool
# description; confirm before expanding it.
@tool
def fetch_pdf_content(url: str) -> str:
    """
    Fetches and preprocesses content from a PDF given its URL.
    Returns the text of the PDF.
    """
    import io  # local import: only this tool needs an in-memory byte stream

    # Bound the wait so an unresponsive server cannot hang the run, and fail
    # loudly on HTTP errors instead of trying to parse an error page as a PDF.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse straight from memory: no fixed 'temp.pdf' is left behind in (or
    # clobbered inside) the current working directory.
    pdf = PdfReader(io.BytesIO(response.content))

    # Extract every page exactly once and drop pages that yield no text.
    page_texts = (page.extract_text() for page in pdf.pages)
    text = '\n'.join(chunk for chunk in page_texts if chunk)

    # Optional preprocessing of text: collapse every whitespace run to one space
    processed_text = re.sub(r'\s+', ' ', text).strip()
    return processed_text
# Tool to read a PDF from the local filesystem and return its
# whitespace-normalized text.
# NOTE(review): the docstring is presumably what the @tool decorator shows to
# the agent as the tool description, so it must describe the argument
# accurately -- confirm against the LangChain @tool documentation.
@tool
def get_pdf_content(addr: str) -> str:
    """
    Reads and preprocesses content from a PDF given its local file path.
    Returns the text of the PDF.
    """
    # addr is handed to open(), so it has to be a filesystem path, not a URL.
    with open(addr, 'rb') as f:
        pdf = PdfReader(f)
        # Extract every page exactly once (while the file is still open) and
        # drop pages that yield no text.
        page_texts = (page.extract_text() for page in pdf.pages)
        text = '\n'.join(chunk for chunk in page_texts if chunk)

    # Optional preprocessing of text: collapse every whitespace run to one space
    processed_text = re.sub(r'\s+', ' ', text).strip()
    print(" text extracted and processed.")
    return processed_text
# Agents
# Keyword arguments that are identical for every agent defined below.
_AGENT_DEFAULTS = dict(verbose=True, allow_delegation=False, llm=llm)

# PDF Reader Agent -- the only agent that is given a tool
pdf_reader = Agent(
    role="PDF Content Extractor",
    goal="Extract and preprocess text from a PDF",
    backstory="Specializes in handling and interpreting PDF documents",
    tools=[get_pdf_content],
    **_AGENT_DEFAULTS,
)

# Article Writer Agent
article_writer = Agent(
    role="Article Creator",
    goal="Write a concise and engaging article",
    backstory="Expert in creating informative and engaging articles",
    **_AGENT_DEFAULTS,
)

# Title Creator Agent
title_creator = Agent(
    role="Title Generator",
    goal="Generate a compelling title for the article",
    backstory="Skilled in crafting engaging and relevant titles",
    **_AGENT_DEFAULTS,
)

# Question Generator Agent
question_generator = Agent(
    role="Question Generator",
    goal="Extract meaningful questions from the extracted PDF content",
    backstory="Expert in generating informative and mind opening questions",
    **_AGENT_DEFAULTS,
)
# Tasks
def pdf_reading_task(pdf_url):
    """
    Build the Task that asks the pdf_reader agent to extract a PDF's text.
    pdf_url: location of the PDF, interpolated into the task description
    Returns a new Task bound to the pdf_reader agent.
    """
    # NOTE(review): the wording says "URL", but pdf_reader's only tool
    # (get_pdf_content) passes its argument to open() and the script calls this
    # with a local path -- confirm whether the prompt text should be reworded.
    instructions = f"Read and preprocess the text from the PDF at this URL: {pdf_url}"
    return Task(
        agent=pdf_reader,
        description=instructions,
        expected_output="extracted text from the given pdf",
    )
# Article drafting, handled by the article_writer agent
task_article_drafting = Task(
    agent=article_writer,
    description=(
        "Create a concise article with 8-10 paragraphs based on the extracted PDF content."
    ),
    expected_output=(
        "a concise article with 8-10 paragraphs based on the extracted PDF content"
    ),
)

# Title generation, handled by the title_creator agent
task_title_generation = Task(
    agent=title_creator,
    description="Generate an engaging and relevant title for the article.",
    expected_output="an engaging and relevant title for the article",
)

# Question generation, handled by the question_generator agent
question_generation_task = Task(
    agent=question_generator,
    description=(
        "Generate questions from the extracted PDF content in markdown format."
    ),
    expected_output="at least 10 questions based on the extracted PDF content",
)
# USER INPUTS
# pdf_url = input("Enter the PDF URL: ")
# twitter_url = input("Enter your Twitter URL: ")
# pdf_url = 'https://arxiv.org/pdf/2401.03462.pdf'
addr = "data/aliceeng.pdf"
t1 = time.time()

# Keep a handle on the reading task so that later tasks can refer to it.
reading_task = pdf_reading_task(addr)

# The question task is described as working on "the extracted PDF content",
# but it runs right after the title task. NOTE(review): in a sequential crew a
# task presumably receives only the previous task's output unless `context` is
# set (see the CrewAI Task documentation), which would leave the question
# generator with nothing but the title. Point it at the reading task explicitly.
question_generation_task.context = [reading_task]

# Instantiate and run the crew
crew = Crew(
    agents=[pdf_reader, article_writer, title_creator, question_generator],
    tasks=[
        reading_task,
        task_article_drafting,
        task_title_generation,
        question_generation_task,
    ],
    verbose=2
)

# Execute the crew
result = crew.kickoff()

# Combine results: title, article, then the generated questions
final_article = f"""
{task_title_generation.output.exported_output}\n\n
{task_article_drafting.output.exported_output}\n\n
Questions\n
{question_generation_task.output.exported_output}
"""

separator = "--------------------------"
print(separator)
print(final_article)
print(separator)

# Save the same report to a timestamped file. The encoding is explicit because
# model output can contain characters the platform default codec cannot encode.
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
with open(f"qg02-{now}.txt", "w", encoding="utf-8") as f:
    print(separator, file=f)
    print(final_article, file=f)
    print(separator, file=f)

# Report the total run time once (show_time appends the ": " itself).
t2 = show_time("Total time", t1)