Last active at Unix timestamp 1730886079 (≈ 2024-11-06 UTC)

hn_categorizer.py — original file
"""
LangChain Hacker News Story Categorizer Tutorial

This script demonstrates key LangChain concepts by building a simple application
that fetches Hacker News stories and categorizes them using OpenAI's GPT model.

Key LangChain Concepts Demonstrated:
1. Chains: Sequences of operations that can be combined
2. Prompts: Structured way to interact with LLMs
3. LLMs: Language Model integration
4. Pydantic Output Parsing: Type-safe structured output handling
"""
13
14import os
15import requests
16from typing import List, Dict
17from rich.console import Console
18from rich.panel import Panel
19from rich.table import Table
20from rich import print as rprint
21from pydantic import BaseModel, Field
22
23from langchain.chains import LLMChain
24from langchain.chat_models import ChatOpenAI
25from langchain.prompts import ChatPromptTemplate
26from langchain.output_parsers import PydanticOutputParser
27
# Initialize Rich console for pretty output
console = Console()

# OpenAI API key: prefer a key already exported in the environment (e.g. via
# the shell or a .env file loaded with python-dotenv) and only fall back to
# the placeholder. setdefault avoids clobbering a real key that is already
# set, and keeps real secrets out of source control.
os.environ.setdefault("OPENAI_API_KEY", "YOUR-OPENAI-API-KEY")  # Replace the placeholder or export the variable
33
# Define our Pydantic model for structured output
class StoryAnalysis(BaseModel):
    """
    Structured result of analyzing one Hacker News story.

    Declaring the output schema as a Pydantic model gives us validation and
    type-safe attribute access on the parsed LLM response.
    """

    # The Field descriptions are surfaced to the LLM through the output
    # parser's format instructions, so they double as prompt documentation.
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    subcategory: str = Field(description="A more specific subcategory")
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")
43
def fetch_hn_stories(limit: int = 5) -> List[Dict]:
    """
    Fetch the top `limit` stories from the Hacker News API.

    This function demonstrates basic API interaction outside of LangChain;
    later we combine it with LangChain components.

    Args:
        limit: Maximum number of top stories to return.

    Returns:
        A list of dicts with 'title', 'url' and 'score' keys. Deleted/dead
        items (which the API returns without a title) are skipped, so fewer
        than `limit` entries may come back.

    Raises:
        requests.HTTPError: If the HN API responds with an error status.
        requests.Timeout: If a request exceeds the 10-second timeout.
    """
    # Get the IDs of the current top stories. A timeout prevents the script
    # from hanging indefinitely on a stalled connection, and raise_for_status
    # surfaces HTTP errors before we try to decode the body as JSON.
    response = requests.get(
        "https://hacker-news.firebaseio.com/v0/topstories.json", timeout=10
    )
    response.raise_for_status()
    story_ids = response.json()[:limit]

    stories = []
    for story_id in story_ids:
        # Each story must be fetched individually by its numeric ID.
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        story_response = requests.get(story_url, timeout=10)
        story_response.raise_for_status()
        story_data = story_response.json()

        # The API returns null for deleted items; skip anything untitled.
        if story_data and 'title' in story_data:
            stories.append({
                'title': story_data['title'],
                'url': story_data.get('url', ''),
                'score': story_data.get('score', 0)
            })

    return stories
70
def setup_langchain_categorizer():
    """
    Build the LangChain pieces used to categorize stories.

    Demonstrates three core LangChain concepts:
      1. PydanticOutputParser -- type-safe structured output
      2. ChatPromptTemplate   -- reusable, parameterized prompts
      3. LLMChain             -- glue that runs a prompt through a model

    Returns:
        A (chain, output_parser) tuple: the LLMChain to run per story, and
        the parser that turns its raw text output into a StoryAnalysis.
    """
    # Parser that converts the raw LLM response into our StoryAnalysis model.
    # This is a more modern approach than using ResponseSchema.
    parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Reusable prompt; {format_instructions} is filled in at call time from
    # the parser, so the LLM knows the exact output schema to produce.
    template = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # ChatOpenAI is LangChain's wrapper around OpenAI's chat models.
    # temperature=0 keeps the categorization deterministic.
    model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # LLMChain is the basic building block that ties the prompt to the model.
    categorizer_chain = LLMChain(llm=model, prompt=template)

    return categorizer_chain, parser
107
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analyzed stories as a Rich table.

    The categorized_results entries are StoryAnalysis Pydantic models, so
    their fields are read with attribute access rather than dict lookups.

    Args:
        stories: Raw story dicts fetched from Hacker News.
        categorized_results: One StoryAnalysis per story, in the same order.
    """
    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )
    # Column layout: (heading, style, no_wrap).
    for heading, style, no_wrap in (
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    ):
        results_table.add_column(heading, style=style, no_wrap=no_wrap)

    for story, analysis in zip(stories, categorized_results):
        # analysis is a Pydantic model, hence the dot notation.
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    console.print()
    console.print(Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    ))
    console.print()
    console.print(results_table)
    console.print()
138
def main():
    """
    Entry point: fetch, categorize, and display Hacker News stories.

    Pipeline:
      1. Fetch the top stories from the HN API.
      2. Build the LangChain chain and output parser.
      3. Run each story through the chain and parse the result.
      4. Render everything in a Rich table.
    """
    # Welcome banner.
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green",
    ))

    stories = fetch_hn_stories(limit=5)
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions are identical for every story, so fetch them
    # once rather than per iteration.
    format_instructions = output_parser.get_format_instructions()

    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain, then parse its raw text into a StoryAnalysis.
            raw_output = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions,
            )
            categorized_results.append(output_parser.parse(raw_output))

    display_results(stories, categorized_results)
181
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
requirements.txt — original file
langchain==0.0.340
openai==1.3.7
python-dotenv==1.0.0
requests==2.31.0
rich==13.7.0
beautifulsoup4==4.12.2