
openhands revised this gist on Nov 6, 2024.

2 files changed, 186 insertions

hn_categorizer.py (file created)

1 + """
2 + LangChain Hacker News Story Categorizer Tutorial
3 +
4 + This script demonstrates key LangChain concepts by building a simple application
5 + that fetches Hacker News stories and categorizes them using OpenAI's GPT model.
6 +
7 + Key LangChain Concepts Demonstrated:
8 + 1. Chains: Sequences of operations that can be combined
9 + 2. Prompts: Structured way to interact with LLMs
10 + 3. LLMs: Language Model integration
11 + 4. Pydantic Output Parsing: Type-safe structured output handling
12 + """
13 +
import requests
from dotenv import load_dotenv
from typing import List, Dict

from rich.console import Console
from rich.panel import Panel
from rich.table import Table

from pydantic import BaseModel, Field

# These import paths match the pinned langchain==0.0.340; newer LangChain releases
# moved ChatOpenAI and these helpers into separate packages (e.g. langchain-openai).
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser

# Load environment variables (OPENAI_API_KEY in particular) from a .env file, if present
load_dotenv()

# Initialize Rich console for pretty output
console = Console()

# Define our Pydantic model for structured output
class StoryAnalysis(BaseModel):
    """
    Pydantic model for story analysis output.
    Using Pydantic provides type safety and validation.
    """
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    subcategory: str = Field(description="A more specific subcategory")
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")

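# For illustration only: with this model, the LLM is asked to reply with a JSON object
# shaped roughly like the following (the values are invented, not real output):
#   {
#     "category": "Tech",
#     "subcategory": "Open Source",
#     "summary": "A short recap of what the story covers."
#   }
# Later, output_parser.parse() turns such a reply into a typed StoryAnalysis instance.
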
def fetch_hn_stories(limit: int = 5) -> List[Dict]:
    """
    Fetch top stories from Hacker News.

    This function demonstrates basic API interaction outside of LangChain.
    Later, we'll combine this with LangChain components.
    """
    # Get top story IDs
    response = requests.get("https://hacker-news.firebaseio.com/v0/topstories.json", timeout=10)
    story_ids = response.json()[:limit]

    stories = []
    for story_id in story_ids:
        # Fetch individual story details
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        story_response = requests.get(story_url, timeout=10)
        story_data = story_response.json()

        if story_data and 'title' in story_data:
            stories.append({
                'title': story_data['title'],
                'url': story_data.get('url', ''),
                'score': story_data.get('score', 0)
            })

    return stories

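# For illustration, fetch_hn_stories(limit=2) returns a list shaped like the following
# (titles, URLs, and scores here are invented examples):
#   [{'title': 'Show HN: ...', 'url': 'https://example.com', 'score': 42},
#    {'title': 'Ask HN: ...', 'url': '', 'score': 17}]
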
def setup_langchain_categorizer():
    """
    Set up the LangChain components for story categorization.

    This demonstrates several key LangChain concepts:
    1. Pydantic Output Parser: Type-safe structured output
    2. Prompt Templates: Create reusable prompts
    3. LLM Chain: Combine prompts and models
    """

    # Create a Pydantic output parser
    # This is a more modern approach than using ResponseSchema
    output_parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

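    # For reference (paraphrased, not the library's exact wording): the parser's
    # get_format_instructions() returns a block of text telling the model to reply with
    # a JSON object matching StoryAnalysis's JSON schema, including each field's name,
    # type, and description. That text is injected into the prompt below through the
    # {format_instructions} placeholder.
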
    # Create a prompt template with format instructions
    # This shows how to create structured prompts in LangChain
    prompt = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # Initialize the language model
    # ChatOpenAI is a LangChain wrapper around OpenAI's chat models
    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # Create a chain that combines the prompt and model
    # LLMChain is a basic building block in LangChain for combining prompts with LLMs
    chain = LLMChain(llm=llm, prompt=prompt)

    return chain, output_parser

def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Display the results in a pretty format using Rich.

    This function shows how to work with the structured output from our LangChain pipeline.
    The categorized_results are strongly typed thanks to our Pydantic model.
    """
    table = Table(title="Hacker News Stories Analysis", show_header=True, header_style="bold magenta")
    table.add_column("Title", style="cyan", no_wrap=False)
    table.add_column("Category", style="green", no_wrap=True)
    table.add_column("Subcategory", style="yellow", no_wrap=True)
    table.add_column("Summary", style="white", no_wrap=False)

    for story, result in zip(stories, categorized_results):
        table.add_row(
            story['title'],
            result.category,  # Note: Using dot notation because result is a Pydantic model
            result.subcategory,
            result.summary
        )

    console.print()
    console.print(Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue"
    ))
    console.print()
    console.print(table)
    console.print()

def main():
    """
    Main function to run the HN story categorizer.

    This function orchestrates the entire pipeline:
    1. Fetch stories from HN API
    2. Set up LangChain components
    3. Process stories through the LLM chain
    4. Display results
    """
    # Show a welcome message
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green"
    ))

    # Fetch stories
    stories = fetch_hn_stories(limit=5)

    # Setup LangChain components
    chain, output_parser = setup_langchain_categorizer()

    # Process each story
    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Get format instructions from the parser
            format_instructions = output_parser.get_format_instructions()

            # Run the chain
            result = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions
            )

            # Parse the result into our Pydantic model
            parsed_result = output_parser.parse(result)
            categorized_results.append(parsed_result)
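            # If the model's reply is not valid JSON for StoryAnalysis, parse() raises
            # langchain's OutputParserException; a more robust version of this loop
            # would catch that and retry or skip the story.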

    # Display results
    display_results(stories, categorized_results)

if __name__ == "__main__":
    main()

requirements.txt (file created)

langchain==0.0.340
openai==1.3.7
python-dotenv==1.0.0
requests==2.31.0
rich==13.7.0
beautifulsoup4==4.12.2
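To try it out: install the dependencies with pip install -r requirements.txt, set OPENAI_API_KEY in your environment or in a local .env file (the script loads it via load_dotenv() at startup), and run python hn_categorizer.py. Note that beautifulsoup4 is pinned here but never imported by the script, so it can be dropped unless you extend the example.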