Naposledy aktivní 1730886079

openhands revidoval tento gist 1730886079. Přejít na revizi

2 files changed, 189 insertions

hn_categorizer.py(vytvořil soubor)

@@ -0,0 +1,183 @@
1 + """
2 + LangChain Hacker News Story Categorizer Tutorial
3 +
4 + This script demonstrates key LangChain concepts by building a simple application
5 + that fetches Hacker News stories and categorizes them using OpenAI's GPT model.
6 +
7 + Key LangChain Concepts Demonstrated:
8 + 1. Chains: Sequences of operations that can be combined
9 + 2. Prompts: Structured way to interact with LLMs
10 + 3. LLMs: Language Model integration
11 + 4. Pydantic Output Parsing: Type-safe structured output handling
12 + """
13 +
14 + import os
15 + import requests
16 + from typing import List, Dict
17 + from rich.console import Console
18 + from rich.panel import Panel
19 + from rich.table import Table
20 + from rich import print as rprint
21 + from pydantic import BaseModel, Field
22 +
23 + from langchain.chains import LLMChain
24 + from langchain.chat_models import ChatOpenAI
25 + from langchain.prompts import ChatPromptTemplate
26 + from langchain.output_parsers import PydanticOutputParser
27 +
# Module-level Rich console shared by the functions in this script for pretty terminal output
console = Console()
30 +
# Your OpenAI API key. The placeholder is installed only when the environment
# does not already define OPENAI_API_KEY, so a real key exported in the shell
# is never overwritten. Replace the placeholder (or export the variable)
# before running the script.
os.environ.setdefault("OPENAI_API_KEY", "YOUR-OPENAI-API-KEY")
33 +
# Structured-output schema: the shape every LLM reply is parsed into
class StoryAnalysis(BaseModel):
    """
    Analysis of a single Hacker News story.

    Declaring the expected output as a Pydantic model gives the parsed
    result typed, validated attributes. The ``description`` of each field
    is part of the schema, so it is kept verbatim.
    """

    category: str = Field(
        description="The main category of the story (Tech, Business, Science, etc.)",
    )
    subcategory: str = Field(
        description="A more specific subcategory",
    )
    summary: str = Field(
        description="A brief 1-2 sentence summary of the story's main points",
    )
43 +
def fetch_hn_stories(limit: int = 5, timeout: float = 10.0) -> List[Dict]:
    """
    Fetch top stories from Hacker News.

    This function demonstrates basic API interaction outside of LangChain.
    Its output is later combined with the LangChain components.

    Args:
        limit: Maximum number of stories to fetch. Zero or a negative value
            yields an empty list without touching the network.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of dicts with the keys 'title', 'url' (empty string when the
        item has no URL) and 'score' (0 when the item has no score). Items
        that come back empty or without a title are skipped, so the list
        may be shorter than ``limit``.

    Raises:
        requests.RequestException: On a timeout, a connection failure, or an
            HTTP error status from the API.
    """
    # A negative limit would otherwise slice to "all but the last N" ids
    if limit <= 0:
        return []

    base_url = "https://hacker-news.firebaseio.com/v0"
    stories = []

    # One session reuses the connection across the 1 + limit requests
    with requests.Session() as session:
        # Get top story IDs
        response = session.get(f"{base_url}/topstories.json", timeout=timeout)
        response.raise_for_status()
        story_ids = (response.json() or [])[:limit]

        for story_id in story_ids:
            # Fetch individual story details
            story_response = session.get(
                f"{base_url}/item/{story_id}.json", timeout=timeout
            )
            story_response.raise_for_status()
            story_data = story_response.json()

            # Skip empty responses and items that carry no title
            if story_data and 'title' in story_data:
                stories.append({
                    'title': story_data['title'],
                    'url': story_data.get('url', ''),
                    'score': story_data.get('score', 0),
                })

    return stories
70 +
def setup_langchain_categorizer(model: str = "gpt-3.5-turbo", temperature: float = 0):
    """
    Set up the LangChain components for story categorization.

    This demonstrates several key LangChain concepts:
    1. Pydantic Output Parser: Type-safe structured output
    2. Prompt Templates: Create reusable prompts
    3. LLM Chain: Combine prompts and models

    Args:
        model: Name of the OpenAI chat model passed to ChatOpenAI.
        temperature: Sampling temperature passed to ChatOpenAI.

    Returns:
        A ``(chain, output_parser)`` tuple. The chain expects the inputs
        ``title``, ``url`` and ``format_instructions``; the parser converts
        the chain's text output into a StoryAnalysis instance.
    """
    # Parser that turns the model's text reply into a StoryAnalysis object
    output_parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Prompt template with placeholders for the story and the parser's
    # format instructions; the caller supplies all three at run time
    prompt = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # ChatOpenAI is a LangChain wrapper around OpenAI's chat models
    llm = ChatOpenAI(temperature=temperature, model=model)

    # LLMChain combines the prompt with the model into one callable unit
    chain = LLMChain(llm=llm, prompt=prompt)

    return chain, output_parser
107 +
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analyzed stories as a Rich table below a title banner.

    Each story is paired positionally with its analysis; the title comes from
    the story dict and the remaining cells from the StoryAnalysis attributes.
    """
    # (header, style, no_wrap) for every column, in display order
    column_specs = (
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    )

    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )
    for header, colour, single_line in column_specs:
        results_table.add_column(header, style=colour, no_wrap=single_line)

    for story, analysis in zip(stories, categorized_results):
        # Attribute access works because each analysis is a Pydantic model
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    banner = Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    )

    # Blank line, banner, blank line, table, blank line
    for renderable in (banner, results_table):
        console.print()
        console.print(renderable)
    console.print()
138 +
def main():
    """
    Main function to run the HN story categorizer.

    This function orchestrates the entire pipeline:
    1. Fetch stories from HN API
    2. Set up LangChain components
    3. Process stories through the LLM chain
    4. Display results

    A story whose model reply cannot be parsed into a StoryAnalysis is
    reported and skipped, so one malformed reply does not abort the run.
    """
    # Imported locally: only this function needs the parser's exception type
    from langchain.schema import OutputParserException

    # Show a welcome message
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green"
    ))

    # Fetch stories
    stories = fetch_hn_stories(limit=5)

    # Setup LangChain components
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions depend only on the parser, so build them once
    format_instructions = output_parser.get_format_instructions()

    # Kept in lockstep so each displayed story lines up with its own analysis
    analyzed_stories = []
    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain
            result = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions
            )

            # Parse the result into our Pydantic model
            try:
                parsed_result = output_parser.parse(result)
            except OutputParserException:
                # markup=False: titles may contain square brackets
                console.print(
                    f"Skipping story (unparsable model output): {story['title']}",
                    style="yellow",
                    markup=False,
                )
                continue

            analyzed_stories.append(story)
            categorized_results.append(parsed_result)

    # Display results
    display_results(analyzed_stories, categorized_results)


if __name__ == "__main__":
    main()

requirements.txt(vytvořil soubor)

@@ -0,0 +1,6 @@
1 + langchain==0.0.340
2 + openai==1.3.7
3 + python-dotenv==1.0.0
4 + requests==2.31.0
5 + rich==13.7.0
6 + beautifulsoup4==4.12.2
Novější Starší