Create Graph from Triples¶
Create a WhyHow graph from triples using LangChain LLMGraphTransformer
In [ ]:
Copied!
import itertools
import os
import pickle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from whyhow import WhyHow, Node, Relation, Triple
import itertools
import os
import pickle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from whyhow import WhyHow, Node, Relation, Triple
In [ ]:
Copied!
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
In [ ]:
Copied!
llm = ChatOpenAI(model="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)
llm = ChatOpenAI(model="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)
Load Text from Selected File¶
In [ ]:
Copied!
filepath = "{YOUR FILEPATH}"
filepath = "{YOUR FILEPATH}"
In [ ]:
Copied!
loader = PyPDFLoader(filepath)
docs = loader.load()
loader = PyPDFLoader(filepath)
docs = loader.load()
Process Document¶
In [ ]:
Copied!
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
split_docs = text_splitter.split_documents(docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
split_docs = text_splitter.split_documents(docs)
Convert Processed Text to Triples¶
In [ ]:
Copied!
# Select the entity types and relations you want for your triples
allowed_nodes=["Company", "Risk Factor", "Legal Proceeding", "Business Segment"]
allowed_relationships=["AFFECTS", "INVOLVED_IN", "WORKED_AT", "POSES_RISK"]
# Select the entity types and relations you want for your triples
allowed_nodes=["Company", "Risk Factor", "Legal Proceeding", "Business Segment"]
allowed_relationships=["AFFECTS", "INVOLVED_IN", "WORKED_AT", "POSES_RISK"]
In [ ]:
Copied!
# Build a transformer constrained to the node and relationship types chosen
# in `allowed_nodes` / `allowed_relationships` earlier in the notebook.
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
)
# Build a transformer constrained to the node and relationship types chosen
# in `allowed_nodes` / `allowed_relationships` earlier in the notebook.
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
)
In [ ]:
Copied!
graph_documents_props = await llm_transformer_props.aconvert_to_graph_documents(split_docs)
graph_documents_props = await llm_transformer_props.aconvert_to_graph_documents(split_docs)
In [ ]:
Copied!
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
In [ ]:
Copied!
# Preview the relationships extracted from the first 10 chunks.
# Slicing (rather than indexing 0..9) avoids an IndexError when the
# document produced fewer than 10 chunks.
for graph_document in graph_documents_props[:10]:
    print(graph_document.relationships)
# Preview the relationships extracted from the first 10 chunks.
# Slicing (rather than indexing 0..9) avoids an IndexError when the
# document produced fewer than 10 chunks.
for graph_document in graph_documents_props[:10]:
    print(graph_document.relationships)
In [ ]:
Copied!
# Collect each chunk's relationship list, then flatten them into one list.
triples = [graph_document.relationships for graph_document in graph_documents_props]
flat_triples = list(itertools.chain.from_iterable(triples))
# Collect each chunk's relationship list, then flatten them into one list.
triples = [graph_document.relationships for graph_document in graph_documents_props]
flat_triples = list(itertools.chain.from_iterable(triples))
In [ ]:
Copied!
flat_triples[0]
flat_triples[0]
[Optional] Store the Triples¶
In [ ]:
Copied!
# Write the flattened triple list to a pickle file on disk.
with open('langchain_triples_improved.pkl', 'wb') as pickle_file:
    pickle.dump(flat_triples, pickle_file)
# Write the flattened triple list to a pickle file on disk.
with open('langchain_triples_improved.pkl', 'wb') as pickle_file:
    pickle.dump(flat_triples, pickle_file)
WhyHow Integration¶
In [346]:
Copied!
# Initialise the client with your WhyHow API key
client = WhyHow(api_key=os.environ["WHYHOW_API_KEY"], base_url="https://api.whyhow.ai")
# Initialise the client with your WhyHow API key
client = WhyHow(api_key=os.environ["WHYHOW_API_KEY"], base_url="https://api.whyhow.ai")
Initialise the Workspace¶
In [331]:
Copied!
workspace = client.workspaces.create(name="Amazon 10-K Testing")
# or, if you already have a workspace
# workspace = client.workspaces.get(workspace_id="<workspace_id>")
workspace = client.workspaces.create(name="Amazon 10-K Testing")
# or, if you already have a workspace
# workspace = client.workspaces.get(workspace_id="<workspace_id>")
[Optional] Load the Triples¶
In [ ]:
Copied!
# Reload the triples written by the optional "Store the Triples" cell; the
# filename must match the one that cell writes.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('langchain_triples_improved.pkl', 'rb') as file:
    flat_triples = pickle.load(file)
# Reload the triples written by the optional "Store the Triples" cell; the
# filename must match the one that cell writes.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('langchain_triples_improved.pkl', 'rb') as file:
    flat_triples = pickle.load(file)
In [ ]:
Copied!
flat_triples[0]
flat_triples[0]
Preprocess the Triples¶
In [ ]:
Copied!
def format_triple(triple):
    """
    Convert one LangChain relationship into a WhyHow Triple.

    Args:
        triple: An object exposing ``source``, ``target`` and ``type``;
            ``source`` and ``target`` must each expose ``id`` and ``type``.

    Returns:
        Triple: The head node is built from the source, the tail node from
        the target, and the relation is named after ``triple.type``.
    """
    head_node = Node(name=triple.source.id, label=triple.source.type)
    relation = Relation(name=triple.type)
    tail_node = Node(name=triple.target.id, label=triple.target.type)
    return Triple(head=head_node, relation=relation, tail=tail_node)
def format_triple(triple):
    """
    Convert one LangChain relationship into a WhyHow Triple.

    Args:
        triple: An object exposing ``source``, ``target`` and ``type``;
            ``source`` and ``target`` must each expose ``id`` and ``type``.

    Returns:
        Triple: The head node is built from the source, the tail node from
        the target, and the relation is named after ``triple.type``.
    """
    head_node = Node(name=triple.source.id, label=triple.source.type)
    relation = Relation(name=triple.type)
    tail_node = Node(name=triple.target.id, label=triple.target.type)
    return Triple(head=head_node, relation=relation, tail=tail_node)
In [ ]:
Copied!
# Convert every LangChain relationship into a WhyHow Triple
formatted_triples = [format_triple(triple) for triple in flat_triples]
# Convert every LangChain relationship into a WhyHow Triple
formatted_triples = [format_triple(triple) for triple in flat_triples]
In [ ]:
Copied!
# View the first formatted triple
formatted_triples[:1]
# View the first formatted triple
formatted_triples[:1]
Create the graph¶
In [333]:
Copied!
graph = client.graphs.create_graph_from_triples(
workspace_id=workspace.workspace_id,
triples=formatted_triples,
name="Amazon 10-K Graph (triple id testing 5)"
)
graph = client.graphs.create_graph_from_triples(
workspace_id=workspace.workspace_id,
triples=formatted_triples,
name="Amazon 10-K Graph (triple id testing 5)"
)
Query the graph¶
In [337]:
Copied!
# Query the graph for Amazon's business segments.
# Use the id of the `graph` object created in the "Create the graph" cell
# rather than a pasted literal, so the query targets the graph this
# notebook run actually built.
question = "What are Amazons main business segments?"
query_response = client.graphs.query_unstructured(
    graph_id=graph.graph_id,
    query=question,
)
# Query the graph for Amazon's business segments.
# Use the id of the `graph` object created in the "Create the graph" cell
# rather than a pasted literal, so the query targets the graph this
# notebook run actually built.
question = "What are Amazons main business segments?"
query_response = client.graphs.query_unstructured(
    graph_id=graph.graph_id,
    query=question,
)
In [338]:
Copied!
print(f"LLM Response: {query_response.answer}")
print(f"Returned Triples: {query_response.triples}")
print(f"LLM Response: {query_response.answer}")
print(f"Returned Triples: {query_response.triples}")
LLM Response: North America, Amazon Web Services (AWS), International. Returned Triples: [Triple(triple_id='66cbd0fc781bfda4a5a4e751', head=Node(node_id='66cbd0f8781bfda4a5a4e2fc', label='Company', name='Amazon.Com, Inc.', chunk_ids=[], properties={}, created_at=None, updated_at=None), tail=Node(node_id='66cbd0f8781bfda4a5a4e300', label='Business segment', name='North America', chunk_ids=[], properties={}, created_at=None, updated_at=None), relation=Relation(name='INVOLVED_IN', properties={}), chunk_ids=[], created_at=None, updated_at=None), Triple(triple_id='66cbd0fc781bfda4a5a4e752', head=Node(node_id='66cbd0f8781bfda4a5a4e2fc', label='Company', name='Amazon.Com, Inc.', chunk_ids=[], properties={}, created_at=None, updated_at=None), tail=Node(node_id='66cbd0f8781bfda4a5a4e301', label='Business segment', name='International', chunk_ids=[], properties={}, created_at=None, updated_at=None), relation=Relation(name='INVOLVED_IN', properties={}), chunk_ids=[], created_at=None, updated_at=None), Triple(triple_id='66cbd0fc781bfda4a5a4e753', head=Node(node_id='66cbd0f8781bfda4a5a4e2fc', label='Company', name='Amazon.Com, Inc.', chunk_ids=[], properties={}, created_at=None, updated_at=None), tail=Node(node_id='66cbd0f8781bfda4a5a4e302', label='Business segment', name='Amazon Web Services (Aws)', chunk_ids=[], properties={}, created_at=None, updated_at=None), relation=Relation(name='INVOLVED_IN', properties={}), chunk_ids=[], created_at=None, updated_at=None)]