Create Graph from Chunks
Demonstration of how to create a graph from triples containing raw text chunks, using the WhyHow SDK and the LangChain LLMGraphTransformer.
In [ ]:
Copied!
# Install the LangChain packages and pypdf (the leading `!` runs the command in a shell from the notebook).
# NOTE(review): the `whyhow` SDK imported below is not installed by this command — confirm it is already available.
!pip install langchain langchain_community langchain_experimental langchain_openai pypdf
# Install the LangChain packages and pypdf (the leading `!` runs the command in a shell from the notebook).
# NOTE(review): the `whyhow` SDK imported below is not installed by this command — confirm it is already available.
!pip install langchain langchain_community langchain_experimental langchain_openai pypdf
In [ ]:
Copied!
from whyhow import WhyHow
from whyhow.schemas import Chunk, GraphChunk, Triple, Node, Relation
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from whyhow import WhyHow
from whyhow.schemas import Chunk, GraphChunk, Triple, Node, Relation
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
In [ ]:
Copied!
# WhyHow client pointed at https://api.whyhow.ai; replace the placeholder
# with your own API key before running.
WHYHOW_API_KEY = "<YOUR_WHYHOW_API_KEY>"
client = WhyHow(
    api_key=WHYHOW_API_KEY,
    base_url="https://api.whyhow.ai",
)
# WhyHow client pointed at https://api.whyhow.ai; the key is left empty
# here and must be filled in with your own API key before running.
WHYHOW_API_KEY = ""
client = WhyHow(
    api_key=WHYHOW_API_KEY,
    base_url="https://api.whyhow.ai",
)
In [ ]:
Copied!
# OpenAI chat model handed to the graph transformer; replace the
# placeholder with your own API key before running.
OPENAI_API_KEY = "<YOUR_OPENAI_API_KEY>"
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
)
# Transformer constructed without allowed node or relationship lists.
llm_transformer = LLMGraphTransformer(llm=llm)
# OpenAI chat model handed to the graph transformer; the key is left
# empty here and must be filled in with your own API key before running.
OPENAI_API_KEY = ""
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
)
# Transformer constructed without allowed node or relationship lists.
llm_transformer = LLMGraphTransformer(llm=llm)
Load PDF
In [ ]:
Copied!
# Load the source PDF into LangChain documents.
pdf_path = "./documents/Harry_Potter_and_the_Chamber_of_Secrets.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()
# Load the source PDF into LangChain documents.
pdf_path = "./documents/Harry_Potter_and_the_Chamber_of_Secrets.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()
Process the PDF into chunks
In [ ]:
Copied!
# Split the loaded documents into non-overlapping chunks of size 1000,
# which stays under the 1048 chunk-size limit.
# NOTE(review): where the 1048 limit comes from is not shown here — confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
split_docs = text_splitter.split_documents(docs)
# Split the loaded documents into non-overlapping chunks of size 1000,
# which stays under the 1048 chunk-size limit.
# NOTE(review): where the 1048 limit comes from is not shown here — confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
split_docs = text_splitter.split_documents(docs)
Define the allowed nodes and relations for the graph.
In [ ]:
Copied!
# Select the entity types and relations you want for your triples.
# Both lists are passed to the LLMGraphTransformer as `allowed_nodes` and
# `allowed_relationships`.
allowed_nodes = [
"Character", # Represents all characters in the story
"Location", # Represents all places in the story
"Object", # Represents key objects in the story
"Event", # Represents significant events or actions
"Emotion", # Represents feelings or states of being
"Concept", # Represents abstract ideas or themes
"Action", # Represents actions or interactions
]
allowed_relationships = [
"interacts with", # Interaction between characters or objects
"travels to", # Movement from one location to another
"experiences", # Emotional or conceptual experience
"talks to", # Conversation between characters
"controls", # Exercising power over something or someone
"resides in", # Association with a specific location
"uses", # Utilisation of an object
]
# Select the entity types and relations you want for your triples.
# Both lists are passed to the LLMGraphTransformer as `allowed_nodes` and
# `allowed_relationships`.
allowed_nodes = [
"Character", # Represents all characters in the story
"Location", # Represents all places in the story
"Object", # Represents key objects in the story
"Event", # Represents significant events or actions
"Emotion", # Represents feelings or states of being
"Concept", # Represents abstract ideas or themes
"Action", # Represents actions or interactions
]
allowed_relationships = [
"interacts with", # Interaction between characters or objects
"travels to", # Movement from one location to another
"experiences", # Emotional or conceptual experience
"talks to", # Conversation between characters
"controls", # Exercising power over something or someone
"resides in", # Association with a specific location
"uses", # Utilisation of an object
]
In [ ]:
Copied!
# Initialise a graph transformer restricted to `allowed_nodes` and
# `allowed_relationships`; the triples themselves are extracted when the
# documents are converted.
llm_transformer_props = LLMGraphTransformer(
    allowed_relationships=allowed_relationships,
    allowed_nodes=allowed_nodes,
    llm=llm,
)
# Initialise a graph transformer restricted to `allowed_nodes` and
# `allowed_relationships`; the triples themselves are extracted when the
# documents are converted.
llm_transformer_props = LLMGraphTransformer(
    allowed_relationships=allowed_relationships,
    allowed_nodes=allowed_nodes,
    llm=llm,
)
In [ ]:
Copied!
lc_graph_documents = await llm_transformer_props.aconvert_to_graph_documents(
split_docs
)
lc_graph_documents = await llm_transformer_props.aconvert_to_graph_documents(
split_docs
)
Convert LangChain GraphDocuments to WhyHow GraphChunks
In [ ]:
Copied!
from whyhow.schemas import GraphChunk
from whyhow.schemas import GraphChunk
In [ ]:
Copied!
def _to_whyhow_node(lc_node):
    """Build a WhyHow ``Node`` from a LangChain graph node.

    The node's ``id`` becomes ``name``, its ``type`` becomes ``label`` and
    its ``properties`` are passed through unchanged.
    """
    return Node(
        name=lc_node.id,
        label=lc_node.type,
        properties=lc_node.properties,
    )


# Build one GraphChunk per LangChain graph document that has relationships:
# the document's source text and metadata become the Chunk, and every
# relationship becomes a head/relation/tail Triple.
whyhow_graph_chunks = []
for doc in lc_graph_documents:
    relationships = doc.relationships
    if not relationships:
        # No relationships found in document
        continue
    chunk = Chunk(
        content=doc.source.page_content,
        user_metadata=doc.source.metadata,
    )
    triples = [
        Triple(
            head=_to_whyhow_node(r.source),
            tail=_to_whyhow_node(r.target),
            relation=Relation(name=r.type, properties=r.properties),
        )
        for r in relationships
    ]
    whyhow_graph_chunks.append(GraphChunk(chunk=chunk, triples=triples))
def _to_whyhow_node(lc_node):
    """Build a WhyHow ``Node`` from a LangChain graph node.

    The node's ``id`` becomes ``name``, its ``type`` becomes ``label`` and
    its ``properties`` are passed through unchanged.
    """
    return Node(
        name=lc_node.id,
        label=lc_node.type,
        properties=lc_node.properties,
    )


# Build one GraphChunk per LangChain graph document that has relationships:
# the document's source text and metadata become the Chunk, and every
# relationship becomes a head/relation/tail Triple.
whyhow_graph_chunks = []
for doc in lc_graph_documents:
    relationships = doc.relationships
    if not relationships:
        # No relationships found in document
        continue
    chunk = Chunk(
        content=doc.source.page_content,
        user_metadata=doc.source.metadata,
    )
    triples = [
        Triple(
            head=_to_whyhow_node(r.source),
            tail=_to_whyhow_node(r.target),
            relation=Relation(name=r.type, properties=r.properties),
        )
        for r in relationships
    ]
    whyhow_graph_chunks.append(GraphChunk(chunk=chunk, triples=triples))
Create a graph in WhyHow using the graph chunks
In [ ]:
Copied!
# Display how many graph chunks were built (documents without relationships were skipped).
len(whyhow_graph_chunks)
# Display how many graph chunks were built (documents without relationships were skipped).
len(whyhow_graph_chunks)
In [ ]:
Copied!
# Create a new workspace and keep its ID for the graph creation call.
workspace = client.workspaces.create(
    name="Demo graph from chunks",
)
workspace_id = workspace.workspace_id
# Create a new workspace and keep its ID for the graph creation call.
workspace = client.workspaces.create(
    name="Demo graph from chunks",
)
workspace_id = workspace.workspace_id
In [ ]:
Copied!
# Create the graph in the workspace from the converted graph chunks.
# Left as a bare expression so the notebook displays the returned value.
client.graphs.create_graph_from_graph_chunks(
    graph_chunks=whyhow_graph_chunks,
    workspace_id=workspace_id,
    name="Graph from chunks",
)
# Create the graph in the workspace from the converted graph chunks.
# Left as a bare expression so the notebook displays the returned value.
client.graphs.create_graph_from_graph_chunks(
    graph_chunks=whyhow_graph_chunks,
    workspace_id=workspace_id,
    name="Graph from chunks",
)