ArangoDB is a scalable graph database system to drive value from
connected data, faster. It offers native graphs, an integrated search engine, and JSON support, all via
a single query language. ArangoDB runs on-prem or in the cloud.
This notebook shows how to use LLMs to provide a natural language interface to an ArangoDB database.
Setting up
You can get a local ArangoDB instance running via the ArangoDB Docker image:
Copy
docker run -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb
Copy
pip install -qU python-arango # The ArangoDB Python Driver
pip install -qU adb-cloud-connector # The ArangoDB Cloud Instance provisioner
pip install -qU langchain-openai
pip install -qU langchain
Copy
# Instantiate ArangoDB Database
import json
from adb_cloud_connector import get_temp_credentials
from arango import ArangoClient
con = get_temp_credentials()
db = ArangoClient(hosts=con["url"]).db(
con["dbName"], con["username"], con["password"], verify=True
)
print(json.dumps(con, indent=2))
Copy
Log: requesting new credentials...
Succcess: new credentials acquired
{
"dbName": "TUT3sp29s3pjf1io0h4cfdsq",
"username": "TUTo6nkwgzkizej3kysgdyeo8",
"password": "TUT9vx0qjqt42i9bq8uik4v9",
"hostname": "tutorials.arangodb.cloud",
"port": 8529,
"url": "https://tutorials.arangodb.cloud:8529"
}
Copy
# Instantiate the ArangoDB-LangChain Graph
from langchain_community.graphs import ArangoGraph
graph = ArangoGraph(db)
Populating database
We will rely on the Python Driver to import our GameOfThrones data into our database.
Copy
if db.has_graph("GameOfThrones"):
db.delete_graph("GameOfThrones", drop_collections=True)
db.create_graph(
"GameOfThrones",
edge_definitions=[
{
"edge_collection": "ChildOf",
"from_vertex_collections": ["Characters"],
"to_vertex_collections": ["Characters"],
},
],
)
documents = [
{
"_key": "NedStark",
"name": "Ned",
"surname": "Stark",
"alive": True,
"age": 41,
"gender": "male",
},
{
"_key": "CatelynStark",
"name": "Catelyn",
"surname": "Stark",
"alive": False,
"age": 40,
"gender": "female",
},
{
"_key": "AryaStark",
"name": "Arya",
"surname": "Stark",
"alive": True,
"age": 11,
"gender": "female",
},
{
"_key": "BranStark",
"name": "Bran",
"surname": "Stark",
"alive": True,
"age": 10,
"gender": "male",
},
]
edges = [
{"_to": "Characters/NedStark", "_from": "Characters/AryaStark"},
{"_to": "Characters/NedStark", "_from": "Characters/BranStark"},
{"_to": "Characters/CatelynStark", "_from": "Characters/AryaStark"},
{"_to": "Characters/CatelynStark", "_from": "Characters/BranStark"},
]
db.collection("Characters").import_bulk(documents)
db.collection("ChildOf").import_bulk(edges)
Copy
{'error': False,
'created': 4,
'errors': 0,
'empty': 0,
'updated': 0,
'ignored': 0,
'details': []}
Getting and setting the ArangoDB schema
An initial ArangoDB Schema is generated upon instantiating the ArangoGraph object. Below are the schema’s getter & setter methods should you be interested in viewing or modifying the schema:
Copy
# The schema should be empty here,
# since `graph` was initialized prior to ArangoDB Data ingestion (see above).
import json
print(json.dumps(graph.schema, indent=4))
Copy
{
"Graph Schema": [],
"Collection Schema": []
}
Copy
graph.set_schema()
Copy
# We can now view the generated schema
import json
print(json.dumps(graph.schema, indent=4))
Copy
{
"Graph Schema": [
{
"graph_name": "GameOfThrones",
"edge_definitions": [
{
"edge_collection": "ChildOf",
"from_vertex_collections": [
"Characters"
],
"to_vertex_collections": [
"Characters"
]
}
]
}
],
"Collection Schema": [
{
"collection_name": "ChildOf",
"collection_type": "edge",
"edge_properties": [
{
"name": "_key",
"type": "str"
},
{
"name": "_id",
"type": "str"
},
{
"name": "_from",
"type": "str"
},
{
"name": "_to",
"type": "str"
},
{
"name": "_rev",
"type": "str"
}
],
"example_edge": {
"_key": "266218884025",
"_id": "ChildOf/266218884025",
"_from": "Characters/AryaStark",
"_to": "Characters/NedStark",
"_rev": "_gVPKGSq---"
}
},
{
"collection_name": "Characters",
"collection_type": "document",
"document_properties": [
{
"name": "_key",
"type": "str"
},
{
"name": "_id",
"type": "str"
},
{
"name": "_rev",
"type": "str"
},
{
"name": "name",
"type": "str"
},
{
"name": "surname",
"type": "str"
},
{
"name": "alive",
"type": "bool"
},
{
"name": "age",
"type": "int"
},
{
"name": "gender",
"type": "str"
}
],
"example_document": {
"_key": "NedStark",
"_id": "Characters/NedStark",
"_rev": "_gVPKGPi---",
"name": "Ned",
"surname": "Stark",
"alive": true,
"age": 41,
"gender": "male"
}
}
]
}
Querying the ArangoDB database
We can now use the ArangoDB Graph QA Chain to inquire about our data.
Copy
import os
os.environ["OPENAI_API_KEY"] = "your-key-here"
Copy
from langchain.chains import ArangoGraphQAChain
from langchain_openai import ChatOpenAI
chain = ArangoGraphQAChain.from_llm(
ChatOpenAI(temperature=0), graph=graph, verbose=True
)
Copy
chain.run("Is Ned Stark alive?")
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters
FOR character IN Characters
FILTER character.name == "Ned" AND character.surname == "Stark"
RETURN character.alive
AQL Result:
[True]
> Finished chain.
Copy
'Yes, Ned Stark is alive.'
Copy
chain.run("How old is Arya Stark?")
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters
FOR character IN Characters
FILTER character.name == "Arya" && character.surname == "Stark"
RETURN character.age
AQL Result:
[11]
> Finished chain.
Copy
'Arya Stark is 11 years old.'
Copy
chain.run("Are Arya Stark and Ned Stark related?")
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters, ChildOf
FOR v, e, p IN 1..1 OUTBOUND 'Characters/AryaStark' ChildOf
FILTER p.vertices[-1]._key == 'NedStark'
RETURN p
AQL Result:
[{'vertices': [{'_key': 'AryaStark', '_id': 'Characters/AryaStark', '_rev': '_gVPKGPi--B', 'name': 'Arya', 'surname': 'Stark', 'alive': True, 'age': 11, 'gender': 'female'}, {'_key': 'NedStark', '_id': 'Characters/NedStark', '_rev': '_gVPKGPi---', 'name': 'Ned', 'surname': 'Stark', 'alive': True, 'age': 41, 'gender': 'male'}], 'edges': [{'_key': '266218884025', '_id': 'ChildOf/266218884025', '_from': 'Characters/AryaStark', '_to': 'Characters/NedStark', '_rev': '_gVPKGSq---'}], 'weights': [0, 1]}]
> Finished chain.
Copy
'Yes, Arya Stark and Ned Stark are related. According to the information retrieved from the database, there is a relationship between them. Arya Stark is the child of Ned Stark.'
Copy
chain.run("Does Arya Stark have a dead parent?")
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters, ChildOf
FOR v, e IN 1..1 OUTBOUND 'Characters/AryaStark' ChildOf
FILTER v.alive == false
RETURN e
AQL Result:
[{'_key': '266218884027', '_id': 'ChildOf/266218884027', '_from': 'Characters/AryaStark', '_to': 'Characters/CatelynStark', '_rev': '_gVPKGSu---'}]
> Finished chain.
Copy
'Yes, Arya Stark has a dead parent. The parent is Catelyn Stark.'
Chain modifiers
You can alter the values of the following ArangoGraphQAChain class variables to modify the behaviour of your chain results:
Copy
# Specify the maximum number of AQL Query Results to return
chain.top_k = 10
# Specify whether or not to return the AQL Query in the output dictionary
chain.return_aql_query = True
# Specify whether or not to return the AQL JSON Result in the output dictionary
chain.return_aql_result = True
# Specify the maximum amount of AQL Generation attempts that should be made
chain.max_aql_generation_attempts = 5
# Specify a set of AQL Query Examples, which are passed to
# the AQL Generation Prompt Template to promote few-shot-learning.
# Defaults to an empty string.
chain.aql_examples = """
# Is Ned Stark alive?
RETURN DOCUMENT('Characters/NedStark').alive
# Is Arya Stark the child of Ned Stark?
FOR e IN ChildOf
FILTER e._from == "Characters/AryaStark" AND e._to == "Characters/NedStark"
RETURN e
"""
Copy
chain.run("Is Ned Stark alive?")
# chain("Is Ned Stark alive?") # Returns a dictionary with the AQL Query & AQL Result
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
RETURN DOCUMENT('Characters/NedStark').alive
AQL Result:
[True]
> Finished chain.
Copy
'Yes, according to the information in the database, Ned Stark is alive.'
Copy
chain.run("Is Bran Stark the child of Ned Stark?")
Copy
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
FOR e IN ChildOf
FILTER e._from == "Characters/BranStark" AND e._to == "Characters/NedStark"
RETURN e
AQL Result:
[{'_key': '266218884026', '_id': 'ChildOf/266218884026', '_from': 'Characters/BranStark', '_to': 'Characters/NedStark', '_rev': '_gVPKGSq--_'}]
> Finished chain.
Copy
'Yes, according to the information in the ArangoDB database, Bran Stark is indeed the child of Ned Stark.'
Connect these docs programmatically to Claude, VSCode, and more via MCP for real-time answers.