Closed
Description
If the LLM generates a JSON array instead of a JSON object (at least, I think this is what gives rise to the exception), the entity_relation_extractor fails:
Traceback (most recent call last):
File "/<...>/.venv/bin/grag", line 8, in <module>
sys.exit(main())
^^^^^^
File "/<...>/src/grag/main.py", line 43, in main
asyncio.run(process(args, args.data.rglob("*.pdf")))
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/base_events.py", line 687, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "/<...>/src/grag/main.py", line 36, in process
await kg_builder.run_async(file_path=file)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/kg_builder.py", line 126, in run_async
return await self.runner.run({"file_path": file_path, "text": text})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/config/runner.py", line 130, in run
result = await self.pipeline.run(data=run_param)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 640, in run
await orchestrator.run(data)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 327, in run
await asyncio.gather(*tasks)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 153, in run_task
await self.on_task_complete(data=data, task=task, result=res)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 192, in on_task_complete
await asyncio.gather(*[self.run_task(n, data) async for n in self.next(task)])
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 153, in run_task
await self.on_task_complete(data=data, task=task, result=res)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 192, in on_task_complete
await asyncio.gather(*[self.run_task(n, data) async for n in self.next(task)])
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 153, in run_task
await self.on_task_complete(data=data, task=task, result=res)
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 192, in on_task_complete
await asyncio.gather(*[self.run_task(n, data) async for n in self.next(task)])
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 150, in run_task
res = await task.run(inputs)
^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 105, in run
res = await self.execute(**inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/pipeline/pipeline.py", line 95, in execute
component_result = await self.component.run(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/pydantic/_internal/_validate_call.py", line 33, in wrapper_function
return await wrapper(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/components/entity_relation_extractor.py", line 336, in run
chunk_graphs: list[Neo4jGraph] = list(await asyncio.gather(*tasks))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/components/entity_relation_extractor.py", line 280, in run_for_chunk
chunk_graph = await self.extract_for_chunk(schema, examples, chunk)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/<...>/.venv/lib/python3.12/site-packages/neo4j_graphrag/experimental/components/entity_relation_extractor.py", line 228, in extract_for_chunk
chunk_graph = Neo4jGraph(**result)
^^^^^^^^^^^^^^^^^^^^
TypeError: neo4j_graphrag.experimental.components.types.Neo4jGraph() argument after ** must be a mapping, not list
I'm not sure what the proper way to handle this would be, but what I do know is that it isn't fun to lose all progress on larger documents when this happens :(
Metadata
Metadata
Assignees
Labels
No labels