Bug description
A similarity search through Spring AI's ElasticsearchVectorStore against the Elasticsearch (ES) instance bundled with RagFlow fails with the following error:
DEBUG [ Test worker] org.elasticsearch.client.RestClient : request [POST http://localhost:1200/ragflow_9dd482844a7c11f0bfa74650b7bbd3b3/_search?typed_keys=true] returned [HTTP/1.1 400 Bad Request]
[es/search] failed: [search_phase_execution_exception] all shards failed
co.elastic.clients.elasticsearch._types.ElasticsearchException: [es/search] failed: [search_phase_execution_exception] all shards failed
at co.elastic.clients.transport.ElasticsearchTransportBase.getApiResponse(ElasticsearchTransportBase.java:357)
at co.elastic.clients.transport.ElasticsearchTransportBase.performRequest(ElasticsearchTransportBase.java:141)
at co.elastic.clients.elasticsearch.ElasticsearchClient.search(ElasticsearchClient.java:5374)
at co.elastic.clients.elasticsearch.ElasticsearchClient.search(ElasticsearchClient.java:5418)
at org.springframework.ai.vectorstore.elasticsearch.ElasticsearchVectorStore.doSimilaritySearch(ElasticsearchVectorStore.java:271)
at org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore.lambda$similaritySearch$7(AbstractObservationVectorStore.java:126)
at io.micrometer.observation.Observation.observe(Observation.java:564)
at org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore.similaritySearch(AbstractObservationVectorStore.java:125)
at org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor.before(QuestionAnswerAdvisor.java:119)
at org.springframework.ai.chat.client.advisor.api.BaseAdvisor.adviseCall(BaseAdvisor.java:51)
at org.springframework.ai.chat.client.advisor.DefaultAroundAdvisorChain.lambda$nextCall$1(DefaultAroundAdvisorChain.java:110)
at io.micrometer.observation.Observation.observe(Observation.java:564)
at org.springframework.ai.chat.client.advisor.DefaultAroundAdvisorChain.nextCall(DefaultAroundAdvisorChain.java:110)
at org.springframework.ai.chat.client.DefaultChatClient$DefaultCallResponseSpec.lambda$doGetObservableChatClientResponse$1(DefaultChatClient.java:469)
at io.micrometer.observation.Observation.observe(Observation.java:564)
at org.springframework.ai.chat.client.DefaultChatClient$DefaultCallResponseSpec.doGetObservableChatClientResponse(DefaultChatClient.java:467)
at org.springframework.ai.chat.client.DefaultChatClient$DefaultCallResponseSpec.doGetObservableChatClientResponse(DefaultChatClient.java:446)
at org.springframework.ai.chat.client.DefaultChatClient$DefaultCallResponseSpec.content(DefaultChatClient.java:441)
at cn.edu.zhku.dxy.DxyApplicationTests.chat(DxyApplicationTests.java:60)
at java.base/java.lang.reflect.Method.invoke(Method.java:568)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1511)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1511)
However, when I call the ES search endpoint with an API tool, it returns HTTP 200 with results as expected.
Environment
Java: 17 SpringBoot: 3.5.0 spring.dependency-management: 1.1.7 Spring AI: 1.0.0
Vector store: Elasticsearch 8.11.3 (bundled with RagFlow 0.19)
Steps to reproduce
- Start RagFlow 0.19 with Docker; it includes ES 8.11.3. In RagFlow, create a knowledge base; after files are uploaded and analyzed, their vectors are stored in ES.
- Querying ES with an API tool returns results without any issues (RagFlow's ES requires authentication with its default account and password).
- In the Spring AI application, configure the Elasticsearch vector store and write a test.
application.yml
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# all indentation. The placement of `siliconflow` under `spring.ai` is assumed
# from key order — confirm against the real application.yml.
spring:
  # Connection settings for the Elasticsearch instance.
  elasticsearch:
    uris: http://localhost:1200
    username: elastic
    password: infini_rag_flow
  ai:
    vectorstore:
      elasticsearch:
        # Reuse the existing index instead of letting Spring AI create one.
        initialize-schema: false
        index-name: ragflow_9dd482844a7c11f0bfa74650b7bbd3b3
        dimensions: 1024
        similarity: cosine
        embedding-field-name: q_1024_vec
    siliconflow:
      embeddingModel: BAAI/bge-large-zh-v1.5
    openai:
      api-key: sk-uoB***
      chat:
        completions-path: /v1/chat/completions
        options:
          model: gpt-4o-mini
logging:
  level:
    # Log the low-level REST client requests/responses.
    org.elasticsearch: debug
test
/**
 * Reproduction test: sends one question through a {@code ChatClient} that has a
 * {@code QuestionAnswerAdvisor} backed by {@code vectorStore} attached.
 * The returned content is not asserted on; the test only drives the call chain.
 */
@Test
void chat() {
    // Template placeholders are delimited with '<' and '>' instead of the renderer's defaults.
    StTemplateRenderer angleBracketRenderer = StTemplateRenderer.builder()
            .startDelimiterToken('<')
            .endDelimiterToken('>')
            .build();

    PromptTemplate customPromptTemplate = PromptTemplate.builder()
            .renderer(angleBracketRenderer)
            .template("""
                    <query>
                    Context information is below.
                    ---------------------
                    <question_answer_context>
                    ---------------------
                    Given the context information and no prior knowledge, answer the query.
                    Follow these rules:
                    1. If the answer is not in the context, just say that you don't know.
                    2. Avoid statements like "Based on the context..." or "The provided information...".
                    """)
            .build();

    String userQuestion = "车费报销?";

    // Advisor that uses the vector store and the custom prompt template above.
    QuestionAnswerAdvisor ragAdvisor = QuestionAnswerAdvisor.builder(vectorStore)
            .promptTemplate(customPromptTemplate)
            .build();

    ChatClient chatClient = ChatClient.builder(chatModel).build();

    // The call that surfaces the reported ElasticsearchException.
    String answer = chatClient
            .prompt(userQuestion)
            .advisors(ragAdvisor)
            .call()
            .content();
}
Expected behavior
The search request issued by Spring AI should return HTTP 200 and include the matching results.
Comment From: Jin-Cheng-Ming
ES mapping:
{
"ragflow_9dd482844a7c11f0bfa74650b7bbd3b3": {
"mappings": {
"dynamic_templates": [
{
"int": {
"match": "*_int",
"mapping": {
"store": "true",
"type": "integer"
}
}
},
{
"ulong": {
"match": "*_ulong",
"mapping": {
"store": "true",
"type": "unsigned_long"
}
}
},
{
"long": {
"match": "*_long",
"mapping": {
"store": "true",
"type": "long"
}
}
},
{
"short": {
"match": "*_short",
"mapping": {
"store": "true",
"type": "short"
}
}
},
{
"numeric": {
"match": "*_flt",
"mapping": {
"store": true,
"type": "float"
}
}
},
{
"tks": {
"match": "*_tks",
"mapping": {
"analyzer": "whitespace",
"similarity": "scripted_sim",
"store": true,
"type": "text"
}
}
},
{
"ltks": {
"match": "*_ltks",
"mapping": {
"analyzer": "whitespace",
"store": true,
"type": "text"
}
}
},
{
"kwd": {
"match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
"match_pattern": "regex",
"mapping": {
"similarity": "boolean",
"store": true,
"type": "keyword"
}
}
},
{
"dt": {
"match": "^.*(_dt|_time|_at)$",
"match_pattern": "regex",
"mapping": {
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
"store": true,
"type": "date"
}
}
},
{
"nested": {
"match": "*_nst",
"mapping": {
"type": "nested"
}
}
},
{
"object": {
"match": "*_obj",
"mapping": {
"dynamic": "true",
"type": "object"
}
}
},
{
"string": {
"match": "^.*_(with_weight|list)$",
"match_pattern": "regex",
"mapping": {
"index": "false",
"store": true,
"type": "text"
}
}
},
{
"rank_feature": {
"match": "*_fea",
"mapping": {
"type": "rank_feature"
}
}
},
{
"rank_features": {
"match": "*_feas",
"mapping": {
"type": "rank_features"
}
}
},
{
"dense_vector": {
"match": "*_512_vec",
"mapping": {
"dims": 512,
"index": true,
"similarity": "cosine",
"type": "dense_vector"
}
}
},
{
"dense_vector": {
"match": "*_768_vec",
"mapping": {
"dims": 768,
"index": true,
"similarity": "cosine",
"type": "dense_vector"
}
}
},
{
"dense_vector": {
"match": "*_1024_vec",
"mapping": {
"dims": 1024,
"index": true,
"similarity": "cosine",
"type": "dense_vector"
}
}
},
{
"dense_vector": {
"match": "*_1536_vec",
"mapping": {
"dims": 1536,
"index": true,
"similarity": "cosine",
"type": "dense_vector"
}
}
},
{
"binary": {
"match": "*_bin",
"mapping": {
"type": "binary"
}
}
}
],
"date_detection": true,
"properties": {
"content_ltks": {
"type": "text",
"store": true,
"analyzer": "whitespace"
},
"content_sm_ltks": {
"type": "text",
"store": true,
"analyzer": "whitespace"
},
"content_with_weight": {
"type": "text",
"index": false,
"store": true
},
"create_time": {
"type": "date",
"store": true,
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss"
},
"create_timestamp_flt": {
"type": "float",
"store": true
},
"doc_id": {
"type": "keyword",
"store": true,
"similarity": "boolean"
},
"docnm_kwd": {
"type": "keyword",
"store": true,
"similarity": "boolean"
},
"img_id": {
"type": "keyword",
"store": true,
"similarity": "boolean"
},
"kb_id": {
"type": "keyword",
"store": true,
"similarity": "boolean"
},
"lat_lon": {
"type": "geo_point",
"store": true
},
"q_1024_vec": {
"type": "dense_vector",
"dims": 1024,
"index": true,
"similarity": "cosine"
},
"title_tks": {
"type": "text",
"store": true,
"analyzer": "whitespace",
"similarity": "scripted_sim"
},
"top_int": {
"type": "integer",
"store": true
}
}
}
}
}