Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

This project is an example chatbot that analyzes PDF documents using an embedding model and generates LLM answers through a Coarse-to-Fine search (RAG) approach.

- [Evangel](https://wordsbecameflesh.org/) : Catholic Priest AI powered by QueryDoc
- [VerNova](https://www.lordandfaith.com/static/index.html): Presbyterian Pastor AI powered by QueryDoc

## QueryDoc
```bash
QueryDoc/
Expand Down Expand Up @@ -41,11 +44,13 @@ python -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
• For OCR features, install Tesseract and the appropriate language data, e.g.
`sudo apt-get install tesseract-ocr` # Debian/Ubuntu
`sudo apt-get install tesseract-ocr-kor`
`brew install tesseract-ocr` # MacOS
`brew install tesseract-ocr-kor`
• For OCR features, install Tesseract and the appropriate language data,
```bash
sudo apt-get install tesseract-ocr # Debian/Ubuntu
sudo apt-get install tesseract-ocr-kor
brew install tesseract          # MacOS (Homebrew formula is "tesseract", not "tesseract-ocr")
brew install tesseract-lang     # language data, including Korean
```
(On Windows, activate with .\venv\Scripts\activate or a similar command.)

2. Extract PDF & Split into Chunks
Expand Down
Empty file added __init__.py
Empty file.
4 changes: 3 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
from fastapi import FastAPI, Body

from scripts import section_rep_builder
from src.chatbot import PDFChatBot

app = FastAPI()
Expand All @@ -15,7 +16,8 @@
sections_data = json.load(f)

# 2) Load chunk index (sample_chunks_vectors.json)
chunk_index_path = "data/index/sample_chunks_vectors.json"
# section_rep_builder 매서드를 가져옴 / 파일 이름이 바뀌어도 실행 가능
chunk_index_path = section_rep_builder.find_one_vectors_file(index_dir="data/index")
with open(chunk_index_path, 'r', encoding='utf-8') as f:
chunk_index_data = json.load(f)

Expand Down
21 changes: 17 additions & 4 deletions scripts/section_rep_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import numpy as np
from src.inference.embedding_model import embedding_model
from glob import glob

def build_section_reps(sections, chunk_index):
"""
Expand Down Expand Up @@ -45,15 +46,27 @@ def build_section_reps(sections, chunk_index):

return sections

# Resolve the chunk-index JSON dynamically so renaming the source PDF
# does not break the pipeline.

def find_one_vectors_file(index_dir="data/index"):
    """Return the path of one ``*_vectors.json`` file inside *index_dir*.

    Candidates are sorted, so the alphabetically first file wins.
    Raises FileNotFoundError when the directory holds no matching file.
    """
    pattern = os.path.join(index_dir, "*_vectors.json")
    candidates = sorted(glob(pattern))
    if not candidates:
        raise FileNotFoundError("No *_vectors.json files found in index directory.")
    return candidates[0]

if __name__ == "__main__":
# 예시: data/extracted/sections.json (목차 기반 섹션 정보)
sections_json = "data/extracted/sections.json"
# 예시: data/index/sample_chunks_vectors.json (청크 임베딩)
chunk_index_json = "data/index/sample_chunks_vectors.json"

# _vectors.json 파일 중 하나 자동 선택
chunk_index_json = find_one_vectors_file()

print(f"[INFO] Using chunk index file: {chunk_index_json}")

with open(sections_json, 'r', encoding='utf-8') as f:
sections_data = json.load(f)

with open(chunk_index_json, 'r', encoding='utf-8') as f:
chunk_index_data = json.load(f)

Expand All @@ -65,4 +78,4 @@ def build_section_reps(sections, chunk_index):
with open(out_path, 'w', encoding='utf-8') as f:
json.dump(updated_sections, f, ensure_ascii=False, indent=2)

print("Section reps built and saved.")
print("Section reps built and saved.")
61 changes: 52 additions & 9 deletions web_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
from src.chatbot import PDFChatBot
from scripts import pdf_extractor, chunker, build_index, section_rep_builder

# Theme presets for the demo UI.
# Fix: CSS colour values must carry the leading "#" — the original passed
# "ffffff"/"1f1f1f", which are not valid CSS colours and render incorrectly.
lightTheme = gr.themes.Soft().set(
    body_background_fill="#ffffff",
    body_text_color="#000000",
)

darkTheme = gr.themes.Soft().set(
    body_background_fill="#1f1f1f",
    body_text_color="#ffffff",
)
# ---------------------------------------------------------------------
# Persistent user database (credentials + uploads + prompts)
# ---------------------------------------------------------------------
Expand All @@ -23,6 +32,7 @@
_DB_LOCK = threading.RLock()



def _load_user_db():
if os.path.exists(USER_DB_PATH):
with open(USER_DB_PATH, "r", encoding="utf-8") as f:
Expand All @@ -37,9 +47,17 @@ def _save_user_db(db: dict):
with open(USER_DB_PATH, "w", encoding="utf-8") as f:
json.dump(db, f, indent=2)

# Extra instructions appended to the system prompt, one per answer mode
# selected in the UI ("Quick" / "In-Depth" / "Analyse").
QUICK_RESPONSE = "Focus on a concise and accurate answer, 1-2 sentences"
INDEPTH_RESPONSE = "Make a detailed analysis based on the document and provide a comprehensive answer."
ANALYSE_RESPONSE = "Analyze the topic and offer a thoughtful summary with your own insights or suggestions."

# Default system prompt used when the user supplies none.
# Fix: the paraphrase line previously had no trailing "\n", so the two final
# sentences were concatenated as "...context.If the answer...".
DEFAULT_PROMPT = (
    "You are a medical assistant chatbot.\n"
    "Answer the user's question using only the information provided in the medical document context below.\n"
    "Be clear, professional and concise.\n"
    "Reference the exact section from the document.\n"
    "You may paraphrase, but do not invent or assume information not present in the context.\n"
    "If the answer is not found in the context, say so explicitly."
)

# Max time (seconds) allowed for pdf_extractor.extract_pdf_content
Expand Down Expand Up @@ -304,23 +322,42 @@ def delete_cached_pdf(selected_name, username):
return None, None, dropdown_update, msg


def ask_question(question, sections, chunk_index, system_prompt, username, use_index):
def ask_question(question, sections, chunk_index, system_prompt, username, use_index, mode):
    """Answer *question* against the processed PDF.

    Parameters:
        question: user question text.
        sections / chunk_index: parsed section data and chunk embeddings.
        system_prompt: optional override of DEFAULT_PROMPT.
        username: logged-in user (empty/None blocks the request).
        use_index: True enables coarse-to-fine (ToC-based) search.
        mode: "Quick" | "In-Depth" | "Analyse" answer style.

    Returns (answer_text, reference_text).
    """
    fine_only = not use_index
    # Fix: error paths now return two values, matching the two Gradio output
    # components wired to this callback (the original returned one string).
    if not username:
        return "Please log in first.", ""
    if sections is None or chunk_index is None:
        return "Please upload and process a PDF first.", ""

    # Fix: the original if/elif chain left mode_prompt unbound (NameError)
    # for any unrecognized mode; unknown modes now add no extra instruction.
    mode_prompt = {
        "Quick": QUICK_RESPONSE,
        "In-Depth": INDEPTH_RESPONSE,
        "Analyse": ANALYSE_RESPONSE,
    }.get(mode, "")

    prompt = system_prompt or DEFAULT_PROMPT
    # Fix: the original f-string embedded a literal " + " into the prompt
    # text sent to the model; join with a newline instead.
    full_prompt = f"{prompt}\n{mode_prompt}" if mode_prompt else prompt

    bot = PDFChatBot(sections, chunk_index, system_prompt=full_prompt)
    answer = bot.answer(question, fine_only=fine_only)
    # Strip chat-template special tokens so the raw transcript is readable.
    answer = answer.replace('<|endoftext|><|im_start|>user', "=== System Prompt ===")
    answer = answer.replace('<|im_end|>\n<|im_start|>assistant', '')
    answer = answer.replace('<|im_end|>', '')
    # Split the transcript into the answer body and the cited context.
    answer_output = answer.split("=== Answer ===")[-1].strip()
    reference_output = answer.split("=== User Question ===")[0].strip().split("=== Document Context ===")[-1].strip()

    print(f"\n\n--- PROMPT LENGTH = {len(full_prompt)} ---\n\n")
    return answer_output, reference_output

def switch_theme(theme_name):
    """Return a Gradio theme instance for *theme_name*.

    Unknown names yield None, same as the original if/elif chain.
    """
    factories = {
        "default": gr.themes.Default,
        "monochrome": gr.themes.Monochrome,
        "soft": gr.themes.Soft,
        # Add more themes as needed
    }
    factory = factories.get(theme_name)
    return factory() if factory is not None else None

with gr.Blocks() as demo:
gr.Markdown("## QueryDoc Web Demo")
Expand Down Expand Up @@ -373,7 +410,13 @@ def ask_question(question, sections, chunk_index, system_prompt, username, use_i
gr.Markdown("### Ask a Question")
gr.Markdown("- Ask a question based on the uploaded PDF.")
gr.Markdown("- Check **Coarse-to-Fine Search** to enable Table of Contents based search.")
gr.Markdown("- Select a mode for the type of response")
question_input = gr.Textbox(label="Question")
dropdown_mode = gr.Dropdown(
label="Answer mode",
choices=["Quick", "In-Depth", "Analyse"],
value="Quick"
)
use_index = gr.Checkbox(label="Coarse-to-Fine Search", value=False)
ask_btn = gr.Button("Ask", variant="primary")
gr.Markdown("### Answer")
Expand Down Expand Up @@ -406,8 +449,8 @@ def ask_question(question, sections, chunk_index, system_prompt, username, use_i
inputs=[existing_dropdown, username_state],
outputs=[sections_state, index_state, existing_dropdown, status]
)
question_input.submit(ask_question, inputs=[question_input, sections_state, index_state, prompt_input, username_state, use_index], outputs=[answer_output, reference_output])
ask_btn.click(ask_question, inputs=[question_input, sections_state, index_state, prompt_input, username_state, use_index], outputs=[answer_output, reference_output])

question_input.submit(ask_question, inputs=[question_input, sections_state, index_state, prompt_input, username_state, use_index, dropdown_mode], outputs=[answer_output, reference_output])
ask_btn.click(ask_question, inputs=[question_input, sections_state, index_state, prompt_input, username_state, use_index, dropdown_mode], outputs=[answer_output, reference_output])
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=30987)
demo.launch(server_name="0.0.0.0", server_port=30000)