ALLOUNE
committed on
Commit
·
12d5a0c
1
Parent(s):
256eefa
add search bar for agenda items and suggestion marks in .txt files
Browse files- api/docs.py +59 -11
- static/index.html +3 -0
- static/js/ui.js +27 -19
api/docs.py
CHANGED
|
@@ -35,6 +35,59 @@ NSMAP = {
|
|
| 35 |
'v': 'urn:schemas-microsoft-com:vml'
|
| 36 |
}
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# ================================== Converting of files to .txt ====================================
|
| 39 |
|
| 40 |
KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
|
|
@@ -124,19 +177,14 @@ async def extract_text_contents(filename: str, ext: str, bytes: io.BytesIO) -> l
|
|
| 124 |
if ext == ".doc":
|
| 125 |
logging.debug(f"Converting {filename} .doc --> .docx")
|
| 126 |
docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
|
| 127 |
-
logging.debug(
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
final_text = extracted_data.content
|
| 131 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
| 132 |
elif ext == ".docx":
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
logging.debug(
|
| 137 |
-
f"Extracting content for filename: {filename}, ext: {ext} with converted docx")
|
| 138 |
-
extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
|
| 139 |
-
final_text = extracted_data.content
|
| 140 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
| 141 |
elif ext == ".ppt":
|
| 142 |
logging.debug(f"Converting {filename} .ppt --> .pptx")
|
|
|
|
| 35 |
'v': 'urn:schemas-microsoft-com:vml'
|
| 36 |
}
|
| 37 |
|
| 38 |
+
SUGGESTION_START = '[[SUGGESTION_START]]'
|
| 39 |
+
SUGGESTION_END = '[[SUGGESTION_END]]'
|
| 40 |
+
|
| 41 |
+
def extract_docx_text_with_suggestions(docx_stream: io.BytesIO) -> str:
    """Extract plain text from a DOCX archive, marking tracked insertions.

    Reads ``word/document.xml`` from the archive and walks its ``w:body``
    element, matching elements by local tag name:

    * ``del`` subtrees are skipped entirely.
    * ``ins`` subtrees are collected separately; when their text is not
      blank it is wrapped in ``SUGGESTION_START`` / ``SUGGESTION_END``,
      otherwise it is emitted unchanged.
    * ``p`` emits its content followed by a newline.
    * ``t`` emits the element text, ``tab`` a tab and ``br`` a newline.
    * Any other element is transparent: only its children are visited.

    Args:
        docx_stream: Binary stream holding the DOCX (zip) archive.

    Returns:
        The extracted text with carriage returns removed. An empty string
        is returned when the document has no ``w:body`` element.

    Raises:
        FileNotFoundError: If the archive has no ``word/document.xml`` member.
    """
    try:
        with zipfile.ZipFile(docx_stream) as archive:
            xml = archive.read('word/document.xml')
    except KeyError as err:
        # Chain the original error so the missing-member cause stays visible.
        raise FileNotFoundError('word/document.xml not found in the DOCX archive.') from err

    # The document may come from an upload: do not expand entities while parsing.
    parser = etree.XMLParser(resolve_entities=False)
    root = etree.fromstring(xml, parser)
    body = root.find('w:body', NSMAP)
    if body is None:
        return ''

    def walk(el, collector: list[str]) -> None:
        """Append the text found under *el* to *collector*."""
        # Comments and processing instructions have a non-string tag;
        # etree.QName() raises ValueError on them, and they carry no text.
        if not isinstance(el.tag, str):
            return

        tag = etree.QName(el).localname

        if tag == 'del':
            # Tracked deletion: the removed text is not part of the output.
            return

        if tag == 'ins':
            # Collect the insertion on its own so it can be wrapped as a unit.
            inserted: list[str] = []
            for child in el:
                walk(child, inserted)
            joined = ''.join(inserted)
            if joined.strip():
                collector.append(SUGGESTION_START)
                collector.append(joined)
                collector.append(SUGGESTION_END)
            else:
                # Whitespace-only insertions are kept but not marked.
                collector.append(joined)
            return

        if tag == 'p':
            for child in el:
                walk(child, collector)
            collector.append('\n')
            return

        if tag == 't':
            collector.append(el.text or '')
            return

        if tag == 'tab':
            collector.append('\t')
            return

        if tag == 'br':
            collector.append('\n')
            return

        # Container element (run, table, cell, ...): descend into children.
        for child in el:
            walk(child, collector)

    out: list[str] = []
    walk(body, out)
    return ''.join(out).replace('\r', '')
|
| 90 |
+
|
| 91 |
# ================================== Converting of files to .txt ====================================
|
| 92 |
|
| 93 |
KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
|
|
|
|
| 177 |
if ext == ".doc":
|
| 178 |
logging.debug(f"Converting {filename} .doc --> .docx")
|
| 179 |
docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
|
| 180 |
+
logging.debug(f"Extracting content with suggestion markers for filename: {filename}, ext: {ext} (converted)")
|
| 181 |
+
docx_bytes.seek(0)
|
| 182 |
+
final_text = extract_docx_text_with_suggestions(docx_bytes)
|
|
|
|
| 183 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
| 184 |
elif ext == ".docx":
|
| 185 |
+
logging.debug(f"Extracting .docx with suggestion markers for {filename}.")
|
| 186 |
+
bytes.seek(0)
|
| 187 |
+
final_text = extract_docx_text_with_suggestions(bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
| 189 |
elif ext == ".ppt":
|
| 190 |
logging.debug(f"Converting {filename} .ppt --> .pptx")
|
static/index.html
CHANGED
|
@@ -167,6 +167,9 @@
|
|
| 167 |
<span class="font-semibold">Tous</span>
|
| 168 |
</label>
|
| 169 |
</li>
|
|
|
|
|
|
|
|
|
|
| 170 |
<div id="agenda-options" class="flex flex-col gap-1"></div>
|
| 171 |
</ul>
|
| 172 |
</div>
|
|
|
|
| 167 |
<span class="font-semibold">Tous</span>
|
| 168 |
</label>
|
| 169 |
</li>
|
| 170 |
+
<li class="pb-2">
|
| 171 |
+
<input id="agenda-search-input" type="text" placeholder="Search agenda..." class="input input-bordered w-full" />
|
| 172 |
+
</li>
|
| 173 |
<div id="agenda-options" class="flex flex-col gap-1"></div>
|
| 174 |
</ul>
|
| 175 |
</div>
|
static/js/ui.js
CHANGED
|
@@ -81,10 +81,9 @@ export function populateSelect(selectId, options, defaultText = 'Sélectionner..
|
|
| 81 |
export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
|
| 82 |
const container = document.getElementById(optionsContainerId);
|
| 83 |
container.innerHTML = '';
|
| 84 |
-
selectionSet.clear();
|
| 85 |
|
| 86 |
-
|
| 87 |
-
options.forEach(option => {
|
| 88 |
const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
|
| 89 |
const label = document.createElement('label');
|
| 90 |
label.className = "flex items-center gap-2 cursor-pointer py-1";
|
|
@@ -92,42 +91,51 @@ export function populateCheckboxDropdown(optionsContainerId, options, filterType
|
|
| 92 |
<input type="checkbox" class="${filterType}-checkbox option-checkbox" id="${safeId}" value="${option}">
|
| 93 |
<span>${option}</span>
|
| 94 |
`;
|
| 95 |
-
label.querySelector('input')
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
selectionSet.delete(this.value);
|
| 100 |
-
}
|
| 101 |
-
|
| 102 |
-
// Gestion du label "Tous"
|
| 103 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
| 104 |
-
// Gestion du "Tous" global
|
| 105 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
| 106 |
if (allBox && allBox.checked) allBox.checked = false;
|
| 107 |
-
// Si plus rien n'est coché, recoche "Tous"
|
| 108 |
if (selectionSet.size === 0 && allBox) allBox.checked = true;
|
| 109 |
onSelect?.();
|
| 110 |
});
|
| 111 |
-
|
| 112 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
// Réinitialise le label
|
| 115 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
| 116 |
|
| 117 |
-
// Gestion de "Tous"
|
| 118 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
| 119 |
if (allBox) {
|
| 120 |
allBox.addEventListener('change', function () {
|
| 121 |
if (this.checked) {
|
| 122 |
-
// Décoche tout le reste
|
| 123 |
selectionSet.clear();
|
| 124 |
container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
|
| 125 |
-
this.checked = true;
|
| 126 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
| 127 |
applyFilters();
|
| 128 |
}
|
| 129 |
});
|
| 130 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
| 132 |
|
| 133 |
export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {
|
|
|
|
| 81 |
export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
|
| 82 |
const container = document.getElementById(optionsContainerId);
|
| 83 |
container.innerHTML = '';
|
| 84 |
+
selectionSet.clear();
|
| 85 |
|
| 86 |
+
const buildOptionLabel = (option) => {
|
|
|
|
| 87 |
const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
|
| 88 |
const label = document.createElement('label');
|
| 89 |
label.className = "flex items-center gap-2 cursor-pointer py-1";
|
|
|
|
| 91 |
<input type="checkbox" class="${filterType}-checkbox option-checkbox" id="${safeId}" value="${option}">
|
| 92 |
<span>${option}</span>
|
| 93 |
`;
|
| 94 |
+
const inputEl = label.querySelector('input');
|
| 95 |
+
inputEl.checked = selectionSet.has(option);
|
| 96 |
+
inputEl.addEventListener('change', function () {
|
| 97 |
+
if (this.checked) selectionSet.add(this.value); else selectionSet.delete(this.value);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
|
|
|
| 99 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
| 100 |
if (allBox && allBox.checked) allBox.checked = false;
|
|
|
|
| 101 |
if (selectionSet.size === 0 && allBox) allBox.checked = true;
|
| 102 |
onSelect?.();
|
| 103 |
});
|
| 104 |
+
return label;
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
const renderOptions = (list) => {
|
| 108 |
+
container.innerHTML = '';
|
| 109 |
+
list.forEach(option => container.appendChild(buildOptionLabel(option)));
|
| 110 |
+
};
|
| 111 |
+
|
| 112 |
+
renderOptions(options);
|
| 113 |
|
|
|
|
| 114 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
| 115 |
|
|
|
|
| 116 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
| 117 |
if (allBox) {
|
| 118 |
allBox.addEventListener('change', function () {
|
| 119 |
if (this.checked) {
|
|
|
|
| 120 |
selectionSet.clear();
|
| 121 |
container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
|
| 122 |
+
this.checked = true;
|
| 123 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
| 124 |
applyFilters();
|
| 125 |
}
|
| 126 |
});
|
| 127 |
}
|
| 128 |
+
|
| 129 |
+
if (filterType === 'agenda') {
|
| 130 |
+
const searchInput = document.getElementById('agenda-search-input');
|
| 131 |
+
if (searchInput) {
|
| 132 |
+
searchInput.addEventListener('input', () => {
|
| 133 |
+
const q = searchInput.value.toLowerCase();
|
| 134 |
+
const filtered = options.filter(o => o.toLowerCase().includes(q));
|
| 135 |
+
renderOptions(filtered);
|
| 136 |
+
});
|
| 137 |
+
}
|
| 138 |
+
}
|
| 139 |
}
|
| 140 |
|
| 141 |
export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {
|