Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ model = AutoModelForVision2Seq.from_pretrained(
|
|
| 8 |
model_name,
|
| 9 |
trust_remote_code=True,
|
| 10 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 11 |
device_map="auto",
|
| 12 |
)
|
| 13 |
processor = AutoProcessor.from_pretrained(
|
|
@@ -91,6 +92,7 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
| 91 |
<meta charset="UTF-8" />
|
| 92 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 93 |
<title>NuExtract-2 Overview</title>
|
|
|
|
| 94 |
<style>
|
| 95 |
img { display:block; margin-bottom:1rem; }
|
| 96 |
ul { margin:1rem 0; padding-left:1.5rem; }
|
|
@@ -98,83 +100,104 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
| 98 |
a:hover { text-decoration:underline; }
|
| 99 |
h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
|
| 100 |
pre { overflow-x:auto; border-radius:6px; padding:1rem; }
|
| 101 |
-
code { border-radius:
|
|
|
|
|
|
|
| 102 |
html[data-theme="dark"],
|
| 103 |
@media (prefers-color-scheme: dark) {
|
| 104 |
-
body { background-color:#1e1e1e;}
|
| 105 |
-
code { background-color
|
| 106 |
-
pre { background-color:#2a2a2a;}
|
| 107 |
}
|
| 108 |
html[data-theme="light"],
|
| 109 |
@media (prefers-color-scheme: light) {
|
| 110 |
-
body { background-color
|
| 111 |
-
code { background-color
|
| 112 |
-
pre { background-color
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
}
|
| 114 |
-
</style>
|
| 115 |
</head>
|
| 116 |
<body>
|
| 117 |
-
<p align="center">
|
| 118 |
<a href="https://nuextract.ai/">
|
| 119 |
-
|
| 120 |
-
|
| 121 |
</a>
|
| 122 |
-
</p>
|
| 123 |
-
<p align="center">
|
| 124 |
-
🖥️ <a href="https://nuextract.ai/">API / Platform</a>
|
| 125 |
-
</p>
|
| 126 |
-
|
| 127 |
-
<section>
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
<h1>
|
| 132 |
-
|
| 133 |
-
<p>
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
<
|
| 137 |
-
|
| 138 |
-
<
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
| 154 |
"first_name": "verbatim-string",
|
| 155 |
-
"last_name":
|
| 156 |
"description": "string",
|
| 157 |
-
"age":
|
| 158 |
"classes": [
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
],
|
| 165 |
"average_gpa": "number",
|
| 166 |
-
"birth_date":
|
| 167 |
"nationality": ["France", "England", "Japan", "USA", "China"],
|
| 168 |
"languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
|
| 169 |
}</code></pre>
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
<
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
<
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
</body>
|
| 179 |
</html>
|
| 180 |
""")
|
|
@@ -191,7 +214,7 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
| 191 |
|
| 192 |
example_data = [
|
| 193 |
[
|
| 194 |
-
"
|
| 195 |
"", # no text
|
| 196 |
"""{
|
| 197 |
"movie_name": "verbatim-string",
|
|
|
|
| 8 |
model_name,
|
| 9 |
trust_remote_code=True,
|
| 10 |
torch_dtype=torch.bfloat16,
|
| 11 |
+
attn_implementation="flash_attention_2",
|
| 12 |
device_map="auto",
|
| 13 |
)
|
| 14 |
processor = AutoProcessor.from_pretrained(
|
|
|
|
| 92 |
<meta charset="UTF-8" />
|
| 93 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 94 |
<title>NuExtract-2 Overview</title>
|
| 95 |
+
|
| 96 |
<style>
|
| 97 |
img { display:block; margin-bottom:1rem; }
|
| 98 |
ul { margin:1rem 0; padding-left:1.5rem; }
|
|
|
|
| 100 |
a:hover { text-decoration:underline; }
|
| 101 |
h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
|
| 102 |
pre { overflow-x:auto; border-radius:6px; padding:1rem; }
|
| 103 |
+
code { border-radius:1px; padding:.1em .1em; font-family:monospace; }
|
| 104 |
+
|
| 105 |
+
/* ─── Dark / light themes ─── */
|
| 106 |
html[data-theme="dark"],
|
| 107 |
@media (prefers-color-scheme: dark) {
|
| 108 |
+
body { background-color:#1e1e1e; }
|
| 109 |
+
code { background-color:#2d2d2d; }
|
| 110 |
+
pre { background-color:#2a2a2a; }
|
| 111 |
}
|
| 112 |
html[data-theme="light"],
|
| 113 |
@media (prefers-color-scheme: light) {
|
| 114 |
+
body { background-color:#ffffff; }
|
| 115 |
+
code { background-color:#f5f5f5; }
|
| 116 |
+
pre { background-color:#f5f5f5; }
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
/* ─── NEW: put the two articles side-by-side ─── */
|
| 120 |
+
.template-container {
|
| 121 |
+
display: flex;
|
| 122 |
+
flex-wrap: wrap; /* stacks on small screens */
|
| 123 |
+
gap: 2rem;
|
| 124 |
+
margin-top: 1rem;
|
| 125 |
+
}
|
| 126 |
+
.template-container article {
|
| 127 |
+
flex: 1 1 320px; /* grow / shrink with a sensible min width */
|
| 128 |
+
min-width: 280px;
|
| 129 |
}
|
| 130 |
+
</style>
|
| 131 |
</head>
|
| 132 |
<body>
|
| 133 |
+
<p align="center">
|
| 134 |
<a href="https://nuextract.ai/">
|
| 135 |
+
<img src="https://cdn.prod.website-files.com/638364a4e52e440048a9529c/64188f405afcf42d0b85b926_logo_numind_final.png"
|
| 136 |
+
alt="NuMind Logo" style="width:200px;height:50px;" />
|
| 137 |
</a>
|
| 138 |
+
</p>
|
| 139 |
+
<p align="center">
|
| 140 |
+
🖥️ <a href="https://nuextract.ai/">API / Platform</a> | 📑 <a href="https://numind.ai/blog">Blog</a> | 🗣️ <a href="https://discord.gg/3tsEtJNCDe">Discord</a> | 🛠️ <a href="https://github.com/numindai/nuextract">Github</a>
|
| 141 |
+
</p>
|
| 142 |
+
|
| 143 |
+
<section>
|
| 144 |
+
<h3>This space is a demo for <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h3>
|
| 145 |
+
<h3>You can also check: <a href="https://huggingface.co/numind/NuExtract-2.0-2B" target="_blank">NuExtract-2.0-2B</a> and <a href="https://huggingface.co/numind/NuExtract-2.0-8B" target="_blank">NuExtract-2.0-8B</a> and our top-performing model via the <a href="https://nuextract.ai/">API / Platform</a></h3>
|
| 146 |
+
|
| 147 |
+
<h1>NuExtract-2.0</h1>
|
| 148 |
+
<p>NuExtract 2.0 is a family of models trained specifically for structured information extraction tasks. It supports both multimodal inputs and is multilingual.</p>
|
| 149 |
+
<p>To use the model, provide an input text/image and a JSON template describing the information you need to extract. The template should be a JSON object, specifying field names and their expected type.</p>
|
| 150 |
+
|
| 151 |
+
<!-- ------------- SIDE-BY-SIDE CONTAINER ------------- -->
|
| 152 |
+
<div class="template-container">
|
| 153 |
+
<!-- Supported Template Types -->
|
| 154 |
+
<article>
|
| 155 |
+
<h3>Supported Template Types</h3>
|
| 156 |
+
<ul>
|
| 157 |
+
<li><code>verbatim-string</code> — extract text exactly as it appears.</li>
|
| 158 |
+
<li><code>string</code> — generic text, with possible paraphrasing.</li>
|
| 159 |
+
<li><code>integer</code> — whole number.</li>
|
| 160 |
+
<li><code>number</code> — decimal or whole number.</li>
|
| 161 |
+
<li><code>date-time</code> — ISO 8601 date format.</li>
|
| 162 |
+
<li><code>boolean</code> — True or False.</li>
|
| 163 |
+
<li>Array of any type above (e.g. <code>["string"]</code>).</li>
|
| 164 |
+
<li><code>enum</code> — one value from a predefined list (e.g. <code>["yes", "no", "maybe"]</code>).</li>
|
| 165 |
+
<li><code>multi-label</code> — multiple values from a list (e.g. <code>[["A", "B", "C"]]</code>).</li>
|
| 166 |
+
</ul>
|
| 167 |
+
<p>You can specify any nested structure, such as an object inside an object or a list of objects. If no relevant information is found, the model returns <code>null</code> or <code>[]</code>.</p>
|
| 168 |
+
</article>
|
| 169 |
+
<!-- Example Template -->
|
| 170 |
+
<article>
|
| 171 |
+
<h3>Example Template</h3>
|
| 172 |
+
<pre><code>{
|
| 173 |
"first_name": "verbatim-string",
|
| 174 |
+
"last_name": "verbatim-string",
|
| 175 |
"description": "string",
|
| 176 |
+
"age": "integer",
|
| 177 |
"classes": [
|
| 178 |
+
{
|
| 179 |
+
"name": "verbatim-string",
|
| 180 |
+
"professors": ["verbatim-string"],
|
| 181 |
+
"gpa": "number"
|
| 182 |
+
}
|
| 183 |
],
|
| 184 |
"average_gpa": "number",
|
| 185 |
+
"birth_date": "date-time",
|
| 186 |
"nationality": ["France", "England", "Japan", "USA", "China"],
|
| 187 |
"languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
|
| 188 |
}</code></pre>
|
| 189 |
+
</article>
|
| 190 |
+
</div><!-- /.template-container -->
|
| 191 |
+
<br>
|
| 192 |
+
<strong>You can also provide a description of what you want to extract, use a non-JSON format (e.g. YAML, Pydantic) or even an example of input text. The model will automatically update the template field and generate a compatible JSON template based on our typing system.</strong>
|
| 193 |
+
</section>
|
| 194 |
+
|
| 195 |
+
<br>
|
| 196 |
+
|
| 197 |
+
<section>
|
| 198 |
+
<ul><h4><strong>Model used in this demo:</strong> <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h4></ul>
|
| 199 |
+
<i>⚠️ This demo restricts inputs to 10,000 tokens</i>
|
| 200 |
+
</section>
|
| 201 |
</body>
|
| 202 |
</html>
|
| 203 |
""")
|
|
|
|
| 214 |
|
| 215 |
example_data = [
|
| 216 |
[
|
| 217 |
+
"examples/affiche.jpg", # image file
|
| 218 |
"", # no text
|
| 219 |
"""{
|
| 220 |
"movie_name": "verbatim-string",
|