Skip to content

Commit fe13be1

Browse files
FlimFlamm and rom1504 authored
Updates simple_filter.ipynb (#312)
* Update laion5B_h14 guide
  - Ensured aria2 download commands use the correct filename and extension
  - Added warning to check for correct file count when combining metadata (if there are any missing files, it may ruin search results)
  - Capitalized "Laion5B_H14" folder name in the indices.json (makes it distinct from the index name itself, and aligns with the rest of the guide, which assumes the folder name is capitalized)
* Updates usage of KnnService w/ ClipOptions
  - Note: indice_folder (ClipOptions property) appears to be unused by KnnService

---------

Co-authored-by: Romain Beaumont <[email protected]>
1 parent 9fe1276 commit fe13be1

File tree

1 file changed

+73
-61
lines changed

1 file changed

+73
-61
lines changed

notebook/simple_filter.ipynb

Lines changed: 73 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,46 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": 1,
14+
"execution_count": 25,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
1818
"import os\n",
1919
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"\"\n",
20-
"from clip_retrieval.clip_back import load_clip_indices, KnnService"
20+
"from clip_retrieval.clip_back import load_clip_indices, KnnService, ClipOptions"
2121
]
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 2,
25+
"execution_count": 51,
2626
"metadata": {},
2727
"outputs": [
2828
{
2929
"name": "stdout",
3030
"output_type": "stream",
3131
"text": [
32-
"loading clip...\n",
33-
"loading metadata...\n",
34-
"loading indices...\n"
32+
"IO_FLAG_ONDISK_SAME_DIR: updating ondisk filename from combined-indices/merged_index.ivfdata to /home/chris/Documents/clip_client/Laion5B_H14/image.index/merged_index.ivfdata\n"
3533
]
3634
}
3735
],
3836
"source": [
37+
"clip_options = ClipOptions(\n",
38+
" indice_folder = \"currently unused by knn.query()\",\n",
39+
" clip_model = \"open_clip:ViT-H-14\",\n",
40+
" enable_hdf5 = False,\n",
41+
" enable_faiss_memory_mapping = True,\n",
42+
" columns_to_return = [\"url\", \"caption\"],\n",
43+
" reorder_metadata_by_ivf_index = False,\n",
44+
" enable_mclip_option = False,\n",
45+
" use_jit = False,\n",
46+
" use_arrow = True,\n",
47+
" provide_safety_model = False,\n",
48+
" provide_violence_detector = False,\n",
49+
" provide_aesthetic_embeddings = False,\n",
50+
")\n",
3951
"columns = [\"url\", \"caption\"]\n",
40-
"indices_loaded, indices, device, model, preprocess, mclip_model = load_clip_indices(\"/home/rom1504/indices_paths.json\", True, True, columns, False)\n",
41-
"knn_service = KnnService(indices_loaded=indices_loaded, device=device, model=model, preprocess=preprocess, columns_to_return=columns, metadata_is_ordered_by_ivf=False, mclip_model=mclip_model)"
52+
"resources = load_clip_indices(\"/home/rom1504/indices_paths.json\", clip_options)\n",
53+
"knn_service = KnnService(clip_resources=resources)"
4254
]
4355
},
4456
{
@@ -52,16 +64,16 @@
5264
},
5365
{
5466
"cell_type": "code",
55-
"execution_count": 5,
67+
"execution_count": 45,
5668
"metadata": {},
5769
"outputs": [],
5870
"source": [
59-
"results = knn_service.query(text_input=\"watermark\", modality=\"image\", indice_name=\"laion_400m\", num_images=1000, num_result_ids=1000)"
71+
"results = knn_service.query(text_input=\"watermark\", num_images=1000, num_result_ids=1000)"
6072
]
6173
},
6274
{
6375
"cell_type": "code",
64-
"execution_count": 8,
76+
"execution_count": 46,
6577
"metadata": {},
6678
"outputs": [],
6779
"source": [
@@ -71,7 +83,7 @@
7183
},
7284
{
7385
"cell_type": "code",
74-
"execution_count": 9,
86+
"execution_count": 47,
7587
"metadata": {},
7688
"outputs": [
7789
{
@@ -102,95 +114,95 @@
102114
" <tbody>\n",
103115
" <tr>\n",
104116
" <th>0</th>\n",
105-
" <td>https://static2.bigstockphoto.com/thumbs/7/8/3...</td>\n",
106-
" <td>Milch spritzt Sammlung, isolated on white back...</td>\n",
117+
" <td>http://lh3.googleusercontent.com/-q2Q0k45_bPA/...</td>\n",
118+
" <td>Watermark Intellectual Asset Management</td>\n",
107119
" </tr>\n",
108120
" <tr>\n",
109121
" <th>1</th>\n",
110-
" <td>https://t2.ftcdn.net/jpg/00/62/03/77/400_F_620...</td>\n",
111-
" <td>Abstract background</td>\n",
122+
" <td>https://tallncurly.com/wp-content/uploads/2014...</td>\n",
123+
" <td>TNC_feat_watermarks</td>\n",
112124
" </tr>\n",
113125
" <tr>\n",
114126
" <th>2</th>\n",
115-
" <td>https://as2.ftcdn.net/jpg/00/57/88/83/500_F_57...</td>\n",
116-
" <td>Spoed Foto op Canvas Abstract wave Abstract pa...</td>\n",
127+
" <td>https://tallncurly.com/wp-content/uploads/2014...</td>\n",
128+
" <td>TNC_feat_watermarks</td>\n",
117129
" </tr>\n",
118130
" <tr>\n",
119131
" <th>3</th>\n",
120-
" <td>https://image.shutterstock.com/image-photo/sto...</td>\n",
121-
" <td>Marine pattern with stylized blue waves. Cosme...</td>\n",
132+
" <td>https://tallncurly.com/wp-content/uploads/2014...</td>\n",
133+
" <td>TNC_feat_watermarks</td>\n",
122134
" </tr>\n",
123135
" <tr>\n",
124136
" <th>4</th>\n",
125-
" <td>https://thumb7.shutterstock.com/image-photo/st...</td>\n",
126-
" <td>water background - stock photo</td>\n",
137+
" <td>https://www.civiq.com.au/wp-content/uploads/20...</td>\n",
138+
" <td>An icons for WaterMark hygienic features</td>\n",
127139
" </tr>\n",
128140
" <tr>\n",
129141
" <th>...</th>\n",
130142
" <td>...</td>\n",
131143
" <td>...</td>\n",
132144
" </tr>\n",
133145
" <tr>\n",
134-
" <th>995</th>\n",
135-
" <td>https://t1.ftcdn.net/jpg/00/54/99/42/400_F_549...</td>\n",
136-
" <td>Abstract business background</td>\n",
146+
" <th>160</th>\n",
147+
" <td>https://www.aoaophoto.com/images/save-text-wat...</td>\n",
148+
" <td>save text watermark</td>\n",
137149
" </tr>\n",
138150
" <tr>\n",
139-
" <th>996</th>\n",
140-
" <td>https://img3.stockfresh.com/files/sstk/200/550...</td>\n",
141-
" <td>Brickwall as Background for Product Placement ...</td>\n",
151+
" <th>161</th>\n",
152+
" <td>https://magnusveteransfoundation.org/wp-conten...</td>\n",
153+
" <td>watermark design logo</td>\n",
142154
" </tr>\n",
143155
" <tr>\n",
144-
" <th>997</th>\n",
145-
" <td>https://thumb1.shutterstock.com/image-photo/st...</td>\n",
146-
" <td>motorboat and nature - stock vector</td>\n",
156+
" <th>162</th>\n",
157+
" <td>https://iseeocean.com/wp-content/uploads/job-m...</td>\n",
158+
" <td>Watermark Surf House Espinho</td>\n",
147159
" </tr>\n",
148160
" <tr>\n",
149-
" <th>998</th>\n",
150-
" <td>https://thumb1.shutterstock.com/image-photo/st...</td>\n",
151-
" <td>Molecular structure scientific vertical backgr...</td>\n",
161+
" <th>163</th>\n",
162+
" <td>http://a1.mzstatic.com/us/r30/Purple/v4/0d/04/...</td>\n",
163+
" <td>eZy Watermark lite - iTunes App Ranking and Ap...</td>\n",
152164
" </tr>\n",
153165
" <tr>\n",
154-
" <th>999</th>\n",
155-
" <td>https://thumb9.shutterstock.com/image-photo/st...</td>\n",
156-
" <td>Connections raster icon. This flat rounded squ...</td>\n",
166+
" <th>164</th>\n",
167+
" <td>http://www.mdameer.com/Media/Default/_Profiles...</td>\n",
168+
" <td>&lt;strong&gt;How To:&lt;/strong&gt; Image Watermark Filter</td>\n",
157169
" </tr>\n",
158170
" </tbody>\n",
159171
"</table>\n",
160-
"<p>1000 rows × 2 columns</p>\n",
172+
"<p>165 rows × 2 columns</p>\n",
161173
"</div>"
162174
],
163175
"text/plain": [
164176
" url \\\n",
165-
"0 https://static2.bigstockphoto.com/thumbs/7/8/3... \n",
166-
"1 https://t2.ftcdn.net/jpg/00/62/03/77/400_F_620... \n",
167-
"2 https://as2.ftcdn.net/jpg/00/57/88/83/500_F_57... \n",
168-
"3 https://image.shutterstock.com/image-photo/sto... \n",
169-
"4 https://thumb7.shutterstock.com/image-photo/st... \n",
177+
"0 http://lh3.googleusercontent.com/-q2Q0k45_bPA/... \n",
178+
"1 https://tallncurly.com/wp-content/uploads/2014... \n",
179+
"2 https://tallncurly.com/wp-content/uploads/2014... \n",
180+
"3 https://tallncurly.com/wp-content/uploads/2014... \n",
181+
"4 https://www.civiq.com.au/wp-content/uploads/20... \n",
170182
".. ... \n",
171-
"995 https://t1.ftcdn.net/jpg/00/54/99/42/400_F_549... \n",
172-
"996 https://img3.stockfresh.com/files/sstk/200/550... \n",
173-
"997 https://thumb1.shutterstock.com/image-photo/st... \n",
174-
"998 https://thumb1.shutterstock.com/image-photo/st... \n",
175-
"999 https://thumb9.shutterstock.com/image-photo/st... \n",
183+
"160 https://www.aoaophoto.com/images/save-text-wat... \n",
184+
"161 https://magnusveteransfoundation.org/wp-conten... \n",
185+
"162 https://iseeocean.com/wp-content/uploads/job-m... \n",
186+
"163 http://a1.mzstatic.com/us/r30/Purple/v4/0d/04/... \n",
187+
"164 http://www.mdameer.com/Media/Default/_Profiles... \n",
176188
"\n",
177189
" caption \n",
178-
"0 Milch spritzt Sammlung, isolated on white back... \n",
179-
"1 Abstract background \n",
180-
"2 Spoed Foto op Canvas Abstract wave Abstract pa... \n",
181-
"3 Marine pattern with stylized blue waves. Cosme... \n",
182-
"4 water background - stock photo \n",
190+
"0 Watermark Intellectual Asset Management \n",
191+
"1 TNC_feat_watermarks \n",
192+
"2 TNC_feat_watermarks \n",
193+
"3 TNC_feat_watermarks \n",
194+
"4 An icons for WaterMark hygienic features \n",
183195
".. ... \n",
184-
"995 Abstract business background \n",
185-
"996 Brickwall as Background for Product Placement ... \n",
186-
"997 motorboat and nature - stock vector \n",
187-
"998 Molecular structure scientific vertical backgr... \n",
188-
"999 Connections raster icon. This flat rounded squ... \n",
196+
"160 save text watermark \n",
197+
"161 watermark design logo \n",
198+
"162 Watermark Surf House Espinho \n",
199+
"163 eZy Watermark lite - iTunes App Ranking and Ap... \n",
200+
"164 <strong>How To:</strong> Image Watermark Filter \n",
189201
"\n",
190-
"[1000 rows x 2 columns]"
202+
"[165 rows x 2 columns]"
191203
]
192204
},
193-
"execution_count": 9,
205+
"execution_count": 47,
194206
"metadata": {},
195207
"output_type": "execute_result"
196208
}
@@ -725,7 +737,7 @@
725737
"name": "python",
726738
"nbconvert_exporter": "python",
727739
"pygments_lexer": "ipython3",
728-
"version": "3.8.10"
740+
"version": "3.9.17"
729741
},
730742
"orig_nbformat": 4
731743
},

0 commit comments

Comments
 (0)