
# VectorSearch-VideoText 🍿 🎥
Learn how to search within videos using text, with SuperDuperDB and vector embeddings.

Let's go! First, install the dependencies:
```
pip3 install superduperdb
pip3 install opencv-python
pip3 install git+https://github.com/openai/CLIP.git
```
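
If you want to verify the CLIP install before continuing, listing the available model weights is a quick check (not part of the original walkthrough):

```
import clip

# 'RN50' should appear in this list; it is the checkpoint used later in this tutorial.
print(clip.available_models())
```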

```
import clip
from IPython.display import display, HTML
from PIL import Image
import torch

from superduperdb import CFG
from superduperdb.ext.pillow import pil_image
from superduperdb.base.document import Document as D
from superduperdb import Model, Schema
from superduperdb.backends.mongodb.query import Collection
from superduperdb.ext.torch import tensor, TorchModel
```

Let's make the database superduper!

```
import os

# Uncomment one of the following lines to use a bespoke MongoDB deployment.
# For testing, the default connection is to mongomock.

mongodb_uri = os.getenv("MONGODB_URI","mongomock://test")
# mongodb_uri = "mongodb://localhost:27017"
# mongodb_uri = "mongodb://superduper:superduper@mongodb:27017/documents"
# mongodb_uri = "mongodb://:@/"
# mongodb_uri = "mongodb+srv://:@/"

CFG.downloads.hybrid = True
CFG.downloads.root = './'

# Super-Duper your Database!
from superduperdb import superduper
db = superduper(mongodb_uri)
```
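
As a quick sanity check (assuming the `db.show` helper available in the superduperdb version used here), you can list the components registered so far; on a fresh mongomock database this should be empty:

```
# Hedged: `db.show('model')` lists registered models in superduperdb;
# expect an empty list right after connecting.
print(db.show('model'))
```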

```
from superduperdb import Encoder

vid_enc = Encoder(
    identifier='video_on_file',
    load_hybrid=False,
)

db.add(vid_enc)
```

Let's get a sample video from the internet:

```
db.execute(
    Collection('videos').insert_one(
        D({'video': vid_enc(uri='https://superduperdb-public.s3.eu-west-1.amazonaws.com/animals_excerpt.mp4')})
    )
)
```
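
If you have several clips, the same pattern extends to a batch insert. This is a hedged sketch assuming `insert_many` mirrors `insert_one`; the URLs below are placeholders, not real files:

```
# Placeholder URLs for illustration only
urls = [
    'https://example.com/clip1.mp4',
    'https://example.com/clip2.mp4',
]

db.execute(
    Collection('videos').insert_many(
        [D({'video': vid_enc(uri=u)}) for u in urls]
    )
)
```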

```
list(db.execute(Collection('videos').find()))
```

```
import cv2
import tqdm

def video2images(video_file):
    # Keep one frame out of every `sample_freq` frames
    sample_freq = 10
    cap = cv2.VideoCapture(video_file)

    frame_count = 0

    fps = cap.get(cv2.CAP_PROP_FPS)
    print(fps)
    extracted_frames = []
    progress = tqdm.tqdm()

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Timestamp (in seconds) of the current frame
        current_timestamp = frame_count // fps

        if frame_count % sample_freq == 0:
            # OpenCV returns BGR frames; reverse the channels for PIL (RGB)
            extracted_frames.append({
                'image': Image.fromarray(frame[:, :, ::-1]),
                'current_timestamp': current_timestamp,
            })
        frame_count += 1
        progress.update(1)

    cap.release()
    cv2.destroyAllWindows()
    return extracted_frames
```
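
To see what the function produces on its own, you can run it directly against a local copy of the sample clip (the file path below is an assumption; download the `.mp4` first or point it at any local video):

```
# Stand-alone test of the frame sampler; 'animals_excerpt.mp4' is a
# hypothetical local copy of the sample video.
frames = video2images('animals_excerpt.mp4')
print(f'extracted {len(frames)} frames')
print(frames[0]['current_timestamp'], frames[0]['image'].size)
```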

Create a `Listener` which will continuously process newly inserted video URLs and save the sampled frames into another collection.

```
from superduperdb import Listener

video2images = Model(
    identifier='video2images',
    object=video2images,
    flatten=True,
    model_update_kwargs={'document_embedded': False},
    output_schema=Schema(identifier='myschema', fields={'image': pil_image}),
)

db.add(
    Listener(
        model=video2images,
        select=Collection('videos').find(),
        key='video',
    )
)
```

```
db.execute(Collection('_outputs.video.video2images').find_one()).unpack()['_outputs']['video']['video2images']['image']
```

```
model, preprocess = clip.load("RN50", device='cpu')
t = tensor(torch.float, shape=(1024,))

visual_model = TorchModel(
    identifier='clip_image',
    preprocess=preprocess,
    object=model.visual,
    encoder=t,
)
text_model = TorchModel(
    identifier='clip_text',
    object=model,
    preprocess=lambda x: clip.tokenize(x)[0],
    forward_method='encode_text',
    encoder=t,
    device='cpu',
    preferred_devices=None,
)
```
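
As an optional sanity check: RN50 produces 1024-dimensional embeddings, which is why the tensor encoder above is declared with `shape=(1024,)`. You can confirm this with the raw CLIP model:

```
# Encode a test phrase and inspect the embedding shape
with torch.no_grad():
    text_emb = model.encode_text(clip.tokenize(['An elephant']))
print(text_emb.shape)  # expected: torch.Size([1, 1024])
```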

Create a `VectorIndex` with an indexing listener and a compatible listener:

```
from superduperdb import Listener, VectorIndex
from superduperdb.backends.mongodb import Collection

db.add(
    VectorIndex(
        identifier='video_search_index',
        indexing_listener=Listener(
            model=visual_model,
            key='_outputs.video.video2images.image',
            select=Collection('_outputs.video.video2images').find(),
        ),
        compatible_listener=Listener(
            model=text_model,
            key='text',
            select=None,
            active=False,
        )
    )
)
```

Now let's test vector search by querying text against the saved frames.

Search for something that may have happened during the video:

```
search_phrase = 'An elephant'

r = next(db.execute(
    Collection('_outputs.video.video2images')
    .like(D({'text': search_phrase}), vector_index='video_search_index', n=1)
    .find()
))

search_timestamp = r['_outputs']['video']['video2images']['current_timestamp']
```
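
To repeat the query with different phrases, a small helper (not in the original notebook) keeps things tidy:

```
def search_video(phrase, n=1):
    # Return the timestamps (in seconds) of the top-n frames matching `phrase`
    cursor = db.execute(
        Collection('_outputs.video.video2images')
        .like(D({'text': phrase}), vector_index='video_search_index', n=n)
        .find()
    )
    return [
        r['_outputs']['video']['video2images']['current_timestamp']
        for r in cursor
    ]

print(search_video('An elephant'))
```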

Get the back-reference to the original video document:

```
video = db.execute(Collection('videos').find_one({'_id': r['_source']}))
```

Start the video from the resultant timestamp:

```
from IPython.display import display, HTML
# Embed the video and jump to the matched timestamp; this reconstruction
# assumes the stored video exposes its source URL via `video['video'].uri`.
video_html = f"""
<video controls>
    <source src="{video['video'].uri}" type="video/mp4">
</video>
<script>
    var video = document.querySelector('video');
    video.currentTime = {search_timestamp};
    video.play();
</script>
"""
```

```
display(HTML(video_html))
```