from mixpeek import Mixpeek
# Create the Mixpeek client. "API_KEY" looks like a placeholder for a real key — TODO confirm.
mix = Mixpeek(mixpeek_key="API_KEY")
# Run extraction on our DOC file; a sample of the resulting data is shown below.
# NOTE(review): the return value of extract() is discarded here. Presumably it
# should be assigned to a variable to be used — confirm against the Mixpeek docs.
mix.extract("file.doc")
# now we have clean DOC data
[
{
"filename": "file.doc",
"content": "This is the content of the doc file",
"embedding": [0.1, 0.2, 0.3, ...],
"metadata": {
"author": "John Doe",
"date": "2022-01-01"
}
}
]
Read the Docs