from mixpeek import Mixpeek
# Construct the Mixpeek client that the extract() call below is made on.
# NOTE(review): "API_KEY" is a hard-coded literal here — presumably a
# placeholder to be replaced with a real key; confirm, and avoid committing
# real credentials in source.
mix = Mixpeek(mixpeek_key="API_KEY")
# index our PDF file
# NOTE(review): the return value of extract() is discarded. If the sample
# data shown after this call is what extract() returns, it needs to be
# assigned to a variable to be usable — confirm against the Mixpeek docs.
mix.extract("file.pdf")
# now we have clean PDF data
[
{
"filename": "file.pdf",
"content": "This is the content of the pdf file",
"embedding": [0.1, 0.2, 0.3, ...],
"metadata": {
"author": "John Doe",
"date": "2022-01-01"
}
}
]
Read the Docs