505 lines
18 KiB
Python
505 lines
18 KiB
Python
# Copyright 2011-2015 MongoDB, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""GridFS implementation for Motor, an asynchronous driver for MongoDB."""
|
|
import hashlib
|
|
import warnings
|
|
|
|
import gridfs
|
|
import pymongo
|
|
import pymongo.errors
|
|
from gridfs import DEFAULT_CHUNK_SIZE, grid_file
|
|
|
|
from motor import docstrings
|
|
from motor.core import AgnosticCollection, AgnosticCursor, AgnosticDatabase
|
|
from motor.metaprogramming import (
|
|
AsyncCommand,
|
|
AsyncRead,
|
|
DelegateMethod,
|
|
ReadOnlyProperty,
|
|
coroutine_annotation,
|
|
create_class_with_framework,
|
|
)
|
|
|
|
|
|
class AgnosticGridOutCursor(AgnosticCursor):
    """Cursor whose delegate is a :class:`gridfs.GridOutCursor`.

    Objects returned by :meth:`next_object` are wrapped in the
    framework-specific class built from ``AgnosticGridOut``.
    """

    __motor_class_name__ = "MotorGridOutCursor"
    __delegate_class__ = gridfs.GridOutCursor

    def next_object(self):
        """**DEPRECATED** - Get next GridOut object from cursor."""
        # The deprecation warning itself is raised by the super() call.
        raw_grid_out = super().next_object()
        if not raw_grid_out:
            # Exhausted.
            return None

        wrapper_class = create_class_with_framework(
            AgnosticGridOut, self._framework, self.__module__
        )
        return wrapper_class(self.collection, delegate=raw_grid_out)
|
|
|
|
|
|
class MotorGridOutProperty(ReadOnlyProperty):
    """Creates a readonly attribute on the wrapped PyMongo GridOut."""

    def create_attribute(self, cls, attr_name):
        """Return a read-only property that forwards ``attr_name`` to the delegate.

        The property's docstring is copied from the same-named attribute of
        ``cls.__delegate_class__``. Its getter raises
        :exc:`pymongo.errors.InvalidOperation` while the delegate's ``_file``
        is falsy.
        """
        delegate_doc = getattr(cls.__delegate_class__, attr_name).__doc__

        def read_from_delegate(obj):
            wrapped = obj.delegate
            if wrapped._file:
                return getattr(wrapped, attr_name)

            raise pymongo.errors.InvalidOperation(
                "You must call MotorGridOut.open() before accessing the %s property" % attr_name
            )

        return property(fget=read_from_delegate, doc=delegate_doc)
|
|
|
|
|
|
class AgnosticGridOut:
    """Class to read data out of GridFS.

    MotorGridOut supports the same attributes as PyMongo's
    :class:`~gridfs.grid_file.GridOut`, such as ``_id``, ``content_type``,
    etc.

    You don't need to instantiate this class directly - use the
    methods provided by :class:`~motor.MotorGridFSBucket`. If it **is**
    instantiated directly, call :meth:`open`, :meth:`read`, or
    :meth:`readline` before accessing its attributes.
    """

    __motor_class_name__ = "MotorGridOut"
    __delegate_class__ = gridfs.GridOut

    _id = MotorGridOutProperty()
    aliases = MotorGridOutProperty()
    chunk_size = MotorGridOutProperty()
    close = MotorGridOutProperty()
    content_type = MotorGridOutProperty()
    filename = MotorGridOutProperty()
    length = MotorGridOutProperty()
    metadata = MotorGridOutProperty()
    name = MotorGridOutProperty()
    _open = AsyncCommand(attr_name="open")
    read = AsyncRead()
    readable = DelegateMethod()
    readchunk = AsyncRead()
    readline = AsyncRead()
    seek = DelegateMethod()
    seekable = DelegateMethod()
    tell = DelegateMethod()
    upload_date = MotorGridOutProperty()
    write = DelegateMethod()

    def __init__(
        self, root_collection, file_id=None, file_document=None, delegate=None, session=None
    ):
        """Wrap an existing GridOut delegate, or build a new one.

        :Parameters:
          - `root_collection`: an instance of the framework-specific
            collection class built from ``AgnosticCollection``
          - `file_id` (optional): passed through to the delegate class
          - `file_document` (optional): passed through to the delegate class
          - `delegate` (optional): an existing delegate to wrap; when it is
            falsy a new ``__delegate_class__`` instance is created instead
          - `session` (optional): passed through to the delegate class

        Raises :exc:`TypeError` if `root_collection` is not an instance of
        the expected collection class.
        """
        collection_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )

        if not isinstance(root_collection, collection_class):
            # Format with !r rather than "%r" % value: the %-operator treats a
            # tuple operand as an argument list and would fail or misreport it.
            raise TypeError(
                "First argument to MotorGridOut must be "
                f"MotorCollection, not {root_collection!r}"
            )

        if delegate:
            self.delegate = delegate
        else:
            self.delegate = self.__delegate_class__(
                root_collection.delegate, file_id, file_document, session=session
            )

        self.io_loop = root_collection.get_io_loop()

    def __aiter__(self):
        """Return ``self``; chunks are produced by :meth:`__anext__`."""
        return self

    async def __anext__(self):
        """Return the next chunk from ``readchunk``; stop on an empty chunk."""
        chunk = await self.readchunk()
        if chunk:
            return chunk
        raise StopAsyncIteration()

    def __getattr__(self, item):
        """Forward unknown attributes to the delegate once its ``_file`` is set.

        Raises :exc:`pymongo.errors.InvalidOperation` while the delegate's
        ``_file`` is falsy, and :exc:`AttributeError` if ``delegate`` itself
        has not been assigned.
        """
        # __getattr__ only runs when normal lookup fails. If "delegate" is the
        # missing name (e.g. __init__ raised before assigning it), reading
        # self.delegate below would re-enter this method without end.
        if item == "delegate":
            raise AttributeError(item)

        if not self.delegate._file:
            raise pymongo.errors.InvalidOperation(
                "You must call MotorGridOut.open() before accessing the %s property" % item
            )

        return getattr(self.delegate, item)

    @coroutine_annotation
    def open(self):
        """Retrieve this file's attributes from the server.

        Returns a Future.

        .. versionchanged:: 2.0
           No longer accepts a callback argument.

        .. versionchanged:: 0.2
           :class:`~motor.MotorGridOut` now opens itself on demand, calling
           ``open`` explicitly is rarely needed.
        """
        return self._framework.chain_return_value(self._open(), self.get_io_loop(), self)

    def get_io_loop(self):
        """Return the IO loop obtained from the root collection in ``__init__``."""
        return self.io_loop

    async def stream_to_handler(self, request_handler):
        """Write the contents of this file to a
        :class:`tornado.web.RequestHandler`. This method calls
        :meth:`~tornado.web.RequestHandler.flush` on
        the RequestHandler, so ensure all headers have already been set.
        For a more complete example see the implementation of
        :class:`~motor.web.GridFSHandler`.

        .. code-block:: python

            class FileHandler(tornado.web.RequestHandler):
                async def get(self, filename):
                    db = self.settings["db"]
                    fs = motor.MotorGridFSBucket(db)
                    try:
                        gridout = await fs.open_download_stream_by_name(filename)
                    except gridfs.NoFile:
                        raise tornado.web.HTTPError(404)

                    self.set_header("Content-Type", gridout.content_type)
                    self.set_header("Content-Length", gridout.length)
                    await gridout.stream_to_handler(self)
                    self.finish()

        .. seealso:: Tornado `RequestHandler <http://tornadoweb.org/en/stable/web.html#request-handlers>`_
        """
        written = 0
        while written < self.length:
            # Reading chunk_size at a time minimizes buffering.
            chunk = await self.read(self.chunk_size)

            # write() simply appends the output to a list; flush() sends it
            # over the network and minimizes buffering in the handler.
            request_handler.write(chunk)
            request_handler.flush()
            written += len(chunk)
|
|
|
|
|
|
class AgnosticGridIn:
    __motor_class_name__ = "MotorGridIn"
    __delegate_class__ = gridfs.GridIn

    __getattr__ = DelegateMethod()
    _id = ReadOnlyProperty()
    abort = AsyncCommand()
    chunk_size = ReadOnlyProperty()
    closed = ReadOnlyProperty()
    close = AsyncCommand()
    content_type = ReadOnlyProperty()
    filename = ReadOnlyProperty()
    length = ReadOnlyProperty()
    name = ReadOnlyProperty()
    read = DelegateMethod()
    readable = DelegateMethod()
    seekable = DelegateMethod()
    upload_date = ReadOnlyProperty()
    write = AsyncCommand().unwrap("MotorGridOut")
    writeable = DelegateMethod()
    writelines = AsyncCommand().unwrap("MotorGridOut")
    _exit = AsyncCommand("__exit__")
    set = AsyncCommand(
        attr_name="__setattr__",
        doc="""
Set an arbitrary metadata attribute on the file. Stores value on the server
as a key-value pair within the file document once the file is closed. If
the file is already closed, calling :meth:`set` will immediately update the file
document on the server.

Metadata set on the file appears as attributes on a
:class:`~motor.MotorGridOut` object created from the file.

:Parameters:
  - `name`: Name of the attribute, will be stored as a key in the file
    document on the server
  - `value`: Value of the attribute
""",
    )

    def __init__(self, root_collection, delegate=None, session=None, **kwargs):
        """
        Class to write data to GridFS. Application developers should not
        generally need to instantiate this class - see
        :meth:`~motor.MotorGridFSBucket.open_upload_stream`.

        Any of the file level options specified in the `GridFS Spec
        <http://dochub.mongodb.org/core/gridfs/>`_ may be passed as
        keyword arguments. Any additional keyword arguments will be
        set as additional fields on the file document. Valid keyword
        arguments include:

          - ``"_id"``: unique ID for this file (default:
            :class:`~bson.objectid.ObjectId`) - this ``"_id"`` must
            not have already been used for another file

          - ``"filename"``: human name for the file

          - ``"contentType"`` or ``"content_type"``: valid mime-type
            for the file

          - ``"chunkSize"`` or ``"chunk_size"``: size of each of the
            chunks, in bytes (default: 255 kb)

          - ``"encoding"``: encoding used for this file. Any :class:`str`
            that is written to the file will be converted to
            :class:`bytes`.

        :Parameters:
          - `root_collection`: root collection to write to
          - `delegate` (optional): an existing delegate to wrap; when it is
            falsy a new ``__delegate_class__`` instance is created from
            `root_collection`, `session` and `**kwargs`
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession` to use for all
            commands
          - `**kwargs` (optional): file level options (see above)

        Raises :exc:`TypeError` if `root_collection` is not an instance of
        the framework-specific collection class.

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridIn class in PyMongo).
        .. versionchanged:: 0.2
           ``open`` method removed, no longer needed.
        """
        collection_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )

        if not isinstance(root_collection, collection_class):
            # Format with !r rather than "%r" % value: the %-operator treats a
            # tuple operand as an argument list and would fail or misreport it.
            raise TypeError(
                "First argument to MotorGridIn must be "
                f"MotorCollection, not {root_collection!r}"
            )

        self.io_loop = root_collection.get_io_loop()
        # Short cut.
        self.delegate = delegate or self.__delegate_class__(
            root_collection.delegate, session=session, **kwargs
        )

    # Support "async with bucket.open_upload_stream() as f:"
    async def __aenter__(self):
        """Enter the async context; returns ``self``."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the async context by awaiting the delegate's ``__exit__``."""
        await self._exit(exc_type, exc_val, exc_tb)

    def get_io_loop(self):
        """Return the IO loop obtained from the root collection in ``__init__``."""
        return self.io_loop
|
|
|
|
|
|
class AgnosticGridFSBucket:
    __motor_class_name__ = "MotorGridFSBucket"
    __delegate_class__ = gridfs.GridFSBucket

    delete = AsyncCommand(doc=docstrings.gridfs_delete_doc)
    download_to_stream = AsyncCommand(doc=docstrings.gridfs_download_to_stream_doc)
    download_to_stream_by_name = AsyncCommand(doc=docstrings.gridfs_download_to_stream_by_name_doc)
    open_download_stream = AsyncCommand(doc=docstrings.gridfs_open_download_stream_doc).wrap(
        gridfs.GridOut
    )
    open_download_stream_by_name = AsyncCommand(
        doc=docstrings.gridfs_open_download_stream_by_name_doc
    ).wrap(gridfs.GridOut)
    open_upload_stream = DelegateMethod(doc=docstrings.gridfs_open_upload_stream_doc).wrap(
        gridfs.GridIn
    )
    open_upload_stream_with_id = DelegateMethod(
        doc=docstrings.gridfs_open_upload_stream_with_id_doc
    ).wrap(gridfs.GridIn)
    rename = AsyncCommand(doc=docstrings.gridfs_rename_doc)
    upload_from_stream = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_doc)
    upload_from_stream_with_id = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_with_id_doc)

    def __init__(
        self,
        database,
        bucket_name="fs",
        chunk_size_bytes=DEFAULT_CHUNK_SIZE,
        write_concern=None,
        read_preference=None,
        collection=None,
    ):
        """Create a handle to a GridFS bucket.

        Raises :exc:`~pymongo.errors.ConfigurationError` if `write_concern`
        is not acknowledged.

        This class conforms to the `GridFS API Spec
        <https://github.com/mongodb/specifications/blob/master/source/gridfs/gridfs-spec.rst>`_
        for MongoDB drivers.

        :Parameters:
          - `database`: database to use.
          - `bucket_name` (optional): The name of the bucket. Defaults to 'fs'.
          - `chunk_size_bytes` (optional): The chunk size in bytes. Defaults
            to 255KB.
          - `write_concern` (optional): The
            :class:`~pymongo.write_concern.WriteConcern` to use. If ``None``
            (the default) db.write_concern is used.
          - `read_preference` (optional): The read preference to use. If
            ``None`` (the default) db.read_preference is used.
          - `collection` (optional): Deprecated, an alias for `bucket_name`
            that exists solely to provide backwards compatibility.

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridFSBucket class in PyMongo).
        .. versionchanged:: 2.1
           Added support for the `bucket_name`, `chunk_size_bytes`,
           `write_concern`, and `read_preference` parameters.
           Deprecated the `collection` parameter which is now an alias to
           `bucket_name` (to match the GridFSBucket class in PyMongo).
        .. versionadded:: 1.0

        .. mongodoc:: gridfs
        """
        # The legacy "collection" argument, when given, overrides bucket_name.
        if collection is not None:
            warnings.warn(
                'the "collection" parameter is deprecated, use "bucket_name" instead',
                DeprecationWarning,
                stacklevel=2,
            )
            bucket_name = collection

        expected_db_class = create_class_with_framework(
            AgnosticDatabase, self._framework, self.__module__
        )
        if not isinstance(database, expected_db_class):
            raise TypeError(
                f"First argument to {self.__class__} must be MotorDatabase, not {database!r}"
            )

        # The same concern/preference pair goes to both the Motor collection
        # and the PyMongo bucket delegate.
        shared_options = {
            "write_concern": write_concern,
            "read_preference": read_preference,
        }

        self.io_loop = database.get_io_loop()
        self.collection = database.get_collection(bucket_name, **shared_options)
        self.delegate = self.__delegate_class__(
            database.delegate,
            bucket_name,
            chunk_size_bytes=chunk_size_bytes,
            **shared_options,
        )

    def get_io_loop(self):
        """Return the IO loop obtained from the database in ``__init__``."""
        return self.io_loop

    def wrap(self, obj):
        """Wrap a PyMongo GridIn, GridOut or GridOutCursor in its Motor class.

        Dispatch is on the exact class of ``obj`` (subclasses do not match);
        for any other object ``None`` is returned.
        """
        exact_class = obj.__class__

        if exact_class is grid_file.GridIn:
            grid_in_class = create_class_with_framework(
                AgnosticGridIn, self._framework, self.__module__
            )
            return grid_in_class(root_collection=self.collection, delegate=obj)

        if exact_class is grid_file.GridOut:
            grid_out_class = create_class_with_framework(
                AgnosticGridOut, self._framework, self.__module__
            )
            return grid_out_class(root_collection=self.collection, delegate=obj)

        if exact_class is gridfs.GridOutCursor:
            cursor_class = create_class_with_framework(
                AgnosticGridOutCursor, self._framework, self.__module__
            )
            return cursor_class(cursor=obj, collection=self.collection)

        return None

    def find(self, *args, **kwargs):
        """Find and return the files collection documents that match ``filter``.

        Returns a cursor that iterates across files matching
        arbitrary queries on the files collection. Can be combined
        with other modifiers for additional control.

        For example::

          cursor = bucket.find({"filename": "lisa.txt"}, no_cursor_timeout=True)
          while (await cursor.fetch_next):
              grid_out = cursor.next_object()
              data = await grid_out.read()

        This iterates through all versions of "lisa.txt" stored in GridFS.
        Note that setting no_cursor_timeout to True may be important to
        prevent the cursor from timing out during long multi-file processing
        work.

        As another example, the call::

          most_recent_three = fs.find().sort("uploadDate", -1).limit(3)

        would return a cursor to the three most recently uploaded files
        in GridFS.

        Follows a similar interface to
        :meth:`~motor.MotorCollection.find`
        in :class:`~motor.MotorCollection`.

        :Parameters:
          - `filter`: Search query.
          - `batch_size` (optional): The number of documents to return per
            batch.
          - `limit` (optional): The maximum number of documents to return.
          - `no_cursor_timeout` (optional): The server normally times out idle
            cursors after an inactivity period (10 minutes) to prevent excess
            memory use. Set this option to True to prevent that.
          - `skip` (optional): The number of documents to skip before
            returning.
          - `sort` (optional): The order by which to sort results. Defaults to
            None.
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession`, created with
            :meth:`~MotorClient.start_session`.

        If a :class:`~pymongo.client_session.ClientSession` is passed to
        :meth:`find`, all returned :class:`MotorGridOut` instances
        are associated with that session.

        .. versionchanged:: 1.2
           Added session parameter.
        """
        # Query the delegate first, then wrap its cursor in the Motor class.
        delegate_cursor = self.delegate.find(*args, **kwargs)
        cursor_class = create_class_with_framework(
            AgnosticGridOutCursor, self._framework, self.__module__
        )
        return cursor_class(delegate_cursor, self.collection)
|
|
|
|
|
|
def _hash_gridout(gridout):
    """Return a SHA-256 hex digest identifying ``gridout`` for an Etag header.

    sha256 is used so the Etag hash is FIPS-compliant. The digest covers the
    string forms of ``_id``, ``length`` and ``upload_date``, in that order,
    as a proxy for uniqueness, so the file content is never read.
    """
    digest = hashlib.sha256()
    for field in (gridout._id, gridout.length, gridout.upload_date):
        digest.update(str(field).encode("utf8"))
    return digest.hexdigest()
|