# Copyright 2011-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""GridFS implementation for Motor, an asynchronous driver for MongoDB."""

import hashlib
import warnings

import gridfs
import pymongo
import pymongo.errors
from gridfs import DEFAULT_CHUNK_SIZE, grid_file

from motor import docstrings
from motor.core import AgnosticCollection, AgnosticCursor, AgnosticDatabase
from motor.metaprogramming import (
    AsyncCommand,
    AsyncRead,
    DelegateMethod,
    ReadOnlyProperty,
    coroutine_annotation,
    create_class_with_framework,
)


class AgnosticGridOutCursor(AgnosticCursor):
    # Cursor over GridFS file documents; wraps PyMongo's GridOutCursor and
    # hands back Motor GridOut wrappers instead of raw PyMongo GridOut objects.
    __motor_class_name__ = "MotorGridOutCursor"
    __delegate_class__ = gridfs.GridOutCursor

    def next_object(self):
        """**DEPRECATED** - Get next GridOut object from cursor.

        Returns the next file wrapped in the framework-specific
        ``MotorGridOut`` class, or ``None`` when the parent class yields a
        falsy value (cursor exhausted).
        """
        # Note: the super() call will raise a warning for the deprecation.
        grid_out = super().next_object()
        if not grid_out:
            # Exhausted.
            return None

        grid_out_class = create_class_with_framework(
            AgnosticGridOut, self._framework, self.__module__
        )
        return grid_out_class(self.collection, delegate=grid_out)


class MotorGridOutProperty(ReadOnlyProperty):
    """Creates a readonly attribute on the wrapped PyMongo GridOut."""

    def create_attribute(self, cls, attr_name):
        """Build the read-only property named ``attr_name`` for ``cls``.

        The property forwards to the same-named attribute of the instance's
        ``delegate`` and raises :exc:`pymongo.errors.InvalidOperation` while
        the delegate's ``_file`` is still falsy (file not opened yet). The
        property's docstring is copied from ``cls.__delegate_class__``.
        """

        def fget(obj):
            if not obj.delegate._file:
                raise pymongo.errors.InvalidOperation(
                    "You must call MotorGridOut.open() before accessing "
                    "the %s property" % attr_name
                )

            return getattr(obj.delegate, attr_name)

        doc = getattr(cls.__delegate_class__, attr_name).__doc__
        return property(fget=fget, doc=doc)


class AgnosticGridOut:
    """Class to read data out of GridFS.

    MotorGridOut supports the same attributes as PyMongo's
    :class:`~gridfs.grid_file.GridOut`, such as ``_id``, ``content_type``,
    etc.

    You don't need to instantiate this class directly - use the
    methods provided by :class:`~motor.MotorGridFSBucket`. If it **is**
    instantiated directly, call :meth:`open`, :meth:`read`, or
    :meth:`readline` before accessing its attributes.
    """

    __motor_class_name__ = "MotorGridOut"
    __delegate_class__ = gridfs.GridOut

    # Attribute factories from motor.metaprogramming; presumably turned into
    # real methods/properties by create_class_with_framework -- confirm there.
    _id = MotorGridOutProperty()
    aliases = MotorGridOutProperty()
    chunk_size = MotorGridOutProperty()
    close = MotorGridOutProperty()
    content_type = MotorGridOutProperty()
    filename = MotorGridOutProperty()
    length = MotorGridOutProperty()
    metadata = MotorGridOutProperty()
    name = MotorGridOutProperty()
    _open = AsyncCommand(attr_name="open")
    read = AsyncRead()
    readable = DelegateMethod()
    readchunk = AsyncRead()
    readline = AsyncRead()
    seek = DelegateMethod()
    seekable = DelegateMethod()
    tell = DelegateMethod()
    upload_date = MotorGridOutProperty()
    write = DelegateMethod()

    def __init__(
        self, root_collection, file_id=None, file_document=None, delegate=None, session=None
    ):
        """Wrap an existing PyMongo GridOut or create one for ``root_collection``.

        Raises :exc:`TypeError` if ``root_collection`` is not an instance of
        the framework-specific MotorCollection class. When ``delegate`` is
        falsy a new delegate is built from ``root_collection.delegate``,
        ``file_id``, ``file_document`` and ``session``.
        """
        collection_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )

        if not isinstance(root_collection, collection_class):
            # f-string instead of ``"%r" % root_collection``: the %-form breaks
            # with a confusing formatting error when a tuple is passed.
            raise TypeError(
                "First argument to MotorGridOut must be "
                f"MotorCollection, not {root_collection!r}"
            )

        if delegate:
            self.delegate = delegate
        else:
            self.delegate = self.__delegate_class__(
                root_collection.delegate, file_id, file_document, session=session
            )

        self.io_loop = root_collection.get_io_loop()

    def __aiter__(self):
        """Return ``self``; the object is its own async iterator over chunks."""
        return self

    async def __anext__(self):
        """Return the next chunk from :meth:`readchunk`, stopping on an empty one."""
        chunk = await self.readchunk()
        if chunk:
            return chunk
        raise StopAsyncIteration()

    def __getattr__(self, item):
        """Forward unknown attributes to the delegate once the file is open.

        Raises :exc:`pymongo.errors.InvalidOperation` while the delegate's
        ``_file`` is falsy, and :exc:`AttributeError` if ``delegate`` itself
        has not been assigned.
        """
        # __getattr__ only runs after normal lookup fails, so being asked for
        # "delegate" means it was never set (e.g. __init__ raised, or the
        # instance was created via __new__). Reading self.delegate below would
        # then re-enter this method until RecursionError.
        if item == "delegate":
            raise AttributeError(item)

        if not self.delegate._file:
            raise pymongo.errors.InvalidOperation(
                "You must call MotorGridOut.open() before accessing the %s property" % item
            )

        return getattr(self.delegate, item)

    @coroutine_annotation
    def open(self):
        """Retrieve this file's attributes from the server.

        Returns a Future.

        .. versionchanged:: 2.0
           No longer accepts a callback argument.

        .. versionchanged:: 0.2
           :class:`~motor.MotorGridOut` now opens itself on demand, calling
           ``open`` explicitly is rarely needed.
        """
        return self._framework.chain_return_value(self._open(), self.get_io_loop(), self)

    def get_io_loop(self):
        """Return the IO loop taken from the root collection at construction."""
        return self.io_loop

    async def stream_to_handler(self, request_handler):
        """Write the contents of this file to a
        :class:`tornado.web.RequestHandler`. This method calls
        :meth:`~tornado.web.RequestHandler.flush` on
        the RequestHandler, so ensure all headers have already been set.
        For a more complete example see the implementation of
        :class:`~motor.web.GridFSHandler`.

        .. code-block:: python

            class FileHandler(tornado.web.RequestHandler):
                async def get(self, filename):
                    db = self.settings["db"]
                    fs = motor.MotorGridFSBucket(db)
                    try:
                        gridout = await fs.open_download_stream_by_name(filename)
                    except gridfs.NoFile:
                        raise tornado.web.HTTPError(404)

                    self.set_header("Content-Type", gridout.content_type)
                    self.set_header("Content-Length", gridout.length)
                    await gridout.stream_to_handler(self)
                    self.finish()

        .. seealso:: Tornado's :class:`tornado.web.RequestHandler`
           documentation.
        """
        written = 0
        while written < self.length:
            # Reading chunk_size at a time minimizes buffering.
            chunk = await self.read(self.chunk_size)
            if not chunk:
                # No more data even though fewer than ``length`` bytes were
                # written by this call -- e.g. the file was partially read or
                # seeked beforehand. ``written`` can never catch up, so stop
                # rather than write/flush empty chunks forever.
                break

            # write() simply appends the output to a list; flush() sends it
            # over the network and minimizes buffering in the handler.
            request_handler.write(chunk)
            request_handler.flush()
            written += len(chunk)


class AgnosticGridIn:
    __motor_class_name__ = "MotorGridIn"
    __delegate_class__ = gridfs.GridIn

    # Attribute factories from motor.metaprogramming; presumably turned into
    # real methods/properties by create_class_with_framework -- confirm there.
    __getattr__ = DelegateMethod()
    _id = ReadOnlyProperty()
    abort = AsyncCommand()
    chunk_size = ReadOnlyProperty()
    closed = ReadOnlyProperty()
    close = AsyncCommand()
    content_type = ReadOnlyProperty()
    filename = ReadOnlyProperty()
    length = ReadOnlyProperty()
    name = ReadOnlyProperty()
    read = DelegateMethod()
    readable = DelegateMethod()
    seekable = DelegateMethod()
    upload_date = ReadOnlyProperty()
    write = AsyncCommand().unwrap("MotorGridOut")
    writeable = DelegateMethod()
    writelines = AsyncCommand().unwrap("MotorGridOut")
    _exit = AsyncCommand("__exit__")
    set = AsyncCommand(
        attr_name="__setattr__",
        doc="""
Set an arbitrary metadata attribute on the file. Stores value on the server
as a key-value pair within the file document once the file is closed. If
the file is already closed, calling :meth:`set` will immediately update the file
document on the server.

Metadata set on the file appears as attributes on a
:class:`~motor.MotorGridOut` object created from the file.

:Parameters:
  - `name`: Name of the attribute, will be stored as a key in the file
    document on the server
  - `value`: Value of the attribute
""",
    )

    def __init__(self, root_collection, delegate=None, session=None, **kwargs):
        """
        Class to write data to GridFS. Application developers should not
        generally need to instantiate this class - see
        :meth:`~motor.MotorGridFSBucket.open_upload_stream`.

        Any of the file level options specified in the GridFS Spec may be
        passed as keyword arguments. Any additional keyword arguments will be
        set as additional fields on the file document. Valid keyword arguments
        include:

          - ``"_id"``: unique ID for this file (default:
            :class:`~bson.objectid.ObjectId`) - this ``"_id"`` must
            not have already been used for another file

          - ``"filename"``: human name for the file

          - ``"contentType"`` or ``"content_type"``: valid mime-type
            for the file

          - ``"chunkSize"`` or ``"chunk_size"``: size of each of the
            chunks, in bytes (default: 255 kb)

          - ``"encoding"``: encoding used for this file. In Python 2,
            any :class:`unicode` that is written to the file will be
            converted to a :class:`str`. In Python 3, any :class:`str`
            that is written to the file will be converted to
            :class:`bytes`.

        :Parameters:
          - `root_collection`: root collection to write to
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession` to use for all
            commands
          - `**kwargs` (optional): file level options (see above)

        Raises :exc:`TypeError` if `root_collection` is not an instance of the
        framework-specific MotorCollection class.

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridIn class in PyMongo).
        .. versionchanged:: 0.2
           ``open`` method removed, no longer needed.
        """
        collection_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )

        if not isinstance(root_collection, collection_class):
            # f-string instead of ``"%r" % root_collection``: the %-form breaks
            # with a confusing formatting error when a tuple is passed.
            raise TypeError(
                f"First argument to MotorGridIn must be MotorCollection, not {root_collection!r}"
            )

        self.io_loop = root_collection.get_io_loop()
        # Short cut.
        self.delegate = delegate or self.__delegate_class__(
            root_collection.delegate, session=session, **kwargs
        )

    # Support "async with bucket.open_upload_stream() as f:"
    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._exit(exc_type, exc_val, exc_tb)

    def get_io_loop(self):
        """Return the IO loop taken from the root collection at construction."""
        return self.io_loop


class AgnosticGridFSBucket:
    __motor_class_name__ = "MotorGridFSBucket"
    __delegate_class__ = gridfs.GridFSBucket

    # Attribute factories from motor.metaprogramming. The ``.wrap(...)`` ones
    # name the PyMongo class of the result; presumably the result is then passed
    # through :meth:`wrap` below -- confirm in motor.metaprogramming.
    delete = AsyncCommand(doc=docstrings.gridfs_delete_doc)
    download_to_stream = AsyncCommand(doc=docstrings.gridfs_download_to_stream_doc)
    download_to_stream_by_name = AsyncCommand(doc=docstrings.gridfs_download_to_stream_by_name_doc)
    open_download_stream = AsyncCommand(doc=docstrings.gridfs_open_download_stream_doc).wrap(
        gridfs.GridOut
    )
    open_download_stream_by_name = AsyncCommand(
        doc=docstrings.gridfs_open_download_stream_by_name_doc
    ).wrap(gridfs.GridOut)
    open_upload_stream = DelegateMethod(doc=docstrings.gridfs_open_upload_stream_doc).wrap(
        gridfs.GridIn
    )
    open_upload_stream_with_id = DelegateMethod(
        doc=docstrings.gridfs_open_upload_stream_with_id_doc
    ).wrap(gridfs.GridIn)
    rename = AsyncCommand(doc=docstrings.gridfs_rename_doc)
    upload_from_stream = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_doc)
    upload_from_stream_with_id = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_with_id_doc)

    def __init__(
        self,
        database,
        bucket_name="fs",
        chunk_size_bytes=DEFAULT_CHUNK_SIZE,
        write_concern=None,
        read_preference=None,
        collection=None,
    ):
        """Create a handle to a GridFS bucket.

        Raises :exc:`~pymongo.errors.ConfigurationError` if `write_concern`
        is not acknowledged.

        Raises :exc:`TypeError` if `database` is not an instance of the
        framework-specific MotorDatabase class.

        This class conforms to the GridFS API Spec for MongoDB drivers.

        :Parameters:
          - `database`: database to use.
          - `bucket_name` (optional): The name of the bucket. Defaults to 'fs'.
          - `chunk_size_bytes` (optional): The chunk size in bytes. Defaults
            to 255KB.
          - `write_concern` (optional): The
            :class:`~pymongo.write_concern.WriteConcern` to use. If ``None``
            (the default) db.write_concern is used.
          - `read_preference` (optional): The read preference to use. If
            ``None`` (the default) db.read_preference is used.
          - `collection` (optional): Deprecated, an alias for `bucket_name`
            that exists solely to provide backwards compatibility.

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridFSBucket class in PyMongo).
        .. versionchanged:: 2.1
           Added support for the `bucket_name`, `chunk_size_bytes`,
           `write_concern`, and `read_preference` parameters.
           Deprecated the `collection` parameter which is now an alias to
           `bucket_name` (to match the GridFSBucket class in PyMongo).
        .. versionadded:: 1.0

        .. mongodoc:: gridfs
        """
        # Preserve backwards compatibility of "collection" parameter
        if collection is not None:
            warnings.warn(
                'the "collection" parameter is deprecated, use "bucket_name" instead',
                DeprecationWarning,
                stacklevel=2,
            )
            bucket_name = collection

        db_class = create_class_with_framework(AgnosticDatabase, self._framework, self.__module__)

        if not isinstance(database, db_class):
            raise TypeError(
                f"First argument to {self.__class__} must be MotorDatabase, not {database!r}"
            )

        self.io_loop = database.get_io_loop()
        # Motor-level collection handed to the GridIn/GridOut/cursor wrappers.
        self.collection = database.get_collection(
            bucket_name, write_concern=write_concern, read_preference=read_preference
        )
        self.delegate = self.__delegate_class__(
            database.delegate,
            bucket_name,
            chunk_size_bytes=chunk_size_bytes,
            write_concern=write_concern,
            read_preference=read_preference,
        )

    def get_io_loop(self):
        """Return the IO loop taken from the database at construction."""
        return self.io_loop

    def wrap(self, obj):
        """Wrap a PyMongo GridIn, GridOut or GridOutCursor in its Motor class.

        The match is on the exact class of ``obj`` (identity, not
        ``isinstance``). Objects of any other class fall through and ``None``
        is returned implicitly.
        """
        if obj.__class__ is grid_file.GridIn:
            grid_in_class = create_class_with_framework(
                AgnosticGridIn, self._framework, self.__module__
            )

            return grid_in_class(root_collection=self.collection, delegate=obj)

        elif obj.__class__ is grid_file.GridOut:
            grid_out_class = create_class_with_framework(
                AgnosticGridOut, self._framework, self.__module__
            )

            return grid_out_class(root_collection=self.collection, delegate=obj)

        elif obj.__class__ is gridfs.GridOutCursor:
            grid_out_class = create_class_with_framework(
                AgnosticGridOutCursor, self._framework, self.__module__
            )

            return grid_out_class(cursor=obj, collection=self.collection)

    def find(self, *args, **kwargs):
        """Find and return the files collection documents that match ``filter``.

        Returns a cursor that iterates across files matching
        arbitrary queries on the files collection. Can be combined
        with other modifiers for additional control.

        For example::

          cursor = bucket.find({"filename": "lisa.txt"}, no_cursor_timeout=True)
          while (await cursor.fetch_next):
              grid_out = cursor.next_object()
              data = await grid_out.read()

        This iterates through all versions of "lisa.txt" stored in GridFS.
        Note that setting no_cursor_timeout to True may be important to
        prevent the cursor from timing out during long multi-file processing
        work.

        As another example, the call::

          most_recent_three = fs.find().sort("uploadDate", -1).limit(3)

        would return a cursor to the three most recently uploaded files
        in GridFS.

        Follows a similar interface to
        :meth:`~motor.MotorCollection.find`
        in :class:`~motor.MotorCollection`.

        :Parameters:
          - `filter`: Search query.
          - `batch_size` (optional): The number of documents to return per
            batch.
          - `limit` (optional): The maximum number of documents to return.
          - `no_cursor_timeout` (optional): The server normally times out idle
            cursors after an inactivity period (10 minutes) to prevent excess
            memory use. Set this option to True prevent that.
          - `skip` (optional): The number of documents to skip before
            returning.
          - `sort` (optional): The order by which to sort results. Defaults to
            None.
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession`, created with
            :meth:`~MotorClient.start_session`.

        If a :class:`~pymongo.client_session.ClientSession` is passed to
        :meth:`find`, all returned :class:`MotorGridOut` instances
        are associated with that session.

        .. versionchanged:: 1.2
          Added session parameter.
        """
        cursor = self.delegate.find(*args, **kwargs)
        grid_out_cursor = create_class_with_framework(
            AgnosticGridOutCursor, self._framework, self.__module__
        )

        return grid_out_cursor(cursor, self.collection)


def _hash_gridout(gridout):
    """Compute the effective hash of a GridOut object for use with an Etag header.

    Create a FIPS-compliant Etag HTTP header hash using sha256.
    We use the _id + length + upload_date as a proxy for
    uniqueness to avoid reading the entire file.

    Returns the hex digest as a string.
    """
    grid_hash = hashlib.sha256(str(gridout._id).encode("utf8"))
    grid_hash.update(str(gridout.length).encode("utf8"))
    grid_hash.update(str(gridout.upload_date).encode("utf8"))
    return grid_hash.hexdigest()