queues/venv/lib/python3.11/site-packages/motor/motor_gridfs.py
Egor Matveev 6c6a549aff
All checks were successful
Deploy Prod / Build (pull_request) Successful in 9s
Deploy Prod / Push (pull_request) Successful in 12s
Deploy Prod / Deploy prod (pull_request) Successful in 10s
fix
2024-12-28 22:48:16 +03:00

505 lines
18 KiB
Python

# Copyright 2011-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""GridFS implementation for Motor, an asynchronous driver for MongoDB."""
import hashlib
import warnings
import gridfs
import pymongo
import pymongo.errors
from gridfs import DEFAULT_CHUNK_SIZE, grid_file
from motor import docstrings
from motor.core import AgnosticCollection, AgnosticCursor, AgnosticDatabase
from motor.metaprogramming import (
AsyncCommand,
AsyncRead,
DelegateMethod,
ReadOnlyProperty,
coroutine_annotation,
create_class_with_framework,
)
class AgnosticGridOutCursor(AgnosticCursor):
    """Cursor whose :meth:`next_object` returns Motor GridOut wrappers."""

    __motor_class_name__ = "MotorGridOutCursor"
    __delegate_class__ = gridfs.GridOutCursor

    def next_object(self):
        """**DEPRECATED** - Get next GridOut object from cursor."""
        # Note: the super() call will raise a warning for the deprecation.
        raw_grid_out = super().next_object()
        if not raw_grid_out:
            # Exhausted.
            return None

        # Wrap the delegate's result in the framework-specific GridOut class
        # so callers get the asynchronous API.
        wrapper_class = create_class_with_framework(
            AgnosticGridOut, self._framework, self.__module__
        )
        return wrapper_class(self.collection, delegate=raw_grid_out)
class MotorGridOutProperty(ReadOnlyProperty):
    """Creates a readonly attribute on the wrapped PyMongo GridOut."""

    def create_attribute(self, cls, attr_name):
        """Return the ``property`` that exposes ``attr_name`` of the delegate.

        The property's documentation is copied from the attribute of the same
        name on ``cls.__delegate_class__``. Its getter raises
        :exc:`pymongo.errors.InvalidOperation` while the delegate's ``_file``
        is falsy, and otherwise forwards the read to the delegate.
        """
        delegate_doc = getattr(cls.__delegate_class__, attr_name).__doc__

        def fget(obj):
            wrapped = obj.delegate
            if wrapped._file:
                return getattr(wrapped, attr_name)
            raise pymongo.errors.InvalidOperation(
                "You must call MotorGridOut.open() before accessing "
                "the %s property" % attr_name
            )

        return property(fget=fget, doc=delegate_doc)
class AgnosticGridOut:
    """Class to read data out of GridFS.

    MotorGridOut supports the same attributes as PyMongo's
    :class:`~gridfs.grid_file.GridOut`, such as ``_id``, ``content_type``,
    etc.

    You don't need to instantiate this class directly - use the
    methods provided by :class:`~motor.MotorGridFSBucket`. If it **is**
    instantiated directly, call :meth:`open`, :meth:`read`, or
    :meth:`readline` before accessing its attributes.
    """

    __motor_class_name__ = "MotorGridOut"
    __delegate_class__ = gridfs.GridOut

    # MotorGridOutProperty attributes raise InvalidOperation until the
    # delegate's ``_file`` is populated; the remaining descriptors forward to
    # the delegate's methods.
    _id = MotorGridOutProperty()
    aliases = MotorGridOutProperty()
    chunk_size = MotorGridOutProperty()
    close = MotorGridOutProperty()
    content_type = MotorGridOutProperty()
    filename = MotorGridOutProperty()
    length = MotorGridOutProperty()
    metadata = MotorGridOutProperty()
    name = MotorGridOutProperty()
    _open = AsyncCommand(attr_name="open")
    read = AsyncRead()
    readable = DelegateMethod()
    readchunk = AsyncRead()
    readline = AsyncRead()
    seek = DelegateMethod()
    seekable = DelegateMethod()
    tell = DelegateMethod()
    upload_date = MotorGridOutProperty()
    write = DelegateMethod()

    def __init__(
        self, root_collection, file_id=None, file_document=None, delegate=None, session=None
    ):
        """Wrap an existing delegate, or build one for ``root_collection``.

        :Parameters:
          - `root_collection`: the MotorCollection the file is read from
          - `file_id` (optional): passed to the delegate class when no
            `delegate` is given
          - `file_document` (optional): passed to the delegate class when no
            `delegate` is given
          - `delegate` (optional): an already constructed delegate to wrap
          - `session` (optional): passed to the delegate class when no
            `delegate` is given

        Raises :exc:`TypeError` if `root_collection` is not a MotorCollection
        of this framework.
        """
        collection_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )

        if not isinstance(root_collection, collection_class):
            raise TypeError(
                "First argument to MotorGridOut must be "
                "MotorCollection, not %r" % root_collection
            )

        if delegate:
            self.delegate = delegate
        else:
            self.delegate = self.__delegate_class__(
                root_collection.delegate, file_id, file_document, session=session
            )

        self.io_loop = root_collection.get_io_loop()

    def __aiter__(self):
        """Support ``async for chunk in grid_out``."""
        return self

    async def __anext__(self):
        """Return the next chunk, ending the iteration on an empty read."""
        chunk = await self.readchunk()
        if chunk:
            return chunk
        raise StopAsyncIteration()

    def __getattr__(self, item):
        """Forward unknown attributes to the delegate once it has a file.

        Raises :exc:`pymongo.errors.InvalidOperation` while the delegate's
        ``_file`` is falsy.
        """
        # __getattr__ only runs after normal lookup has failed, so reaching
        # here for "delegate" means it was never assigned. Reading
        # self.delegate below would re-enter this method without end, so fail
        # with the AttributeError that hasattr()/copy expect instead.
        if item == "delegate":
            raise AttributeError(item)

        if not self.delegate._file:
            raise pymongo.errors.InvalidOperation(
                "You must call MotorGridOut.open() before accessing the %s property" % item
            )

        return getattr(self.delegate, item)

    @coroutine_annotation
    def open(self):
        """Retrieve this file's attributes from the server.

        Returns a Future.

        .. versionchanged:: 2.0
           No longer accepts a callback argument.

        .. versionchanged:: 0.2
           :class:`~motor.MotorGridOut` now opens itself on demand, calling
           ``open`` explicitly is rarely needed.
        """
        return self._framework.chain_return_value(self._open(), self.get_io_loop(), self)

    def get_io_loop(self):
        """Return the IO loop taken from the root collection at construction."""
        return self.io_loop

    async def stream_to_handler(self, request_handler):
        """Write the contents of this file to a
        :class:`tornado.web.RequestHandler`. This method calls
        :meth:`~tornado.web.RequestHandler.flush` on
        the RequestHandler, so ensure all headers have already been set.
        For a more complete example see the implementation of
        :class:`~motor.web.GridFSHandler`.

        .. code-block:: python

            class FileHandler(tornado.web.RequestHandler):
                async def get(self, filename):
                    db = self.settings["db"]
                    fs = motor.MotorGridFSBucket(db())
                    try:
                        gridout = await fs.open_download_stream_by_name(filename)
                    except gridfs.NoFile:
                        raise tornado.web.HTTPError(404)

                    self.set_header("Content-Type", gridout.content_type)
                    self.set_header("Content-Length", gridout.length)
                    await gridout.stream_to_handler(self)
                    self.finish()

        .. seealso:: Tornado `RequestHandler <http://tornadoweb.org/en/stable/web.html#request-handlers>`_
        """
        written = 0
        while written < self.length:
            # Reading chunk_size at a time minimizes buffering.
            chunk = await self.read(self.chunk_size)
            if not chunk:
                # An empty read cannot advance ``written``; stop instead of
                # looping forever if the data ends before ``length`` bytes.
                break

            # write() simply appends the output to a list; flush() sends it
            # over the network and minimizes buffering in the handler.
            request_handler.write(chunk)
            request_handler.flush()
            written += len(chunk)
class AgnosticGridIn:
    __motor_class_name__ = "MotorGridIn"
    __delegate_class__ = gridfs.GridIn

    # Descriptors that forward to the PyMongo GridIn delegate.
    __getattr__ = DelegateMethod()
    _id = ReadOnlyProperty()
    abort = AsyncCommand()
    chunk_size = ReadOnlyProperty()
    closed = ReadOnlyProperty()
    close = AsyncCommand()
    content_type = ReadOnlyProperty()
    filename = ReadOnlyProperty()
    length = ReadOnlyProperty()
    name = ReadOnlyProperty()
    read = DelegateMethod()
    readable = DelegateMethod()
    seekable = DelegateMethod()
    upload_date = ReadOnlyProperty()
    write = AsyncCommand().unwrap("MotorGridOut")
    writeable = DelegateMethod()
    writelines = AsyncCommand().unwrap("MotorGridOut")
    _exit = AsyncCommand("__exit__")
    set = AsyncCommand(
        attr_name="__setattr__",
        doc="""
Set an arbitrary metadata attribute on the file. Stores value on the server
as a key-value pair within the file document once the file is closed. If
the file is already closed, calling :meth:`set` will immediately update the file
document on the server.
Metadata set on the file appears as attributes on a
:class:`~motor.MotorGridOut` object created from the file.
:Parameters:
- `name`: Name of the attribute, will be stored as a key in the file
document on the server
- `value`: Value of the attribute
""",
    )

    def __init__(self, root_collection, delegate=None, session=None, **kwargs):
        """
        Class to write data to GridFS. Application developers should not
        generally need to instantiate this class - see
        :meth:`~motor.MotorGridFSBucket.open_upload_stream`.

        Any of the file level options specified in the `GridFS Spec
        <http://dochub.mongodb.org/core/gridfs/>`_ may be passed as
        keyword arguments. Any additional keyword arguments will be
        set as additional fields on the file document. Valid keyword
        arguments include:

          - ``"_id"``: unique ID for this file (default:
            :class:`~bson.objectid.ObjectId`) - this ``"_id"`` must
            not have already been used for another file

          - ``"filename"``: human name for the file

          - ``"contentType"`` or ``"content_type"``: valid mime-type
            for the file

          - ``"chunkSize"`` or ``"chunk_size"``: size of each of the
            chunks, in bytes (default: 255 kb)

          - ``"encoding"``: encoding used for this file. Any :class:`str`
            that is written to the file will be converted to
            :class:`bytes`.

        :Parameters:
          - `root_collection`: root collection to write to
          - `delegate` (optional): an existing delegate to wrap instead of
            creating a new one
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession` to use for all
            commands
          - `**kwargs` (optional): file level options (see above)

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridIn class in PyMongo).

        .. versionchanged:: 0.2
           ``open`` method removed, no longer needed.
        """
        expected_class = create_class_with_framework(
            AgnosticCollection, self._framework, self.__module__
        )
        if not isinstance(root_collection, expected_class):
            raise TypeError(
                "First argument to MotorGridIn must be MotorCollection, not %r" % root_collection
            )

        self.io_loop = root_collection.get_io_loop()

        # Reuse a delegate supplied by the caller; otherwise create one bound
        # to the underlying PyMongo collection.
        if delegate:
            self.delegate = delegate
        else:
            self.delegate = self.__delegate_class__(
                root_collection.delegate, session=session, **kwargs
            )

    # Support "async with bucket.open_upload_stream() as f:"
    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._exit(exc_type, exc_val, exc_tb)

    def get_io_loop(self):
        """Return the IO loop taken from the root collection at construction."""
        return self.io_loop
class AgnosticGridFSBucket:
    __motor_class_name__ = "MotorGridFSBucket"
    __delegate_class__ = gridfs.GridFSBucket

    # Descriptors that forward to the PyMongo GridFSBucket delegate; the
    # ``.wrap(...)`` entries name the delegate result type to be wrapped.
    delete = AsyncCommand(doc=docstrings.gridfs_delete_doc)
    download_to_stream = AsyncCommand(doc=docstrings.gridfs_download_to_stream_doc)
    download_to_stream_by_name = AsyncCommand(doc=docstrings.gridfs_download_to_stream_by_name_doc)
    open_download_stream = AsyncCommand(doc=docstrings.gridfs_open_download_stream_doc).wrap(
        gridfs.GridOut
    )
    open_download_stream_by_name = AsyncCommand(
        doc=docstrings.gridfs_open_download_stream_by_name_doc
    ).wrap(gridfs.GridOut)
    open_upload_stream = DelegateMethod(doc=docstrings.gridfs_open_upload_stream_doc).wrap(
        gridfs.GridIn
    )
    open_upload_stream_with_id = DelegateMethod(
        doc=docstrings.gridfs_open_upload_stream_with_id_doc
    ).wrap(gridfs.GridIn)
    rename = AsyncCommand(doc=docstrings.gridfs_rename_doc)
    upload_from_stream = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_doc)
    upload_from_stream_with_id = AsyncCommand(doc=docstrings.gridfs_upload_from_stream_with_id_doc)

    def __init__(
        self,
        database,
        bucket_name="fs",
        chunk_size_bytes=DEFAULT_CHUNK_SIZE,
        write_concern=None,
        read_preference=None,
        collection=None,
    ):
        """Create a handle to a GridFS bucket.

        Raises :exc:`~pymongo.errors.ConfigurationError` if `write_concern`
        is not acknowledged.

        This class conforms to the `GridFS API Spec
        <https://github.com/mongodb/specifications/blob/master/source/gridfs/gridfs-spec.rst>`_
        for MongoDB drivers.

        :Parameters:
          - `database`: database to use.
          - `bucket_name` (optional): The name of the bucket. Defaults to 'fs'.
          - `chunk_size_bytes` (optional): The chunk size in bytes. Defaults
            to 255KB.
          - `write_concern` (optional): The
            :class:`~pymongo.write_concern.WriteConcern` to use. If ``None``
            (the default) db.write_concern is used.
          - `read_preference` (optional): The read preference to use. If
            ``None`` (the default) db.read_preference is used.
          - `collection` (optional): Deprecated, an alias for `bucket_name`
            that exists solely to provide backwards compatibility.

        .. versionchanged:: 3.0
           Removed support for the `disable_md5` parameter (to match the
           GridFSBucket class in PyMongo).

        .. versionchanged:: 2.1
           Added support for the `bucket_name`, `chunk_size_bytes`,
           `write_concern`, and `read_preference` parameters.
           Deprecated the `collection` parameter which is now an alias to
           `bucket_name` (to match the GridFSBucket class in PyMongo).

        .. versionadded:: 1.0

        .. mongodoc:: gridfs
        """
        # Preserve backwards compatibility of "collection" parameter
        if collection is not None:
            warnings.warn(
                'the "collection" parameter is deprecated, use "bucket_name" instead',
                DeprecationWarning,
                stacklevel=2,
            )
            bucket_name = collection

        expected_db_class = create_class_with_framework(
            AgnosticDatabase, self._framework, self.__module__
        )
        if not isinstance(database, expected_db_class):
            raise TypeError(
                f"First argument to {self.__class__} must be MotorDatabase, not {database!r}"
            )

        self.io_loop = database.get_io_loop()

        # The Motor collection is what wrap() and find() hand to the GridIn,
        # GridOut and cursor wrappers they create.
        self.collection = database.get_collection(
            bucket_name, write_concern=write_concern, read_preference=read_preference
        )
        self.delegate = self.__delegate_class__(
            database.delegate,
            bucket_name,
            chunk_size_bytes=chunk_size_bytes,
            write_concern=write_concern,
            read_preference=read_preference,
        )

    def get_io_loop(self):
        """Return the IO loop taken from the database at construction."""
        return self.io_loop

    def wrap(self, obj):
        """Wrap a PyMongo GridIn, GridOut or GridOutCursor in its Motor class.

        The match is on the exact class of ``obj``; for any other class this
        returns ``None``.
        """
        obj_class = obj.__class__
        if obj_class is grid_file.GridIn:
            agnostic_class = AgnosticGridIn
        elif obj_class is grid_file.GridOut:
            agnostic_class = AgnosticGridOut
        elif obj_class is gridfs.GridOutCursor:
            # Cursors take different constructor arguments from the file
            # wrappers, so they are built separately.
            cursor_class = create_class_with_framework(
                AgnosticGridOutCursor, self._framework, self.__module__
            )
            return cursor_class(cursor=obj, collection=self.collection)
        else:
            return None

        file_class = create_class_with_framework(
            agnostic_class, self._framework, self.__module__
        )
        return file_class(root_collection=self.collection, delegate=obj)

    def find(self, *args, **kwargs):
        """Find and return the files collection documents that match ``filter``.

        Returns a cursor that iterates across files matching
        arbitrary queries on the files collection. Can be combined
        with other modifiers for additional control.

        For example::

          cursor = bucket.find({"filename": "lisa.txt"}, no_cursor_timeout=True)
          while (await cursor.fetch_next):
              grid_out = cursor.next_object()
              data = await grid_out.read()

        This iterates through all versions of "lisa.txt" stored in GridFS.
        Note that setting no_cursor_timeout to True may be important to
        prevent the cursor from timing out during long multi-file processing
        work.

        As another example, the call::

          most_recent_three = fs.find().sort("uploadDate", -1).limit(3)

        would return a cursor to the three most recently uploaded files
        in GridFS.

        Follows a similar interface to
        :meth:`~motor.MotorCollection.find`
        in :class:`~motor.MotorCollection`.

        :Parameters:
          - `filter`: Search query.
          - `batch_size` (optional): The number of documents to return per
            batch.
          - `limit` (optional): The maximum number of documents to return.
          - `no_cursor_timeout` (optional): The server normally times out idle
            cursors after an inactivity period (10 minutes) to prevent excess
            memory use. Set this option to True prevent that.
          - `skip` (optional): The number of documents to skip before
            returning.
          - `sort` (optional): The order by which to sort results. Defaults to
            None.
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession`, created with
            :meth:`~MotorClient.start_session`.

        If a :class:`~pymongo.client_session.ClientSession` is passed to
        :meth:`find`, all returned :class:`MotorGridOut` instances
        are associated with that session.

        .. versionchanged:: 1.2
           Added session parameter.
        """
        delegate_cursor = self.delegate.find(*args, **kwargs)
        cursor_class = create_class_with_framework(
            AgnosticGridOutCursor, self._framework, self.__module__
        )
        return cursor_class(delegate_cursor, self.collection)
def _hash_gridout(gridout):
    """Compute the effective hash of a GridOut object for use with an Etag header.

    Create a FIPS-compliant Etag HTTP header hash using sha256.
    The ``_id``, ``length`` and ``upload_date`` of the file are hashed, in
    that order, as a proxy for uniqueness to avoid reading the entire file.

    Returns the hexadecimal digest as a string.
    """
    digest = hashlib.sha256()
    # Feed each field's string form, UTF-8 encoded, into the same digest.
    for field_value in (gridout._id, gridout.length, gridout.upload_date):
        digest.update(str(field_value).encode("utf8"))
    return digest.hexdigest()