2 changes: 1 addition & 1 deletion pyproject.toml
@@ -35,7 +35,7 @@ keywords = [
"scraping",
]
dependencies = [
"apify-client>=2.3.0,<3.0.0",
"apify-client @ git+https://github.com/apify/apify-client-python.git@typed-clients",
"apify-shared>=2.0.0,<3.0.0",
"crawlee>=1.0.4,<2.0.0",
"cachetools>=5.5.0",
52 changes: 40 additions & 12 deletions src/apify/_actor.py
@@ -885,7 +885,8 @@ async def start(
f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.'
)

api_result = await client.actor(actor_id).start(
actor_client = client.actor(actor_id)
run = await actor_client.start(
run_input=run_input,
content_type=content_type,
build=build,
@@ -895,7 +896,11 @@
webhooks=serialized_webhooks,
)

return ActorRun.model_validate(api_result)
if run is None:
raise RuntimeError(f'Failed to start Actor with ID "{actor_id}".')

run_dict = run.model_dump(by_alias=True)
return ActorRun.model_validate(run_dict)

async def abort(
self,
@@ -923,13 +928,18 @@ async def abort(
self._raise_if_not_initialized()

client = self.new_client(token=token) if token else self.apify_client
run_client = client.run(run_id)

if status_message:
await client.run(run_id).update(status_message=status_message)
await run_client.update(status_message=status_message)

run = await run_client.abort(gracefully=gracefully)

api_result = await client.run(run_id).abort(gracefully=gracefully)
if run is None:
raise RuntimeError(f'Failed to abort Actor run with ID "{run_id}".')

return ActorRun.model_validate(api_result)
run_dict = run.model_dump(by_alias=True)
return ActorRun.model_validate(run_dict)

async def call(
self,
@@ -1002,7 +1012,8 @@ async def call(
f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.'
)

api_result = await client.actor(actor_id).call(
actor_client = client.actor(actor_id)
run = await actor_client.call(
run_input=run_input,
content_type=content_type,
build=build,
@@ -1013,7 +1024,11 @@
logger=logger,
)

return ActorRun.model_validate(api_result)
if run is None:
raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".')

run_dict = run.model_dump(by_alias=True)
return ActorRun.model_validate(run_dict)

async def call_task(
self,
@@ -1075,7 +1090,8 @@ async def call_task(
else:
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')

api_result = await client.task(task_id).call(
task_client = client.task(task_id)
run = await task_client.call(
task_input=task_input,
build=build,
memory_mbytes=memory_mbytes,
@@ -1084,7 +1100,11 @@
wait_secs=int(wait.total_seconds()) if wait is not None else None,
)

return ActorRun.model_validate(api_result)
if run is None:
raise RuntimeError(f'Failed to call Task with ID "{task_id}".')

run_dict = run.model_dump(by_alias=True)
return ActorRun.model_validate(run_dict)

async def metamorph(
self,
@@ -1261,11 +1281,19 @@ async def set_status_message(
if not self.configuration.actor_run_id:
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

api_result = await self.apify_client.run(self.configuration.actor_run_id).update(
status_message=status_message, is_status_message_terminal=is_terminal
run_client = self.apify_client.run(self.configuration.actor_run_id)
run = await run_client.update(
status_message=status_message,
is_status_message_terminal=is_terminal,
)

return ActorRun.model_validate(api_result)
if run is None:
raise RuntimeError(
f'Failed to set status message for Actor run with ID "{self.configuration.actor_run_id}".'
)

run_dict = run.model_dump(by_alias=True)
return ActorRun.model_validate(run_dict)

async def create_proxy_configuration(
self,
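The same convert-or-fail pattern now repeats in `start`, `abort`, `call`, `call_task`, and `set_status_message`: the typed client may return `None`, and a non-`None` run model is dumped by alias and re-validated into the SDK's own `ActorRun`. A minimal sketch of that pattern as a standalone helper (the helper name, signature, and error message are illustrative, not part of this PR):

```python
from __future__ import annotations

from pydantic import BaseModel

from apify._models import ActorRun


def to_actor_run(run: BaseModel | None, *, context: str) -> ActorRun:
    """Convert a typed apify-client run model into the SDK's `ActorRun`.

    Hypothetical helper; it only illustrates the convert-or-fail pattern
    repeated throughout `_actor.py` in this PR.
    """
    if run is None:
        # The typed client returns `None` when the API call did not produce a run.
        raise RuntimeError(f'Failed to {context}.')

    # Dump with the API's camelCase aliases so `ActorRun`, whose fields are
    # declared with the same aliases, can re-validate the payload.
    run_dict = run.model_dump(by_alias=True)
    return ActorRun.model_validate(run_dict)
```

With a helper like this, each call site would reduce to something like `return to_actor_run(run, context=f'start Actor with ID "{actor_id}"')`.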
15 changes: 11 additions & 4 deletions src/apify/_charging.py
@@ -351,14 +351,21 @@ async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict:
if self._actor_run_id is None:
raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')

run = run_validator.validate_python(await self._client.run(self._actor_run_id).get())
run = await self._client.run(self._actor_run_id).get()

if run is None:
raise RuntimeError('Actor run not found')

run_dict = run.model_dump(by_alias=True)
actor_run = run_validator.validate_python(run_dict)

if actor_run is None:
raise RuntimeError('Actor run not found')

return _FetchedPricingInfoDict(
pricing_info=run.pricing_info,
charged_event_counts=run.charged_event_counts or {},
max_total_charge_usd=run.options.max_total_charge_usd or Decimal('inf'),
pricing_info=actor_run.pricing_info,
charged_event_counts=actor_run.charged_event_counts or {},
max_total_charge_usd=actor_run.options.max_total_charge_usd or Decimal('inf'),
)

# Local development without environment variables
53 changes: 53 additions & 0 deletions src/apify/_models.py
@@ -94,32 +94,82 @@ class ActorRunUsage(BaseModel):

@docs_group('Actor')
class ActorRun(BaseModel):
"""Represents an Actor run and its associated data."""

model_config = ConfigDict(populate_by_name=True)

id: Annotated[str, Field(alias='id')]
"""Unique identifier of the Actor run."""

act_id: Annotated[str, Field(alias='actId')]
"""ID of the Actor that was run."""

user_id: Annotated[str, Field(alias='userId')]
"""ID of the user who started the run."""

actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None
"""ID of the Actor task, if the run was started from a task."""

started_at: Annotated[datetime, Field(alias='startedAt')]
"""Time when the Actor run started."""

finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None
"""Time when the Actor run finished."""

status: Annotated[ActorJobStatus, Field(alias='status')]
"""Current status of the Actor run."""

status_message: Annotated[str | None, Field(alias='statusMessage')] = None
"""Detailed message about the run status."""

is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None
"""Whether the status message is terminal (final)."""

meta: Annotated[ActorRunMeta, Field(alias='meta')]
"""Metadata about the Actor run."""

stats: Annotated[ActorRunStats, Field(alias='stats')]
"""Statistics of the Actor run."""

options: Annotated[ActorRunOptions, Field(alias='options')]
"""Configuration options for the Actor run."""

build_id: Annotated[str, Field(alias='buildId')]
"""ID of the Actor build used for this run."""

exit_code: Annotated[int | None, Field(alias='exitCode')] = None
"""Exit code of the Actor run process."""

default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')]
"""ID of the default key-value store for this run."""

default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')]
"""ID of the default dataset for this run."""

default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')]
"""ID of the default request queue for this run."""

build_number: Annotated[str | None, Field(alias='buildNumber')] = None
"""Build number of the Actor build used for this run."""

container_url: Annotated[str, Field(alias='containerUrl')]
"""URL of the container running the Actor."""

is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None
"""Whether the container's HTTP server is ready to accept requests."""

git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None
"""Name of the git branch used for the Actor build."""

usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None
"""Resource usage statistics for the run."""

usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None
"""Total cost of the run in USD."""

usage_usd: Annotated[ActorRunUsage | None, Field(alias='usageUsd')] = None
"""Resource usage costs in USD."""

pricing_info: Annotated[
FreeActorPricingInfo
| FlatPricePerMonthActorPricingInfo
@@ -128,10 +178,13 @@ class ActorRun(BaseModel):
| None,
Field(alias='pricingInfo', discriminator='pricing_model'),
] = None
"""Pricing information for the Actor."""

charged_event_counts: Annotated[
dict[str, int] | None,
Field(alias='chargedEventCounts'),
] = None
"""Count of charged events for pay-per-event pricing model."""


class FreeActorPricingInfo(BaseModel):
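The newly documented `ActorRun` fields map the API's camelCase payload onto snake_case attributes through per-field aliases combined with `populate_by_name=True`, so both spellings are accepted on input and `model_dump(by_alias=True)` restores the API keys. A small, self-contained sketch of the same pattern on a toy model (the model and field names below are illustrative only):

```python
from typing import Annotated

from pydantic import BaseModel, ConfigDict, Field


class RunRef(BaseModel):
    """Toy model mirroring the alias style used by `ActorRun`."""

    model_config = ConfigDict(populate_by_name=True)

    act_id: Annotated[str, Field(alias='actId')]
    build_number: Annotated[str | None, Field(alias='buildNumber')] = None


# Both the API spelling (aliases) and the Python spelling are accepted on input.
from_api = RunRef.model_validate({'actId': 'abc123', 'buildNumber': '0.1.2'})
from_python = RunRef(act_id='abc123', build_number='0.1.2')
assert from_api == from_python

# Dumping by alias restores the camelCase keys used by the Apify API.
assert from_api.model_dump(by_alias=True) == {'actId': 'abc123', 'buildNumber': '0.1.2'}
```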
6 changes: 3 additions & 3 deletions src/apify/storage_clients/_apify/_alias_resolving.py
@@ -14,7 +14,7 @@
from collections.abc import Callable
from types import TracebackType

from apify_client.clients import (
from apify_client._resource_clients import (
DatasetClientAsync,
DatasetCollectionClientAsync,
KeyValueStoreClientAsync,
@@ -105,8 +105,8 @@ async def open_by_alias(
# Create new unnamed storage and store alias mapping
raw_metadata = await collection_client.get_or_create()

await alias_resolver.store_mapping(storage_id=raw_metadata['id'])
return get_resource_client_by_id(raw_metadata['id'])
await alias_resolver.store_mapping(storage_id=raw_metadata.id)
return get_resource_client_by_id(raw_metadata.id)


class AliasResolver:
6 changes: 3 additions & 3 deletions src/apify/storage_clients/_apify/_api_client_creation.py
@@ -8,7 +8,7 @@
from apify.storage_clients._apify._alias_resolving import open_by_alias

if TYPE_CHECKING:
from apify_client.clients import DatasetClientAsync, KeyValueStoreClientAsync, RequestQueueClientAsync
from apify_client._resource_clients import DatasetClientAsync, KeyValueStoreClientAsync, RequestQueueClientAsync

from apify._configuration import Configuration

@@ -137,13 +137,13 @@ def get_resource_client(storage_id: str) -> DatasetClientAsync:
# Default storage does not exist. Create a new one.
if not raw_metadata:
raw_metadata = await collection_client.get_or_create()
resource_client = get_resource_client(raw_metadata['id'])
resource_client = get_resource_client(raw_metadata.id)
return resource_client

# Open by name.
case (None, str(), None, _):
raw_metadata = await collection_client.get_or_create(name=name)
return get_resource_client(raw_metadata['id'])
return get_resource_client(raw_metadata.id)

# Open by ID.
case (None, None, str(), _):
16 changes: 14 additions & 2 deletions src/apify/storage_clients/_apify/_dataset_client.py
@@ -2,6 +2,7 @@

import asyncio
import warnings
from datetime import datetime
from logging import getLogger
from typing import TYPE_CHECKING, Any

@@ -17,7 +18,7 @@
if TYPE_CHECKING:
from collections.abc import AsyncIterator

from apify_client.clients import DatasetClientAsync
from apify_client._resource_clients import DatasetClientAsync
from crawlee._types import JsonSerializable

from apify import Configuration
@@ -65,7 +66,18 @@ def __init__(
@override
async def get_metadata(self) -> DatasetMetadata:
metadata = await self._api_client.get()
return DatasetMetadata.model_validate(metadata)

if metadata is None:
raise ValueError('Failed to retrieve dataset metadata.')

return DatasetMetadata(
id=metadata.id,
name=metadata.name,
created_at=datetime.fromisoformat(metadata.created_at.replace('Z', '+00:00')),
modified_at=datetime.fromisoformat(metadata.modified_at.replace('Z', '+00:00')),
accessed_at=datetime.fromisoformat(metadata.accessed_at.replace('Z', '+00:00')),
item_count=int(metadata.item_count),
)

@classmethod
async def open(
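The dataset client above (and the key-value store client below) parse the API's ISO-8601 timestamps with `datetime.fromisoformat` after swapping a trailing `Z` for `+00:00`; `fromisoformat` accepts the `Z` suffix directly only on Python 3.11+, so the replacement keeps the parsing working on older interpreters. A small standalone check of that conversion (the sample timestamp is made up):

```python
from datetime import datetime, timedelta

raw = '2024-05-06T07:08:09.123Z'  # shape of the timestamps returned by the API

# `fromisoformat` understands the `Z` suffix only since Python 3.11,
# so normalize it to an explicit UTC offset first.
parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))

assert parsed.tzinfo is not None
assert parsed.utcoffset() == timedelta(0)
print(parsed)  # 2024-05-06 07:08:09.123000+00:00
```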
23 changes: 17 additions & 6 deletions src/apify/storage_clients/_apify/_key_value_store_client.py
@@ -2,6 +2,7 @@

import asyncio
import warnings
from datetime import datetime
from logging import getLogger
from typing import TYPE_CHECKING, Any

@@ -11,12 +12,12 @@
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata

from ._api_client_creation import create_storage_api_client
from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
from ._models import ApifyKeyValueStoreMetadata

if TYPE_CHECKING:
from collections.abc import AsyncIterator

from apify_client.clients import KeyValueStoreClientAsync
from apify_client._resource_clients import KeyValueStoreClientAsync

from apify import Configuration

@@ -54,7 +55,18 @@ def __init__(
@override
async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
metadata = await self._api_client.get()
return ApifyKeyValueStoreMetadata.model_validate(metadata)

if metadata is None:
raise ValueError('Failed to retrieve key-value store metadata.')

return ApifyKeyValueStoreMetadata(
id=metadata.id,
name=metadata.name,
created_at=datetime.fromisoformat(metadata.created_at.replace('Z', '+00:00')),
modified_at=datetime.fromisoformat(metadata.modified_at.replace('Z', '+00:00')),
accessed_at=datetime.fromisoformat(metadata.accessed_at.replace('Z', '+00:00')),
url_signing_secret_key=metadata.url_signing_secret_key,
)

@classmethod
async def open(
@@ -143,14 +155,13 @@ async def iterate_keys(
count = 0

while True:
response = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key)
list_key_page = KeyValueStoreListKeysPage.model_validate(response)
list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key)

for item in list_key_page.items:
# Convert KeyValueStoreKeyInfo to KeyValueStoreRecordMetadata
record_metadata = KeyValueStoreRecordMetadata(
key=item.key,
size=item.size,
size=int(item.size),
content_type='application/octet-stream', # Content type not available from list_keys
)
yield record_metadata