-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow for logging to Studio when not inside a repo #646
Changes from 38 commits
2160ae4
8653732
ca57b17
98ae256
df1a20b
9ddcf07
bf01031
3c68cb6
f6a0a29
aa3610a
2e538ce
0d21bb6
bb34d87
00ac887
5dcc43b
7d2528e
a02d160
6ccc959
a539c8e
7a374c4
c685bc1
f3ebcd0
68b4f90
8d2112f
937bc5b
524e2a6
0c49bea
83bb14a
71698f0
a9b028f
bb2a3b5
35c34a9
28cde80
12755f2
b9ba6f2
939ff0b
ade1b3d
56acdae
33288cd
73357b2
b5ff21d
53e77a6
bea5a83
bd431da
5003104
a4cae82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,6 +63,8 @@ | |
|
||
ParamLike = Union[int, float, str, bool, List["ParamLike"], Dict[str, "ParamLike"]] | ||
|
||
NULL_SHA: str = "0" * 40 | ||
|
||
|
||
class Live: | ||
def __init__( | ||
|
@@ -95,8 +97,8 @@ def __init__( | |
self._report_notebook = None | ||
self._init_report() | ||
|
||
self._baseline_rev: Optional[str] = None | ||
self._exp_name: Optional[str] = exp_name | ||
self._baseline_rev: str = os.getenv(env.DVC_EXP_BASELINE_REV, NULL_SHA) | ||
shcheklein marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self._exp_name: Optional[str] = exp_name or os.getenv(env.DVC_EXP_NAME) | ||
self._exp_message: Optional[str] = exp_message | ||
self._experiment_rev: Optional[str] = None | ||
self._inside_dvc_exp: bool = False | ||
|
@@ -148,28 +150,36 @@ def _init_cleanup(self): | |
os.remove(dvc_file) | ||
|
||
@catch_and_warn(DvcException, logger) | ||
def _init_dvc(self): | ||
def _init_dvc(self): # noqa: C901 | ||
from dvc.scm import NoSCM | ||
|
||
if os.getenv(env.DVC_ROOT, None): | ||
self._inside_dvc_pipeline = True | ||
self._init_dvc_pipeline() | ||
self._dvc_repo = get_dvc_repo() | ||
|
||
scm = self._dvc_repo.scm if self._dvc_repo else None | ||
if isinstance(scm, NoSCM): | ||
scm = None | ||
if scm: | ||
self._baseline_rev = scm.get_rev() | ||
self._exp_name = get_exp_name(self._exp_name, scm, self._baseline_rev) | ||
logger.info(f"Logging to experiment '{self._exp_name}'") | ||
Comment on lines
+202
to
+208
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once we have tried to find a repo, we can generate all exp info we need instead of scattering it throughout the code |
||
|
||
dvc_logger = logging.getLogger("dvc") | ||
dvc_logger.setLevel(os.getenv(env.DVCLIVE_LOGLEVEL, "WARNING").upper()) | ||
|
||
self._dvc_file = self._init_dvc_file() | ||
|
||
if (self._dvc_repo is None) or isinstance(self._dvc_repo.scm, NoSCM): | ||
if not scm: | ||
if self._save_dvc_exp: | ||
logger.warning( | ||
"Can't save experiment without a Git Repo." | ||
"\nCreate a Git repo (`git init`) and commit (`git commit`)." | ||
) | ||
self._save_dvc_exp = False | ||
return | ||
if self._dvc_repo.scm.no_commits: | ||
if scm.no_commits: | ||
if self._save_dvc_exp: | ||
logger.warning( | ||
"Can't save experiment to an empty Git Repo." | ||
|
@@ -189,12 +199,7 @@ def _init_dvc(self): | |
if self._inside_dvc_pipeline: | ||
return | ||
|
||
self._baseline_rev = self._dvc_repo.scm.get_rev() | ||
if self._save_dvc_exp: | ||
self._exp_name = get_exp_name( | ||
self._exp_name, self._dvc_repo.scm, self._baseline_rev | ||
) | ||
logger.info(f"Logging to experiment '{self._exp_name}'") | ||
mark_dvclive_only_started(self._exp_name) | ||
self._include_untracked.append(self.dir) | ||
|
||
|
@@ -208,8 +213,6 @@ def _init_dvc_file(self) -> str: | |
def _init_dvc_pipeline(self): | ||
if os.getenv(env.DVC_EXP_BASELINE_REV, None): | ||
# `dvc exp` execution | ||
self._baseline_rev = os.getenv(env.DVC_EXP_BASELINE_REV, "") | ||
self._exp_name = os.getenv(env.DVC_EXP_NAME, "") | ||
self._inside_dvc_exp = True | ||
if self._save_dvc_exp: | ||
logger.info("Ignoring `save_dvc_exp` because `dvc exp run` is running") | ||
|
@@ -234,22 +237,6 @@ def _init_studio(self): | |
logger.debug("Skipping `studio` report `start` and `done` events.") | ||
self._studio_events_to_skip.add("start") | ||
self._studio_events_to_skip.add("done") | ||
elif self._dvc_repo is None: | ||
logger.warning( | ||
"Can't connect to Studio without a DVC Repo." | ||
"\nYou can create a DVC Repo by calling `dvc init`." | ||
) | ||
self._studio_events_to_skip.add("start") | ||
self._studio_events_to_skip.add("data") | ||
self._studio_events_to_skip.add("done") | ||
elif not self._save_dvc_exp: | ||
logger.warning( | ||
"Can't connect to Studio without creating a DVC experiment." | ||
"\nIf you have a DVC Pipeline, run it with `dvc exp run`." | ||
) | ||
self._studio_events_to_skip.add("start") | ||
self._studio_events_to_skip.add("data") | ||
self._studio_events_to_skip.add("done") | ||
else: | ||
self.post_to_studio("start") | ||
|
||
|
@@ -571,7 +558,8 @@ def make_report(self): | |
|
||
@catch_and_warn(DvcException, logger) | ||
def make_dvcyaml(self): | ||
make_dvcyaml(self) | ||
if self.dvc_file: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the case for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean when can it be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the only (?) unresolved question? Approved, since it doesn't look anything major ... good stuff, Dave. thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, missed this one. Good catch. Maybe this came from an earlier iteration. I dropped this condition in the latest commit. |
||
make_dvcyaml(self) | ||
|
||
@catch_and_warn(DvcException, logger) | ||
def post_to_studio(self, event): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,27 +29,24 @@ def test_get_dvc_repo_subdir(tmp_dir): | |
def test_exp_save_on_end(tmp_dir, save, mocked_dvc_repo): | ||
live = Live(save_dvc_exp=save) | ||
live.end() | ||
assert live._baseline_rev is not None | ||
assert live._exp_name is not None | ||
Comment on lines
+32
to
+33
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These now get set always |
||
if save: | ||
assert live._baseline_rev is not None | ||
assert live._exp_name is not None | ||
mocked_dvc_repo.experiments.save.assert_called_with( | ||
name=live._exp_name, | ||
include_untracked=[live.dir, "dvc.yaml"], | ||
force=True, | ||
message=None, | ||
) | ||
else: | ||
assert live._baseline_rev is not None | ||
assert live._exp_name is None | ||
mocked_dvc_repo.experiments.save.assert_not_called() | ||
|
||
|
||
def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker): | ||
def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch): | ||
monkeypatch.setenv(DVC_EXP_BASELINE_REV, "foo") | ||
monkeypatch.setenv(DVC_EXP_NAME, "bar") | ||
monkeypatch.setenv(DVC_ROOT, tmp_dir) | ||
|
||
mocker.patch("dvclive.live.get_dvc_repo", return_value=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not needed |
||
live = Live() | ||
live.end() | ||
|
||
|
@@ -60,31 +57,6 @@ def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker): | |
assert live._inside_dvc_pipeline | ||
|
||
|
||
def test_exp_save_run_on_dvc_repro(tmp_dir, mocker): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test was actually outdated and not working. It conflicts with |
||
dvc_repo = mocker.MagicMock() | ||
dvc_stage = mocker.MagicMock() | ||
dvc_file = mocker.MagicMock() | ||
dvc_repo.index.stages = [dvc_stage, dvc_file] | ||
dvc_repo.scm.get_rev.return_value = "current_rev" | ||
dvc_repo.scm.get_ref.return_value = None | ||
dvc_repo.scm.no_commits = False | ||
dvc_repo.config = {} | ||
dvc_repo.root_dir = tmp_dir | ||
mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo) | ||
live = Live() | ||
assert live._save_dvc_exp | ||
assert live._baseline_rev is not None | ||
assert live._exp_name is not None | ||
live.end() | ||
|
||
dvc_repo.experiments.save.assert_called_with( | ||
name=live._exp_name, | ||
include_untracked=[live.dir, "dvc.yaml"], | ||
force=True, | ||
message=None, | ||
) | ||
|
||
|
||
def test_exp_save_with_dvc_files(tmp_dir, mocker): | ||
dvc_repo = mocker.MagicMock() | ||
dvc_file = mocker.MagicMock() | ||
|
@@ -166,7 +138,7 @@ def test_errors_on_git_add_are_catched(tmp_dir, mocked_dvc_repo, monkeypatch): | |
mocked_dvc_repo.scm.untracked_files.return_value = ["dvclive/metrics.json"] | ||
mocked_dvc_repo.scm.add.side_effect = DvcException("foo") | ||
|
||
with Live(dvcyaml=False) as live: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not needed |
||
with Live() as live: | ||
live.summary["foo"] = 1 | ||
|
||
|
||
|
@@ -204,10 +176,12 @@ def test_no_scm_repo(tmp_dir, mocker): | |
assert live._save_dvc_exp is False | ||
|
||
|
||
def test_dvc_repro(tmp_dir, monkeypatch, mocker): | ||
def test_dvc_repro(tmp_dir, monkeypatch, mocked_dvc_repo, mocked_studio_post): | ||
monkeypatch.setenv(DVC_ROOT, "root") | ||
mocker.patch("dvclive.live.get_dvc_repo", return_value=None) | ||
live = Live(save_dvc_exp=True) | ||
assert live._baseline_rev is not None | ||
assert live._exp_name is not None | ||
assert not live._studio_events_to_skip | ||
Comment on lines
+182
to
+184
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consolidated from the deleted |
||
assert not live._save_dvc_exp | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,10 +235,9 @@ def test_post_to_studio_shorten_names(tmp_dir, mocked_dvc_repo, mocked_studio_po | |
|
||
@pytest.mark.studio() | ||
def test_post_to_studio_inside_dvc_exp( | ||
tmp_dir, mocker, monkeypatch, mocked_studio_post | ||
tmp_dir, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo | ||
): | ||
mocked_post, _ = mocked_studio_post | ||
mocker.patch("dvclive.live.get_dvc_repo", return_value=None) | ||
|
||
monkeypatch.setenv(DVC_EXP_BASELINE_REV, "f" * 40) | ||
monkeypatch.setenv(DVC_EXP_NAME, "bar") | ||
|
@@ -310,9 +309,8 @@ def test_post_to_studio_inside_subdir_dvc_exp( | |
) | ||
|
||
|
||
def test_post_to_studio_requires_exp(tmp_dir, mocked_dvc_repo, mocked_studio_post): | ||
assert Live(save_dvc_exp=False)._studio_events_to_skip == {"start", "data", "done"} | ||
assert not Live()._studio_events_to_skip | ||
def test_post_to_studio_without_exp(tmp_dir, mocked_dvc_repo, mocked_studio_post): | ||
assert not Live(save_dvc_exp=False)._studio_events_to_skip | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Live updates to Studio will happen regardless of |
||
|
||
|
||
def test_get_dvc_studio_config_none(mocker): | ||
|
@@ -396,3 +394,73 @@ def test_post_to_studio_if_done_skipped(tmp_dir, mocked_dvc_repo, mocked_studio_ | |
mocked_post, _ = mocked_studio_post | ||
call_types = [call.kwargs["json"]["type"] for call in mocked_post.call_args_list] | ||
assert "data" in call_types | ||
|
||
|
||
@pytest.mark.studio() | ||
def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post): | ||
monkeypatch.setenv(DVC_STUDIO_TOKEN, "STUDIO_TOKEN") | ||
monkeypatch.setenv(DVC_STUDIO_REPO_URL, "STUDIO_REPO_URL") | ||
|
||
live = Live(save_dvc_exp=True) | ||
live.log_param("fooparam", 1) | ||
|
||
foo_path = (Path(live.plots_dir) / Metric.subfolder / "foo.tsv").as_posix() | ||
|
||
mocked_post, _ = mocked_studio_post | ||
|
||
mocked_post.assert_called() | ||
mocked_post.assert_called_with( | ||
"https://0.0.0.0/api/live", | ||
**get_studio_call("start", baseline_sha="0" * 40, exp_name=live._exp_name), | ||
) | ||
|
||
live.log_metric("foo", 1) | ||
|
||
live.next_step() | ||
mocked_post.assert_called_with( | ||
"https://0.0.0.0/api/live", | ||
**get_studio_call( | ||
"data", | ||
baseline_sha="0" * 40, | ||
exp_name=live._exp_name, | ||
step=0, | ||
plots={f"{foo_path}": {"data": [{"step": 0, "foo": 1.0}]}}, | ||
), | ||
) | ||
|
||
live.log_metric("foo", 2) | ||
|
||
live.next_step() | ||
mocked_post.assert_called_with( | ||
"https://0.0.0.0/api/live", | ||
**get_studio_call( | ||
"data", | ||
baseline_sha="0" * 40, | ||
exp_name=live._exp_name, | ||
step=1, | ||
plots={f"{foo_path}": {"data": [{"step": 1, "foo": 2.0}]}}, | ||
), | ||
) | ||
|
||
live.end() | ||
mocked_post.assert_called_with( | ||
"https://0.0.0.0/api/live", | ||
**get_studio_call("done", baseline_sha="0" * 40, exp_name=live._exp_name), | ||
) | ||
|
||
|
||
@pytest.mark.studio() | ||
def test_post_to_studio_skip_if_no_repo_url( | ||
tmp_dir, | ||
mocker, | ||
monkeypatch, | ||
): | ||
mocked_post = mocker.patch("dvclive.studio.post_live_metrics", return_value=None) | ||
|
||
monkeypatch.setenv(DVC_STUDIO_TOKEN, "token") | ||
|
||
with Live() as live: | ||
live.log_metric("foo", 1) | ||
live.next_step() | ||
|
||
assert mocked_post.call_count == 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated
get_exp_name
to always be able to return an experiment name instead of only when inside a repo.