Add first draft of schema, setuptools, gitignore
This commit is contained in:
commit
6858489b0b
113
.gitignore
vendored
Normal file
113
.gitignore
vendored
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
# Created by https://www.gitignore.io/api/python
|
||||||
|
|
||||||
|
### Python ###
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# celery beat schedule file
|
||||||
|
celerybeat-schedule
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
|
||||||
|
### Python Patch ###
|
||||||
|
.venv/
|
||||||
|
|
||||||
|
|
||||||
|
# End of https://www.gitignore.io/api/python
|
||||||
131
dprov/store/schema.sql
Normal file
131
dprov/store/schema.sql
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
-- Schema file for dprov.db
|
||||||
|
-- Version number of this schema.
-- Bump it whenever a meaningful change is made to this file.
PRAGMA user_version = 0;

-- Enable foreign-key enforcement so the ON DELETE RESTRICT clauses in this
-- file take effect.
-- NOTE(review): in SQLite this pragma is a per-connection setting and is not
-- stored in the database file; presumably every connection that opens the
-- database must issue it again -- confirm the application does so.
PRAGMA foreign_keys = ON;
|
||||||
|
|
||||||
|
-- computational environment which can execute "runs"
|
||||||
|
CREATE TABLE environment (
    id                    INTEGER PRIMARY KEY NOT NULL,

    -- platform.node(), e.g. 'lucky'
    hostname              TEXT,

    -- json.dumps(dict(os.environ))
    -- NOTE(review): a full environment dump may contain credentials or
    -- tokens; confirm that storing it verbatim is acceptable.
    envvars_json          TEXT,

    -- platform.python_implementation(), e.g. 'CPython'
    python_implementation TEXT,

    -- sys.version, e.g. '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]'
    python_strversion     TEXT,

    -- sys.hexversion, e.g. 50923504
    python_hexversion     INTEGER,

    -- platform.platform(), e.g. 'Linux-5.15.0-46-generic-x86_64-with-glibc2.31'
    python_host_platform  TEXT
);
|
||||||
|
|
||||||
|
-- A "process" is an execution of a single graph, starting from a number of roots
|
||||||
|
-- It corresponds to a single host process. Note that if multiple calls to
|
||||||
|
-- dprov.run() are provided within a single python script, multiple "processes"
|
||||||
|
-- may be created. Also note that historical processes are automatically
|
||||||
|
-- imported and merged when possible when loading "data" from disk.
|
||||||
|
CREATE TABLE process (
    id          INTEGER PRIMARY KEY NOT NULL,

    -- Times are recorded as POSIX timestamps in UTC,
    -- e.g. datetime.datetime.now().timestamp()
    start_time  REAL,
    end_time    REAL,

    -- PID of the python process on the host OS
    process_id  INTEGER,

    -- Row of the environment table this process belongs to; an environment
    -- that is still referenced cannot be deleted.
    environment INTEGER NOT NULL
        REFERENCES environment (id) ON DELETE RESTRICT
);
|
||||||
|
-- We try to track all python packages that impact execution by traversing a
|
||||||
|
-- copy of sys.modules. This is done once before a "process" and once after in
|
||||||
|
-- case some calling code winds up calling a previously-unloaded module.
|
||||||
|
CREATE TABLE py_package (
    id      INTEGER PRIMARY KEY NOT NULL,

    -- Package name (required) and its version string (optional).
    name    TEXT NOT NULL,
    version TEXT,

    -- Process for which this package was recorded; a process that is still
    -- referenced cannot be deleted.
    process INTEGER NOT NULL
        REFERENCES process (id) ON DELETE RESTRICT
);
|
||||||
|
-- A py_module describes any python module file containing decorated Functions.
|
||||||
|
-- Modules are tracked since they impact the global scope of function calls.
|
||||||
|
CREATE TABLE py_module (
    id      INTEGER PRIMARY KEY NOT NULL,

    name    TEXT NOT NULL,

    -- NOTE(review): presumably the module's source text -- confirm.
    code    TEXT,

    -- Optional link to the py_package row for this module (may be NULL);
    -- a package that is still referenced cannot be deleted.
    package INTEGER
        REFERENCES py_package (id) ON DELETE RESTRICT
);
|
||||||
|
-- A "func" just describes a function, without reference to its arguments. It
|
||||||
|
-- can have inputs and outputs, which are described in the func_inputs and
|
||||||
|
-- func_outputs children tables.
|
||||||
|
CREATE TABLE func (
    id         INTEGER PRIMARY KEY NOT NULL,

    name       TEXT NOT NULL,

    -- Module that defines this function; a module that is still referenced
    -- cannot be deleted.
    module     INTEGER NOT NULL
        REFERENCES py_module (id) ON DELETE RESTRICT,

    -- Times are recorded as POSIX timestamps in UTC,
    -- e.g. datetime.datetime.now().timestamp()
    -- NOTE(review): a func is described as independent of its arguments, yet
    -- the timing columns live here while the task table has none -- confirm
    -- this is the intended place for them.
    start_time REAL,
    end_time   REAL
);
|
||||||
|
-- One row per declared input of a func.
CREATE TABLE func_input (
    id       INTEGER PRIMARY KEY NOT NULL,

    name     TEXT NOT NULL,
    typename TEXT NOT NULL,

    -- Function this input belongs to; a func that is still referenced
    -- cannot be deleted.
    func     INTEGER NOT NULL
        REFERENCES func (id) ON DELETE RESTRICT
);
|
||||||
|
-- One row per declared output of a func.
CREATE TABLE func_output (
    id       INTEGER PRIMARY KEY NOT NULL,

    -- Outputs may be unnamed, so name is optional while position is required.
    -- NOTE(review): position is presumably the index of the value within the
    -- function's return -- confirm.
    name     TEXT,
    position INTEGER NOT NULL,

    typename TEXT NOT NULL,

    -- Function this output belongs to; a func that is still referenced
    -- cannot be deleted.
    func     INTEGER NOT NULL
        REFERENCES func (id) ON DELETE RESTRICT
);
|
||||||
|
|
||||||
|
|
||||||
|
-- Tasks are executed calls to Functions: they correspond to a particular set of
|
||||||
|
-- inputs which are themselves data (see datum table). A task is executed in
|
||||||
|
-- the context of a "process". Within a process, tasks are typically evaluated
|
||||||
|
-- in a serial manner.
|
||||||
|
CREATE TABLE task (
    id      INTEGER PRIMARY KEY NOT NULL,

    -- Function that was called.
    func    INTEGER NOT NULL
        REFERENCES func (id) ON DELETE RESTRICT,

    -- Process in whose context the call was executed.
    process INTEGER NOT NULL
        REFERENCES process (id) ON DELETE RESTRICT
);
|
||||||
|
-- A datum is an object that is computed as the output of a task, given as a
|
||||||
|
-- literal value in a config file, or loaded from a file.
|
||||||
|
CREATE TABLE datum (
    id INTEGER PRIMARY KEY NOT NULL,

    -- provider_type describes where the data came from. Choices are:
    --  - COMPUTED: output of a decorated Function
    --  - IMPORTED: imported output from a prior process
    --  - LITERAL: literal value provided in a config file
    -- Note that literals are implicitly imported (hash computed) at runtime.
    -- The CHECK rejects any other spelling; NULL remains allowed because the
    -- column is nullable.
    provider_type TEXT
        CONSTRAINT datum_provider_type_check
        CHECK (provider_type IN ('COMPUTED', 'IMPORTED', 'LITERAL')),

    -- task and task_output are only used when provider_type='COMPUTED'
    task INTEGER,
    task_output INTEGER,

    -- if provider_type='LITERAL', a JSON representation of the value
    literal_json TEXT,

    -- string representation of the data type
    typename TEXT NOT NULL,

    -- if residing on disk, the path relative to this db file
    path TEXT,

    -- hash of provenance summarizes all dependencies, independent of
    -- environment; computed _before_ computation
    hash_provenance TEXT,

    -- hash of content
    hash_content TEXT,

    -- Version starts at 0 when provided/first computed. When passed as
    -- a non-const input to a non-const function, this version is recorded
    -- and incremented.
    current_version INTEGER NOT NULL,

    FOREIGN KEY (task) REFERENCES task (id) ON DELETE RESTRICT,
    FOREIGN KEY (task_output) REFERENCES func_output (id) ON DELETE RESTRICT
);
|
||||||
|
-- A task input records the version of a Datum that is passed to the function
|
||||||
|
CREATE TABLE task_input (
    id            INTEGER PRIMARY KEY NOT NULL,

    -- Task that received the input.
    task          INTEGER NOT NULL
        REFERENCES task (id) ON DELETE RESTRICT,

    -- Declared input of the called function that this value was bound to.
    func_input    INTEGER NOT NULL
        REFERENCES func_input (id) ON DELETE RESTRICT,

    -- Datum that was passed in.
    datum         INTEGER NOT NULL
        REFERENCES datum (id) ON DELETE RESTRICT,

    -- Data have versions to facilitate tracking non-const operations. If a
    -- datum is passed to a non-const operation, a new datum is provided.
    datum_version INTEGER NOT NULL
);
|
||||||
41
setup.cfg
Normal file
41
setup.cfg
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
[metadata]
|
||||||
|
name = DProv
|
||||||
|
author = Jacob Hinkle
|
||||||
|
author_email = hinklejd@ornl.gov
|
||||||
|
url = https://code.ornl.gov/4jh/dprov
|
||||||
|
keywords = data-science workflows reproducibility provenance
|
||||||
|
version = attr: setuptools_scm.get_version
|
||||||
|
|
||||||
|
[tool:pytest]
|
||||||
|
testpaths = tests
|
||||||
|
addopts = --verbose --cov dprov/
|
||||||
|
|
||||||
|
[aliases]
|
||||||
|
test=pytest
|
||||||
|
|
||||||
|
[options]
|
||||||
|
# NOTE(review): only the top-level package is listed; confirm whether
# sub-packages such as dprov.store must be listed too (or use "find:").
packages =
    dprov
|
||||||
|
install_requires =
|
||||||
|
dill
|
||||||
|
setup_requires =
|
||||||
|
setuptools_scm
|
||||||
|
tests_require =
|
||||||
|
coverage
|
||||||
|
pytest
|
||||||
|
pytest-cov
|
||||||
|
pytest-runner
|
||||||
|
python_requires = >=3.6
|
||||||
|
use_scm_version = True
|
||||||
|
|
||||||
|
[flake8]
|
||||||
|
exclude = .eggs,
|
||||||
|
.git,
|
||||||
|
__pycache__,
|
||||||
|
dprov.egg-info,
|
||||||
|
build,
|
||||||
|
dist
|
||||||
|
# allow a little bit longer lines than default (79)
|
||||||
|
ignore = W504
|
||||||
|
max-line-length= 99
|
||||||
|
show-source = True
|
||||||
Loading…
x
Reference in New Issue
Block a user