server/continuedev/plugins/recipes/DDtoBQRecipe/steps.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

import os
from textwrap import dedent

from ....core.main import Step
from ....core.sdk import ContinueSDK, Models
from ....core.steps import MessageStep
from ....libs.util.paths import find_data_file
from ....plugins.steps.find_and_replace import FindAndReplaceStep

# Marker embedded in the name of a step whose message was produced by the
# language model (see the suggestion step built in LoadDataStep.run).
AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)"


class SetUpChessPipelineStep(Step):
    # Hidden step that prepares a Python virtual environment and scaffolds the
    # "chess" dlt pipeline with a DuckDB destination.
    hide: bool = True
    name: str = "Setup Chess.com API dlt Pipeline"

    async def describe(self, models: Models):
        """Return a fixed one-sentence description of what this step does."""
        summary = "This step will create a new dlt pipeline that loads data from the chess.com API."
        return summary

    async def run(self, sdk: ContinueSDK):
        """Run the shell commands that bootstrap a new dlt pipeline.

        The commands are handed to ``sdk.run`` as a single list, together with
        a display name and a bullet-point explanation of each command.
        """
        # Commands needed to get started when creating a new dlt pipeline.
        setup_commands = [
            "python3 -m venv .env",
            "source .env/bin/activate",
            "pip install dlt",
            "dlt --non-interactive init chess duckdb",
            "pip install -r requirements.txt",
        ]

        # One bullet per command; joined with newlines and no trailing newline.
        explanation = "\n".join(
            (
                "Running the following commands:",
                "- `python3 -m venv .env`: Create a Python virtual environment",
                "- `source .env/bin/activate`: Activate the virtual environment",
                "- `pip install dlt`: Install dlt",
                '- `dlt init chess duckdb`: Create a new dlt pipeline called "chess" that loads data into a local DuckDB instance',
                "- `pip install -r requirements.txt`: Install the Python dependencies for the pipeline",
            )
        )

        await sdk.run(
            setup_commands,
            name="Set up Python environment",
            description=explanation,
        )


class SwitchDestinationStep(Step):
    # Hidden step that repoints the generated chess pipeline from DuckDB to
    # Google BigQuery and asks the user to fill in BigQuery credentials.
    hide: bool = True

    async def run(self, sdk: ContinueSDK):
        """Switch the pipeline destination and collect BigQuery credentials.

        Rewrites ``destination='duckdb'`` to ``destination='bigquery'`` in
        ``chess_pipeline.py``, appends a credentials template to
        ``.dlt/secrets.toml`` and then waits for the user to confirm.
        """
        # Switch destination from DuckDB to Google BigQuery.
        pipeline_file = os.path.join(sdk.ide.workspace_directory, "chess_pipeline.py")
        swap_destination = FindAndReplaceStep(
            filepath=pipeline_file,
            pattern="destination='duckdb'",
            replacement="destination='bigquery'",
        )
        await sdk.run_step(swap_destination)

        # Placeholder BigQuery credentials for the user's secrets.toml file.
        credentials_template = "\n".join(
            (
                "[destination.bigquery.credentials]",
                'location = "US"  # change the location of the data',
                'project_id = "project_id" # please set me up!',
                'private_key = "private_key" # please set me up!',
                'client_email = "client_email" # please set me up!',
            )
        )

        # Open secrets.toml in the IDE and append the template to the bottom.
        secrets_file = os.path.join(sdk.ide.workspace_directory, ".dlt/secrets.toml")
        await sdk.ide.setFileOpen(secrets_file)
        await sdk.append_to_file(secrets_file, credentials_template)

        # Wait for the user to put their GCP credentials in secrets.toml.
        await sdk.wait_for_user_confirmation(
            "Please add your GCP credentials to `secrets.toml` file and then press `Continue`"
        )


class LoadDataStep(Step):
    # Hidden step that runs the chess pipeline to load data into BigQuery and,
    # when the run output looks like a failure, asks the default model for a
    # suggested fix before raising.
    name: str = "Load data to BigQuery"
    hide: bool = True

    async def run(self, sdk: ContinueSDK):
        """Run the pipeline and surface an AI-assisted suggestion on failure.

        The pipeline is executed with the virtual environment's interpreter.
        If the captured output contains ``Traceback`` or ``SyntaxError``, the
        bundled troubleshooting document is read, the default model is asked
        to summarize the error and propose a fix, and ``sdk.raise_exception``
        is called with the error text and a follow-up ``MessageStep`` holding
        the suggestion.
        """
        # Run the pipeline again to load data to BigQuery
        output = await sdk.run(
            ".env/bin/python3 chess_pipeline.py",
            name="Load data to BigQuery",
            description="Running `.env/bin/python3 chess_pipeline.py` to load data to Google BigQuery",
        )

        if "Traceback" not in output and "SyntaxError" not in output:
            return

        # Explicit encoding so the markdown is decoded the same on every platform.
        with open(
            find_data_file("dlt_duckdb_to_bigquery_docs.md"), "r", encoding="utf-8"
        ) as f:
            docs = f.read()

        # Keep only the last traceback. When the output has a SyntaxError but
        # no "Traceback" marker, leave it untouched instead of prepending a
        # marker that was never there.
        _, marker, tail = output.rpartition("Traceback")
        if marker:
            output = marker + tail

        # Dedent the template BEFORE interpolating: the error output and docs
        # contain lines at column 0, which would otherwise defeat dedent and
        # leave the prompt indented by the source-code indentation.
        prompt_template = dedent(
            """\
            When trying to load data into BigQuery, the following error occurred:

            ```ascii
            {output}
            ```

            Here is documentation describing common errors and their causes/solutions:

            {docs}

            This is a brief summary of the error followed by a suggestion on how it can be fixed:"""
        )
        suggestion = await sdk.models.default.complete(
            prompt_template.format(output=output, docs=docs)
        )

        sdk.raise_exception(
            title="Error while running query",
            message=output,
            with_step=MessageStep(
                name=f"Suggestion to solve error {AI_ASSISTED_STRING}",
                message=suggestion,
            ),
        )