import logging
from pprint import pformat

import datalinks
from datalinks.api import DLConfig
from datalinks.links import MatchTypeConfig, ExactMatch
from datalinks.pipeline import Pipeline, ProcessUnstructured, Normalize, NormalizeModes, Validate, ValidateModes


def _batch_end_indices(lines, min_lines=60, end_prefixes=("http", "+", "0")):
    """Return the exclusive end index of each batch of ``lines``.

    A batch is closed right after a line that starts with one of
    ``end_prefixes``, but only once the running line counter exceeds
    ``min_lines``. Whatever remains after the last closing line forms a
    final batch, so the last index is always ``len(lines)``.

    Args:
        lines: The lines of the input text, in order.
        min_lines: Value the running counter must exceed before a batch
            may be closed.
        end_prefixes: Tuple of prefixes; a line starting with any of them
            is eligible to close the current batch.

    Returns:
        A list of increasing slice end indices; empty when ``lines`` is
        empty.
    """
    ends = []
    counter = 0
    for i, line in enumerate(lines):
        if counter > min_lines and line.startswith(end_prefixes):
            # The matching line is the last line of the batch it closes.
            ends.append(i + 1)
            counter = 0
        counter += 1

    # Make sure the tail of the file is ingested too. ``not ends`` covers
    # inputs in which no closing line was found at all.
    if lines and (not ends or ends[-1] != len(lines)):
        ends.append(len(lines))
    return ends


def main():
    """Ingest a text file into DataLinks in batches of lines.

    Builds the API client from the placeholder configuration below, calls
    ``create_space``, reads the text file, splits it into batches with
    ``_batch_end_indices`` and sends each batch through ``dlapi.ingest``
    with a ``ProcessUnstructured`` pipeline step, logging the outcome of
    every batch.
    """
    logging.basicConfig(level=logging.INFO)

    dl_config = DLConfig(
        host="https://api.prod.datalinks.com/api/v1",
        apikey="TXT",  # Insert within "" your Datalinks API key
        index="tests",
        namespace="TXT",  # Name the namespace that will house your dataset
        objectname="TXT",  # Name your dataset
    )

    dlapi = datalinks.api.DataLinksAPI(dl_config)
    dlapi.create_space(is_private=True)  # default

    # Insert the path to your data, starting at the python folder and
    # including the file name, such as "data/file.txt".
    textfile = "TXT"
    logging.info(f"Loading text in {textfile}")

    with open(textfile) as f:
        lines = f.readlines()

    batches = _batch_end_indices(lines)
    if not batches:
        # Nothing to send; previously this case crashed with IndexError.
        logging.warning(f"No lines found in {textfile}; nothing to ingest.")
        return

    steps = Pipeline(
        ProcessUnstructured(
            derive_from="text",
            # Substitute TXT with your prompt. Be as descriptive as possible.
            helper_prompt="""TXT""",
            model="gpt-4.1-2025-04-14",
            provider="openai",
        )
    )
    entity_resolution = MatchTypeConfig(ExactMatch())

    total_lines = len(lines)
    cur_pos = 0
    for b in batches:
        # NOTE(review): readlines() keeps each line's trailing newline, so
        # joining with "\n" puts a blank line between consecutive lines.
        # Kept as-is to leave the ingested text unchanged; confirm intent.
        data = {"text": "\n".join(lines[cur_pos:b])}
        result = dlapi.ingest(
            data=[data],  # supports multiple files
            inference_steps=steps,
            entity_resolution=entity_resolution,
            max_attempts=1,
            batch_size=0,  # default (no file batching)
        )

        logging.info(f"Ingestion result:"
                     f"\nSuccessfully ingested {len(result.successful)} dataset(s)."
                     f"\nFailed {len(result.failed)} dataset(s)."
                     f"\nCurrent line {b} out of {total_lines}")

        cur_pos = b

# Run the ingestion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
