# Source code for caplena_api_demo

# -*- coding: utf-8 -*-
"""Demo script illustrating how to do basic operations on the Caplena API

Example
-------
Steps to run:

1. Make sure you have a compatible (version >= 2.7) python environment
    $ python --version
2. Make sure you have installed the `requests` and `six` libraries (both are imported by this script)
    $ pip install requests six
3. Set the `CAPLENA_API_KEY` variable at the bottom of this script
4. Call the script
    $ python caplena_api_demo.py

Notes
-----
This script is not intended to be shared with third parties.
Every receiving party agrees to use it solely for own purposes
and purposes that are intended by the original author (Caplena GmbH).

Copyright 2020 Caplena GmbH, Zurich.
"""
import requests
import time
import json
import six

if six.PY2:
    from urllib import urlencode
else:
    from urllib.parse import urlencode

from src.utils import CaplenaObj, ComplexEncoder


class CaplenaAPI(object):
    """Thin client for (parts of) the Caplena.co API.

    Every public method issues exactly one HTTP request through a shared
    `requests` session. A response that is not successful is handed to
    `_handleBadResponse`, which raises an `Exception`; the public methods
    therefore never return `False` to signal failure.

    Example
    -------
    To call an API instantiate a CaplenaAPI object and then call its methods
        >>> api = CaplenaAPI('de', '$(API_KEY)')
        >>> projects = api.listProjects()
        >>> projects[0].name
        'project 1'

    """
    valid_languages = ['en', 'de']

    def __init__(self, language, api_key):
        """
        API Class Initializer.

        Sets some basic attributes of the instance (API key, base URL and content language)
        and initializes a session object which is used for all subsequent API calls.
        Authentication itself happens through the `Authorization` header built in `_getHeaders()`.

        Parameters
        ----------
        language : str
            Content-Language for API calls (mainly relevant for error messages), either "de" or "en"
        api_key: str
            API key to authenticate to the Caplena API, if you don't have a key, please contact support@caplena.com

        Raises
        ------
        ValueError
            If `language` is not one of `valid_languages`

        """
        super(CaplenaAPI, self).__init__()
        self.api_key = api_key
        # Not read anywhere in this class; kept so existing callers that inspect it keep working.
        self.authenticated = False
        self.baseURI = "https://api.caplena.com/api"

        if language not in self.valid_languages:
            raise ValueError(
                "Invalid language '{}', accepted values are {{{}}}".format(
                    language, ",".join(self.valid_languages)
                )
            )
        self.language = language

        self.sess = requests.Session()

    def _getHeaders(self):
        """
        Internal function to generate global header for all API calls

        The following headers are returned:
        * `Content-Type`:       We always use json format for the data we send
        * `Accept`:             We always expect json back from the server
        * `Accept-Language`:    The language for API messages (and in some cases content), supported: {en|de}
        * `Authorization`:      The API key of this instance, formatted as `apikey <key>`
        * `Referer`:            The base URI of the API

        Returns
        -------
        headers : dict
            Dictionary with keys being the header names and values the header values

        """
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Accept-Language": self.language,
            "Authorization": "apikey {}".format(self.api_key),
            "Referer": self.baseURI
        }

    def _handleBadResponse(self, response):
        """
        Internal function to handle unsuccessful requests

        Currently a dummy function which just raises an error with the response text.
        Can be adjusted to do a more fine-grained error handling. Public methods return
        whatever this function returns, so an override may return a value instead of raising.

        Parameters
        ----------
        response : requests::response object
            The response object which failed

        Raises
        ------
        Exception
            Always, carrying the status code and the body text of the response

        """
        raise Exception("ERROR (status code {}): {}".format(response.status_code, response.text))

    @staticmethod
    def _uploadQuery(upload_async, request_training):
        """
        Internal function to build the query string shared by the row-uploading endpoints

        Parameters
        ----------
        upload_async : bool
            If truthy, the `async` parameter is appended to the query
        request_training : bool
            Value of the `request_training` parameter, always included

        Returns
        -------
        query : str
            URL-encoded query string including the leading `?`

        """
        params = {'request_training': request_training}
        if upload_async:
            # The flag is only ever sent when set; a falsy value means "omit the parameter".
            params['async'] = upload_async
        return '?' + urlencode(params)

    def _makeRequest(self, method, apiURI, data=None, publicmethod=False):
        """
        Internal function to make the API call.

        Currently a very thin wrapper around `requests` library.
        Only does three things
        * Refuse to call non-public endpoints when no API key is set
        * Get and set headers by calling `_getHeaders()`
        * Concatenate the base URI and the api URI

        Parameters
        ----------
        method : str
            HTTP request method which should be called (i.e. get / post / ...)
            Needs to be a method of the requests session, otherwise function will fail
        apiURI : str
            The URI of the API method to call (only last part, the base URI including domain are class attributes)
        data : dict|list
            Data to be sent to API as json. Dictionary of key/value pairs (not-serialized!)
            Serialized with `ComplexEncoder`. An empty dict or list is sent as `{}` / `[]`;
            only `None` results in a request without body.
        publicmethod : bool
            Flag indicating if authentication is required for this API method.
            Only set to True for public endpoints
            (optional)

        Returns
        -------
        response : requests::response obj
            Response object containing information about the servers response

        Raises
        ------
        Exception
            If the endpoint is not public and no API key was provided

        """
        if not publicmethod and self.api_key is None:
            raise Exception("API key not provided. Provide valid API key when instantiating CaplenaAPI")

        # Compare against None so that an empty payload is still serialized instead of silently dropped.
        body = None if data is None else json.dumps(data, cls=ComplexEncoder)
        send = getattr(self.sess, method)
        return send("{}{}".format(self.baseURI, apiURI), data=body, headers=self._getHeaders())

    def listProjects(self):
        """
        API method to list all projects that belong to this user.

        *Note:* The returned projects contain global meta information of the projects *and* their questions, but not the response texts.

        Returns
        -------
        projects : list(:class:`.Project`)
            A list of all projects belonging to the user

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/projects/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Project.from_json(data) for data in r.json()]

    def listInheritableProjects(self):
        """
        API method to list all projects of which inheritance is possible.

        List contains all projects belonging to user, as well as Caplena provided models.

        *Note:* The returned projects only contain basic meta information on the project and their questions, but not the response texts. To get more detailed information about a certain project call the `listProjects` method.

        Returns
        -------
        projects : list(:class:`.Project`)
            A list of all projects that can be used for inheritance. This is the concatenation of all projects owned by the user and global Caplena models.

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/projects-inheritable/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Project.from_json(data) for data in r.json()]

    def listQuestions(self):
        """
        API method to list all questions that belong to this user.

        *Note:* The returned questions only contain global meta information of the questions and not the response texts.

        Returns
        -------
        questions: list(:class:`.Question`)
            A list of all questions belonging to the user

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/questions/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Question.from_json(data) for data in r.json()]

    def getQuestion(self, question_id):
        """
        API method to get question info.

        *Note:* The returned question only contains meta information of the question and not the response texts.

        Parameters
        ----------
        question_id : int
            ID of the question to fetch

        Returns
        -------
        question : Question
            A question object

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/questions/{}'.format(question_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return Question.from_json(r.json())

    def getProject(self, project_id):
        """
        API method to get project info.

        Parameters
        ----------
        project_id : int
            ID of the project to fetch

        Returns
        -------
        project : Project
            A project object

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/projects/{}'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return Project.from_json(r.json())

    def createProject(
        self,
        name,
        language,
        translate=False,
        auxiliary_column_names=None,
        questions=None,
        rows=None,
        translation_engine='GT',
        upload_async=True,
        request_training=True
    ):
        """
        API method to create a new project

        *Note:*
        * When creating a new project you can also create questions and rows belonging to it.
        * Creating new questions is _only_ possible when creating a new project. Questions cannot be added to an
        existing project.
        * Rows can also be added to a project at a later time

        Parameters
        ----------
        name : str, required
            Name of the new project
        language : str, required
            Language of the project, valid choices are {en|de}
            Has nothing to do with the language the API is set to (the attribute `language`.)
        translate : bool, optional
            Flag indicating whether to translate this project (where other language than `language` detected)
        auxiliary_column_names : list, optional
            List of strings, naming additional columns that will be sent with each row.
            Defaults to an empty list.
            The number of elements in this list must match the number of elements
            in the `auxiliary_columns` field when adding rows.
        questions : list(:class:`.Question`), optional
            List of questions to create. Defaults to an empty list.
        rows : list(:class:`.Row`), optional
            List of objects of type Row. Defaults to an empty list.
        translation_engine : str
            Choice of translation engine, either 'GT' for Google Translate or 'DL' for DeepL
        upload_async : bool
            If true, send async request, required if uploading more than 20 rows at once or if uploading answers
            with `reviewed=True`
        request_training : bool
            If true, automatically request training after uploading answers

        Returns
        -------
        project : Project
            A new Project object

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        # Fresh lists per call: a shared `[]` default would leak state between projects.
        proj = Project(
            name=name,
            language=language,
            translate=translate,
            auxiliary_column_names=[] if auxiliary_column_names is None else auxiliary_column_names,
            questions=[] if questions is None else questions,
            rows=[] if rows is None else rows,
            translation_engine=translation_engine
        )
        query = self._uploadQuery(upload_async, request_training)
        r = self._makeRequest('post', '/projects/{}'.format(query), proj.to_dict())
        if not r.ok:
            return self._handleBadResponse(r)
        return Project.from_json(r.json())

    def addRowsToProject(self, project_id, rows, upload_async=True, request_training=True):
        """
        API method to add rows to a previously created project.

        Parameters
        ----------
        project_id : int
            ID of the project to add the rows to
        rows : list(:class:`.Row`)
            List of objects of type Row
        upload_async : bool
            If true, send async request, required if uploading more than 20 rows at once or if uploading answers
            with `reviewed=True`
        request_training : bool
            If true, automatically request training after uploading answers

        Returns
        -------
        rows: list(:class:`.Row`)
            A list of the newly created rows

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        query = self._uploadQuery(upload_async, request_training)
        payload = [row.to_dict() for row in rows]
        r = self._makeRequest('post', '/projects/{}/rows{}'.format(project_id, query), payload)
        if not r.ok:
            return self._handleBadResponse(r)
        return [Row.from_json(dat) for dat in r.json()]

    def listRows(self, project_id):
        """
        API method to list all rows of a specific project.

        Parameters
        ----------
        project_id : int
            ID of the project of which to return the rows

        Returns
        -------
        rows : list(:class:`.Row`)
            A list of all rows belonging to the project

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/projects/{}/rows'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return [Row.from_json(dat) for dat in r.json()]

    def listAnswers(self, question_id, no_group=False):
        """
        API method to list all answers of a specific question.

        Parameters
        ----------
        question_id : int
            ID of the question of which to return the answers
        no_group : bool
            If true, no grouping will be applied to answers list,
            overriding the `group_identical` property of the question

        Returns
        -------
        answers : list(:class:`.Answer`)
            A list of all answers belonging to the question

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        query = '?no_group' if no_group else ''
        r = self._makeRequest('get', '/questions/{}/answers{}'.format(question_id, query))
        if not r.ok:
            return self._handleBadResponse(r)
        return [Answer.from_json(dat) for dat in r.json()]

    def requestPredictions(self, question_id, **kwargs):
        """
        API method to request the AI-assistant to train itself based on coded answers of specified question. Only works
        if at least 6 answers have been coded.

        Parameters
        ----------
        question_id : int
            ID of the question of which to request AI to make predictions
        **kwargs
            Optional extra parameters, sent url-encoded as the query string of the request

        Returns
        -------
        success : bool
            True if the request was successful

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        request_url = '/questions/{}/request-training'.format(question_id)
        if kwargs:
            request_url += '?' + urlencode(kwargs)
        r = self._makeRequest('post', request_url)
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def getPredictions(self, question_id):
        """
        API method to get AI-coded codes and respective answers. Requires previous call to
        :func:`~caplena_api_demo.CaplenaAPI.requestPredictions`.

        Parameters
        ----------
        question_id : int
            ID of the question of which to return the code predictions made by AI

        Returns
        -------
        result : Predictions|None
            None if no predictions are available (response code 204)
            Otherwise contains `answers` (whose entries have the keys `id` and `codes`) which are the predictions,
            and `model` (meta information on model performance)

        Raises
        ------
        Exception
            If the response code is neither 200 nor 204 (see `_handleBadResponse`)

        """
        r = self._makeRequest('get', '/questions/{}/codes-predicted'.format(question_id))

        if r.status_code == 204:
            # No content is available, i.e. no predictions are ready for this question
            return None
        if r.status_code == 200:
            return Predictions.from_json(r.json())
        return self._handleBadResponse(r)

    def deleteQuestion(self, question_id):
        """
        API method to delete question and its answers.

        Parameters
        ----------
        question_id : int
            ID of the question to delete

        Returns
        -------
        success : bool
            True if the request was successful

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('delete', '/questions/{}'.format(question_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def deleteProject(self, project_id):
        """
        API method to delete projects, its questions and corresponding answers.

        Parameters
        ----------
        project_id : int
            ID of the project to delete

        Returns
        -------
        success : bool
            True if the request was successful

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        r = self._makeRequest('delete', '/projects/{}'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def updateQuestion(self, question, request_training=False):
        """
        API method to update question

        Parameters
        ----------
        question: Question
            Modified question instance; its `id` selects the question to update
        request_training : bool
            Sent as the `request_training` query parameter

        Returns
        -------
        question: Question
            newly updated question instance

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        query = '?' + urlencode({'request_training': request_training})
        r = self._makeRequest('patch', '/questions/{}{}'.format(question.id, query), question.to_dict())
        if not r.ok:
            return self._handleBadResponse(r)
        return Question.from_json(r.json())

    def updateAnswers(self, answers, question, request_training=False):
        """
        API method to update answers of a question

        Parameters
        ----------
        answers : list(:class:`.Answer`)
            Modified answer instances
        question: Question
            Question the answers belong to; only its `id` is used
        request_training : bool
            Sent as the `request_training` query parameter

        Returns
        -------
        answers : list(:class:`.Answer`)
            The answers as returned by the API after the update

        Raises
        ------
        Exception
            If the request was not successful (see `_handleBadResponse`)

        """
        query = '?' + urlencode({'request_training': request_training})
        payload = [ans.to_dict() for ans in answers]
        r = self._makeRequest('patch', '/questions/{}/answers{}'.format(question.id, query), payload)
        if not r.ok:
            return self._handleBadResponse(r)
        return [Answer.from_json(el) for el in r.json()]


class Code(CaplenaObj):
    """
    Code object

    Attributes
    ----------
    id: int, required
        Code ID
    label: str, required
        Code name
    category: str, required
        Code category
    """
    def __init__(self, id, label, category, **kwargs):
        """
        Store the three code fields; any additional keyword arguments are forwarded
        unchanged to the `CaplenaObj` initializer.
        """
        self.id = id
        self.label = label
        self.category = category
        super(Code, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object.

        Parameters
        ----------
        json_data : dict
            Mapping whose keys are passed as keyword arguments to the initializer;
            must contain at least `id`, `label` and `category`

        Returns
        -------
        code : Code
            The new instance
        """
        return cls(**json_data)


class Question(CaplenaObj):
    """
    Question object

    Attributes
    ----------
    name : str, required
        Name of the question.
    description : str, optional
        String describing this question
    group_identical : bool, optional
        Flag indicating whether to group identical answers in coding view and when listing answers.
        Default=True
    group_identical_exclude : str, optional
        All answer texts matching this regular expression won't be grouped. Default=''
    smart_sort: bool, optional
        If the smart sorting feature should be enabled. Default=False
    codebook : list(:class:`.Code`), optional
        List of codes (dictionaries), each containing the keys `id`, `label` and `category`
        Defaults to an empty list.
    inherits_from : int, optional
        ID of another question of this user, that the model should be based on.
        The codebook of that question should be *identical* or *almost* identical
        in order for the AI to deliver good results.
    id : int, optional
        ID of the question, `None` for a question that has not been created yet
    question_category : str, optional
        Default='NO'. NOTE(review): the set of valid values is not visible in this file.

    """
    def __init__(
        self,
        name,
        description='',
        codebook=None,
        group_identical=True,
        group_identical_exclude='',
        smart_sort=False,
        inherits_from=None,
        id=None,
        question_category='NO',
        **kwargs
    ):
        self.name = name
        self.description = description
        self.group_identical = group_identical
        self.group_identical_exclude = group_identical_exclude
        self.smart_sort = smart_sort
        # A fresh list per instance; a `[]` default would be shared by every question.
        self.codebook = [] if codebook is None else codebook
        self.inherits_from = inherits_from
        self.id = id
        self.question_category = question_category
        # Unknown fields are forwarded unchanged to the CaplenaObj initializer.
        super(Question, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object.

        Parameters
        ----------
        json_data : dict
            Mapping whose keys are passed as keyword arguments to the initializer;
            must contain at least `name`

        Returns
        -------
        question : Question
            The new instance
        """
        return cls(**json_data)


class Answer(CaplenaObj):
    """
    Answer object

    Attributes
    ----------
    text : str, required
        Text of the answer.
    question : str|int, required
        The question this answer belongs to. The demo at the bottom of this file references
        the question by name when the project is created together with its rows, and by ID
        when rows are added to an existing project.
    reviewed : bool, optional
        Answers having the "reviewed" flag are assumed to have all codes correct
        and will be used to train the AI.
    codes : list(int), optional
        List of integers (code IDs). Assigning codes to an answer.
        Will be used to train the AI. Defaults to an empty list.
    source_language : str, optional
        ISO Code (2 characters, e.g. 'de' or 'en') specifying in which language the text is written.
        Relevant for translation, taking precedence over automatic language detection
    id : int, optional
        ID of the answer, `None` for an answer that has not been created yet

    """
    def __init__(self, text, question, source_language='', reviewed=False, codes=None, id=None, **kwargs):
        self.id = id
        self.text = text
        self.question = question
        self.reviewed = reviewed
        # A fresh list per instance; a `[]` default would be shared by every answer.
        self.codes = [] if codes is None else codes
        self.source_language = source_language
        # Unknown fields are forwarded unchanged to the CaplenaObj initializer.
        super(Answer, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object.

        Parameters
        ----------
        json_data : dict
            Mapping whose keys are passed as keyword arguments to the initializer;
            must contain at least `text` and `question`

        Returns
        -------
        answer : Answer
            The new instance
        """
        return cls(**json_data)


class Row(CaplenaObj):
    """
    Row object

    Attributes
    ----------
    auxiliary_columns : list(str), required
        Needs to have the same number of elements as the `auxiliary_column_names` field of the project
        it belongs to
    answers : list(:class:`.Answer`), required
        A list of answers, whereby exactly one answer needs to be provided for every question of the project
        it belongs to
    """
    def __init__(self, auxiliary_columns, answers, **kwargs):
        self.auxiliary_columns = auxiliary_columns
        self.answers = answers
        # Unknown fields are forwarded unchanged to the CaplenaObj initializer.
        super(Row, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object, converting its answers to `Answer` objects.

        The input mapping is not modified.

        Parameters
        ----------
        json_data : dict
            Mapping with at least the keys `answers` (list of answer mappings)
            and `auxiliary_columns`

        Returns
        -------
        row : Row
            The new instance

        Raises
        ------
        KeyError
            If `json_data` has no `answers` key
        """
        # Work on a shallow copy so the caller's dict stays intact even if a conversion fails.
        fields = dict(json_data)
        answers = [Answer.from_json(item) for item in fields.pop('answers')]
        return cls(answers=answers, **fields)


class Project(CaplenaObj):
    """
    Project object

    Attributes
    ----------
    name : str, required
        Name of the project.
    language: str, required
        Language of the project, must be iso tag
    questions: list(:class:`.Question`), required
        Questions belonging to the project
    rows : list(:class:`.Row`), optional
        Rows belonging to the project. Defaults to an empty list.
    auxiliary_column_names: list(str), optional
        Names of the auxiliary columns. Defaults to an empty list.
    translation_engine: str, optional
        Which translation engine to use. The default here is `google`, whereas
        `CaplenaAPI.createProject` passes `GT` / `DL`.
        NOTE(review): confirm which identifiers the API actually accepts.
    translate: bool, optional
        If true translate answers using translation_engine.
        Forced to `True` when a truthy `translated` value is given.
    id : int, optional
        ID of the project, `None` for a project that has not been created yet

    """
    def __init__(
        self,
        name,
        language,
        questions,
        rows=None,
        auxiliary_column_names=None,
        translation_engine='google',
        translate=False,
        translated=0,
        id=None,
        **kwargs
    ):
        self.name = name
        self.language = language
        # Fresh lists per instance; `[]` defaults would be shared by every project.
        self.auxiliary_column_names = [] if auxiliary_column_names is None else auxiliary_column_names
        # `translated` is the field name `to_dict` emits; a truthy value wins over `translate`.
        self.translate = True if translated else translate
        self.questions = questions
        self.rows = [] if rows is None else rows
        self.translation_engine = translation_engine
        self.id = id
        # Unknown fields are forwarded unchanged to the CaplenaObj initializer.
        super(Project, self).__init__(**kwargs)

    def to_dict(self):
        """
        Return the project as a plain dictionary.

        `questions` and `rows` are included as the stored objects, not converted to dictionaries.

        Returns
        -------
        data : dict
            Dictionary with the keys `id`, `name`, `language`, `auxiliary_column_names`,
            `translated` (1 or 0), `translation_engine`, `questions` and `rows`
        """
        return {
            "id": self.id,
            "name": self.name,
            "language": self.language,
            "auxiliary_column_names": self.auxiliary_column_names,
            "translated": 1 if self.translate else 0,
            "translation_engine": self.translation_engine,
            "questions": self.questions,
            "rows": self.rows
        }

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object, converting nested questions and rows.

        The input mapping is not modified.

        Parameters
        ----------
        json_data : dict
            Mapping with at least the keys `name`, `language` and `questions`;
            `rows` is converted to `Row` objects when present

        Returns
        -------
        project : Project
            The new instance

        Raises
        ------
        KeyError
            If `json_data` has no `questions` key
        """
        # Work on a shallow copy so neither `questions` nor `rows` disappears from the caller's dict.
        fields = dict(json_data)
        questions = [Question.from_json(item) for item in fields.pop('questions')]
        if 'rows' in fields:
            fields['rows'] = [Row.from_json(item) for item in fields['rows']]
        return cls(questions=questions, **fields)


class Predictions(CaplenaObj):
    """
    Predictions object

    Attributes
    ----------
    answers : list, required
        The predicted answers, stored exactly as passed in. `from_json` does not convert them,
        so they remain the decoded JSON entries (documented in `CaplenaAPI.getPredictions` as
        having the keys `id` and `codes`).
    model : dict, required
        Meta information about the model
    """
    def __init__(self, answers, model, **kwargs):
        self.answers = answers
        self.model = model
        # Unknown fields are forwarded unchanged to the CaplenaObj initializer.
        super(Predictions, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a decoded JSON object.

        Parameters
        ----------
        json_data : dict
            Mapping whose keys are passed as keyword arguments to the initializer;
            must contain at least `answers` and `model`

        Returns
        -------
        predictions : Predictions
            The new instance
        """
        return cls(**json_data)


if __name__ == '__main__':
    # Demo walkthrough, executed only when this file is run as a script.
    # Every API call below raises an Exception when the server answers with an error
    # (see CaplenaAPI._handleBadResponse), so no return value needs to be checked for failure.

    ###########################################################
    # WARNING
    # This is only for demo purposes
    # Never hard-code credentials in a production environment
    # Rather pass them via environment variables or other means
    # >>> api_key = os.environ["CAPLENA_API_KEY"]
    ###########################################################

    CAPLENA_API_KEY = '*******'

    # Instantiate new instance of CaplenaAPI class
    api = CaplenaAPI('en', CAPLENA_API_KEY)

    ###########################################################
    # LIST PROJECTS: Get all existing projects of this user
    ###########################################################
    existing_projects = api.listProjects()

    # Count how many projects we have
    print("There are {} existing projects".format(len(existing_projects)))

    ###########################################################
    # CREATE PROJECT: Create new project with two questions and two rows (=> 4 answers)
    ###########################################################
    n_questions = 2
    new_questions = [
        Question(
            name='My new question {}'.format(question_number),
            description='Some description of question {}'.format(question_number),
            codebook=[
                {
                    'id': 1,
                    'label': 'Code 1 of question {}'.format(question_number),
                    'category': 'CATEGORY 1'
                }, {
                    'id': 20,
                    'label': 'Code 2 of question {}'.format(question_number),
                    'category': 'CATEGORY 2'
                }
            ]
        ) for question_number in range(n_questions)
    ]

    # Note: When rows are created together with the project, answers reference their question by name
    new_rows = [
        Row(
            auxiliary_columns=['ID 1', 'Some other column value 1'],
            answers=[
                Answer(
                    text='Answer-text row 1 of question {}'.format(question_number),
                    question='My new question {}'.format(question_number)
                ) for question_number in range(n_questions)
            ]
        ),
        Row(
            auxiliary_columns=['ID 2', 'Some other column value 2'],
            answers=[
                Answer(
                    text='Answer-text row 2 of question {}'.format(question_number),
                    question='My new question {}'.format(question_number)
                ) for question_number in range(n_questions)
            ]
        ),
    ]

    new_project = api.createProject(
        "My new project",
        language="de",
        auxiliary_column_names=['ID', 'some other column'],
        translate=True,
        questions=new_questions,
        rows=new_rows,
        request_training=False
    )
    print("Created new project with id {}".format(new_project.id))

    question_id_1 = new_project.questions[0].id
    question_id_2 = new_project.questions[1].id

    ###########################################################
    # ADD ROWS: Add one more row to existing project
    ###########################################################

    # Note: When adding rows to an _existing_ project, the questions need to referenced by their ID
    # not their name
    further_rows = [
        Row(
            auxiliary_columns=['ID 3', 'Some other column value 3'],
            answers=[
                Answer(text='Answer-text row 3 of question {}'.format(question_number), question=question_id)
                for question_number, question_id in enumerate([question_id_1, question_id_2])
            ]
        )
    ]

    api.addRowsToProject(new_project.id, further_rows, request_training=False)
    print("Added {} new row to project {}".format(len(further_rows), new_project.id))

    ###########################################################
    # LIST ROWS: Get all rows of a specific project
    ###########################################################
    rows = api.listRows(new_project.id)

    print("This is the first row: {}".format(rows[0]))

    ###########################################################
    # LIST ANSWERS: Get all answers of a specific question
    ###########################################################
    answers = api.listAnswers(question_id_2)

    print(
        "The first answer ('{}') of question {} has been assigned the codes: {}".format(
            answers[0].text, question_id_2, answers[0].codes
        )
    )

    ###########################################################
    # REQUEST PREDICTIONS: Instruct backend to make code predictions for question
    ###########################################################

    # Returns True on success and raises otherwise, so reaching the print means the request went through
    api.requestPredictions(question_id_1)
    print("Training request made, results will soon be available")

    ###########################################################
    # GET PREDICTIONS: Return the predictions made by the model
    ###########################################################

    # In a practical setting, there needs to be some time in between requesting the predictions
    # and getting them back. In most cases, they will be ready within ~200s, but to be sure a value
    # of around 600s is recommended
    time.sleep(600)

    predictions = api.getPredictions(question_id_1)
    if predictions is None:
        print("No predictions are ready for this question")
    elif predictions.answers:
        # Predictions keeps the decoded JSON entries as they are, so each answer is a dictionary
        first_prediction = predictions.answers[0]
        print(
            "For answer {} the codes {} were predicted".format(
                first_prediction['id'], first_prediction['codes']
            )
        )