# -*- coding: utf-8 -*-
"""Demo script illustrating how to do basic operations on the Caplena API
Example
-------
Steps to run:
1. Make sure you have a compatible (version >= 2.7) python environment
$ python --version
2. Make sure you have installed the `requests` and `six` libraries, both of which this script imports
$ pip install requests six
3. Set the `CAPLENA_API_KEY` variable at the bottom of this script
4. Call the script
$ python caplena_api_demo.py
Notes
-----
This script is not intended to be shared with third parties.
Every receiving party agrees to use it solely for own purposes
and purposes that are intended by the original author (Caplena GmbH).
Copyright 2020 Caplena GmbH, Zurich.
"""
import requests
import time
import json
import six
if six.PY2:
from urllib import urlencode
else:
from urllib.parse import urlencode
from src.utils import CaplenaObj, ComplexEncoder
class CaplenaAPI(object):
    """Class enabling interaction with (parts of) the Caplena.co API

    Example
    -------
    To call an API, instantiate a CaplenaAPI object and then call its methods::

        api = CaplenaAPI('de', '<API_KEY>')
        projects = api.listProjects()  # list of Project objects

    Attributes
    ----------
    valid_languages : list(str)
        Values accepted for the `language` argument of the constructor
    """
    valid_languages = ['en', 'de']

    def __init__(self, language, api_key):
        """
        API Class Initializer.

        Stores the API key, the base URL and the content language on the instance and creates
        the `requests` session object which is used for all subsequent API calls.

        Parameters
        ----------
        language : str
            Content-Language for API calls (mainly relevant for error messages), either "de" or "en"
        api_key : str
            API key to authenticate to the Caplena API, if you don't have a key, please contact support@caplena.com

        Raises
        ------
        ValueError
            If `language` is not one of `valid_languages`
        """
        super(CaplenaAPI, self).__init__()
        self.api_key = api_key
        self.authenticated = False
        self.baseURI = "https://api.caplena.com/api"
        if language not in self.valid_languages:
            raise ValueError(
                "Invalid language '{}', accepted values are {{{}}}".format(
                    language, ",".join(self.valid_languages)
                )
            )
        self.language = language
        self.sess = requests.Session()

    @staticmethod
    def _buildQueryString(params):
        """
        Internal helper turning a dict of GET parameters into a query string.

        Parameters
        ----------
        params : dict
            Parameter names mapped to their values

        Returns
        -------
        query : str
            `'?'` followed by the url-encoded parameters, or an empty string if `params` is empty
        """
        return '?' + urlencode(params) if params else ''

    def _getHeaders(self):
        """
        Internal function to generate global header for all API calls

        The following headers are returned:

        * `Content-Type`: We always use json format for the data we send
        * `Accept`: We always expect json back from the server
        * `Accept-Language`: The language for API messages (and in some cases content), supported: {en|de}
        * `Authorization`: The API key of this instance, in the form `apikey <key>`
        * `Referer`: The base URI of the API

        Returns
        -------
        headers : dict
            Dictionary with keys being the header names and values the header values
        """
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Accept-Language": self.language,
            "Authorization": "apikey {}".format(self.api_key),
            "Referer": self.baseURI
        }

    def _handleBadResponse(self, response):
        """
        Internal function to handle unsuccessful requests

        Currently a dummy function which just raises an error with the response text.
        Can be adjusted to do a more fine-grained error handling. All API methods return
        whatever this function returns, so a customised version may return a value instead.

        Parameters
        ----------
        response : requests::response object
            The response object which failed

        Raises
        ------
        Exception
            Always, carrying the status code and the text of the response
        """
        raise Exception("ERROR (status code {}): {}".format(response.status_code, response.text))

    def _makeRequest(self, method, apiURI, data=None, publicmethod=False):
        """
        Internal function to make the API call.

        Currently a very thin wrapper around `requests` library.
        Only does three things

        * Get and set headers by calling `_getHeaders()`
        * Concatenate the base URI and the api URI
        * Serialize `data` to JSON

        Parameters
        ----------
        method : str
            HTTP request method which should be called (i.e. get / post / ...)
            Needs to be a method of the `requests` session, otherwise function will fail
        apiURI : str
            The URI of the API method to call (only last part, the base URI including domain are class attributes)
        data : dict|list
            Data to be sent to API as json (not-serialized!). `None` (the default) sends no body.
        publicmethod : bool
            Flag indicating that no authentication is required for this API method.
            Only set to True for public endpoints
            (optional)

        Returns
        -------
        response : requests::response obj
            Response object containing information about the servers response

        Raises
        ------
        Exception
            If no API key was provided and `publicmethod` is not set
        """
        if not publicmethod and self.api_key is None:
            raise Exception("API key not provided. Provide valid API key when instantiating CaplenaAPI")
        send = getattr(self.sess, method)
        # Compare against None instead of relying on truthiness, so that an
        # empty list / dict is still transmitted as a JSON body ("[]" / "{}")
        payload = None if data is None else json.dumps(data, cls=ComplexEncoder)
        return send(
            "{}{}".format(self.baseURI, apiURI),
            data=payload,
            headers=self._getHeaders()
        )

    def listProjects(self):
        """
        API method to list all projects that belong to this user.

        *Note:* The returned projects contain global meta information of the projects *and* their questions, but not the response texts.

        Returns
        -------
        projects : list(:class:`.Project`)
            A list of all projects belonging to the user.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/projects/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Project.from_json(data) for data in r.json()]

    def listInheritableProjects(self):
        """
        API method to list all projects of which inheritance is possible.

        List contains all projects belonging to user, as well as Caplena provided models.

        *Note:* The returned projects only contain basic meta information on the project and their questions, but not the response texts. To get more detailed information about a certain project call the `listProjects` method.

        Returns
        -------
        projects : list(:class:`.Project`)
            A list of all projects that can be used for inheritance. This is the concatenation of all projects owned by the user and global Caplena models.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/projects-inheritable/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Project.from_json(data) for data in r.json()]

    def listQuestions(self):
        """
        API method to list all questions that belong to this user.

        *Note:* The returned questions only contain global meta information of the questions and not the response texts.

        Returns
        -------
        questions : list(:class:`.Question`)
            A list of all questions belonging to the user.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/questions/')
        if not r.ok:
            return self._handleBadResponse(r)
        return [Question.from_json(data) for data in r.json()]

    def getQuestion(self, question_id):
        """
        API method to get question info by ID.

        *Note:* The returned question only contains meta information of the question and not the response texts.

        Parameters
        ----------
        question_id : int
            ID of the question to return

        Returns
        -------
        question : Question
            A question object.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/questions/{}'.format(question_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return Question.from_json(r.json())

    def getProject(self, project_id):
        """
        API method to get project info by ID.

        Parameters
        ----------
        project_id : int
            ID of the project to return

        Returns
        -------
        project : Project
            A project object.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/projects/{}'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return Project.from_json(r.json())

    def createProject(
        self,
        name,
        language,
        translate=False,
        auxiliary_column_names=None,
        questions=None,
        rows=None,
        translation_engine='GT',
        upload_async=True,
        request_training=True
    ):
        """
        API method to create a new project

        *Note:*

        * When creating a new project you can also create questions and rows belonging to it.
        * Creating new questions is _only_ possible when creating a new project. Questions cannot be added to an
          existing project.
        * Rows can also be added to a project at a later time

        Parameters
        ----------
        name : str, required
            Name of the new project
        language : str, required
            Language of the project, valid choices are {en|de}
            Has nothing to do with the language the API is set to (the attribute `language`.)
        translate : bool, optional
            Flag indicating whether to translate this project (where other language than `language` detected)
        auxiliary_column_names : list, optional
            List of strings, naming additional columns that will be sent with each row.
            Can also be an empty list (the default).
            The number of elements in this list must match the number of elements
            in the `auxiliary_columns` field when adding rows.
        questions : list(:class:`.Question`), optional
            List of questions to create, defaults to an empty list
        rows : list(:class:`.Row`), optional
            List of objects of type Row, defaults to an empty list
        translation_engine : str
            Choice of translation engine, either 'GT' for Google Translate or 'DL' for DeepL
        upload_async : bool
            If true, send async request, required if uploading more than 20 rows at once or if uploading answers
            with `reviewed=True`. If false, the `async` GET parameter is omitted from the request.
        request_training : bool
            If true, automatically request training after uploading answers

        Returns
        -------
        project : Project
            A new Project object.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        # Fresh lists per call: the Project keeps a reference to these lists,
        # so shared mutable defaults would leak state between calls
        proj = Project(
            name=name,
            language=language,
            translate=translate,
            auxiliary_column_names=[] if auxiliary_column_names is None else auxiliary_column_names,
            questions=[] if questions is None else questions,
            rows=[] if rows is None else rows,
            translation_engine=translation_engine
        )
        get_params = {'request_training': request_training}
        if upload_async:
            get_params['async'] = upload_async
        query = self._buildQueryString(get_params)
        r = self._makeRequest('post', '/projects/{}'.format(query), proj.to_dict())
        if not r.ok:
            return self._handleBadResponse(r)
        return Project.from_json(r.json())

    def addRowsToProject(self, project_id, rows, upload_async=True, request_training=True):
        """
        API method to add rows to a previously created project.

        Parameters
        ----------
        project_id : int
            ID of the project to add the rows to
        rows : list(:class:`.Row`)
            List of objects of type Row
        upload_async : bool
            If true, send async request, required if uploading more than 20 rows at once or if uploading answers
            with `reviewed=True`. If false, the `async` GET parameter is omitted from the request.
        request_training : bool
            If true, automatically request training after uploading answers

        Returns
        -------
        rows : list(:class:`.Row`)
            A list of the newly created rows.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        get_params = {'request_training': request_training}
        if upload_async:
            get_params['async'] = upload_async
        query = self._buildQueryString(get_params)
        r = self._makeRequest(
            'post', '/projects/{}/rows{}'.format(project_id, query), [row.to_dict() for row in rows]
        )
        if not r.ok:
            return self._handleBadResponse(r)
        return [Row.from_json(dat) for dat in r.json()]

    def listRows(self, project_id):
        """
        API method to list all rows of a specific project.

        Parameters
        ----------
        project_id : int
            ID of the project of which to return the rows

        Returns
        -------
        rows : list(:class:`.Row`)
            A list of all rows belonging to the project.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/projects/{}/rows'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return [Row.from_json(dat) for dat in r.json()]

    def listAnswers(self, question_id, no_group=False):
        """
        API method to list all answers of a specific question.

        Parameters
        ----------
        question_id : int
            ID of the question of which to return the answers
        no_group : bool
            If true, no grouping will be applied to answers list,
            overriding the `group_identical` property of the question

        Returns
        -------
        answers : list(:class:`.Answer`)
            A list of all answers belonging to the question.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        query = '?no_group' if no_group else ''
        r = self._makeRequest('get', '/questions/{}/answers{}'.format(question_id, query))
        if not r.ok:
            return self._handleBadResponse(r)
        return [Answer.from_json(dat) for dat in r.json()]

    def requestPredictions(self, question_id, **kwargs):
        """
        API method to request the AI-assistant to train itself based on coded answers of specified question. Only works
        if at least 6 answers have been coded.

        Parameters
        ----------
        question_id : int
            ID of the question of which to request AI to make predictions
        **kwargs
            Sent url-encoded as GET parameters of the request

        Returns
        -------
        success : bool
            True if request successful.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        request_url = '/questions/{}/request-training'.format(question_id)
        request_url += self._buildQueryString(kwargs)
        r = self._makeRequest('post', request_url)
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def getPredictions(self, question_id):
        """
        API method to get AI-coded codes and respective answers. Requires previous call to
        :func:`~caplena_api_demo.CaplenaAPI.requestPredictions`.

        Parameters
        ----------
        question_id : int
            ID of the question of which to return the code predictions made by AI

        Returns
        -------
        result : Predictions|None
            None if no predictions are available (response code 204).
            For response code 200, a Predictions object built from the response, holding `answers`
            (the predictions) and `model` (meta information on model performance).
            For any other response code `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('get', '/questions/{}/codes-predicted'.format(question_id))
        if r.status_code == 204:
            # No content is available, i.e. no predictions are ready for this question
            return None
        if r.status_code == 200:
            return Predictions.from_json(r.json())
        return self._handleBadResponse(r)

    def deleteQuestion(self, question_id):
        """
        API method to delete question and its answers.

        Parameters
        ----------
        question_id : int
            ID of the question to delete

        Returns
        -------
        success : bool
            True if request successful.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('delete', '/questions/{}'.format(question_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def deleteProject(self, project_id):
        """
        API method to delete projects, its questions and corresponding answers.

        Parameters
        ----------
        project_id : int
            ID of the project to delete

        Returns
        -------
        success : bool
            True if request successful.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        r = self._makeRequest('delete', '/projects/{}'.format(project_id))
        if not r.ok:
            return self._handleBadResponse(r)
        return True

    def updateQuestion(self, question, request_training=False):
        """
        API method to update question

        Parameters
        ----------
        question : Question
            Modified question instance, its `id` selects the question to update
        request_training : bool
            Sent as the `request_training` GET parameter of the request

        Returns
        -------
        question : Question
            Newly updated question instance.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        query = self._buildQueryString({'request_training': request_training})
        r = self._makeRequest('patch', '/questions/{}{}'.format(question.id, query), question.to_dict())
        if not r.ok:
            return self._handleBadResponse(r)
        return Question.from_json(r.json())

    def updateAnswers(self, answers, question, request_training=False):
        """
        API method to update answers of a question

        Parameters
        ----------
        answers : list(:class:`.Answer`)
            Modified answer instances
        question : Question
            Question the answers belong to, only its `id` is used
        request_training : bool
            Sent as the `request_training` GET parameter of the request

        Returns
        -------
        answers : list(:class:`.Answer`)
            Answer instances built from the response.
            On an unsuccessful response `_handleBadResponse` is invoked, which raises by default.
        """
        query = self._buildQueryString({'request_training': request_training})
        r = self._makeRequest(
            'patch', '/questions/{}/answers{}'.format(question.id, query),
            [ans.to_dict() for ans in answers]
        )
        if not r.ok:
            return self._handleBadResponse(r)
        return [Answer.from_json(el) for el in r.json()]
class Code(CaplenaObj):
    """
    Code object, a single entry of a codebook

    Attributes
    ----------
    id : int, required
        Code ID
    label : str, required
        Code name
    category : str, required
        Code category
    """

    def __init__(self, id, label, category, **kwargs):
        """
        Store the three code fields and hand any further keyword arguments to the base class.
        """
        self.id, self.label, self.category = id, label, category
        super(Code, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary whose keys are used as constructor keyword arguments.
        """
        instance = cls(**json_data)
        return instance
class Question(CaplenaObj):
    """
    Question object

    Attributes
    ----------
    name : str, required
        Name of the question.
    description : str, optional
        String describing this question
    codebook : list(:class:`.Code`), optional
        List of codes (dictionaries), each containing the keys `id`, `label` and `category`
        Can also be an empty list (the default).
    group_identical : bool, optional
        Flag indicating whether to group identical answers in coding view and when listing answers.
        Default=True
    group_identical_exclude : str, optional
        All answer texts matching this regular expression won't be grouped. Default=''
    smart_sort : bool, optional
        If the smart sorting feature should be enabled. Default=False
    inherits_from : int, optional
        ID of another question of this user, that the model should be based on.
        The codebook of that question should be *identical* or *almost* identical
        in order for the AI to deliver good results.
    id : int, optional
        ID of the question. Default=None
    question_category : str, optional
        Category of the question. Default='NO'
    """

    def __init__(
        self,
        name,
        description='',
        codebook=None,
        group_identical=True,
        group_identical_exclude='',
        smart_sort=False,
        inherits_from=None,
        id=None,
        question_category='NO',
        **kwargs
    ):
        """
        Store all question fields; keyword arguments not listed above are handed to the base class.
        """
        self.name = name
        self.description = description
        self.group_identical = group_identical
        self.group_identical_exclude = group_identical_exclude
        self.smart_sort = smart_sort
        # A fresh list per instance: a shared `[]` default would be mutated
        # by every question that appends to its codebook
        self.codebook = [] if codebook is None else codebook
        self.inherits_from = inherits_from
        self.id = id
        self.question_category = question_category
        super(Question, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary whose keys are used as constructor keyword arguments.
        """
        return cls(**json_data)
class Answer(CaplenaObj):
    """
    Answer object

    Attributes
    ----------
    text : str, required
        Text of the answer.
    question : str, required
        The name of the question this answer belongs to
    source_language : str, optional
        ISO Code (2 characters, e.g. 'de' or 'en') specifying in which language the text is written.
        Relevant for translation, taking precedence over automatic language detection
    reviewed : bool, optional
        Answers having the "reviewed" flag are assumed to have all codes correct
        and will be used to train the AI.
    codes : list(int), optional
        List of integers (code IDs). Assigning codes to an answer.
        Will be used to train the AI. Defaults to an empty list.
    id : int, optional
        ID of the answer. Default=None
    """

    def __init__(self, text, question, source_language='', reviewed=False, codes=None, id=None, **kwargs):
        """
        Store all answer fields; keyword arguments not listed above are handed to the base class.
        """
        self.id = id
        self.text = text
        self.question = question
        self.reviewed = reviewed
        # A fresh list per instance: a shared `[]` default would be mutated
        # by every answer that appends to its codes
        self.codes = [] if codes is None else codes
        self.source_language = source_language
        super(Answer, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary whose keys are used as constructor keyword arguments.
        """
        return cls(**json_data)
class Row(CaplenaObj):
    """
    Row object

    Attributes
    ----------
    auxiliary_columns : list(str), required
        Needs to have the same number of elements as the `auxiliary_column_names` field of the project
        it belongs to
    answers : list(:class:`.Answer`), required
        A list of answers, whereby exactly one answer needs to be provided for every question of the project
        it belongs to
    """

    def __init__(self, auxiliary_columns, answers, **kwargs):
        """
        Store both row fields; any further keyword arguments are handed to the base class.
        """
        self.auxiliary_columns = auxiliary_columns
        self.answers = answers
        super(Row, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary, converting the entries of its `answers` key
        to Answer objects. The remaining keys are used as constructor keyword arguments.

        The passed dictionary itself is not modified.

        Raises
        ------
        KeyError
            If `json_data` has no `answers` key
        """
        # Work on a shallow copy, so the caller's dict stays intact even if
        # converting one of the answers fails
        fields = dict(json_data)
        answers = [Answer.from_json(ans) for ans in fields.pop('answers')]
        return cls(answers=answers, **fields)
class Project(CaplenaObj):
    """
    Project object

    Attributes
    ----------
    name : str, required
        Name of the project.
    language : str, required
        Language of the project, must be iso tag
    questions : list(:class:`.Question`), required
        Questions belonging to the project
    rows : list(:class:`.Row`), optional
        Rows belonging to the project, defaults to an empty list
    auxiliary_column_names : list(str), optional
        Names of the auxiliary columns, defaults to an empty list
    translation_engine : str, optional
        Which translation engine to use. Default='google'
        NOTE(review): `CaplenaAPI.createProject` passes 'GT' / 'DL' here - confirm which values the API accepts
    translate : bool, optional
        If true translate answers using translation_engine
    translated : int, optional
        If truthy, `translate` is set to True regardless of the `translate` argument. Default=0
    id : int, optional
        ID of the project. Default=None
    """

    def __init__(
        self,
        name,
        language,
        questions,
        rows=None,
        auxiliary_column_names=None,
        translation_engine='google',
        translate=False,
        translated=0,
        id=None,
        **kwargs
    ):
        """
        Store all project fields; keyword arguments not listed above are handed to the base class.
        """
        self.name = name
        self.language = language
        # Fresh lists per instance instead of shared mutable `[]` defaults
        self.auxiliary_column_names = [] if auxiliary_column_names is None else auxiliary_column_names
        # A truthy `translated` takes precedence over `translate`
        self.translate = True if translated else translate
        self.questions = questions
        self.rows = [] if rows is None else rows
        self.translation_engine = translation_engine
        self.id = id
        super(Project, self).__init__(**kwargs)

    def to_dict(self):
        """
        Return the project fields as a dictionary.

        `translate` is emitted under the key `translated` as 1 or 0. `questions` and `rows` are
        included as the stored objects, they are not converted to dictionaries here.
        """
        return {
            "id": self.id,
            "name": self.name,
            "language": self.language,
            "auxiliary_column_names": self.auxiliary_column_names,
            "translated": 1 if self.translate else 0,
            "translation_engine": self.translation_engine,
            "questions": self.questions,
            "rows": self.rows
        }

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary, converting the entries of its `questions` key to
        Question objects and, if present, the entries of its `rows` key to Row objects.
        The remaining keys are used as constructor keyword arguments.

        The passed dictionary itself is not modified.

        Raises
        ------
        KeyError
            If `json_data` has no `questions` key
        """
        # Work on a shallow copy: popping from the caller's dict would
        # permanently strip its `questions` entry
        fields = dict(json_data)
        fields['questions'] = [Question.from_json(q) for q in fields.pop('questions')]
        if 'rows' in fields:
            fields['rows'] = [Row.from_json(r) for r in fields['rows']]
        return cls(**fields)
class Predictions(CaplenaObj):
    """
    Predictions object, as returned by `CaplenaAPI.getPredictions`

    Attributes
    ----------
    answers : list, required
        The predictions, stored exactly as passed in (`from_json` does not convert the entries)
    model : dict, required
        Meta information about the model
    """

    def __init__(self, answers, model, **kwargs):
        """
        Store both fields; any further keyword arguments are handed to the base class.
        """
        self.answers = answers
        self.model = model
        super(Predictions, self).__init__(**kwargs)

    @classmethod
    def from_json(cls, json_data):
        """
        Build an instance from a dictionary whose keys are used as constructor keyword arguments.
        """
        # `cls` instead of the hard-coded class name, so subclasses get
        # instances of their own type (as in Code / Question / Answer)
        return cls(**json_data)
if __name__ == '__main__':
    # Demo run, executed when calling this script directly: creates a project with two
    # questions, adds rows, lists rows and answers, requests predictions and fetches them.
    import os

    ###########################################################
    # WARNING
    # This is only for demo purposes
    # Never hard-code credentials in a production environment
    # Rather pass them via environment variables or other means,
    # the `CAPLENA_API_KEY` environment variable takes precedence here
    ###########################################################
    CAPLENA_API_KEY = os.environ.get('CAPLENA_API_KEY', '*******')

    # Seconds to wait between requesting predictions and fetching them
    PREDICTION_WAIT_SECONDS = 600

    # Instantiate new instance of CaplenaAPI class
    api = CaplenaAPI('en', CAPLENA_API_KEY)

    ###########################################################
    # LIST PROJECTS: Get all existing projects of this user
    ###########################################################
    existing_projects = api.listProjects()

    # Count how many projects we have
    print("There are {} existing projects".format(len(existing_projects)))

    ###########################################################
    # CREATE PROJECT: Create new project with two questions and two rows (=> 4 answers)
    ###########################################################
    n_questions = 2
    new_questions = [
        Question(
            name='My new question {}'.format(question_number),
            description='Some description of question {}'.format(question_number),
            codebook=[
                {
                    'id': 1,
                    'label': 'Code 1 of question {}'.format(question_number),
                    'category': 'CATEGORY 1'
                }, {
                    'id': 20,
                    'label': 'Code 2 of question {}'.format(question_number),
                    'category': 'CATEGORY 2'
                }
            ]
        ) for question_number in range(n_questions)
    ]
    new_rows = [
        Row(
            auxiliary_columns=['ID 1', 'Some other column value 1'],
            answers=[
                Answer(
                    text='Answer-text row 1 of question {}'.format(question_number),
                    question='My new question {}'.format(question_number)
                ) for question_number in range(n_questions)
            ]
        ),
        Row(
            auxiliary_columns=['ID 2', 'Some other column value 2'],
            answers=[
                Answer(
                    text='Answer-text row 2 of question {}'.format(question_number),
                    question='My new question {}'.format(question_number)
                ) for question_number in range(n_questions)
            ]
        ),
    ]
    new_project = api.createProject(
        "My new project",
        language="de",
        auxiliary_column_names=['ID', 'some other column'],
        translate=True,
        questions=new_questions,
        rows=new_rows,
        request_training=False
    )
    if new_project is False:
        # With the stock `_handleBadResponse` a failure raises before we get here; this only
        # triggers if that handler was customised to return False. Nothing below can work
        # without a project, so stop instead of failing on `False.id`.
        raise SystemExit("Project creation failed, aborting demo")
    print("Created new project with id {}".format(new_project.id))
    question_id_1 = new_project.questions[0].id
    question_id_2 = new_project.questions[1].id

    ###########################################################
    # ADD ROWS: Add one more row to existing project
    # Note: When adding rows to an _existing_ project, the questions need to referenced by their ID
    # not their name
    ###########################################################
    further_rows = [
        Row(
            auxiliary_columns=['ID 3', 'Some other column value 3'],
            answers=[
                Answer(text='Answer-text row 3 of question {}'.format(question_number), question=question_id)
                for question_id, question_number in zip([question_id_1, question_id_2], range(n_questions))
            ]
        )
    ]
    further_rows_result = api.addRowsToProject(new_project.id, further_rows, request_training=False)
    if further_rows_result is not False:
        print("Added {} new row to project {}".format(len(further_rows), new_project.id))

    ###########################################################
    # LIST ROWS: Get all rows of a specific project
    ###########################################################
    rows = api.listRows(new_project.id)
    print("This is the first row: {}".format(rows[0]))

    ###########################################################
    # LIST ANSWERS: Get all answers of a specific question
    ###########################################################
    answers = api.listAnswers(question_id_2)
    print(
        "The first answer ('{}') of question {} has been assigned the codes: {}".format(
            answers[0].text, question_id_2, answers[0].codes
        )
    )

    ###########################################################
    # REQUEST PREDICTIONS: Instruct backend to make code predictions for question
    ###########################################################
    if api.requestPredictions(question_id_1):
        print("Training request made, results will soon be available")
    else:
        print("An error occurred when requesting training")

    ###########################################################
    # GET PREDICTIONS: Return the predictions made by the model
    ###########################################################
    # In a practical setting, there needs to be some time in between requesting the predictions
    # and getting them back. In most cases, they will be ready within ~200s, but to be sure a value
    # of around 600s is recommended
    time.sleep(PREDICTION_WAIT_SECONDS)
    predictions = api.getPredictions(question_id_1)
    # NOTE(review): the checks below rely on the Predictions object supporting `in` and item access,
    # and on its answers exposing `.id` / `.codes` - confirm against CaplenaObj in src.utils
    if predictions is None:
        print("No predictions are ready for this question")
    elif 'answers' in predictions and len(predictions['answers']) > 0:
        print(
            "For answer {} the codes {} were predicted".format(
                predictions['answers'][0].id, predictions['answers'][0].codes
            )
        )