cleverdoc.models.ner package#

Submodules#

cleverdoc.models.ner.Ner module#

class cleverdoc.models.ner.Ner.Ner(*args, **kwargs)#

Bases: Transformer, HasInputCol, HasOutputCol, HasKeepInput, HasWhiteList, DefaultParamsReadable, DefaultParamsWritable, HasNumPartitions, LicenseValidator

static aggregate_ner_results(text, pipeline, max_length=500, stride=256)#
batchSize = Param(parent='undefined', name='batchSize', doc='batchSize.')#
device = Param(parent='undefined', name='device', doc='Device.')#
getDevice()#

Gets the value of device or its default value.

getModel()#

Gets the value of model or its default value.

getThreshold()#

Gets the value of threshold or its default value.

get_pipeline()#
model = Param(parent='undefined', name='model', doc='Model name.')#
setBatchSize(value)#

Sets the value of batchSize.

setDevice(value)#

Sets the value of device.

setModel(value)#

Sets the value of model.

setThreshold(value)#

Sets the value of threshold.

static split_text(text, max_length=500, stride=256)#
threshold = Param(parent='undefined', name='threshold', doc='Threshold.')#
transform_local(text)#
transform_udf(image)#
static transform_udf_pandas(texts: DataFrame, params: Series) → DataFrame#
cleverdoc.models.ner.Ner.log_info(msg)#

cleverdoc.models.ner.NerLLM module#

class cleverdoc.models.ner.NerLLM.NerLLM#

Bases: Transformer, HasInputCol, HasOutputCol, HasKeepInput, HasWhiteList, DefaultParamsReadable, DefaultParamsWritable

getModel()#

Gets the value of model or its default value.

model = Param(parent='undefined', name='model', doc='Model name.')#
setModel(value)#

Sets the value of model.

transform_local(text)#
transform_udf(image)#

cleverdoc.models.ner.NerMerger module#

class cleverdoc.models.ner.NerMerger.NerMerger#

Bases: Transformer, DefaultParamsReadable, DefaultParamsWritable, HasInputCols, HasOutputCol

cleverdoc.models.ner.NerRuleBased module#

class cleverdoc.models.ner.NerRuleBased.NerRuleBased(*args, **kwargs)#

Bases: Transformer, HasInputCol, HasOutputCol, HasKeepInput, HasWhiteList, DefaultParamsReadable, DefaultParamsWritable, HasNumPartitions, LicenseValidator

getDevice()#

Gets the value of device or its default value.

getModel()#

Gets the value of model or its default value.

getThreshold()#

Gets the value of threshold or its default value.

setBatchSize(value)#

Sets the value of batchSize.

setDevice(value)#

Sets the value of device.

setModel(value)#

Sets the value of model.

setThreshold(value)#

Sets the value of threshold.

threshold = Param(parent='undefined', name='threshold', doc='Threshold.')#
static transform_udf_pandas(texts: DataFrame, params: Series) → DataFrame#
cleverdoc.models.ner.NerRuleBased.log_info(msg)#

cleverdoc.models.ner.StringToKeyValue module#

class cleverdoc.models.ner.StringToKeyValue.StringToKeyValue#

Bases: Transformer, HasInputCol, HasOutputCol, HasKeepInput, HasWhiteList, DefaultParamsReadable, DefaultParamsWritable

getModel()#

Gets the value of model or its default value.

model = Param(parent='undefined', name='model', doc='Model name.')#
setModel(value)#

Sets the value of model.

transform_local(text)#
transform_udf(image)#

Module contents#