[{"classID":339,"classLabelLanguage":"en","classLabel":"String","parentClasses":[337],"ancestorClasses":[1],"classScopeNoteLanguage":"en","classScopeNote":"This class comprises the instances of E59 Primitive Values used for documentation such as free text strings, bitmaps, vector graphics, etc. E62 String is not further elaborated upon within the model","entityBasicType":0,"entityBasicTypeLabel":null,"classIdentifierInNamespace":"E62","classURI":"http://www.cidoc-crm.org/cidoc-crm/E62","namespaceURI":"http://www.cidoc-crm.org/cidoc-crm/","namespaceID":188,"namespacePrefix":"crm","namespaceLabelLanguage":"en","namespaceLabel":"CIDOC CRM version 7.1.3","profileAssociationType":"inferred","profileID":615,"profileLabelLanguage":"en","profileLabel":"Chunk - Token composition ongoing"},{"classID":1964,"classLabelLanguage":"en","classLabel":"Chunk","parentClasses":[32],"ancestorClasses":[1,27,64,65,66,67,70,81,82],"classScopeNoteLanguage":"en","classScopeNote":"This class comprises groups of words (tokens) that form a syntactically or semantically coherent unit within a sentence, typically below the sentence level. Instances of the class C16 Chunk capture intermediate linguistic structures sharing a common grammatical or semantic function, such as noun phrases, verb phrases, or adjectival phrases. They serve as building blocks between individual tokens and higher-level grammatical or semantic structures. \nThey facilitate the identification and analysis of meaningful sub-sentential units that contribute to the overall syntactic organisation and interpretation of text.\r\nIn computational linguistics and natural language processing (NLP), instances of Chunk are often used to support shallow parsing and named entity recognition (NER) by identifying noun or verb phrase boundaries without requiring a full syntactic parse.\r\nInstances of the class C16 Chunk are not full syntactic constituents in a deep grammatical sense but rather practical linguistic groupings that aid in intermediate-level text analysis.","entityBasicType":8,"entityBasicTypeLabel":"Persistent Item","classIdentifierInNamespace":"C16","classURI":"https://sdhss.org/ontology/sources-information-metadata/C16","namespaceURI":"https://sdhss.org/ontology/sources-information-metadata/","namespaceID":74,"namespacePrefix":"sdh-info","namespaceLabelLanguage":"en","namespaceLabel":"SDHSS for Sources Information and Metadata ongoing","profileAssociationType":"selected","profileID":615,"profileLabelLanguage":"en","profileLabel":"Chunk - Token composition ongoing"},{"classID":2015,"classLabelLanguage":"en","classLabel":"Token","parentClasses":[32],"ancestorClasses":[1,27,64,65,66,67,70,81,82],"classScopeNoteLanguage":"en","classScopeNote":"Following the standard definition in Natural Language Processing, a token is the basic unit of text created by splitting a string into smaller segments, such as words, subwords, or characters. This process, known as tokenization, converts raw text into a structured format that algorithms can numerically encode and process. \nTokens serve as the fundamental building blocks for models to understand syntax, semantics, and context within a given language.","entityBasicType":8,"entityBasicTypeLabel":"Persistent Item","classIdentifierInNamespace":"C24","classURI":"https://sdhss.org/ontology/sources-information-metadata/C24","namespaceURI":"https://sdhss.org/ontology/sources-information-metadata/","namespaceID":74,"namespacePrefix":"sdh-info","namespaceLabelLanguage":"en","namespaceLabel":"SDHSS for Sources Information and Metadata ongoing","profileAssociationType":"selected","profileID":615,"profileLabelLanguage":"en","profileLabel":"Chunk - Token composition ongoing"},{"classID":2016,"classLabelLanguage":"en","classLabel":"Chunk Type","parentClasses":[],"ancestorClasses":[],"classScopeNoteLanguage":"en","classScopeNote":"This class comprises concepts denoted by terms from thesauri and controlled vocabularies used to characterize and classify instances of the sdh-info:C16 Chunk class.","entityBasicType":0,"entityBasicTypeLabel":null,"classIdentifierInNamespace":"C25","classURI":"https://sdhss.org/ontology/sources-information-metadata/C25","namespaceURI":"https://sdhss.org/ontology/sources-information-metadata/","namespaceID":74,"namespacePrefix":"sdh-info","namespaceLabelLanguage":"en","namespaceLabel":"SDHSS for Sources Information and Metadata ongoing","profileAssociationType":"selected","profileID":615,"profileLabelLanguage":"en","profileLabel":"Chunk - Token composition ongoing"}]