@conference {2651, title = {Adaptive Item and Feedback Selection in Personalized Learning with a Network Approach}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Personalized learning describes educational systems that adapt curriculum sequencing, pacing, and presentation to each student's unique background, knowledge, preferences, interests, and learning goals (Chen, 2008; Netcoh, 2016). The technological approach to personalized learning provides data-driven models that incorporate these adaptations automatically. Examples of applications include online learning systems, educational games, and revision-aid systems. In this study we introduce Bayesian networks as a methodology for implementing an adaptive framework within a personalized learning environment. Existing ideas from Computerized Adaptive Testing (CAT) with Item Response Theory (IRT), where choices about content provision are based on maximizing information, are related to the goals of personalized learning environments. Personalized learning entails goals beyond efficient ability estimation by maximizing information, such as an adaptive configuration of preferences and feedback to the student. These considerations are discussed and their application in networks is illustrated.

Adaptivity in Personalized Learning. In standard CATs the focus is on selecting items that provide maximum information about the ability of an individual at a certain point in time (Van der Linden \& Glas, 2000). When learning is the main goal of testing, alternative adaptive item selection methods are appropriate, as explored by Eggen (2012). The adaptive choices made in personalized learning applications require additional adaptivity with respect to the following aspects: the moment of feedback, the kind of feedback, and the possibility for students to actively influence the learning process.
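As a minimal sketch of the maximum-information criterion mentioned above, assuming a two-parameter logistic (2PL) IRT model in which the information of item j at ability theta is a_j^2 P_j(theta)(1 - P_j(theta)); the item bank and parameter values below are purely illustrative, not taken from the study.

import numpy as np

def p_correct(theta, a, b):
    # 2PL probability of a correct response
    return 1.0 / (1.0 + np.exp(-a * (theta - b)))

def fisher_information(theta, a, b):
    # Fisher information of a 2PL item at ability theta: a^2 * P * (1 - P)
    p = p_correct(theta, a, b)
    return a ** 2 * p * (1.0 - p)

def select_next_item(theta_hat, item_bank, administered):
    # pick the not-yet-administered item with maximum information at the current estimate
    candidates = [(i, fisher_information(theta_hat, a, b))
                  for i, (a, b) in enumerate(item_bank) if i not in administered]
    return max(candidates, key=lambda pair: pair[1])[0]

# illustrative item bank: (discrimination a, difficulty b) pairs
bank = [(1.2, -0.5), (0.8, 0.0), (1.5, 0.7), (1.0, 1.2)]
print(select_next_item(theta_hat=0.3, item_bank=bank, administered=[0]))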

Bayesian Networks and Personalized Learning. Personalized learning aims at constructing a framework that incorporates all of the aspects mentioned above. The goal of this framework is therefore not only to obtain ability estimates by choosing items on maximum information, but also to allow these other factors to play a role. Plajner and Vomlel (2016) have already applied Bayesian networks to adaptive testing, selecting items with the help of entropy reduction. Almond et al. (2015) provide a reference work on Bayesian networks in educational assessment. Both acknowledge the potential of the method in terms of features such as modularity, which makes it possible to build finer-grained models. IRT does not easily allow modeling sub-skills or gathering information at a fine-grained level, because it generally assumes a single underlying trait. The local independence assumption in IRT implies an interest mainly in the student's overall ability on the subject of interest. When the goal is to improve students' learning, we are not just interested in efficiently arriving at their test score on a global subject. One wants a model that can map educational problems and talents in detail over the whole educational program, while allowing for dependency between items. At a given moment in time some topics may be mastered better than others, and this is exactly what we want to get out of a model. The possibility to model flexible structures, to estimate abilities at a very detailed level for sub-skills, and to easily incorporate other variables such as feedback makes Bayesian networks a very promising method for making adaptive choices in personalized learning. This research shows how item and feedback selection can be performed with the help of Bayesian networks. A possibility for student involvement is also introduced and evaluated.
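To illustrate entropy-reduction item selection in a network, the following sketch assumes a single binary skill node with a prior and a few items characterized only by P(correct | skill state); the item names and probabilities are hypothetical, and a real application would use a full Bayesian network over sub-skills and feedback variables rather than this reduced case.

import numpy as np

def entropy(p):
    # Shannon entropy (in bits) of a discrete distribution
    p = np.asarray(p, dtype=float)
    p = p[p > 0]
    return float(-np.sum(p * np.log2(p)))

def posterior(prior, p_correct_given_state, observed_correct):
    # Bayes update of the skill-state distribution after observing one item response
    likelihood = p_correct_given_state if observed_correct else 1.0 - p_correct_given_state
    post = prior * likelihood
    return post / post.sum()

def expected_posterior_entropy(prior, p_correct_given_state):
    # expected entropy of the skill state if this item were administered next
    p_correct = float(np.dot(prior, p_correct_given_state))
    h_if_correct = entropy(posterior(prior, p_correct_given_state, True))
    h_if_wrong = entropy(posterior(prior, p_correct_given_state, False))
    return p_correct * h_if_correct + (1.0 - p_correct) * h_if_wrong

# one binary skill node with prior (P(not mastered), P(mastered)); three candidate items,
# each described only by P(correct | skill state) -- all numbers are illustrative
prior = np.array([0.5, 0.5])
items = [
    ("item_A", np.array([0.20, 0.85])),
    ("item_B", np.array([0.40, 0.60])),
    ("item_C", np.array([0.10, 0.95])),
]
best = min(items, key=lambda item: expected_posterior_entropy(prior, item[1]))[0]
print("select:", best)  # the item with the largest expected entropy reduction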

References

Almond, R. G., Mislevy, R. J., Steinberg, L. S., Yan, D., \& Williamson, D. M. (2015). Bayesian Networks in Educational Assessment. New York: Springer Science+Business Media. http://doi.org/10.1007/978-0-387-98138-3

Eggen, T. J. H. M. (2012). Computerized adaptive testing item selection in computerized adaptive learning systems. In T. J. H. M. Eggen \& B. P. Veldkamp (Eds.), Psychometrics in Practice at RCEC. Enschede: RCEC.

Netcoh, S. (2016, March). "What do you mean by 'personalized learning'?" Crosscutting Conversations in Education - Research, Reflections \& Practice. Blog post.

Plajner, M., \& Vomlel, J. (2016). Student Skill Models in Adaptive Testing. In Proceedings of the Eighth International Conference on Probabilistic Graphical Models (pp. 403-414).

Van der Linden, W. J., \& Glas, C. A. (2000). Computerized adaptive testing: Theory and practice. Dordrecht: Kluwer Academic Publishers.


}, keywords = {feedback selection, item selection, network approach, personalized learning}, author = {Nikky van Buuren and Hendrik Straat and Theo Eggen and Jean-Paul Fox} } @conference {2655, title = {Computerized Adaptive Testing for Cognitive Diagnosis in Classroom: A Nonparametric Approach}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In the past decade, cognitive diagnosis models (CDMs) of educational test performance have received increasing attention among educational researchers (for details, see Fu \& Li, 2007, and Rupp, Templin, \& Henson, 2010). CDMs decompose the ability domain of a given test into specific skills, called attributes, each of which an examinee may or may not have mastered. The resulting attribute profile documents the individual's strengths and weaknesses within the ability domain. Cognitive diagnostic computerized adaptive testing (CD-CAT) has been suggested by researchers as a diagnostic tool for assessment and evaluation (e.g., Cheng \& Chang, 2007; Cheng, 2009; Liu, You, Wang, Ding, \& Chang, 2013; Tatsuoka \& Tatsuoka, 1997). While model-based CD-CAT is relatively well researched in the context of large-scale assessments, this type of system has not received the same degree of development in small-scale settings, where it would be most useful. The main challenge is that the statistical estimation techniques successfully applied to parametric CD-CAT require large samples to guarantee reliable calibration of item parameters and accurate estimation of examinees' attribute profiles. In response to this challenge, a nonparametric approach that does not require any parameter calibration, and thus can be used in small educational programs, is proposed. The proposed nonparametric CD-CAT relies on the same principle as the regular CAT algorithm, but uses the nonparametric classification method (Chiu \& Douglas, 2013) to assess and update the student's ability state while the test proceeds. Based on a student's initial responses, a neighborhood of candidate proficiency classes is identified, and items not characteristic of the chosen proficiency classes are precluded from being chosen next. The response to the next item then allows for an update of the skill profile, and the set of possible proficiency classes is further narrowed. In this manner, the nonparametric CD-CAT cycles through item administration and update stages until the most likely proficiency class has been pinpointed. The simulation results show that the proposed method outperformed the compared parametric CD-CAT algorithms, and the differences were significant when the item parameter calibration was not optimal.
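The following sketch illustrates this general cycle under stated assumptions: a conjunctive (DINA-type) rule for ideal response patterns, Hamming distance to identify a neighborhood of candidate proficiency classes, and a simple split heuristic for choosing the next item. The Q-matrix, the radius parameter, and the heuristic are illustrative choices, not the exact procedure of Chiu and Douglas (2013) or of the proposed method.

import numpy as np
from itertools import product

def ideal_response(alpha, q_row):
    # conjunctive (DINA-type) ideal response: 1 iff all required attributes are mastered
    return int(np.all(alpha >= q_row))

def candidate_classes(responses, administered, Q, K, radius=1):
    # keep proficiency classes whose ideal pattern on the administered items
    # lies within `radius` Hamming distance of the closest observed match
    scored = []
    for alpha in product([0, 1], repeat=K):
        alpha = np.array(alpha)
        ideal = np.array([ideal_response(alpha, Q[j]) for j in administered])
        dist = int(np.sum(ideal != responses))
        scored.append((dist, alpha))
    best = min(d for d, _ in scored)
    return [alpha for d, alpha in scored if d <= best + radius]

def next_item(classes, administered, Q):
    # choose the unused item whose ideal responses split the remaining classes most evenly
    best_j, best_balance = None, -1
    for j in range(Q.shape[0]):
        if j in administered:
            continue
        ones = sum(ideal_response(alpha, Q[j]) for alpha in classes)
        balance = min(ones, len(classes) - ones)
        if balance > best_balance:
            best_j, best_balance = j, balance
    return best_j

# illustrative Q-matrix: 5 items x 2 attributes
Q = np.array([[1, 0], [0, 1], [1, 1], [1, 0], [0, 1]])
administered, responses = [0, 1], np.array([1, 0])
classes = candidate_classes(responses, administered, Q, K=2)
print("candidates:", [tuple(a) for a in classes], "next item:", next_item(classes, administered, Q))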

References

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74, 619-632.

Cheng, Y., \& Chang, H. (2007). The modified maximum global discrimination index method for cognitive diagnostic CAT. In D. Weiss (Ed.), Proceedings of the 2007 GMAC Computerized Adaptive Testing Conference.

Chiu, C.-Y., \& Douglas, J. A. (2013). A nonparametric approach to cognitive diagnosis by proximity to ideal response patterns. Journal of Classification, 30, 225-250.

Fu, J., \& Li, Y. (2007). An integrative review of cognitively diagnostic psychometric models. Paper presented at the Annual Meeting of the National Council on Measurement in Education. Chicago, Illinois.

Liu, H., You, X., Wang, W., Ding, S., \& Chang, H. (2013). The development of computerized adaptive testing with cognitive diagnosis for an English achievement test in China. Journal of Classification, 30, 152-172.

Rupp, A. A., Templin, J. L., \& Henson, R. A. (2010). Diagnostic Measurement: Theory, Methods, and Applications. New York: Guilford.

Tatsuoka, K. K., \& Tatsuoka, M. M. (1997). Computerized cognitive diagnostic adaptive testing: Effect on remedial instruction as empirical validation. Journal of Educational Measurement, 34, 3-20.


}, keywords = {CD-CAT, non-parametric approach}, author = {Yuan-Pei Chang and Chia-Yi Chiu and Rung-Ching Tsai} } @conference {2642, title = {The Implementation of Nationwide High Stakes Computerized (adaptive) Testing in the Netherlands}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In this presentation the challenges of implementing (adaptive) digital testing in the Facet system in the Netherlands are discussed. The Netherlands has a long tradition of implementing adaptive testing in educational settings. Since the late 1990s, adaptive testing has been used mostly in low-stakes settings. Several CATs were implemented in student monitoring systems for primary education and for the general subjects of language and arithmetic in vocational education. The only nationwide high-stakes CAT that has been implemented is the WISCAT-pabo: an arithmetic test for students in the first year of primary school teacher colleges. The psychometric advantages of item-based adaptive testing, such as efficiency and high measurement precision, are obvious. But there are also some disadvantages, such as the impossibility of reviewing items during and after the test. During the test the student is not in control of his own test; e.g., he can only navigate forward to the next item. This is one of the reasons that other methods of testing, such as multistage testing, with adaptivity not at the item level but at the subtest level, have become more popular in high-stakes testing.

A main challenge of computerized (adaptive) testing is the implementation of the item bank and the test workflow in a digital system. In 2014 a new nationwide digital system (Facet) was introduced in the Netherlands, with connections to the digital systems of different parties based on international standards (LTI and QTI). The first nationwide tests in the Facet system were flexible exams in Dutch and arithmetic for vocational (and secondary) education, taken as item response theory-based equated linear multiple-form tests administered during five periods per year. Nowadays there are implementations of different methods of (multistage) adaptive testing in the same Facet system (DTT and Acet).

At this conference, other presenters from Cito will elaborate on the psychometric characteristics of these other adaptive testing methods. In this contribution, the system architecture and interoperability of the Facet system will be explained. The emphasis is on the implementation and the problems to be solved in using this digital system in all phases of the (adaptive) testing process: item banking, test construction, design, publication, test taking, analysis, and reporting to the student. An evaluation of the use of the system will be presented.


}, keywords = {High stakes CAT, Netherlands, WISCAT}, url = {https://drive.google.com/open?id=1Kn1PvgioUYaOJ5pykq-_XWnwDU15rRsf}, author = {Mia van Boxel and Theo Eggen} } @conference {2080, title = {Adaptive Item Calibration and Norming: Unique Considerations of a Global Deployment}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, keywords = {CAT, common item equating, Figural Reasoning Test, item calibration, norming}, author = {Alexander Schwall and Evan Sinar} } @article {293, title = {The NAPLEX: evolution, purpose, scope, and educational implications}, journal = {American Journal of Pharmaceutical Education}, volume = {72}, number = {2}, year = {2008}, note = {Newton, David WBoyle, MariaCatizone, Carmen AHistorical ArticleUnited StatesAmerican journal of pharmaceutical educationAm J Pharm Educ. 2008 Apr 15;72(2):33.}, month = {Apr 15}, pages = {33}, edition = {2008/05/17}, abstract = {Since 2004, passing the North American Pharmacist Licensure Examination (NAPLEX) has been a requirement for earning initial pharmacy licensure in all 50 United States. The creation and evolution from 1952-2005 of the particular pharmacy competency testing areas and quantities of questions are described for the former paper-and-pencil National Association of Boards of Pharmacy Licensure Examination (NABPLEX) and the current candidate-specific computer adaptive NAPLEX pharmacy licensure examinations. A 40\% increase in the weighting of NAPLEX Blueprint Area 2 in May 2005, compared to that in the preceding 1997-2005 Blueprint, has implications for candidates{\textquoteright} NAPLEX performance and associated curricular content and instruction. New pharmacy graduates{\textquoteright} scores on the NAPLEX are neither intended nor validated to serve as a criterion for assessing or judging the quality or effectiveness of pharmacy curricula and instruction. The newest cycle of NAPLEX Blueprint revision, a continual process to ensure representation of nationwide contemporary practice, began in early 2008. It may take up to 2 years, including surveying several thousand national pharmacists, to complete.}, keywords = {*Educational Measurement, Education, Pharmacy/*standards, History, 20th Century, History, 21st Century, Humans, Licensure, Pharmacy/history/*legislation \& jurisprudence, North America, Pharmacists/*legislation \& jurisprudence, Software}, isbn = {1553-6467 (Electronic)0002-9459 (Linking)}, author = {Newton, D. W. and Boyle, M. and Catizone, C. A.} } @article {210, title = {Computerized adaptive testing for measuring development of young children}, journal = {Statistics in Medicine}, volume = {26}, number = {13}, year = {2007}, note = {Jacobusse, GertBuuren, Stef vanEnglandStatistics in medicineStat Med. 2007 Jun 15;26(13):2629-38.}, month = {Jun 15}, pages = {2629-38}, edition = {2006/11/30}, abstract = {Developmental indicators that are used for routine measurement in The Netherlands are usually chosen to optimally identify delayed children. Measurements on the majority of children without problems are therefore quite imprecise. This study explores the use of computerized adaptive testing (CAT) to monitor the development of young children. CAT is expected to improve the measurement precision of the instrument. We do two simulation studies - one with real data and one with simulated data - to evaluate the usefulness of CAT. 
It is shown that CAT selects developmental indicators that maximally match the individual child, so that all children can be measured to the same precision.}, keywords = {*Child Development, *Models, Statistical, Child, Preschool, Diagnosis, Computer-Assisted/*statistics \& numerical data, Humans, Netherlands}, isbn = {0277-6715 (Print)}, author = {Jacobusse, G. and Buuren, S.} } @article {363, title = {The initial development of an item bank to assess and screen for psychological distress in cancer patients}, journal = {Psycho-Oncology}, volume = {16}, number = {8}, year = {2007}, note = {10.1002/pon.1117Journal; Peer Reviewed Journal; Journal Article}, pages = {724-732}, abstract = {Psychological distress is a common problem among cancer patients. Despite the large number of instruments that have been developed to assess distress, their utility remains disappointing. This study aimed to use Rasch models to develop an item-bank which would provide the basis for better means of assessing psychological distress in cancer patients. An item bank was developed from eight psychological distress questionnaires using Rasch analysis to link common items. Items from the questionnaires were added iteratively with common items as anchor points and misfitting items (infit mean square > 1.3) removed, and unidimensionality assessed. A total of 4914 patients completed the questionnaires providing an initial pool of 83 items. Twenty items were removed resulting in a final pool of 63 items. Good fit was demonstrated and no additional factor structure was evident from the residuals. However, there was little overlap between item locations and person measures, since items mainly targeted higher levels of distress. The Rasch analysis allowed items to be pooled and generated a unidimensional instrument for measuring psychological distress in cancer patients. Additional items are required to more accurately assess patients across the whole continuum of psychological distress. (PsycINFO Database Record (c) 2007 APA ) (journal abstract)}, keywords = {3293 Cancer, cancer patients, Distress, initial development, Item Response Theory, Models, Neoplasms, Patients, Psychological, psychological distress, Rasch, Stress}, isbn = {1057-9249}, author = {Smith, A. B. and Rush, R. and Velikova, G. and Wall, L. and Wright, E. P. and Stark, D. and Selby, P. and Sharpe, M.} } @article {86, title = {IRT health outcomes data analysis project: an overview and summary}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl. 1}, year = {2007}, note = {Cook, Karon FTeal, Cayla RBjorner, Jakob BCella, DavidChang, Chih-HungCrane, Paul KGibbons, Laura EHays, Ron DMcHorney, Colleen AOcepek-Welikson, KatjaRaczek, Anastasia ETeresi, Jeanne AReeve, Bryce B1U01AR52171-01/AR/United States NIAMSR01 (CA60068)/CA/United States NCIY1-PC-3028-01/PC/United States NCIResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:121-32. Epub 2007 Mar 10.}, pages = {121-132}, edition = {2007/03/14}, abstract = {BACKGROUND: In June 2004, the National Cancer Institute and the Drug Information Association co-sponsored the conference, "Improving the Measurement of Health Outcomes through the Applications of Item Response Theory (IRT) Modeling: Exploration of Item Banks and Computer-Adaptive Assessment." A component of the conference was presentation of a psychometric and content analysis of a secondary dataset. 
OBJECTIVES: A thorough psychometric and content analysis was conducted of two primary domains within a cancer health-related quality of life (HRQOL) dataset. RESEARCH DESIGN: HRQOL scales were evaluated using factor analysis for categorical data, IRT modeling, and differential item functioning analyses. In addition, computerized adaptive administration of HRQOL item banks was simulated, and various IRT models were applied and compared. SUBJECTS: The original data were collected as part of the NCI-funded Quality of Life Evaluation in Oncology (Q-Score) Project. A total of 1,714 patients with cancer or HIV/AIDS were recruited from 5 clinical sites. MEASURES: Items from 4 HRQOL instruments were evaluated: Cancer Rehabilitation Evaluation System-Short Form, European Organization for Research and Treatment of Cancer Quality of Life Questionnaire, Functional Assessment of Cancer Therapy and Medical Outcomes Study Short-Form Health Survey. RESULTS AND CONCLUSIONS: Four lessons learned from the project are discussed: the importance of good developmental item banks, the ambiguity of model fit results, the limits of our knowledge regarding the practical implications of model misfit, and the importance in the measurement of HRQOL of construct definition. With respect to these lessons, areas for future research are suggested. The feasibility of developing item banks for broad definitions of health is discussed.}, keywords = {*Data Interpretation, Statistical, *Health Status, *Quality of Life, *Questionnaires, *Software, Female, HIV Infections/psychology, Humans, Male, Neoplasms/psychology, Outcome Assessment (Health Care)/*methods, Psychometrics, Stress, Psychological}, isbn = {0962-9343 (Print)}, author = {Cook, K. F. and Teal, C. R. and Bjorner, J. B. and Cella, D. and Chang, C-H. and Crane, P. K. and Gibbons, L. E. and Hays, R. D. and McHorney, C. A. and Ocepek-Welikson, K. and Raczek, A. E. and Teresi, J. A. and Reeve, B. B.} } @article {306, title = {Test design optimization in CAT early stage with the nominal response model}, journal = {Applied Psychological Measurement}, volume = {31}, number = {3}, year = {2007}, pages = {213-232}, publisher = {Sage Publications: US}, abstract = {The early stage of computerized adaptive testing (CAT) refers to the phase of the trait estimation during the administration of only a few items. This phase can be characterized by bias and instability of estimation. In this study, an item selection criterion is introduced in an attempt to lessen this instability: the D-optimality criterion. A polytomous unconstrained CAT simulation is carried out to evaluate this criterion{\textquoteright}s performance under different test premises. The simulation shows that the extent of early stage instability depends primarily on the quality of the item pool information and its size and secondarily on the item selection criteria. The efficiency of the D-optimality criterion is similar to the efficiency of other known item selection criteria. Yet, it often yields estimates that, at the beginning of CAT, display a more robust performance against instability. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, nominal response model, robust performance, test design optimization}, isbn = {0146-6216 (Print)}, author = {Passos, V. L. and Berger, M. P. F. and Tan, F. 
E.} } @article {237, title = {Factor analysis techniques for assessing sufficient unidimensionality of cancer related fatigue}, journal = {Quality of Life Research}, volume = {15}, number = {7}, year = {2006}, note = {0962-9343 (Print)Journal ArticleResearch Support, N.I.H., Extramural}, month = {Sep}, pages = {1179-90}, abstract = {BACKGROUND: Fatigue is the most common unrelieved symptom experienced by people with cancer. The purpose of this study was to examine whether cancer-related fatigue (CRF) can be summarized using a single score, that is, whether CRF is sufficiently unidimensional for measurement approaches that require or assume unidimensionality. We evaluated this question using factor analysis techniques including the theory-driven bi-factor model. METHODS: Five hundred and fifty five cancer patients from the Chicago metropolitan area completed a 72-item fatigue item bank, covering a range of fatigue-related concerns including intensity, frequency and interference with physical, mental, and social activities. Dimensionality was assessed using exploratory and confirmatory factor analysis (CFA) techniques. RESULTS: Exploratory factor analysis (EFA) techniques identified from 1 to 17 factors. The bi-factor model suggested that CRF was sufficiently unidimensional. CONCLUSIONS: CRF can be considered sufficiently unidimensional for applications that require unidimensionality. One such application, item response theory (IRT), will facilitate the development of short-form and computer-adaptive testing. This may further enable practical and accurate clinical assessment of CRF.}, keywords = {*Factor Analysis, Statistical, *Quality of Life, Aged, Chicago, Fatigue/*etiology, Female, Humans, Male, Middle Aged, Neoplasms/*complications, Questionnaires}, author = {Lai, J-S. and Crane, P. K. and Cella, D.} } @article {236, title = {An item bank was created to improve the measurement of cancer-related fatigue}, journal = {Journal of Clinical Epidemiology}, volume = {58}, number = {2}, year = {2005}, note = {Lai, Jin-SheiCella, DavidDineen, KellyBode, RitaVon Roenn, JamieGershon, Richard CShevrin, DanielEnglandJ Clin Epidemiol. 2005 Feb;58(2):190-7.}, month = {Feb}, pages = {190-7}, type = {Multicenter Study}, edition = {2005/02/01}, abstract = {OBJECTIVE: Cancer-related fatigue (CRF) is one of the most common unrelieved symptoms experienced by patients. CRF is underrecognized and undertreated due to a lack of clinically sensitive instruments that integrate easily into clinics. Modern computerized adaptive testing (CAT) can overcome these obstacles by enabling precise assessment of fatigue without requiring the administration of a large number of questions. A working item bank is essential for development of a CAT platform. The present report describes the building of an operational item bank for use in clinical settings with the ultimate goal of improving CRF identification and treatment. STUDY DESIGN AND SETTING: The sample included 301 cancer patients. Psychometric properties of items were examined by using Rasch analysis, an Item Response Theory (IRT) model. RESULTS AND CONCLUSION: The final bank includes 72 items. These 72 unidimensional items explained 57.5\% of the variance, based on factor analysis results. Excellent internal consistency (alpha=0.99) and acceptable item-total correlation were found (range: 0.51-0.85). The 72 items covered a reasonable range of the fatigue continuum. No significant ceiling effects, floor effects, or gaps were found. 
A sample short form was created for demonstration purposes. The resulting bank is amenable to the development of a CAT platform.}, keywords = {Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Fatigue/*etiology/psychology, Female, Humans, Male, Middle Aged, Neoplasms/*complications/psychology, Psychometrics, Questionnaires}, isbn = {0895-4356 (Print)0895-4356 (Linking)}, author = {Lai, J-S. and Cella, D. and Dineen, K. and Bode, R. and Von Roenn, J. and Gershon, R. C. and Shevrin, D.} } @booklet {201, title = {The AMC Linear Disability Score project in a population requiring residential care: psychometric properties}, journal = {Health and Quality of Life Outcomes}, volume = {2}, year = {2004}, note = {Holman, RebeccaLindeboom, RobertVermeulen, Marinusde Haan, Rob JResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2004 Aug 3;2:42.}, month = {Aug 3}, pages = {42}, edition = {2004/08/05}, abstract = {BACKGROUND: Currently there is a lot of interest in the flexible framework offered by item banks for measuring patient relevant outcomes, including functional status. However, there are few item banks, which have been developed to quantify functional status, as expressed by the ability to perform activities of daily life. METHOD: This paper examines the psychometric properties of the AMC Linear Disability Score (ALDS) project item bank using an item response theory model and full information factor analysis. Data were collected from 555 respondents on a total of 160 items. RESULTS: Following the analysis, 79 items remained in the item bank. The remaining 81 items were excluded because of: difficulties in presentation (1 item); low levels of variation in response pattern (28 items); significant differences in measurement characteristics for males and females or for respondents under or over 85 years old (26 items); or lack of model fit to the data at item level (26 items). CONCLUSIONS: It is conceivable that the item bank will have different measurement characteristics for other patient or demographic populations. However, these results indicate that the ALDS item bank has sound psychometric properties for respondents in residential care settings and could form a stable base for measuring functional status in a range of situations, including the implementation of computerised adaptive testing of functional status.}, keywords = {*Disability Evaluation, *Health Status Indicators, Activities of Daily Living/*classification, Adult, Aged, Aged, 80 and over, Data Collection/methods, Female, Humans, Logistic Models, Male, Middle Aged, Netherlands, Pilot Projects, Probability, Psychometrics/*instrumentation, Questionnaires/standards, Residential Facilities/*utilization, Severity of Illness Index}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Holman, R. and Lindeboom, R. and Vermeulen, M. and de Haan, R. J.} } @article {34, title = {Developing an initial physical function item bank from existing sources}, journal = {Journal of Applied Measurement}, volume = {4}, number = {2}, year = {2003}, note = {1529-7713Journal Article}, pages = {124-36}, abstract = {The objective of this article is to illustrate incremental item banking using health-related quality of life data collected from two samples of patients receiving cancer treatment. The kinds of decisions one faces in establishing an item bank for computerized adaptive testing are also illustrated. 
Pre-calibration procedures include: identifying common items across databases; creating a new database with data from each pool; reverse-scoring "negative" items; identifying rating scales used in items; identifying pivot points in each rating scale; pivot anchoring items at comparable rating scale categories; and identifying items in each instrument that measure the construct of interest. A series of calibrations were conducted in which a small proportion of new items were added to the common core and misfitting items were identified and deleted until an initial item bank has been developed.}, keywords = {*Databases, *Sickness Impact Profile, Adaptation, Psychological, Data Collection, Humans, Neoplasms/*physiopathology/psychology/therapy, Psychometrics, Quality of Life/*psychology, Research Support, U.S. Gov{\textquoteright}t, P.H.S., United States}, author = {Bode, R. K. and Cella, D. and Lai, J. S. and Heinemann, A. W.} } @article {238, title = {Item banking to improve, shorten and computerized self-reported fatigue: an illustration of steps to create a core item bank from the FACIT-Fatigue Scale}, journal = {Quality of Life Research}, volume = {12}, number = {5}, year = {2003}, note = {0962-9343Journal Article}, month = {Aug}, pages = {485-501}, abstract = {Fatigue is a common symptom among cancer patients and the general population. Due to its subjective nature, fatigue has been difficult to effectively and efficiently assess. Modern computerized adaptive testing (CAT) can enable precise assessment of fatigue using a small number of items from a fatigue item bank. CAT enables brief assessment by selecting questions from an item bank that provide the maximum amount of information given a person{\textquoteright}s previous responses. This article illustrates steps to prepare such an item bank, using 13 items from the Functional Assessment of Chronic Illness Therapy Fatigue Subscale (FACIT-F) as the basis. Samples included 1022 cancer patients and 1010 people from the general population. An Item Response Theory (IRT)-based rating scale model, a polytomous extension of the Rasch dichotomous model was utilized. Nine items demonstrating acceptable psychometric properties were selected and positioned on the fatigue continuum. The fatigue levels measured by these nine items along with their response categories covered 66.8\% of the general population and 82.6\% of the cancer patients. Although the operational CAT algorithms to handle polytomously scored items are still in progress, we illustrated how CAT may work by using nine core items to measure level of fatigue. Using this illustration, a fatigue measure comparable to its full-length 13-item scale administration was obtained using four items. The resulting item bank can serve as a core to which will be added a psychometrically sound and operational item bank covering the entire fatigue continuum.}, keywords = {*Health Status Indicators, *Questionnaires, Adult, Fatigue/*diagnosis/etiology, Female, Humans, Male, Middle Aged, Neoplasms/complications, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sickness Impact Profile}, author = {Lai, J-S. and Crane, P. K. and Cella, D. and Chang, C-H. and Bode, R. K. and Heinemann, A. 
W.} } @article {48, title = {Advances in quality of life measurements in oncology patients}, journal = {Seminars in Oncology}, volume = {29}, number = {3 Suppl 8}, year = {2002}, note = {0093-7754 (Print)Journal ArticleReview}, month = {Jun}, pages = {60-8}, abstract = {Accurate assessment of the quality of life (QOL) of patients can provide important clinical information to physicians, especially in the area of oncology. Changes in QOL are important indicators of the impact of a new cytotoxic therapy, can affect a patient{\textquoteright}s willingness to continue treatment, and may aid in defining response in the absence of quantifiable endpoints such as tumor regression. Because QOL is becoming an increasingly important aspect in the management of patients with malignant disease, it is vital that the instruments used to measure QOL are reliable and accurate. Assessment of QOL involves a multidimensional approach that includes physical, functional, social, and emotional well-being, and the most comprehensive instruments measure at least three of these domains. Instruments to measure QOL can be generic (eg, the Nottingham Health Profile), targeted toward specific illnesses (eg, Functional Assessment of Cancer Therapy - Lung), or be a combination of generic and targeted. Two of the most widely used examples of the combination, or hybrid, instruments are the European Organization for Research and Treatment of Cancer Quality of Life Questionnaire Core 30 Items and the Functional Assessment of Chronic Illness Therapy. A consequence of the increasing international collaboration in clinical trials has been the growing necessity for instruments that are valid across languages and cultures. To assure the continuing reliability and validity of QOL instruments in this regard, item response theory can be applied. Techniques such as item response theory may be used in the future to construct QOL item banks containing large sets of validated questions that represent various levels of QOL domains. As QOL becomes increasingly important in understanding and approaching the overall management of cancer patients, the tools available to clinicians and researchers to assess QOL will continue to evolve. While the instruments currently available provide reliable and valid measurement, further improvements in precision and application are anticipated.}, keywords = {*Quality of Life, *Sickness Impact Profile, Cross-Cultural Comparison, Culture, Humans, Language, Neoplasms/*physiopathology, Questionnaires}, author = {Cella, D. and Chang, C-H. and Lai, J. S. and Webster, K.} } @article {187, title = {Development of an index of physical functional health status in rehabilitation}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {83}, number = {5}, year = {2002}, note = {0003-9993 (Print)Journal Article}, month = {May}, pages = {655-65}, abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. 
MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS. The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.}, keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies}, author = {Hart, D. L. and Wright, B. D.} } @article {358, title = {Developments in measurement of persons and items by means of item response models}, journal = {Behaviormetrika}, volume = {28}, number = {1}, year = {2001}, pages = {65-94}, abstract = {This paper starts with a general introduction into measurement of hypothetical constructs typical of the social and behavioral sciences. After the stages ranging from theory through operationalization and item domain to preliminary test or questionnaire have been treated, the general assumptions of item response theory are discussed. The family of parametric item response models for dichotomous items is introduced and it is explained how parameters for respondents and items are estimated from the scores collected from a sample of respondents who took the test or questionnaire. Next, the family of nonparametric item response models is explained, followed by the 3 classes of item response models for polytomous item scores (e.g., rating scale scores). Then, to what degree the mean item score and the unweighted sum of item scores for persons are useful for measuring items and persons in the context of item response theory is discussed. Methods for fitting parametric and nonparametric models to data are briefly discussed. Finally, the main applications of item response models are discussed, which include equating and item banking, computerized and adaptive testing, research into differential item functioning, person fit research, and cognitive modeling. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Cognitive, Computer Assisted Testing, Item Response Theory, Models, Nonparametric Statistical Tests, Processes}, author = {Sijtsma, K.} } @article {216, title = {Dispelling myths about the new NCLEX exam}, journal = {Recruitment, Retention, and Restructuring Report}, volume = {9}, number = {1}, year = {1996}, note = {Journal Article}, month = {Jan-Feb}, pages = {6-7}, abstract = {The new computerized NCLEX system is working well. 
Most new candidates, employers, and board of nursing representatives like the computerized adaptive testing system and the fast report of results. But, among the candidates themselves some myths have grown which cause them needless anxiety.}, keywords = {*Educational Measurement, *Licensure, Humans, Nursing Staff, Personnel Selection, United States}, author = {Johnson, S. H.} } @article {126, title = {Computerized adaptive testing for NCLEX-PN}, journal = {Journal of Practical Nursing}, volume = {42}, number = {2}, year = {1992}, note = {Fields, F AUnited statesThe Journal of practical nursingJ Pract Nurs. 1992 Jun;42(2):8-10.}, month = {Jun}, pages = {8-10}, edition = {1992/06/01}, keywords = {*Licensure, *Programmed Instruction, Educational Measurement/*methods, Humans, Nursing, Practical/*education}, isbn = {0022-3867 (Print)}, author = {Fields, F. A.} }